In [None]:
!pip cache purge


Found existing installation: torch 2.7.0
Uninstalling torch-2.7.0:
  Would remove:
    /home/manoj/Desktop/MH/.venv/bin/torchfrtrace
    /home/manoj/Desktop/MH/.venv/bin/torchrun
    /home/manoj/Desktop/MH/.venv/lib/python3.12/site-packages/functorch/*
    /home/manoj/Desktop/MH/.venv/lib/python3.12/site-packages/torch-2.7.0.dist-info/*
    /home/manoj/Desktop/MH/.venv/lib/python3.12/site-packages/torch/*
    /home/manoj/Desktop/MH/.venv/lib/python3.12/site-packages/torchgen/*
Proceed (Y/n)? [31mERROR: Operation cancelled by user[0m[31m
[0m^C
Files removed: 18 (30.6 MB)
Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.5.1%2Bcu121-cp312-cp312-linux_x86_64.whl (3.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
INFO: pip is looking at multiple versions of torchaudio to determine which version is compatible with o

In [2]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [12]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

# ---- 1. Load data ----
df = pd.read_csv("binarized_outputs/subset_18.9_72.8167.csv")
rainfall = df["Rainfall"].values.astype(np.float32)

# ---- 2. Create sliding windows ----
def create_sequences(data, window_size=30):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data[i:i+window_size])
        y.append(data[i+window_size])  # Next value as label
    return np.array(X), np.array(y)

window_size = 30
X, y = create_sequences(rainfall, window_size=window_size)

# ---- 3. Train-test split ----
split_idx = int(0.8 * len(X))
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

# ---- 4. Custom Dataset ----
class RainDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X).unsqueeze(1)  # Shape: [N, 1, window_size]
        self.y = torch.tensor(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = RainDataset(X_train, y_train)
test_dataset = RainDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# ---- 5. CNN Model ----
class RainCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=15, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.fc = nn.Linear(16, 1)

    def forward(self, x):
        x = self.relu(self.conv1(x))     # [B, 16, W]
        x = self.pool(x).squeeze(-1)     # [B, 16]
        x = self.fc(x).squeeze(-1)       # [B]
        return x  # <- No sigmoid here


# ---- 6. Training Loop ----
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model = RainCNN().to(device)
# Compute positive and negative class weights
pos_weight = (len(y_train) - y_train.sum()) / y_train.sum()
pos_weight_tensor = torch.tensor(pos_weight, dtype=torch.float32).to(device)

# Use BCEWithLogitsLoss with pos_weight
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        logits = model(X_batch)
        loss = criterion(logits, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}")


Using device: cuda
Epoch 1: Loss = 20.6480
Epoch 2: Loss = 20.4672
Epoch 3: Loss = 20.4136
Epoch 4: Loss = 20.3378
Epoch 5: Loss = 20.2653
Epoch 6: Loss = 20.0750
Epoch 7: Loss = 20.1824
Epoch 8: Loss = 19.9173
Epoch 9: Loss = 19.9018
Epoch 10: Loss = 20.0102


In [13]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test).unsqueeze(1).to(device)
    y_test_tensor = torch.tensor(y_test).to(device)

    logits = model(X_test_tensor).squeeze()
    probs = torch.sigmoid(logits).cpu().numpy()
    preds_label = (probs > 0.5).astype(int)
    y_true = y_test_tensor.cpu().numpy()


    # Metrics
    acc = accuracy_score(y_true, preds_label)
    prec = precision_score(y_true, preds_label)
    rec = recall_score(y_true, preds_label)
    f1 = f1_score(y_true, preds_label)
    cm = confusion_matrix(y_true, preds_label)

    print(f"Test Accuracy  : {acc:.4f}")
    print(f"Precision      : {prec:.4f}")
    print(f"Recall         : {rec:.4f}")
    print(f"F1 Score       : {f1:.4f}")
    print("Confusion Matrix:\n", cm)


Test Accuracy  : 0.8296
Precision      : 0.0000
Recall         : 0.0000
F1 Score       : 0.0000
Confusion Matrix:
 [[112   0]
 [ 23   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
