In [None]:
import pandas as pd
import torch 
from torch.utils.data import DataLoader, TensorDataset, random_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.nn import BCEWithLogitsLoss

In [None]:
# Pin computation to the CPU; later cells pass this as `device=` when building tensors.
device = torch.device("cpu")

In [None]:
# Display the selected device.
device

In [None]:
# Load the training data.
# NOTE(review): this path starts with "Data/" while the test file is read from "data/";
# confirm the real directory name, since the two differ on case-sensitive filesystems.
data = pd.read_csv("Data/train.csv")

In [None]:
# Remove the identifier and day columns, but only when both are still present
# (the guard also lets this cell be re-run after they have been dropped).
if {"id", "day"}.issubset(data.columns):
    data.drop(columns=["id", "day"], inplace=True)


In [None]:
# Separate the target column from the predictors.
y = data["rainfall"]
X = data.drop(columns=["rainfall"])

In [None]:
# (rows, columns) of the frame after the optional id/day drop above.
data.shape

In [None]:
# Shuffled 80/20 split; the fixed random_state makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 42, shuffle = True)

# Standardisation

In [None]:
# NumPy versions of both feature splits.
# NOTE(review): nothing later in the visible notebook reads these two arrays.
X_train_np, X_test_np = (frame.to_numpy() for frame in (X_train, X_test))

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics leak into training.
Std = StandardScaler().fit(X_train)

X_std = Std.transform(X_train)
X_t_std = Std.transform(X_test)



In [None]:
# Convert both standardised arrays to float32 tensors on the chosen device.
X_train, X_test = (
    torch.tensor(scaled, dtype=torch.float32, device=device)
    for scaled in (X_std, X_t_std)
)


In [None]:
# Put samples along the second axis: features become (n_features, n_samples)
# and labels become a single row of shape (1, n_samples).
# NOTE: this cell rebinds its own inputs, so it must be run exactly once per kernel session.
X_train, X_test = X_train.T, X_test.T

y_train = y_train.values[None, :]
y_test = y_test.values[None, :]

In [None]:
# Sanity-check the layout: after the transpose above, samples run along the second axis.
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")



In [None]:
# Wrap the (1, n_samples) label array in a tensor on the chosen device.
# No dtype is passed, so it is inferred from the NumPy array.
y_train = torch.tensor(y_train, device = device)

# Weight Initialization and Training

In [None]:
# Train a one-hidden-layer binary classifier (ReLU hidden layer, sigmoid output)
# with full-batch gradient descent on hand-managed parameters.
# Layout, as prepared by the cells above: X_train is (n_features, n_samples),
# y_train is (1, n_samples) holding the 0/1 labels.
torch.manual_seed(1)  # fixed seed so re-running the cell reproduces the same weights

n_features = X_train.shape[0]  # taken from the data instead of a hard-coded 10
n_hidden = 10

w1 = torch.randn([n_hidden, n_features], dtype = torch.float32, device = device, requires_grad = True)
b1 = torch.full([n_hidden, 1], 0.01, dtype = torch.float32, device = device, requires_grad = True)

w2 = torch.randn([1, n_hidden], dtype = torch.float32, device = device, requires_grad = True)
b2 = torch.full([1, 1], 0.01, dtype = torch.float32, device = device, requires_grad = True)

lr = 0.1

# The loss is computed from the logits rather than from log(sigmoid(z)):
# once the sigmoid saturates to exactly 0 or 1 in float32, the hand-written
# cross-entropy evaluates log(0) and the loss becomes inf/NaN.
# BCEWithLogitsLoss (mean reduction) is the same quantity in a stable form.
criterion = BCEWithLogitsLoss()
targets = y_train.to(dtype = torch.float32)  # the loss requires float targets

for i in range(10000):

    # Forward pass.
    Z1 = torch.matmul(w1, X_train) + b1
    A1 = torch.relu(Z1)

    Z2 = torch.matmul(w2, A1) + b2
    A2 = torch.sigmoid(Z2)  # probabilities; not used by the loss, kept for inspection

    loss = criterion(Z2, targets)

    loss.backward()

    # Plain gradient-descent step, then clear the accumulated gradients so the
    # next backward() starts from zero.
    with torch.no_grad():
        for param in (w1, b1, w2, b2):
            param -= lr * param.grad
            param.grad.zero_()

    if i%1000 == 0:
        print(f"At Iteration {i}, Loss = {loss}")
    
                       

    
    


In [None]:
# Loss computed in the last training iteration.
loss

# Predictions

In [None]:
# Check the hold-out matrix layout; samples run along the second axis after the earlier transpose.
X_test.shape

In [None]:
# Forward pass on the hold-out split using the trained parameters.
# This is inference only, so run it under no_grad: the outputs then carry no
# autograd graph and no memory is spent tracking operations.
with torch.no_grad():
    final_Z1 = torch.matmul(w1, X_test) + b1
    final_A1 = torch.relu(final_Z1)

    final_Z2 = torch.matmul(w2, final_A1) + b2
    final_A2 = torch.sigmoid(final_Z2)  # predicted probabilities



In [None]:
# Predicted probabilities for the hold-out split.
final_A2

In [None]:
# Threshold the probabilities at 0.5 to obtain hard 0/1 class labels.
y_pred = (final_A2 >= 0.5).int()

In [None]:
# Convert the hold-out labels to a float32 tensor. Create it on `device` like
# every other tensor in the notebook so the comparison with `y_pred` keeps
# working if `device` is ever changed from the CPU.
y_test = torch.tensor(y_test, dtype = torch.float32, device = device)

In [None]:
# Hold-out accuracy: fraction of thresholded predictions that equal the true labels.
(y_test == y_pred).float().mean().item()

# Kaggle Submission

In [None]:
# Load the Kaggle test set that the submission is generated from.
k_test = pd.read_csv("data/test.csv")

In [None]:
# Drop the same non-feature columns that were removed from the training data.
# errors="ignore" keeps the cell safe to re-run: the original in-place drop
# raised KeyError the second time because the columns were already gone.
k_test = k_test.drop(columns = ["id", "day"], errors = "ignore")

In [None]:
# Count missing values per column before imputing.
k_test.isnull().sum()

In [None]:
# Impute missing wind directions with the most frequent value in the test set.
# The result is assigned back to the column: calling fillna(inplace=True) on
# `k_test["winddirection"]` is chained assignment, which pandas warns about and
# which no longer updates `k_test` once Copy-on-Write is active.
k_test["winddirection"] = k_test["winddirection"].fillna(k_test["winddirection"].mode()[0])


In [None]:
# Re-check missing values after the fill above.
k_test.isnull().sum()

In [None]:
# Scale with the scaler that was fitted on the training split (transform only, no refit).
k_test_std = Std.transform(k_test)

In [None]:
# NOTE: rebinds `k_test` from a DataFrame to a float32 tensor on the chosen device.
k_test = torch.tensor(k_test_std, device = device, dtype = torch.float32)

In [None]:
# Transpose so samples run along the second axis, as was done for the training matrix.
k_test = k_test.T

## Predictions

In [None]:
# Forward pass on the Kaggle test set using the trained parameters.
# This is inference only, so run it under no_grad: the outputs then carry no
# autograd graph and no memory is spent tracking operations.
with torch.no_grad():
    test_Z1 = torch.matmul(w1, k_test) + b1
    test_A1 = torch.relu(test_Z1)

    test_Z2 = torch.matmul(w2, test_A1) + b2
    test_A2 = torch.sigmoid(test_Z2)  # predicted probabilities



In [None]:
# Threshold the probabilities at 0.5 to obtain hard 0/1 class labels.
y_test_pred = (test_A2 >= 0.5).int()

In [None]:
# Display the hard class predictions for the Kaggle test set.
y_test_pred

In [None]:
# Detach from autograd, move to host memory, drop the size-1 axis and hand the
# probabilities to NumPy so they can be written to the CSV.
y_test_prob = test_A2.detach().cpu().squeeze().numpy()


In [None]:
# Display the predicted probabilities that will be submitted.
y_test_prob

In [None]:
# Re-read the raw test file: `k_test` was rebound to a tensor earlier, and the
# submission needs the original "id" column.
k_test = pd.read_csv("data/test.csv")

# Guarantee a 1-D probability vector regardless of how it was squeezed.
y_test_prob = y_test_prob.flatten()

# One row per test id, paired with the predicted rain probability
# (the ids come from the file's "id" column, not from the row index).
submission = pd.DataFrame({"id": k_test["id"], "rainfall": y_test_prob})

# Write the file without the DataFrame index column.
submission.to_csv("submission.csv", index=False)

print("✅ Submission file saved as 'submission.csv'. Ready for Kaggle upload! 🎯")