In [1]:
import pandas as pd
import torch 
from torch.utils.data import DataLoader, TensorDataset, random_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split 
from torch.nn import BCEWithLogitsLoss
import numpy as np
import numpy as np

In [2]:
device = torch.device("cpu")

In [3]:
device

device(type='cpu')

In [4]:
data = pd.read_csv("Data/train.csv")

In [5]:
if "id" in data.columns and "day" in data.columns:
    
    data.drop(["id", "day"], axis = 1, inplace = True)


In [6]:
X = data.drop("rainfall", axis = 1)
y = data["rainfall"]

In [7]:
data.shape

(2190, 11)

In [15]:
data["rainfall"].value_counts(normalize= True)

rainfall
1    0.753425
0    0.246575
Name: proportion, dtype: float64

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 42, shuffle = True)

# Standardised

In [167]:
X_train_np = X_train.to_numpy()
X_test_np = X_test.to_numpy()

In [168]:
Std = StandardScaler()

X_std = Std.fit_transform(X_train)
X_t_std = Std.transform(X_test)



In [169]:
X_train = torch.tensor(X_std, device = device, dtype = torch.float32)
X_test = torch.tensor(X_t_std, device = device, dtype = torch.float32)


In [170]:
X_train = X_train.T
y_train = y_train.values.reshape(1,-1)

X_test = X_test.T
y_test = y_test.values.reshape(1, -1)

In [171]:
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")



X_train shape: torch.Size([10, 1752])
y_train shape: (1, 1752)


In [172]:
y_train = torch.tensor(y_train, device = device)

In [173]:
X_train.shape

torch.Size([10, 1752])

# Weights Initialization

In [202]:
torch.manual_seed(1)

w1 = torch.randn([10, 10], dtype = torch.float32, device = device, requires_grad = True)
b1 = torch.full([10,1], 0.01, dtype = torch.float32, device = device, requires_grad = True)


w2 = torch.randn([1, 10], dtype = torch.float32, device = device, requires_grad = True)
b2 = torch.full([1,1],0.01, dtype = torch.float32, device = device, requires_grad = True)

          

    
    


In [203]:
params_grid = {
"lr" : [0.1, 0.01, 0.001, 0.0001],
"ite" : [100, 1000, 10000, 100000],
"batch_size" : [16, 32, 64, 128, 256, 512]
}

Results = []
best_params = {}
best_accuracy = -np.inf

In [210]:

for lr in params_grid["lr"]:
    for ite in params_grid["ite"]:
        for batch_size in params_grid["batch_size"]:
            for i in range(ite):
            
                idx = torch.randint(0, X_train.shape[1], (batch_size, ))
                X_sample = X_train[:, idx]
                y_sample = y_train[:, idx]
            
                Z1 = torch.matmul(w1, X_sample) + b1
                A1 = torch.relu(Z1)
                                                    
                Z2 = torch.matmul(w2, A1) + b2
                A2 = torch.sigmoid(Z2)
            
            
            
                loss = -torch.mean((y_sample*torch.log(A2)) + ((1-y_sample)*torch.log(1-A2)))
            
                loss.backward()
            
                with torch.no_grad():
            
                    w1 -= lr * w1.grad
                    b1 -= lr * b1.grad
            
                    w2 -= lr * w2.grad
                    b2 -= lr * b2.grad
            
            
            
                w1.grad.zero_()
                b1.grad.zero_()
                w2.grad.zero_()
                b2.grad.zero_()
            
                #if i%100000 == 0:
                    #print(f"At Iteration {i}, Loss = {loss}")
        
            final_Z1 = torch.matmul(w1, X_test) + b1
            final_A1 = torch.relu(final_Z1)
            
            final_Z2 = torch.matmul(w2, final_A1) + b2
            final_A2 = torch.sigmoid(final_Z2)
            
            y_pred = (final_A2 >= 0.5).int()
            
            Accuracy = (y_test == y_pred).float().mean().item()
    
            
            Results.append({
                "Learning_rate" : lr,
                "Iteration" : ite,
                "Batch_size": batch_size,
                "Accuracy" : Accuracy,
                "loss" : loss.item(), 
                
            })
    
            if Accuracy >= best_accuracy:
                best_accuracy = Accuracy 
    
                best_params = {
                    "Ite" : ite,
                    "Learning_rate" : lr,
                    "batch_size" : batch_size,
                    "Accuracy" : Accuracy
                }

       



In [212]:
best_params

{'Ite': 1000,
 'Learning_rate': 0.1,
 'batch_size': 512,
 'Accuracy': 0.8493150472640991}

In [208]:
loss.item()

0.5099633932113647

# Predictions

In [150]:
X_test.shape

torch.Size([10, 438])

In [151]:
final_Z1 = torch.matmul(w1, X_test) + b1
final_A1 = torch.relu(final_Z1)

final_Z2 = torch.matmul(w2, final_A1) + b2
final_A2 = torch.sigmoid(final_Z2)



In [152]:
final_A2

tensor([[6.1172e-01, 7.6232e-01, 4.8169e-01, 9.5808e-01, 9.6560e-01, 9.9058e-01,
         9.9076e-01, 9.2262e-01, 7.0839e-01, 9.6504e-01, 9.8487e-01, 8.8726e-01,
         6.4536e-01, 1.0850e-01, 3.2694e-01, 7.1136e-01, 9.6697e-01, 8.5379e-01,
         8.1677e-01, 6.7105e-02, 4.8086e-01, 9.8681e-01, 2.1076e-01, 9.3232e-01,
         3.7371e-02, 9.7269e-01, 8.3395e-01, 7.7542e-01, 9.1609e-01, 9.7966e-01,
         9.9125e-01, 9.0154e-01, 9.0527e-01, 7.1159e-02, 3.6512e-02, 9.9062e-01,
         9.2626e-01, 9.8183e-01, 6.3955e-01, 3.8964e-01, 9.0763e-01, 9.0056e-01,
         9.8331e-01, 9.6483e-01, 9.5170e-01, 9.8514e-01, 7.3565e-01, 9.6929e-01,
         2.0873e-01, 8.9131e-01, 9.6151e-01, 1.2251e-01, 9.6514e-01, 9.7198e-01,
         9.0863e-01, 4.0659e-01, 9.2132e-01, 9.7611e-01, 9.6962e-01, 7.8780e-01,
         5.0257e-01, 9.8070e-01, 9.5479e-01, 9.8337e-01, 9.8388e-01, 8.6632e-01,
         9.5961e-01, 9.6981e-01, 9.6123e-01, 7.1251e-01, 9.5791e-01, 9.7655e-01,
         8.3834e-01, 9.7026e

In [153]:
y_pred = (final_A2 >= 0.5).int()

In [154]:
y_test = torch.tensor(y_test, dtype = torch.float32)

  y_test = torch.tensor(y_test, dtype = torch.float32)


In [155]:
(y_test == y_pred).float().mean().item()

0.844748854637146

# Kaggle Submission

In [144]:
k_test = pd.read_csv("data/test.csv")

In [145]:
k_test.drop(["id", "day"], axis = 1, inplace = True)

In [146]:
k_test.isnull().sum()

pressure         0
maxtemp          0
temparature      0
mintemp          0
dewpoint         0
humidity         0
cloud            0
sunshine         0
winddirection    1
windspeed        0
dtype: int64

In [41]:
k_test["winddirection"].fillna(k_test["winddirection"].mode()[0], inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  k_test["winddirection"].fillna(k_test["winddirection"].mode()[0], inplace=True)


In [42]:
k_test.isnull().sum()

pressure         0
maxtemp          0
temparature      0
mintemp          0
dewpoint         0
humidity         0
cloud            0
sunshine         0
winddirection    0
windspeed        0
dtype: int64

In [43]:
k_test_std = Std.transform(k_test)

In [44]:
k_test = torch.tensor(k_test_std, device = device, dtype = torch.float32)

In [45]:
k_test = k_test.T

## Predictions

In [46]:
test_Z1 = torch.matmul(w1, k_test) + b1
test_A1 = torch.relu(test_Z1)

test_Z2 = torch.matmul(w2, test_A1) + b2
test_A2 = torch.sigmoid(test_Z2)



In [47]:
y_test_pred = (test_A2 >= 0.5).int()

In [48]:
y_test_pred

tensor([[1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
         1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1,
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
         1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
         1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
         1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
         1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0,
         1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
         1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
         1, 1, 1, 1, 1, 1, 1

In [49]:
y_test_prob = test_A2.squeeze().cpu().detach().numpy()  # Convert to NumPy for CSV


In [50]:
y_test_prob

array([9.93108749e-01, 9.90953088e-01, 9.73390460e-01, 7.38606602e-02,
       1.39745353e-02, 9.08041775e-01, 9.63401854e-01, 9.84901965e-01,
       9.77720022e-01, 8.04153383e-01, 9.91400898e-01, 1.98115502e-02,
       9.88853335e-01, 9.85260665e-01, 2.09158987e-01, 1.61510217e-03,
       8.99447680e-01, 7.96839297e-01, 6.61294237e-02, 9.36185126e-04,
       3.00977170e-01, 7.27620423e-02, 8.94426346e-01, 9.91838872e-01,
       9.05807078e-01, 3.28235775e-01, 4.02877759e-03, 9.92687702e-01,
       9.48047340e-01, 6.21690571e-01, 9.69919503e-01, 9.76192772e-01,
       8.44778657e-01, 9.79111910e-01, 8.17008436e-01, 9.57372308e-01,
       2.63905883e-01, 9.51115966e-01, 8.52480352e-01, 9.00979638e-01,
       8.36312056e-01, 9.38207507e-01, 9.55059603e-02, 9.50962543e-01,
       9.68384564e-01, 3.09221774e-01, 2.45900434e-02, 9.82563317e-01,
       1.76036537e-01, 8.80445302e-01, 9.78822529e-01, 9.90248621e-01,
       9.91860092e-01, 9.89055991e-01, 9.89233494e-01, 9.81396377e-01,
      

In [51]:
k_test = pd.read_csv("data/test.csv")

# Ensure y_test_prob is a NumPy array and has correct shape
y_test_prob = y_test_prob.flatten()  # Flatten to ensure it's 1D

# Create submission DataFrame
submission = pd.DataFrame({
    "id": k_test["id"],  # Use the row index as ID
    "rainfall": y_test_prob  # Use predicted probabilities
})

# Save the submission file
submission.to_csv("submission.csv", index=False)

print("✅ Submission file saved as 'submission.csv'. Ready for Kaggle upload! 🎯")

✅ Submission file saved as 'submission.csv'. Ready for Kaggle upload! 🎯
