In [1]:
import pandas as pd
import torch 
from torch.utils.data import DataLoader, TensorDataset, random_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.nn import BCEWithLogitsLoss

In [2]:
device = torch.device("cpu")

In [3]:
device

device(type='cpu')

In [3]:
data = pd.read_csv("Data/train.csv")

In [4]:
if "id" in data.columns and "day" in data.columns:
    
    data.drop(["id", "day"], axis = 1, inplace = True)


In [5]:
X = data.drop("rainfall", axis = 1)
y = data["rainfall"]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 42, shuffle = True)

# Standardised

In [7]:
X_train_np = X_train.to_numpy()
X_test_np = X_test.to_numpy()

In [8]:
Std = StandardScaler()

X_std = Std.fit_transform(X_train)
X_t_std = Std.transform(X_test)



In [9]:
X_train = torch.tensor(X_std, device = device, dtype = torch.float32)
X_test = torch.tensor(X_t_std, device = device, dtype = torch.float32)


In [10]:
X_train = X_train.T
y_train = y_train.values.reshape(1,-1)

X_test = X_test.T
y_test = y_test.values.reshape(1, -1)

In [11]:
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")



X_train shape: torch.Size([10, 1752])
y_train shape: (1, 1752)


In [12]:
y_train = torch.tensor(y_train, device = device)

# Weights Initialization

In [16]:
torch.manual_seed(1)

w1 = torch.randn([10, 10], dtype = torch.float32, device = device, requires_grad = True)
b1 = torch.full([10,1], 0.01, dtype = torch.float32, device = device, requires_grad = True)


w2 = torch.randn([1, 10], dtype = torch.float32, device = device, requires_grad = True)
b2 = torch.full([1,1],0.01, dtype = torch.float32, device = device, requires_grad = True)


lr = 0.01

for i in range(10000):

    Z1 = torch.matmul(w1, X_train) + b1
    A1 = torch.relu(Z1)

    Z2 = torch.matmul(w2, A1) + b2
    A2 = torch.sigmoid(Z2)


    loss = -torch.mean((y_train*torch.log(A2)) + ((1-y_train)*torch.log(1-A2)))

    loss.backward()

    with torch.no_grad():

        w1 -= lr * w1.grad
        b1 -= lr * b1.grad

        w2 -= lr * w2.grad
        b2 -= lr * b2.grad

    w1.grad.zero_()
    b1.grad.zero_()
    w2.grad.zero_()
    b2.grad.zero_()

    if i%1000 == 0:
        print(f"At Iteration {i}, Loss = {loss}")
    
                       

    
    


At Iteration 0, Loss = 2.5848615169525146
At Iteration 1000, Loss = 0.38620156049728394
At Iteration 2000, Loss = 0.35089510679244995
At Iteration 3000, Loss = 0.3376148045063019
At Iteration 4000, Loss = 0.3314429819583893
At Iteration 5000, Loss = 0.32692083716392517
At Iteration 6000, Loss = 0.32407906651496887
At Iteration 7000, Loss = 0.32178911566734314
At Iteration 8000, Loss = 0.31984269618988037
At Iteration 9000, Loss = 0.31814199686050415


In [17]:
loss

tensor(0.3167, grad_fn=<NegBackward0>)

# Predictions

In [18]:
X_test.shape

torch.Size([10, 438])

In [19]:
final_Z1 = torch.matmul(w1, X_test) + b1
final_A1 = torch.relu(final_Z1)

final_Z2 = torch.matmul(w2, final_A1) + b2
final_A2 = torch.sigmoid(final_Z2)

In [20]:
final_A2

tensor([[0.7461, 0.7896, 0.5444, 0.9332, 0.9324, 0.9804, 0.9796, 0.9502, 0.6217,
         0.9696, 0.9812, 0.8919, 0.7186, 0.1340, 0.3534, 0.7455, 0.9744, 0.8384,
         0.8119, 0.1186, 0.4982, 0.9930, 0.2424, 0.9186, 0.0330, 0.9855, 0.8635,
         0.8041, 0.8779, 0.9918, 0.9872, 0.9568, 0.8788, 0.1204, 0.0205, 0.9947,
         0.9351, 0.9819, 0.6453, 0.4184, 0.8756, 0.9152, 0.9862, 0.9441, 0.9761,
         0.9874, 0.9014, 0.9900, 0.2478, 0.9400, 0.9426, 0.1678, 0.9670, 0.9747,
         0.9305, 0.2003, 0.9619, 0.9560, 0.9668, 0.8356, 0.4605, 0.9700, 0.9207,
         0.9675, 0.9811, 0.8344, 0.9490, 0.9605, 0.9779, 0.6964, 0.9370, 0.9794,
         0.8599, 0.9502, 0.0620, 0.9359, 0.6057, 0.9785, 0.6113, 0.2457, 0.5738,
         0.9570, 0.3313, 0.9213, 0.9180, 0.9779, 0.9367, 0.6670, 0.9904, 0.7626,
         0.9403, 0.9548, 0.3232, 0.2113, 0.8341, 0.7385, 0.9819, 0.9482, 0.1078,
         0.9813, 0.8629, 0.3368, 0.5079, 0.9227, 0.3869, 0.8110, 0.8664, 0.9798,
         0.9290, 0.9896, 0.0

In [21]:
y_pred = (final_A2 >= 0.5).int()

In [22]:
y_test = torch.tensor(y_test, dtype = torch.float32)

In [23]:
(y_test == y_pred).float().mean().item()

0.8424657583236694

# Kaggle Submission

In [24]:
k_test = pd.read_csv("data/test.csv")

In [25]:
k_test.drop(["id", "day"], axis = 1, inplace = True)

In [26]:
k_test.isnull().sum()

pressure         0
maxtemp          0
temparature      0
mintemp          0
dewpoint         0
humidity         0
cloud            0
sunshine         0
winddirection    1
windspeed        0
dtype: int64

In [27]:
k_test["winddirection"].fillna(k_test["winddirection"].mode()[0], inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  k_test["winddirection"].fillna(k_test["winddirection"].mode()[0], inplace=True)


In [28]:
k_test.isnull().sum()

pressure         0
maxtemp          0
temparature      0
mintemp          0
dewpoint         0
humidity         0
cloud            0
sunshine         0
winddirection    0
windspeed        0
dtype: int64

In [29]:
k_test_std = Std.transform(k_test)

In [30]:
k_test = torch.tensor(k_test_std, device = device, dtype = torch.float32)

In [31]:
k_test = k_test.T

## Predictions

In [32]:
test_Z1 = torch.matmul(w1, k_test) + b1
test_A1 = torch.relu(test_Z1)

test_Z2 = torch.matmul(w2, test_A1) + b2
test_A2 = torch.sigmoid(test_Z2)


In [33]:
y_test_pred = (test_A2 >= 0.5).int()

In [34]:
y_test_pred

tensor([[1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1,
         1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1,
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
         1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
         1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
         1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
         1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0,
         1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
         1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1,
         1, 0, 1, 1, 1, 1, 1

In [35]:
y_test_prob = test_A2.squeeze().cpu().detach().numpy()  # Convert to NumPy for CSV


In [36]:
y_test_prob

array([9.90133405e-01, 9.85136211e-01, 9.65964556e-01, 1.71178162e-01,
       1.63404629e-01, 8.97166073e-01, 9.46128368e-01, 9.69987512e-01,
       9.58753049e-01, 8.34697187e-01, 9.84625578e-01, 5.41937649e-02,
       9.79223847e-01, 9.79398608e-01, 6.13314748e-01, 1.07249629e-03,
       9.52468276e-01, 8.47739100e-01, 6.18655756e-02, 1.15815189e-03,
       4.78692532e-01, 2.57421196e-01, 8.91842484e-01, 9.90497112e-01,
       9.47244644e-01, 3.24970663e-01, 7.86254834e-03, 9.88100052e-01,
       9.48047400e-01, 5.69992363e-01, 9.68638837e-01, 9.70900416e-01,
       8.16138923e-01, 9.62393224e-01, 9.21959400e-01, 9.44647014e-01,
       3.27669382e-01, 9.07337487e-01, 8.72606158e-01, 8.72347713e-01,
       8.20195317e-01, 9.44058239e-01, 1.06379941e-01, 9.17797625e-01,
       9.62400258e-01, 2.02885970e-01, 1.12677172e-01, 9.81524825e-01,
       1.51410788e-01, 8.76604855e-01, 9.57401216e-01, 9.70695198e-01,
       9.85504925e-01, 9.88190293e-01, 9.83716249e-01, 9.76386607e-01,
      

In [37]:
k_test = pd.read_csv("data/test.csv")

# Ensure y_test_prob is a NumPy array and has correct shape
y_test_prob = y_test_prob.flatten()  # Flatten to ensure it's 1D

# Create submission DataFrame
submission = pd.DataFrame({
    "id": k_test["id"],  # Use the row index as ID
    "rainfall": y_test_prob  # Use predicted probabilities
})

# Save the submission file
submission.to_csv("submission.csv", index=False)

print("✅ Submission file saved as 'submission.csv'. Ready for Kaggle upload! 🎯")

✅ Submission file saved as 'submission.csv'. Ready for Kaggle upload! 🎯
