In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,recall_score,roc_auc_score,confusion_matrix,precision_score
import warnings

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('data/kerala.csv')
df.head()

Unnamed: 0,SUBDIVISION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL RAINFALL,FLOODS
0,KERALA,1901,28.7,44.7,51.6,160.0,174.7,824.6,743.0,357.5,197.7,266.9,350.8,48.4,3248.6,YES
1,KERALA,1902,6.7,2.6,57.3,83.9,134.5,390.9,1205.0,315.8,491.6,358.4,158.3,121.5,3326.6,YES
2,KERALA,1903,3.2,18.6,3.1,83.6,249.7,558.6,1022.5,420.2,341.8,354.1,157.0,59.0,3271.2,YES
3,KERALA,1904,23.7,3.0,32.2,71.5,235.7,1098.2,725.5,351.8,222.7,328.1,33.9,3.3,3129.7,YES
4,KERALA,1905,1.2,22.3,9.4,105.9,263.3,850.2,520.5,293.6,217.2,383.5,74.4,0.2,2741.6,NO


In [3]:
df.drop(['SUBDIVISION', 'YEAR'], axis=1, inplace=True)

In [4]:
df.FLOODS = df.FLOODS.map({'YES': 1, 'NO': 0})

In [5]:
X, y = df.drop('FLOODS', axis=1), df['FLOODS']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=1)

In [7]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.to_numpy())
X_train_scaled = torch.from_numpy(X_train_scaled)

train_data = TensorDataset(X_train_scaled, torch.tensor(np.array(y_train)))
train_loader = DataLoader(train_data, batch_size=4, shuffle=True)
 
test_data = TensorDataset(torch.from_numpy(scaler.transform(X_test.to_numpy())), torch.tensor(np.array(y_test)))
test_loader = DataLoader(test_data, batch_size=len(y_test), shuffle=False) 

In [8]:

class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.fc1 = nn.Linear(13, 26, dtype=float)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(26, 1, dtype=float)

  def forward(self, x):
    x = self.fc1(x)
    x = self.relu(x)
    x = self.fc2(x)
    output = torch.sigmoid(x)
    return output

model = Net()

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(10):
  for i, (data, target) in enumerate(train_loader):
    output = model(data)
    target = torch.tensor(target.float(), dtype=torch.float64)
    loss = criterion(output, (target.view(-1, 1)))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i+1) % 10 == 0:
      print(f'Epoch [{epoch+1}/{10}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

with torch.no_grad():
  for data, target in test_loader:
    outputs = model(data)
    predictions = (outputs > 0.5).float()

predictions = predictions.detach().numpy()
print("Predictions:", predictions.flatten())


Epoch [1/10], Step [10/24], Loss: 0.5282
Epoch [1/10], Step [20/24], Loss: 0.3603
Epoch [2/10], Step [10/24], Loss: 0.2309
Epoch [2/10], Step [20/24], Loss: 0.1173
Epoch [3/10], Step [10/24], Loss: 0.2869
Epoch [3/10], Step [20/24], Loss: 0.0739
Epoch [4/10], Step [10/24], Loss: 0.0375
Epoch [4/10], Step [20/24], Loss: 0.0314
Epoch [5/10], Step [10/24], Loss: 0.0407
Epoch [5/10], Step [20/24], Loss: 0.0316
Epoch [6/10], Step [10/24], Loss: 0.0340
Epoch [6/10], Step [20/24], Loss: 0.0012
Epoch [7/10], Step [10/24], Loss: 0.0035
Epoch [7/10], Step [20/24], Loss: 0.0027
Epoch [8/10], Step [10/24], Loss: 0.0010
Epoch [8/10], Step [20/24], Loss: 0.0065
Epoch [9/10], Step [10/24], Loss: 0.0004
Epoch [9/10], Step [20/24], Loss: 0.0063
Epoch [10/10], Step [10/24], Loss: 0.0194
Epoch [10/10], Step [20/24], Loss: 0.0027
Predictions: [1. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1.]


In [9]:
y_pred = predictions
print(accuracy_score(y_test, y_pred))
print(precision_score(y_test, y_pred))
print(recall_score(y_test, y_pred))
print(roc_auc_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

0.9166666666666666
0.8333333333333334
1.0
0.9285714285714286
[[12  2]
 [ 0 10]]


In [10]:
inference = torch.from_numpy(np.array([[
 29.1,
 52.1,
 48.6,
 116.4,
 183.8,
 625.4,
 1048.5,
 1398.9,
 423.6,
 356.1,
 125.4,
 65.1,
 4473.0]]))
with torch.no_grad():
    print(pd.DataFrame(model(inference)).iloc[:,0].map({1:'YES', 0:'NO'}))

0    YES
Name: 0, dtype: object
