https://www.kaggle.com/jsphyg/weather-dataset-rattle-package

###Preprocessing

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
df = pd.read_csv("/content/drive/MyDrive/weatherAUS - weatherAUS.csv")

In [4]:
df['RainToday'].replace({'No': 0, 'Yes': 1}, inplace = True)
df['RainTomorrow'].replace({'No': 0, 'Yes': 1}, inplace = True)
df = df.dropna(how='any')
X = df[['Rainfall', 'Humidity3pm', 'RainToday', 'Pressure9am']]
y = df[['RainTomorrow']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
torch.manual_seed(0)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['RainToday'].replace({'No': 0, 'Yes': 1}, inplace = True)
  df['RainToday'].replace({'No': 0, 'Yes': 1}, inplace = True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['RainTomorrow'].replace({'No': 0, 'Yes': 1}, inplace = True)
  df['RainTomorrow'].replace({'No': 0, 'Yes'

<torch._C.Generator at 0x7e69fa7dccf0>

###PyTorch Model

In [5]:
X_train = torch.from_numpy(X_train.to_numpy()).float()
y_train = torch.squeeze(torch.from_numpy(y_train.to_numpy()).float())
X_test = torch.from_numpy(X_test.to_numpy()).float()
y_test = torch.squeeze(torch.from_numpy(y_test.to_numpy()).float())

In [6]:
class Net(nn.Module):
  def __init__(self, n_features):
    super(Net, self).__init__()
    self.fc1 = nn.Linear(n_features, 5)
    self.fc2 = nn.Linear(5, 3) #5 nodes to 3 #nodes
    self.fc3 = nn.Linear(3, 1)
  def forward(self, x):
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return F.sigmoid(self.fc3(x))

In [7]:
net = Net(X_train.shape[1])
print(net)

Net(
  (fc1): Linear(in_features=4, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=3, bias=True)
  (fc3): Linear(in_features=3, out_features=1, bias=True)
)


In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [9]:
num_epochs = 1000
net = net.to(device)
criterion = nn.BCELoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [10]:
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

In [11]:
def calculate_accuracy(y_true, y_pred):
  predicted = y_pred.ge(.5).view(-1) #set to 0 if < 0.5, 1 if >= 0.5
  return (y_true == predicted).sum().float() / len(y_true)

def round_tensor(t, decimal_places=3):
  return round(t.item(), decimal_places)

###Training

In [12]:
for epoch in range(num_epochs):
    y_pred = net(X_train) ##important
    y_pred = torch.squeeze(y_pred)
    train_loss = criterion(y_pred, y_train) ##important
    if epoch % 100 == 0:
      train_acc = calculate_accuracy(y_train, y_pred)
      y_test_pred = net(X_test)
      y_test_pred = torch.squeeze(y_test_pred)
      test_loss = criterion(y_test_pred, y_test)
      test_acc = calculate_accuracy(y_test, y_test_pred)

      model_file = {'model': Net(4),
          'state_dict': net.state_dict(),
          'optimizer' : optimizer.state_dict()}

      torch.save(model_file, 'model%d.pth'% epoch)

      print(
f'''epoch {epoch}
Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
Test  set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}
''')
    optimizer.zero_grad() ##important, set the gradient to 0 so gradients don't accumulate
    train_loss.backward() ##important, backward propagation
    optimizer.step() ##important, updates the optimizer

epoch 0
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778

epoch 100
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778

epoch 200
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778

epoch 300
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778

epoch 400
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778

epoch 500
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778

epoch 600
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778

epoch 700
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778

epoch 800
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778

epoch 900
Train set - loss: 23.981, accuracy: 0.76
Test  set - loss: 22.182, accuracy: 0.778



###Inference

In [13]:
def load_model(filepath):
    model_path = torch.load(filepath)
    model = model_path['model']
    model.load_state_dict(model_path['state_dict'])
    for parameter in model.parameters():
        parameter.requires_grad = False

    model.eval() ##super important
    return model

model = load_model('model900.pth')

  model_path = torch.load(filepath)


In [14]:
model.cuda() #if on GPU, or don't need this line

Net(
  (fc1): Linear(in_features=4, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=3, bias=True)
  (fc3): Linear(in_features=3, out_features=1, bias=True)
)

In [15]:
output = model(X_test)

In [16]:
y_pred = torch.squeeze(output)

In [17]:
calculate_accuracy(y_test, y_pred)

tensor(0.7782, device='cuda:0')

In [18]:
model.state_dict()

OrderedDict([('fc1.weight',
              tensor([[-0.0037,  0.2682, -0.4115, -0.3680],
                      [-0.1926,  0.1341, -0.0099,  0.3964],
                      [-0.0444,  0.1323, -0.1511, -0.0983],
                      [-0.4777, -0.3311, -0.2061,  0.0185],
                      [ 0.1977,  0.3000, -0.3390, -0.2177]], device='cuda:0')),
             ('fc1.bias',
              tensor([ 0.1816,  0.4152, -0.1029,  0.3742, -0.0806], device='cuda:0')),
             ('fc2.weight',
              tensor([[ 0.0473,  0.4049, -0.4149, -0.2815, -0.1132],
                      [-0.1743,  0.3864, -0.2899, -0.2059, -0.3124],
                      [-0.4188, -0.2611,  0.3844,  0.1996,  0.2168]], device='cuda:0')),
             ('fc2.bias',
              tensor([ 0.0235, -0.2293,  0.0757], device='cuda:0')),
             ('fc3.weight',
              tensor([[-0.5391, -0.4172, -0.2976]], device='cuda:0')),
             ('fc3.bias', tensor([0.3643], device='cuda:0'))])