In [1]:
import json

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F

import numpy as np
import pandas as pd

# Data import

In [2]:
with open("example-data.json", "r") as filehandle:
    data = json.load(filehandle)

# Data Parsing

In [3]:
data_parsed = [entry.replace(", ", ",") for entry in data]
data_parsed = [entry.split(",") for entry in data_parsed]
data_parsed = pd.DataFrame.from_dict(data_parsed)
data_parsed.columns = ["timestamp", "x", "temp", "humid"]
# Convert temp and humid to numeric
data_parsed["temp"] = data_parsed["temp"].astype(float)
data_parsed["humid"] = data_parsed["humid"].astype(float)
# Last row is empty
data_parsed = data_parsed[:-1]
# Remove X
data_parsed.drop(columns="x", inplace=True)
# Convert to datetime
data_parsed["timestamp"] = pd.to_datetime(data_parsed["timestamp"])


# Data Stats

In [4]:
# Time range
print(data_parsed["timestamp"].min(), data_parsed["timestamp"].max())
# Mean Temp per weekday
print(data_parsed.groupby(
    [data_parsed["timestamp"].dt.weekday])["temp"].mean())
# Mean Humid per weekday
print(data_parsed.groupby(
    [data_parsed["timestamp"].dt.weekday])["humid"].mean())

2022-11-21 11:00:00 2022-11-29 07:00:00
timestamp
0    18.461081
1    18.409063
2    18.484167
3    18.720833
4    18.228333
5    17.581667
6    16.770417
Name: temp, dtype: float64
timestamp
0    57.836216
1    55.476250
2    54.791250
3    56.201667
4    56.857500
5    58.713750
6    58.419583
Name: humid, dtype: float64


# Feature Enhancement

In [5]:
# Add hour
data_parsed["hour"] = data_parsed["timestamp"].dt.hour
# Add day of year
data_parsed["day_of_year"] = data_parsed["timestamp"].dt.day_of_year
# Add weekday
data_parsed["weekday"] = data_parsed["timestamp"].dt.weekday

# ML

In [6]:
def training_loop(n_epochs, optimiser, model, loss_fn, train_dl, val_dl):
    for epoch in range(1, n_epochs + 1):
        model.train()
        for i, data in enumerate(train_dl):
            optimiser.zero_grad() # set gradients to zero
            inputs, targets = data
            output_train = model(inputs) # forwards pass
            loss_train = loss_fn(output_train, targets) # calculate loss
            loss_train.backward() # backwards pass
            optimiser.step() # update model parameters

        model.eval()
        for i, data in enumerate(val_dl):
            inputs, targets = data
            output_val = model(inputs)
            loss_val = loss_fn(output_val, targets)
        if epoch == 1 or epoch % 100 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                f" Validation loss {loss_val.item():.4f}")

In [7]:
class StartNet(nn.Module):

    def __init__(self):
        super(StartNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(3, 15),
            nn.ReLU(),
            nn.Linear(15, 1))

    def forward(self, x):
        x = self.model(x)
        return x

startnet = StartNet()
optimizer = optim.Adam(startnet.parameters(), lr=0.001)

In [8]:
# Train Val Split
train = data_parsed[0:int(len(data_parsed)/1.5)]
val = data_parsed[int(len(data_parsed)/1.5):]
# Full Training
train = data_parsed

class MyDataset(torch.utils.data.Dataset):

  def __init__(self, df):
 
    x = df[["hour", "weekday", "day_of_year"]].values
    y = df[["temp"]].values

    self.x_train=torch.tensor(x, dtype=torch.float32)
    self.y_train=torch.tensor(y, dtype=torch.float32)

  def __len__(self):
    return len(self.y_train)
  
  def __getitem__(self, idx):
    return self.x_train[idx], self.y_train[idx]

In [9]:
train_dl  = torch.utils.data.DataLoader(MyDataset(train), batch_size=10, shuffle=False)
val_dl    = torch.utils.data.DataLoader(MyDataset(val), batch_size=10, shuffle=False)

In [13]:
training_loop(
    n_epochs = 700, 
    optimiser = optimizer,
    model = startnet,
    loss_fn = nn.MSELoss(),
    train_dl = train_dl,
    val_dl = val_dl,
    )

Epoch 1, Training loss 2.0649, Validation loss 2.2547
Epoch 100, Training loss 1.9485, Validation loss 2.2322
Epoch 200, Training loss 1.7902, Validation loss 2.1652
Epoch 300, Training loss 1.7415, Validation loss 2.1415
Epoch 400, Training loss 1.7457, Validation loss 2.1607
Epoch 500, Training loss 1.8906, Validation loss 2.3163
Epoch 600, Training loss 1.8170, Validation loss 2.1674
Epoch 700, Training loss 1.3048, Validation loss 1.5797


In [15]:
startnet.eval()
a = startnet(torch.tensor([15.0, 6.0, 330]))
print(round(a.tolist()[0]))

16


In [16]:
torch.save(startnet.state_dict(), "startnet.model") 