In [95]:
import json

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F

import numpy as np
import pandas as pd

# Data import

In [96]:
with open("te-data.json", "r") as filehandle:
    data = json.load(filehandle)

# Data Parsing

In [97]:
data_parsed = [entry.replace(", ", ",") for entry in data]
data_parsed = [entry.split(",") for entry in data_parsed]
data_parsed = pd.DataFrame.from_dict(data_parsed)
data_parsed.columns = ["timestamp", "x", "temp", "humid"]
# Convert temp and humid to numeric
data_parsed["temp"] = data_parsed["temp"].astype(float)
data_parsed["humid"] = data_parsed["humid"].astype(float)
# Last row is empty
data_parsed = data_parsed[:-1]
# Remove X
data_parsed.drop(columns="x", inplace=True)
# Convert to datetime
data_parsed["timestamp"] = pd.to_datetime(data_parsed["timestamp"])


# Data Stats

In [98]:
# Time range
print(data_parsed["timestamp"].min(), data_parsed["timestamp"].max())
# Mean Temp per weekday
print(data_parsed.groupby(
    [data_parsed["timestamp"].dt.weekday])["temp"].mean())
# Mean Humid per weekday
print(data_parsed.groupby(
    [data_parsed["timestamp"].dt.weekday])["humid"].mean())

2022-11-21 11:00:00 2022-11-30 11:55:50
timestamp
0    18.461081
1    18.497826
2    18.353548
3    18.720833
4    18.228333
5    17.581667
6    16.770417
Name: temp, dtype: float64
timestamp
0    57.836216
1    56.195435
2    55.726452
3    56.201667
4    56.857500
5    58.713750
6    58.419583
Name: humid, dtype: float64


# Feature Enhancement

In [99]:
# Add hour
data_parsed["hour"] = data_parsed["timestamp"].dt.hour
# Add day of year
data_parsed["day_of_year"] = data_parsed["timestamp"].dt.day_of_year
# Add weekday
data_parsed["weekday"] = data_parsed["timestamp"].dt.weekday

# ML

In [197]:
def training_loop(n_epochs, optimiser, model, loss_fn, train_dl, val_dl):
    for epoch in range(1, n_epochs + 1):
        model.train()
        for i, data in enumerate(train_dl):
            optimiser.zero_grad() # set gradients to zero
            inputs, targets = data
            output_train = model(inputs) # forwards pass
            loss_train = loss_fn(output_train, targets) # calculate loss
            loss_train.backward() # backwards pass
            optimiser.step() # update model parameters

        model.eval()
        for i, data in enumerate(val_dl):
            inputs, targets = data
            output_val = model(inputs)
            loss_val = loss_fn(output_val, targets)
        if epoch == 1 or epoch % 100 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                f" Validation loss {loss_val.item():.4f}")

In [198]:
class StartNet(nn.Module):

    def __init__(self):
        super(StartNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(3, 20),
            nn.Dropout(),
            nn.Sigmoid(),
            nn.Linear(20, 1))

    def forward(self, x):
        x = self.model(x)
        return x

startnet = StartNet()
optimizer = optim.Adam(startnet.parameters(), lr=0.001)

In [199]:
# Train Val Split
train = data_parsed[0:int(len(data_parsed)/1.5)]
val = data_parsed[int(len(data_parsed)/1.5):]
# Full Training
train = data_parsed

class MyDataset(torch.utils.data.Dataset):

  def __init__(self, df):
 
    x = df[["hour", "weekday", "day_of_year"]].values
    y = df[["temp"]].values

    self.x_train=torch.tensor(x, dtype=torch.float32)
    self.y_train=torch.tensor(y, dtype=torch.float32)

  def __len__(self):
    return len(self.y_train)
  
  def __getitem__(self, idx):
    return self.x_train[idx], self.y_train[idx]

In [200]:
train_dl  = torch.utils.data.DataLoader(MyDataset(train), batch_size=10, shuffle=False)
val_dl    = torch.utils.data.DataLoader(MyDataset(val), batch_size=10, shuffle=False)

In [201]:
training_loop(
    n_epochs = 900, 
    optimiser = optimizer,
    model = startnet,
    loss_fn = nn.MSELoss(),
    train_dl = train_dl,
    val_dl = val_dl,
    )

Epoch 1, Training loss 327.1317, Validation loss 331.6660
Epoch 100, Training loss 6.2673, Validation loss 21.8608
Epoch 200, Training loss 2.8396, Validation loss 2.6661
Epoch 300, Training loss 1.6324, Validation loss 1.0182
Epoch 400, Training loss 4.0452, Validation loss 0.3526
Epoch 500, Training loss 3.4077, Validation loss 0.7254
Epoch 600, Training loss 1.7778, Validation loss 1.4191
Epoch 700, Training loss 5.9741, Validation loss 2.0398
Epoch 800, Training loss 2.3051, Validation loss 2.4255
Epoch 900, Training loss 2.6402, Validation loss 2.6289


In [202]:
for name, param in startnet.named_parameters():
    print(name, param)

model.0.weight Parameter containing:
tensor([[-0.1369,  0.0197, -0.4656],
        [ 0.0209, -0.0879, -0.0935],
        [-0.1078,  0.1304, -0.5757],
        [ 0.1630, -0.0057, -0.2906],
        [-0.2905,  0.1107, -0.1901],
        [-0.4044, -0.3209,  0.0550],
        [-0.3785,  0.5840,  0.0471],
        [-0.3668, -0.5328, -0.5008],
        [-0.2334, -0.5352,  0.3599],
        [ 0.2100,  0.2421, -0.5394],
        [ 0.5444, -0.2359,  0.3346],
        [-0.5390, -0.0543,  0.1871],
        [ 0.0931,  0.3250, -0.5007],
        [-0.2895,  0.0629, -0.1130],
        [ 0.3300,  0.1307, -0.5312],
        [-0.3485, -0.5219,  0.0743],
        [-0.4587,  0.2642, -0.3691],
        [-0.2386,  0.3772,  0.1658],
        [-0.1272, -0.4160, -0.4054],
        [ 0.4760, -0.1987,  0.3894]], requires_grad=True)
model.0.bias Parameter containing:
tensor([-0.5408,  0.5670,  0.3512,  0.0693,  0.1928,  0.4445,  0.4591,  0.5208,
         0.0034, -0.1035, -0.2588, -0.3055,  0.0406,  0.5243, -0.0549, -0.5721,
       

In [203]:
startnet.eval()
for i in range(12, 23):
    for j in range(3, 6):
        for k in range(310, 320):
            print(startnet(torch.tensor([float(i), j, k])).tolist()[0])

19.782838821411133
19.782840728759766
19.7828426361084
19.7828426361084
19.78284454345703
19.78284454345703
19.782846450805664
19.782848358154297
19.782848358154297
19.782848358154297
19.7828369140625
19.7828369140625
19.782838821411133
19.782840728759766
19.7828426361084
19.7828426361084
19.78284454345703
19.782846450805664
19.782846450805664
19.782848358154297
19.78282928466797
19.7828311920166
19.782833099365234
19.782835006713867
19.7828369140625
19.782838821411133
19.782838821411133
19.782840728759766
19.7828426361084
19.78284454345703
19.782825469970703
19.782827377319336
19.78282928466797
19.7828311920166
19.782833099365234
19.782835006713867
19.7828369140625
19.782838821411133
19.782840728759766
19.782840728759766
19.78282356262207
19.782825469970703
19.782827377319336
19.782827377319336
19.7828311920166
19.782833099365234
19.782835006713867
19.7828369140625
19.7828369140625
19.782838821411133
19.782812118530273
19.78281593322754
19.782817840576172
19.782821655273438
19.7828235

In [204]:
torch.save(startnet.state_dict(), "startnet.model") 