# Pora na machanie rękami

In [1]:
import math
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as f

In [2]:
SEED = 999
DEVICE = 'cuda'
DATASET_PATH = "./bike_sharing_dataset/hour.csv"

In [3]:
device = torch.device(DEVICE) 

In [4]:
_df = pd.read_csv(DATASET_PATH)
_df

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0000,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.80,0.0000,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.80,0.0000,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0000,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0000,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17374,17375,2012-12-31,1,1,12,19,0,1,1,2,0.26,0.2576,0.60,0.1642,11,108,119
17375,17376,2012-12-31,1,1,12,20,0,1,1,2,0.26,0.2576,0.60,0.1642,8,81,89
17376,17377,2012-12-31,1,1,12,21,0,1,1,1,0.26,0.2576,0.60,0.1642,7,83,90
17377,17378,2012-12-31,1,1,12,22,0,1,1,1,0.26,0.2727,0.56,0.1343,13,48,61


In [5]:
from torch.utils.data import random_split, DataLoader, Dataset

In [6]:
class BikeDataset(Dataset):
    def __init__(self, features, labels):
        assert len(features) == len(labels)
        self._features = torch.from_numpy(features)
        self._labels = torch.from_numpy(labels)

    def __len__(self):
        return len(self._features)

    def __getitem__(self, idx):
        return self._features[idx], self._labels[idx]

In [7]:
df = _df.drop(columns=['instant', 'dteday', 'registered', 'casual'])
dataset = BikeDataset(features=df.values[:, :-1], labels=df.values[:, -1])

In [8]:
train, test = random_split(
    dataset=dataset,
    lengths=[round(len(df) * 0.8), round(len(df) * 0.2)],
    generator=torch.Generator().manual_seed(SEED)
)

In [9]:
train

<torch.utils.data.dataset.Subset at 0x7f3d3316bf10>

In [10]:
train_loader = DataLoader(train, batch_size=128, shuffle=True)
test_loader = DataLoader(test, batch_size=1, shuffle=True)

In [11]:
class BikeModel(nn.Module):
    def __init__(self, num_inputs, num_hidden, num_outputs):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, num_hidden)
        self.linear2 = nn.Linear(num_hidden, num_hidden)
        self.linear3 = nn.Linear(num_hidden, num_hidden)
        self.linear4 = nn.Linear(num_hidden, num_outputs)
        self.act_fn = nn.ReLU()

    def forward(self, x):
        x = self.linear1(x)
        x = self.act_fn(x)
        x = self.linear2(x)
        x = self.act_fn(x)
        x = self.linear3(x)
        x = self.act_fn(x)
        x = self.linear4(x)
        return x

In [18]:
class RMSLELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self, pred, actual):
        return torch.sqrt(self.mse(torch.log(pred + 1), torch.log(actual + 1)))

In [19]:
model = BikeModel(
    num_inputs=12,
    num_hidden=128,
    num_outputs=1
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_module = RMSLELoss()

In [20]:
model.train()
for epoch in range(250):
    for X, y in train_loader:
        X = X.float().to(device)
        y = y.float().to(device)

        pred = model(X).squeeze(dim=1).float()

        loss = loss_module(y, pred)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if not epoch % 25:
        print(f"epoch: {epoch}, loss: {loss.item()}")

epoch: 0, loss: 2.882294178009033
epoch: 25, loss: 0.8065440654754639
epoch: 50, loss: 0.7374131679534912
epoch: 75, loss: 0.5801187753677368
epoch: 100, loss: 0.5216390490531921
epoch: 125, loss: 0.4501436948776245
epoch: 150, loss: 0.5113668441772461
epoch: 175, loss: 0.5439280271530151
epoch: 200, loss: 0.5051448941230774
epoch: 225, loss: 0.48754459619522095


In [27]:
model.eval()
with torch.no_grad():
    rmsle = torch.mean(torch.Tensor([loss_module.forward(model(X.float().to(device)).squeeze(dim=1).float(), y.float().to(device)) for X, y in test_loader]))
    
print(f"RMSLE = {rmsle:.5f}")

RMSLE = 0.37293
