In [40]:
import pandas as pd
import torch
import numpy as np
from torch.utils import data
from sklearn.model_selection import train_test_split

df = pd.read_csv('data.csv', delimiter=",")

df = df.drop(['instant', 'dteday', 'casual', 'registered'], axis=1)

# Split the data into train and test sets
train, test = train_test_split(df, test_size=0.2, random_state=42)

train_dataset = data.TensorDataset(torch.tensor(train.values[:,:-1]), torch.tensor(train.values[:,-1]))
test_dataset = data.TensorDataset(torch.tensor(test.values[:,:-1]), torch.tensor(test.values[:,-1]))
train_data_loader = data.DataLoader(train_dataset, batch_size=128, shuffle=False)

In [41]:
import torch.nn as nn

class Evaluator(nn.Module):

    def __init__(self, num_inputs, num_hidden, num_outputs):
        super().__init__()
        # Initialize the modules we need to build the network
        self.linear1 = nn.Linear(num_inputs, num_hidden)
        self.batch_norm = nn.BatchNorm1d(num_hidden)
        self.act_fn = nn.LeakyReLU()
        self.linear2 = nn.Linear(num_hidden, num_hidden)
        self.batch_norm2 = nn.BatchNorm1d(num_hidden)
        self.linear3 = nn.Linear(num_hidden, num_outputs)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.linear1(x)
        x = self.batch_norm(x)
        x = self.act_fn(x)
        x = self.linear2(x)
        x = self.batch_norm2(x)
        x = self.act_fn(x)
        x = self.linear3(x)
        return x

In [42]:
model = Evaluator(num_inputs=12, num_hidden=64, num_outputs=1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

loss_module = nn.MSELoss()

model.train() 

# Training loop
for epoch in range(100):
    for data_inputs, data_labels in train_data_loader:

        ## Step 1: Move input data to device (only strictly necessary if we use GPU)
        data_inputs = data_inputs.float().to(device)
        data_labels = data_labels.float().to(device)

        ## Step 2: Run the model on the input data
        preds = model(data_inputs)
        preds = preds.squeeze(dim=1) # Output is [Batch size, 1], but we want [Batch size]

        ## Step 3: Calculate the loss
        loss = loss_module(preds, data_labels)

        ## Step 4: Perform backpropagation
        # Before calculating the gradients, we need to ensure that they are all zero. 
        # The gradients would not be overwritten, but actually added to the existing ones.
        optimizer.zero_grad() 
        # Perform backpropagation
        loss.backward()

        ## Step 5: Update the parameters
        optimizer.step()
    print(f"Epoch: {epoch}, loss: {loss.item():.3}")



Epoch: 0, loss: 5.03e+04
Epoch: 1, loss: 4.86e+04
Epoch: 2, loss: 4.57e+04
Epoch: 3, loss: 4.19e+04
Epoch: 4, loss: 3.76e+04
Epoch: 5, loss: 3.33e+04
Epoch: 6, loss: 2.86e+04
Epoch: 7, loss: 2.45e+04
Epoch: 8, loss: 2e+04
Epoch: 9, loss: 1.56e+04
Epoch: 10, loss: 1.2e+04
Epoch: 11, loss: 8.83e+03
Epoch: 12, loss: 6.73e+03
Epoch: 13, loss: 4.31e+03
Epoch: 14, loss: 2.97e+03
Epoch: 15, loss: 1.98e+03
Epoch: 16, loss: 1.45e+03
Epoch: 17, loss: 1.3e+03
Epoch: 18, loss: 1.08e+03
Epoch: 19, loss: 1.01e+03
Epoch: 20, loss: 4.45e+02
Epoch: 21, loss: 4.04e+02
Epoch: 22, loss: 2.43e+02
Epoch: 23, loss: 2.22e+02
Epoch: 24, loss: 1.88e+02
Epoch: 25, loss: 2.18e+02
Epoch: 26, loss: 1.41e+02
Epoch: 27, loss: 1.52e+02
Epoch: 28, loss: 1.45e+02
Epoch: 29, loss: 1.21e+02
Epoch: 30, loss: 1.21e+02
Epoch: 31, loss: 91.4
Epoch: 32, loss: 90.8
Epoch: 33, loss: 92.2
Epoch: 34, loss: 62.6
Epoch: 35, loss: 68.3
Epoch: 36, loss: 50.2
Epoch: 37, loss: 53.3
Epoch: 38, loss: 34.9
Epoch: 39, loss: 31.5
Epoch: 40, 

In [43]:
def rmsle(y_true,y_pred):
    n = len(y_true)
    msle = np.mean([(np.log(max(y_pred[i],0) + 1) - np.log(y_true[i] + 1)) ** 2.0 for i in range(n)])
    return np.sqrt(msle)

data_inputs, data_labels = test_dataset.tensors
data_inputs = data_inputs.float().to(device)
data_labels = data_labels.float().to(device)

model.eval()
with torch.no_grad():
    preds = model(data_inputs)
    preds = preds.squeeze(dim=1)

preds = preds.cpu().numpy()
data_labels = data_labels.cpu().numpy()

loss = rmsle(data_labels, preds)
print(f"Loss on test data: {loss:.3}")

Loss on test data: 0.613


In [None]:
targets = pd.read_csv("evaluation_data.csv", delimiter=",")
targets = targets.drop(['dteday'], axis=1)
inputs = torch.tensor(targets.values, dtype=torch.float32)
inputs = inputs.to(device)

model.eval()

with torch.no_grad():
    outputs = model(inputs).squeeze()

pd.DataFrame(outputs.detach().cpu()).to_csv("results.csv", index=False, header=None)