# Import libraries and data

In [1]:
import pandas as pd
import torch.nn as nn
df = pd.read_csv('./data/data.csv')
df_eval = pd.read_csv('./data/evaluation_data.csv')

In [2]:
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [9]:
train = df.sample(frac=0.8,random_state=200) #random state is a seed value
valid = df.drop(train.index)
test = df_eval

X_train = train.drop(columns = ['casual','registered', 'cnt', 'instant', 'dteday'])
Y_train = train['cnt']

X_valid = valid.drop(columns = ['casual','registered', 'cnt', 'instant', 'dteday'])
Y_valid= valid['cnt']

X_test = test.drop(columns = ['dteday'])
X_test.head()


Unnamed: 0,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed
0,1,0,1,0,0,4,1,1,0.26,0.2273,0.56,0.3881
1,1,0,1,1,0,4,1,1,0.26,0.2727,0.56,0.0
2,1,0,1,2,0,4,1,1,0.26,0.2727,0.56,0.0
3,1,0,1,3,0,4,1,1,0.26,0.2576,0.56,0.1642
4,1,0,1,4,0,4,1,1,0.26,0.2576,0.56,0.1642


In [10]:
class SimpleClassifier(nn.Module):

    def __init__(self, num_inputs, num_h1, num_h2, num_outputs):
        super().__init__()
        # Initialize the modules we need to build the network
        self.act_fn = nn.ReLU()
        
        self.linear1 = nn.Linear(num_inputs, num_h1)
        self.hidden1 = nn.Linear(num_h1, num_h2)
        self.linear2 = nn.Linear(num_h2, num_outputs)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.linear1(x)
        x = self.act_fn(x)
        x = self.hidden1(x)
        x = self.act_fn(x)
        x = self.linear2(x)
        return x

In [11]:
import torch.utils.data as data
import torch.optim as optim

In [6]:
X_train.head()

Unnamed: 0,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed
4396,4,0,10,5,0,6,0,1,0.42,0.4242,0.71,0.1642
6205,1,1,2,19,0,2,1,2,0.36,0.3485,0.5,0.1642
2558,2,0,6,12,0,2,1,1,0.64,0.6212,0.41,0.3582
7225,2,1,4,10,0,4,1,1,0.48,0.4697,0.63,0.0896
3305,3,0,8,15,0,0,0,3,0.72,0.697,0.74,0.2985


# Create datasets

In [12]:
import torch
import numpy as np

train_dataset = data.TensorDataset(torch.from_numpy(np.array(X_train)), torch.from_numpy(np.array(Y_train)))
valid_dataset = data.TensorDataset(torch.from_numpy(np.array(X_valid)), torch.from_numpy(np.array(Y_valid)))

train_data_loader = data.DataLoader(train_dataset, batch_size=128, shuffle=True)

# Train model

In [27]:
model = SimpleClassifier(12, 1000, 900, 1)
MSELoss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

model.train()

for epoch in range(100):  # loop over the dataset multiple times
    
    sum_loss = 0

    for inputs, labels in train_data_loader:
        # get the inputs; data is a list of [inputs, labels]

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        preds = model(inputs.float())
        preds = preds.squeeze(dim=1)
        loss = MSELoss(preds, labels.float())
        
        loss.backward()
        optimizer.step()

        sum_loss += loss
    
    print(epoch, sum_loss / X_train.shape[0])


0 tensor(174.4543, grad_fn=<DivBackward0>)
1 tensor(125.6260, grad_fn=<DivBackward0>)
2 tensor(107.2926, grad_fn=<DivBackward0>)
3 tensor(98.8868, grad_fn=<DivBackward0>)
4 tensor(98.7203, grad_fn=<DivBackward0>)
5 tensor(90.6446, grad_fn=<DivBackward0>)
6 tensor(83.6105, grad_fn=<DivBackward0>)
7 tensor(83.9948, grad_fn=<DivBackward0>)
8 tensor(71.5142, grad_fn=<DivBackward0>)
9 tensor(73.7397, grad_fn=<DivBackward0>)
10 tensor(69.9939, grad_fn=<DivBackward0>)
11 tensor(63.5013, grad_fn=<DivBackward0>)
12 tensor(59.6870, grad_fn=<DivBackward0>)
13 tensor(61.4284, grad_fn=<DivBackward0>)
14 tensor(70.8249, grad_fn=<DivBackward0>)
15 tensor(57.5080, grad_fn=<DivBackward0>)
16 tensor(52.6479, grad_fn=<DivBackward0>)
17 tensor(50.7516, grad_fn=<DivBackward0>)
18 tensor(42.9205, grad_fn=<DivBackward0>)
19 tensor(41.8314, grad_fn=<DivBackward0>)
20 tensor(39.4175, grad_fn=<DivBackward0>)
21 tensor(41.0600, grad_fn=<DivBackward0>)
22 tensor(37.6238, grad_fn=<DivBackward0>)
23 tensor(36.1429,

In [12]:
X_test.dtypes

season          int64
yr              int64
mnth            int64
hr              int64
holiday         int64
weekday         int64
workingday      int64
weathersit      int64
temp          float64
atemp         float64
hum           float64
windspeed     float64
dtype: object

In [28]:
all_preds = []
with torch.no_grad(): # Deactivate gradients for the following code
    for data_inputs in X_test.values:
        preds = model(torch.tensor(data_inputs).float())
        preds = preds.squeeze(dim=0)
        all_preds.append(preds)

pd.DataFrame(all_preds).to_csv("output.csv", index=False, header=None)
        

# Check predictions quality

In [29]:
def rmsle(y_true,y_pred):
    n = len(y_true)
    msle = np.mean([(np.log(max(y_pred[i],0) + 1) - np.log(y_true[i] + 1)) ** 2.0 for i in range(n)])
    return np.sqrt(msle)

In [30]:
all_inputs = []
all_labels = []

with torch.no_grad():
    for inputs, labels in valid_dataset:
        preds = model(inputs.float())
        all_inputs.append(float(preds))
        all_labels.append(float(labels))

print(rmsle(np.array(all_labels), np.array(all_inputs)))

0.44132054059388176
