## Load in Packages and UCI Bike Share Data


UCI Data can be found [here](https://archive.ics.uci.edu/dataset/275/bike+sharing+dataset).

Notebook inspired by [Hands-On Machine Learning with Scikit-Learn and PyTorch](https://www.oreilly.com/library/view/hands-on-machine-learning/9798341607972/).

In [64]:
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np

In [65]:
dat = pd.read_csv('day.csv')

In [66]:
X = dat.drop(columns = ['cnt', 'instant','dteday'])
y = dat['cnt']

## Create Train, Val, Test Splits

In [67]:
# create train & test splits
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size = 0.2,
                                                    random_state = 501)

# create validation split
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train,
                                                    test_size = 0.2,
                                                    random_state = 501)

In [68]:
# center and scale X data
X_train = torch.FloatTensor(X_train.to_numpy())
X_valid = torch.FloatTensor(X_val.to_numpy())
X_test = torch.FloatTensor(X_test.to_numpy())

means = X_train.mean(dim=0, keepdims=True)
stds = X_train.std(dim=0, keepdims=True)

X_train = (X_train - means) / stds
X_valid = (X_valid - means) / stds
X_test = (X_test - means) / stds

In [69]:
# center and scale y data
y_train = torch.FloatTensor(y_train.to_numpy()).reshape(-1,1)
y_valid = torch.FloatTensor(y_val.to_numpy()).reshape(-1,1)
y_test = torch.FloatTensor(y_test.to_numpy()).reshape(-1,1)

y_mean = y_train.mean(dim=0, keepdims=True)
y_std  = y_train.std(dim=0, keepdims=True)

y_train = (y_train - y_mean) / y_std
y_valid = (y_valid - y_mean) / y_std
y_test  = (y_test  - y_mean) / y_std

## Batch Gradient Descent Function

In [70]:
def train_bgd(model, optimizer, criterion, X_train, y_train, n_epochs):
  for epoch in range(n_epochs):
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train) # get loss val
    loss.backward() # calc grads
    optimizer.step() # take grad desc step
    optimizer.zero_grad() # zero out grads for next pass
    print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

## Regression MLP

In [71]:
n_features = X_train.shape[1] # get cols

In [72]:
import torch.nn as nn
torch.manual_seed(501)
# (input, output) for each layer
model = nn.Sequential(
    nn.Linear(n_features, 40),
    nn.ReLU(),
    nn.Linear(40, 40),
    nn.ReLU(),
    nn.Linear(40, 1) # 1 output --> regression val
)

## Train Model

In [73]:
# set model training params
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
n_epochs = 100

In [74]:
train_bgd(model, optimizer, criterion, X_train, y_train, n_epochs)

Epoch 1, Loss: 1.0571339130401611
Epoch 2, Loss: 1.0426732301712036
Epoch 3, Loss: 1.0291247367858887
Epoch 4, Loss: 1.0163812637329102
Epoch 5, Loss: 1.0043340921401978
Epoch 6, Loss: 0.9928900003433228
Epoch 7, Loss: 0.9820040464401245
Epoch 8, Loss: 0.9715561270713806
Epoch 9, Loss: 0.9614793658256531
Epoch 10, Loss: 0.951737642288208
Epoch 11, Loss: 0.9422871470451355
Epoch 12, Loss: 0.9330822229385376
Epoch 13, Loss: 0.9240773916244507
Epoch 14, Loss: 0.9152446985244751
Epoch 15, Loss: 0.9065364599227905
Epoch 16, Loss: 0.897942841053009
Epoch 17, Loss: 0.889440655708313
Epoch 18, Loss: 0.8810222744941711
Epoch 19, Loss: 0.872648298740387
Epoch 20, Loss: 0.8643302917480469
Epoch 21, Loss: 0.856031596660614
Epoch 22, Loss: 0.8477593660354614
Epoch 23, Loss: 0.8394768238067627
Epoch 24, Loss: 0.8311606049537659
Epoch 25, Loss: 0.8228361010551453
Epoch 26, Loss: 0.8144875168800354
Epoch 27, Loss: 0.8061072826385498
Epoch 28, Loss: 0.7976723313331604
Epoch 29, Loss: 0.7891790270805359

## Mini-Batch GD Using DataLoaders

In [75]:
# set device depending on what's available
if torch.cuda.is_available():
  device = 'cuda'
elif torch.backends.mps.is_available():
  device = 'mps'
else:
  device = 'cpu'

In [76]:
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle = True)

In [77]:
torch.manual_seed(501)
model = nn.Sequential(
    nn.Linear(n_features, 40),
    nn.ReLU(),
    nn.Linear(40, 40),
    nn.ReLU(),
    nn.Linear(40, 1) # 1 output --> regression val
)

model = model.to(device)

In [78]:
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
n_epochs = 100

In [79]:
# train function to implement mb gd
def train_mbgd(model, optimizer, criterion, train_loader, n_epochs):
  model.train() # set training mode
  for epoch in range(n_epochs):
    total_loss = 0
    for X_batch, y_batch in train_loader:
      # get batch
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)
      # mod pred
      y_pred = model(X_batch)
      # calc loss and tally
      loss = criterion(y_pred, y_batch)
      total_loss += loss.item()
      # calc grads and do step
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()

    mean_loss = total_loss / len(train_loader)
    if epoch % 10 == 0: # every ten epochs, print out loss
      print(f'Epoch {epoch + 1}, Loss: {mean_loss}')

In [80]:
train_mbgd(model, optimizer, criterion, train_loader, n_epochs)

Epoch 1, Loss: 0.9749118367830912
Epoch 11, Loss: 0.10588822265466054
Epoch 21, Loss: 0.043283493568499885
Epoch 31, Loss: 0.027103760838508607
Epoch 41, Loss: 0.018806432994703453
Epoch 51, Loss: 0.014053460645178954
Epoch 61, Loss: 0.010648593616982302
Epoch 71, Loss: 0.008801982241372268
Epoch 81, Loss: 0.007529840804636478
Epoch 91, Loss: 0.006357607307533423


## Evaluating the Model

In [81]:
## create evaluation function
def evaluate(model, data_loader, metric, aggregate = torch.mean):
  model.eval() # change model mode to evaluation (no gradient work)
  metrics = []

  with torch.no_grad():
    for X_batch, y_batch in data_loader:
      # move data to GPU / cuda
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)
      y_pred = model(X_batch)
      metric_val = metric(y_pred, y_batch)
      metrics.append(metric_val)

  # retrun agg met over all batches
  return aggregate(torch.stack(metrics))

In [82]:
# set up validation data loader
# don't typically use shuffle in evaluation so it's deterministic
# and the ordering is stable
valid_dataset = TensorDataset(X_valid, y_valid)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle = False)

In [83]:
# RMSE on validation data
evaluate(model, valid_loader, criterion,
         aggregate = lambda metrics: torch.sqrt(torch.mean(metrics)))

tensor(0.1094, device='cuda:0')

In [84]:
# MSE on validation data
evaluate(model, valid_loader, criterion)

tensor(0.0120, device='cuda:0')