## Download dataset

In [35]:
!gdown 1qiUDDoYyRLBiKOoYWdFl_5WByHE8Cugu

Downloading...
From: https://drive.google.com/uc?id=1qiUDDoYyRLBiKOoYWdFl_5WByHE8Cugu
To: /content/Auto_MPG_data.csv
  0% 0.00/15.4k [00:00<?, ?B/s]100% 15.4k/15.4k [00:00<00:00, 54.5MB/s]


## Import libraries

In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Set fixed random seed

In [37]:
random_state = 59
np.random.seed(random_state)
torch.manual_seed(random_state)
if torch.cuda.is_available():
  torch.cuda.manual_seed(random_state)

## Set up computing device

In [38]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Read the dataset

In [39]:
dataset_path = '/content/Auto_MPG_data.csv'
dataset = pd.read_csv(dataset_path)

## Preprocessing dataset

### Split X feature and y label

In [40]:
X = dataset.drop(columns='MPG').values
y = dataset['MPG'].values

### Split train/val/test

In [41]:
val_size = 0.2
test_size = 0.125
is_shuffle = True

X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=val_size,
    random_state=random_state,
    shuffle=is_shuffle
)

X_train, X_test, y_train, y_test = train_test_split(
    X_train, y_train,
    test_size=test_size,
    random_state=random_state,
    shuffle=is_shuffle
)

### Normalizing input features

In [42]:
normalizer = StandardScaler()
X_train = normalizer.fit_transform(X_train)
X_val = normalizer.transform(X_val)
X_test = normalizer.transform(X_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

## Building the DataLoader

In [43]:
class CustomDataset(Dataset):
  def __init__(self, X, y):
    self.X = X
    self.y = y

  def __len__(self):
    return len(self.y)

  def __getitem__(self, idx):
    return self.X[idx], self.y[idx]

In [44]:
batch_size = 32
train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_val, y_val)
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True)
val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=False)

## Building MLP network

In [45]:
class MLP(nn.Module):
  def __init__(self, input_dims, hidden_dims, output_dims):
    super().__init__()
    self.linear1 = nn.Linear(input_dims, hidden_dims)
    self.linear2 = nn.Linear(hidden_dims, hidden_dims)
    self.output = nn.Linear(hidden_dims, output_dims)

  def forward(self, x):
    x = self.linear1(x)
    x = F.relu(x)
    x = self.linear2(x)
    x = F.relu(x)
    out = self.output(x)

    return out.squeeze(1)

In [46]:
input_dims = X_train.shape[1]
output_dims = 1
hidden_dims = 64

model = MLP(input_dims=input_dims,
            hidden_dims=hidden_dims,
            output_dims=output_dims).to(device)

## Declare loss function and optimizer

In [47]:
lr = 1e-2
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

## Bulding R2 score function

In [48]:
def r_squared(y_true, y_pred):
  y_true = torch.Tensor(y_true).to(device)
  y_pred = torch.Tensor(y_pred).to(device)
  mean_true = torch.mean(y_true)
  ss_total = torch.sum((y_true - mean_true) ** 2)
  ss_res = torch.sum((y_true - y_pred) ** 2)
  r2 = 1 - (ss_res/ss_total)

  return r2

## Training model

In [49]:
epochs = 100
train_losses = []
val_losses = []
train_r2 = []
val_r2 = []

for epoch in range(epochs):
  train_loss = 0.0
  train_target = []
  val_target = []
  train_predict = []
  val_predict = []
  model.train()

  for X_samples, y_samples in train_loader:
    X_samples = X_samples.to(device)
    y_samples = y_samples.to(device)
    optimizer.zero_grad()
    outputs = model(X_samples)
    train_predict += outputs.tolist()
    train_target += y_samples.tolist()
    loss = criterion(outputs, y_samples)
    train_loss += loss.item()
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  train_loss /= len(train_loader)
  train_losses.append(train_loss)
  train_r2.append(r_squared(train_target, train_predict))
  model.eval()
  val_loss = 0.0

  with torch.no_grad():
    for X_samples, y_samples in val_loader:
      X_samples = X_samples.to(device)
      y_samples = y_samples.to(device)
      outputs = model(X_samples)
      val_predict += outputs.tolist()
      val_target += y_samples.tolist()
      loss = criterion(outputs, y_samples)
      val_loss += loss.item()

    val_loss /= len(val_loader)
epochs = 100
train_losses = []
val_losses = []
train_r2 = []
val_r2 = []

for epoch in range(epochs):
  train_loss = 0.0
  train_target = []
  val_target = []
  train_predict = []
  val_predict = []
  model.train()

  for X_samples, y_samples in train_loader:
    X_samples = X_samples.to(device)
    y_samples = y_samples.to(device)
    optimizer.zero_grad()
    outputs = model(X_samples)
    train_predict += outputs.tolist()
    train_target += y_samples.tolist()
    loss = criterion(outputs, y_samples)
    train_loss += loss.item()
    loss.backward()
    optimizer.step()


  train_loss /= len(train_loader)
  train_losses.append(train_loss)
  train_r2.append(r_squared(train_target, train_predict))
  model.eval()
  val_loss = 0.0

  with torch.no_grad():
    for X_samples, y_samples in val_loader:
      X_samples = X_samples.to(device)
      y_samples = y_samples.to(device)
      outputs = model(X_samples)
      val_predict += outputs.tolist()
      val_target += y_samples.tolist()
      loss = criterion(outputs, y_samples)
      val_loss += loss.item()

    val_loss /= len(val_loader)
    val_losses.append(val_loss)
    val_r2.append(r_squared(val_target, val_predict))
    print(f'\nEPOCH {epoch+1}: \t Training loss: {train_loss:.3f}\t Validation loss: {val_loss:.3f}')



EPOCH 1: 	 Training loss: 5.541	 Validation loss: 31.191

EPOCH 2: 	 Training loss: 11.539	 Validation loss: 4.913

EPOCH 3: 	 Training loss: 5.414	 Validation loss: 4.508

EPOCH 4: 	 Training loss: 5.265	 Validation loss: 13.841

EPOCH 5: 	 Training loss: 7.851	 Validation loss: 5.260

EPOCH 6: 	 Training loss: 5.706	 Validation loss: 4.380

EPOCH 7: 	 Training loss: 4.466	 Validation loss: 5.553

EPOCH 8: 	 Training loss: 6.830	 Validation loss: 6.446

EPOCH 9: 	 Training loss: 6.447	 Validation loss: 6.575

EPOCH 10: 	 Training loss: 6.139	 Validation loss: 16.582

EPOCH 11: 	 Training loss: 5.617	 Validation loss: 7.986

EPOCH 12: 	 Training loss: 4.594	 Validation loss: 5.643

EPOCH 13: 	 Training loss: 5.586	 Validation loss: 6.282

EPOCH 14: 	 Training loss: 7.652	 Validation loss: 8.013

EPOCH 15: 	 Training loss: 9.135	 Validation loss: 8.246

EPOCH 16: 	 Training loss: 5.909	 Validation loss: 8.748

EPOCH 17: 	 Training loss: 8.293	 Validation loss: 7.667

EPOCH 18: 	 Traini

## Evaluate model

In [51]:
model.eval()
with torch.no_grad():
  X_test = X_test.to(device)
  y_hat = model(X_test)
  test_set_r2 = r_squared(y_hat, y_test)
  print('Evaluation on test set:')
  print(f'R2: {test_set_r2}')

Evaluation on test set:
R2: 0.8361618518829346
