In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
import pandas as pd

In [4]:
import pandas as pd
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower',
'Weight', 'Acceleration', 'Model Year', 'Origin']
df = pd.read_csv(url, names=column_names,
na_values = "?", comment='\t',
sep=" ", skipinitialspace=True)

In [5]:
df = df.dropna()
df = df.reset_index(drop=True)

In [6]:
import sklearn
from sklearn.model_selection import train_test_split

df_train, df_test = train_test_split(df, train_size=0.8, random_state=1)
train_stats = df_train.describe().transpose()

numeric_column_names = ['Cylinders', 'Displacement',
 'Horsepower', 'Weight', 'Acceleration']

In [7]:
df_train_norm, df_test_norm = df_train.copy(), df_test.copy()
for col_name in numeric_column_names:
    mean = train_stats.loc[col_name, 'mean']
    std = train_stats.loc[col_name, 'std']
    df_train_norm.loc[:, col_name] = \
        (df_train_norm.loc[:, col_name] - mean)/std
    df_test_norm.loc[:, col_name] = \
        (df_test_norm.loc[:, col_name] - mean)/std
df_train_norm.tail()

  0.3511267  -0.8243028  -0.8243028  -0.8243028   0.3511267  -0.8243028
  0.3511267   1.52655621  1.52655621  1.52655621  0.3511267   1.52655621
 -0.8243028   0.3511267   1.52655621 -0.8243028  -0.8243028   0.3511267
 -0.8243028  -0.8243028  -0.8243028   0.3511267  -0.8243028   1.52655621
  0.3511267  -0.8243028   0.3511267  -0.8243028  -0.8243028   1.52655621
 -0.8243028   1.52655621  1.52655621 -0.8243028  -0.8243028  -0.8243028
 -0.8243028   0.3511267  -0.8243028   1.52655621 -0.8243028  -0.8243028
  1.52655621 -0.8243028  -0.8243028  -0.8243028   1.52655621  1.52655621
  0.3511267   0.3511267   1.52655621 -0.8243028  -0.8243028   1.52655621
  1.52655621 -0.8243028  -0.8243028   0.3511267   1.52655621 -0.8243028
  0.3511267  -0.8243028   1.52655621  1.52655621 -0.8243028  -0.8243028
 -1.41201755  1.52655621  0.3511267   1.52655621 -0.8243028  -0.8243028
 -0.8243028   1.52655621  1.52655621  0.3511267   0.3511267   1.52655621
 -0.8243028   1.52655621 -0.23658805 -0.8243028  -0.824302

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
203,28.0,-0.824303,-0.90102,-0.736562,-0.950031,0.255202,76,3
255,19.4,0.351127,0.4138,-0.340982,0.29319,0.548737,78,1
72,13.0,1.526556,1.144256,0.713897,1.339617,-0.625403,72,1
235,30.5,-0.824303,-0.89128,-1.053025,-1.072585,0.475353,77,1
37,14.0,1.526556,1.563051,1.636916,1.47042,-1.35924,71,1


In [8]:
boundaries = torch.tensor([73, 76, 79])
v = torch.tensor(df_train_norm['Model Year'].values)
df_train_norm['Model Year Bucketed'] = torch.bucketize(
    v, boundaries, right=True
)
v = torch.tensor(df_test_norm['Model Year'].values)
df_test_norm['Model Year Bucketed'] = torch.bucketize(
v, boundaries, right=True
)
numeric_column_names.append('Model Year Bucketed')

In [9]:
from torch.nn.functional import one_hot
total_origin = len(set(df_train_norm['Origin']))
origin_encoded = one_hot(torch.from_numpy(df_train_norm['Origin'].values) % total_origin)
x_train_numeric = torch.tensor(df_train_norm[numeric_column_names].values)
x_train = torch.cat([x_train_numeric, origin_encoded], 1).float()
origin_encoded = one_hot(torch.from_numpy(df_test_norm['Origin'].values) % total_origin)
x_test_numeric = torch.tensor(df_test_norm[numeric_column_names].values)
x_test = torch.cat([x_test_numeric, origin_encoded], 1).float()


In [10]:
y_train = torch.tensor(df_train_norm['MPG'].values).float()
y_test = torch.tensor(df_test_norm['MPG'].values).float()

In [11]:
from torch.utils.data import DataLoader, TensorDataset
train_ds = TensorDataset(x_train, y_train)
batch_size = 8
torch.manual_seed(1)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

In [12]:
class MyModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(in_features=9, out_features=8, bias=True)
        self.r1 = nn.ReLU()
        self.l2 = nn.Linear(in_features=8, out_features=4, bias=True)
        self.r2 = nn.ReLU()
        self.l3 = nn.Linear(in_features=4, out_features=1, bias=True)

        l = [self.l1, self.r1, self.l2, self.r2, self.l3]
        self.module_list = nn.ModuleList(l)

    def forward(self, x):
        for f in self.module_list:
            x = f(x)
        
        return x

In [20]:
model = MyModel()

In [21]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2)
torch.manual_seed(1)
num_epochs = 200
log_epochs = 20

In [22]:
for epoch in range(num_epochs):
    loss_hist_train = 0
    for x_batch, y_batch in train_dl:
        pred = model(x_batch)[:, 0]
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        loss_hist_train += loss.item()

    if epoch % log_epochs == 0:
        print(f'Epoch {epoch} Loss 'f'{loss_hist_train/len(train_dl):.4f}')

Epoch 0 Loss 324.3488
Epoch 20 Loss 12.6089
Epoch 40 Loss 11.6462
Epoch 60 Loss 18.6859
Epoch 80 Loss 12.2866
Epoch 100 Loss 12.3306
Epoch 120 Loss 11.8049
Epoch 140 Loss 11.7714
Epoch 160 Loss 12.6640
Epoch 180 Loss 12.7677


In [24]:
with torch.no_grad():
    pred = model(x_test.float())[:, 0]
    loss = loss_fn(pred, y_test)
    print(f'Test MSE: {loss.item():.4f}')
    print(f'Test MAE: {nn.L1Loss()(pred, y_test).item():.4f}')

Test MSE: 9.7841
Test MAE: 2.4953
