In [735]:
import warnings

warnings.filterwarnings('ignore')

In [736]:
from torch import nn
from torch import functional as F 
import torchmetrics 
import lightning as L 
import pandas as pd
from torch.utils import data
import torch

In [737]:
torch.set_num_threads(8)

In [738]:
df = pd.read_csv('CARS.csv')

In [739]:
df.head()

Unnamed: 0,Make,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
0,Acura,MDX,SUV,Asia,All,"$36,945","$33,337",3.5,6.0,265,17,23,4451,106,189
1,Acura,RSX Type S 2dr,Sedan,Asia,Front,"$23,820","$21,761",2.0,4.0,200,24,31,2778,101,172
2,Acura,TSX 4dr,Sedan,Asia,Front,"$26,990","$24,647",2.4,4.0,200,22,29,3230,105,183
3,Acura,TL 4dr,Sedan,Asia,Front,"$33,195","$30,299",3.2,6.0,270,20,28,3575,108,186
4,Acura,3.5 RL 4dr,Sedan,Asia,Front,"$43,755","$39,014",3.5,6.0,225,18,24,3880,115,197


In [740]:
category_columns = ['Make', 'Type', 'Origin', 'DriveTrain']

In [741]:
df[category_columns] = df[category_columns].astype('category')

In [742]:
df['Invoice'] = df['Invoice'].str.replace(',' , '.').str.replace('$', '').astype(float)
df['MSRP'] = df['MSRP'].str.replace(',' , '.').str.replace('$', '').astype(float)

In [743]:
df = df[['MPG_Highway', 'Length', 'Weight', 'Wheelbase', 'Horsepower', 'Invoice', 'EngineSize', 'Cylinders', 'Origin', 'Type']]

In [744]:
df = pd.get_dummies(df, columns=['Origin', 'Type'])

In [745]:
df.isna().sum()

MPG_Highway      0
Length           0
Weight           0
Wheelbase        0
Horsepower       0
Invoice          0
EngineSize       0
Cylinders        2
Origin_Asia      0
Origin_Europe    0
Origin_USA       0
Type_Hybrid      0
Type_SUV         0
Type_Sedan       0
Type_Sports      0
Type_Truck       0
Type_Wagon       0
dtype: int64

In [746]:
df.dropna(axis='rows', inplace=True)

In [747]:
df.head()

Unnamed: 0,MPG_Highway,Length,Weight,Wheelbase,Horsepower,Invoice,EngineSize,Cylinders,Origin_Asia,Origin_Europe,Origin_USA,Type_Hybrid,Type_SUV,Type_Sedan,Type_Sports,Type_Truck,Type_Wagon
0,23,189,4451,106,265,33.337,3.5,6.0,True,False,False,False,True,False,False,False,False
1,31,172,2778,101,200,21.761,2.0,4.0,True,False,False,False,False,True,False,False,False
2,29,183,3230,105,200,24.647,2.4,4.0,True,False,False,False,False,True,False,False,False
3,28,186,3575,108,270,30.299,3.2,6.0,True,False,False,False,False,True,False,False,False
4,24,197,3880,115,225,39.014,3.5,6.0,True,False,False,False,False,True,False,False,False


In [748]:
target = 'MPG_Highway'
predictors = [column for column in df.columns if column != target]

In [749]:
class CarsDataset(data.Dataset):
    def __init__(self):
        self.data = df
        
    def __len__(self):
        return len(self.data)

    
    def __getitem__(self, idx):
            features = self.data.iloc[idx, :][predictors]
            label = self.data.iloc[idx, :][target]

            features = torch.tensor(features, dtype=torch.float32)
            label = torch.tensor(label, dtype=torch.long).reshape((1))

            sample = features, label

            return sample

In [750]:
dataset = CarsDataset()

In [751]:
train_length = int(len(dataset) * 0.8)
val_length = len(dataset) - train_length

In [752]:
train, val = data.random_split(dataset, [train_length, val_length])

In [753]:
class MLPRegressor(L.LightningModule):
    def __init__(self, in_features, layer_sizes, activation='relu'):
        super().__init__()
        layers = []
        in_size = in_features
        match activation:
            case 'relu':
                act = nn.ReLU()
            case 'tanh':
                act = nn.Tanh()
            case 'sigmoid':
                act = nn.Sigmoid()
        for out_size in layer_sizes:
            layers.append(nn.Linear(in_size, out_size))
            layers.append(act)
            in_size = out_size
            
        layers.append(nn.Linear(in_size, 1))
        self.mlp = nn.Sequential(*layers)
        
    def forward(self, data):
        return self.mlp(data)
    
    def training_step(self, batch, batch_idx):
        x, label = batch
        z = self(x)
        loss = torchmetrics.MeanSquaredError()(label, z)
        self.log("train mse:", loss, prog_bar=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, label = batch
        z = self(x)
        loss = torchmetrics.MeanAbsolutePercentageError()(label, z)
        self.log("val mape:", loss, prog_bar=True)
        return loss
        
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

In [762]:
model = MLPRegressor(len(predictors), [32, 64, 32, 24, 26, 8, 4])

In [763]:
model.parameters

<bound method Module.parameters of MLPRegressor(
  (mlp): Sequential(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=24, bias=True)
    (7): ReLU()
    (8): Linear(in_features=24, out_features=26, bias=True)
    (9): ReLU()
    (10): Linear(in_features=26, out_features=8, bias=True)
    (11): ReLU()
    (12): Linear(in_features=8, out_features=4, bias=True)
    (13): ReLU()
    (14): Linear(in_features=4, out_features=1, bias=True)
  )
)>

In [769]:
trainer = L.Trainer(max_epochs=2000)

ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [770]:
trainer.fit(model, data.DataLoader(train, batch_size=64), data.DataLoader(val, batch_size=64))


  | Name | Type       | Params | Mode 
--------------------------------------------
0 | mlp  | Sequential | 6.4 K  | train
--------------------------------------------
6.4 K     Trainable params
0         Non-trainable params
6.4 K     Total params
0.026     Total estimated model params size (MB)
10        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Epoch 1999: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 6/6 [00:00<00:00, 25.02it/s, v_num=32, train mse:=1.460, val mape:=0.069] 

`Trainer.fit` stopped: `max_epochs=2000` reached.


Epoch 1999: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 6/6 [00:00<00:00, 24.63it/s, v_num=32, train mse:=1.460, val mape:=0.069]


In [771]:
metric = torchmetrics.MeanAbsolutePercentageError()

In [772]:
with torch.no_grad():
    for inputs, targets in data.DataLoader(val):
        targets_pred = model(inputs)
        metric.update(targets, targets_pred)

In [773]:
metric.compute()

tensor(0.0690)

In [779]:
%load_ext tensorboard
%tensorboard --logdir lightning_logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 184287), started 2:49:57 ago. (Use '!kill 184287' to kill it.)