In [509]:
import warnings

warnings.filterwarnings('ignore')

In [510]:
from torch import nn
from torch import functional as F 
import torchmetrics 
import lightning as L 
import pandas as pd
from torch.utils import data
import torch

In [511]:
torch.set_num_threads(8)

In [512]:
df = pd.read_csv('CARS.csv')

In [513]:
df.head()

Unnamed: 0,Make,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
0,Acura,MDX,SUV,Asia,All,"$36,945","$33,337",3.5,6.0,265,17,23,4451,106,189
1,Acura,RSX Type S 2dr,Sedan,Asia,Front,"$23,820","$21,761",2.0,4.0,200,24,31,2778,101,172
2,Acura,TSX 4dr,Sedan,Asia,Front,"$26,990","$24,647",2.4,4.0,200,22,29,3230,105,183
3,Acura,TL 4dr,Sedan,Asia,Front,"$33,195","$30,299",3.2,6.0,270,20,28,3575,108,186
4,Acura,3.5 RL 4dr,Sedan,Asia,Front,"$43,755","$39,014",3.5,6.0,225,18,24,3880,115,197


In [514]:
category_columns = ['Make', 'Type', 'Origin', 'DriveTrain']

In [515]:
df[category_columns] = df[category_columns].astype('category')

In [516]:
df['Invoice'] = df['Invoice'].str.replace(',' , '.').str.replace('$', '').astype(float)
df['MSRP'] = df['MSRP'].str.replace(',' , '.').str.replace('$', '').astype(float)

In [517]:
df = df[['MPG_Highway', 'Length', 'Weight', 'Wheelbase', 'Horsepower', 'Invoice', 'EngineSize', 'Cylinders', 'Origin', 'Type']]

In [518]:
df = pd.get_dummies(df, columns=['Origin', 'Type'])

In [519]:
df.isna().sum()

MPG_Highway      0
Length           0
Weight           0
Wheelbase        0
Horsepower       0
Invoice          0
EngineSize       0
Cylinders        2
Origin_Asia      0
Origin_Europe    0
Origin_USA       0
Type_Hybrid      0
Type_SUV         0
Type_Sedan       0
Type_Sports      0
Type_Truck       0
Type_Wagon       0
dtype: int64

In [520]:
df.dropna(axis='rows', inplace=True)

In [521]:
target = 'MPG_Highway'
predictors = [column for column in df.columns if column != target]

In [522]:
class CarsDataset(data.Dataset):
    def __init__(self):
        self.data = df
        
    def __len__(self):
        return len(self.data)

    
    def __getitem__(self, idx):
            features = self.data.iloc[idx, :][predictors]
            label = self.data.iloc[idx, :][target]

            features = torch.tensor(features, dtype=torch.float32)
            label = torch.tensor(label, dtype=torch.long).reshape((1))

            sample = features, label

            return sample

In [523]:
dataset = CarsDataset()

In [524]:
class MLPRegressor(L.LightningModule):
    def __init__(self, in_features):
        super().__init__()
        self.reg = nn.Sequential(
            nn.Linear(in_features, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 24),
            nn.ReLU(),
            nn.Linear(24, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
            nn.ReLU(),
            nn.Linear(4, 1)
        )
        
    def forward(self, data):
        return self.reg(data)
    
    def training_step(self, batch, batch_idx):
        x, label = batch
        z = self(x)
        loss = torchmetrics.MeanSquaredError()(label, z)
        self.log("train mse:", loss, prog_bar=True)
        return loss
        
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

In [525]:
dataset = CarsDataset()

In [526]:
train_length = int(len(dataset) * 0.8)
val_length = len(dataset) - train_length

In [527]:
train, val = data.random_split(dataset, [train_length, val_length])

In [528]:
trainer = L.Trainer(max_epochs=250)

ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [529]:
model = MLPRegressor(len(predictors))

In [530]:
trainer.fit(model, data.DataLoader(train), data.DataLoader(val))


  | Name | Type       | Params | Mode 
--------------------------------------------
0 | reg  | Sequential | 2.2 K  | train
--------------------------------------------
2.2 K     Trainable params
0         Non-trainable params
2.2 K     Total params
0.009     Total estimated model params size (MB)
14        Modules in train mode
0         Modules in eval mode


Epoch 17:  80%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ  | 272/340 [00:00<00:00, 312.06it/s, v_num=23, train mse:=5.140]   


Detected KeyboardInterrupt, attempting graceful shutdown ...


SystemExit: 1

In [None]:
metric = torchmetrics.MeanAbsolutePercentageError()

In [None]:
with torch.no_grad():
    for inputs, targets in data.DataLoader(val):
        targets_pred = model(inputs)
        metric.update(targets, targets_pred)

In [None]:
metric.compute()

tensor(0.1130)