#### Importing required Libraries

In [1]:
import torch
import jovian
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split

<IPython.core.display.Javascript object>

#### Importing dataset

In [2]:
DATA_FILENAME = "car_data.csv"
dataframe_raw = pd.read_csv(DATA_FILENAME)
dataframe_raw.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


#### Random subsampling to improve generalization
#### Input scaling/noise to promote robustness
#### Feature removal to improve understanding

In [3]:
your_name = "Surya"
def customize_dataset(dataframe_raw, rand_str):
    dataframe = dataframe_raw.copy(deep=True)
    # drop some rows
    dataframe = dataframe.sample(int(0.95*len(dataframe)), random_state=int(ord(rand_str[0])))
    # scale input
    dataframe.Year = dataframe.Year * ord(rand_str[1])/100.
    # scale target
    dataframe.Selling_Price = dataframe.Selling_Price * ord(rand_str[2])/100.
    # drop column
    if ord(rand_str[3]) % 2 == 1:
        dataframe = dataframe.drop(['Car_Name'], axis=1)
    return dataframe

dataframe = customize_dataset(dataframe_raw, your_name)
dataframe.head()

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
119,2355.21,1.197,1.9,5400,Petrol,Individual,Manual,0
61,2357.55,5.13,7.7,40588,Petrol,Dealer,Manual,0
211,2357.55,13.395,14.79,43535,Diesel,Dealer,Manual,0
42,2349.36,2.223,7.15,58000,Petrol,Dealer,Manual,0
262,2357.55,4.56,5.8,40023,Petrol,Dealer,Manual,0


#### Data preparation

In [4]:
input_cols = ["Year","Present_Price","Kms_Driven","Owner"]
categorical_cols = ["Fuel_Type","Seller_Type","Transmission"]
output_cols = ["Selling_Price"]

#### Label encoding for categorical columns
#### Transforming numerical columns to numpy arrays 

In [5]:
def dataframe_to_arrays(dataframe):
    # Make a copy of the original dataframe
    dataframe1 = dataframe.copy(deep=True)
    # Convert non-numeric categorical columns to numbers
    for col in categorical_cols:
        dataframe1[col] = dataframe1[col].astype('category').cat.codes
    # Extract input & outupts as numpy arrays
    inputs_array = dataframe1[input_cols].to_numpy()
    targets_array = dataframe1[output_cols].to_numpy()
    return inputs_array, targets_array

inputs_array, targets_array = dataframe_to_arrays(dataframe)
inputs_array, targets_array

(array([[2.35521e+03, 1.90000e+00, 5.40000e+03, 0.00000e+00],
        [2.35755e+03, 7.70000e+00, 4.05880e+04, 0.00000e+00],
        [2.35755e+03, 1.47900e+01, 4.35350e+04, 0.00000e+00],
        ...,
        [2.35287e+03, 9.50000e-01, 2.40000e+04, 0.00000e+00],
        [2.35755e+03, 8.40000e-01, 2.90000e+04, 0.00000e+00],
        [2.34468e+03, 1.23500e+01, 1.35154e+05, 0.00000e+00]]),
 array([[ 1.197 ],
        [ 5.13  ],
        [13.395 ],
        [ 2.223 ],
        [ 4.56  ],
        [ 0.684 ],
        [ 1.197 ],
        [ 1.197 ],
        [ 7.695 ],
        [ 9.405 ],
        [ 0.285 ],
        [ 2.85  ],
        [ 0.57  ],
        [ 6.441 ],
        [ 7.524 ],
        [ 0.456 ],
        [ 2.907 ],
        [ 1.254 ],
        [ 3.534 ],
        [ 4.959 ],
        [ 1.425 ],
        [ 3.819 ],
        [ 5.016 ],
        [ 3.078 ],
        [ 3.933 ],
        [ 1.539 ],
        [ 4.731 ],
        [ 0.5472],
        [ 0.342 ],
        [12.825 ],
        [ 0.456 ],
        [ 5.301 ],
     

In [6]:
inputs = torch.Tensor(inputs_array)
targets = torch.Tensor(targets_array)

dataset = TensorDataset(inputs, targets)
train_ds, val_ds = random_split(dataset, [228, 57])
batch_size = 128

train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)

In [7]:
input_size = len(input_cols)
output_size = len(output_cols)

class CarsModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, output_size)
        )                  # fill this (hint: use input_size & output_size defined above)
        
    def forward(self, xb):
        return self.net(xb)
    
    def training_step(self, batch):
        inputs, targets = batch 
        # Generate predictions
        out = self(inputs)          
        # Calcuate loss
        loss = F.l1_loss(out, targets)                         # fill this
        return loss
    
    def validation_step(self, batch):
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss
        loss = F.l1_loss(out, targets)                           # fill this    
        return {'val_loss': loss.detach()}
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        return {'val_loss': epoch_loss.item()}
    
    def epoch_end(self, epoch, result, num_epochs):
        # Print result every 20th epoch
        if (epoch+1) % 20 == 0 or epoch == num_epochs-1:
            print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']))
            
model = CarsModel()

list(model.parameters())

[Parameter containing:
 tensor([[-0.3744, -0.2792,  0.1062,  0.0022],
         [ 0.3048,  0.3769, -0.0989, -0.1960],
         [-0.1728,  0.0645,  0.1521, -0.0071],
         [ 0.4827,  0.1037, -0.4889,  0.3749],
         [-0.0119, -0.4841,  0.3592,  0.2679],
         [ 0.3541, -0.4454,  0.4918,  0.1747],
         [-0.2639,  0.3941,  0.4460,  0.4889],
         [ 0.2306,  0.1024, -0.1764,  0.1976],
         [-0.1798,  0.3395,  0.2860,  0.3863],
         [-0.4514,  0.2046,  0.3815, -0.1850],
         [ 0.2268, -0.2876,  0.1134, -0.1993],
         [-0.3644,  0.2970, -0.4665,  0.4300],
         [-0.2935, -0.4207, -0.4579,  0.2940],
         [ 0.1461,  0.3382, -0.1487, -0.2336],
         [-0.0057, -0.0264, -0.0465, -0.1755],
         [ 0.1736,  0.3929,  0.4592, -0.0700],
         [ 0.4169, -0.4179, -0.3982,  0.2980],
         [-0.3277,  0.0810, -0.3056,  0.3523],
         [-0.4339, -0.4556, -0.0894, -0.3779],
         [-0.4708,  0.3871, -0.1322,  0.1694],
         [-0.4681, -0.4861, -0.4223, 

In [8]:
# Eval algorithm
def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

# Fitting algorithm
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase 
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result, epochs)
        history.append(result)
    return history

# Check the initial value that val_loss have
result = evaluate(model, val_loader)
print(result)

{'val_loss': 1283.2998046875}


In [9]:
# Start with the Fitting
epochs = 50
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 804.0395
Epoch [40], val_loss: 328.6771
Epoch [50], val_loss: 166.0798


In [10]:
# Train repeatdly until have a 'good' val_loss
epochs = 50
lr = 1e-3
history2 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 4.0061
Epoch [40], val_loss: 3.9319
Epoch [50], val_loss: 3.9224


In [11]:
epochs = 125
lr = 1e-4
history3 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 3.9217
Epoch [40], val_loss: 3.9214
Epoch [60], val_loss: 3.9212
Epoch [80], val_loss: 3.9215
Epoch [100], val_loss: 3.9214
Epoch [120], val_loss: 3.9215
Epoch [125], val_loss: 3.9215


In [12]:
from torch.optim import Adam
epochs = 50
lr = 1e-4
history4 = fit(epochs, lr, model, train_loader, val_loader, opt_func=Adam)

Epoch [20], val_loss: 3.9218
Epoch [40], val_loss: 3.9216
Epoch [50], val_loss: 3.9215


In [13]:
# Prediction Algorithm
def predict_single(input, target, model):
    inputs = input.unsqueeze(0)
    predictions = model(inputs)                # fill this
    prediction = predictions[0].detach()
    print("Input:", input)
    print("Target:", target)
    print("Prediction:", prediction)

# Testing the model with some samples
input, target = val_ds[0]
predict_single(input, target, model)

Input: tensor([ 2359.8899,    23.1500, 11000.0000,     0.0000])
Target: tensor([22.5150])
Prediction: tensor([3.8223])


In [14]:
input, target = val_ds[10]
predict_single(input, target, model)

Input: tensor([2.3540e+03, 4.4300e+00, 2.3709e+04, 0.0000e+00])
Target: tensor([2.2800])
Prediction: tensor([3.8223])


In [15]:
torch.save(model.state_dict(), 'car_model.pth')