<div align="center" style="font-family: 'Consolas', monospace;"><h1> Model Training For Car Price Predictor </h1> </div>

<p align = "center" style="font-family: 'Consolas', monospace;"> The purpose of this notebook is to train a suitable regression model. I will scale the numerical features, encode the categorical ones, build a pipeline, and finally find the best regularized model (Ridge or Lasso) using grid search.</p>

<br><ul> <li style="font-family: 'Consolas', monospace;">Importing Necessary Libraries</li></ul>

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, QuantileTransformer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import GridSearchCV

<ul> <li style="font-family: 'Consolas', monospace;">Loading and Preparing Data</li></ul>

In [18]:
# Load the cleaned car-price dataset (path is relative to the notebook's working directory).
df = pd.read_csv('data/Cleaned_Car_Price_Data.csv')

# Split the frame into predictors (every column except 'price') and the regression target.
X = df.drop(columns='price')
y = df['price']

# Preview the first rows. This must be the cell's last expression: Jupyter only renders
# the value of the final expression, so a bare `df.head()` mid-cell is silently discarded.
df.head()

<ul> <li style="font-family: 'Consolas', monospace;">Build Pipeline</li></ul>

In [19]:
# Columns that are one-hot encoded. Each name must appear exactly once: a repeated name
# is selected twice by the ColumnTransformer and therefore encoded twice, which duplicates
# its dummy features and skews the penalty applied by regularized regressors.
categories = ['fueltype', 'aspiration', 'doornumber', 'carbody', 'drivewheel',
              'enginetype', 'fuelsystem', 'cylindernumber']

# Every predictor column not listed above goes through the numerical branch.
# NOTE(review): assumes all remaining columns of X are numeric — confirm against the cleaned CSV.
numericals = [col for col in X.columns if col not in categories]

# Preprocessing: quantile-transform the numerical columns, one-hot encode the categorical ones.
# handle_unknown='ignore' lets categories unseen during fitting be encoded as all zeros
# instead of raising at transform time.
# NOTE(review): n_quantiles=200 is reduced (with a warning) whenever the fitted data has
# fewer than 200 rows, e.g. inside a small CV training fold — verify the dataset size.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', QuantileTransformer(n_quantiles=200), numericals),
        ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categories)
    ])

# Full ML pipeline: preprocessing followed by a regressor. LinearRegression is only a
# placeholder for the 'regressor' step; a parameter grid can swap in other estimators.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
    ])

<ul> <li style="font-family: 'Consolas', monospace;">Training Using GridSearch</li></ul>

In [20]:
# Both candidate regressors are tuned over the same regularization strengths:
# 13 log-spaced alpha values from 1e-3 to 1e3.
# NOTE(review): the saved output of this cell reports alpha=1000.0, the upper edge of
# this grid — consider widening the range to check the optimum is not further out.
alpha_grid = np.logspace(-3, 3, 13)

# One sub-grid per estimator; each replaces the pipeline's 'regressor' step and
# sweeps its alpha.
search_space = [
    {'regressor': [estimator], 'regressor__alpha': alpha_grid}
    for estimator in (Ridge(), Lasso())
]

# 7-fold cross-validated search using all available cores; scoring is left at the default.
model = GridSearchCV(pipeline, param_grid=search_space, cv=7, n_jobs=-1)
model.fit(X, y)

print("Best parameters:", model.best_params_)

Best parameters: {'regressor': Lasso(), 'regressor__alpha': 1000.0}
