# <div align="center"> Machine Learning Models

#### Imports

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
pd.set_option('display.max_rows',2000)

In [3]:
df = pd.read_csv('../Clean_Data/land_rover_esp.csv')

In [4]:
df_t=pd.get_dummies(df, columns=["Model",'Fuel_type'])
df_t['Kms']=StandardScaler().fit_transform(df_t[['Km']])
df_t['Years']=StandardScaler().fit_transform(df_t[['Year']])
df_t['Powers']=StandardScaler().fit_transform(df_t[['Power']])
X =  df_t.drop(columns=['Price','Brand'])
y = df_t.loc[:,'Price']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/8, random_state=0)

In [6]:
X_train.shape,y_train.shape,X_test.shape,y_test.shape

((964, 14), (964,), (138, 14), (138,))

-------------------------------------------------------------------------------------------------------------------------------

### KNeighborsRegressor

In [7]:
from sklearn.neighbors import KNeighborsRegressor

In [8]:
clf =GridSearchCV(KNeighborsRegressor(weights='distance'),param_grid={
    'n_neighbors':range(2,35),'leaf_size':range(5,15)},scoring='neg_mean_absolute_error',
    cv=5, return_train_score=False) 

In [9]:
clf.fit(X_train,y_train)
results=pd.DataFrame(clf.cv_results_)

In [10]:
results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_leaf_size,param_n_neighbors,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.007999,0.001897,0.002999,0.000898,5,2,"{'leaf_size': 5, 'n_neighbors': 2}",-9872.503546,-8662.6301,-9034.211748,-9668.288416,-10602.49885,-9568.026532,674.921121,209
1,0.005736,0.00528,0.0008,0.00098,5,3,"{'leaf_size': 5, 'n_neighbors': 3}",-9647.970936,-8612.438688,-9006.398238,-9883.115096,-10632.829258,-9556.550443,702.48028,169
2,0.0,0.0,0.00625,0.007654,5,4,"{'leaf_size': 5, 'n_neighbors': 4}",-9792.173014,-8500.37835,-8893.709569,-9817.173024,-10521.799856,-9505.046763,720.658439,19
3,0.003125,0.006249,0.003125,0.00625,5,5,"{'leaf_size': 5, 'n_neighbors': 5}",-9713.973331,-8427.458778,-8853.332012,-9989.53379,-10447.341705,-9486.327923,741.683727,9
4,0.006249,0.007654,0.003125,0.00625,5,6,"{'leaf_size': 5, 'n_neighbors': 6}",-9760.987518,-8452.823169,-8893.297221,-10107.644022,-10541.723136,-9551.295013,771.238552,153
5,0.00625,0.007654,0.0,0.0,5,7,"{'leaf_size': 5, 'n_neighbors': 7}",-9847.651993,-8518.6822,-8947.583206,-10115.702414,-10569.850053,-9599.893973,756.675018,261
6,0.003125,0.00625,0.00625,0.007654,5,8,"{'leaf_size': 5, 'n_neighbors': 8}",-9901.545835,-8522.93377,-8962.814312,-10199.7386,-10515.417781,-9620.490059,755.325631,299
7,0.006246,0.00765,0.003125,0.006249,5,9,"{'leaf_size': 5, 'n_neighbors': 9}",-9851.580378,-8493.332776,-8989.197369,-10187.755326,-10649.421747,-9634.257519,787.649583,317
8,0.00625,0.007654,0.0,0.0,5,10,"{'leaf_size': 5, 'n_neighbors': 10}",-9776.617393,-8495.052707,-8957.932764,-10272.392056,-10695.419495,-9639.482883,813.463339,321
9,0.009374,0.007654,0.0,0.0,5,11,"{'leaf_size': 5, 'n_neighbors': 11}",-9726.058844,-8508.91162,-8973.099997,-10239.424018,-10672.439128,-9623.986721,794.415539,301


###  DecisionTreeRegressor

In [11]:
from sklearn.tree import DecisionTreeRegressor

In [12]:
clf =GridSearchCV(DecisionTreeRegressor(),param_grid={
    'criterion':('mse','friedman_mse','mae'),'splitter':('best','random'),'max_depth':range(5,30,1)},
    scoring='neg_mean_absolute_error', cv=5, return_train_score=False) 

In [13]:
clf.fit(X_train,y_train)
results=pd.DataFrame(clf.cv_results_)

In [14]:
results.sort_values(by='rank_test_score',ascending=True)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
61,0.003125,0.006249332,0.003125,0.006249428,friedman_mse,10,random,"{'criterion': 'friedman_mse', 'max_depth': 10,...",-4212.790415,-4090.521447,-4000.492894,-4853.456081,-4376.418351,-4306.735838,301.03748,1
9,0.0,0.0,0.003124,0.006248951,mse,9,random,"{'criterion': 'mse', 'max_depth': 9, 'splitter...",-4335.951382,-4166.541991,-4358.398075,-4446.133902,-4404.220794,-4342.249229,95.714139,2
109,0.018753,0.006256986,0.0,0.0,mae,9,random,"{'criterion': 'mae', 'max_depth': 9, 'splitter...",-3822.556995,-4467.292746,-4875.095855,-4353.976684,-4476.973958,-4399.179248,338.14925,3
105,0.018749,0.006248403,0.0,0.0,mae,7,random,"{'criterion': 'mae', 'max_depth': 7, 'splitter...",-4397.049223,-4330.367876,-4230.360104,-4670.611399,-4531.835938,-4432.044908,154.345001,4
107,0.021874,0.007653526,0.0,0.0,mae,8,random,"{'criterion': 'mae', 'max_depth': 8, 'splitter...",-4432.748705,-4176.158031,-4644.34456,-4378.227979,-4539.75,-4434.245855,158.127897,5
104,0.031244,7.226876e-06,0.003125,0.006249523,mae,7,best,"{'criterion': 'mae', 'max_depth': 7, 'splitter...",-4624.689119,-4015.660622,-4361.968912,-4675.963731,-4531.359375,-4441.928352,238.440623,6
5,0.003125,0.006249714,0.0,0.0,mse,7,random,"{'criterion': 'mse', 'max_depth': 7, 'splitter...",-4844.342777,-4140.648854,-4023.346208,-4723.146492,-4485.384208,-4443.373708,319.023299,7
106,0.034372,0.006249428,0.0,0.0,mae,8,best,"{'criterion': 'mae', 'max_depth': 8, 'splitter...",-4436.134715,-4184.836788,-4421.282383,-4753.393782,-4430.104167,-4445.150367,180.916839,8
58,0.003125,0.006249523,0.0,0.0,friedman_mse,9,best,"{'criterion': 'friedman_mse', 'max_depth': 9, ...",-4568.417003,-4293.68844,-4165.841789,-4745.797692,-4507.243499,-4456.197685,204.833013,9
108,0.037498,0.007654344,0.0,0.0,mae,9,best,"{'criterion': 'mae', 'max_depth': 9, 'splitter...",-4251.443005,-4009.090674,-4495.251295,-4918.012953,-4659.515625,-4466.66271,315.4103,10


In [15]:
dtree = DecisionTreeRegressor()

In [16]:
cross_val_score(dtree,X_train,y_train,cv=10,scoring='neg_mean_absolute_error').mean()

-4756.243278565292

###  RandomForestRegressor

In [17]:
from sklearn.ensemble import RandomForestRegressor

In [18]:
clf =GridSearchCV(DecisionTreeRegressor(),param_grid={
    'criterion':('friedman_mse','mae'),'splitter':('best','random'),'max_depth':range(5,30,3)},
    scoring='neg_mean_absolute_error', cv=5, return_train_score=False) 

In [19]:
clf.fit(X_train,y_train)
results=pd.DataFrame(clf.cv_results_)

In [20]:
results.sort_values(by='rank_test_score',ascending=True)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
20,0.037493,0.007649,0.0,0.0,mae,8,best,"{'criterion': 'mae', 'max_depth': 8, 'splitter...",-4469.352332,-4264.42228,-4273.401554,-4787.466321,-4464.169271,-4451.762352,189.773218,1
2,0.002723,0.001463,0.0006,0.00049,friedman_mse,8,best,"{'criterion': 'friedman_mse', 'max_depth': 8, ...",-4742.421826,-4410.712965,-4201.106363,-4633.501879,-4385.124487,-4474.573504,191.762626,2
23,0.024513,0.008207,0.000404,0.000495,mae,11,random,"{'criterion': 'mae', 'max_depth': 11, 'splitte...",-4457.30829,-4396.518135,-4531.766839,-4946.253886,-4307.302083,-4527.829847,221.784908,3
5,0.006249,0.007653,0.0,0.0,friedman_mse,11,random,"{'criterion': 'friedman_mse', 'max_depth': 11,...",-4366.752669,-4521.2088,-4394.72842,-4396.663611,-4972.884755,-4530.447651,227.574108,4
3,0.003125,0.006249,0.0,0.0,friedman_mse,8,random,"{'criterion': 'friedman_mse', 'max_depth': 8, ...",-4429.156944,-4410.072809,-4283.498518,-4898.228494,-4928.765997,-4589.944553,269.055619,5
22,0.036002,0.009509,0.00625,0.007654,mae,11,best,"{'criterion': 'mae', 'max_depth': 11, 'splitte...",-4419.670984,-4160.341969,-4446.834197,-5014.176166,-5000.447917,-4608.294247,340.832498,6
4,0.006254,0.00766,0.0,0.0,friedman_mse,11,best,"{'criterion': 'friedman_mse', 'max_depth': 11,...",-4987.175783,-4413.05351,-4152.985433,-4930.686894,-4665.95113,-4629.97055,314.289136,7
21,0.018752,0.006248,0.0,0.0,mae,8,random,"{'criterion': 'mae', 'max_depth': 8, 'splitter...",-4825.831606,-4856.854922,-4542.129534,-4551.018135,-4474.802083,-4650.127256,158.640089,8
15,0.003125,0.00625,0.0,0.0,friedman_mse,26,random,"{'criterion': 'friedman_mse', 'max_depth': 26,...",-4653.265112,-4605.880829,-4269.091537,-5219.508636,-4560.863542,-4661.721931,309.401614,9
0,0.004596,0.001356,0.0016,0.00049,friedman_mse,5,best,"{'criterion': 'friedman_mse', 'max_depth': 5, ...",-4967.728348,-4436.449855,-4988.384025,-4964.12477,-4454.235101,-4762.18442,258.893902,10


### Gradient Boosting

In [21]:
from sklearn.ensemble import GradientBoostingRegressor

In [22]:
clf =GridSearchCV(GradientBoostingRegressor(loss='huber',learning_rate=0.1),param_grid={
    'n_estimators':range(75,120,10),'criterion':('friedman_mse','mae'),'max_depth':range(1,7,3)},
    scoring='neg_mean_absolute_error', cv=5, return_train_score=False) 

In [23]:
clf.fit(X_train,y_train)
results=pd.DataFrame(clf.cv_results_)

In [24]:
results.sort_values(by='rank_test_score',ascending=True)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
6,0.383797,0.033035,0.001797,0.000972,friedman_mse,4,85,"{'criterion': 'friedman_mse', 'max_depth': 4, ...",-3839.195082,-3786.98132,-3807.88268,-3909.629224,-3827.89159,-3834.315979,41.656818,1
8,0.388584,0.008024,0.003125,0.00625,friedman_mse,4,105,"{'criterion': 'friedman_mse', 'max_depth': 4, ...",-3818.015992,-3871.40129,-3811.117678,-3838.176728,-3881.723013,-3844.08694,28.157219,2
7,0.391705,0.018677,0.000799,0.000748,friedman_mse,4,95,"{'criterion': 'friedman_mse', 'max_depth': 4, ...",-3883.404157,-3842.398322,-3807.179453,-3852.642806,-3865.842452,-3850.293438,25.552924,3
9,0.489599,0.069331,0.0004,0.0008,friedman_mse,4,115,"{'criterion': 'friedman_mse', 'max_depth': 4, ...",-3869.797202,-3889.891138,-3784.006553,-3830.211068,-3878.373879,-3850.455968,38.817538,4
5,0.296237,0.022844,0.001603,0.001359,friedman_mse,4,75,"{'criterion': 'friedman_mse', 'max_depth': 4, ...",-3891.664334,-3798.997089,-3846.094584,-3888.387797,-3834.766076,-3851.981976,34.752036,5
16,2.354776,0.046551,0.0,0.0,mae,4,85,"{'criterion': 'mae', 'max_depth': 4, 'n_estima...",-3828.0538,-3789.418601,-3840.228054,-3997.542053,-3966.364212,-3884.321344,82.056988,6
15,2.12431,0.083296,0.0,0.0,mae,4,75,"{'criterion': 'mae', 'max_depth': 4, 'n_estima...",-3878.869745,-3767.047748,-3847.110227,-3997.630347,-3938.91801,-3885.915215,78.707697,7
17,2.707184,0.043196,0.003125,0.00625,mae,4,95,"{'criterion': 'mae', 'max_depth': 4, 'n_estima...",-3961.600838,-3858.746272,-3770.095073,-4048.415119,-3865.40611,-3900.852682,95.490285,8
18,2.971502,0.051655,0.00625,0.007654,mae,4,105,"{'criterion': 'mae', 'max_depth': 4, 'n_estima...",-4102.785996,-3813.135162,-3831.361941,-3995.725974,-3952.410075,-3939.08383,107.379135,9
19,3.46131,0.189666,0.0004,0.0008,mae,4,115,"{'criterion': 'mae', 'max_depth': 4, 'n_estima...",-3857.040133,-3951.752017,-3886.706768,-4100.5359,-3954.253682,-3950.0577,84.059234,10


### Linear Regression

In [8]:
from sklearn.linear_model import LinearRegression

In [9]:
reg = LinearRegression()

In [10]:
cross_val_score(reg,X_train,y_train,cv=5,scoring='neg_mean_absolute_error').mean()

-5160.294813399473