In [1]:
import numpy as np 
import pandas as pd 

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
df=pd.read_csv('Clean_car.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,model,vehicle_age,km_driven,seller_type,fuel_type,transmission_type,mileage,engine,max_power,seats,selling_price
0,0,Alto,9,120000,Individual,Petrol,Manual,19.7,796,46.3,5,120000
1,1,Grand,5,20000,Individual,Petrol,Manual,18.9,1197,82.0,5,550000
2,2,i20,11,60000,Individual,Petrol,Manual,17.0,1197,80.0,5,215000
3,3,Alto,9,37000,Individual,Petrol,Manual,20.92,998,67.1,5,226000
4,4,Ecosport,6,30000,Dealer,Diesel,Manual,22.77,1498,98.59,5,570000


In [6]:
df.drop('Unnamed: 0',axis=1,inplace=True)

In [7]:
x=df.drop('selling_price',axis=1)

In [8]:
y=df['selling_price']

In [10]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
x['model']=le.fit_transform(x['model'])

## One Hot Encoding

In [12]:
num_features = x.select_dtypes(exclude="object").columns
onehot_columns = ['seller_type','fuel_type','transmission_type']

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
oh_transformer = OneHotEncoder(drop='first')

preprocessor = ColumnTransformer(
    [
        ("OneHotEncoder", oh_transformer, onehot_columns),
        ("StandardScaler", numeric_transformer, num_features)
        
    ],remainder='passthrough'
    
)

x=preprocessor.fit_transform(x)
pd.DataFrame(x)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,-1.519714,0.983562,1.247335,-0.000276,-1.324259,-1.263352,-0.403022
1,1.0,0.0,0.0,0.0,0.0,1.0,1.0,-0.225693,-0.343933,-0.690016,-0.192071,-0.554718,-0.432571,-0.403022
2,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.536377,1.647309,0.084924,-0.647583,-0.554718,-0.479113,-0.403022
3,1.0,0.0,0.0,0.0,0.0,1.0,1.0,-1.519714,0.983562,-0.360667,0.292211,-0.936610,-0.779312,-0.403022
4,0.0,0.0,1.0,0.0,0.0,0.0,1.0,-0.666211,-0.012060,-0.496281,0.735736,0.022918,-0.046502,-0.403022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15406,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.508844,0.983562,-0.869744,0.026096,-0.767733,-0.757204,-0.403022
15407,0.0,0.0,0.0,0.0,0.0,1.0,1.0,-0.556082,-1.339555,-0.728763,-0.527711,-0.216964,-0.220803,2.073444
15408,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.407551,-0.012060,0.220539,0.344954,0.022918,0.068225,-0.403022
15409,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.426247,-0.343933,72.541850,-0.887326,1.329794,0.917158,2.073444


## Train Test Split

In [13]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42)

## Model Training & Testing

In [16]:
from sklearn.ensemble import AdaBoostRegressor

regressor=AdaBoostRegressor()

regressor.fit(x_train,y_train)

In [18]:
y_pred=regressor.predict(x_test)
x_train_pred=regressor.predict(x_train)

In [19]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [22]:
mae=mean_absolute_error(y_train,x_train_pred)
mse=mean_squared_error(y_train,x_train_pred)
r2=r2_score(y_train,x_train_pred)
print('For Training Set')
print('Mean Absolute Erroe: ',mae)
print('Mean Squared Error: ',mse)
print('R2 Score: ',r2)

print("-----------------------------")

mae=mean_absolute_error(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
r2=r2_score(y_test,y_pred)
print('For Testing Set')
print('Mean Absolute Erroe: ',mae)
print('Mean Squared Error: ',mse)
print('R2 Score: ',r2)

For Training Set
Mean Absolute Erroe:  362562.2434811738
Mean Squared Error:  214041059596.349
R2 Score:  0.7388401212552452
-----------------------------
For Testing Set
Mean Absolute Erroe:  386301.29770803533
Mean Squared Error:  258930722249.42322
R2 Score:  0.65570621734139


## Hyper Parameter Tuning

In [23]:
from sklearn.model_selection import GridSearchCV

In [25]:
params={
    'n_estimators':[50,60,70,80],
    'loss':['linear','square','exponential']
}

params

{'n_estimators': [50, 60, 70, 80], 'loss': ['linear', 'square', 'exponential']}

In [26]:
grid=GridSearchCV(estimator=regressor,param_grid=params,verbose=2,n_jobs=-1)

In [27]:
grid.fit(x_train,y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [28]:
grid.best_params_

{'loss': 'square', 'n_estimators': 70}

In [29]:
grid.best_index_

6

In [30]:
y_pred=grid.predict(x_test)
x_train_pred=grid.predict(x_train)

In [31]:
mae=mean_absolute_error(y_train,x_train_pred)
mse=mean_squared_error(y_train,x_train_pred)
r2=r2_score(y_train,x_train_pred)
print('For Training Set For HyperParameter Tuning')
print('Mean Absolute Erroe: ',mae)
print('Mean Squared Error: ',mse)
print('R2 Score: ',r2)

print("-----------------------------")

mae=mean_absolute_error(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
r2=r2_score(y_test,y_pred)
print('For Testing Set For HyperParameter Tuning')
print('Mean Absolute Erroe: ',mae)
print('Mean Squared Error: ',mse)
print('R2 Score: ',r2)

For Training Set For HyperParameter Tuning
Mean Absolute Erroe:  297246.3575563342
Mean Squared Error:  166442035788.59772
R2 Score:  0.7969175541993917
-----------------------------
For Testing Set For HyperParameter Tuning
Mean Absolute Erroe:  321378.0498071881
Mean Squared Error:  233808995835.09164
R2 Score:  0.6891099561444405
