In [None]:
import numpy as np
import pandas as pd
import scipy
from scipy.stats import zscore
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import power_transform
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_score as cvs
from sklearn.model_selection import GridSearchCV
import seaborn as sn
import warnings
warnings.filterwarnings('ignore')

In [None]:
df = pd.read_csv('Data/CarPrice.csv')
df.head()

Unnamed: 0,Brand & Model,Varient,Fuel Type,Driven Kilometers,Transmission,Owner,Location,Date of Posting Ad,Price (in ₹)
0,Mahindra Xuv500 (2013),W8 Dual Tone,DIESEL,"58,000 KM",MANUAL,1st Owner,"Pitampura, Delhi",01/27/2022,435000
1,Hyundai Creta (2020),1.6 SX Option Executive Diesel,DIESEL,43861.0 KM,MANUAL,1st Owner,"Ahiritola, Kolkata",01/23/2022,1165101
2,Hyundai Verna (2019),VTVT 1.4 EX,PETROL,"17,000 KM",MANUAL,2nd Owner,"Chelavoor, Pantheeramkavu",01/25/2022,815000
3,Datsun Redigo (2020),D,PETROL,10000 KM,MANUAL,1st Owner,"Palam, Delhi",01/13/2022,270000
4,Hyundai I10 (2011),Sportz 1.1 iRDE2,PETROL,70000 KM,MANUAL,1st Owner,"Dwarka Sector 13, Delhi",01/13/2022,185000


In [None]:
df.shape

(5050, 9)

In [None]:
df.dtypes

Brand & Model         object
Varient               object
Fuel Type             object
Driven Kilometers     object
Transmission          object
Owner                 object
Location              object
Date of Posting Ad    object
Price (in ₹)           int64
dtype: object

In [None]:
df = df.dropna(subset=['Varient'])

In [None]:
df.isnull().sum()

Brand & Model         0
Varient               0
Fuel Type             0
Driven Kilometers     0
Transmission          0
Owner                 0
Location              0
Date of Posting Ad    0
Price (in ₹)          0
dtype: int64

In [None]:
le = LabelEncoder()
for column in df.drop(['Price (in ₹)'],axis=1).columns:
    df[column]=le.fit_transform(df[column])
df

Unnamed: 0,Brand & Model,Varient,Fuel Type,Driven Kilometers,Transmission,Owner,Location,Date of Posting Ad,Price (in ₹)
0,266,143,3,537,10,1,123,31,435000
1,243,22,3,415,10,1,2,28,1165101
2,255,130,6,156,10,2,33,30,815000
3,226,79,6,79,10,1,115,20,270000
4,246,121,6,650,10,1,42,20,185000
...,...,...,...,...,...,...,...,...,...
5045,355,87,5,479,6,1,6,28,50000
5046,369,56,4,95,12,1,81,28,320000
5047,348,122,7,467,6,1,126,28,599000
5048,350,131,7,580,12,1,87,31,395000


In [None]:
x= df.drop(['Price (in ₹)'],axis=1)
y=df['Price (in ₹)']

In [None]:
x=power_transform(x,method='yeo-johnson')
scale = StandardScaler()
x=scale.fit_transform(x)

In [None]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.3, random_state = 45)


In [None]:
import time as time

In [None]:
start_time_cpu = time.time()
knr = KNeighborsRegressor()
knr.fit(xtrain,ytrain)
pred_train_knr=knr.predict(xtrain)
pred_test_knr=knr.predict(xtest)
print('K Neighbors Regressor r2_score:',r2_score(ytest,pred_test_knr))
end_time_cpu = time.time()
execution_time_cpu = end_time_cpu - start_time_cpu
print("Execution time on CPU: {} seconds".format(execution_time_cpu))

K Neighbors Regressor r2_score: 0.5272520951788017
Execution time on CPU: 0.14844274520874023 seconds


In [None]:
start_time_cpu = time.time()
dtr=DecisionTreeRegressor(criterion='absolute_error')
dtr.fit(xtrain,ytrain)
pred_train_dtr=dtr.predict(xtrain)
pred_test_dtr=dtr.predict(xtest)
print('Decision Tree Regressor r2_score:',r2_score(ytest,pred_test_dtr))
end_time_cpu = time.time()
execution_time_cpu = end_time_cpu - start_time_cpu
print("Execution time on CPU: {} seconds".format(execution_time_cpu))

Decision Tree Regressor r2_score: 0.7960999430928439
Execution time on CPU: 0.6699047088623047 seconds


In [None]:
start_time_cpu = time.time()
rf=RandomForestRegressor()
rf.fit(xtrain,ytrain)
pred_train_rf=rf.predict(xtrain)
pred_test_rf=rf.predict(xtest)
print('Random Forest Regressor r2_score:',r2_score(ytest,pred_test_rf))
end_time_cpu = time.time()
execution_time_cpu = end_time_cpu - start_time_cpu
print("Execution time on CPU: {} seconds".format(execution_time_cpu))

Random Forest Regressor r2_score: 0.8792100810146755
Execution time on CPU: 2.9708855152130127 seconds


In [None]:
parameter = { 'bootstrap': [True, False],
 'max_features': ['auto', 'sqrt'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],}

start_time_cpu = time.time()
gvc = GridSearchCV(RandomForestRegressor(),parameter,cv=5)
gvc.fit(xtrain,ytrain)
gvc.best_params_
end_time_cpu = time.time()
execution_time_cpu = end_time_cpu - start_time_cpu
print("Execution time on CPU: {} seconds".format(execution_time_cpu))

Execution time on CPU: 175.23037910461426 seconds


In [None]:
start_time_cpu = time.time()
pricecar = RandomForestRegressor(bootstrap=False,min_samples_leaf=1,max_features='auto',min_samples_split=2, n_estimators=1000)
pricecar.fit(xtrain,ytrain)
pred=pricecar.predict(xtest)
acc=r2_score(ytest,pred)
print('Score of Hyper Parameter Tuned Ranfom Forest Regressor is:',acc)
end_time_cpu = time.time()
execution_time_cpu = end_time_cpu - start_time_cpu
print("Execution time on CPU: {} seconds".format(execution_time_cpu))

Score of Hyper Parameter Tuned Ranfom Forest Regressor is: 0.8969149705693856
Execution time on CPU: 20.40273666381836 seconds


In [None]:
start_time_cpu = time.time()
sgd=SGDRegressor()
sgd.fit(xtrain,ytrain)
pred_train_sgd=sgd.predict(xtrain)
pred_test_sgd=sgd.predict(xtest)

print('SGD Regressor r2_score:',r2_score(ytest,pred_test_sgd))
end_time_cpu = time.time()
execution_time_cpu = end_time_cpu - start_time_cpu
print("Execution time on CPU: {} seconds".format(execution_time_cpu))

SGD Regressor r2_score: 0.05446869988537639
Execution time on CPU: 0.07907366752624512 seconds


In [None]:
from sklearn.ensemble import AdaBoostRegressor
regr = AdaBoostRegressor(random_state=42, n_estimators=1000)
regr.fit(xtrain, ytrain)
pred_test_ada=regr.predict(xtest)
print('ada Regressor r2_score:',r2_score(ytest,pred_test_ada))

ada Regressor r2_score: -0.2064115958500854
