In [None]:
import pickle
import warnings

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import (GradientBoostingRegressor, RandomForestRegressor)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split, RandomizedSearchCV

warnings.filterwarnings("ignore")



In [None]:
def evaluation(model, X, y):
    """Print regression metrics for ``model`` on ``(X, y)`` and return its RMSE.

    The model is compared against a naive baseline that predicts the median
    of ``y`` for every sample.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Feature matrix, passed straight to ``model.predict``.
    y : array-like
        True target values. A pandas Series, NumPy array or plain list all
        work; the median is taken with NumPy rather than ``y.median()``.

    Returns
    -------
    float
        Root-mean-squared error of the model's predictions.
    """
    y_true = np.asarray(y)
    y_pred = model.predict(X)

    # Constant baseline: predict the median target for every row.
    y_base = np.full(len(y_true), np.median(y_true))

    rmse_model = np.sqrt(mean_squared_error(y_true, y_pred))
    rmse_base = np.sqrt(mean_squared_error(y_true, y_base))

    print('RMSE of model : {:2.2f}'.format(rmse_model))
    print('R2 of model : {:2.2f}'.format(r2_score(y_true, y_pred)))
    print('Base RMSE: {}'.format(rmse_base))
    # Ratio below 1 means the model beats the constant-median baseline.
    print('Error reduction (RMSE of predict/RMSE of base) : {:2.2f}'.format(rmse_model / rmse_base))
    return rmse_model


In [None]:
# Read the raw listings; the path is relative to the notebook's working directory.
raw_df = pd.read_csv(filepath_or_buffer='training_data.csv')


In [None]:
# Row filter (all bounds inclusive): engineDisplacement in [660, 4600],
# Year >= 1985 and Price <= 70,000,000.
df = raw_df.loc[
    raw_df['engineDisplacement'].between(660, 4600)
    & raw_df['Year'].ge(1985)
    & raw_df['Price'].le(70000000)
]


In [None]:
# Drop the 'City' and '_id' columns and cast Year to string
# (presumably so it is treated as a categorical label - confirm).
# df and df1 end up bound to the same frame.
df1 = (
    df
    .drop(columns=['City', '_id'])
    .assign(Year=lambda frame: frame['Year'].astype(str))
)
df = df1


In [None]:
# One-hot encode every column except the numeric features and the target.
# All dummy frames are built first and attached with a single concat, instead
# of re-copying the ever-growing frame once per encoded column.
# The source columns themselves are kept here; a later cell drops them.
not_encoded = ['engineDisplacement', 'Price', 'Mileage']
dummy_frames = [
    pd.get_dummies(df[column], prefix=column)
    for column in df.columns
    if column not in not_encoded
]
df = pd.concat([df, *dummy_frames], axis=1)

In [None]:
# Remove the five source columns whose dummy encodings were appended above,
# then separate the target (Price) from the feature matrix.
df1 = df.drop(columns=['Make', 'Model', 'Year', 'fuelType', 'Transmission'])
X = df1.drop(columns=['Price'])
y = df1['Price']

In [None]:
# Hold out 33% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=15,
)


In [None]:
# Explicit list of feature column names used to subset X_train / X_test in the
# next cell. Entries are the one-hot dummy columns (Model_*, Make_*, Year_*,
# fuelType_*, Transmission_*) plus the numeric 'Mileage' and
# 'engineDisplacement' columns.
# NOTE(review): the name suggests these are ordered by permutation importance -
# confirm where the ordering came from.
# NOTE(review): both 'Transmission_manual'/'Transmission_Manual' and
# 'Transmission_automatic'/'Transmission_Automatic' are listed, which suggests
# inconsistent casing in the raw Transmission column - confirm.
imp_feat_perm = ['Model_Vios','Model_Coo','Model_Cedric','Model_Element','Model_Q5'
,'Model_X2','Model_Solio','Model_Caldina','Model_2 Series'
,'Model_Concerto','Model_Benz Smart','Model_1 Series','Model_Benz 220'
,'Model_Murrano','Model_Spade','Model_i8','Model_Samuari','Model_Santa Fe'
,'Model_Palette Sw','Model_Serena','Model_Charmant','Model_Beat'
,'Model_Nv200 Wagon','Model_Benz Series','Model_Solio Bandit'
,'Transmission_manual','Model_Accent','Model_Twin','Model_Kizashi'
,'Model_Ractis','Model_Cappuccino','Model_Pulsar','Model_Thats'
,'Model_YRV','Model_IST','Model_Fuga','Model_Wake','Model_Today'
,'Model_Cresta','Model_Micra','Model_Zest Spark','Model_Aerio'
,'Model_Verossa','Model_Matrix','Model_Benz Forfour','Model_Pixis Space'
,'Model_H1','Model_Move Conte','Model_N Plus','Model_120 Y','Model_Gaia'
,'Model_Pino','Model_Zest','Model_Besturn','Model_Palette','Model_Ignis'
,'Model_Path Finder','Model_Jiaxing Mpvs','Model_Mira Gino','Model_Storia'
,'Model_Cefiro','Model_Sonica','Model_Opti','Model_N Box'
,'Model_Alto Lapin','Model_Jazz','Model_Integra','Model_Otti'
,'Model_Roomy','Model_Sprinter','Model_Carina','Model_Kix','Model_Lucida'
,'Model_Starlet','Model_Spacia','Model_Corolla Cross','Model_Nv350 Wagon'
,'Model_Bego','Model_N Slash','Model_Mira Cocoa','Model_Atrai Wagon'
,'Model_Grace','Model_Escudo','Model_Blue Bird','Model_HR-V'
,'Model_Benz Mb140','Model_Kei','Model_Inspire','Model_Sonata'
,'Model_Pixis Epoch','Model_Benz T','Model_Raum','Model_Caravan'
,'Model_X Trail','Model_B B','Model_Cervo','Model_Benz Vito','Model_Ferio'
,'Model_Van','Model_Auris','Model_Carrier','Model_Pixis Van','Model_Cami'
,'Model_Moco','Model_ISIS','Model_Carry','Model_Sj410','Model_Stream'
,'Model_Bluebird Sylphy','Model_Vamos','Model_Qashqai','Model_Sirion'
,'Model_Will','Model_iQ','Model_Mega Xtra','Model_March','Model_Life'
,'Model_N Custom','Model_Esse','Model_Wish','Model_i3','Model_Acty'
,'Model_86','Model_Civic Hybrid','Model_Roox','Model_Tanto','Model_Coupe'
,'Model_Spike','Model_Excel','Model_Terracan','Transmission_automatic'
,'Model_Tiida','Model_i10','Model_Vamos Hobio','Model_Avanza','Model_Z4'
,'Model_MR Wagon','Model_Shehzore','Model_X-PV','Model_Succeed'
,'Model_Dayz Star','Model_Dayz','Model_Santro','Model_Pickup','Model_Aygo'
,'Model_Benz D','Model_Patrol','Model_Fit Aria','Model_Ravi','Model_Cast'
,'Model_Potohar','Model_Copen','Model_Terios Kid','Model_Allion'
,'Model_N Wgn','Model_Porte','Model_Tt','Model_Qashqai +2','Model_Belta'
,'Model_Sirius','Model_AD Van','Model_N One','Model_Airwave'
,'Model_Celerio','Model_Esquire','Model_Jade','Model_A8','Model_Lite Ace'
,'Model_Benz Other','Model_Move','Model_Hustler','Model_Boon'
,'Model_Wingroad','Model_Q3','Model_Duet','Model_H-100','Model_Safari'
,'Model_Corolla Assista','Model_Navara','Model_CR-Z Hybrid'
,'Model_Grand Starex','Model_Ciaz','Model_CR-V','Model_Kluger'
,'Model_Town Ace','Model_Cross Road','Model_6 Series','Model_Tucson'
,'Model_Cuore','Model_Probox','Model_350Z','Model_Platz','Model_Mira'
,'Model_Cressida','Model_Sx4','Model_Ioniq','Model_Clipper','Model_Sienta'
,'Model_Rav4','Model_Estima','Model_Bolan','Model_Other','Model_Voxy'
,'Model_Benz Estate','fuelType_Lpg','Model_Every Wagon','Model_S660'
,'Model_Freed','Model_Jimny Sierra','Model_Fit','Model_Mark II'
,'Model_Every','Model_Benz Coupe','Model_Terios','Model_Passo'
,'Model_Juke','Model_Sai','Model_Alto','Model_Hijet','Model_Grace Hybrid'
,'Model_BR-V','Make_Daihatsu','Model_Celica','Model_Fj Cruiser'
,'Model_Baleno','Model_Benz Cabriolet','Model_Vitara','Model_Mark X'
,'Model_V2','Year_1989','Model_FX','fuelType_CNG','Model_Liana'
,'Model_Mehran','Model_Vitz','Model_Corona','Model_Tacoma','Model_Sunny'
,'Model_A5','Model_Rocky','Model_Note','Model_Surf','Model_Khyber'
,'Year_1987','Model_Noah','Model_Camry','Model_Jimny','Model_Yaris'
,'Model_Charade','Model_Corolla Fielder','Model_APV','Model_Aqua'
,'Year_1993','Year_1988','Model_3 Series','Year_1985','Model_e-tron'
,'Model_Margalla','Make_Nissan','Model_Prius Alpha','Model_Rush'
,'Make_FAW','Year_1995','Year_1990','Model_Insight Exclusive','Year_1999'
,'Model_5 Series','Year_1997','Year_1994','Model_Alphard Hybrid'
,'Make_Hyundai','Model_Swift','Model_Corolla Axio','Year_1991','Year_1996'
,'Model_Hiace','Make_Honda','Year_2001','Year_1992','Year_1998'
,'Year_2002','Year_1986','Model_Accord','Model_A4','Year_2000'
,'Model_Insight','Model_X1','Model_Cultus','Model_A6','Model_Premio'
,'Year_2004','Model_City','Model_Prius','Model_Q7','Model_A3'
,'Model_Coaster','Year_2007','Make_Suzuki','Model_Land Cruiser'
,'Model_Hilux','Model_Harrier','Year_2006','Model_X5 Series'
,'Model_Corolla','Year_2005','fuelType_Hybrid','Model_Crown'
,'Model_Fortuner','Year_2019','Model_Wagon R','Make_BMW','Year_2003'
,'Model_Civic','Year_2009','Model_Vezel','Make_Toyota','Year_2010'
,'Year_2011','Year_2008','Year_2020','Year_2012','Model_C-HR'
,'Model_Benz Class','Year_2018','fuelType_Petrol','fuelType_Diesel'
,'Year_2014','Year_2013','Year_2016','Year_2015','Year_2017'
,'Make_Mercedes Benz','Transmission_Automatic','Model_Prado'
,'Model_7 Series','Make_Audi','Transmission_Manual','Mileage'
,'engineDisplacement']

In [None]:
# Restrict both splits to the columns named in imp_feat_perm.
# NOTE(review): the model fitted below is trained on the full X_train, not on
# these subsets - confirm whether X_train_if / X_test_if are still needed.
X_train_if, X_test_if = (
    split.loc[:, imp_feat_perm] for split in (X_train, X_test)
)

In [None]:
# Random forest with fixed hyper-parameters, trained on the full feature matrix.
rfb = RandomForestRegressor(
    n_estimators=400,
    min_samples_split=10,
    min_samples_leaf=1,
    max_features='sqrt',
    max_depth=60,
    bootstrap=False,
    # max_features='sqrt' samples candidate features at random for each split;
    # pin the seed (same value as the train/test split) so that a
    # Restart & Run All reproduces the reported metrics.
    random_state=15,
)
rfb.fit(X_train, y_train)

# Last expression of the cell: prints the metrics and displays the test RMSE.
evaluation(rfb, X_test, y_test)

RMSE of model : 640697.68
R2 of model : 0.94
Base RMSE: 2796631.776296845
Error reduction (RMSE of predict/RMSE of base) : 0.23


640697.6832887591

In [None]:
# The estimator's built-in scorer on the held-out split (R^2 for regressors).
print(rfb.score(X=X_test, y=y_test))

0.9449980294636486


In [None]:
# Serialize the fitted forest to disk with pickle (imported in the top cell).
# Pkl_Filename is reused by the reload cell that follows.
Pkl_Filename = "Pickle_RL_Model.pkl"

with open(Pkl_Filename, 'wb') as file:
    pickle.dump(rfb, file)



In [None]:
# Read the pickled estimator back in. Only unpickle files you trust:
# pickle.load can execute arbitrary code embedded in the file.
with open(Pkl_Filename, mode='rb') as model_file:
    Pickled_RFB_Model = pickle.load(model_file)

# Display the reloaded estimator's repr as the cell output.
Pickled_RFB_Model

RandomForestRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                      max_depth=60, max_features='sqrt', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=10, min_weight_fraction_leaf=0.0,
                      n_estimators=400, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [None]:
# Score the unpickled estimator on the held-out split and report it as a percentage.
score = Pickled_RFB_Model.score(X_test, y_test)
print("Test score: {0:.2f} %".format(score * 100))

# Predictions from the reloaded estimator; shown as the cell output.
Ypredict = Pickled_RFB_Model.predict(X_test)

Ypredict

Test score: 94.50 %


array([ 547709.31556434,  834687.59894998,  420648.37115809, ...,
       1650718.66512346,  799237.53326565, 1310551.49592301])

In [None]:
# Persist the fitted forest with joblib (imported in the top cell).
filename = 'finalized_model.sav'
joblib.dump(rfb, filename)

# Reload it from disk and re-score on the held-out split to confirm that the
# round-trip preserves the model.
loaded_model = joblib.load(filename)
result = loaded_model.score(X_test, y_test)
print("Test score: {0:.2f} %".format(100 * result))

Test score: 94.50 %


In [None]:
# Colab-only: mount Google Drive at /content/drive.
# NOTE(review): the visible cells read and write only relative paths
# ('training_data.csv', the .pkl / .sav files), none under the mount point -
# confirm whether this mount is still needed and, if so, whether it belongs
# before the data-loading cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
