In [1]:
import numpy as np
import pandas as pd
import pickle


#### Reading original data set

In [2]:
# Load the raw car listings from a CSV located relative to the notebook's
# working directory.
df = pd.read_csv('CAR DETAILS.csv')

# Preview the first rows to confirm the columns were parsed.
df.head()

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner


In [3]:
# Count fully duplicated rows and report the number explicitly: a bare
# expression in the middle of a cell is never echoed, only the last one is.
n_duplicates = int(df.duplicated().sum())
print('Duplicate rows:', n_duplicates)

# Rebind `df` to the de-duplicated frame instead of mutating it in place.
# The original row labels are kept (no index reset).
df = df.drop_duplicates()

# Remaining (rows, columns) after de-duplication.
df.shape

(3577, 8)

#### Applying the same procedure that was used while building the model

In [4]:
# Split every car name on whitespace:
#   token 0   -> brand
#   token 1   -> model
#   token 2.. -> remaining tokens, kept as a list (joined into a string later)
# A name with fewer than two tokens raises IndexError.
car_names = list(df['name'])
tokenized = [title.split() for title in car_names]

brand = [tokens[0] for tokens in tokenized]
model = [tokens[1] for tokens in tokenized]
sub_class = [tokens[2:] for tokens in tokenized]

# Sanity check: each list must have one entry per row of `df`.
for values in (brand, model, sub_class):
    print(len(values))

3577
3577
3577


In [5]:
# Collapse each car's trailing name tokens into one space-separated string.
# Entries that are already strings are passed through unchanged, so that
# re-running this cell does not join an already-joined value character by
# character (e.g. 'AC' -> 'A C').
sub_class = [
    item if isinstance(item, str) else ' '.join(map(str, item))
    for item in sub_class
]

# Attach the three name-derived features as new columns of `df`.
df['brand'] = brand
df['model'] = model
df['sub_class'] = sub_class
df.head()

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,brand,model,sub_class
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner,Maruti,800,AC
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner,Maruti,Wagon,R LXI Minor
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner,Hyundai,Verna,1.6 SX
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner,Datsun,RediGO,T Option
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner,Honda,Amaze,VX i-DTEC


In [6]:
# The full name and the free-text sub-class are removed; only the derived
# 'brand' and 'model' columns are kept from the original name.
df.drop(columns=['name', 'sub_class'], inplace=True)
df.head()

Unnamed: 0,year,selling_price,km_driven,fuel,seller_type,transmission,owner,brand,model
0,2007,60000,70000,Petrol,Individual,Manual,First Owner,Maruti,800
1,2007,135000,50000,Petrol,Individual,Manual,First Owner,Maruti,Wagon
2,2012,600000,100000,Diesel,Individual,Manual,First Owner,Hyundai,Verna
3,2017,250000,46000,Petrol,Individual,Manual,First Owner,Datsun,RediGO
4,2014,450000,141000,Diesel,Individual,Manual,Second Owner,Honda,Amaze


In [7]:
# Hard-coded (floor, cap) limits per numeric column.
# NOTE(review): presumably the outlier thresholds computed when the model was
# built -- confirm against the training notebook.
CAP_LIMITS = {
    'selling_price': (51786.64, 2675000.0),
    'km_driven': (1744.08, 223158.4),
}

# Values above the cap are replaced by the cap, then values below the floor
# are replaced by the floor; everything in between is left as is.
for column, (floor, cap) in CAP_LIMITS.items():
    capped = np.where(df[column] > cap, cap, df[column])
    df[column] = np.where(capped < floor, floor, capped)

In [8]:
from sklearn.preprocessing import LabelEncoder

# Map each distinct model name to an integer code.
# NOTE(review): the encoder is fitted here on this notebook's data, so the
# codes match the ones the model was trained with only if training saw the
# same set of model names -- confirm, or load the training-time encoder.
lb = LabelEncoder()
model_codes = lb.fit_transform(df['model'])
df['model_enc'] = model_codes

In [9]:
# The textual model name is now represented by 'model_enc', so remove it.
df.drop(columns=['model'], inplace=True)
df.head()

Unnamed: 0,year,selling_price,km_driven,fuel,seller_type,transmission,owner,brand,model_enc
0,2007,60000.0,70000.0,Petrol,Individual,Manual,First Owner,Maruti,6
1,2007,135000.0,50000.0,Petrol,Individual,Manual,First Owner,Maruti,164
2,2012,600000.0,100000.0,Diesel,Individual,Manual,First Owner,Hyundai,161
3,2017,250000.0,46000.0,Petrol,Individual,Manual,First Owner,Datsun,125
4,2014,450000.0,141000.0,Diesel,Individual,Manual,Second Owner,Honda,17


In [10]:
# One-hot encode the remaining categorical columns into indicator columns.
# NOTE(review): the resulting column set depends on the categories present in
# this data -- confirm it matches the feature columns the model was trained on.
df_encoded = pd.get_dummies(data=df)
df_encoded.head()

Unnamed: 0,year,selling_price,km_driven,model_enc,fuel_CNG,fuel_Diesel,fuel_Electric,fuel_LPG,fuel_Petrol,seller_type_Dealer,...,brand_Mercedes-Benz,brand_Mitsubishi,brand_Nissan,brand_OpelCorsa,brand_Renault,brand_Skoda,brand_Tata,brand_Toyota,brand_Volkswagen,brand_Volvo
0,2007,60000.0,70000.0,6,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2007,135000.0,50000.0,164,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,2012,600000.0,100000.0,161,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2017,250000.0,46000.0,125,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,2014,450000.0,141000.0,17,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### selecting 20 data samples

In [11]:
# Draw a fixed-size random subset of the encoded rows; the seed makes the
# selection repeatable across runs.
N_SAMPLES = 20
SAMPLE_SEED = 67

samples = df_encoded.sample(n=N_SAMPLES, random_state=SAMPLE_SEED)
samples

Unnamed: 0,year,selling_price,km_driven,model_enc,fuel_CNG,fuel_Diesel,fuel_Electric,fuel_LPG,fuel_Petrol,seller_type_Dealer,...,brand_Mercedes-Benz,brand_Mitsubishi,brand_Nissan,brand_OpelCorsa,brand_Renault,brand_Skoda,brand_Tata,brand_Toyota,brand_Volkswagen,brand_Volvo
1047,2013,500000.0,60000.0,52,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1048,2010,400000.0,130000.0,93,0,1,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1224,2018,780000.0,50000.0,162,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1865,2006,160000.0,63230.0,14,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4044,2014,285000.0,64000.0,14,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
3753,2014,600000.0,87000.0,52,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1316,2015,275000.0,60000.0,164,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2767,2004,100000.0,70000.0,14,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
244,2014,1050000.0,70000.0,85,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1424,2011,300000.0,118700.0,183,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Restore the trained estimator from disk.
# Unpickling can execute arbitrary code, so only load a 'final.pkl' that
# comes from a trusted source.
with open('final.pkl', mode='rb') as pickled_model:
    rf = pickle.load(pickled_model)

### Predicting the output

In [13]:
# Feature matrix: every column except the target. A later cell appends a
# 'y_predict' column to `samples`; it is dropped here too (when present) so
# that re-running this cell never feeds earlier predictions back into the
# model as an extra feature.
x = samples.drop(columns=['selling_price']).drop(columns=['y_predict'], errors='ignore')

# Target: the selling price of each sampled car.
y = samples['selling_price']

In [14]:
# Predict a selling price for each sampled row with the loaded model and
# print the raw predictions.
ypred = rf.predict(x)
print(ypred)

[435116.95309734 533414.79816017 810282.23554869 111278.77194306
 246596.61720535 579846.74507736 302994.01482073 101372.54689361
 942593.28201533 277099.72010191 132436.07817869 331316.78469197
 461268.04474557 388059.52228488 509608.58539988 352380.01286412
 882500.78483235 327842.45009856 523081.01114556 555894.01180167]


In [15]:
# Store the predictions next to the actual prices, as a new column on the
# sampled frame.
samples.loc[:, 'y_predict'] = ypred

In [16]:
# Side-by-side view of actual vs. predicted price, rounded to two decimals.
comparison_columns = ['selling_price', 'y_predict']
out_put = samples[comparison_columns].round(decimals=2)
out_put

Unnamed: 0,selling_price,y_predict
1047,500000.0,435116.95
1048,400000.0,533414.8
1224,780000.0,810282.24
1865,160000.0,111278.77
4044,285000.0,246596.62
3753,600000.0,579846.75
1316,275000.0,302994.01
2767,100000.0,101372.55
244,1050000.0,942593.28
1424,300000.0,277099.72
