#                                                      Importing Modules

In [165]:
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(rc={'figure.figsize':(20,20)})
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Reading and exploring the data

In [166]:
df=pd.read_csv('Cars.csv')
df.info()
df.describe()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14741 entries, 0 to 14740
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    14741 non-null  int64  
 1   Brand         14741 non-null  object 
 2   Model         14741 non-null  object 
 3   Body          14741 non-null  object 
 4   Color         14741 non-null  object 
 5   Year          14741 non-null  int64  
 6   Fuel          14741 non-null  object 
 7   Kilometers    14741 non-null  object 
 8   Engine        14741 non-null  object 
 9   Transmission  14741 non-null  object 
 10  Price         14741 non-null  float64
 11  Gov           14741 non-null  object 
dtypes: float64(1), int64(2), object(9)
memory usage: 1.3+ MB


Unnamed: 0.1,Unnamed: 0,Brand,Model,Body,Color,Year,Fuel,Kilometers,Engine,Transmission,Price,Gov
0,5337,Hyundai,Accent,Sedan,Black,2007,Benzine,140000 to 159999,1600 CC,Automatic,140.0,Giza
1,5338,Hyundai,Accent,Sedan,Silver,2005,Benzine,180000 to 199999,1000 - 1300 CC,Manual,78.0,Qena
2,5339,Hyundai,Accent,Sedan,Gray,1999,Benzine,140000 to 159999,1400 - 1500 CC,Manual,70.0,Giza
3,5340,Hyundai,Accent,Sedan,Blue- Navy Blue,2009,Benzine,140000 to 159999,1600 CC,Automatic,150.0,Cairo
4,5341,Hyundai,Accent,Sedan,Silver,2000,Benzine,10000 to 19999,1000 - 1300 CC,Manual,75.0,Giza


# Feature Engineering:

In [167]:
#Converting car year to age to see how old the car is:
df['this_year']= 2022
df['car_age']=df['this_year']-df['Year']

In [168]:
#Getting Unique and NULL Values 
print(df['Brand'].unique())
print(df['Model'].unique())
print(df['Body'].unique())
print(df['car_age'].unique())
print(df['Fuel'].unique())
print(df['Kilometers'].unique())
print(df['Engine'].unique())
print(df['Transmission'].unique())
print(df['Gov'].unique())

df.isnull().sum()

['Hyundai' 'Chevrolet' 'Fiat']
['Accent' 'Avante' 'I10' 'Elantra' 'Excel' 'Matrix' 'Tucson' 'Verna'
 'Cruze' 'Aveo' 'Lanos' 'Optra' '128' '131' 'Punto' 'Shahin' 'Tipo' 'Uno']
['Sedan' 'Hatchback' 'SUV']
[15 17 23 13 22 10 12 16  7  5 21 11 26 14  2  9  3  8  6  0 24 20  1 18
 19 28 25 27  4 29 30 32 46 35 31 37 45 44 34 43 47 40 39 48 42 33 38 41
 49 36 50 51 52]
['Benzine' 'Natural Gas']
['140000 to 159999' '180000 to 199999' '10000 to 19999' 'More than 200000'
 '90000 to 99999' '100000 to 119999' '160000 to 179999' '120000 to 139999'
 '0 to 9999' '20000 to 29999' '30000 to 39999' '80000 to 89999'
 '60000 to 69999' '70000 to 79999' '40000 to 49999' '50000 to 59999']
['1600 CC' '1000 - 1300 CC' '1400 - 1500 CC']
['Automatic' 'Manual']
['Giza' 'Qena' 'Cairo' 'Minya' 'Alexandria' 'Dakahlia' 'Suez' 'Sharqia'
 'Kafr al-Sheikh' 'Beheira' 'Ismailia' 'Sohag' 'Monufia' 'Qalyubia'
 'Beni Suef' 'Asyut' 'Fayoum' 'Gharbia' 'Matruh' 'Damietta' 'Red Sea'
 'Port Said' 'Luxor' 'South Sinai' 'New Valle

Unnamed: 0      0
Brand           0
Model           0
Body            0
Color           0
Year            0
Fuel            0
Kilometers      0
Engine          0
Transmission    0
Price           0
Gov             0
this_year       0
car_age         0
dtype: int64

In [169]:
#Important Transforming functions
size_map_engine = {'1600 CC':1600, '1000 - 1300 CC':1300, '1400 - 1500 CC':1500 }
size_map_transmission={'Automatic':1,'Manual':0}
size_map_km={'140000 to 159999':159999, '180000 to 199999':199999, '10000 to 19999':19999, 'More than 200000':200000,'90000 to 99999':99999, '100000 to 119999':119999 ,'160000 to 179999':179999,'120000 to 139999':139999,'0 to 9999':9999, '20000 to 29999':29999, '30000 to 39999':39999, '80000 to 89999':89999,'60000 to 69999':69999,'70000 to 79999':79999, '40000 to 49999':49999, '50000 to 59999':59999}
df['Engine_size'] = df['Engine'].map(size_map_engine)
df['Transmission_type_code'] = df['Transmission'].map(size_map_transmission)
df['kms'] = df['Kilometers'].map(size_map_km)

In [170]:
#Dropping non important columns and transform categorical data to binary:
df.drop(columns=['Unnamed: 0','this_year','Year','Color','Engine','Transmission'], inplace=True)
df=pd.get_dummies(df,drop_first=True)
df

Unnamed: 0,Price,car_age,Engine_size,Transmission_type_code,kms,Brand_Fiat,Brand_Hyundai,Model_131,Model_Accent,Model_Avante,...,Gov_Monufia,Gov_New Valley,Gov_Port Said,Gov_Qalyubia,Gov_Qena,Gov_Red Sea,Gov_Sharqia,Gov_Sohag,Gov_South Sinai,Gov_Suez
0,140.0,15,1600,1,159999,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,78.0,17,1300,0,199999,0,1,0,1,0,...,0,0,0,0,1,0,0,0,0,0
2,70.0,23,1500,0,159999,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,150.0,13,1600,1,159999,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,75.0,22,1300,0,19999,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14736,46.0,25,1300,0,179999,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14737,46.0,26,1300,0,29999,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14738,43.7,29,1300,0,19999,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14739,69.0,26,1300,0,19999,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [171]:
df.corr(method ='pearson')

Unnamed: 0,Price,car_age,Engine_size,Transmission_type_code,kms,Brand_Fiat,Brand_Hyundai,Model_131,Model_Accent,Model_Avante,...,Gov_Monufia,Gov_New Valley,Gov_Port Said,Gov_Qalyubia,Gov_Qena,Gov_Red Sea,Gov_Sharqia,Gov_Sohag,Gov_South Sinai,Gov_Suez
Price,1.000000,-0.774288,0.599329,0.695100,-0.208787,-0.576436,0.369179,-0.213976,0.021285,0.103364,...,-0.010044,-0.007761,0.025482,-0.039353,0.008775,0.006467,-0.010145,0.004395,0.008171,-0.025441
car_age,-0.774288,1.000000,-0.628982,-0.485228,0.221121,0.751192,-0.368678,0.387185,-0.049386,-0.035852,...,0.004417,0.003603,0.001241,0.034759,-0.007008,0.003773,0.019484,-0.012432,-0.006659,0.018091
Engine_size,0.599329,-0.628982,1.000000,0.467829,-0.212464,-0.605318,0.388056,0.172888,-0.043121,0.105316,...,0.010773,0.005491,0.007433,-0.006845,0.029802,0.003800,0.020825,0.029192,0.010876,-0.026035
Transmission_type_code,0.695100,-0.485228,0.467829,1.000000,-0.116562,-0.453545,0.276874,-0.141323,-0.009759,0.188027,...,-0.014302,-0.004010,0.027401,-0.013171,-0.001929,-0.011767,0.011333,0.005187,-0.010627,-0.040259
kms,-0.208787,0.221121,-0.212464,-0.116562,1.000000,0.077223,-0.072542,-0.084439,0.000439,-0.020451,...,-0.018905,-0.009898,-0.010067,-0.004425,-0.016672,0.000186,0.010731,0.002031,-0.002583,-0.002109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Gov_Red Sea,0.006467,0.003773,0.003800,-0.011767,0.000186,0.004452,0.007440,0.010731,0.011822,-0.002761,...,-0.016751,-0.002215,-0.008896,-0.022861,-0.007856,1.000000,-0.023528,-0.009894,-0.005870,-0.013886
Gov_Sharqia,-0.010145,0.019484,0.020825,0.011333,0.010731,0.005180,-0.024255,0.022558,-0.034632,0.058859,...,-0.043620,-0.005768,-0.023166,-0.059529,-0.020456,-0.023528,1.000000,-0.025764,-0.015285,-0.036159
Gov_Sohag,0.004395,-0.012432,0.029192,0.005187,0.002031,-0.019375,0.022991,-0.020914,-0.017910,0.023938,...,-0.018343,-0.002426,-0.009742,-0.025033,-0.008602,-0.009894,-0.025764,1.000000,-0.006428,-0.015206
Gov_South Sinai,0.008171,-0.006659,0.010876,-0.010627,-0.002583,0.002046,-0.012737,-0.000988,-0.011123,-0.008624,...,-0.010882,-0.001439,-0.005780,-0.014852,-0.005104,-0.005870,-0.015285,-0.006428,1.000000,-0.009021


# Splitting The data

In [172]:
x=df.drop('Price',axis=1)
y=df['Price']

In [173]:
#Divise into 80% Train and 20% test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# ML

In [174]:
regressor=RandomForestRegressor()

In [175]:
regressor.fit(x_train,y_train)

RandomForestRegressor()

In [176]:
predictions=regressor.predict(x_test)
r = r2_score(y_test, predictions)

In [177]:
r

0.9041675563333498

In [178]:
! git clone https://github.com/Abdelrahmansaied/Old-Cars-Price.git


Cloning into 'Old-Cars-Price'...
