# **CAR PRICE PREDICTION USING MACHINE LEARNING**

In [1]:
import pandas as pd

## Getting the Dataset

In [31]:
# Load the car listings CSV into a DataFrame.
# NOTE(review): the path looks like a Colab-mounted Google Drive location — confirm
# the drive is mounted (or point this at a local copy) before running.
path = '/content/drive/MyDrive/Datasets/car_data.csv'
df = pd.read_csv(path)
# Bare last expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.60,6.87,42450,Diesel,Dealer,Manual,0
...,...,...,...,...,...,...,...,...,...
296,city,2016,9.50,11.60,33988,Diesel,Dealer,Manual,0
297,brio,2015,4.00,5.90,60000,Petrol,Dealer,Manual,0
298,city,2009,3.35,11.00,87934,Petrol,Dealer,Manual,0
299,city,2017,11.50,12.50,9000,Diesel,Dealer,Manual,0


## Checking Summary Statistics

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Year,Selling_Price,Present_Price,Driven_kms,Owner
count,301.0,301.0,301.0,301.0,301.0
mean,2013.627907,4.661296,7.628472,36947.20598,0.043189
std,2.891554,5.082812,8.642584,38886.883882,0.247915
min,2003.0,0.1,0.32,500.0,0.0
25%,2012.0,0.9,1.2,15000.0,0.0
50%,2014.0,3.6,6.4,32000.0,0.0
75%,2016.0,6.0,9.9,48767.0,0.0
max,2018.0,35.0,92.6,500000.0,3.0


## Checking whether there are any NaN/None values

In [4]:
# Per-column non-null counts and dtypes; a non-null count equal to the number of
# entries means that column has no missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Driven_kms     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Selling_type   301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


## Encoding the categorical columns in numeric form

In [5]:
from sklearn.preprocessing import LabelEncoder

In [6]:
# Encode each categorical column as integer codes.
#
# A separate LabelEncoder is fitted per column and kept in `encoders`, keyed by
# column name. Refitting one shared encoder would overwrite the earlier
# mappings, leaving no way to inverse_transform Car_Name / Fuel_Type codes or to
# encode new rows consistently at prediction time.
encoders = {}
for column in ['Car_Name', 'Fuel_Type', 'Selling_type']:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    encoders[column] = le
# After the loop `le` is the Selling_type encoder.

df

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,90,2014,3.35,5.59,27000,2,0,Manual,0
1,93,2013,4.75,9.54,43000,1,0,Manual,0
2,68,2017,7.25,9.85,6900,2,0,Manual,0
3,96,2011,2.85,4.15,5200,2,0,Manual,0
4,92,2014,4.60,6.87,42450,1,0,Manual,0
...,...,...,...,...,...,...,...,...,...
296,69,2016,9.50,11.60,33988,1,0,Manual,0
297,66,2015,4.00,5.90,60000,2,0,Manual,0
298,69,2009,3.35,11.00,87934,2,0,Manual,0
299,69,2017,11.50,12.50,9000,1,0,Manual,0


## Separating the features (x) and the target (y)

In [7]:
# Feature matrix: every column except the two price columns and the two
# attributes left out of the model (Transmission, Owner).
excluded_columns = ['Selling_Price', 'Present_Price', 'Transmission', 'Owner']
x = df.drop(excluded_columns, axis=1)
x

Unnamed: 0,Car_Name,Year,Driven_kms,Fuel_Type,Selling_type
0,90,2014,27000,2,0
1,93,2013,43000,1,0
2,68,2017,6900,2,0
3,96,2011,5200,2,0
4,92,2014,42450,1,0
...,...,...,...,...,...
296,69,2016,33988,1,0
297,66,2015,60000,2,0
298,69,2009,87934,2,0
299,69,2017,9000,1,0


In [8]:
# Target variable: the Present_Price column.
# NOTE(review): Selling_Price is dropped from the features and not used as the
# target — confirm Present_Price is the price this notebook is meant to predict.
y = df['Present_Price']
y

0       5.59
1       9.54
2       9.85
3       4.15
4       6.87
       ...  
296    11.60
297     5.90
298    11.00
299    12.50
300     5.90
Name: Present_Price, Length: 301, dtype: float64

## Creating and Training the Model

In [20]:
from sklearn.ensemble import RandomForestRegressor

In [21]:
# Random forest regressor with default hyperparameters. random_state is fixed so
# the randomness used while building the trees — and therefore the fitted model
# and its predictions — is the same on every Restart & Run All.
rfr = RandomForestRegressor(random_state=42)

In [22]:
from sklearn.model_selection import train_test_split

In [23]:
# Hold out 20% of the rows for evaluation. random_state pins the shuffle so the
# same rows land in the test set on every run, keeping the outputs reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [24]:
# Fit the regressor on the training split only; x_test / y_test are not passed in here.
rfr.fit(x_train, y_train)

## Predicting the output

In [25]:
# Predict the target for the held-out rows: one value per row of x_test.
y_pred = rfr.predict(x_test)
y_pred

array([11.78753333,  9.3373    ,  1.8353    , 10.4245    ,  1.5267    ,
        9.1372    ,  0.79048   ,  2.37211   , 11.433     ,  6.8293    ,
        0.75269   ,  8.7858    ,  0.7999    ,  0.80591   , 12.847     ,
        6.8149    ,  6.6686    ,  1.1818    ,  6.2581    ,  7.8536    ,
        6.4237    ,  9.8254    ,  0.60751   ,  0.9424    ,  1.6177    ,
        0.87082   ,  9.23265   ,  1.6004    ,  7.5217    , 18.3964    ,
        0.53097   ,  1.6287    ,  1.1321    , 13.2085    ,  0.6528    ,
        9.9294    ,  6.4982    ,  2.0885    ,  1.4793    ,  6.1786    ,
       17.896     ,  0.8282    ,  8.4473    , 10.0021    ,  0.73172   ,
        8.08846   ,  6.1786    , 14.9244    ,  0.79791   ,  6.7542    ,
        1.4643    ,  0.66157   ,  6.2529    ,  0.75618   ,  9.126     ,
        0.7792    ,  5.4433    ,  3.6913    ,  7.3285    ,  1.5773    ,
        6.1506    ])

## Checking predicted output

In [26]:
# Put each held-out target value next to the model's estimate for the same row.
# The frame keeps y_test's index, so rows can be matched back to df.
comparison = y_test.to_frame(name='Actual').assign(Predicted=y_pred)
comparison

Unnamed: 0,Actual,Predicted
267,9.40,11.787533
230,9.40,9.337300
113,1.40,1.835300
70,6.76,10.424500
107,1.50,1.526700
...,...,...
217,4.43,5.443300
41,3.98,3.691300
249,7.60,7.328500
125,1.75,1.577300
