# Car Price Prediction Model
#### This project predicts the selling price of used cars from the features that the analysis found to be relevant for accurate predictions.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the used-car listings CSV into the working DataFrame `df`.
csv_path = "cardata.csv"
df = pd.read_csv(csv_path)

In [3]:
# Preview the first rows to check that the file was parsed into the expected columns.
df.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [4]:
# Print a summary of the frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Kms_Driven     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Seller_Type    301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Owner
count,301.0,301.0,301.0,301.0,301.0
mean,2013.627907,4.661296,7.628472,36947.20598,0.043189
std,2.891554,5.082812,8.644115,38886.883882,0.247915
min,2003.0,0.1,0.32,500.0,0.0
25%,2012.0,0.9,1.2,15000.0,0.0
50%,2014.0,3.6,6.4,32000.0,0.0
75%,2016.0,6.0,9.9,48767.0,0.0
max,2018.0,35.0,92.6,500000.0,3.0


In [6]:
# Count the missing values in each column.
df.isna().sum()

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
dtype: int64

In [7]:
# Count the rows that repeat an earlier row across every column.
df.duplicated().sum()

2

In [8]:
# Remove exact duplicate rows, keeping the first occurrence of each.
# `df` is rebound to the returned frame instead of being mutated with
# inplace=True. The original row labels are kept (the index is not reset).
df = df.drop_duplicates()

In [9]:
# Number of distinct values per column; useful for spotting the categorical columns.
df.nunique()

Car_Name          98
Year              16
Selling_Price    156
Present_Price    147
Kms_Driven       206
Fuel_Type          3
Seller_Type        2
Transmission       2
Owner              3
dtype: int64

In [10]:
# Year against which car age is measured. Kept as a named constant so the
# reference point is explicit and can be changed in one place.
REFERENCE_YEAR = 2023

# Age = reference year minus the value in the 'Year' column.
df['Age'] = REFERENCE_YEAR - df['Year']

In [11]:
# Discard the two columns that are not used as model inputs: 'Car_Name'
# (98 distinct values in the nunique output) and 'Year', which 'Age' now replaces.
unused_columns = ["Car_Name", "Year"]
df = df.drop(columns=unused_columns)

In [12]:
# List the distinct fuel categories present before they are one-hot encoded.
df['Fuel_Type'].unique()

array(['Petrol', 'Diesel', 'CNG'], dtype=object)

In [13]:
# One-hot encode the remaining categorical (object) columns.
# drop_first=True omits the first category of each column, which then acts as
# the baseline level. dtype=int pins the dummy columns to 0/1 integers:
# recent pandas versions default to bool, which would not match the 0/1 values
# shown in the recorded output of this notebook.
df = pd.get_dummies(df, drop_first=True, dtype=int)

In [14]:
# Display the encoded frame to check the new dummy columns.
df

Unnamed: 0,Selling_Price,Present_Price,Kms_Driven,Owner,Age,Fuel_Type_Diesel,Fuel_Type_Petrol,Seller_Type_Individual,Transmission_Manual
0,3.35,5.59,27000,0,9,0,1,0,1
1,4.75,9.54,43000,0,10,1,0,0,1
2,7.25,9.85,6900,0,6,0,1,0,1
3,2.85,4.15,5200,0,12,0,1,0,1
4,4.60,6.87,42450,0,9,1,0,0,1
...,...,...,...,...,...,...,...,...,...
296,9.50,11.60,33988,0,7,1,0,0,1
297,4.00,5.90,60000,0,8,0,1,0,1
298,3.35,11.00,87934,0,14,0,1,0,1
299,11.50,12.50,9000,0,6,1,0,0,1


### Building the model

In [16]:
# Predictor columns, listed in the order in which they are passed to the model.
feature_columns = [
    'Present_Price',
    'Kms_Driven',
    'Fuel_Type_Diesel',
    'Fuel_Type_Petrol',
    'Seller_Type_Individual',
    'Transmission_Manual',
    'Owner',
    'Age',
]
x = df.loc[:, feature_columns]

# Target column to be predicted.
y = df.loc[:, 'Selling_Price']

In [35]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; random_state is fixed so the split
# is the same on every run.
split_parts = train_test_split(x, y, test_size=0.3, random_state=1)
x_train, x_test, y_train, y_test = split_parts

In [37]:
# Sanity-check the full feature/target objects: shapes first, then types.
for full_set in (x, y):
    print(full_set.shape)
for full_set in (x, y):
    print(type(full_set))

# Shapes of the train and test partitions, features before target.
for partition in (x_train, y_train, x_test, y_test):
    print(partition.shape)

(299, 8)
(299,)
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
(209, 8)
(209,)
(90, 8)
(90,)


In [38]:
from sklearn import linear_model
# Create an unfitted linear regression estimator with default settings.
lm = linear_model.LinearRegression()

In [39]:
# Fit the regression on the training split. `model` holds the value returned by
# `lm.fit` (presumably the same estimator object as `lm` -- confirm against the
# scikit-learn docs); later cells use `lm` directly.
model = lm.fit(x_train, y_train)

In [40]:
# Predict selling prices for the held-out test features.
y_predict = lm.predict(x_test)

In [41]:
# Fitted coefficients; there are eight values, matching the eight feature columns of x.
# NOTE(review): in the recorded output the Fuel_Type_Diesel and Fuel_Type_Petrol
# coefficients are equal in magnitude and opposite in sign, which suggests the two
# dummies may be collinear with the intercept in the training split (possibly no
# CNG rows there) -- confirm before interpreting them individually.
lm.coef_

array([ 5.27219836e-01, -3.99925567e-06,  8.24934465e-01, -8.24934465e-01,
       -5.70280439e-01, -5.45763631e-01, -8.53239589e-01, -4.58068642e-01])

In [42]:
# Fitted intercept term of the linear model.
lm.intercept_

6.327440586577032

In [43]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the predictions on the held-out test split.
test_mse = mean_squared_error(y_test, y_predict)
print(test_mse)

4.45604930650703


### End of project