In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from sklearn import metrics
from sklearn.metrics import r2_score

In [3]:
data=pd.read_csv('car data.csv')

In [4]:
data.drop('Car_Name',axis=1,inplace=True)

Encoding

In [5]:
from sklearn.preprocessing import LabelEncoder

In [6]:
encoder=LabelEncoder()

In [7]:
data.Fuel_Type=encoder.fit_transform(data.Fuel_Type)
data.Seller_Type=encoder.fit_transform(data.Seller_Type)
data.Transmission=encoder.fit_transform(data.Transmission)

In [8]:
data.shape

(301, 8)

Outlier removal

In [9]:
outlier_pp = data[data['Present_Price'] >25]
data = data[~data.index.isin(outlier_pp.index)]

In [10]:
outlier_sp = data[data['Selling_Price'] >17]
data = data[~data.index.isin(outlier_sp.index)]

In [11]:
outlier_km = data[data['Kms_Driven'] >100000]
data = data[~data.index.isin(outlier_km.index)]

Train test split

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
x=data.drop('Selling_Price',axis=1)
y=data.iloc[:,[1] ]

In [14]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42)

Preprocessing

In [15]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

In [16]:
stdScaler = StandardScaler()
stdScaler.fit(x_train[['Present_Price']])

In [17]:
x_train['Present_Price']=stdScaler.transform(x_train[['Present_Price']])
x_test['Present_Price']=stdScaler.transform(x_test[['Present_Price']])

In [18]:
tarScaler = StandardScaler()

In [19]:
tarScaler.fit(y_train[['Selling_Price']])
y_train['Selling_Price']=tarScaler.transform(y_train[['Selling_Price']])
y_test['Selling_Price']=tarScaler.transform(y_test[['Selling_Price']])

In [20]:
mmScaler = MinMaxScaler()
mmScaler.fit(x_train[['Kms_Driven']])

In [21]:
x_train['Kms_Driven']=mmScaler.transform(x_train[['Kms_Driven']])
x_test['Kms_Driven']=mmScaler.transform(x_test[['Kms_Driven']])

Linear Regression

In [22]:
from sklearn.linear_model import LinearRegression

In [23]:
lr=LinearRegression()

In [24]:
lr.fit(x_train,y_train)
y_pred_lr=lr.predict(x_test)

Metrics for linear regression

In [25]:
r2=r2_score(y_test,y_pred_lr)
r2

0.8087872917066251

In [26]:
mae=metrics.mean_absolute_error(y_test,y_pred_lr)
mse=metrics.mean_squared_error(y_test,y_pred_lr)
print("mae=",mae)
print("mse=",mse)

mae= 0.33213799083593054
mse= 0.24372750628827283


Random forest

In [27]:
from sklearn.ensemble import RandomForestRegressor

In [28]:
rf=RandomForestRegressor(n_estimators=150,random_state=42)

In [29]:
rf.fit(x_train,y_train)
y_pred_rf=rf.predict(x_test)

  rf.fit(x_train,y_train)


In [30]:
r2=r2_score(y_test,y_pred_rf)
r2

0.9275469953585207

In [31]:
mae=metrics.mean_absolute_error(y_test,y_pred_rf)
mse=metrics.mean_squared_error(y_test,y_pred_rf)
print("mae=",mae)
print("mse=",mse)

mae= 0.1827207480224079
mse= 0.09235155080417966


XG Boost regressor

In [32]:
from sklearn.ensemble import GradientBoostingRegressor

In [33]:
gr=GradientBoostingRegressor(random_state=42)

In [34]:
gr.fit(x_train,y_train)
y_pred_xg=gr.predict(x_test)

  y = column_or_1d(y, warn=True)


In [35]:
r2=r2_score(y_test,y_pred_xg)
r2

0.9264979138745917

In [36]:
mae=metrics.mean_absolute_error(y_test,y_pred_xg)
mse=metrics.mean_squared_error(y_test,y_pred_xg)
print("mae=",mae)
print("mse=",mse)

mae= 0.16683427123144837
mse= 0.09368875279380337


Making prediction

In [37]:
features=x_train.columns
print(features)

Index(['Year', 'Present_Price', 'Kms_Driven', 'Fuel_Type', 'Seller_Type',
       'Transmission', 'Owner'],
      dtype='object')


In [38]:
data.head()

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,2014,3.35,5.59,27000,2,0,1,0
1,2013,4.75,9.54,43000,1,0,1,0
2,2017,7.25,9.85,6900,2,0,1,0
3,2011,2.85,4.15,5200,2,0,1,0
4,2014,4.6,6.87,42450,1,0,1,0


In [39]:
inputs=[]
for i in features:
  if(i=='Present_Price'):
     temp=stdScaler.transform([[float(input(f"Enter {i}: "))]])
     inputs.append(temp)
  elif(i=='Kms_Driven'):
     temp=mmScaler.transform([[float(input(f"Enter {i}: "))]])
     inputs.append(temp)
  else:
     inputs.append([[float(input(f"Enter {i}: "))]])




In [40]:
inputs

[[[2014.0]],
 array([[0.11687209]]),
 array([[0.45313238]]),
 [[1.0]],
 [[0.0]],
 [[1.0]],
 [[0.0]]]

In [41]:
input_array = np.array(inputs)
input_array = input_array.reshape(1, -1)

In [42]:
predicted_price_scaled = gr.predict(input_array)
predicted_price = tarScaler.inverse_transform(predicted_price_scaled.reshape(-1, 1))
print("The predicted selling price will be: {:.2f}".format(predicted_price[0][0]))

The predicted selling price will be: 4.35


