In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

### Data Collection and Analysis

In [2]:
# Load the car dataset; the relative path is resolved against the notebook's working directory.
car_dataset = pd.read_csv('car data.csv')

In [3]:
# Preview the first five rows (the default for DataFrame.head) to inspect the columns and value formats.
car_dataset.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [4]:
# (number of rows, number of columns) of the loaded frame.
car_dataset.shape

(301, 9)

In [5]:
# Count missing values per column; LinearRegression cannot be fitted on NaNs, so non-zero counts would need handling first.
car_dataset.isnull().sum()

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
dtype: int64

In [6]:
# Checking the distribution of categorical data.
# The distinct Fuel_Type labels listed here are the ones the encoding cell maps to integer codes.
car_dataset['Fuel_Type'].value_counts()

Petrol    239
Diesel     60
CNG         2
Name: Fuel_Type, dtype: int64

In [7]:
# Distinct Transmission labels and how often each occurs.
car_dataset['Transmission'].value_counts()

Manual       261
Automatic     40
Name: Transmission, dtype: int64

In [8]:
# Distinct Seller_Type labels and how often each occurs.
car_dataset['Seller_Type'].value_counts()

Dealer        195
Individual    106
Name: Seller_Type, dtype: int64

### Encoding the Categorical Data

In [9]:
# Encode the categorical columns as integer codes so the regression model can consume them.
# One table: column name -> {original label: integer code}.
# NOTE(review): Fuel_Type is nominal, so the 0/1/2 codes impose an ordering on a linear model;
# consider one-hot encoding if that ordering is not intended.
category_codes = {
    'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2},
    'Transmission': {'Manual': 0, 'Automatic': 1},
    'Seller_Type': {'Dealer': 0, 'Individual': 1},
}

for column, codes in category_codes.items():
    # codes.get(label, label) leaves already-encoded values untouched, so re-running
    # this cell on an encoded frame is a no-op.
    # The explicit astype pins the dtype instead of relying on DataFrame.replace's implicit
    # object -> int downcasting (deprecated in pandas 2.2), and it raises if a label that
    # is missing from the table is still present, rather than leaving strings in the column.
    car_dataset[column] = (
        car_dataset[column]
        .map(lambda label, codes=codes: codes.get(label, label))
        .astype('int64')
    )

### Splitting the Data into Training Data and Test Data

In [10]:
# Target: the Selling_Price column.
Y = car_dataset['Selling_Price']

# Features: every column except the target itself and the Car_Name text column.
X = car_dataset.drop(columns=['Car_Name', 'Selling_Price'])

In [11]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
# The target names are mixed-case (Y_train / y_test) and later cells use exactly these names.
X_train, X_test, Y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [12]:
# Sanity check: full feature matrix, training part and test part (rows, columns).
print(X.shape, X_train.shape, X_test.shape)

(301, 7) (240, 7) (61, 7)


## Model Training

In [13]:
# Instantiate scikit-learn's LinearRegression with its default settings.
model = LinearRegression()

In [14]:
# Fit the regression on the training features and training target only.
model.fit(X_train, Y_train)

In [15]:
# Predictions on the same rows the model was fitted on (used for the training score).
training_data_prediction = model.predict(X_train)

In [16]:
# R^2 between the true training targets and the model's predictions for those same rows.
training_data_score = metrics.r2_score(
    y_true=Y_train,
    y_pred=training_data_prediction,
)
print('Training data Score:', training_data_score)

Training data Score: 0.8838169193709792


In [17]:
# Predictions on the held-out rows, which were not used in model.fit.
testing_data_prediction = model.predict(X_test)

In [18]:
# R^2 on the held-out rows; compare with the training score to gauge over-fitting.
testing_data_score = metrics.r2_score(
    y_true=y_test,
    y_pred=testing_data_prediction,
)
print('Testing data Score:', testing_data_score)

Testing data Score: 0.8401532365377782


### Building a Predictive System

In [19]:
# One car, with values given in the same column order as X:
# Year, Present_Price, Kms_Driven, Fuel_Type, Seller_Type, Transmission, Owner
# (categorical values use the integer codes assigned in the encoding cell).
input_data = (2014,5.59,27000,0,0,0,0)

input_data_as_numpy_array = np.asarray(input_data)

# The model was fitted on a DataFrame, so it remembers the feature names. Passing a bare
# NumPy array makes scikit-learn warn that X has no valid feature names and hides any
# column-order mistake. Wrapping the single row in a DataFrame with X's columns keeps the
# input explicitly aligned with the training features; the constructor also raises if the
# number of values does not match the number of feature columns.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1),
    columns=X.columns,
)

In [20]:
# model.predict returns one value per input row, so for a single row the printed result
# is a one-element array rather than a bare number.
prediction = model.predict(input_data_reshaped)
print("Car price according to the prediction is:",prediction)

Car price according to the prediction is: [3.82765933]


