Import Necessary Libraries

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression 
from sklearn.linear_model import Lasso 
from sklearn.preprocessing import LabelEncoder 
import pickle


Loading the Dataset

In [None]:
# Read the car listings from 'data.csv' (path is relative to the working directory).
car_dataset = pd.read_csv(filepath_or_buffer='data.csv')
# Preview the first five rows to sanity-check the parsed columns.
car_dataset.head(n=5)

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,26.6 km/kg,998 CC,58.16 bhp,5.0,,1.75
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,,12.5
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,,6.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,,17.74


In [None]:
# Show the frame's dimensions as a (rows, columns) tuple.
car_dataset.shape 

(6019, 14)

In [None]:
# Summarise each column: non-null count and dtype, plus overall memory usage.
car_dataset.info() 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6019 entries, 0 to 6018
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Unnamed: 0         6019 non-null   int64  
 1   Name               6019 non-null   object 
 2   Location           6019 non-null   object 
 3   Year               6019 non-null   int64  
 4   Kilometers_Driven  6019 non-null   int64  
 5   Fuel_Type          6019 non-null   object 
 6   Transmission       6019 non-null   object 
 7   Owner_Type         6019 non-null   object 
 8   Mileage            6017 non-null   object 
 9   Engine             5983 non-null   object 
 10  Power              5983 non-null   object 
 11  Seats              5977 non-null   float64
 12  New_Price          824 non-null    object 
 13  Price              6019 non-null   float64
dtypes: float64(2), int64(3), object(9)
memory usage: 658.5+ KB


In [None]:
# Count the missing values in every column.
car_dataset.isna().sum()

Unnamed: 0              0
Name                    0
Location                0
Year                    0
Kilometers_Driven       0
Fuel_Type               0
Transmission            0
Owner_Type              0
Mileage                 2
Engine                 36
Power                  36
Seats                  42
New_Price            5195
Price                   0
dtype: int64

Label Encoding

In [None]:
# Map the categorical text columns to integer codes.
#
# 'Fuel_Type' and 'Transmission' are encoded with a single, non-mutating
# replace() call, so re-running this cell is safe. Values that are not listed
# in a mapping are left untouched by replace().
#
# The former 'Seller_Type' mapping was removed: this dataset has no such
# column (see the car_dataset.info() listing), so that call silently did
# nothing.
#
# NOTE(review): if 'Fuel_Type' contains categories other than
# Petrol/Diesel/CNG they remain strings and the column stays object dtype --
# check car_dataset['Fuel_Type'].unique() and extend the mapping if needed.
CATEGORY_CODES = {
    'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2},
    'Transmission': {'Manual': 0, 'Automatic': 1},
}
car_dataset = car_dataset.replace(CATEGORY_CODES)
car_dataset.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,2,0,First,26.6 km/kg,998 CC,58.16 bhp,5.0,,1.75
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,1,0,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,,12.5
2,2,Honda Jazz V,Chennai,2011,46000,0,0,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5
3,3,Maruti Ertiga VDI,Chennai,2012,87000,1,0,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,,6.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,1,1,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,,17.74


In [None]:
# 'Transmission' was already converted to 0/1 in the encoding cell above, so
# repeating the same replace() here changed nothing; only the preview is kept.
car_dataset.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,2,0,First,26.6 km/kg,998 CC,58.16 bhp,5.0,,1.75
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,1,0,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,,12.5
2,2,Honda Jazz V,Chennai,2011,46000,0,0,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5
3,3,Maruti Ertiga VDI,Chennai,2012,87000,1,0,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,,6.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,1,1,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,,17.74


Splitting Data into Independent and Dependent Variables

In [None]:
# Features: every column except the car name and the target price.
# NOTE(review): 'Unnamed: 0' looks like a saved row index and is still part of
# X -- confirm whether it is meant to be a model feature.
X = car_dataset.drop(columns=['Name', 'Price'])
# Target: the 'Price' column.
Y = car_dataset['Price']
print(X)

      Unnamed: 0    Location  Year  Kilometers_Driven Fuel_Type  Transmission  \
0              0      Mumbai  2010              72000         2             0   
1              1        Pune  2015              41000         1             0   
2              2     Chennai  2011              46000         0             0   
3              3     Chennai  2012              87000         1             0   
4              4  Coimbatore  2013              40670         1             1   
...          ...         ...   ...                ...       ...           ...   
6014        6014       Delhi  2014              27365         1             0   
6015        6015      Jaipur  2015             100000         1             0   
6016        6016      Jaipur  2012              55000         1             0   
6017        6017     Kolkata  2013              46000         0             0   
6018        6018   Hyderabad  2011              47000         1             0   

     Owner_Type     Mileage

In [None]:
# Print the target series (the 'Price' column selected above).
print(Y)

0        1.75
1       12.50
2        4.50
3        6.00
4       17.74
        ...  
6014     4.75
6015     4.00
6016     2.90
6017     2.65
6018     2.50
Name: Price, Length: 6019, dtype: float64


In [None]:
# Hold out 10% of the rows as a test set; the fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=2,
)
print(X_train)

      Unnamed: 0   Location  Year  Kilometers_Driven Fuel_Type  Transmission  \
2083        2083  Bangalore  2015              65000         1             1   
2887        2887     Mumbai  2015              39000         0             0   
1230        1230  Ahmedabad  2012              52000         1             0   
307          307    Chennai  2006              58000         0             0   
5555        5555      Delhi  2010              75319         0             0   
...          ...        ...   ...                ...       ...           ...   
1099        1099    Chennai  2012              50000         0             0   
2514        2514    Kolkata  2010              35008         0             0   
3606        3606      Delhi  2015              53000         0             0   
5704        5704     Jaipur  2017              54455         1             1   
2575        2575  Hyderabad  2015             120000         1             0   

     Owner_Type     Mileage   Engine   

In [None]:
# Print the held-out feature rows produced by train_test_split.
print(X_test)

      Unnamed: 0    Location  Year  Kilometers_Driven Fuel_Type  Transmission  \
4930        4930  Coimbatore  2017              60993         1             0   
463          463       Delhi  2014              31465         1             0   
338          338        Pune  2012              73000         1             0   
3090        3090   Bangalore  2010              72010         1             0   
1826        1826       Kochi  2012              83184         0             0   
...          ...         ...   ...                ...       ...           ...   
3054        3054      Mumbai  2014              11000         0             0   
887          887      Mumbai  2011              48000         0             1   
418          418  Coimbatore  2018              22397         0             1   
1838        1838   Hyderabad  2007              70000         0             0   
1064        1064     Kolkata  2009              43002         0             0   

     Owner_Type     Mileage

In [None]:
# Print the training targets (prices aligned by index with X_train).
print(Y_train)

2083    29.00
2887     5.50
1230     6.50
307      1.50
5555     2.00
        ...  
1099     4.00
2514     1.36
3606     6.95
5704     6.50
2575     5.50
Name: Price, Length: 5417, dtype: float64


In [None]:
# Print the held-out targets (prices aligned by index with X_test).
print(Y_test)

4930     5.21
463      6.50
338      3.40
3090     4.45
1826     6.22
        ...  
3054     6.10
887      9.75
418     70.99
1838     2.00
1064     1.92
Name: Price, Length: 602, dtype: float64
