## Importing Libraries

In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn import metrics

## Loading the Dataset

In [28]:
car_dataset = pd.read_csv('car_data.csv')

car_dataset.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


##  Basic Information about the Dataset

In [29]:
print("Dataset Shape: ", car_dataset.shape)

# Check for missing values in each column
print("Missing Values: \n", car_dataset.isnull().sum())

Dataset Shape:  (301, 9)
Missing Values: 
 Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
dtype: int64


## Analyzing Categorical Columns

In [30]:
print("Fuel Type Counts:\n", car_dataset.Fuel_Type.value_counts())
print("\nSeller Type Counts:\n", car_dataset.Seller_Type.value_counts())
print("\nTransmission Type Counts:\n", car_dataset.Transmission.value_counts())

Fuel Type Counts:
 Fuel_Type
Petrol    239
Diesel     60
CNG         2
Name: count, dtype: int64

Seller Type Counts:
 Seller_Type
Dealer        195
Individual    106
Name: count, dtype: int64

Transmission Type Counts:
 Transmission
Manual       261
Automatic     40
Name: count, dtype: int64


## Encoding Categorical Columns

In [31]:
car_dataset.replace({'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2}}, inplace=True)
car_dataset.replace({'Seller_Type': {'Dealer': 0, 'Individual': 1}}, inplace=True)
car_dataset.replace({'Transmission': {'Manual': 0, 'Automatic': 1}}, inplace=True)

car_dataset.head()

  car_dataset.replace({'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2}}, inplace=True)
  car_dataset.replace({'Seller_Type': {'Dealer': 0, 'Individual': 1}}, inplace=True)
  car_dataset.replace({'Transmission': {'Manual': 0, 'Automatic': 1}}, inplace=True)


Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,0,0,0,0
1,sx4,2013,4.75,9.54,43000,1,0,0,0
2,ciaz,2017,7.25,9.85,6900,0,0,0,0
3,wagon r,2011,2.85,4.15,5200,0,0,0,0
4,swift,2014,4.6,6.87,42450,1,0,0,0


## Splitting Features and Target

In [32]:
X = car_dataset.drop(['Car_Name', 'Selling_Price'], axis=1)
Y = car_dataset['Selling_Price']

print("Features: \n", X.head())
print("\nTarget: \n", Y.head())

Features: 
    Year  Present_Price  Kms_Driven  Fuel_Type  Seller_Type  Transmission  \
0  2014           5.59       27000          0            0             0   
1  2013           9.54       43000          1            0             0   
2  2017           9.85        6900          0            0             0   
3  2011           4.15        5200          0            0             0   
4  2014           6.87       42450          1            0             0   

   Owner  
0      0  
1      0  
2      0  
3      0  
4      0  

Target: 
 0    3.35
1    4.75
2    7.25
3    2.85
4    4.60
Name: Selling_Price, dtype: float64


## Splitting the Data into Train and Test Sets

In [33]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=2)

## Linear Regression Model

In [34]:
lin_reg_model = LinearRegression()
lin_reg_model.fit(X_train, Y_train)

training_data_prediction = lin_reg_model.predict(X_train)
training_error = metrics.r2_score(Y_train, training_data_prediction)
print("Linear Regression - R Squared Error (Training): ", training_error)

test_data_prediction = lin_reg_model.predict(X_test)
test_error = metrics.r2_score(Y_test, test_data_prediction)
print("Linear Regression - R Squared Error (Test): ", test_error)

Linear Regression - R Squared Error (Training):  0.8799451660493711
Linear Regression - R Squared Error (Test):  0.8365766715027051


## Lasso Regression Model

In [35]:
lass_reg_model = Lasso()
lass_reg_model.fit(X_train, Y_train)

training_data_prediction = lass_reg_model.predict(X_train)
training_error = metrics.r2_score(Y_train, training_data_prediction)
print("Lasso Regression - R Squared Error (Training): ", training_error)

test_data_prediction = lass_reg_model.predict(X_test)
test_error = metrics.r2_score(Y_test, test_data_prediction)
print("Lasso Regression - R Squared Error (Test): ", test_error)

Lasso Regression - R Squared Error (Training):  0.8427856123435794
Lasso Regression - R Squared Error (Test):  0.8709167941173195
