In [1]:
import seaborn as sns
import numpy as np


In [4]:
df = sns.load_dataset('mpg')


In [6]:
df.drop('name', inplace=True, axis=1)

In [8]:
df.isnull().sum()

mpg             0
cylinders       0
displacement    0
horsepower      6
weight          0
acceleration    0
model_year      0
origin          0
dtype: int64

In [16]:
df.fillna(df['horsepower'].mean(), inplace=True)

In [19]:
df['origin'].value_counts()

origin
usa       249
japan      79
europe     70
Name: count, dtype: int64

In [21]:
df['origin'] = df['origin'].map({"usa":1,"japan":2,"europe":3})

In [34]:
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,18.0,8,307.0,130.0,3504,12.0,70,1
1,15.0,8,350.0,165.0,3693,11.5,70,1
2,18.0,8,318.0,150.0,3436,11.0,70,1
3,16.0,8,304.0,150.0,3433,12.0,70,1
4,17.0,8,302.0,140.0,3449,10.5,70,1
...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790,15.6,82,1
394,44.0,4,97.0,52.0,2130,24.6,82,3
395,32.0,4,135.0,84.0,2295,11.6,82,1
396,28.0,4,120.0,79.0,2625,18.6,82,1


In [29]:
X = df.iloc[:, 1:]
y = df['mpg']

In [None]:
# Train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [39]:
# model learning
from sklearn.linear_model import LinearRegression
regression = LinearRegression()
regression

In [40]:
regression.fit(X_train, y_train)

In [45]:
for i, col_features in enumerate(X_train.columns):
  print(f"The coefficient for {col_features} is {regression.coef_[i]}")

The coefficient for cylinders is -0.217115831836747
The coefficient for displacement is 0.019832988493913538
The coefficient for horsepower is -0.01231903232468544
The coefficient for weight is -0.007172500920386061
The coefficient for acceleration is 0.09246703801061516
The coefficient for model_year is 0.8465209890548182
The coefficient for origin is 1.4259618028008088


In [50]:
from sklearn.metrics import r2_score
y_pred = regression.predict(X_test)
print(f"R Square of linear regression is {r2_score(y_test, y_pred)}")

R Square of linear regression is 0.8563221414573969


In [88]:
# Ridge Regression 
from sklearn.linear_model import Ridge
Ridge_Regression = Ridge(alpha=0.1)

In [89]:
Ridge_Regression.fit(X_train, y_train)

In [90]:
for i, col_features in enumerate(X_train.columns):
  print(f"The coefficient for {col_features} is {Ridge_Regression.coef_[i]}")

The coefficient for cylinders is -0.21673046760432732
The coefficient for displacement is 0.019815326712145153
The coefficient for horsepower is -0.012311983531047658
The coefficient for weight is -0.0071722227755142125
The coefficient for acceleration is 0.0924458876409954
The coefficient for model_year is 0.8464532658022683
The coefficient for origin is 1.4246344308829417


In [91]:
y_pred = Ridge_Regression.predict(X_test)
print(f"R Square of Ridge_Regression is {r2_score(y_test, y_pred)}")

R Square of Ridge_Regression is 0.8563234962947284


In [92]:
# Lasso Regression
from sklearn.linear_model import Lasso
Lasso_Regression = Lasso(alpha=0.5)

In [93]:
Lasso_Regression.fit(X_train, y_train)

In [94]:
for i, col_features in enumerate(X_train.columns):
  print(f"The coefficient for {col_features} is {Lasso_Regression.coef_[i]}")

The coefficient for cylinders is -0.0
The coefficient for displacement is 0.0019485866151450896
The coefficient for horsepower is -0.009201130535129241
The coefficient for weight is -0.006590968819205764
The coefficient for acceleration is 0.0
The coefficient for model_year is 0.753509325384892
The coefficient for origin is 0.0


In [95]:
y_pred = Lasso_Regression.predict(X_test)
print(f"R Square of Lasso_Regression is {r2_score(y_test, y_pred)}")

R Square of Lasso_Regression is 0.8457674415554649


In [102]:
from sklearn.linear_model import ElasticNet
ElasticNet_r = ElasticNet(alpha=1, l1_ratio=0.5)

In [103]:
ElasticNet_r.fit(X_train, y_train)

In [104]:
for i, col_features in enumerate(X_train.columns):
  print(f"The coefficient for {col_features} is {ElasticNet_r.coef_[i]}")

The coefficient for cylinders is -0.0
The coefficient for displacement is 0.0014955918908507633
The coefficient for horsepower is -0.010493062642771998
The coefficient for weight is -0.006534340496450016
The coefficient for acceleration is 0.0
The coefficient for model_year is 0.7207308111190328
The coefficient for origin is 0.0


In [105]:
y_pred = ElasticNet_r.predict(X_test)
print(f"R Square of ElasticNet_r is {r2_score(y_test, y_pred)}")

R Square of ElasticNet_r is 0.8477380072124668


In [107]:
# Cross Validation
from sklearn.linear_model import RidgeCV
RidgeCV_r = RidgeCV(cv=5)

In [108]:
RidgeCV_r.fit(X_train, y_train)

In [None]:
RidgeCV_r_pred = RidgeCV_r.predict(X_test)
print(f"R Square of RidgeCV_r is {r2_score(y_test, RidgeCV_r_pred)}")

R Square of ElasticNet_r is 0.8563651026581756


In [115]:
from sklearn.linear_model import LassoCV
LassoCV_r = LassoCV(cv=5)
LassoCV_r.fit(X_train, y_train)

In [117]:
LassoCV_r_pred =LassoCV_r.predict(X_test)
print(f"R Square of LassoCV_r is {r2_score(y_test, RidgeCV_r_pred)}")

R Square of LassoCV_r is 0.8563651026581756
