In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler

In [83]:
mpg = pd.read_csv ("data\\auto-mpg.csv")

In [84]:
mpg = mpg.drop("car name", axis = 1)

In [85]:
mpg = pd.get_dummies(mpg, columns = ["origin"],drop_first=True)

In [86]:
mpg.head()

Unnamed: 0,mpg,cyl,disp,hp,wt,acc,yr,origin_2,origin_3
0,18.0,8,307.0,130,3504,12.0,70,0,0
1,15.0,8,350.0,165,3693,11.5,70,0,0
2,18.0,8,318.0,150,3436,11.0,70,0,0
3,16.0,8,304.0,150,3433,12.0,70,0,0
4,17.0,8,302.0,140,3449,10.5,70,0,0


In [87]:
def updatehp(hp):
    if hp == "?":
        return pd.to_numeric(mpg[mpg["hp"]!="?"]["hp"]).mean()
    else:
        return int(hp)

mpg["hp"] = mpg["hp"].apply(updatehp)

In [88]:
minmax = MinMaxScaler()

In [89]:
y = mpg.iloc[:,:1]
x = mpg.iloc[:,1:]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 101)
cols = x.columns
x = pd.DataFrame(minmax.fit_transform(x), columns = cols)

In [90]:
lr = LinearRegression()
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [91]:
for i in lr.coef_[0]:
    print (i)

-0.6602220441817541
0.03168976853738249
-0.01759174439632904
-0.00727496342531473
0.08755446200392349
0.8167663725468354
2.756759130456777
2.3975716688372994


In [92]:
ridge = Ridge(alpha = 0.3)
ridge.fit(x_train, y_train)

Ridge(alpha=0.3, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [93]:
for i in ridge.coef_[0]:
    print(i)

-0.6560306684349266
0.03142061063545262
-0.01741777468167507
-0.007272084455926668
0.08721147306327844
0.8163382989466673
2.7174648169761713
2.3610055340965226


In [94]:
lasso = Lasso(alpha = 0.2)
lasso.fit(x_train, y_train)
for i in lasso.coef_:
    print(i)

-0.0
0.005979364456849162
-0.00967893696610914
-0.006843696704664469
0.0036434773131799134
0.7720939958799874
0.0
0.0


In [95]:
print(lr.score(x_test, y_test))

0.7984347837760659


In [96]:
print(ridge.score(x_test, y_test))

0.7982887514696327


In [97]:
print(lasso.score(x_test, y_test))

0.7819522546872685


In [98]:
from sklearn.preprocessing import PolynomialFeatures

In [99]:
poly  = PolynomialFeatures(degree = 2, interaction_only = False)

In [100]:
poly_x = poly.fit_transform(x)

In [101]:
x_train, x_test, y_train, y_test = train_test_split(poly_x, y, test_size = 0.2, random_state = 101)

In [108]:
lr = LinearRegression()
lr.fit(x_train, y_train)
ridge = Ridge(alpha = 0.3)
lasso = Lasso(alpha = 0.1)
ridge.fit(x_train, y_train)
lasso.fit(x_train, y_train)
print(lr.score(x_test, y_test))
print(ridge.score(x_test, y_test))
print(lasso.score(x_test, y_test))

0.869663849407176
0.8976871705832363
0.8217547971968701


In [109]:
for i in lasso.coef_:
    print(i)
    
for i in ridge.coef_[0]:
    print(i)

0.0
-1.2124433741544265
-0.0
-0.42444216052541367
-18.289303783254024
0.0
0.0
0.09365060935907582
0.608140360712415
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
0.0
0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
0.0
0.0
0.0
0.0
8.212901043444683
2.3524587067707476
0.9378796984016601
0.0
0.0
0.0
0.0
-3.7009734204203175
-6.384152974700735
-5.947155717097877
-14.1861609283307
-3.388442661084924
2.3737843293037977
-1.042200872067195
-0.5490628966093448
-2.2041264105620293
0.8256402104239173
-0.12343669338291104
4.474321024163958
4.693024691941119
2.976982763820864
-3.734159616367807
4.35623230031268
2.4246122021749628
0.8858515329362325
4.356393831234121
-0.793794030835071
1.8913272005038353
-2.0288824159289662
2.4991008223157065
1.3181328073488896
-0.8185788382786393
-4.777095357798538
-6.416347542864581
-0.8303602238552688
-3.10505621051565
2.6354190242843987
-6.696934464437634
-6.900866186529182
-2.2248192906859376
-4.585326236585712
-0.6025351499484409
3