In [1]:
# データ加工・処理・分析ライブラリ
import numpy as np
import numpy.random as random
import scipy as sp
from pandas import Series, DataFrame
import pandas as pd

# 可視化ライブラリ
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
%matplotlib inline

# 機械学習ライブラリ
import sklearn

# 小数第三位まで表示
%precision 3

'%.3f'

In [2]:
# Imports needed for the download step
import requests, zipfile
import io

# Download the UCI automobile price data set (imports-85).
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as CSV.
response.raise_for_status()
res = response.content

# The raw file has no header row, so the column labels are supplied here.
auto_columns = ['symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
                'num-of-doors', 'body-style', 'drive-wheels', 'engine-location', 'wheel-base',
                'length', 'width', 'height', 'curb-weight', 'engine-type',
                'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke',
                'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg',
                'price']

# Load the downloaded bytes into a DataFrame with the labels above.
auto_df = pd.read_csv(io.StringIO(res.decode('utf-8')), header=None, names=auto_columns)


In [3]:
# Columns used for the regression: the target (price) and three features.
target_cols = ['price', 'horsepower', 'width', 'height']
auto_selected = auto_df[target_cols]

# Count how many '?' placeholders each column contains.
# Printed explicitly: a bare expression in the middle of a cell is not displayed.
missing_counts = auto_selected.isin(['?']).sum()
print(missing_counts)

# Replace '?' with NaN, drop every row that has a NaN, then convert the
# remaining values to numeric dtypes. Without the conversion, columns that
# held '?' stay as object (string) columns and the model cell would depend
# on implicit string-to-float coercion.
auto = (
    auto_selected
    .replace('?', np.nan)
    .dropna()
    .apply(pd.to_numeric)
)

In [14]:
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.model_selection import train_test_split

# Separate the target (price) from the explanatory variables, then hold out
# half of the rows as test data with a fixed seed.
y = auto['price']
X = auto.drop('price', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

# Three linear models: ordinary least squares, Ridge and Lasso.
linear = LinearRegression()
ridge = Ridge(random_state=0)
lasso = Lasso(alpha=1000, random_state=0)

# Fit each model on the training half and print its score() on both halves.
for model in (linear, ridge, lasso):
    model_name = model.__class__.__name__
    model.fit(X_train, y_train)
    train_score = model.score(X_train, y_train)
    print('{}(train):{:.6f}'.format(model_name, train_score))
    test_score = model.score(X_test, y_test)
    print('{}(test):{:.6f}'.format(model_name, test_score))
 

LinearRegression(train):0.733358
LinearRegression(test):0.737069
Ridge(train):0.733357
Ridge(test):0.737420
Lasso(train):0.726472
Lasso(test):0.762360


In [10]:
# IPython help query: shows the signature and docstring of Lasso.
# NOTE(review): exploratory introspection; consider removing it from the finished notebook.
?Lasso

[1;31mInit signature:[0m
[0mLasso[0m[1;33m([0m[1;33m
[0m    [0malpha[0m[1;33m=[0m[1;36m1.0[0m[1;33m,[0m[1;33m
[0m    [0mfit_intercept[0m[1;33m=[0m[1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mnormalize[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mprecompute[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mcopy_X[0m[1;33m=[0m[1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mmax_iter[0m[1;33m=[0m[1;36m1000[0m[1;33m,[0m[1;33m
[0m    [0mtol[0m[1;33m=[0m[1;36m0.0001[0m[1;33m,[0m[1;33m
[0m    [0mwarm_start[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mpositive[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mrandom_state[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mselection[0m[1;33m=[0m[1;34m'cyclic'[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m     
Linear Model trained with L1 prior as regularizer (aka the Lasso)

The opti