In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston

## データ読み込み

In [0]:
# Load the dataset object and print the description text it carries.
# NOTE(review): load_boston appears to be unavailable in recent scikit-learn
# releases -- confirm the pinned version before relying on this cell.
boston = load_boston()
print(boston["DESCR"])

In [0]:
# Assemble one frame: a column per feature name, with the target appended
# as the PRICE column.
data_boston = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
).assign(PRICE=boston.target)

In [0]:
# Peek at the first five rows (plain-text rendering).
print(data_boston.head(n=5))

In [0]:
# Peek at the last five rows (plain-text rendering).
print(data_boston.tail(n=5))

## 可視化

In [0]:
# Joint distribution of RM against PRICE.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11
# and rejects them from 0.12 onward, so the positional form breaks on upgrade.
sns.jointplot(x='RM', y='PRICE', data=data_boston)

In [0]:
# Pairwise plots across every column of the frame.
sns.pairplot(data=data_boston)

In [0]:
# Pairwise plots restricted to three columns of interest.
sns.pairplot(
    data=data_boston,
    vars=["PRICE", "RM", "DIS"],
)

## 線形回帰を実践

In [0]:
# Unfitted estimator with default settings; the single-feature cell fits it.
lr = LinearRegression()

In [0]:
# Simple regression: one explanatory column (RM) against the PRICE target.
x_column_list, y_column_list = ['RM'], ['PRICE']

# Selecting with a list of column names keeps each selection a DataFrame.
data_boston_x, data_boston_y = data_boston[x_column_list], data_boston[y_column_list]

lr.fit(X=data_boston_x, y=data_boston_y)

In [0]:
# Fitted slope(s) first, then the intercept, one per line.
print(lr.coef_, lr.intercept_, sep="\n")

### 重回帰分析

In [0]:
# Multiple regression: a fresh estimator fitted on thirteen feature columns.
lr_multi = LinearRegression()

x_column_list_for_multi = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
y_column_list_for_multi = ['PRICE']

# These two names are rebound here, replacing the single-feature selections.
data_boston_x, data_boston_y = (
    data_boston[x_column_list_for_multi],
    data_boston[y_column_list_for_multi],
)

lr_multi.fit(X=data_boston_x, y=data_boston_y)

In [0]:
# Fitted coefficients first, then the intercept, one per line.
print(lr_multi.coef_, lr_multi.intercept_, sep="\n")

## 予測

In [0]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation.
# A fixed random_state makes the shuffle reproducible, so a fresh
# Restart & Run All yields the same split (and the same downstream numbers).
X_train, X_test, y_train, y_test = train_test_split(
    data_boston_x,
    data_boston_y,
    test_size=0.3,
    random_state=0,
)

In [0]:
# Shapes of the four split pieces, one per line: X train/test, then y train/test.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep="\n")

In [0]:
# Fit a new estimator on the training split only, then show what it learned:
# coefficients on the first line(s), intercept after.
lr_multi2 = LinearRegression().fit(X_train, y_train)
print(lr_multi2.coef_, lr_multi2.intercept_, sep="\n")

In [0]:
# Predictions for the held-out rows from the model fitted on the training split.
y_pred = lr_multi2.predict(X=X_test)

In [0]:
# Signed error per held-out row, computed as predicted minus actual.
prediction_error = y_pred - y_test
print(prediction_error)

## MAE

In [0]:
from sklearn.metrics import mean_absolute_error

In [0]:
# Mean absolute error of a single-feature (RM) model on a 30% hold-out.
x_column_list = ['RM']
y_column_list = ['PRICE']

# A fixed random_state makes the split, and therefore the reported MAE,
# reproducible across runs instead of changing on every execution.
X_train, X_test, y_train, y_test = train_test_split(
    data_boston[x_column_list],
    data_boston[y_column_list],
    test_size=0.3,
    random_state=0,
)

lr_single = LinearRegression()
lr_single.fit(X_train, y_train)
y_pred = lr_single.predict(X_test)

# mean_absolute_error is declared as (y_true, y_pred): ground truth goes first.
print(mean_absolute_error(y_test, y_pred))

In [0]:
# Mean absolute error of the thirteen-feature model on a 30% hold-out.
x_column_list_for_multi = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
y_column_list_for_multi = ['PRICE']

# A fixed random_state makes the split, and therefore the reported MAE,
# reproducible across runs instead of changing on every execution.
X_train, X_test, y_train, y_test = train_test_split(
    data_boston[x_column_list_for_multi],
    data_boston[y_column_list_for_multi],
    test_size=0.3,
    random_state=0,
)

lr_multi2 = LinearRegression()
lr_multi2.fit(X_train, y_train)
y_pred = lr_multi2.predict(X_test)

# mean_absolute_error is declared as (y_true, y_pred): ground truth goes first.
print(mean_absolute_error(y_test, y_pred))