## ①データ読み込み

In [1]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# imported below — confirm that suppressing all of them is intended.
warnings.simplefilter('ignore')

from sklearn.datasets import load_boston
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
 
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on older versions — confirm the pinned environment.
boston = load_boston()

In [2]:
# Build the working frame in one expression: feature columns named after
# boston.feature_names, plus the target values appended as column 'y'.
df = pd.DataFrame(boston.data, columns=boston.feature_names).assign(y=boston.target)
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,y
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


## ②説明変数、目的変数の設定

In [3]:
# Explanatory variables used by every model below.
# PTRATIO and B exist in df but are not part of this list.
feature_cols = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM',
    'AGE', 'DIS', 'RAD', 'TAX', 'LSTAT',
]
X = df[feature_cols]

# Double brackets keep the target as a one-column DataFrame (2-D) rather than a Series.
y = df[['y']]

In [4]:
# Standardization (zero mean, unit variance per column).
# Features and target get their own scaler objects: re-fitting a single scaler
# on y would overwrite the statistics learned from X, making it impossible to
# standardize new feature rows later.
x_scaler = preprocessing.StandardScaler()
xss_sk = x_scaler.fit_transform(X)

y_scaler = preprocessing.StandardScaler()
yss_sk = y_scaler.fit_transform(y)

# Later cells call sscaler.inverse_transform(...) on predictions, so keep the
# original name bound to the scaler that was fitted on the target.
sscaler = y_scaler

# ③重回帰分析(1)sklearn

In [5]:
# Fit ordinary least squares on the standardized features and target.
model_lr_std = LinearRegression().fit(xss_sk, yss_sk)

# The score is computed on the same arrays that were used for fitting (in-sample).
r2_in_sample = model_lr_std.score(xss_sk, yss_sk)

print("傾き:", model_lr_std.coef_)
print("切片:", model_lr_std.intercept_)
print("決定係数:", r2_in_sample)

傾き: [[-0.10804054  0.19670965 -0.03233951  0.09412602 -0.12166671  0.31679756
  -0.01224934 -0.37174184  0.18995007 -0.26926318 -0.43997373]]
切片: [-6.742777e-16]
決定係数: 0.7072049841081048


In [6]:
# Predictions in the standardized target scale (the model was fitted on yss_sk),
# i.e. the values *before* they are converted back to the original units.
pred_before = model_lr_std.predict(xss_sk)
pred_before[1]

array([0.20033217])

In [7]:
# Predict in standardized units, then map the result back to the original
# target units. sscaler was last fitted on y, so its inverse_transform undoes
# the target scaling.
pred_scaled = model_lr_std.predict(xss_sk)
pred = sscaler.inverse_transform(pred_scaled)
pred[1]

array([24.37346062])

# ③重回帰分析(2)numpy

In [15]:
from numpy import linalg as LA

# Slopes from the normal equations (X^T X) b = X^T y.
# The linear system is solved directly instead of forming the explicit inverse
# of X^T X, which is the numerically preferable way to get the same solution.
# No intercept column is added: xss_sk and yss_sk are both standardized, and the
# sklearn fit above reports an intercept of about -6.7e-16 (effectively zero).
gram = xss_sk.T @ xss_sk
coef = LA.solve(gram, xss_sk.T @ yss_sk)

In [16]:
# Shape of the standardized feature matrix as (rows, columns).
xss_sk.shape

(506, 11)

In [17]:
# Shape of the slope array; there should be one row per column of xss_sk.
coef.shape

(11, 1)

In [19]:
# Slopes: verify that they match the sklearn coefficients from (1) instead of
# comparing the printed numbers by eye. coef is a column (n_features, 1) while
# model_lr_std.coef_ is a row (1, n_features), so both are flattened first.
np.testing.assert_allclose(coef.ravel(), model_lr_std.coef_.ravel(), rtol=1e-6)
coef

array([[-0.10804054],
       [ 0.19670965],
       [-0.03233951],
       [ 0.09412602],
       [-0.12166671],
       [ 0.31679756],
       [-0.01224934],
       [-0.37174184],
       [ 0.18995007],
       [-0.26926318],
       [-0.43997373]])

In [20]:
# Compute predictions as X @ b. No intercept term is added here, so the result
# is in the standardized target scale.
pred_numpy = xss_sk @ coef

In [21]:
# Show the standardized-scale prediction for the row at index 1.
pred_numpy[1]

array([0.20033217])

In [22]:
# Convert the predictions back to the original (pre-standardization) target
# units. Despite the "_std" suffix, this variable holds de-standardized values.
pred_numpy_std = sscaler.inverse_transform(pred_numpy)

In [23]:
# Show the original-scale prediction for the row at index 1.
pred_numpy_std[1]

array([24.37346062])

# ③重回帰分析(3)statsmodels

In [15]:
import statsmodels.api as sm

# sm.OLS does not add an intercept on its own, so a constant column is added
# to the standardized feature matrix first.
x_add_const = sm.add_constant(xss_sk)

# Specify the OLS model, then fit it and print the full regression summary.
ols_spec = sm.OLS(yss_sk, x_add_const)
model_sm = ols_spec.fit()
print(model_sm.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.707
Model:                            OLS   Adj. R-squared:                  0.701
Method:                 Least Squares   F-statistic:                     108.5
Date:                Wed, 29 Dec 2021   Prob (F-statistic):          4.34e-124
Time:                        01:06:56   Log-Likelihood:                -407.23
No. Observations:                 506   AIC:                             838.5
Df Residuals:                     494   BIC:                             889.2
Df Model:                          11                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -5.235e-16      0.024  -2.15e-14      1.0