In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor # VIFを求めるため

###  データの読み込み Loading data

In [2]:
# Read the air-quality table from CSV and preview the first rows as loaded.
dat = pd.read_csv("data/airquality.csv", sep=",")
print(dat.head())
# Keep only complete rows: every row that contains a NaN is discarded,
# then preview again so the effect of the filtering is visible.
dat = dat.dropna()
print(dat.head())

   Unnamed: 0  Ozone  Solar  Wind  Temp  Month  Day
0           1   41.0  190.0   7.4    67      5    1
1           2   36.0  118.0   8.0    72      5    2
2           3   12.0  149.0  12.6    74      5    3
3           4   18.0  313.0  11.5    62      5    4
4           5    NaN    NaN  14.3    56      5    5
   Unnamed: 0  Ozone  Solar  Wind  Temp  Month  Day
0           1   41.0  190.0   7.4    67      5    1
1           2   36.0  118.0   8.0    72      5    2
2           3   12.0  149.0  12.6    74      5    3
3           4   18.0  313.0  11.5    62      5    4
6           7   23.0  299.0   8.6    65      5    7


### 線形回帰モデル (独立変数 Solar のみ)  Linear regression model (IV is "Solar")

In [3]:
# Specify an ordinary-least-squares model with Ozone as the response and
# Solar as the only predictor, fit it, and print the regression summary.
model = smf.ols("Ozone ~ Solar", data=dat)
results = model.fit()
print(results.summary())

# Variance inflation factors: one value per column of the model's design
# matrix (model.exog), labelled with the matching names in model.exog_names.
design = model.exog
col = design.shape[1]  # number of columns in the design matrix
vifs = [variance_inflation_factor(design, idx) for idx in range(col)]
vif_table = pd.DataFrame({"VIF": vifs}, index=model.exog_names)
print(vif_table)

                            OLS Regression Results                            
Dep. Variable:                  Ozone   R-squared:                       0.121
Model:                            OLS   Adj. R-squared:                  0.113
Method:                 Least Squares   F-statistic:                     15.05
Date:                Mon, 24 Dec 2018   Prob (F-statistic):           0.000179
Time:                        15:18:29   Log-Likelihood:                -538.86
No. Observations:                 111   AIC:                             1082.
Df Residuals:                     109   BIC:                             1087.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     18.5987      6.748      2.756      0.0

### 線形回帰モデル (独立変数 Windのみ)  Linear regression model (IV is "Wind")

In [4]:
# Specify an ordinary-least-squares model with Ozone as the response and
# Wind as the only predictor, fit it, and print the regression summary.
model = smf.ols("Ozone ~ Wind", data=dat)
results = model.fit()
print(results.summary())

# Variance inflation factors: one value per column of the model's design
# matrix (model.exog), labelled with the matching names in model.exog_names.
design = model.exog
col = design.shape[1]  # number of columns in the design matrix
vifs = [variance_inflation_factor(design, idx) for idx in range(col)]
vif_table = pd.DataFrame({"VIF": vifs}, index=model.exog_names)
print(vif_table)

                            OLS Regression Results                            
Dep. Variable:                  Ozone   R-squared:                       0.375
Model:                            OLS   Adj. R-squared:                  0.369
Method:                 Least Squares   F-statistic:                     65.44
Date:                Mon, 24 Dec 2018   Prob (F-statistic):           9.09e-13
Time:                        15:18:32   Log-Likelihood:                -519.94
No. Observations:                 111   AIC:                             1044.
Df Residuals:                     109   BIC:                             1049.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     99.0413      7.472     13.254      0.0

### 線形回帰モデル (独立変数 Tempのみ)  Linear regression model (IV is "Temp")

In [5]:
# Specify an ordinary-least-squares model with Ozone as the response and
# Temp as the only predictor, fit it, and print the regression summary.
model = smf.ols("Ozone ~ Temp", data=dat)
results = model.fit()
print(results.summary())

# Variance inflation factors: one value per column of the model's design
# matrix (model.exog), labelled with the matching names in model.exog_names.
design = model.exog
col = design.shape[1]  # number of columns in the design matrix
vifs = [variance_inflation_factor(design, idx) for idx in range(col)]
vif_table = pd.DataFrame({"VIF": vifs}, index=model.exog_names)
print(vif_table)

                            OLS Regression Results                            
Dep. Variable:                  Ozone   R-squared:                       0.488
Model:                            OLS   Adj. R-squared:                  0.483
Method:                 Least Squares   F-statistic:                     103.9
Date:                Mon, 24 Dec 2018   Prob (F-statistic):           1.55e-17
Time:                        15:18:39   Log-Likelihood:                -508.89
No. Observations:                 111   AIC:                             1022.
Df Residuals:                     109   BIC:                             1027.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   -147.6461     18.755     -7.872      0.0

### 線形回帰モデル (独立変数 Solar, Windの２つ)  Linear regression model (IVs are "Solar" and "Wind")

In [6]:
# Specify an ordinary-least-squares model with Ozone as the response and
# Solar and Wind as predictors, fit it, and print the regression summary.
model = smf.ols("Ozone ~ Solar + Wind", data=dat)
results = model.fit()
print(results.summary())

# Variance inflation factors: one value per column of the model's design
# matrix (model.exog), labelled with the matching names in model.exog_names.
design = model.exog
col = design.shape[1]  # number of columns in the design matrix
vifs = [variance_inflation_factor(design, idx) for idx in range(col)]
vif_table = pd.DataFrame({"VIF": vifs}, index=model.exog_names)
print(vif_table)

                            OLS Regression Results                            
Dep. Variable:                  Ozone   R-squared:                       0.449
Model:                            OLS   Adj. R-squared:                  0.439
Method:                 Least Squares   F-statistic:                     44.09
Date:                Mon, 24 Dec 2018   Prob (F-statistic):           1.00e-14
Time:                        15:18:42   Log-Likelihood:                -512.91
No. Observations:                 111   AIC:                             1032.
Df Residuals:                     108   BIC:                             1040.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     77.2460      9.068      8.519      0.0

### 線形回帰モデル (独立変数 Solar, Tempの２つ)  Linear regression model (IVs are "Solar" and "Temp")

In [7]:
# Specify an ordinary-least-squares model with Ozone as the response and
# Solar and Temp as predictors, fit it, and print the regression summary.
model = smf.ols("Ozone ~ Solar + Temp", data=dat)
results = model.fit()
print(results.summary())

# Variance inflation factors: one value per column of the model's design
# matrix (model.exog), labelled with the matching names in model.exog_names.
design = model.exog
col = design.shape[1]  # number of columns in the design matrix
vifs = [variance_inflation_factor(design, idx) for idx in range(col)]
vif_table = pd.DataFrame({"VIF": vifs}, index=model.exog_names)
print(vif_table)

                            OLS Regression Results                            
Dep. Variable:                  Ozone   R-squared:                       0.510
Model:                            OLS   Adj. R-squared:                  0.501
Method:                 Least Squares   F-statistic:                     56.28
Date:                Mon, 24 Dec 2018   Prob (F-statistic):           1.80e-17
Time:                        15:18:44   Log-Likelihood:                -506.41
No. Observations:                 111   AIC:                             1019.
Df Residuals:                     108   BIC:                             1027.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   -145.7032     18.447     -7.899      0.0

### 線形回帰モデル (独立変数 Wind, Tempの２つ)  Linear regression model (IVs are "Wind" and "Temp")

In [None]:
# Specify an ordinary-least-squares model with Ozone as the response and
# Wind and Temp as predictors, fit it, and print the regression summary.
model = smf.ols("Ozone ~ Wind + Temp", data=dat)
results = model.fit()
print(results.summary())

# Variance inflation factors: one value per column of the model's design
# matrix (model.exog), labelled with the matching names in model.exog_names.
design = model.exog
col = design.shape[1]  # number of columns in the design matrix
vifs = [variance_inflation_factor(design, idx) for idx in range(col)]
vif_table = pd.DataFrame({"VIF": vifs}, index=model.exog_names)
print(vif_table)

### 線形回帰モデル (独立変数 Solar, Wind, Tempすべて)  Linear regression model (IVs are "Solar", "Wind" and "Temp")

In [None]:
# Specify an ordinary-least-squares model with Ozone as the response and
# Solar, Wind and Temp as predictors, fit it, and print the regression summary.
model = smf.ols("Ozone ~ Solar + Wind + Temp", data=dat)
results = model.fit()
print(results.summary())

# Variance inflation factors: one value per column of the model's design
# matrix (model.exog), labelled with the matching names in model.exog_names.
design = model.exog
col = design.shape[1]  # number of columns in the design matrix
vifs = [variance_inflation_factor(design, idx) for idx in range(col)]
vif_table = pd.DataFrame({"VIF": vifs}, index=model.exog_names)
print(vif_table)