# 重回帰分析および構造方程式モデルの実装

----

ライブラリのインポート

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import semopy as sem

import graphviz

### シミュレーションデータの作成  
アイスが売れた数 = a + (-2 * 価格) + (3 * 気温) + 誤差項 （a は切片で、以下のコードでは a = 10 としている）

In [2]:
# Fix the global NumPy seed so the simulated data set is reproducible
np.random.seed(77)

# Number of simulated observations
n = 300

# Explanatory variables: price ~ Normal(mean 200, sd 20),
# temperature ~ Uniform on [0, 40)
price = np.random.normal(loc=200, scale=20, size=n)
temp = np.random.uniform(low=0, high=40, size=n)

# Error term ~ Normal(mean 0, sd 10)
e = np.random.normal(loc=0, scale=10, size=n)

# Ice cream units sold: intercept 10, price effect -2, temperature effect +3, plus noise
q = 10 - 2 * price + 3 * temp + e

In [3]:
# Collect the simulated series into one table: the outcome plus both regressors
df = pd.DataFrame(dict(quantity=q, price=price, temperature=temp))

In [4]:
# Show the simulated data set as the cell output
df

Unnamed: 0,quantity,price,temperature
0,-339.903523,204.488897,22.119739
1,-320.213616,213.230629,28.569477
2,-255.717824,188.404847,38.656291
3,-300.597746,208.156226,38.516902
4,-395.951054,209.329587,6.422726
...,...,...,...
295,-320.716243,191.322213,14.559923
296,-310.860445,185.473721,19.801930
297,-395.660084,206.049081,1.986467
298,-374.059167,225.408076,23.041919


### 重回帰分析

In [5]:
# Outcome: units sold
y = df['quantity']
# Design matrix: both regressors plus the constant column added by add_constant
X = sm.add_constant(df[['price', 'temperature']])

# Ordinary least squares regression of quantity on price and temperature
model = sm.OLS(endog=y, exog=X)
res = model.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:               quantity   R-squared:                       0.967
Model:                            OLS   Adj. R-squared:                  0.966
Method:                 Least Squares   F-statistic:                     4302.
Date:                Wed, 11 Dec 2024   Prob (F-statistic):          5.13e-220
Time:                        17:53:49   Log-Likelihood:                -1115.3
No. Observations:                 300   AIC:                             2237.
Df Residuals:                     297   BIC:                             2248.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          12.2704      6.235      1.968      

### 構造方程式モデル

In [6]:
# Structural equation model, built with semopy (https://semopy.com/)
# The model description regresses quantity on price and temperature.
model_sem = sem.Model('quantity ~ price + temperature')

# Left at its default, semopy minimises the Wishart maximum-likelihood objective
# with sequential least squares programming (SLSQP). This call overrides the
# objective and requests GLS explicitly.
result = model_sem.fit(data=df, obj="GLS")
print(result)


Name of objective: GLS
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 0.000
Number of iterations: 28
Params: -2.010 3.013 99.270


In [None]:
# Evaluation of the fitted SEM

## Overall fit statistics
# Stored as `sem_stats` rather than `eval` so that Python's builtin eval()
# is not shadowed for the rest of the kernel session.
sem_stats = sem.calc_stats(model_sem)
print("モデルの評価")
# Transposed so that each statistic is printed on its own row
print(sem_stats.T)

print('----------')

# Parameter-level results returned by the model's inspect() method
ins = model_sem.inspect()
print("モデルの詳細")
print(ins)

モデルの評価
                      Value
DoF            3.000000e+00
DoF Baseline   5.000000e+00
chi2           2.026843e-05
chi2 p-value   1.000000e+00
chi2 Baseline  2.034331e+05
CFI            1.000015e+00
GFI            1.000000e+00
AGFI           1.000000e+00
NFI            1.000000e+00
TLI            1.000025e+00
RMSEA          0.000000e+00
AIC            6.000000e+00
BIC            1.711135e+01
LogLik         3.376918e-08
モデルの詳細
       lval  op         rval   Estimate  Std. Err    z-value  p-value
0  quantity   ~        price  -2.010228  0.030060 -66.872894      0.0
1  quantity   ~  temperature   3.013471  0.052730  57.149560      0.0
2  quantity  ~~     quantity  99.269594  8.105328  12.247449      0.0


In [9]:
# Graphical analysis: pass the SEM model to semopy's semplot.
# The second argument is a file name; presumably the diagram image is written
# there (confirm against the semopy documentation).
plot = sem.semplot(model_sem, "sem_tutorial.png")
# Bare expression so the returned object is shown as the cell output
plot