# 頻度論

In [19]:
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import japanize_matplotlib
import seaborn as sns
import statsmodels.api as sm

from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from patsy import dmatrices, dmatrix

# Shared figure-size tuple, presumably (width, height) in inches for
# matplotlib figures - TODO confirm against the plotting cells that use it.
figsize = (10, 5)

In [2]:
# Excel workbook with the analysis data, given relative to the notebook's
# working directory (judging by the file name, an already-filtered dataset).
path = '../../data/processed/df_filtered.xlsx'
# Load the workbook into the DataFrame that the regression cells read from.
df = pd.read_excel(path)

## 島ごとに異なるタイムトレンドを入れたTWFE

In [17]:
# Two-way fixed effects specification for log population: island dummies,
# year dummies, an island-specific linear trend in year, and the treatment
# indicator. The trailing "- 1" removes patsy's global intercept.
twfe_formula = 'log_pop ~ C(island_ids) + C(year) + C(island_ids):year + after_treatment - 1'

# Build the response and the design matrix in a single patsy call. patsy drops
# rows with missing values by default, so constructing X alone (and taking y
# straight from df) leaves y with rows X no longer has whenever a regressor is
# missing; building them together keeps both on the same rows.
y_design, X = dmatrices(twfe_formula, data=df, return_type='dataframe')
y = y_design['log_pop']  # keep y a Series, as downstream code expects

model = sm.OLS(y, X)
# NOTE(review): fit() is called with the default (non-robust) covariance.
# For panel data like this, standard errors clustered on island_ids are the
# usual choice - consider
# model.fit(cov_type='cluster', cov_kwds={'groups': df.loc[X.index, 'island_ids']}).
result = model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                log_pop   R-squared:                       0.996
Model:                            OLS   Adj. R-squared:                  0.995
Method:                 Least Squares   F-statistic:                     895.0
Date:                Sun, 12 Jan 2025   Prob (F-statistic):               0.00
Time:                        18:07:49   Log-Likelihood:                 491.87
No. Observations:                 794   AIC:                            -647.7
Df Residuals:                     626   BIC:                             138.0
Df Model:                         167                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
C(island_ids)[67]         

In [21]:
# Design for the penalized fit: island dummies, year dummies, an
# island-specific linear trend in year, and the treatment indicator, with
# patsy's global intercept removed by the trailing "- 1".
twfe_formula = 'log_pop ~ C(island_ids) + C(year) + C(island_ids):year + after_treatment - 1'

# Build the response and the design matrix in a single patsy call. patsy drops
# rows with missing values by default, so constructing X alone (and taking y
# straight from df) gives X and y different lengths whenever a regressor is
# missing, and the fit below would then fail.
y_design, X = dmatrices(twfe_formula, data=df, return_type='dataframe')
y = y_design['log_pop']  # keep y a Series, as downstream code expects

# Lasso regression on standardized regressors. The scaler runs before the
# Lasso step, so the fitted coefficients refer to the standardized columns,
# not to the raw units of the design matrix.
# NOTE(review): alpha=0.1 is hard-coded; no tuning of it is visible here.
lasso = Pipeline([
    ('scaler', StandardScaler()),  # standardize each design column
    ('lasso', Lasso(alpha=0.1)),   # L1-penalized linear regression
])

lasso.fit(X, y)

# Pull the fitted Lasso step out of the pipeline and read its coefficients.
lasso_model = lasso.named_steps['lasso']
coefficients = lasso_model.coef_

# Pair every design-matrix column name with its coefficient and show the table.
coef_df = pd.DataFrame({'Variable': X.columns, 'Coefficient': coefficients})
print(coef_df)

                    Variable  Coefficient
0          C(island_ids)[67]    -0.000000
1          C(island_ids)[68]    -0.000000
2          C(island_ids)[69]    -0.000000
3          C(island_ids)[70]     0.000000
4          C(island_ids)[71]    -0.010582
..                       ...          ...
164  C(island_ids)[217]:year    -0.000000
165  C(island_ids)[218]:year     0.000000
166  C(island_ids)[221]:year    -0.019012
167  C(island_ids)[223]:year     0.000000
168          after_treatment     0.222840

[169 rows x 2 columns]
