In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [86]:
filepath = '../../data/raw/df.csv'
df = pd.read_csv(filepath)

df['population'] = pd.to_numeric(df['population'], errors='coerce')

df = df.dropna()

df['year'] = df['year'].astype('category')
df['island_id'] = df['island_id'].astype('category')
df = pd.get_dummies(df, columns=['year', 'island_id'], dtype=int, drop_first=True)

year_dummies = [col for col in df.columns if col.startswith('year')]
island_dummies = [col for col in df.columns if col.startswith('island_id')]

df.head()

Unnamed: 0,island,region_code,region,prefecture_code,pref,population,dummy_has_bridge,dummy_connect_mainland_by_bridge,bridge_opened_year,dummy_after_bridge_build,...,island_id_150,island_id_151,island_id_152,island_id_153,island_id_154,island_id_155,island_id_156,island_id_157,island_id_162,island_id_163
4,島後,32528.0,隠岐の島町,32,島根県,17259.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,島後,32528.0,隠岐の島町,32,島根県,17016.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,島後,32528.0,隠岐の島町,32,島根県,16779.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,島後,32528.0,隠岐の島町,32,島根県,16417.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,島後,32528.0,隠岐の島町,32,島根県,16099.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Pooled OLS

In [88]:
endog = df['population']
exog = df[['dummy_after_bridge_build', 'income']]
exog = sm.add_constant(exog)

model = sm.OLS(endog, exog)
results = model.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             population   R-squared:                       0.045
Model:                            OLS   Adj. R-squared:                  0.043
Method:                 Least Squares   F-statistic:                     34.15
Date:                Mon, 30 Sep 2024   Prob (F-statistic):           3.21e-15
Time:                        10:13:35   Log-Likelihood:                -14050.
No. Observations:                1461   AIC:                         2.811e+04
Df Residuals:                    1458   BIC:                         2.812e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

## Fixed effects

In [89]:
endog = df['population']
exog = df[['dummy_after_bridge_build', 'income'] + island_dummies]

model = sm.OLS(endog, exog)
results = model.fit()

print(results.summary())

                                 OLS Regression Results                                
Dep. Variable:             population   R-squared (uncentered):                   0.815
Model:                            OLS   Adj. R-squared (uncentered):              0.797
Method:                 Least Squares   F-statistic:                              47.01
Date:                Mon, 30 Sep 2024   Prob (F-statistic):                        0.00
Time:                        10:13:40   Log-Likelihood:                         -12918.
No. Observations:                1461   AIC:                                  2.609e+04
Df Residuals:                    1336   BIC:                                  2.675e+04
Df Model:                         125                                                  
Covariance Type:            nonrobust                                                  
                               coef    std err          t      P>|t|      [0.025      0.975]
---------------------------

## Two-way fixed effects

In [90]:
endog = df['population']
exog = df[['dummy_after_bridge_build', 'income'] + year_dummies + island_dummies]

model = sm.OLS(endog, exog)
results = model.fit()

print(results.summary())

                                 OLS Regression Results                                
Dep. Variable:             population   R-squared (uncentered):                   0.937
Model:                            OLS   Adj. R-squared (uncentered):              0.930
Method:                 Least Squares   F-statistic:                              132.6
Date:                Mon, 30 Sep 2024   Prob (F-statistic):                        0.00
Time:                        10:13:46   Log-Likelihood:                         -12126.
No. Observations:                1461   AIC:                                  2.455e+04
Df Residuals:                    1313   BIC:                                  2.533e+04
Df Model:                         148                                                  
Covariance Type:            nonrobust                                                  
                               coef    std err          t      P>|t|      [0.025      0.975]
---------------------------