# GLMM: population change rate with island and year random effects

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gpboost as gpb
from patsy import dmatrix

In [4]:
filepath = '../../../../data/processed/df_filtered_5years.xlsx'

# Load the panel and order it chronologically within each island, so that
# pct_change() below always compares a row with the preceding period of the
# same island rather than with whichever row happens to precede it in the file.
df = (
    pd.read_excel(filepath)
    .drop(columns='special')
    .sort_values(['island_id', 'year'])
)

# Period-over-period population change per island. fill_method=None keeps
# missing populations missing instead of silently forward-filling them before
# differencing (the implicit padding default is deprecated in recent pandas).
df['population_change_rate'] = (
    df.groupby('island_id')['population'].pct_change(fill_method=None)
)
df['log_income'] = np.log(df['income'])

# A zero previous population or a zero income produces +/-inf, which dropna()
# does not remove; turn those into NaN first. dropna() then discards every row
# with a missing value in any column, including each island's first period,
# whose change rate is undefined.
derived_cols = ['population_change_rate', 'log_income']
df[derived_cols] = df[derived_cols].replace([np.inf, -np.inf], np.nan)
df = df.dropna()

# Interquartile range (IQR) of the change rate
Q1 = df['population_change_rate'].quantile(0.25)
Q3 = df['population_change_rate'].quantile(0.75)
IQR = Q3 - Q1

# Outlier fences: 1.5 * IQR beyond the quartiles
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Keep only rows inside the fences (both bounds inclusive). The copy makes
# df_filtered independent of df, so later column assignments are unambiguous.
df_filtered = df[df['population_change_rate'].between(lower_bound, upper_bound)].copy()

  df['population_change_rate'] = df.groupby('island_id')['population'].pct_change()


In [5]:
# Grouping variables for the grouped random effects: one column per effect
# (island and year).
group = df_filtered[['island_id', 'year']]
y = df_filtered['population_change_rate']
# Fixed-effects design matrix built from the formula; patsy adds the
# Intercept column itself.
X = dmatrix('dummy_after_bridge_opened + log_income', data=df_filtered, return_type='dataframe')

model = gpb.GPModel(group_data=group, likelihood='gaussian')  # Gaussian (normal) likelihood
# 'std_dev': True requests standard deviations for the estimated parameters,
# which the summary reports next to each estimate.
model.fit(y=y, X=X, params={'std_dev': True})

# In-sample predictive mean and the corresponding residuals.
pred = model.predict(X_pred=X, group_data_pred=group)['mu']
residuals = y - pred

# summary() prints the report itself and returns the model object, so wrapping
# it in print() additionally emitted the object's repr after the table. The
# trailing semicolon keeps the notebook from echoing the return value.
model.summary();

Model summary:
 Log-lik      AIC     BIC
  510.05 -1008.11 -982.11
Nb. observations: 563
Nb. groups: 151 (island_id), 8 (year)
-----------------------------------------------------
Covariance parameters (random effects):
            Param.  Std. dev.
Error_term  0.0083     0.0006
island_id   0.0013     0.0004
year        0.0013     0.0008
-----------------------------------------------------
Linear regression coefficients (fixed effects):
                           Param.  Std. dev.  z value  P(>|z|)
Intercept                 -0.0658     0.0281  -2.3393   0.0193
dummy_after_bridge_opened  0.0265     0.0111   2.3906   0.0168
log_income                -0.0093     0.0029  -3.2073   0.0013
<gpboost.basic.GPModel object at 0x7f7794b88d00>
