In [11]:
pip install pandas statsmodels

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Import Library

In [12]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.diagnostic import het_breuschpagan, het_white

### Load Data

In [13]:
# Membaca data dari file CSV
data = pd.read_csv('C:/Users/User/Documents/Semester 5/data/PDRB Menurut Pengeluaran.csv')

# Filter data untuk kedua provinsi
data_filtered = data[data['Provinsi'].isin(['PAPUA', 'ACEH'])]
data_filtered

Unnamed: 0,Provinsi,Triwulan,Pengeluaran_Konsumsi_Rumah_Tangga,Pengeluaran_Konsumsi_LNPRT,Pengeluaran_Konsumsi_Pemerintah,PDRB
0,ACEH,Triwulan 1,153.5,149.8,170.39,153.11
1,ACEH,Triwulan2,155.04,149.93,176.95,153.82
2,ACEH,Triwulan 3,155.63,149.98,175.16,154.96
3,ACEH,Triwulan 4,156.07,150.9,178.5,156.18
132,PAPUA,Triwulan 1,175.03,172.66,168.11,159.46
133,PAPUA,Triwulan2,179.07,176.18,170.99,158.05
134,PAPUA,Triwulan 3,180.28,177.6,170.32,157.27
135,PAPUA,Triwulan 4,180.08,177.46,170.81,158.36


### Chow Test

In [14]:
# Membuat model regresi untuk kedua provinsi
# Model untuk PAPUA
model_papua = ols('PDRB ~ Pengeluaran_Konsumsi_Rumah_Tangga + Pengeluaran_Konsumsi_LNPRT + Pengeluaran_Konsumsi_Pemerintah', data=data_filtered[data_filtered['Provinsi'] == 'PAPUA']).fit()

# Model untuk ACEH
model_aceh = ols('PDRB ~ Pengeluaran_Konsumsi_Rumah_Tangga + Pengeluaran_Konsumsi_LNPRT + Pengeluaran_Konsumsi_Pemerintah', data=data_filtered[data_filtered['Provinsi'] == 'ACEH']).fit()

# Model gabungan
model_combined = ols('PDRB ~ Pengeluaran_Konsumsi_Rumah_Tangga + Pengeluaran_Konsumsi_LNPRT + Pengeluaran_Konsumsi_Pemerintah + C(Provinsi)', data=data_filtered).fit()

# Menghitung nilai R-squared dari masing-masing model
r_squared_papua = model_papua.rsquared
r_squared_aceh = model_aceh.rsquared
r_squared_combined = model_combined.rsquared

# Menghitung jumlah observasi untuk masing-masing model
n_aceh = model_papua.nobs
n_sumut = model_aceh.nobs
n_combined = model_combined.nobs

# Menghitung Chow test statistic
numerator = (r_squared_combined - (r_squared_papua + r_squared_aceh)) / 2
denominator = (1 - r_squared_combined) / (n_combined - 4)  # 4 adalah jumlah parameter dalam model
chow_statistic = numerator / denominator

# Menghitung nilai p-value dari Chow test statistic
from scipy.stats import f

p_value = 1 - f.cdf(chow_statistic, 2, n_combined - 4)

# Menampilkan hasil
print(f'Chow Test Statistic: {chow_statistic}')
print(f'P-Value: {p_value}')

if p_value < 0.05:
    print("Tolak H0: Lakukan Hausman Test")
else:
    print("Gagal Tolak H0: Lakukan BP-LM Test")

Chow Test Statistic: -22.455121798234796
P-Value: 1.0
Gagal Tolak H0: Lakukan BP-LM Test


### BP-LM Test

In [15]:
# Melakukan BP-LM test
bp_test = het_breuschpagan(model_combined.resid, model_combined.model.exog)

# Hasil BP-LM test
bp_test_results = {
    'LM Statistic': bp_test[0],
    'LM p-value': bp_test[1],
    'F Statistic': bp_test[2],
    'F p-value': bp_test[3]
}

# Menampilkan hasil BP-LM test
print("Hasil BP-LM Test:")
for key, value in bp_test_results.items():
    print(f"{key}: {value}")

# Interpretasi hasil
if bp_test[1] < 0.05:
    print("Tolak H0: Lakukan Hausman Test.")
else:
    print("Gagal tolak H0: Gunakan Common Effect Model.")

Hasil BP-LM Test:
LM Statistic: 5.789756221891169
LM p-value: 0.21540921682359898
F Statistic: 1.9646326841529804
F p-value: 0.3028670242284117
Gagal tolak H0: Gunakan Common Effect Model.


### Regresi Data Model Common Effect Model

In [16]:
# Menambahkan konstanta untuk model
data_filtered['Intercept'] = 1

# Membuat model regresi Pooled OLS
model_pooled = sm.OLS(data_filtered['PDRB'], data_filtered[['Intercept', 'Pengeluaran_Konsumsi_Rumah_Tangga', 'Pengeluaran_Konsumsi_LNPRT', 'Pengeluaran_Konsumsi_Pemerintah']]).fit()

# Menampilkan ringkasan hasil model
print(model_pooled.summary())

                            OLS Regression Results                            
Dep. Variable:                   PDRB   R-squared:                       0.761
Model:                            OLS   Adj. R-squared:                  0.582
Method:                 Least Squares   F-statistic:                     4.249
Date:                Sat, 30 Nov 2024   Prob (F-statistic):             0.0980
Time:                        00:06:48   Log-Likelihood:                -11.672
No. Observations:                   8   AIC:                             31.34
Df Residuals:                       4   BIC:                             31.66
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_filtered['Intercept'] = 1


### Uji Heterokedastisitas

In [17]:
# Menambahkan konstanta untuk model
data_filtered['Intercept'] = 1

# Membuat model regresi Pooled OLS
model_pooled = sm.OLS(data_filtered['PDRB'], data_filtered[['Intercept', 'Pengeluaran_Konsumsi_Rumah_Tangga', 'Pengeluaran_Konsumsi_LNPRT', 'Pengeluaran_Konsumsi_Pemerintah']]).fit()

# Uji Heteroskedastisitas Breusch-Pagan
bp_test = het_breuschpagan(model_pooled.resid, model_pooled.model.exog)
print("Hasil Uji Heteroskedastisitas Breusch-Pagan:")
print(f"LM Statistic: {bp_test[0]}")
print(f"LM p-value: {bp_test[1]}")
print(f"F Statistic: {bp_test[2]}")
print(f"F p-value: {bp_test[3]}")

# Uji Heteroskedastisitas White
white_test = het_white(model_pooled.resid, model_pooled.model.exog)
print("\nHasil Uji Heteroskedastisitas White:")
print(f"LM Statistic: {white_test[0]}")
print(f"LM p-value: {white_test[1]}")
print(f"F Statistic: {white_test[2]}")
print(f"F p-value: {white_test[3]}")

Hasil Uji Heteroskedastisitas Breusch-Pagan:
LM Statistic: 1.5569104962218896
LM p-value: 0.6691990488209734
F Statistic: 0.3221871526713897
F p-value: 0.8104275808598106

Hasil Uji Heteroskedastisitas White:
LM Statistic: 8.0
LM p-value: 0.3325939025993081
F Statistic: nan
F p-value: nan


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_filtered['Intercept'] = 1
