In [None]:
from google.colab import files
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.stattools import jarque_bera
from statsmodels.graphics.api import plot_leverage_resid2, influence_plot
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.stattools import durbin_watson
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from itertools import combinations

In [None]:
uploaded = files.upload()

Saving gdp_norm_monthly_agg.csv to gdp_norm_monthly_agg.csv
Saving general_cpi_monthly_agg.csv to general_cpi_monthly_agg.csv


In [None]:
df_gdp_norm_monthly = pd.read_csv('gdp_norm_monthly_agg.csv')
df_cpi_monthly = pd.read_csv('general_cpi_monthly_agg.csv')

## Chile GDP

In [None]:
df_gdp_norm_monthly = df_gdp_norm_monthly[['YEAR', 'MONTH', 'GDP_NORMALIZED', 'LIKELIHOODOFDEFAULT']]
df_gdp_norm_monthly

Unnamed: 0,YEAR,MONTH,GDP_NORMALIZED,LIKELIHOODOFDEFAULT
0,2006,1,100.776983,0.019363
1,2006,2,100.804387,0.017998
2,2006,3,100.858167,0.036930
3,2006,4,100.934271,0.036691
4,2006,5,101.023159,0.026561
...,...,...,...,...
186,2021,7,100.708930,0.144391
187,2021,8,101.208495,0.166013
188,2021,9,101.663892,0.157074
189,2021,10,102.068399,0.139014


In [None]:
# Standardizing
scaler = StandardScaler()
df_gdp_norm_monthly['GDP'] = scaler.fit_transform(df_gdp_norm_monthly[['GDP_NORMALIZED']])

# Converting default rate to percentage
df_gdp_norm_monthly['Default%'] = df_gdp_norm_monthly['LIKELIHOODOFDEFAULT'] * 100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_gdp_norm_monthly['GDP'] = scaler.fit_transform(df_gdp_norm_monthly[['GDP_NORMALIZED']])


In [None]:
df_gdp_norm_monthly

Unnamed: 0,YEAR,MONTH,GDP_NORMALIZED,LIKELIHOODOFDEFAULT,GDP,Default%
0,2006,1,100.776983,0.019363,0.486165,1.9363
1,2006,2,100.804387,0.017998,0.502420,1.7998
2,2006,3,100.858167,0.036930,0.534320,3.6930
3,2006,4,100.934271,0.036691,0.579462,3.6691
4,2006,5,101.023159,0.026561,0.632187,2.6561
...,...,...,...,...,...,...
186,2021,7,100.708930,0.144391,0.445799,14.4391
187,2021,8,101.208495,0.166013,0.742122,16.6013
188,2021,9,101.663892,0.157074,1.012246,15.7074
189,2021,10,102.068399,0.139014,1.252184,13.9014


In [None]:
df_gdp = df_gdp_norm_monthly[['YEAR', 'MONTH', 'GDP', 'Default%']]
df_gdp

Unnamed: 0,YEAR,MONTH,GDP,Default%
0,2006,1,0.486165,1.9363
1,2006,2,0.502420,1.7998
2,2006,3,0.534320,3.6930
3,2006,4,0.579462,3.6691
4,2006,5,0.632187,2.6561
...,...,...,...,...
186,2021,7,0.445799,14.4391
187,2021,8,0.742122,16.6013
188,2021,9,1.012246,15.7074
189,2021,10,1.252184,13.9014


## Chile GDP Moving Average (window=33)

In [None]:
df_gdp_norm_monthly

Unnamed: 0,YEAR,MONTH,GDP_NORMALIZED,LIKELIHOODOFDEFAULT,GDP,Default%
0,2006,1,100.776983,0.019363,0.486165,1.9363
1,2006,2,100.804387,0.017998,0.502420,1.7998
2,2006,3,100.858167,0.036930,0.534320,3.6930
3,2006,4,100.934271,0.036691,0.579462,3.6691
4,2006,5,101.023159,0.026561,0.632187,2.6561
...,...,...,...,...,...,...
186,2021,7,100.708930,0.144391,0.445799,14.4391
187,2021,8,101.208495,0.166013,0.742122,16.6013
188,2021,9,101.663892,0.157074,1.012246,15.7074
189,2021,10,102.068399,0.139014,1.252184,13.9014


In [None]:
df_gdp_ma = df_gdp_norm_monthly.copy()
df_gdp_ma['GDP_MA'] = df_gdp_ma['GDP_NORMALIZED'].rolling(window=33).mean()

# Standardizing
scaler = StandardScaler()
df_gdp_ma['GDP_MA_st'] = scaler.fit_transform(df_gdp_ma[['GDP_MA']])

df_gdp_ma = df_gdp_ma.dropna()
df_gdp_ma = df_gdp_ma[['YEAR', 'MONTH', 'GDP_MA_st', 'Default%']]
df_gdp_ma

Unnamed: 0,YEAR,MONTH,GDP_MA_st,Default%
32,2008,9,1.857999,5.7256
33,2008,10,1.825485,5.7546
34,2008,11,1.776311,5.5915
35,2008,12,1.709978,5.6387
36,2009,1,1.626467,5.2036
...,...,...,...,...
186,2021,7,-2.212050,14.4391
187,2021,8,-2.196736,16.6013
188,2021,9,-2.163385,15.7074
189,2021,10,-2.113988,13.9014


## Chile GDP Growth Rate

## CPI

In [None]:
df_cpi_monthly = df_cpi_monthly[['YEAR', 'MONTH', 'IPC', 'LIKELIHOODOFDEFAULT']]
df_cpi_monthly

Unnamed: 0,YEAR,MONTH,IPC,LIKELIHOODOFDEFAULT
0,2009,1,59.05,0.052036
1,2009,2,58.28,0.050888
2,2009,3,58.30,0.050589
3,2009,4,58.21,0.058679
4,2009,5,58.09,0.065349
...,...,...,...,...
176,2023,9,100.53,0.293259
177,2023,10,100.82,0.267782
178,2023,11,101.58,0.360104
179,2023,12,101.04,0.439466


In [None]:
# Standardizing
scaler = StandardScaler()
df_cpi_monthly['CPI'] = scaler.fit_transform(df_cpi_monthly[['IPC']])

# Converting default rate to percentage
df_cpi_monthly['Default%'] = df_cpi_monthly['LIKELIHOODOFDEFAULT'] * 100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cpi_monthly['CPI'] = scaler.fit_transform(df_cpi_monthly[['IPC']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cpi_monthly['Default%'] = df_cpi_monthly['LIKELIHOODOFDEFAULT'] * 100


In [None]:
df_cpi = df_cpi_monthly[['YEAR', 'MONTH', 'CPI', 'Default%']]
df_cpi

Unnamed: 0,YEAR,MONTH,CPI,Default%
0,2009,1,-1.188744,5.2036
1,2009,2,-1.252645,5.0888
2,2009,3,-1.250985,5.0589
3,2009,4,-1.258454,5.8679
4,2009,5,-1.268413,6.5349
...,...,...,...,...
176,2023,9,2.253594,29.3259
177,2023,10,2.277660,26.7782
178,2023,11,2.340731,36.0104
179,2023,12,2.295917,43.9466


## CPI MA

In [43]:
df_cpi_monthly

Unnamed: 0,YEAR,MONTH,IPC,LIKELIHOODOFDEFAULT,CPI,Default%
0,2009,1,59.05,0.052036,-1.188744,5.2036
1,2009,2,58.28,0.050888,-1.252645,5.0888
2,2009,3,58.30,0.050589,-1.250985,5.0589
3,2009,4,58.21,0.058679,-1.258454,5.8679
4,2009,5,58.09,0.065349,-1.268413,6.5349
...,...,...,...,...,...,...
176,2023,9,100.53,0.293259,2.253594,29.3259
177,2023,10,100.82,0.267782,2.277660,26.7782
178,2023,11,101.58,0.360104,2.340731,36.0104
179,2023,12,101.04,0.439466,2.295917,43.9466


In [45]:
df_cpi_ma = df_cpi_monthly.copy()
df_cpi_ma['CPI_MA'] = df_cpi_ma['IPC'].rolling(window=26).mean()

# Standardizing
scaler = StandardScaler()
df_cpi_ma['CPI_MA_st'] = scaler.fit_transform(df_cpi_ma[['CPI_MA']])

df_cpi_ma = df_cpi_ma.dropna()
df_cpi_ma = df_cpi_ma[['YEAR', 'MONTH', 'CPI_MA_st', 'Default%']]
df_cpi_ma

Unnamed: 0,YEAR,MONTH,CPI_MA_st,Default%
25,2011,2,-1.443616,9.5455
26,2011,3,-1.437965,9.7173
27,2011,4,-1.428440,9.4676
28,2011,5,-1.418027,14.0466
29,2011,6,-1.406807,9.7461
...,...,...,...,...
176,2023,9,2.240655,29.3259
177,2023,10,2.310762,26.7782
178,2023,11,2.379941,36.0104
179,2023,12,2.442379,43.9466


## CPI Growth Rate

In [None]:
df_cpi_gr = df_cpi.copy()

In [None]:
#Calculate growth rate into column
df_cpi_gr['CPI'] = pd.to_numeric(df_cpi_gr['CPI'], errors='coerce')
df_cpi_gr['CPI_GR'] = df_cpi_gr['CPI'].pct_change()
df_cpi_gr['CPI_GR'] = df_cpi_gr['CPI_GR'] * 100
df_cpi_gr

Unnamed: 0,YEAR,MONTH,CPI,Default%,CPI_GR
0,2009,1,-1.188744,5.2036,
1,2009,2,-1.252645,5.0888,5.375477
2,2009,3,-1.250985,5.0589,-0.132500
3,2009,4,-1.258454,5.8679,0.597042
4,2009,5,-1.268413,6.5349,0.791332
...,...,...,...,...,...
176,2023,9,2.253594,29.3259,2.375056
177,2023,10,2.277660,26.7782,1.067916
178,2023,11,2.340731,36.0104,2.769105
179,2023,12,2.295917,43.9466,-1.914507


In [None]:
# Standardizing
scaler = StandardScaler()
df_cpi_gr['CPI_GR_st'] = scaler.fit_transform(df_cpi_gr[['CPI_GR']])
df_cpi_gr

Unnamed: 0,YEAR,MONTH,CPI,Default%,CPI_GR,CPI_GR_st
0,2009,1,-1.188744,5.2036,,
1,2009,2,-1.252645,5.0888,5.375477,-0.056583
2,2009,3,-1.250985,5.0589,-0.132500,-0.079953
3,2009,4,-1.258454,5.8679,0.597042,-0.076858
4,2009,5,-1.268413,6.5349,0.791332,-0.076034
...,...,...,...,...,...,...
176,2023,9,2.253594,29.3259,2.375056,-0.069314
177,2023,10,2.277660,26.7782,1.067916,-0.074860
178,2023,11,2.340731,36.0104,2.769105,-0.067642
179,2023,12,2.295917,43.9466,-1.914507,-0.087515


In [None]:
df_cpi_gr_final = df_cpi_gr[['YEAR', 'MONTH', 'CPI_GR_st', 'Default%']].dropna()
df_cpi_gr_final

Unnamed: 0,YEAR,MONTH,CPI_GR_st,Default%
1,2009,2,-0.056583,5.0888
2,2009,3,-0.079953,5.0589
3,2009,4,-0.076858,5.8679
4,2009,5,-0.076034,6.5349
5,2009,6,-0.084666,6.7624
...,...,...,...,...
176,2023,9,-0.069314,29.3259
177,2023,10,-0.074860,26.7782
178,2023,11,-0.067642,36.0104
179,2023,12,-0.087515,43.9466


## CPI Growth Rate Moving Average (window=26)

In [None]:
df_cpi_grma = df_cpi_gr.copy().dropna()
df_cpi_grma['CPI_GRMA'] = df_cpi_grma['CPI_GR'].rolling(window=26).mean()

# Standardizing
scaler = StandardScaler()
df_cpi_grma['CPI_GRMA_st'] = scaler.fit_transform(df_cpi_grma[['CPI_GRMA']])

df_cpi_grma_final = df_cpi_grma.dropna()
df_cpi_grma_final = df_cpi_grma_final[['YEAR', 'MONTH', 'CPI_GRMA_st', 'Default%']]
df_cpi_grma_final

Unnamed: 0,YEAR,MONTH,CPI_GRMA_st,Default%
26,2011,3,-0.468348,9.7173
27,2011,4,-0.473968,9.4676
28,2011,5,-0.475407,14.0466
29,2011,6,-0.476620,9.7461
30,2011,7,-0.477734,9.9735
...,...,...,...,...
176,2023,9,-0.373667,29.3259
177,2023,10,-0.375310,26.7782
178,2023,11,-0.381104,36.0104
179,2023,12,-0.391060,43.9466


## Merge

In [47]:
# Create dfs - put unemployment at top since it has data until 2024
dfs = [df_gdp_ma, df_cpi_ma]

# Now you can merge without worrying about duplicate 'Default%' columns
merged_df = dfs[0]
for df in dfs[1:]:
    merged_df = pd.merge(merged_df, df, on=['YEAR','MONTH'], how='outer')

In [48]:
merged_df

Unnamed: 0,YEAR,MONTH,GDP_MA_st,Default%_x,CPI_MA_st,Default%_y
0,2008,9,1.857999,5.7256,,
1,2008,10,1.825485,5.7546,,
2,2008,11,1.776311,5.5915,,
3,2008,12,1.709978,5.6387,,
4,2009,1,1.626467,5.2036,,
...,...,...,...,...,...,...
180,2023,9,,,2.240655,29.3259
181,2023,10,,,2.310762,26.7782
182,2023,11,,,2.379941,36.0104
183,2023,12,,,2.442379,43.9466


In [49]:
merged_df = merged_df.dropna()
merged_df

Unnamed: 0,YEAR,MONTH,GDP_MA_st,Default%_x,CPI_MA_st,Default%_y
29,2011,2,-1.305158,9.5455,-1.443616,9.5455
30,2011,3,-1.362532,9.7173,-1.437965,9.7173
31,2011,4,-1.406466,9.4676,-1.428440,9.4676
32,2011,5,-1.435993,14.0466,-1.418027,14.0466
33,2011,6,-1.450633,9.7461,-1.406807,9.7461
...,...,...,...,...,...,...
154,2021,7,-2.212050,14.4391,0.781404,14.4391
155,2021,8,-2.196736,16.6013,0.805701,16.6013
156,2021,9,-2.163385,15.7074,0.833308,15.7074
157,2021,10,-2.113988,13.9014,0.864870,13.9014


In [50]:
final_df = merged_df[['YEAR', 'MONTH', 'GDP_MA_st', 'CPI_MA_st', 'Default%_x']]
final_df

Unnamed: 0,YEAR,MONTH,GDP_MA_st,CPI_MA_st,Default%_x
29,2011,2,-1.305158,-1.443616,9.5455
30,2011,3,-1.362532,-1.437965,9.7173
31,2011,4,-1.406466,-1.428440,9.4676
32,2011,5,-1.435993,-1.418027,14.0466
33,2011,6,-1.450633,-1.406807,9.7461
...,...,...,...,...,...
154,2021,7,-2.212050,0.781404,14.4391
155,2021,8,-2.196736,0.805701,16.6013
156,2021,9,-2.163385,0.833308,15.7074
157,2021,10,-2.113988,0.864870,13.9014


## Regression

In [51]:
# Prepare data for OLS regression
X = final_df[['GDP_MA_st', 'CPI_MA_st']]
X = sm.add_constant(X)  # adding a constant to the model
y = final_df['Default%_x']

# Fit OLS model
model = sm.OLS(y, X).fit()

# Print the summary of the OLS model
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             Default%_x   R-squared:                       0.707
Model:                            OLS   Adj. R-squared:                  0.702
Method:                 Least Squares   F-statistic:                     153.0
Date:                Fri, 03 May 2024   Prob (F-statistic):           1.50e-34
Time:                        08:22:50   Log-Likelihood:                -244.30
No. Observations:                 130   AIC:                             494.6
Df Residuals:                     127   BIC:                             503.2
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         10.7690      0.159     67.886      0.0