In [1]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from scipy.stats import ttest_ind
from sklearn.linear_model import LinearRegression

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
df = pd.read_csv('gdrive/My Drive/Stats_HW1/Inflation_Targeting.csv')
df.head()

Unnamed: 0,Страна,-5,-4,-3,-2,-1,1,2,3,4,5,Год перехода к инфляционному таргетированию,Средняя инфляция перехода,Средняя инфляция после перехода,Развивающая страна,Таргетирует инфляцию
0,Армения,4.5,5.9,0.7,3.4,4.6,9.0,3.5,7.3,7.7,2.5,2008,3.8,6.0,0,1
1,Бразилия,66.0,15.8,6.9,3.2,4.9,7.0,6.8,8.4,14.7,6.6,2000,17.3,8.7,0,1
2,Венгрия,18.3,14.2,10.0,9.8,9.2,5.3,4.7,6.7,3.6,3.9,2002,12.2,4.8,0,1
3,Гана,14.8,26.6,12.7,15.1,11.7,10.7,16.5,13.1,6.7,7.7,2007,16.1,10.9,0,1
4,Гватемала,5.2,6.0,7.3,8.1,5.6,7.6,9.1,6.6,6.8,11.4,2004,6.4,8.3,0,1


In [4]:
target_before = df.loc[df['Таргетирует инфляцию'] == 1, 'Средняя инфляция перехода']
target_after = df.loc[df['Таргетирует инфляцию'] == 1, 'Средняя инфляция после перехода']

avg_before = target_before.mean()
avg_after = target_after.mean()

t_stat, p_value = ttest_ind(target_before, target_after)

print(f"Average Inflation Before: {avg_before}")
print(f"Average Inflation After: {avg_after}")
print(f"T-Statistic: {t_stat}, P-Value: {p_value}")

Average Inflation Before: 10.9
Average Inflation After: 5.093103448275863
T-Statistic: 2.7615825347309197, P-Value: 0.007764623568338804


In [5]:
df_new = df.groupby('Таргетирует инфляцию').mean()
df_new

  df_new = df.groupby('Таргетирует инфляцию').mean()


Unnamed: 0_level_0,-5,-4,-3,-2,-1,1,2,3,4,5,Год перехода к инфляционному таргетированию,Средняя инфляция перехода,Средняя инфляция после перехода,Развивающая страна
Таргетирует инфляцию,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,8.744762,7.052381,5.524762,4.604762,4.733333,4.742857,4.654286,4.631429,5.202857,9.081905,2002.32381,5.848571,5.612381,0.209524
1,15.655172,12.575862,10.686207,8.744828,7.617241,6.558621,5.67931,4.927586,4.141379,4.251724,2001.068966,10.9,5.093103,0.344828


In [6]:
mean_inf_nt_before = df_new.iloc[0, 11]
mean_inf_nt_after = df_new.iloc[0, 12]
mean_inf_t_before = df_new.iloc[1, 11]
mean_inf_t_after = df_new.iloc[1, 12]

print(f'mean non-target inflation rate before: {mean_inf_nt_before:.2f}')
print(f'mean non-target inflation rate after: {mean_inf_nt_after:.2f}')
print(f'mean target inflation rate before: {mean_inf_t_before:.2f}')
print(f'mean target inflation rate after: {mean_inf_t_after:.2f}')

nt_diff = mean_inf_nt_after - mean_inf_nt_before
t_diff = mean_inf_t_after - mean_inf_t_before
did = t_diff - nt_diff

print(f'DID in mean inflation rate is {did:.2f}')

mean non-target inflation rate before: 5.85
mean non-target inflation rate after: 5.61
mean target inflation rate before: 10.90
mean target inflation rate after: 5.09
DID in mean inflation rate is -5.57


In [7]:
# group g: 0 control group (non-target), 1 treatment group (target)
# t: 0 before treatment, 1 after treatment
# gt: interaction of g * t

# data before the treatment
df_before = df[['Средняя инфляция перехода', 'Таргетирует инфляцию']]
df_before['t'] = 0
df_before.columns = ['inf_rate', 'g', 't']

# data after the treatment
df_after = df[['Средняя инфляция после перехода', 'Таргетирует инфляцию']]
df_after['t'] = 1
df_after.columns = ['inf_rate', 'g', 't']

# data for regression
df_reg = pd.concat([df_before, df_after])

# create the interaction
df_reg['gt'] = df_reg.g * df_reg.t

df_reg

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_before['t'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_after['t'] = 1


Unnamed: 0,inf_rate,g,t,gt
0,3.8,1,0,0
1,17.3,1,0,0
2,12.2,1,0,0
3,16.1,1,0,0
4,6.4,1,0,0
...,...,...,...,...
129,1.4,0,1,0
130,1.6,0,1,0
131,1.3,0,1,0
132,0.7,0,1,0


In [8]:
lr = LinearRegression()

X = df_reg[['g', 't', 'gt']]
y = df_reg.inf_rate

lr.fit(X, y)
lr.coef_  # the coefficient for gt is the DID, which is -5.57

array([ 5.05142857, -0.23619048, -5.57070608])

In [9]:
ols = smf.ols('inf_rate ~ g + t + gt', data=df_reg).fit(cov_type='HC0')
print(ols.summary())

                            OLS Regression Results                            
Dep. Variable:               inf_rate   R-squared:                       0.039
Model:                            OLS   Adj. R-squared:                  0.028
Method:                 Least Squares   F-statistic:                     2.686
Date:                Sun, 31 Dec 2023   Prob (F-statistic):             0.0470
Time:                        14:09:46   Log-Likelihood:                -944.42
No. Observations:                 268   AIC:                             1897.
Df Residuals:                     264   BIC:                             1911.
Df Model:                           3                                         
Covariance Type:                  HC0                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      5.8486      1.080      5.417      0.0

In [10]:
df_reg['developed'] = df['Развивающая страна']
df_reg

Unnamed: 0,inf_rate,g,t,gt,developed
0,3.8,1,0,0,0
1,17.3,1,0,0,0
2,12.2,1,0,0,0
3,16.1,1,0,0,0
4,6.4,1,0,0,0
...,...,...,...,...,...
129,1.4,0,1,0,1
130,1.6,0,1,0,1
131,1.3,0,1,0,1
132,0.7,0,1,0,1


In [11]:
df_developed = df_reg.loc[df_reg['developed'] == 1]
df_developed = df_developed.iloc[: , :-1]
df_developing = df_reg.loc[df_reg['developed'] == 0]
df_developing = df_developing.iloc[: , :-1]

In [12]:
X_2 = df_developed[['g', 't', 'gt']]
y_2 = df_developed.inf_rate

lr.fit(X_2, y_2)
lr.coef_  # the coefficient for gt is the DID, which is -0.977

array([ 1.84272727, -2.27272727, -0.97727273])

In [13]:
ols2 = smf.ols('inf_rate ~ g + t + gt', data=df_developed).fit(cov_type='HC0')
print(ols2.summary())

                            OLS Regression Results                            
Dep. Variable:               inf_rate   R-squared:                       0.233
Model:                            OLS   Adj. R-squared:                  0.195
Method:                 Least Squares   F-statistic:                     6.243
Date:                Sun, 31 Dec 2023   Prob (F-statistic):           0.000925
Time:                        14:09:46   Log-Likelihood:                -152.72
No. Observations:                  64   AIC:                             313.4
Df Residuals:                      60   BIC:                             322.1
Df Model:                           3                                         
Covariance Type:                  HC0                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      4.4773      0.715      6.265      0.0

In [14]:
X_3 = df_developing[['g', 't', 'gt']]
y_3 = df_developing.inf_rate

lr.fit(X_3, y_3)
lr.coef_  # the coefficient for gt is the DID, which is -7.46

array([ 7.09847812,  0.30361446, -7.45624604])

In [15]:
ols3 = smf.ols('inf_rate ~ g + t + gt', data=df_developing).fit(cov_type='HC0')
print(ols3.summary())

                            OLS Regression Results                            
Dep. Variable:               inf_rate   R-squared:                       0.048
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     2.023
Date:                Sun, 31 Dec 2023   Prob (F-statistic):              0.112
Time:                        14:09:46   Log-Likelihood:                -739.70
No. Observations:                 204   AIC:                             1487.
Df Residuals:                     200   BIC:                             1501.
Df Model:                           3                                         
Covariance Type:                  HC0                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      6.2120      1.350      4.602      0.0