In [1]:
import numpy as np
from numpy import linalg as la
from scipy.stats import chi2
from tabulate import tabulate
import LinearModels as lm
import pandas as pd
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2



This code relies on the py-file "LinearModels," which was created by Anders Munk Nielsen and Jesper Vestergaard from the course Advanced Microeconometrics 2022. 

# Read data

In [20]:
# Import data
excel_file_path = './Eventstudydata.xlsx'

# Read the second sheet (index 1) from the Excel file
sheet_name = pd.ExcelFile(excel_file_path).sheet_names[1]
data = pd.read_excel(excel_file_path, sheet_name, header=1)

# Drop the first column
data = data.drop(data.columns[0], axis=1)

print(data.head(5))

      Cities     Day_pre   Day_after  Treat  Concert  Weekend  \
0  Stockholm  2024-04-26  2024-04-29      1        0        1   
1  Stockholm  2024-05-03  2024-05-06      1        0        1   
2  Stockholm  2024-05-10  2024-05-13      1        0        1   
3  Stockholm  2024-05-17  2024-05-20      1        1        1   
4  Stockholm  2024-05-24  2024-05-27      1        0        1   

   Avg_airbnb_price  t_3  t_2  t_1  ...  t1  t2  t3  Obs  Month  month_4  \
0              1111    1    0    0  ...   0   0   0    1      4        1   
1              1128    0    1    0  ...   0   0   0    2      5        0   
2              1193    0    0    1  ...   0   0   0    3      5        0   
3              1857    0    0    0  ...   0   0   0    4      5        0   
4              1198    0    0    0  ...   1   0   0    5      5        0   

   month_5  month_6  month_7  month_8  
0        0        0        0        0  
1        1        0        0        0  
2        1        0        0    

# Estimation 1: No fixed effects

In [3]:
data1 = data[data['Treat'] == 1]

In [4]:
# Defining dimensions
N = data1.Cities.unique().size
T = data1.Obs.unique().size
assert data1.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=13 and T=7


In [5]:
# Setting up data
y = data1.Avg_airbnb_price.values.reshape((N*T,1))

ones = np.ones((N*T,1))
t_3 = data1.t_3.values.reshape((N*T,1))
t_2 = data1.t_2.values.reshape((N*T,1))
t_1 = data1.t_1.values.reshape((N*T,1))
t = data1.t.values.reshape((N*T,1))
t1 = data1.t1.values.reshape((N*T,1))
t2 = data1.t2.values.reshape((N*T,1))
t3 = data1.t3.values.reshape((N*T,1))
m4 = data1.month_4.values.reshape((N*T,1))
m5 = data1.month_5.values.reshape((N*T,1))
m6 = data1.month_6.values.reshape((N*T,1))
m7 = data1.month_7.values.reshape((N*T,1))
m8 = data1.month_8.values.reshape((N*T,1))

x = np.hstack([ones,t_3,t_2,t,t1,t2,t3,m5,m6,m7,m8]) 

label_y = ['Average Airbnb Price']
label_x = ['cons','t-3','t-2','t','t+1','t+2','t+3','m5','m6','m7','m8']

In [6]:
no_fe_result = lm.estimate(y, x, '', T = T, robust = True)

lm.print_table([label_y, label_x], no_fe_result)

Results
Dependent variable: ['Average Airbnb Price']

           Beta       Se    t-values
----  ---------  -------  ----------
cons  1722.51    254.583    6.76598
t-3    -61.2947  142.775   -0.42931
t-2    -37.9297  136.159   -0.278569
t      396.948   131.079    3.0283
t+1     30.541   149.343    0.204502
t+2    117.288   164.61     0.712517
t+3    130.244   183.97     0.70796
m5    -357.126   240.544   -1.48466
m6    -448.112   258.583   -1.73295
m7    -409.731   233.653   -1.75359
m8    -425.452   251.01    -1.69496
R² = 0.152
σ² = 149643.281


# Estimation 2: Fixed effects and leave out two pre-periods

In [7]:
# Setting up data
x = np.hstack([t_2,t,t1,t2,t3,m5,m6,m7,m8]) 

label_y = ['Average Airbnb Price']
label_x = ['t-2','t','t+1','t+2','t+3','m5','m6','m7','m8']

In [8]:
# Create the transformation matrix
def demeaning_matrix(T):
    return np.eye(T) - np.tile(1/T,(T,T))
    
Q_T = demeaning_matrix(T)

# Transforming data
y_demean = lm.perm(Q_T,y)
x_demean = lm.perm(Q_T,x)

# Estimation
fe_result_1 = lm.estimate(y_demean, x_demean, 'fe', T = T, robust = True)

lm.print_table([label_y, label_x], fe_result_1)

Results
Dependent variable: ['Average Airbnb Price']

         Beta        Se    t-values
---  --------  --------  ----------
t-2  -10.2327   8.96366   -1.14157
t    412.268   58.9395     6.99477
t+1   42.8572  20.6157     2.07886
t+2  126.6     53.7867     2.35375
t+3  133.344   61.8746     2.15507
m5    78.282   22.6971     3.44898
m6   -15.3688  59.6466    -0.257664
m7   -95.4097  85.8608    -1.11121
m8   -69.4186  76.8589    -0.903194
R² = 0.593
σ² = 17136.396


In [9]:
# Add two empty rows to 'b_hat', 'se', and 't_values' arrays
fe_result_1_tab = fe_result_1

empty_row = np.full((2, 1), np.nan)
fe_result_1_tab['b_hat'] = np.vstack((empty_row, fe_result_1['b_hat']))
fe_result_1_tab['se'] = np.vstack((empty_row, fe_result_1['se']))
fe_result_1_tab['t_values'] = np.vstack((empty_row, fe_result_1['t_values']))

# Estimation 3: Control group and fixed effects 

In [10]:
# Defining dimensions
N = data.Cities.unique().size
T = data.Obs.unique().size
assert data.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=26 and T=7


In [11]:
# Setting up data
y = data.Avg_airbnb_price.values.reshape((N*T,1))

ones = np.ones((N*T,1))
t_3 = data.t_3.values.reshape((N*T,1))
t_2 = data.t_2.values.reshape((N*T,1))
t_1 = data.t_1.values.reshape((N*T,1))
t = data.t.values.reshape((N*T,1))
t1 = data.t1.values.reshape((N*T,1))
t2 = data.t2.values.reshape((N*T,1))
t3 = data.t3.values.reshape((N*T,1))
m4 = data.month_4.values.reshape((N*T,1))
m5 = data.month_5.values.reshape((N*T,1))
m6 = data.month_6.values.reshape((N*T,1))
m7 = data.month_7.values.reshape((N*T,1))
m8 = data.month_8.values.reshape((N*T,1))
x = np.hstack([t_3,t_2,t,t1,t2,t3,m5,m6,m7,m8]) # no constant term for FE

label_y = ['Average Airbnb Price']
label_x = ['t-3','t-2','t','t+1','t+2','t+3','m5','m6','m7','m8']

In [12]:
# Create the transformation matrix
def demeaning_matrix(T):
    return np.eye(T) - np.tile(1/T,(T,T))
    
Q_T = demeaning_matrix(T)

# Transforming data
y_demean = lm.perm(Q_T,y)
x_demean = lm.perm(Q_T,x)

# Estimation
fe_result_2 = lm.estimate(y_demean, x_demean, 'fe', T = T, robust = True)

lm.print_table([label_y, label_x], fe_result_2)

Results
Dependent variable: ['Average Airbnb Price']

          Beta        Se    t-values
---  ---------  --------  ----------
t-3   -6.87189   9.55247   -0.719384
t-2  -11.686     8.04325   -1.45289
t    394.17     59.4036     6.63545
t+1   19.6421   16.1363     1.21726
t+2   98.2683   52.3837     1.87593
t+3   99.6972   54.8283     1.81835
m5    78.6483   15.896      4.94768
m6    48.9428   31.6886     1.54449
m7    17.8776   43.6779     0.409305
m8    46.4423   36.6286     1.26792
R² = 0.547
σ² = 9638.178


In [13]:
# Add two empty rows to 'b_hat', 'se', and 't_values' arrays
fe_result_2_tab = fe_result_2

empty_row = np.full((1, 1), np.nan)

fe_result_2_tab['b_hat'] = np.vstack((empty_row, fe_result_2['b_hat']))
fe_result_2_tab['se'] = np.vstack((empty_row, fe_result_2['se']))
fe_result_2_tab['t_values'] = np.vstack((empty_row, fe_result_2['t_values']))

# Overview of estimates and robust standard errors

In [14]:
# Collecting all estimated parameters
est_params = np.concatenate((no_fe_result['b_hat'],fe_result_1_tab['b_hat'],fe_result_2_tab['b_hat']), axis = 1)

# Collecting all estimated std errors
est_se = np.concatenate((no_fe_result['se'],fe_result_1_tab['se'],fe_result_2_tab['se']), axis = 1)

# Setting up comparison table
est_comp = np.row_stack((est_params[0,:], est_se[0,:], 
est_params[1,:], est_se[1,:],
est_params[2,:], est_se[2,:], 
est_params[3,:], est_se[3,:],
est_params[4,:], est_se[4,:], 
est_params[5,:], est_se[5,:],
est_params[6,:], est_se[6,:],)
)

model_headers = ["(1)","(2)","(3)"]

row_names = np.array([["cons","","t-3","","t-2","","t","","t+1","","t+2","","t+3",""]])

est_comp = np.concatenate((row_names.T, est_comp), axis = 1)

est_table = tabulate(est_comp, model_headers, floatfmt= '.2f')

In [15]:
est_table = tabulate(est_comp, model_headers, floatfmt= '.2f')

In [16]:
headers = ["", "(1)", "(2)", "(3)"]

# Generate LaTeX code
latex_table = tabulate(est_comp, headers=headers, tablefmt="latex_raw", colalign=("center", "center", "center", "center"),floatfmt=".2f")

latex_table = latex_table.replace("nan", ".")

print(latex_table)

\begin{tabular}{cccc}
\hline
      &   (1)   &  (2)   &  (3)   \\
\hline
 cons & 1722.51 &  .   &  .   \\
      & 254.58  &  .   &  .   \\
 t-3  & -61.29  &  .   & -6.87  \\
      & 142.78  &  .   &  9.55  \\
 t-2  & -37.93  & -10.23 & -11.69 \\
      & 136.16  &  8.96  &  8.04  \\
  t   & 396.95  & 412.27 & 394.17 \\
      & 131.08  & 58.94  & 59.40  \\
 t+1  &  30.54  & 42.86  & 19.64  \\
      & 149.34  & 20.62  & 16.14  \\
 t+2  & 117.29  & 126.60 & 98.27  \\
      & 164.61  & 53.79  & 52.38  \\
 t+3  & 130.24  & 133.34 & 99.70  \\
      & 183.97  & 61.87  & 54.83  \\
\hline
\end{tabular}


# Checking significance level of estimates

In [18]:
sig_no_fe_result = lm.significance(no_fe_result['b_hat'], no_fe_result['se'])
print(sig_no_fe_result)

sig_fe_result_1 = lm.significance(fe_result_1['b_hat'], fe_result_1['se'])
print(sig_fe_result_1)

sig_fe_result_2 = lm.significance(fe_result_2['b_hat'], fe_result_2['se'])
print(sig_fe_result_2)

[[10]
 [ 0]
 [ 0]
 [10]
 [ 0]
 [ 0]
 [ 0]
 [ 0]
 [ 0]
 [ 0]
 [ 0]]
[[ 0]
 [ 0]
 [ 0]
 [10]
 [10]
 [10]
 [10]
 [10]
 [ 0]
 [ 0]
 [ 0]]
[[ 0]
 [ 0]
 [ 0]
 [10]
 [ 0]
 [10]
 [10]
 [10]
 [ 0]
 [ 0]
 [ 0]]
