In [1]:
import numpy as np
from numpy import linalg as la
from scipy.stats import chi2
from tabulate import tabulate
import LinearModels as lm
import pandas as pd
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2



In [2]:
# Import data
excel_file_path = './TaylorSwift.xlsx'

# Read the second sheet (index 1) from the Excel file
sheet_name = pd.ExcelFile(excel_file_path).sheet_names[1]
data = pd.read_excel(excel_file_path, sheet_name, header=1)

# Drop the first column
data = data.drop(data.columns[0], axis=1)

data

Unnamed: 0,Cities,Day_pre,Day_after,Treat,Concert,Weekend,Avg_airbnb_price,t_3,t_2,t_1,...,t1,t2,t3,Month,month_4,month_5,month_6,month_7,month_8,Obs
0,Stockholm,2024-04-26,2024-04-28,1,0,1,1066,1,0,0,...,0,0,0,4,1,0,0,0,0,1
1,Stockholm,2024-05-03,2024-05-05,1,0,1,1116,0,1,0,...,0,0,0,5,0,1,0,0,0,2
2,Stockholm,2024-05-10,2024-05-12,1,0,1,1188,0,0,1,...,0,0,0,5,0,1,0,0,0,3
3,Stockholm,2024-05-17,2024-05-19,1,1,1,1647,0,0,0,...,0,0,0,5,0,1,0,0,0,4
4,Stockholm,2024-05-24,2024-05-26,1,0,1,1153,0,0,0,...,1,0,0,5,0,1,0,0,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,Firenze,2024-08-01,2024-08-03,0,0,1,1724,0,0,0,...,0,0,0,8,0,0,0,0,1,3
178,Firenze,2024-08-08,2024-08-10,0,1,1,1713,0,0,0,...,0,0,0,8,0,0,0,0,1,4
179,Firenze,2024-08-15,2024-08-17,0,0,1,1730,0,0,0,...,0,0,0,8,0,0,0,0,1,5
180,Firenze,2024-08-22,2024-08-24,0,0,1,1719,0,0,0,...,0,0,0,8,0,0,0,0,1,6


# Estimation 1: No fixed effects

In [3]:
data1 = data[data['Treat'] == 1]

In [4]:
# Defining dimensions
N = data1.Cities.unique().size
T = data1.Obs.unique().size
assert data1.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=13 and T=7


In [5]:
# Setting up data
y = data1.Avg_airbnb_price.values.reshape((N*T,1))

ones = np.ones((N*T,1))
t_3 = data1.t_3.values.reshape((N*T,1))
t_2 = data1.t_2.values.reshape((N*T,1))
t_1 = data1.t_1.values.reshape((N*T,1))
t = data1.t.values.reshape((N*T,1))
t1 = data1.t1.values.reshape((N*T,1))
t2 = data1.t2.values.reshape((N*T,1))
t3 = data1.t3.values.reshape((N*T,1))
m4 = data1.month_4.values.reshape((N*T,1))
m5 = data1.month_5.values.reshape((N*T,1))
m6 = data1.month_6.values.reshape((N*T,1))
m7 = data1.month_7.values.reshape((N*T,1))
m8 = data1.month_8.values.reshape((N*T,1))

x = np.hstack([ones,t_3,t_2,t,t1,t2,t3,m5,m6,m7,m8]) 

label_y = ['Average Airbnb Price']
label_x = ['cons','t-3','t-2','t','t+1','t+2','t+3','m5','m6','m7','m8']

In [6]:
no_fe_result = lm.estimate(y, x, '', T = T, robust = True)

lm.print_table([label_y, label_x], no_fe_result)

Results
Dependent variable: ['Average Airbnb Price']

           Beta       Se    t-values
----  ---------  -------  ----------
cons  1641.25    243.622    6.73687
t-3    -77.0505  155.676   -0.494942
t-2    -40.6422  148.582   -0.273533
t      601.676   272.022    2.21187
t+1    -58.2396  175.285   -0.332257
t+2    -41.5394  191.638   -0.21676
t+3   -126.811   236.898   -0.535299
m5    -395.628   224.1     -1.76541
m6    -341.37    249.101   -1.37041
m7    -402.05    221.487   -1.81523
m8     -48.4106  331.587   -0.145997
R² = 0.195
σ² = 312505.995


# Estimation 2: Fixed effects and leave out two pre-periods

In [7]:
# Setting up data
x = np.hstack([t_2,t,t1,t2,t3,m5,m6,m7,m8]) 

label_y = ['Average Airbnb Price']
label_x = ['t-2','t','t+1','t+2','t+3','m5','m6','m7','m8']

In [8]:
# Create the transformation matrix
def demeaning_matrix(T):
    return np.eye(T) - np.tile(1/T,(T,T))
    
Q_T = demeaning_matrix(T)

# Transforming data
y_demean = lm.perm(Q_T,y)
x_demean = lm.perm(Q_T,x)

# Estimation
fe_result_1 = lm.estimate(y_demean, x_demean, 'fe', T = T, robust = True)

lm.print_table([label_y, label_x], fe_result_1)

Results
Dependent variable: ['Average Airbnb Price']

           Beta        Se    t-values
---  ----------  --------  ----------
t-2     4.39443   20.8718    0.210544
t     569.105    233.76      2.43457
t+1  -108.035    110.894    -0.974221
t+2  -108.56     136.06     -0.797879
t+3  -220.819    173.316    -1.27408
m5    135.264     49.1075    2.75445
m6    286.54     193.741     1.47898
m7    268.249    235.872     1.13726
m8    748.795    444.306     1.68531
R² = 0.319
σ² = 173856.697


In [9]:
# Add two empty rows to 'b_hat', 'se', and 't_values' arrays
fe_result_1_tab = fe_result_1

empty_row = np.full((2, 1), np.nan)
fe_result_1_tab['b_hat'] = np.vstack((empty_row, fe_result_1['b_hat']))
fe_result_1_tab['se'] = np.vstack((empty_row, fe_result_1['se']))
fe_result_1_tab['t_values'] = np.vstack((empty_row, fe_result_1['t_values']))

# Estimation 3: Control group and fixed effects 

In [10]:
# Defining dimensions
N = data.Cities.unique().size
T = data.Obs.unique().size
assert data.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=26 and T=7


In [11]:
# Setting up data
y = data.Avg_airbnb_price.values.reshape((N*T,1))

ones = np.ones((N*T,1))
t_3 = data.t_3.values.reshape((N*T,1))
t_2 = data.t_2.values.reshape((N*T,1))
t_1 = data.t_1.values.reshape((N*T,1))
t = data.t.values.reshape((N*T,1))
t1 = data.t1.values.reshape((N*T,1))
t2 = data.t2.values.reshape((N*T,1))
t3 = data.t3.values.reshape((N*T,1))
m4 = data.month_4.values.reshape((N*T,1))
m5 = data.month_5.values.reshape((N*T,1))
m6 = data.month_6.values.reshape((N*T,1))
m7 = data.month_7.values.reshape((N*T,1))
m8 = data.month_8.values.reshape((N*T,1))
x = np.hstack([t_3,t_2,t,t1,t2,t3,m5,m6,m7,m8]) # no constant term for FE

label_y = ['Average Airbnb Price']
label_x = ['t-3','t-2','t','t+1','t+2','t+3','m5','m6','m7','m8']

In [12]:
# Create the transformation matrix
def demeaning_matrix(T):
    return np.eye(T) - np.tile(1/T,(T,T))
    
Q_T = demeaning_matrix(T)

# Transforming data
y_demean = lm.perm(Q_T,y)
x_demean = lm.perm(Q_T,x)

# Estimation
fe_result_2 = lm.estimate(y_demean, x_demean, 'fe', T = T, robust = True)

lm.print_table([label_y, label_x], fe_result_2)

Results
Dependent variable: ['Average Airbnb Price']

          Beta        Se    t-values
---  ---------  --------  ----------
t-3  -18.3904    19.3778  -0.949046
t-2  -16.9591    18.8409  -0.900122
t    615.741    285.572    2.15617
t+1  -29.8182    38.7171  -0.770156
t+2    1.23833   54.3624   0.0227792
t+3  -58.0925    62.4983  -0.929505
m5    55.2876    24.4607   2.26026
m6    73.5184    46.4552   1.58257
m7    16.6232    63.2403   0.262858
m8   219.658    132.546    1.65723
R² = 0.281
σ² = 88366.241


In [13]:
# Add two empty rows to 'b_hat', 'se', and 't_values' arrays
fe_result_2_tab = fe_result_2

empty_row = np.full((1, 1), np.nan)

fe_result_2_tab['b_hat'] = np.vstack((empty_row, fe_result_2['b_hat']))
fe_result_2_tab['se'] = np.vstack((empty_row, fe_result_2['se']))
fe_result_2_tab['t_values'] = np.vstack((empty_row, fe_result_2['t_values']))

# Overview of estimates and robust standard errors

In [14]:
# Collecting all estimated parameters
est_params = np.concatenate((no_fe_result['b_hat'],fe_result_1_tab['b_hat'],fe_result_2_tab['b_hat']), axis = 1)

# Collecting all estimated std errors
est_se = np.concatenate((no_fe_result['se'],fe_result_1_tab['se'],fe_result_2_tab['se']), axis = 1)

# Setting up comparison table
est_comp = np.row_stack((est_params[0,:], est_se[0,:], 
est_params[1,:], est_se[1,:],
est_params[2,:], est_se[2,:], 
est_params[3,:], est_se[3,:],
est_params[4,:], est_se[4,:], 
est_params[5,:], est_se[5,:],
est_params[6,:], est_se[6,:],)
)

model_headers = ["(1)","(2)","(3)"]

row_names = np.array([["cons","","t-3","","t-2","","t","","t+1","","t+2","","t+3",""]])

est_comp = np.concatenate((row_names.T, est_comp), axis = 1)

est_table = tabulate(est_comp, model_headers, floatfmt= '.2f')

In [15]:
est_table = tabulate(est_comp, model_headers, floatfmt= '.2f')

In [16]:
headers = ["", "(1)", "(2)", "(3)"]

# Generate LaTeX code
latex_table = tabulate(est_comp, headers=headers, tablefmt="latex_raw", colalign=("center", "center", "center", "center"),floatfmt=".2f")

latex_table = latex_table.replace("nan", ".")

print(latex_table)

\begin{tabular}{cccc}
\hline
      &   (1)   &   (2)   &  (3)   \\
\hline
 cons & 1641.25 &   .   &  .   \\
      & 243.62  &   .   &  .   \\
 t-3  & -77.05  &   .   & -18.39 \\
      & 155.68  &   .   & 19.38  \\
 t-2  & -40.64  &  4.39   & -16.96 \\
      & 148.58  &  20.87  & 18.84  \\
  t   & 601.68  & 569.11  & 615.74 \\
      & 272.02  & 233.76  & 285.57 \\
 t+1  & -58.24  & -108.03 & -29.82 \\
      & 175.28  & 110.89  & 38.72  \\
 t+2  & -41.54  & -108.56 &  1.24  \\
      & 191.64  & 136.06  & 54.36  \\
 t+3  & -126.81 & -220.82 & -58.09 \\
      & 236.90  & 173.32  & 62.50  \\
\hline
\end{tabular}


In [18]:
no_fe_result['b_hat']

array([[1641.24769882],
       [ -77.05046261],
       [ -40.64217123],
       [ 601.67556102],
       [ -58.23960807],
       [ -41.53939254],
       [-126.81141191],
       [-395.62829655],
       [-341.3701519 ],
       [-402.04964335],
       [ -48.41058983]])

In [17]:
# Checking significance level of estimates
sig_no_fe_result = lm.significance(no_fe_result['b_hat'], no_fe_result['se'])
print(sig_no_fe_result)

sig_fe_result_1 = lm.significance(fe_result_1['b_hat'], fe_result_1['se'])
print(sig_fe_result_1)

sig_fe_result_2 = lm.significance(fe_result_2['b_hat'], fe_result_2['se'])
print(sig_fe_result_2)

[[10]
 [ 0]
 [ 0]
 [10]
 [ 0]
 [ 0]
 [ 0]
 [ 0]
 [ 0]
 [ 0]
 [ 0]]
[[ 0]
 [ 0]
 [ 0]
 [10]
 [ 0]
 [ 0]
 [ 0]
 [10]
 [ 0]
 [ 0]
 [10]]
[[ 0]
 [ 0]
 [ 0]
 [10]
 [ 0]
 [ 0]
 [ 0]
 [10]
 [ 0]
 [ 0]
 [10]]
