In [None]:
import os

# Switch to the project root before anything else runs. Later cells read and
# write paths relative to the working directory (e.g. 'SHO/...'), and the
# `utils` imports below presumably rely on it as well.
# The root can be overridden with the SYNTH_ROOT environment variable so the
# notebook is not tied to one machine; the default is the original location.
os.chdir(os.environ.get("SYNTH_ROOT", "/home/youhui/Git/Synth"))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from utils.synth import *
from utils.competitors import *
from datetime import datetime

import matplotlib as mpl

# Global matplotlib styling: render text through LaTeX (with amsmath loaded)
# and use a serif font family for every figure in this notebook.
mpl.rcParams['text.usetex'] = True
mpl.rcParams['text.latex.preamble'] = r'\usepackage{amsmath}'
plt.rcParams['font.family'] = 'serif'

# Display names of the compared estimators, in the order used by every
# stacked array and plot below (LaTeX strings are rendered via usetex).
methods = ['SC', 'Lasso', 'Ridge', 'Elastic Net', r'$L_{\infty}$', r'$L_1 + L_{\infty}$']

# One colour per consecutive pair of methods (orange, blue, green); the two
# members of a pair are told apart by line style (index % 2 into line_styles).
_pair_palette = ('#E69F00', '#0000FF', '#009E73')
colors = [shade for shade in _pair_palette for _ in range(2)]

# Solid for the first member of each colour pair, dashed for the second.
line_styles = ['-', '--']

In [None]:
# Sector metadata table; the columns used here are 'sector', 'sho_pilot'
# and 'tsymbol'.
sho_sector_df = pd.read_csv('SHO/sho_sector_info.csv')

# Keep only the rows of the sector under study.
sector_df = sho_sector_df.loc[sho_sector_df['sector'] == 'Consumer Defensive']

# Ticker lists split by the pilot flag: 1 -> experiment group, 0 -> control group.
_in_pilot = sector_df['sho_pilot'] == 1
_in_control = sector_df['sho_pilot'] == 0
sho1 = sector_df.loc[_in_pilot, 'tsymbol'].tolist()
sho0 = sector_df.loc[_in_control, 'tsymbol'].tolist()

# Echo both groups so the split can be checked by eye.
print("Companies in experiment group (sho_pilot = 1):", sho1)
print("Companies in control group (sho_pilot = 0):", sho0)

In [None]:
# Key dates of the study, parsed into datetime objects.
# INTERVENTION_TIME separates the pre- and post-treatment samples and is the
# date labelled 'SHO commencement' in the price plot.
# NOTE(review): START_TIME and STOP_TIME are not referenced by any other
# visible cell -- confirm whether the data were meant to be cut to this window.
_DATE_FORMAT = '%Y-%m-%d'
START_TIME, INTERVENTION_TIME, STOP_TIME = (
    datetime.strptime(_stamp, _DATE_FORMAT)
    for _stamp in ('2005-01-01', '2005-05-02', '2005-05-31')
)

In [None]:
# Price panel for the sector, indexed by the parsed 'Date' column.
df = pd.read_csv('SHO/Consumer_Defensive_data.csv').set_index('Date')
df.index = pd.to_datetime(df.index)

# Collapse to weekly frequency by summing the observations within each week.
# NOTE(review): a sum (rather than mean/last) makes weeks with fewer
# observations smaller -- confirm this aggregation is intended.
df = df.resample('W').sum()

# Split at the intervention date: strictly earlier weeks are pre-treatment,
# weeks labelled on or after it are post-treatment.
Y_pre = df[df.index < INTERVENTION_TIME]
Y_post = df[df.index >= INTERVENTION_TIME]

In [None]:
def _float_matrix(frame):
    """Return the values of a DataFrame as a float64 NumPy array."""
    return frame.to_numpy().astype('float64')


# Donor pool: the non-pilot tickers of the sector.
control_companies = sho0

# Treated unit is the single column 'COST' (kept 2-D via the list selector);
# the control matrices hold one column per donor company.
Y1_pre = _float_matrix(Y_pre[['COST']])
Y0_pre = _float_matrix(Y_pre.loc[:, control_companies])
Y1_post = _float_matrix(Y_post[['COST']])
Y0_post = _float_matrix(Y_post.loc[:, control_companies])

In [None]:
# Problem dimensions: T0 pre-treatment periods, T1 post-treatment periods,
# J control units.
T0, J = Y0_pre.shape
T1 = Y0_post.shape[0]
print("T0:", T0, "T1:", T1, "J:", J)

# Settings shared by every selection call below.
std = False
n_folds = 5
fixed_alpha = None
_selector_opts = dict(std=std, n_folds=n_folds)

# Select (alpha, lambda) for each penalty. The treated series is passed as a
# 1-D vector. Only the two mixed penalties receive `fixed_alpha`.
# (An earlier variant of these calls used test_size=0.5 instead of n_folds.)
alpha_inf, lam_inf = param_selector(Y1_pre[:, 0], Y0_pre, method='inf', **_selector_opts)
alpha_l1, lam_l1 = param_selector(Y1_pre[:, 0], Y0_pre, method='l1', **_selector_opts)
alpha_l2, lam_l2 = param_selector(Y1_pre[:, 0], Y0_pre, method='l2', **_selector_opts)
alpha_l1_inf, lam_l1_inf = param_selector(
    Y1_pre[:, 0], Y0_pre, method='l1-inf', fixed_alpha=fixed_alpha, **_selector_opts
)
alpha_l1_l2, lam_l1_l2 = param_selector(
    Y1_pre[:, 0], Y0_pre, method='l1-l2', fixed_alpha=fixed_alpha, **_selector_opts
)

# Report the selected values (alpha is only shown for the mixed penalties).
print("Inf method - Lambda:", lam_inf)
print("L1 method - Lambda:", lam_l1)
print("L2 method - Lambda:", lam_l2)
print("L1-Inf method - Alpha:", alpha_l1_inf, "Lambda:", lam_l1_inf)
print("L1-L2 method - Alpha:", alpha_l1_l2, "Lambda:", lam_l1_l2)

In [None]:
# Fit each penalised estimator on the pre-treatment sample with its selected
# (alpha, lambda). Elsewhere in this notebook entry 0 of each result is used
# as an intercept and entries 1: as the per-company weights.
w_inf, w_l1, w_l2, w_l1_inf, w_l1_l2 = (
    our(Y1_pre, Y0_pre, _alpha, _lam, _penalty, std=std)
    for _alpha, _lam, _penalty in (
        (alpha_inf, lam_inf, 'inf'),
        (alpha_l1, lam_l1, 'l1'),
        (alpha_l2, lam_l2, 'l2'),
        (alpha_l1_inf, lam_l1_inf, 'l1-inf'),
        (alpha_l1_l2, lam_l1_l2, 'l1-l2'),
    )
)

# Baseline synthetic-control weights (no intercept entry is used for these).
w_sc = sc(Y1_pre, Y0_pre)

In [None]:
def _post_period_gap(observed, design, weights):
    """Observed minus fitted outcome, returned as a column shaped like `observed`.

    `observed` is a (T, 1) column, `design` a (T, K) matrix and `weights` a
    length-K vector (1-D or a (K, 1) column). The fitted values are reshaped
    to a column before subtracting: with 1-D weights `design @ weights` is
    (T,), and subtracting that from a (T, 1) array would broadcast to a
    (T, T) matrix instead of an element-wise gap.
    """
    fitted = design @ np.ravel(weights)
    return observed - fitted.reshape(-1, 1)


# Post-period control matrix with a leading column of ones, so that entry 0
# of each penalised weight vector acts as the intercept.
Y0_post_plus = np.hstack([np.ones((Y1_post.shape[0], 1)), Y0_post])

# Estimated effect per post-treatment period for each method. Plain SC uses
# the raw control matrix because it has no intercept entry.
tau_sc = _post_period_gap(Y1_post, Y0_post, w_sc)
tau_inf = _post_period_gap(Y1_post, Y0_post_plus, w_inf)
tau_l1 = _post_period_gap(Y1_post, Y0_post_plus, w_l1)
tau_l2 = _post_period_gap(Y1_post, Y0_post_plus, w_l2)
tau_l1_inf = _post_period_gap(Y1_post, Y0_post_plus, w_l1_inf)
tau_l1_l2 = _post_period_gap(Y1_post, Y0_post_plus, w_l1_l2)

In [None]:
# Stack per-method results in the same order as `methods`:
# SC, Lasso (l1), Ridge (l2), Elastic Net (l1-l2), L_inf, L1 + L_inf.
_penalised_fits = [w_l1, w_l2, w_l1_l2, w_inf, w_l1_inf]

# Rows of W are the company weights; penalised fits drop their entry 0,
# which is collected separately in `mu` (0 for plain SC).
W = np.array([w_sc] + [_fit[1:] for _fit in _penalised_fits])
mu = np.array([0] + [_fit[0] for _fit in _penalised_fits])

# Per-method post-period effects, stacked along the first axis.
Tau = np.array([tau_sc, tau_l1, tau_l2, tau_l1_l2, tau_inf, tau_l1_inf])

In [None]:
# Fitted outcome path of every method over the whole sample
# (rows: periods, columns: methods), plus the observed treated series.
_controls_all = np.concatenate([Y0_pre, Y0_post], axis=0)
SC_outcomes = _controls_all @ W.T + mu.reshape(1, len(methods))
True_outcomes = np.concatenate([Y1_pre, Y1_post], axis=0).flatten()

# Post-period gap between observed and fitted outcomes, and its running mean
# (row t holds the average over the first t+1 post-treatment periods).
treatment_effect = True_outcomes[T0:].reshape(-1, 1) - SC_outcomes[T0:]
_periods_elapsed = np.arange(1, treatment_effect.shape[0] + 1).reshape(-1, 1)
average_treatment_effect = np.cumsum(treatment_effect, axis=0) / _periods_elapsed

# Observed price (black) against each method's fitted path.
fig, ax = plt.subplots(figsize=(6.5, 4.5))
_dates = df.index.to_numpy()
ax.plot(_dates, True_outcomes, 'k-', label="True Price")
for _k, _name in enumerate(methods):
    ax.plot(_dates, SC_outcomes[:, _k],
            color=colors[_k], linestyle=line_styles[_k % 2], label=_name)

ax.set_ylabel('Stock Price', fontsize=16)
ax.set_xlabel('Date', fontsize=16)

# Mark the intervention date and label it just below the series maximum,
# with the text ending at the vertical line.
ax.axvline(INTERVENTION_TIME)
ax.text(x=INTERVENTION_TIME,
        y=max(True_outcomes)-0.2,
        s='SHO commencement',
        fontsize=14,
        ha='right',
        va='bottom')
ax.legend(loc='lower left', fontsize=14)
plt.xticks(rotation=45)
ax.grid()
fig.tight_layout()
plt.show()
fig.savefig("SHO/sho.png")
plt.close()

In [None]:
# Two-panel figure: per-period policy effect (wider, left) and the average
# treatment effect per method (right). A third panel with the price paths
# existed in an earlier layout and is no longer drawn here.
fig = plt.figure(figsize=(14, 6))
gs = gridspec.GridSpec(1, 2, width_ratios=[1.5, 1])

# Left panel: post-period effect path for each method.
ax1 = fig.add_subplot(gs[0])
_post_dates = df.index.to_numpy()[T0:]
for _k, (_name, _shade) in enumerate(zip(methods, colors)):
    ax1.plot(_post_dates,
             treatment_effect[:, _k],
             color=_shade,
             linestyle=line_styles[_k % 2],
             label=_name,
             linewidth=1.5)
# Zero reference line.
ax1.axhline(0, color='black', linestyle='-', linewidth=1.0)
ax1.set_ylabel('Policy Effect', fontsize=16)
ax1.set_xlabel('Date', fontsize=16)
ax1.legend(loc='upper right', fontsize=14)
ax1.grid()
ax1.tick_params(axis='both', labelsize=14)
ax1.tick_params(axis='x', rotation=45)
fig.tight_layout()

# Right panel: last row of the running average, i.e. the mean effect over the
# whole post-treatment window. Everything is reversed so the first method
# ends up at the top of the horizontal bar chart.
ax2 = fig.add_subplot(gs[1])
_final_average = average_treatment_effect[-1]
ax2.barh(methods[::-1], _final_average[::-1],
         color=colors[:len(methods)][::-1])
ax2.set_xlabel('Average Treatment Effect', fontsize=16)
ax2.tick_params(axis='both', labelsize=14)
fig.tight_layout()

# Show, then write the combined figure to disk.
plt.show()
fig.savefig('SHO/sho_combined.png')
plt.close()

In [None]:
# Entries with absolute value below this are treated as exactly zero.
threshold = 1e-3

# Zero out negligible entries of the penalised weight vectors.
# NOTE(review): this modifies the arrays in place (including entry 0, which is
# used as the intercept elsewhere), so re-running earlier cells afterwards
# would see the thresholded values. The SC weights are not thresholded.
for _weights in (w_inf, w_l1, w_l2, w_l1_inf, w_l1_l2):
    _weights[np.abs(_weights) < threshold] = 0

# Rebuild the (method x company) weight matrix from the thresholded vectors,
# in the same order as `methods`.
W = np.array([w_sc, w_l1[1:], w_l2[1:], w_l1_l2[1:], w_inf[1:], w_l1_inf[1:]])

# Table with one row per control company and one column per method,
# sorted by company symbol in descending order.
data = W.T
df2 = pd.DataFrame(data, index=control_companies, columns=methods)
df2 = df2.sort_index(ascending=False)

# One horizontal bar chart per method, side by side with a shared y-axis.
ncols = len(methods)
fig, axes = plt.subplots(nrows=1, ncols=ncols, figsize=(10, 6), sharey=True)

# Common symmetric x-range so bar lengths are comparable across methods.
max_abs_value = df2.abs().max().max()

_tick_positions = np.arange(len(df2.index))
for _panel, _name in zip(axes, methods):
    _panel.set_xlabel(_name, fontsize=16)
    _panel.set_xlim([-max_abs_value, max_abs_value])
    _panel.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)
    _panel.barh(df2.index, df2[_name])
    _panel.set_yticks(_tick_positions)
    _panel.set_yticklabels(df2.index, fontsize=10)

# The shared y-axis is labelled once, on the left-most panel.
axes[0].set_ylabel('Companies', fontsize=17)

# Leave the top 5% of the canvas free (room for an optional figure title),
# then write the figure to disk.
plt.tight_layout(rect=[0, 0, 1, 0.95])
fig.savefig("SHO/sho_weights.png")

In [None]:
# ## looking for the best alpha and K's.

# alphas = np.array([np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, alpha_l1_l2, alpha_l1_inf])
# Ks = np.array([np.nan, lam_l1, lam_l2, lam_inf, lam_l1_l2, lam_l1_inf])
# df = pd.DataFrame(np.hstack([SC_outcomes[T0:,:].T, alphas.reshape(-1, 1), Ks.reshape(-1, 1)]), 
#                   index=methods, columns=np.append(np.arange(INTERVENTION_TIME, STOP_TIME), ['alpha', 'K']))
# pd.set_option('display.width', 120)  # Set this as per your screen width
# pd.set_option('display.max_columns', None)  # None means unlimited
# # Display the DataFrame
# styled_df = df.style.format('{:,.6f}') \
#                     .set_properties(**{'border': '1px solid black', 'text-align': 'right'}) \
#                     .set_table_styles([{'selector': 'th', 
#                                         'props': [('background-color', 'lightgray'), 
#                                                   ('border', '2px solid black'),
#                                                   ('text-align', 'center')]}],
#                                       overwrite=False)
# styled_df