In [1]:
'''

This notebook is used to generate the RMSE tables and plots.

Figures 3, 4, 5, 6, 7, B.4, and B.5
Tables C.1, C.2, C.3, C.4, C.5, C.6, C.7, C.8

To exactly reproduce the figures and tables, ensure that the RMSE Files folder is located in the same directory as this notebook.

'''

'\n\nThis notebook is used to generate the RMSE tables and plots.\n\nFigures 3, 4, 5, 6, 7, B.4, and B.5\nTables C.1, C.2, C.3, C.4, C.5, C.6, C.7, C.8\n\nTo exactly reproduce the figures and tables, ensure that the RMSE Files folder is located in the same directory as this notebook.\n\n'

In [2]:
import pickle
import Graph_MSE as plot
import pandas as pd
import matplotlib.pyplot as plt

Graph MSE with Linear CATE

In [3]:
#Build the .pkl base names for the linear setting.
#Names are grouped by feature level (low, medium, high); within each level the
#numeric part of the name runs 20, 30, 40, 50.
mse_files_linear = [
    f'mse_{size}_{level}_features_linear'
    for level in ('low', 'medium', 'high')
    for size in (20, 30, 40, 50)
]


In [4]:
#Read every linear-setting pickle from 'RMSE Files/' and collect the loaded objects by file name.
#NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source.
mse_dict_linear = {}
for pkl_stem in mse_files_linear:
    with open(f'RMSE Files/{pkl_stem}.pkl', 'rb') as pkl_handle:
        loaded_results = pickle.load(pkl_handle)

    #Each loaded object is also exposed as a notebook-level variable named after its file
    globals()[pkl_stem] = loaded_results
    mse_dict_linear[pkl_stem] = loaded_results



In [5]:
#Plot RMSE and the relationship with correlation
#Uses the linear-setting results only (mse_dict_linear); plot_rmse_corr comes from the local Graph_MSE module.
plot.plot_rmse_corr(mse_dict_linear)

In [6]:
#Plot RMSE with OLS, linear: one figure call per data split, Test first and then Train
for data_split in ('Test', 'Train'):
    plot.plot_rmse_analysis_test(
        mse_dict_linear,
        analysis_type='Linear',
        data=data_split,
        estimators=['OLS', 'T-Learner', 'GRF', 'CF DML'],
    )


In [7]:
#Plot RMSE without OLS, linear: same calls as the previous cell but with OLS left out of the estimator list
for data_split in ('Test', 'Train'):
    plot.plot_rmse_analysis_test(
        mse_dict_linear,
        analysis_type='Linear',
        data=data_split,
        estimators=['T-Learner', 'GRF', 'CF DML'],
    )

Graph MSE with Non-Linear CATE

In [8]:
#Build the .pkl base names for the quadratic (non-linear) setting.
#Names are grouped by feature level (low, medium, high); within each level the
#numeric part of the name runs 20, 30, 40, 50.
mse_files_quadratic = [
    f'mse_{size}_{level}_features_quadratic'
    for level in ('low', 'medium', 'high')
    for size in (20, 30, 40, 50)
]

In [9]:
#Read every quadratic-setting pickle from 'RMSE Files/' and collect the loaded objects by file name.
#NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source.
mse_dict_quadratic = {}
for pkl_stem in mse_files_quadratic:
    with open(f'RMSE Files/{pkl_stem}.pkl', 'rb') as pkl_handle:
        loaded_results = pickle.load(pkl_handle)

    #Each loaded object is also exposed as a notebook-level variable named after its file
    globals()[pkl_stem] = loaded_results
    mse_dict_quadratic[pkl_stem] = loaded_results

In [10]:
#Plot RMSE with OLS, quadratic: no estimators argument is passed, so the plotting helper's default is used
for data_split in ('Test', 'Train'):
    plot.plot_rmse_analysis_test(
        mse_dict_quadratic,
        analysis_type='Quadratic',
        data=data_split,
    )

In [11]:
#Plot RMSE without OLS, quadratic: one figure call per data split, Test first and then Train
for data_split in ('Test', 'Train'):
    plot.plot_rmse_analysis_test(
        mse_dict_quadratic,
        analysis_type='Quadratic',
        data=data_split,
        estimators=['T-Learner', 'GRF', 'CF DML'],
    )


Get RMSE Tables

In [12]:
#Create the tables with the RMSE values
def create_rmse_table(name, mse_dict, caption="RMSE Comparison Across Models", label="tab:rmse_comparison"):
    """Merge the per-model RMSE frames of one setup and export the result.

    The merged table is written to ``{name}_Table.xlsx`` and ``{name}_Table.tex``
    in the current working directory and is also returned.

    Parameters
    ----------
    name : str
        Key of the setup inside ``mse_dict``; also used as the prefix of the
        output file names.
    mse_dict : dict
        Maps setup names to a mapping of ``{model_name: DataFrame}``. Every
        frame needs an ``'n'`` column, which is the join key. Columns named
        ``'RMSE Test'`` / ``'RMSE Train'`` are prefixed with the model name so
        that they stay distinguishable after merging.
    caption : str, optional
        Caption passed to ``DataFrame.to_latex``. Defaults to the caption that
        was previously hard-coded.
    label : str, optional
        LaTeX label passed to ``DataFrame.to_latex``. Defaults to the label that
        was previously hard-coded; pass a distinct value per table when several
        generated tables are included in the same LaTeX document.

    Returns
    -------
    pandas.DataFrame
        The merged table, one row per ``n`` value shared by all model frames.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``mse_dict``.
    ValueError
        If the setup contains no model frames.
    """
    model_frames = mse_dict[name]
    if not model_frames:
        # Fail with a clear message instead of an IndexError on an empty list.
        raise ValueError(f"No model results found for setup '{name}'")

    # Prefix the RMSE columns with the model name; rename() returns a new frame,
    # so the frames stored in mse_dict are left untouched.
    renamed_frames = [
        frame.rename(columns={'RMSE Test': f'{model_name} RMSE Test',
                              'RMSE Train': f'{model_name} RMSE Train'})
        for model_name, frame in model_frames.items()
    ]

    # pd.merge defaults to an inner join: only 'n' values present in every
    # model frame survive.
    merged_df = renamed_frames[0]
    for frame in renamed_frames[1:]:
        merged_df = pd.merge(merged_df, frame, on='n')

    merged_df.to_excel(f'{name}_Table.xlsx', index=False)
    latex_table = merged_df.to_latex(index=False, float_format="%.3f", caption=caption, label=label)

    with open(f'{name}_Table.tex', 'w') as f:
        f.write(latex_table)

    return merged_df


#Write one Excel/LaTeX table per setup: all linear setups first, then all quadratic ones
for results_by_setup in (mse_dict_linear, mse_dict_quadratic):
    for setup_key in results_by_setup:
        create_rmse_table(setup_key, results_by_setup)