# Load and format data into tables
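This notebook reads the test results (Kolmogorov–Smirnov and t-test CSV files, plus OLS regression summaries) written by earlier runs, and collects them into a main and a supplementary summary table, one row per portfolio. Both tables are saved as CSV files in the run's output folder.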

In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
# --- Where the earlier runs wrote their output ---------------------------
results_dir = "./output/"
version = "version15/"

# Portfolio output folder -> label used in the "Portfolio" column.
# Insertion order is the row order of the generated tables.
# NOTE(review): folder "renewable_all" is labelled "Renewable 34" — confirm this is intended.
subsubfolder_labels = dict([
    ("constsust_20", "Neutral rated 20"),
    ("constsust_34", "Neutral rated 34"),
    ("greencred_20", "Green rated 20"),
    ("greencred_34", "Green rated 34"),
    ("fossil_20", "Fossil 20"),
    ("fossil_34", "Fossil 34"),
    ("fossil_100", "Fossil 100"),
    ("renewable_20", "Renewable 20"),
    ("renewable_all", "Renewable 34"),
])

# --- Per-portfolio result folders ----------------------------------------
# Both folder names share the same "before0_after<N>" prefix.
afterlength = 0
window_prefix = f"before0_after{afterlength}"
kstestfolder = f"{window_prefix}_normNone"
olstestfolder = f"{window_prefix}_normconstsust_34_cleaned"

# --- File names inside those folders -------------------------------------
kstestfilename = "Kolmogorov_Smirnoff_test_results.csv"
olsresultsfile = "OLSsummary.txt"
olsresultsSqfile = "OLSsummaryDailyVar.txt"

In [3]:
# Define some functions required to read the COP values from the OLS regression table


def extract_cop_value(file_path):
    """Return the numeric values of the first parseable "COP" row in a text file.

    The file is scanned line by line. A line is a candidate when its stripped
    text starts with "COP"; every whitespace-separated token after the first
    one is then converted to ``float``.

    Candidate lines that carry no further tokens, or whose remaining tokens
    are not all numeric, are skipped and the scan continues with the next line.

    Parameters
    ----------
    file_path : str or path-like
        Path of the text file to read.

    Returns
    -------
    list of float or None
        The parsed values in file order. When a fifth value (index 4) is
        present it is rounded to 3 decimals. ``None`` if no parseable "COP"
        row exists in the file.
    """
    with open(file_path, 'r') as file:
        # Iterate lazily: the scan stops at the first usable row.
        for line in file:
            if not line.strip().startswith("COP"):
                continue
            tokens = line.split()[1:]
            if not tokens:
                # Only the label is present, nothing to parse.
                continue
            try:
                values = [float(token) for token in tokens]
            except ValueError:
                # Starts with "COP" but is not a numeric row; keep looking.
                continue
            # Guard the index so short rows do not raise IndexError.
            if len(values) > 4:
                values[4] = np.round(values[4], 3)
            return values

    return None  # No parseable "COP" row in the file

In [4]:
def _ks_value(table, test_name, column):
    """Return `column` from the first row of `table` whose "Test" entry equals `test_name`."""
    return table.loc[table["Test"] == test_name, column].iloc[0]


def _sign(value):
    """Return "+" for a strictly positive value and "-" otherwise (zero included)."""
    return "+" if value > 0 else "-"


def _signed_pval(raw_table, rounded_table, test_name):
    """Format one test as "(sign) p-value", e.g. "(+) 0.350".

    The sign comes from the unrounded test statistic; the p-value is taken
    from the table rounded to 3 decimals.
    """
    sign = _sign(_ks_value(raw_table, test_name, "testStat"))
    return f'({sign}) {_ks_value(rounded_table, test_name, "pval"):.3f}'


def _cop_sign_and_pval(file_path):
    """Format the COP row of an OLS summary file as "(sign) value", e.g. "(-) 0.031".

    Uses the sign of the first parsed value and prints the fourth one.
    NOTE(review): assumes these are the coefficient and its p-value — confirm
    against the layout of the OLS summary files.

    Raises
    ------
    ValueError
        If the file has no usable COP row, instead of failing later with an
        unrelated-looking error when the missing result is subscripted.
    """
    cop = extract_cop_value(file_path)
    if cop is None or len(cop) < 4:
        raise ValueError(f"No usable COP row found in {file_path}")
    return f'{"(+) " if (cop[0] > 0) else "(-) "}{cop[3]:.3f}'


for subfolder in [""]:
    result_rows = []
    result_sup_rows = []
    for key, val in subsubfolder_labels.items():
        # "constsust" portfolios read their OLS output from the folder whose
        # name has "None" in place of "constsust_34".
        tempolstestfolder = (
            olstestfolder.replace("constsust_34", "None") if ("constsust" in key) else olstestfolder
        )
        portfolio_dir = os.path.join(results_dir, version, subfolder, key)

        subresult_raw = pd.read_csv(os.path.join(portfolio_dir, kstestfolder, kstestfilename))
        # p-values are reported from the rounded table. Signs are taken from the
        # unrounded statistics so that a statistic with |t| < 0.0005 is not
        # rounded to zero and reported as negative.
        subresult = subresult_raw.round(3)

        linear_cop = _cop_sign_and_pval(
            os.path.join(portfolio_dir, tempolstestfolder, olsresultsfile)
        )
        variability_cop = _cop_sign_and_pval(
            os.path.join(portfolio_dir, tempolstestfolder, olsresultsSqfile)
        )

        result_rows.append({
            "Portfolio": val,
            "KS p-val for fractional change": f'{_ks_value(subresult, "KSDiff", "pval"):.3f}',
            "t test sign for fractional change": f'({_sign(_ks_value(subresult_raw, "t-Diff", "testStat"))})',
            "KS p-val for gSD change": f'{_ks_value(subresult, "KSgeoSD", "pval"):.3f}',
            "t test sign for gSD change": f'({_sign(_ks_value(subresult_raw, "t-geoSD", "testStat"))})',
            "Sign and p-val for linear diff model COP term": linear_cop,
            "Sign and p-val for variability model COP term": variability_cop,
        })
        result_sup_rows.append({
            "Portfolio": val,
            "t test for fractional change": _signed_pval(subresult_raw, subresult, "t-Diff"),
            "Truncated t test for fractional change": _signed_pval(subresult_raw, subresult, "t-Diff-truncated"),
            "t test for gSD change": _signed_pval(subresult_raw, subresult, "t-geoSD"),
            "Truncated t test for gSD change": _signed_pval(subresult_raw, subresult, "t-geoSD-truncated"),
        })

    # One DataFrame per table, built once from the collected rows.
    results = pd.DataFrame(result_rows)
    results.to_csv(os.path.join(results_dir, version, subfolder, "ResultsSummaryTable.csv"))
    results_sup = pd.DataFrame(result_sup_rows)
    results_sup.to_csv(
        os.path.join(results_dir, version, subfolder, "ResultsSummarySupplementaryTable.csv")
    )

In [5]:
# Debug check: show the path of the last daily-variance OLS summary file.
# Relies on `subfolder`, `key` and `tempolstestfolder` still holding the values
# from the final iteration of the loop in the previous cell.
os.path.join(results_dir, version, subfolder, key, tempolstestfolder, olsresultsSqfile)

'./output/version15/renewable_all\\before0_after0_normconstsust_34_cleaned\\OLSsummaryDailyVar.txt'

In [6]:
# Display the main summary table (the one saved as ResultsSummaryTable.csv).
results

Unnamed: 0,Portfolio,KS p-val for fractional change,t test sign for fractional change,KS p-val for gSD change,t test sign for gSD change,Sign and p-val for linear diff model COP term,Sign and p-val for variability model COP term
0,Neutral rated 20,0.433,(+),0.689,(-),(+) 0.343,(+) 0.002
1,Neutral rated 34,0.39,(+),0.956,(-),(+) 0.482,(+) 0.001
2,Green rated 20,0.001,(+),0.091,(+),(+) 0.000,(+) 0.179
3,Green rated 34,0.0,(+),0.42,(+),(+) 0.000,(+) 0.311
4,Fossil 20,0.291,(-),0.054,(+),(-) 0.031,(+) 0.607
5,Fossil 34,0.404,(-),0.044,(+),(-) 0.016,(+) 0.177
6,Fossil 100,0.155,(-),0.004,(+),(-) 0.000,(+) 0.038
7,Renewable 20,0.339,(-),0.059,(+),(-) 0.468,(+) 0.205
8,Renewable 34,0.562,(-),0.003,(+),(-) 0.311,(+) 0.024


In [7]:
# Display the supplementary table (the one saved as ResultsSummarySupplementaryTable.csv).
results_sup

Unnamed: 0,Portfolio,t test for fractional change,Truncated t test for fractional change,t test for gSD change,Truncated t test for gSD change
0,Neutral rated 20,(+) 0.350,(+) 0.225,(-) 0.561,(+) 0.721
1,Neutral rated 34,(+) 0.279,(+) 0.160,(-) 0.509,(+) 0.930
2,Green rated 20,(+) 0.000,(+) 0.000,(+) 0.051,(+) 0.236
3,Green rated 34,(+) 0.000,(+) 0.000,(+) 0.091,(+) 0.275
4,Fossil 20,(-) 0.236,(-) 0.233,(+) 0.437,(+) 0.261
5,Fossil 34,(-) 0.208,(-) 0.117,(+) 0.242,(+) 0.123
6,Fossil 100,(-) 0.340,(-) 0.009,(+) 0.067,(+) 0.006
7,Renewable 20,(-) 0.448,(-) 0.456,(+) 0.643,(+) 0.184
8,Renewable 34,(-) 0.424,(-) 0.190,(+) 0.005,(+) 0.116
