# Load and format data into tables
This code finds data from previous runs of code and formats it nicely.

In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
# Code choices to decide what to make into tables. Add the subfolder here for e.g. OPEC analysis
version = "version18"
subfolderlist = [""]
# Name of folders to open
subsubfolder_labels = {
    "constsust_20": "Neutral rated 20", 
    "constsust_32": "Neutral rated 32", 
    "greencred_20": "Green rated 20",
    "greencred_32": "Green rated 32",
    "fossil_20": "Fossil 20", 
    "fossil_32": "Fossil 32", 
    "renewable_20": "Renewable 20",
    "renewable_all": "Renewable 32",
}
# Directory of results. 
results_dir = "./output/"
# May want to use results with different length factors for ranking methods and OLS methods. 
afterlength = 0
olsafterlength = 0

# File names shouldn't need editing
kstestfolder = f"before0_after{afterlength}_normconstsust_32_1"
olstestfolder = f"before0_after{olsafterlength}_normconstsust_32_cleaned"
kstestfilename = "Kolmogorov_Smirnoff_test_results.csv"
olsresultsfile = "OLSsummary.txt"
olsresultsSqfile = "OLSsummaryDailyVar.txt"
olsresultsCopYearfile = "OLSsummary_COPyear.txt"
olsresultsSqCopYearfile = "OLSsummaryDailyVar_COPyear.txt"
diffCOPYearTrendFile = "COPyearDiffTrend.csv"
varCOPYearTrendFile = "COPyearVarianceTrend.csv"


In [3]:
# Define some functions required to read the COP values from the OLS regression table
def extract_cop_value(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    for line in lines:
        if line.strip().startswith("COP"):
            # Split the line to find the COP value
            parts = line.split()
            if len(parts) > 1:
                # Reformat the percentage for convenience
                parts = [float(x) for x in parts[1:]]
                parts[4] = np.round(parts[4], 3)
                return parts
    
    return None  # Return None if "COP" is not found in the file
# Define some functions required to read the COP values from the OLS regression table
def extract_cop_year_value(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    for line in lines:
        if line.strip().startswith("COPnumbers"):
            # Split the line to find the COP value
            parts = line.split()
            if len(parts) > 1:
                # Reformat the percentage for convenience
                parts = [float(x) for x in parts[1:]]
                parts[4] = np.round(parts[4], 3)
                return parts
    
    return None  # Return None if "COP" is not found in the file

# Function to read the trend line experiment data
def extract_cop_year_trend(file_path):
    df = pd.read_csv(file_path, index_col=0)
    return df.loc["x1", :] 

In [4]:
subsubfolder_labels

{'constsust_20': 'Neutral rated 20',
 'constsust_32': 'Neutral rated 32',
 'greencred_20': 'Green rated 20',
 'greencred_32': 'Green rated 32',
 'fossil_20': 'Fossil 20',
 'fossil_32': 'Fossil 32',
 'renewable_20': 'Renewable 20',
 'renewable_all': 'Renewable 32'}

In [5]:
def convert_one_tailed_to_two(p):
    return 1-2 * abs(p-0.5)

In [6]:
for subfolder in subfolderlist:
    results = []
    results_sup = []
    for key, val in subsubfolder_labels.items():
        tempolstestfolder = olstestfolder.replace("constsust_32", "None") if ("constsust" in key) else olstestfolder
        tempkstestfolder = kstestfolder.replace("constsust_32", "None") if ("constsust" in key) else kstestfolder
        subresult = pd.read_csv(
            os.path.join(results_dir, version, subfolder, key, tempkstestfolder, kstestfilename)
        )
        if type(subresult["testStat"][0]) == type(""):
            subresult = subresult.iloc[:-1, :]
            subresult["testStat"] = pd.to_numeric(subresult["testStat"])
        subOLSresult = extract_cop_value(
            os.path.join(results_dir, version, subfolder, key, tempolstestfolder, olsresultsfile)
        )
        subOLSsqresult = extract_cop_value(
            os.path.join(results_dir, version, subfolder, key, tempolstestfolder, olsresultsSqfile)
        )
        subOLSresultCOPyearTerm = extract_cop_year_value(
            os.path.join(results_dir, version, subfolder, key, tempolstestfolder, olsresultsCopYearfile)
        )
        subOLSsqresultCOPyearTerm = extract_cop_year_value(
            os.path.join(results_dir, version, subfolder, key, tempolstestfolder, olsresultsSqCopYearfile)
        )
        diffPeriodCOPyearTrend = extract_cop_year_trend(
             os.path.join(results_dir, version, subfolder, key, tempkstestfolder, diffCOPYearTrendFile)
        )
        varPeriodCOPyearTrend = extract_cop_year_trend(
             os.path.join(results_dir, version, subfolder, key, tempkstestfolder, varCOPYearTrendFile)
        )
        

        results.append(
            pd.Series({
                "Portfolio": val,
                "Difference rank": f'{subresult.loc[subresult["Test"]=="RankTestDiff", "testStat"].iloc[0]:.3} ({convert_one_tailed_to_two(subresult.loc[subresult["Test"]=="RankTestDiff", "pval"].iloc[0]):.3f})', 
                "gSD rank": f'{subresult.loc[subresult["Test"]=="RankTestgeoSD", "testStat"].iloc[0]:.3f} ({convert_one_tailed_to_two(subresult.loc[subresult["Test"]=="RankTestgeoSD", "pval"].iloc[0]):.3f})', 
                "COP linear diff term": f'{subOLSresult[0]:.4f} ({subOLSresult[3]:.4f})',
                "COP linear range term": f'{subOLSsqresult[0]:.4f} ({subOLSsqresult[3]:.4f})',
            })
        )
        results_sup.append(
            pd.Series({
                "Portfolio": val,
                "COP period trend diff fit": f'{diffPeriodCOPyearTrend["BestEstimate"]:.4f} [{diffPeriodCOPyearTrend["LowerConf"]:.4f} - {diffPeriodCOPyearTrend["UpperConf"]:.4f}]',
                "COP period trend gSD fit": f'{diffPeriodCOPyearTrend["BestEstimate"]:.4f} [{diffPeriodCOPyearTrend["LowerConf"]:.4f} - {diffPeriodCOPyearTrend["UpperConf"]:.4f}]',
                "COP trend linear diff term": f'{subOLSresultCOPyearTerm[0]:.4f} ({subOLSresultCOPyearTerm[3]:.4f})',
                "COP trend linear range term": f'{subOLSsqresultCOPyearTerm[0]:.4f} ({subOLSsqresultCOPyearTerm[3]:.4f})',
                
            })
        )
    results = pd.DataFrame(results)
    results.to_csv(os.path.join(results_dir, version, subfolder, "ResultsSummaryTable.csv"))
    results_sup = pd.DataFrame(results_sup)
    results_sup.to_csv(
        os.path.join(results_dir, version, subfolder, "ResultsSummarySupplementaryTable.csv")
    )

In [7]:
results

Unnamed: 0,Portfolio,Difference rank,gSD rank,COP linear diff term,COP linear range term
0,Neutral rated 20,0.651 (0.723),0.302 (0.585),0.0002 (0.3530),0.0005 (0.0040)
1,Neutral rated 32,0.698 (0.631),0.419 (0.816),0.0002 (0.3060),0.0005 (0.0010)
2,Green rated 20,1.0 (0.032),0.977 (0.078),0.0013 (0.0000),0.0003 (0.1850)
3,Green rated 32,0.977 (0.078),0.977 (0.078),0.0009 (0.0000),0.0002 (0.1000)
4,Fossil 20,0.256 (0.493),0.837 (0.355),-0.0006 (0.0190),0.0001 (0.5680)
5,Fossil 32,0.233 (0.447),0.860 (0.308),-0.0006 (0.0120),0.0002 (0.1960)
6,Renewable 20,0.302 (0.585),0.744 (0.539),-0.0004 (0.4230),0.0008 (0.0310)
7,Renewable 32,0.163 (0.308),0.674 (0.677),-0.0005 (0.3250),0.0008 (0.0370)


In [8]:
results_sup

Unnamed: 0,Portfolio,COP period trend diff fit,COP period trend gSD fit,COP trend linear diff term,COP trend linear range term
0,Neutral rated 20,-0.0026 [-0.1404 - 0.1352],-0.0026 [-0.1404 - 0.1352],0.0000 (0.3610),0.0000 (0.8340)
1,Neutral rated 32,0.0007 [-0.1450 - 0.1464],0.0007 [-0.1450 - 0.1464],0.0000 (0.0780),-0.0000 (0.6360)
2,Green rated 20,0.0589 [-0.1390 - 0.2568],0.0589 [-0.1390 - 0.2568],0.0000 (0.5870),-0.0000 (0.1980)
3,Green rated 32,0.0728 [-0.0682 - 0.2138],0.0728 [-0.0682 - 0.2138],0.0001 (0.0440),-0.0000 (0.1200)
4,Fossil 20,-0.0806 [-0.2124 - 0.0511],-0.0806 [-0.2124 - 0.0511],-0.0001 (0.0000),-0.0001 (0.0070)
5,Fossil 32,-0.0852 [-0.2229 - 0.0526],-0.0852 [-0.2229 - 0.0526],-0.0001 (0.0000),-0.0000 (0.2460)
6,Renewable 20,-0.1694 [-0.3355 - -0.0034],-0.1694 [-0.3355 - -0.0034],-0.0002 (0.0360),0.0001 (0.1000)
7,Renewable 32,-0.0988 [-0.3170 - 0.1194],-0.0988 [-0.3170 - 0.1194],-0.0001 (0.1160),0.0002 (0.0000)
