In [None]:
import os

import numpy as np
import pandas as pd

In [None]:
def rank_factors(info):
    """Add a quintile-rank column for every column of ``info``.

    For each existing column ``c`` a new column ``"c Rank"`` is written into
    ``info`` itself, holding the ``pd.qcut`` quintile labels ``c1`` (lowest
    fifth) to ``c5`` (highest fifth). The same, now extended, DataFrame is
    returned.
    """
    quintiles = range(1, 6)
    # Snapshot the column names first so the freshly added rank columns are
    # not themselves ranked
    for factor in list(info.columns):
        quintile_labels = [factor + str(q) for q in quintiles]
        info[factor + " Rank"] = pd.qcut(info[factor], 5, labels=quintile_labels)
    return info

In [None]:
def fpg_prep(info):
    """Turn the ranked factor table into transactions for FP-growth.

    Returns a tuple ``(final, new)``:

    * ``final`` - a list with one list of rank labels per row of ``info``
    * ``new``   - the rank-only DataFrame with the "Y Rank" column removed
    """
    # Dropping the raw factor values leaves only the rank columns
    ranked = info.drop(['X', 'Z', 'Y', 'D'], axis=1)
    # Same table without the ranked returns
    new = ranked.drop("Y Rank", axis=1)
    # reset_index() moves the old index into column 0, which is skipped below
    ranked = ranked.reset_index()
    rank_columns = ranked.columns[1:]
    final = [ranked.loc[row, rank_columns].tolist() for row in range(len(ranked))]
    return final, new

In [None]:
# Function that mines association and lift rules, and returns the 5th-quintile returns column and the true/false DataFrame used by the FP-growth algorithm
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules
def rules(final):
    """Mine frequent itemsets and rules from a list of transactions.

    ``final`` is a list of transactions (lists of rank labels). Returns
    ``(eg, high_returns, asso_rules, lift_rules)``:

    * ``eg``           - one-hot true/false DataFrame of the transactions
    * ``high_returns`` - the ``Y5`` column of ``eg``
    * ``asso_rules``   - rules kept by confidence (threshold 0.2)
    * ``lift_rules``   - rules kept by lift (threshold 1.2)
    """
    # One-hot encode: one boolean column per distinct item
    encoder = TransactionEncoder()
    encoded = encoder.fit(final).transform(final)
    eg = pd.DataFrame(encoded, columns=encoder.columns_)
    # Membership of the highest returns quintile
    high_returns = eg.Y5
    # Frequent itemsets at a minimum support of 5%
    freq_items = fpgrowth(eg, min_support=0.05, use_colnames=True)
    # Two rule sets from the same itemsets, filtered on different metrics
    asso_rules = association_rules(freq_items, metric="confidence", min_threshold=0.2)
    lift_rules = association_rules(freq_items, metric="lift", min_threshold=1.2)
    return eg, high_returns, asso_rules, lift_rules

In [None]:
def high_ret_rules(asso_rules):
    """Collect the antecedents of every rule whose consequents include "Y5".

    Returns a list with one list of antecedent items per matching rule, in
    the row order of ``asso_rules``.
    """
    target = {"Y5"}
    return [
        list(asso_rules.loc[row, 'antecedents'])
        for row in asso_rules.index.tolist()
        if target.issubset(set(asso_rules.loc[row, 'consequents']))
    ]

In [None]:
def get_rules(unique_asso):
    """Normalise antecedents so that every entry is a list.

    Entries that already are lists are kept unchanged; any other entry is
    wrapped in a single-element list. Returns the resulting list of lists.
    """
    return [asso if isinstance(asso, list) else [asso] for asso in unique_asso]

In [None]:
def rules_for_period(info):
    """Mine the distinct high-return association rules for one data set.

    Ranks the factors, builds the transactions, mines the association rules
    and keeps the antecedents of the rules that imply ``Y5``.

    Returns ``(rules_set, eg, high_returns, info)``:

    * ``rules_set``    - list of rules, each a list of item labels (strings);
      items are sorted within a rule and the rules are sorted, so the result
      is deterministic and free of duplicates
    * ``eg``           - true/false transaction DataFrame from ``rules``
    * ``high_returns`` - the ``Y5`` column of ``eg``
    * ``info``         - the ranked table without the raw X, Z, Y, D columns
    """
    info = rank_factors(info)
    final, _ = fpg_prep(info)
    eg, high_returns, asso_rules, _ = rules(final)
    factors = high_ret_rules(asso_rules)
    info = info.drop(['X', 'Z', 'Y', 'D'], axis=1)

    # De-duplicate on the item set itself. Sending the list of lists through
    # np.unique(np.array(...)) is unsafe: rules of equal length are flattened
    # into single items (losing multi-item rules), and rules of mixed length
    # form a ragged array that recent NumPy versions refuse to build.
    distinct = {tuple(sorted(str(item) for item in rule)) for rule in get_rules(factors)}
    rules_set = [list(rule) for rule in sorted(distinct)]
    return rules_set, eg, high_returns, info

# Chi Squared

In [None]:
def expected_freq(info, rule):
    """Return the actual and the expected frequency of ``rule -> Y5``.

    Parameters
    ----------
    info : DataFrame of rank labels, including a ``'Y Rank'`` column.
    rule : list of antecedent item labels, e.g. ``['X5', 'Z5']``. It is not
        modified.

    Returns
    -------
    (actual_freq, ef_ind)
        ``actual_freq`` is the number of rows containing every antecedent
        item together with ``'Y5'``. ``ef_ind`` is the count expected if the
        antecedent and ``Y5`` were independent:
        ``P(Y5) * P(antecedent) * number_of_rows``.
    """
    antecedent = list(rule)
    data_len = len(info)

    # Number of assets with top quintile Y
    num_ret_ind = len(info[info['Y Rank'] == 'Y5'])

    # A row supports the antecedent only when ALL of its items are present,
    # which is the same criterion the actual frequency uses below. Counting
    # rows that contain ANY item would overstate the expectation for
    # multi-item rules.
    items_present = info.isin(antecedent).sum(axis=1)
    num_rule_ind = int((items_present >= len(antecedent)).sum())
    ef_ind = (num_ret_ind/data_len)*(num_rule_ind/data_len)*data_len

    # Actual frequency: rows holding every antecedent item and Y5. A new list
    # is built so the caller's rule is never touched.
    full_rule = antecedent + ['Y5']
    items_present = info.isin(full_rule).sum(axis=1)
    actual_freq = int((items_present >= len(full_rule)).sum())
    return actual_freq, ef_ind

In [None]:
from scipy import stats
# Function to get rules that pass the chi-squared pruning
def causal_chi(info, rules):
    """Keep the rules whose chi-squared statistic is significant.

    For every rule the statistic ``(actual - expected)**2 / expected`` is
    computed from ``expected_freq`` and compared against a chi-squared
    distribution; a rule is kept when the CDF value exceeds 0.99.

    Parameters
    ----------
    info : DataFrame of rank labels, including a ``'Y Rank'`` column.
    rules : iterable of rules; each rule is a list of item labels, or a
        single label given as a string.

    Returns
    -------
    list of the rules (as lists) that pass the test.
    """
    causal = []
    # Degrees of freedom depend only on the data, so compute them once
    dof = len(info['Y Rank'].unique())-1
    for rule in rules:
        # A lone item may arrive as a (NumPy) string instead of a list
        if isinstance(rule, str):
            rule = [rule]
        actual_freq, ef_ind = expected_freq(info, rule)
        # With an expected frequency of zero the statistic is undefined;
        # such a rule cannot be tested, so it is not kept
        if ef_ind == 0:
            continue
        stat = ((actual_freq-ef_ind)**2)/ef_ind
        p = stats.chi2.cdf(stat, dof)
        if p > 0.99:
            causal.append(rule)
    return causal

# LLM

In [None]:
#!pip install --upgrade openai wandb
#!pip install requests

In [None]:
import requests
import json

In [None]:
# Set parameters for the OpenAI API.
# The key is taken from the OPENAI_API_KEY environment variable so that it
# never has to be written into (and saved with) the notebook; the placeholder
# is only a fallback for when the variable is not set.
API_KEY = os.environ.get('OPENAI_API_KEY', 'Put API key here')
API_ENDPOINT = "https://api.openai.com/v1/chat/completions"

In [None]:
def generate_chat_completion(messages, model="gpt-3.5-turbo", temperature=1, max_tokens=None, timeout=60):
    """Send a chat-completion request and return the reply text.

    Parameters
    ----------
    messages : list of ``{"role": ..., "content": ...}`` dictionaries.
    model : model name sent in the request.
    temperature : sampling temperature sent in the request.
    max_tokens : optional cap on the reply length; only sent when given.
    timeout : seconds to wait for the HTTP request before ``requests``
        raises, so a stalled connection cannot block forever.

    Returns
    -------
    The ``content`` of the first choice in the response.

    Raises
    ------
    Exception
        If the endpoint answers with a status code other than 200.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }

    data = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    # Forward the limit only when the caller asked for one
    if max_tokens is not None:
        data["max_tokens"] = max_tokens

    response = requests.post(API_ENDPOINT, headers=headers, data=json.dumps(data), timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Error {response.status_code}: {response.text}")
    return response.json()["choices"][0]["message"]["content"]

In [None]:
# Wording used in the LLM prompt for each quintile digit that ends a rule item
dic = {
    '1': 'very low',
    '2': 'low',
    "3": 'medium',
    '4': 'high',
    '5': 'very high',
}
def rephrase(rule):
    """Split a rule item such as ``"X5"`` into ``(quantifier, factor)``.

    The last character selects the wording from ``dic``; everything before
    it is returned as the factor name.
    """
    factor, quintile = rule[:-1], rule[-1]
    return dic[quintile], factor

In [None]:
# Get rules that survived the LLM pruning
import time
def _causal_question(rule):
    """Build the yes/no question asking whether the rule causes a very high Y."""
    clauses = []
    for item in rule:
        quant, factor = rephrase(item)
        clauses.append('a ' + quant + ' ' + factor + ' value')
    if len(clauses) == 1:
        return 'Does ' + clauses[0] + ' cause a very high Y value?'
    return 'Does ' + ', '.join(clauses[:-1]) + ' and ' + clauses[-1] + ' cause a very high Y value?'


def causal_LLM(rules, content, max_retries=None):
    """Keep the rules that the LLM judges to be causal.

    Parameters
    ----------
    rules : iterable of rules; each rule is a list of item labels, or a
        single label given as a string.
    content : system message sent with every question.
    max_retries : how many times a failed request is retried before the
        error is re-raised. ``None`` (the default) keeps retrying until the
        request succeeds.

    Returns
    -------
    list of the rules (as lists) whose answer starts with "yes".
    """
    causal = []
    for rule in rules:
        # A lone item may arrive as a (NumPy) string instead of a list
        if isinstance(rule, str):
            rule = [rule]
        messages = [{"role": "system", "content": content},
                    {"role": "user", "content": _causal_question(rule)}]

        # Ask the LLM, retrying failed requests. Only Exception is caught so
        # that the loop can still be interrupted from the keyboard, and each
        # retry waits a little longer (capped at 30 s) instead of hammering
        # the endpoint.
        attempt = 0
        while True:
            try:
                response_text = generate_chat_completion(messages)
                break
            except Exception as error:
                if max_retries is not None and attempt >= max_retries:
                    raise
                print('error', error)
                attempt += 1
                time.sleep(min(2 ** attempt, 30))

        # Accept "yes" regardless of capitalisation or leading whitespace
        if response_text.strip().lower().startswith('yes'):
            causal.append(rule)

    return causal

# Odds ratio

In [None]:
def get_fair_datasets(true_control, false_control):
    """Match exposed and unexposed rows that share the same control values.

    ``true_control`` and ``false_control`` hold the control columns of the
    rows where the rule's antecedent is, respectively is not, satisfied.
    Returns ``(true_match, false_match)``: the index labels of the rows in
    each frame whose control values also occur in the other frame. Both
    lists are empty when either input is empty.
    """
    true_match, false_match = [], []

    if not (true_control.empty or false_control.empty):
        # Control patterns that occur in both groups
        shared = pd.merge(true_control, false_control, how='inner')

        # For every shared pattern, record which rows carry it in each group
        for _, pattern in shared.iterrows():
            in_false = (false_control == pattern).all(axis=1)
            in_true = (true_control == pattern).all(axis=1)
            false_match.extend(false_control.index[in_false].tolist())
            true_match.extend(true_control.index[in_true].tolist())

    return true_match, false_match

In [None]:
def get_oddsratio_CI(exposure, non_exposure, returns, z=1.15):
    """Confidence bounds of the matched-pair odds ratio of a rule.

    ``exposure`` and ``non_exposure`` are paired position by position. The
    index label of each element is looked up in ``returns`` to see whether
    that row has the consequent. Only discordant pairs enter the estimate:

    * ``n12`` - the exposed row has the consequent, its match does not
    * ``n21`` - the exposed row lacks the consequent, its match has it

    Parameters
    ----------
    exposure, non_exposure : Series whose index labels identify the matched
        rows; both are expected to have the same length.
    returns : boolean Series indexed by the same labels.
    z : multiplier of the standard error that sets the interval width.
        The default 1.15 is the value this analysis has always used.
        NOTE(review): this was described as an 80% interval, but a two-sided
        80% normal interval uses z of about 1.28 - confirm the intended level.

    Returns
    -------
    (lower_bound, upper_bound) of the odds ratio ``n12 / n21``.
    """
    n12 = 0
    n21 = 0
    for i in range(len(exposure)):
        exposed_hit = returns.loc[exposure.index[i]]
        matched_hit = returns.loc[non_exposure.index[i]]
        if (exposed_hit == True) and (matched_hit == False):
            n12 += 1
        elif (exposed_hit == False) and (matched_hit == True):
            n21 += 1
    # A zero count would mean dividing by zero or taking log(0), so empty
    # cells are set to one
    if n21 == 0:
        n21 = 1
    if n12 == 0:
        n12 = 1
    # Point estimate and symmetric interval on the log scale
    odds_ratio = n12/n21
    half_width = z*np.sqrt((1/n12) + (1/n21))
    lower_bound = np.exp(np.log(odds_ratio) - half_width)
    upper_bound = np.exp(np.log(odds_ratio) + half_width)
    return lower_bound, upper_bound

In [None]:
def get_causal_rules(eg, rules, returns):
    """Keep the rules whose odds-ratio lower bound exceeds one.

    For every rule, rows where all antecedent items are true are matched
    with rows where all of them are false but the remaining (control)
    columns agree. The rule is kept when the lower confidence bound of the
    odds ratio computed on those matched rows is greater than 1.

    ``eg`` is the true/false transaction DataFrame, ``rules`` the candidate
    rules and ``returns`` the true/false column of top-quintile returns.
    """
    quintiles = ("1", "2", "3", "4", "5")
    causal_rules = []
    for rule in rules:
        # A lone item may arrive as a NumPy string instead of a list
        if type(rule) == np.str_:
            rule = [rule]

        # Rows where every antecedent item holds, and rows where none does
        antecedent_flags = eg[rule].values
        all_true = np.ones(len(rule), dtype=bool)
        all_false = np.zeros(len(rule), dtype=bool)
        exposed = eg[np.all(antecedent_flags == all_true, axis=1)]
        unexposed = eg[np.all(antecedent_flags == all_false, axis=1)]

        # Everything except the returns columns and the quintile columns of
        # the antecedent factors acts as a control variable
        drop_cols = ["Y" + q for q in quintiles]
        drop_cols += [cond[:-1] + q for cond in rule for q in quintiles]

        # One row per distinct control pattern, so that each pattern matches
        # at most one row on either side
        true_control = exposed.drop(drop_cols, axis=1).drop_duplicates(keep='first')
        false_control = unexposed.drop(drop_cols, axis=1).drop_duplicates(keep='first')

        # Index labels of the matched rows
        true_match, false_match = get_fair_datasets(true_control, false_control)

        # Returns of the matched rows
        exposure_returns = returns[true_match]
        non_exposure_returns = returns[false_match]

        lower, _ = get_oddsratio_CI(exposure_returns, non_exposure_returns, returns)
        if lower > 1:
            causal_rules.append(rule)
    return causal_rules

# Simulation

In [None]:
def get_all_rule_sets(period_data, content):
    """Mine the association rules of a data set and prune them three ways.

    Returns ``(chi, LLM, odds, assoc_rules)``: the rules surviving the
    chi-squared test, the LLM check and the odds-ratio test, followed by
    the unpruned association rules. ``content`` is the system message for
    the LLM check.
    """
    assoc_rules, eg, ret, info = rules_for_period(period_data)

    # Every pruning method starts from the same association rules
    chi_rules = causal_chi(info, assoc_rules)
    llm_rules = causal_LLM(assoc_rules, content)
    odds_rules = get_causal_rules(eg, assoc_rules, ret)

    print('done')
    return chi_rules, llm_rules, odds_rules, assoc_rules

In [None]:
def get_unique_elements(input_list):
    """Return the distinct elements of ``input_list`` in first-seen order.

    Membership is tested with ``in``, so unhashable elements such as lists
    are supported.
    """
    seen = []
    for candidate in input_list:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen

In [None]:
def combine_rules(rule_set1, rule_set2):
    """Build the OR and AND ensembles of two rule sets.

    Returns ``(or_rules, and_rules)``: the distinct rules found in either
    set, and the rules of ``rule_set1`` that also appear in ``rule_set2``.
    """
    union = get_unique_elements(rule_set1 + rule_set2)
    intersection = list(filter(lambda rule: rule in rule_set2, rule_set1))
    return union, intersection

In [None]:
def simulation(data, content):
    """Run one simulation: mine, prune and ensemble the rules of ``data``.

    Returns a 12-tuple: the association rules, the three pruned rule sets
    (chi-squared, LLM, odds ratio), the OR/AND ensemble of every pair of
    pruned sets, and finally the OR and the AND ensemble of all three.
    ``content`` is the system message used for the LLM pruning.
    """
    # Association rules and the three causally pruned rule sets
    chi, LLM, odds, assoc_rules = get_all_rule_sets(data, content)

    # Pairwise ensembles, each as an (OR, AND) pair
    pairwise = [combine_rules(first, second)
                for first, second in ((chi, LLM), (chi, odds), (LLM, odds))]
    (chi_or_LLM, chi_and_LLM), (chi_or_odds, chi_and_odds), (LLM_or_odds, LLM_and_odds) = pairwise

    # Three-way ensembles, built from the LLM/odds pair and the chi set
    chi_or_odds_or_LLM = combine_rules(LLM_or_odds, chi)[0]
    chi_and_odds_and_LLM = combine_rules(LLM_and_odds, chi)[1]

    return (assoc_rules, chi, LLM, odds,
            chi_or_LLM, chi_and_LLM,
            chi_or_odds, chi_and_odds,
            LLM_or_odds, LLM_and_odds,
            chi_or_odds_or_LLM, chi_and_odds_and_LLM)

# Create Data

In [None]:
def df_1(n_samples=1000):
    """Generate data with causal structure 1: no causal links.

    X, Z, D and Y are all drawn independently from a standard normal
    distribution.

    Parameters
    ----------
    n_samples : number of rows to generate (default 1000).

    Returns
    -------
    DataFrame with the columns X, Z, D, Y.
    """
    # The draw order X, Z, D, Y is part of the behaviour: it fixes which
    # random numbers each column receives under a given seed
    df1 = pd.DataFrame(np.random.normal(0, 1, n_samples), columns=['X'])

    df1['Z'] = np.random.normal(0, 1, n_samples)
    df1['D'] = np.random.normal(0, 1, n_samples)
    df1['Y'] = np.random.normal(0, 1, n_samples)

    return df1

In [None]:
def df_2(n_samples=1000):
    """Generate data with causal structure 2: X and Z both cause Y.

    X, Z and D are independent standard normal draws; Y is drawn from a
    normal distribution with mean X + Z and standard deviation 1.

    Parameters
    ----------
    n_samples : number of rows to generate (default 1000).

    Returns
    -------
    DataFrame with the columns X, Z, D, Y.
    """
    # The draw order X, Z, D, Y fixes which random numbers each column
    # receives under a given seed
    df2 = pd.DataFrame(np.random.normal(0, 1, n_samples), columns=['X'])

    df2['Z'] = np.random.normal(0, 1, n_samples)
    df2['D'] = np.random.normal(0, 1, n_samples)
    df2['Y'] = np.random.normal(df2['X']+df2['Z'], 1, n_samples)

    return df2

In [None]:
def df_3(n_samples=1000):
    """Generate data with causal structure 3: a fork, X causes both Z and Y.

    X and D are independent standard normal draws; Z and Y are each drawn
    from a normal distribution with mean X and standard deviation 1.

    Parameters
    ----------
    n_samples : number of rows to generate (default 1000).

    Returns
    -------
    DataFrame with the columns X, Z, D, Y.
    """
    # The draw order X, Z, D, Y fixes which random numbers each column
    # receives under a given seed
    df3 = pd.DataFrame(np.random.normal(0, 1, n_samples), columns=['X'])

    df3['Z'] = np.random.normal(df3['X'], 1, n_samples)
    df3['D'] = np.random.normal(0, 1, n_samples)
    df3['Y'] = np.random.normal(df3['X'], 1, n_samples)

    return df3

In [None]:
def df_4(n_samples=1000):
    """Generate data with causal structure 4: an immorality, X and Y both cause Z.

    X, Y and D are independent standard normal draws; Z is drawn from a
    normal distribution with mean X + Y and standard deviation 1, so
    nothing in the data causes Y.

    Parameters
    ----------
    n_samples : number of rows to generate (default 1000).

    Returns
    -------
    DataFrame with the columns X, Y, D, Z.
    """
    # The draw order X, Y, D, Z fixes which random numbers each column
    # receives under a given seed
    df4 = pd.DataFrame(np.random.normal(0, 1, n_samples), columns=['X'])

    df4['Y'] = np.random.normal(0, 1, n_samples)
    df4['D'] = np.random.normal(0, 1, n_samples)
    df4['Z'] = np.random.normal((df4['X']+df4['Y']), 1, n_samples)

    return df4

In [None]:
def df_5(n_samples=1000):
    """Generate data with causal structure 5: a chain, X causes Z causes Y.

    X and D are independent standard normal draws; Z is drawn from a normal
    distribution with mean X, and Y from one with mean Z (standard
    deviation 1 in both cases).

    Parameters
    ----------
    n_samples : number of rows to generate (default 1000).

    Returns
    -------
    DataFrame with the columns X, Z, D, Y.
    """
    # The draw order X, Z, D, Y fixes which random numbers each column
    # receives under a given seed
    df5 = pd.DataFrame(np.random.normal(0, 1, n_samples), columns=['X'])

    df5['Z'] = np.random.normal((df5['X']), 1, n_samples)
    df5['D'] = np.random.normal(0, 1, n_samples)
    df5['Y'] = np.random.normal((df5['Z']), 1, n_samples)

    return df5

In [None]:
# Monte Carlo experiment for causal data structure 1: 1000 simulation runs

column_names = [
    'assoc_rules', 'chi', 'LLM', 'odds',
    'chi_or_LLM', 'chi_and_LLM',
    'chi_or_odds', 'chi_and_odds',
    'LLM_or_odds', 'LLM_and_odds',
    'chi_or_odds_or_LLM', 'chi_and_odds_and_LLM',
]
# One row per run, one column per rule set
results1 = pd.DataFrame(columns=column_names)

for i in range(1000):
    start_time = time.time()

    df1 = df_1()
    content = "you only give one word, yes or no answers, no other answer is acceptable. X, Z, D and Y are independently randomly generated numbers"
    outcome = simulation(df1, content)
    # Keep the individual rule sets of the latest run available by name
    (assoc_rules, chi, LLM, odds,
     chi_or_LLM, chi_and_LLM,
     chi_or_odds, chi_and_odds,
     LLM_or_odds, LLM_and_odds,
     chi_or_odds_or_LLM, chi_and_odds_and_LLM) = outcome
    # Appending immediately keeps finished runs if a later run fails
    results1.loc[len(results1)] = list(outcome)

    # Report how long this run took, in seconds
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(elapsed_time)

In [None]:
# NOTE(review): a bare expression is only rendered when it is the last
# statement of its cell - confirm whether this line is meant to be its own cell
results1

# Print the rule sets left over from the most recent simulation run
for rule_set in (assoc_rules, chi, LLM, odds):
    print(rule_set)

In [None]:
# Monte Carlo experiment for causal data structure 2: 1000 simulation runs

column_names = [
    'assoc_rules', 'chi', 'LLM', 'odds',
    'chi_or_LLM', 'chi_and_LLM',
    'chi_or_odds', 'chi_and_odds',
    'LLM_or_odds', 'LLM_and_odds',
    'chi_or_odds_or_LLM', 'chi_and_odds_and_LLM',
]
# One row per run, one column per rule set
results2 = pd.DataFrame(columns=column_names)

for i in range(1000):
    df2 = df_2()
    content = "you only give one word, yes or no answers, no other answer is acceptable. X, D and Z are independently randomly generated numbers, Y = X + Z"
    outcome = simulation(df2, content)
    # Keep the individual rule sets of the latest run available by name
    (assoc_rules, chi, LLM, odds,
     chi_or_LLM, chi_and_LLM,
     chi_or_odds, chi_and_odds,
     LLM_or_odds, LLM_and_odds,
     chi_or_odds_or_LLM, chi_and_odds_and_LLM) = outcome
    # Appending immediately keeps finished runs if a later run fails
    results2.loc[len(results2)] = list(outcome)

In [None]:
# NOTE(review): a bare expression is only rendered when it is the last
# statement of its cell - confirm whether this line is meant to be its own cell
results2

# Print the rule sets left over from the most recent simulation run
for rule_set in (assoc_rules, chi, LLM, odds):
    print(rule_set)

In [None]:
# Monte Carlo experiment for causal data structure 3: 1000 simulation runs

column_names = [
    'assoc_rules', 'chi', 'LLM', 'odds',
    'chi_or_LLM', 'chi_and_LLM',
    'chi_or_odds', 'chi_and_odds',
    'LLM_or_odds', 'LLM_and_odds',
    'chi_or_odds_or_LLM', 'chi_and_odds_and_LLM',
]
# One row per run, one column per rule set
results3 = pd.DataFrame(columns=column_names)

for i in range(1000):
    start_time = time.time()

    df3 = df_3()
    content = "you only give one word, yes or no answers, no other answer is acceptable. X and D are randomly generated numbers, Z and Y are independent randomly generated numbers with mean X"
    outcome = simulation(df3, content)
    # Keep the individual rule sets of the latest run available by name
    (assoc_rules, chi, LLM, odds,
     chi_or_LLM, chi_and_LLM,
     chi_or_odds, chi_and_odds,
     LLM_or_odds, LLM_and_odds,
     chi_or_odds_or_LLM, chi_and_odds_and_LLM) = outcome
    # Appending immediately keeps finished runs if a later run fails
    results3.loc[len(results3)] = list(outcome)

    # Report how long this run took, in seconds
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(elapsed_time)

In [None]:
# NOTE(review): a bare expression is only rendered when it is the last
# statement of its cell - confirm whether this line is meant to be its own cell
results3

# Print the rule sets left over from the most recent simulation run
for rule_set in (assoc_rules, chi, LLM, odds):
    print(rule_set)

In [None]:
# Monte Carlo experiment for causal data structure 4: 1000 simulation runs

column_names = [
    'assoc_rules', 'chi', 'LLM', 'odds',
    'chi_or_LLM', 'chi_and_LLM',
    'chi_or_odds', 'chi_and_odds',
    'LLM_or_odds', 'LLM_and_odds',
    'chi_or_odds_or_LLM', 'chi_and_odds_and_LLM',
]
# One row per run, one column per rule set
results4 = pd.DataFrame(columns=column_names)

for i in range(1000):
    start_time = time.time()

    df4 = df_4()
    content = "you only give one word, yes or no answers, no other answer is acceptable. X, D and Y are independent randomly generated numbers, Z is a randomly generated numbers with mean X + Y"
    outcome = simulation(df4, content)
    # Keep the individual rule sets of the latest run available by name
    (assoc_rules, chi, LLM, odds,
     chi_or_LLM, chi_and_LLM,
     chi_or_odds, chi_and_odds,
     LLM_or_odds, LLM_and_odds,
     chi_or_odds_or_LLM, chi_and_odds_and_LLM) = outcome
    # Appending immediately keeps finished runs if a later run fails
    results4.loc[len(results4)] = list(outcome)

    # Report how long this run took, in seconds
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(elapsed_time)

In [None]:
# NOTE(review): a bare expression is only rendered when it is the last
# statement of its cell - confirm whether this line is meant to be its own cell
results4

# Print the rule sets left over from the most recent simulation run
for rule_set in (assoc_rules, chi, LLM, odds):
    print(rule_set)

In [None]:
# Monte Carlo experiment for causal data structure 5: 1000 simulation runs

column_names = [
    'assoc_rules', 'chi', 'LLM', 'odds',
    'chi_or_LLM', 'chi_and_LLM',
    'chi_or_odds', 'chi_and_odds',
    'LLM_or_odds', 'LLM_and_odds',
    'chi_or_odds_or_LLM', 'chi_and_odds_and_LLM',
]
# One row per run, one column per rule set
results5 = pd.DataFrame(columns=column_names)

for i in range(1000):
    start_time = time.time()

    df5 = df_5()
    content = "you only give one word, yes or no answers, no other answer is acceptable. X and D are randomly generated numbers, Z is randomly generated numbers with mean X, Y is randomly generated numbers with mean Z"
    outcome = simulation(df5, content)
    # Keep the individual rule sets of the latest run available by name
    (assoc_rules, chi, LLM, odds,
     chi_or_LLM, chi_and_LLM,
     chi_or_odds, chi_and_odds,
     LLM_or_odds, LLM_and_odds,
     chi_or_odds_or_LLM, chi_and_odds_and_LLM) = outcome
    # Appending immediately keeps finished runs if a later run fails
    results5.loc[len(results5)] = list(outcome)

    # Report how long this run took, in seconds
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(elapsed_time)

In [None]:
# NOTE(review): a bare expression is only rendered when it is the last
# statement of its cell - confirm whether this line is meant to be its own cell
results5

# Print the rule sets left over from the most recent simulation run
for rule_set in (assoc_rules, chi, LLM, odds):
    print(rule_set)

# Testing

In [None]:
# Functions for calculating performance metrics for the MC experiments

def count_empty(data):
    """Count, for every column of ``data``, the entries of length zero.

    Returns a dictionary mapping each column name to that count.
    """
    return {
        column: data[column].apply(lambda entry: len(entry) == 0).sum()
        for column in data.columns
    }

def mean_len(data):
    """Compute, for every column of ``data``, the mean length of its entries.

    Returns a dictionary mapping each column name to that mean.
    """
    return {
        column: np.mean(data[column].apply(len))
        for column in data.columns
    }

def Caus_rule_count(data, causal_lis):
    """Count the mined rules per column and how many of them are causal.

    Parameters
    ----------
    data : DataFrame whose entries are rule sets (lists of rules).
    causal_lis : list of the rules that count as causal.

    Returns
    -------
    dict mapping each column name to ``{'causal': ..., 'total': ...}``,
    pooled over all rows: the number of rules that appear in
    ``causal_lis`` and the number of rules overall.
    """
    list_counts = {}

    for column in data.columns:
        # explode() yields one entry per rule, but it also turns every empty
        # rule set into a single NaN; those placeholders are not rules and
        # must not be counted in the total
        mined_rules = data[column].explode().dropna().tolist()
        caus_sum = sum(mined_rules.count(rule) for rule in causal_lis)

        list_counts[column] = {'causal': caus_sum, 'total': len(mined_rules)}
    return list_counts

In [None]:
# Summary table for causal structure 1, starting with the number of empty
# rule sets per method
results_table_1 = pd.DataFrame(count_empty(results1), index=['Number of Empty Sets'])

In [None]:
# Add a row with the mean length of the entries in each results column
# (mean_len applies len() to every entry and averages per column)
results_table_1.loc['Mean Rule Length'] = mean_len(results1)

In [None]:
# Show the summary table for causal structure 1
results_table_1

## Results 2

In [None]:
# Rules treated as genuinely causal for structure 2, where Y is drawn around
# X + Z (see df_2). The two-item rule is listed in both item orders because
# rules are compared as lists.
causal_lis = [['X5'],['Z5'], ['X5','Z5'], ['Z5','X5']]

In [None]:
# Make a results table for causal structure 2
results_table_2 = pd.DataFrame(count_empty(results2), ['Number of Empty Sets',])
# Add mean rule length to data
results_table_2.loc['Mean Rule Length'] = mean_len(results2)

# Pooled counts per method, each of the form {'causal': ..., 'total': ...}
rule_counts_2 = Caus_rule_count(results2, causal_lis)
# Causal rules present before any pruning, i.e. the most any method could keep
available_causal_2 = rule_counts_2['assoc_rules']['causal']

# Store real percentages under the two labels rather than the raw count
# dictionaries; a zero denominator yields NaN.
# Share of each method's rules that are causal
results_table_2.loc['Percentage of rules mined that are causal '] = {
    method: 100 * counts['causal'] / counts['total'] if counts['total'] else np.nan
    for method, counts in rule_counts_2.items()
}
# Share of the available causal rules that each method kept.
# NOTE(review): "could have been" is read as "present in assoc_rules" - confirm.
results_table_2.loc['Percentage of causal rules mind that could have been'] = {
    method: 100 * counts['causal'] / available_causal_2 if available_causal_2 else np.nan
    for method, counts in rule_counts_2.items()
}

In [None]:
# Show the summary table for causal structure 2
results_table_2

## Results 3

In [None]:
# Rules treated as genuinely causal for structure 3, where Y is drawn around X
# only (see df_3); Z shares the cause X but does not itself influence Y.
causal_lis = [['X5']]

In [None]:
# Make a results table for causal structure 3
results_table_3 = pd.DataFrame(count_empty(results3), ['Number of Empty Sets',])
# Add mean rule length to data
results_table_3.loc['Mean Rule Length'] = mean_len(results3)

# Pooled counts per method, each of the form {'causal': ..., 'total': ...}
rule_counts_3 = Caus_rule_count(results3, causal_lis)
# Causal rules present before any pruning, i.e. the most any method could keep
available_causal_3 = rule_counts_3['assoc_rules']['causal']

# Store real percentages under the two labels rather than the raw count
# dictionaries; a zero denominator yields NaN.
# Share of each method's rules that are causal
results_table_3.loc['Percentage of rules mined that are causal '] = {
    method: 100 * counts['causal'] / counts['total'] if counts['total'] else np.nan
    for method, counts in rule_counts_3.items()
}
# Share of the available causal rules that each method kept.
# NOTE(review): "could have been" is read as "present in assoc_rules" - confirm.
results_table_3.loc['Percentage of causal rules mind that could have been'] = {
    method: 100 * counts['causal'] / available_causal_3 if available_causal_3 else np.nan
    for method, counts in rule_counts_3.items()
}

In [None]:
# Show the summary table for causal structure 3
results_table_3

## Results 4

In [None]:
# Summary table for causal structure 4: number of empty rule sets per method,
# then the mean length of the entries in each results column
results_table_4 = pd.DataFrame(count_empty(results4), index=['Number of Empty Sets'])
results_table_4.loc['Mean Rule Length'] = mean_len(results4)

In [None]:
# Show the summary table for causal structure 4
results_table_4

## Results 5

In [None]:
# Rules treated as genuinely causal for structure 5, the chain X -> Z -> Y
# (see df_5): Y is drawn around Z, so Z5 is the direct cause of a high Y and
# X5 acts on Y only through Z.
# NOTE(review): whether the indirect cause X5 should count as causal is a
# modelling choice - confirm.
causal_lis = [['X5'],['Z5'], ['X5','Z5'], ['Z5','X5']]

In [None]:
# Make a results table for causal structure 5
results_table_5 = pd.DataFrame(count_empty(results5), ['Number of Empty Sets',])
# Add mean rule length to data
results_table_5.loc['Mean Rule Length'] = mean_len(results5)

# Pooled counts per method, each of the form {'causal': ..., 'total': ...}
rule_counts_5 = Caus_rule_count(results5, causal_lis)
# Causal rules present before any pruning, i.e. the most any method could keep
available_causal_5 = rule_counts_5['assoc_rules']['causal']

# Store real percentages under the two labels rather than the raw count
# dictionaries; a zero denominator yields NaN.
# Share of each method's rules that are causal
results_table_5.loc['Percentage of rules mined that are causal '] = {
    method: 100 * counts['causal'] / counts['total'] if counts['total'] else np.nan
    for method, counts in rule_counts_5.items()
}
# Share of the available causal rules that each method kept.
# NOTE(review): "could have been" is read as "present in assoc_rules" - confirm.
results_table_5.loc['Percentage of causal rules mind that could have been'] = {
    method: 100 * counts['causal'] / available_causal_5 if available_causal_5 else np.nan
    for method, counts in rule_counts_5.items()
}

In [None]:
# Show the summary table for causal structure 5
results_table_5

In [None]:
# Write every results table to its own CSV file, keeping the row labels
for table, path in (
    (results_table_1, 'results_table_1.csv'),
    (results_table_2, 'results_table_2.csv'),
    (results_table_3, 'results_table_3.csv'),
    (results_table_4, 'results_table_4.csv'),
    (results_table_5, 'results_table_5.csv'),
):
    table.to_csv(path, index=True)
