In [None]:
import os
import glob
import pandas as pd


def extract_short_version(s):
    start = s.find("'") + 1
    end = s.find("'", start)
    return s[start:end] 

def clean_dataframe(df):
    df["answer_binary"] = df["answer"].astype(bool).astype(int)
    df = df[~df["verb"].str.contains("NOT", na=False)].reset_index(drop=True)
    df["var1"] = df["var1"].apply(extract_short_version).str.replace(" ", "_")
    df["var2"] = df["var2"].apply(extract_short_version).str.replace(" ", "_")
    df["verb"] = df["verb"].str.replace(" ", "_")
    return df

    


results/asia\causal_relationships_affect.csv
results/asia\causal_relationships_cause.csv
results/asia\causal_relationships_increase_the_chance_.csv
results/asia\causal_relationships_influence.csv
results/asia\causal_relationships_lead_to.csv
results/asia\causal_relationships_raise_the_risk.csv
results/asia\causal_relationships_result_in.csv


In [None]:
path = "results/asia/"
files = glob.glob(os.path.join(path, "*.csv"))

df_combined = pd.DataFrame()



for file in files:
    print(file)
    df = pd.read_csv(file)
    df = clean_dataframe(df)
    df_combined = pd.concat([df_combined, df], ignore_index=True)
    

In [6]:
df_combined

Unnamed: 0,var1,var2,verb,answer,probability,answer_binary
0,Visited_Asia,Smoker,affect,False,0.999999,0
1,Smoker,Visited_Asia,affect,False,0.999999,0
2,Visited_Asia,Tuberculosis,affect,True,1.000000,1
3,Tuberculosis,Visited_Asia,affect,False,0.999999,0
4,Visited_Asia,Lung_Cancer,affect,False,1.000000,0
...,...,...,...,...,...,...
387,X-ray_Result,individual_has_either_Tuberculosis_or_Lung_Cancer,result_in,False,1.000000,0
388,individual_has_either_Tuberculosis_or_Lung_Cancer,Dyspnea,result_in,True,1.000000,1
389,Dyspnea,individual_has_either_Tuberculosis_or_Lung_Cancer,result_in,False,0.999999,0
390,X-ray_Result,Dyspnea,result_in,False,1.000000,0


In [15]:
verb_list = list(df_combined["verb"].unique())
print("Unique verbs:" , (verb_list))

Unique verbs: ['affect', 'cause', 'increase_the_chance_of', 'influence', 'lead_to', 'raise_the_risk_of', 'result_in']


In [25]:
from scipy.stats import friedmanchisquare
from scipy.stats import wilcoxon

def significant_check (df_combined, column_name, p_value):
    
    df_results = pd.DataFrame(columns=["verb1", "verb2", "statistic", "p_value", "significant"])
            
    if p_value < 0.05:
        print("Friedman test is significant, performing post-hoc tests")
        # Perform Wilcoxon signed-rank tests for all pairs of verbs
        for i in range(len(verb_list)):
            for j in range(i + 1, len(verb_list)):
                verb1 = verb_list[i]
                verb2 = verb_list[j]
                stat, p = wilcoxon(
                    df_combined[df_combined["verb"] == verb1][column_name],
                    df_combined[df_combined["verb"] == verb2][column_name]
                )

                # concatinate the results into a dataframe
                df_results = pd.concat([df_results, pd.DataFrame({
                    "verb1": [verb1],
                    "verb2": [verb2],
                    "statistic": [stat],
                    "p_value": [p],
                    "significant": [p < 0.05]
                })], ignore_index=True)


            

                print(f"Wilcoxon test between {verb1} and {verb2}: statistic={stat}, p-value={p}")
                if p < 0.05:
                    print(f"Significant difference between {verb1} and {verb2}")
    else:
        print("The distributions are not significantly different., no post-hoc tests needed")
    return df_results        

In [14]:

# Unique verbs: ['affect', 'cause', 'increase_the_chance_of', 'influence', 'lead_to', 'raise_the_risk_of', 'result_in']

friedman_stat, p_value = friedmanchisquare( 
    df_combined[df_combined["verb"] == "affect"]["answer_binary"],
    df_combined[df_combined["verb"] == "cause"]["answer_binary"],
    df_combined[df_combined["verb"] == "increase_the_chance_of"]["answer_binary"],
    df_combined[df_combined["verb"] == "influence"]["answer_binary"],
    df_combined[df_combined["verb"] == "lead_to"]["answer_binary"],
    df_combined[df_combined["verb"] == "raise_the_risk_of"]["answer_binary"],
    df_combined[df_combined["verb"] == "result_in"]["answer_binary"]
)
print("Friedman test statistic:", friedman_stat)
print("Friedman test p-value:", p_value)

significant_check(df_combined, "probability", p_value)

Unique verbs: ['affect', 'cause', 'increase_the_chance_of', 'influence', 'lead_to', 'raise_the_risk_of', 'result_in']
Friedman test statistic: 5.166666666665047
Friedman test p-value: 0.5226216531459891
The distributions are not significantly different., no post-hoc tests needed


In [26]:
from scipy.stats import friedmanchisquare
from scipy.stats import wilcoxon

friedman_stat, p_value = friedmanchisquare( 
    df_combined[df_combined["verb"] == "affect"]["probability"],
    df_combined[df_combined["verb"] == "cause"]["probability"],
    df_combined[df_combined["verb"] == "increase_the_chance_of"]["probability"],
    df_combined[df_combined["verb"] == "influence"]["probability"],
    df_combined[df_combined["verb"] == "lead_to"]["probability"],
    df_combined[df_combined["verb"] == "raise_the_risk_of"]["probability"],
    df_combined[df_combined["verb"] == "result_in"]["probability"]
)
print("Friedman test statistic:", friedman_stat)
print("Friedman test p-value:", p_value)


results_prob = significant_check(df_combined, "probability", p_value)

Friedman test statistic: 20.623880597014924
Friedman test p-value: 0.0021429213755423692
Friedman test is significant, performing post-hoc tests
Wilcoxon test between affect and cause: statistic=441.0, p-value=0.276817381620171
Wilcoxon test between affect and increase_the_chance_of: statistic=446.0, p-value=0.7442984489960107
Wilcoxon test between affect and influence: statistic=385.5, p-value=0.741265556167358
Wilcoxon test between affect and lead_to: statistic=332.5, p-value=0.1365568272459538
Wilcoxon test between affect and raise_the_risk_of: statistic=489.0, p-value=0.573534213478745
Wilcoxon test between affect and result_in: statistic=361.5, p-value=0.1189477491108064
Wilcoxon test between cause and increase_the_chance_of: statistic=455.5, p-value=0.48395800725136295
Wilcoxon test between cause and influence: statistic=362.0, p-value=0.032364972478192616
Significant difference between cause and influence
Wilcoxon test between cause and lead_to: statistic=403.0, p-value=0.397568

  df_results = pd.concat([df_results, pd.DataFrame({


In [27]:
results_prob

Unnamed: 0,verb1,verb2,statistic,p_value,significant
0,affect,cause,441.0,0.276817,False
1,affect,increase_the_chance_of,446.0,0.744298,False
2,affect,influence,385.5,0.741266,False
3,affect,lead_to,332.5,0.136557,False
4,affect,raise_the_risk_of,489.0,0.573534,False
5,affect,result_in,361.5,0.118948,False
6,cause,increase_the_chance_of,455.5,0.483958,False
7,cause,influence,362.0,0.032365,True
8,cause,lead_to,403.0,0.397569,False
9,cause,raise_the_risk_of,384.5,0.036837,True


In [28]:
def normalize_probs(df, answer_col, prob_col, new_col):
    df[new_col] = df.apply(
        lambda row: row[prob_col] if row[answer_col] == 1 else (1 - row[prob_col]),
        axis=1
    )
    return df

df_normalized = normalize_probs(df_combined, "answer_binary", "probability", "normalized_probability")

friedman_stat, p_value = friedmanchisquare( 
    df_normalized[df_normalized["verb"] == "affect"]["probability"],
    df_normalized[df_normalized["verb"] == "cause"]["probability"],
    df_normalized[df_normalized["verb"] == "increase_the_chance_of"]["probability"],
    df_normalized[df_normalized["verb"] == "influence"]["probability"],
    df_normalized[df_normalized["verb"] == "lead_to"]["probability"],
    df_normalized[df_normalized["verb"] == "raise_the_risk_of"]["probability"],
    df_normalized[df_normalized["verb"] == "result_in"]["probability"]
)
print("Friedman test statistic:", friedman_stat)
print("Friedman test p-value:", p_value)


results_prob_2 = significant_check(df_normalized, "probability", p_value)

Friedman test statistic: 20.623880597014924
Friedman test p-value: 0.0021429213755423692
Friedman test is significant, performing post-hoc tests
Wilcoxon test between affect and cause: statistic=441.0, p-value=0.276817381620171
Wilcoxon test between affect and increase_the_chance_of: statistic=446.0, p-value=0.7442984489960107
Wilcoxon test between affect and influence: statistic=385.5, p-value=0.741265556167358
Wilcoxon test between affect and lead_to: statistic=332.5, p-value=0.1365568272459538
Wilcoxon test between affect and raise_the_risk_of: statistic=489.0, p-value=0.573534213478745
Wilcoxon test between affect and result_in: statistic=361.5, p-value=0.1189477491108064
Wilcoxon test between cause and increase_the_chance_of: statistic=455.5, p-value=0.48395800725136295
Wilcoxon test between cause and influence: statistic=362.0, p-value=0.032364972478192616
Significant difference between cause and influence
Wilcoxon test between cause and lead_to: statistic=403.0, p-value=0.397568

  df_results = pd.concat([df_results, pd.DataFrame({


In [30]:
results_prob_2 


Unnamed: 0,verb1,verb2,statistic,p_value,significant
0,affect,cause,441.0,0.276817,False
1,affect,increase_the_chance_of,446.0,0.744298,False
2,affect,influence,385.5,0.741266,False
3,affect,lead_to,332.5,0.136557,False
4,affect,raise_the_risk_of,489.0,0.573534,False
5,affect,result_in,361.5,0.118948,False
6,cause,increase_the_chance_of,455.5,0.483958,False
7,cause,influence,362.0,0.032365,True
8,cause,lead_to,403.0,0.397569,False
9,cause,raise_the_risk_of,384.5,0.036837,True


In [None]:
from scipy.stats import friedmanchisquare


# friedman_stat, friedman_p = friedmanchisquare(
#     df_combined['cause_answer'],
#     df_combined['affect_answer'],
#     df_combined['old_cause_answer'],
#     df_combined['old_affect_answer'])

# print(f"Friedman test p-value: {friedman_p:.4f}")
# if friedman_p < 0.05:
#     print("The distributions are significantly different.")
# else:
#     print("The distributions are not significantly different.")

In [None]:
from statsmodels.stats.contingency_tables import cochrans_q

# Create a matrix (rows = comparisons, columns = verbs)
# 1 for TRUE, 0 for FALSE
data = df_combined[['affect_answer_binar', 'cause_answer_binar', 'old_cause_answer_binar', 'old_affect_answer_binar']].astype(int).values

result = cochrans_q(data)

print(f"Cochran's Q test statistic: {result.statistic:.4f}")
print(f"Cochran's Q test p-value: {result.pvalue:.4f}")
if result.pvalue < 0.05:
    print("The proportions are significantly different.")
else:
    print("The proportions are not significantly different.")

# Asia

In [None]:
import pandas as pd

# Assuming your dataframe is already loaded as 'df'

def normalize_probs(df, answer_col, prob_col, new_col):
    df[new_col] = df.apply(
        lambda row: row[prob_col] if row[answer_col] == 1 else (1 - row[prob_col]),
        axis=1
    )
    return df

# Apply normalization for each verb
normalize_probs(df_normalized, 'affect_answer_binar', 'affect_answer', 'affect_support_TRUE')
normalize_probs(df_normalized, 'cause_answer_binar', 'cause_answer', 'cause_support_TRUE')
normalize_probs(df_normalized, 'old_cause_answer_binar', 'old_cause_answer', 'old_cause_support_TRUE')
normalize_probs(df_normalized, 'old_affect_answer_binar', 'old_affect_answer', 'old_affect_support_TRUE')


from scipy.stats import friedmanchisquare


friedman_stat2, friedman_p2 = friedmanchisquare(
    df_normalized['affect_support_TRUE'],
    df_normalized['cause_support_TRUE'],
    df_normalized['old_cause_support_TRUE'],
    df_normalized['old_affect_support_TRUE'])

print(f"Friedman test p-value: {friedman_p2:.4f}")
if friedman_p2 < 0.05:
    print("The distributions are significantly different.")
else:
    print("The distributions are not significantly different.")



In [43]:
from scipy.stats import friedmanchisquare
from scipy.stats import wilcoxon

def significant_check (df_combined, column_name, p_value, data):
    
    df_results = pd.DataFrame(columns=["verb1", "verb2", "statistic", "p_value", "significant", "data"])
            
    if p_value < 0.05:
        print("Friedman test is significant, performing post-hoc tests")
        # Perform Wilcoxon signed-rank tests for all pairs of verbs
        for i in range(len(verb_list)):
            for j in range(i + 1, len(verb_list)):
                verb1 = verb_list[i]
                verb2 = verb_list[j]
                stat, p = wilcoxon(
                    df_combined[df_combined["verb"] == verb1][column_name],
                    df_combined[df_combined["verb"] == verb2][column_name]
                )

                df_results = pd.concat([df_results, pd.DataFrame({
                    "verb1": [verb1],
                    "verb2": [verb2],
                    "statistic": [stat],
                    "p_value": [p],
                    "significant": [p < 0.05],
                    "data": [data]
                })], ignore_index=True)

 
                # print(f"Wilcoxon test between {verb1} and {verb2}: statistic={stat}, p-value={p}")
                if p < 0.05:
                    print(f"Significant difference between {verb1} and {verb2}")
    else:
        print("The distributions are not significantly different., no post-hoc tests needed")
    return df_results   

In [45]:
# path = "results/asia/"
# files = glob.glob(os.path.join(path, "*.csv"))

# df_asia = pd.DataFrame()

# # df_asia = pd.concat([clean_dataframe(pd.read_csv(file)) for file in files], ignore_index=True)

# for file in files:
#     print(file)
#     df = pd.read_csv(file)
#     df = clean_dataframe(df)
#     df_asia = pd.concat([df_asia, df], ignore_index=True)

# verb_list = list(df_combined["verb"].unique())
# print("Unique verbs:" , (verb_list))    

# # Extract corresponding answer_binary values for the Friedman test
# groups = [df_combined[df_combined["verb"] == verb]["probability"] for verb in verb_list]

# # Run the Friedman test
# friedman_stat, p_value = friedmanchisquare(*groups)
# print("Friedman test statistic:", friedman_stat)
# print("Friedman test p-value:", p_value)

# # Perform post-hoc or significance check
# significant_check(df_combined, "probability", p_value, "asia")

path_list = ["results/asia/", "results/cancer/", "results/medicine/"]

df_combined = pd.DataFrame()
df_results = pd.DataFrame()

for path in path_list:
    print(path)
    files = glob.glob(os.path.join(path, "*.csv"))
    for file in files:
        print(file)
        df = pd.read_csv(file)
        df = clean_dataframe(df)
        df_combined = pd.concat([df_combined, df], ignore_index=True)

    verb_list = list(df_combined["verb"].unique())

    groups = [df_combined[df_combined["verb"] == verb]["probability"] for verb in verb_list]
    # Run the Friedman test
    friedman_stat, p_value = friedmanchisquare(*groups)

    print("Friedman test statistic:", friedman_stat)
    print("Friedman test p-value:", p_value)

    # Perform post-hoc or significance check
    df_new  = significant_check(df_combined, "probability", p_value, path)
    df_results = pd.concat([df_results, df_new], ignore_index=True)


results/asia/
results/asia\causal_relationships_affect.csv
results/asia\causal_relationships_cause.csv
results/asia\causal_relationships_increase_the_chance_.csv
results/asia\causal_relationships_influence.csv
results/asia\causal_relationships_lead_to.csv
results/asia\causal_relationships_raise_the_risk.csv
results/asia\causal_relationships_result_in.csv
Friedman test statistic: 20.623880597014924
Friedman test p-value: 0.0021429213755423692
Friedman test is significant, performing post-hoc tests
Significant difference between cause and influence
Significant difference between cause and raise_the_risk_of
Significant difference between influence and lead_to
Significant difference between influence and result_in
results/cancer/
results/cancer\causal_relationships_affect.csv
results/cancer\causal_relationships_cause.csv
results/cancer\causal_relationships_increase the chance of.csv
results/cancer\causal_relationships_influence.csv
results/cancer\causal_relationships_lead to.csv


  df_results = pd.concat([df_results, pd.DataFrame({
  df_results = pd.concat([df_results, pd.DataFrame({


results/cancer\causal_relationships_raise the risk of.csv
results/cancer\causal_relationships_result in.csv
Friedman test statistic: 29.00959692898247
Friedman test p-value: 6.0583049865279704e-05
Friedman test is significant, performing post-hoc tests
Significant difference between cause and influence
Significant difference between cause and raise_the_risk_of
Significant difference between cause and result_in
Significant difference between influence and lead_to
Significant difference between influence and result_in
results/medicine/
results/medicine\causal_relationships_affect.csv
results/medicine\causal_relationships_cause.csv
results/medicine\causal_relationships_increase the chance of.csv
results/medicine\causal_relationships_influence.csv
results/medicine\causal_relationships_lead to.csv
results/medicine\causal_relationships_raise the risk of.csv
results/medicine\causal_relationships_result in.csv
Friedman test statistic: 51.6858982451716
Friedman test p-value: 2.1566644240983773e

  df_results = pd.concat([df_results, pd.DataFrame({


In [48]:
display(df_results[df_results["data"] == "results/asia/"],
df_results[df_results["data"] == "results/cancer/"],
df_results[df_results["data"] == "results/medicine/"])

Unnamed: 0,verb1,verb2,statistic,p_value,significant,data
0,affect,cause,441.0,0.276817,False,results/asia/
1,affect,increase_the_chance_of,446.0,0.744298,False,results/asia/
2,affect,influence,385.5,0.741266,False,results/asia/
3,affect,lead_to,332.5,0.136557,False,results/asia/
4,affect,raise_the_risk_of,489.0,0.573534,False,results/asia/
5,affect,result_in,361.5,0.118948,False,results/asia/
6,cause,increase_the_chance_of,455.5,0.483958,False,results/asia/
7,cause,influence,362.0,0.032365,True,results/asia/
8,cause,lead_to,403.0,0.397569,False,results/asia/
9,cause,raise_the_risk_of,384.5,0.036837,True,results/asia/


Unnamed: 0,verb1,verb2,statistic,p_value,significant,data
21,affect,cause,763.5,0.13528,False,results/cancer/
22,affect,increase_the_chance_of,836.5,0.714259,False,results/cancer/
23,affect,influence,801.5,0.842403,False,results/cancer/
24,affect,lead_to,689.0,0.274522,False,results/cancer/
25,affect,raise_the_risk_of,891.5,0.862631,False,results/cancer/
26,affect,result_in,747.5,0.299174,False,results/cancer/
27,cause,increase_the_chance_of,780.5,0.32207,False,results/cancer/
28,cause,influence,673.0,0.014069,True,results/cancer/
29,cause,lead_to,681.5,0.249119,False,results/cancer/
30,cause,raise_the_risk_of,693.5,0.013251,True,results/cancer/


Unnamed: 0,verb1,verb2,statistic,p_value,significant,data
42,affect,cause,3467.5,0.036286,True,results/medicine/
43,affect,increase_the_chance_of,3632.0,0.760039,False,results/medicine/
44,affect,influence,3111.0,0.531397,False,results/medicine/
45,affect,lead_to,3362.0,0.200563,False,results/medicine/
46,affect,raise_the_risk_of,4131.0,0.768673,False,results/medicine/
47,affect,result_in,3955.5,0.912699,False,results/medicine/
48,cause,increase_the_chance_of,3567.5,0.062015,False,results/medicine/
49,cause,influence,2980.0,0.001358,True,results/medicine/
50,cause,lead_to,3035.0,0.049335,True,results/medicine/
51,cause,raise_the_risk_of,3721.5,0.005778,True,results/medicine/


In [51]:

df_filter_true = df_results[df_results["significant"] == True]
print(df_filter_true.verb1.unique())
df_filter_true

['cause' 'influence' 'affect']


Unnamed: 0,verb1,verb2,statistic,p_value,significant,data
7,cause,influence,362.0,0.032365,True,results/asia/
9,cause,raise_the_risk_of,384.5,0.036837,True,results/asia/
15,influence,lead_to,247.0,0.006316,True,results/asia/
17,influence,result_in,270.5,0.014396,True,results/asia/
28,cause,influence,673.0,0.014069,True,results/cancer/
30,cause,raise_the_risk_of,693.5,0.013251,True,results/cancer/
31,cause,result_in,576.0,0.046511,True,results/cancer/
36,influence,lead_to,511.5,0.007717,True,results/cancer/
38,influence,result_in,549.5,0.027691,True,results/cancer/
42,affect,cause,3467.5,0.036286,True,results/medicine/


In [None]:
# df_results.to_csv("results/sensitivity_verb.csv", index=False)

In [None]:
path_list = ["results/asia/", "results/cancer/", "results/medicine/"]

df_combined_2 = pd.DataFrame()
df_results_2 = pd.DataFrame()

for path in path_list:
    print(path)
    files = glob.glob(os.path.join(path, "*.csv"))
    for file in files:
        # print(file)
        df = pd.read_csv(file)
        df = clean_dataframe(df)
        df_combined_2 = pd.concat([df_combined_2, df], ignore_index=True)
        
    #group by the verb and calculate the mean probability and std
    df_combined_2 = df_combined_2.groupby("verb").agg({"probability": ["mean", "std"]}).reset_index()
    df_combined_2.columns = ["verb", "mean_probability", "std_probability"]
    df_combined_2["data"] = path.split("/")[-2]  # Extract the folder name as data


    df_results_2 = pd.concat([df_results_2, df_combined_2], ignore_index=True)


results/asia/
results/cancer/
results/medicine/


In [63]:
display(df_results_2[df_results_2["data"] == "asia"],
df_results_2[df_results_2["data"] == "cancer"],
df_results_2[df_results_2["data"] == "medicine"])

Unnamed: 0,verb,mean_probability,std_probability,data
0,affect,0.994768,0.021891,asia
1,cause,0.991857,0.043287,asia
2,increase_the_chance_of,0.970307,0.107637,asia
3,influence,0.996214,0.020654,asia
4,lead_to,0.994172,0.0276,asia
5,raise_the_risk_of,0.996012,0.015194,asia
6,result_in,0.992021,0.030638,asia


Unnamed: 0,verb,mean_probability,std_probability,data
7,affect,0.997565,0.010592,cancer
8,cause,0.997849,0.008361,cancer
9,increase_the_chance_of,0.962147,0.108026,cancer
10,influence,0.999788,0.00059,cancer
11,lead_to,0.999675,0.001171,cancer
12,raise_the_risk_of,0.995934,0.013791,cancer
13,result_in,0.983407,0.07165,cancer


Unnamed: 0,verb,mean_probability,std_probability,data
14,affect,0.996561,0.023388,medicine
15,cause,0.995238,0.032632,medicine
16,increase_the_chance_of,0.990724,0.040776,medicine
17,influence,0.99892,0.005729,medicine
18,lead_to,0.990238,0.054789,medicine
19,raise_the_risk_of,0.983231,0.076229,medicine
20,result_in,0.997544,0.019288,medicine
