# Analyse output of AnyBURL

In [104]:
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [105]:
TH = ["regular", "study_mod", "var_mod"]
FOLDER_RULES = "./rules/"
COLUMNS = ['nb1', 'nb2', 'score', 'rule']
PREDS = ['hasNegativeEffectOn', 'hasPositiveEffectOn']

DATA = {
    th: pd.read_csv(os.path.join(FOLDER_RULES, th, "rules-100"), sep="\t").dropna() for th in TH
}
for th, data in DATA.items():
    data.columns = COLUMNS
    data["th"] = th

In [111]:
rules_effect = pd.DataFrame(columns=COLUMNS+["th"])
for th, data in DATA.items():
    data_f = data[data.rule.apply(lambda x: any(f in x for f in PREDS))]
    rules_effect = pd.concat([rules_effect, data_f]) if rules_effect.shape[0] > 0 else data_f
    to_p = [data.shape[0], data_f.shape[0]]
    print(f"{th}", " & ".join([f"{x:,}" for x in to_p]))
rules_effect["len_rule"] = rules_effect.rule.apply(lambda x: len(x.split(" <= ")[1].split(" ")))

print("======")
len_rules = sorted(rules_effect.len_rule.unique())
for th in TH:
    rule_n = [rules_effect[(rules_effect.th==th)& (rules_effect.len_rule==l)].shape[0] for l in len_rules]
    print(f"{th}", " & ".join([f"{x:,}" for x in rule_n]))

regular 2,482 & 1,095
study_mod 10,565 & 2,089
var_mod 2,517 & 659
regular 1,055 & 40
study_mod 2,041 & 48
var_mod 499 & 160


In [108]:
sorted(rules_effect.len_rule.unique())

[1, 3]

In [23]:
rules_effect


Unnamed: 0,nb1,nb2,score,rule,th
1,1073,15,0.013979,https://data.cooperationdatabank.org/vocab/pro...,regular
2,25,5,0.200000,https://data.cooperationdatabank.org/vocab/pro...,regular
3,248,43,0.173387,https://data.cooperationdatabank.org/vocab/pro...,regular
4,177,53,0.299435,https://data.cooperationdatabank.org/vocab/pro...,regular
6,28,17,0.607143,https://data.cooperationdatabank.org/vocab/pro...,regular
...,...,...,...,...,...
2504,1197,83,0.069340,https://data.cooperationdatabank.org/vocab/pro...,var_mod
2507,1991,23,0.011552,https://data.cooperationdatabank.org/vocab/pro...,var_mod
2513,13,5,0.384615,https://data.cooperationdatabank.org/vocab/pro...,var_mod
2515,20,13,0.650000,https://data.cooperationdatabank.org/vocab/pro...,var_mod


In [68]:
import plotly.graph_objects as go
import pandas as pd

# Assuming rules_effect is a DataFrame with 'score' and 'th' columns

fig = go.Figure()

# Define color palette with distinct colors for each 'th' value
color_palette = px.colors.qualitative.Safe  # Using a safe qualitative color palette

# Create histogram for each unique 'th' value
for i, th_value in enumerate(rules_effect['th'].unique()):
    filtered_df = rules_effect[rules_effect['th'] == th_value]
    fig.add_trace(go.Histogram(
        x=filtered_df['score'],
        name=f"{th_value}",
        marker_color=color_palette[i % len(color_palette)],  # Assigning color from palette
        xbins=dict(start=0, end=1, size=0.1),
        histnorm='percent',
        opacity=0.75,  # Adjusting opacity for better visibility
    ))

# Update layout
fig.update_layout(
    barmode='group',
    width=600,
    height=400,
    title_text='Histogram of Rule Scores',
    xaxis=dict(title='Rule Score', dtick=0.1, range=[0, 1]),
    yaxis=dict(title='Percentage'),
    legend=dict(title='Group'),
)

# Save figure as PDF
fig.write_image("../visualisation/anyburl_rules_percent.pdf", format='pdf')

# Show figure
fig.show()


In [69]:
import plotly.graph_objects as go
import pandas as pd

# Assuming rules_effect is a DataFrame with 'score' and 'th' columns
# and filtering scores >= 0.1

fig = go.Figure()

# Define color palette with distinct colors for each 'th' value
color_palette = px.colors.qualitative.Safe  # Using a safe qualitative color palette

# Create histogram for each unique 'th' value
for i, th_value in enumerate(rules_effect['th'].unique()):
    filtered_df = rules_effect[(rules_effect['th'] == th_value) & (rules_effect['score'] >= 0.1)]
    fig.add_trace(go.Histogram(
        x=filtered_df['score'],
        name=f"{th_value}",
        marker_color=color_palette[i % len(color_palette)],  # Assigning color from palette
        xbins=dict(start=0.1, end=1, size=0.1),
        opacity=0.75,  # Adjusting opacity for better visibility
    ))

# Update layout
fig.update_layout(
    barmode='group',
    width=600,
    height=400,
    title_text='Histogram of Rule Scores',
    xaxis=dict(title='Rule Score', dtick=0.1, range=[0.1, 1]),
    yaxis=dict(title='Count'),
    legend=dict(title='Group'),
)

# Save figure as PDF
fig.write_image("../visualisation/anyburl_rules_count.pdf", format='pdf')

# Show figure
fig.show()


In [73]:
test = rules_effect[rules_effect.th == "regular"].sort_values(by="score", ascending=False)[:3]
test

Unnamed: 0,nb1,nb2,score,rule,th
1161,8,8,1.0,https://data.cooperationdatabank.org/vocab/pro...,regular
1488,13,11,0.846154,https://data.cooperationdatabank.org/vocab/pro...,regular
345,6,5,0.833333,https://data.cooperationdatabank.org/vocab/pro...,regular


In [92]:
replace = [
    ("https://data.cooperationdatabank.org/vocab/prop/", "cp:"),
    ("https://data.cooperationdatabank.org/id/dependentvariable/", ""),
    ("https://data.cooperationdatabank.org/id/", "id:"),
    ("_", "\\_")
]

template = """
\\begin{figure}[h]
\\centering
\\begin{tcolorbox}[colback=white, colframe=black]
    <data>
\\end{tcolorbox}
\\caption{Top 3 Rules Learned for <th> Hypothesis}
\\label{fig:top-rules-<th>}
\\end{figure}
"""

def clean(text):
    for (old, new) in replace:
        text = text.replace(old, new)
    return text.replace("<=", "\\\\<=")

for th in TH:
    top_rules = rules_effect[rules_effect.th == th].sort_values(by="score", ascending=False)[:3]
    data = "\\\\\n".join(["\\texttt{" + clean(row.rule) + f"({str(round(row.score, 2))})" + "}" for _, row in top_rules.iterrows()])
    figure = template.replace("<th>", th.replace("_", "\\_")).replace("<data>", data)
    print(figure)


    


\begin{figure}[h]
\centering
\begin{tcolorbox}[colback=white, colframe=black]
    \texttt{cp:hasPositiveEffectOn(X,cooperation) \\<= cp:sivv2(X,id:intergroupcomp/intergroup\_prisoner's\_dilemma)(1.0)}\\
\texttt{cp:hasPositiveEffectOn(X,cooperation) \\<= cp:sivv2(X,id:motivationalorientation/competitive)(0.85)}\\
\texttt{cp:hasNegativeEffectOn(X,contributions) \\<= cp:sivv2(X,id:uncertaintytarget/loss)(0.83)}
\end{tcolorbox}
\caption{Top 3 Rules Learned for regular Hypothesis}
\label{fig:top-rules-regular}
\end{figure}


\begin{figure}[h]
\centering
\begin{tcolorbox}[colback=white, colframe=black]
    \texttt{cp:hasNegativeEffectOn(X,cooperation) \\<= cp:sivv2(X,id:hormonesadministration/placebo)(1.0)}\\
\texttt{cp:hasNegativeEffectOn(X,contributions) \\<= cp:sivv1(X,id:leadercharacteristic/positive\_emotion)(1.0)}\\
\texttt{cp:hasNegativeEffectOn(X,cooperation) \\<= cp:sivv1(X,id:continuationprobabilitylevel/low)(1.0)}
\end{tcolorbox}
\caption{Top 3 Rules Learned for study\_mod Hypoth

In [95]:
rules_effect.rule.values[0]

['https://data.cooperationdatabank.org/vocab/prop/hasNegativeEffectOn(X,A)']

In [97]:
rules_effect["len_rule"] = rules_effect.rule.apply(lambda x: len(x.split(" <= ")[1].split(" ")))

In [99]:
rules_effect.groupby(["th", "len_rule"]).agg({"rule": "count"})

Unnamed: 0_level_0,Unnamed: 1_level_0,rule
th,len_rule,Unnamed: 2_level_1
regular,1,1055
regular,3,40
study_mod,1,2041
study_mod,3,48
var_mod,1,499
var_mod,3,160


In [101]:
rules_effect[rules_effect.len_rule==3].sort_values(by="score", ascending=False)

Unnamed: 0,nb1,nb2,score,rule,th,len_rule
713,245,104,0.424490,https://data.cooperationdatabank.org/vocab/pro...,var_mod,3
1329,69,29,0.420290,https://data.cooperationdatabank.org/vocab/pro...,var_mod,3
1327,365,151,0.413699,https://data.cooperationdatabank.org/vocab/pro...,var_mod,3
899,126,52,0.412698,https://data.cooperationdatabank.org/vocab/pro...,var_mod,3
192,394,162,0.411168,https://data.cooperationdatabank.org/vocab/pro...,var_mod,3
...,...,...,...,...,...,...
602,2000,10,0.005000,https://data.cooperationdatabank.org/vocab/pro...,regular,3
1967,2000,8,0.004000,https://data.cooperationdatabank.org/vocab/pro...,regular,3
2573,2000,8,0.004000,https://data.cooperationdatabank.org/vocab/pro...,study_mod,3
8499,2000,7,0.003500,https://data.cooperationdatabank.org/vocab/pro...,study_mod,3


In [103]:
fig = go.Figure()

# Define color palette with distinct colors for each 'th' value
color_palette = px.colors.qualitative.Safe  # Using a safe qualitative color palette

# Create grouped bar chart for each unique 'th' value
for i, th_value in enumerate(rules_effect['th'].unique()):
    filtered_df = rules_effect[(rules_effect['th'] == th_value) & (rules_effect['score'] >= 0.1)]
    
    # Group by 'len_rule' and calculate counts
    len_rule_counts = filtered_df.groupby('len_rule').size().reset_index(name='count')
    
    fig.add_trace(go.Bar(
        x=len_rule_counts['len_rule'],
        y=len_rule_counts['count'],
        name=f"{th_value}",
        marker_color=color_palette[i % len(color_palette)],  # Assigning color from palette
        opacity=0.75,  # Adjusting opacity for better visibility
        offsetgroup=i,  # Group bars by 'th_value'
    ))

# Update layout
fig.update_layout(
    barmode='group',  # Grouped bars
    width=800,
    height=600,
    title_text='Grouped Bar Chart of Rule Scores by Length',
    xaxis=dict(title='Length of Rule'),
    yaxis=dict(title='Count'),
    legend=dict(title='Group'),
)