# Analyse output of AnyBURL

In [1]:
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
TH = ["regular", "study_mod", "var_mod"]
FOLDER_RULES = "./rules/"
COLUMNS = ['nb1', 'nb2', 'score', 'rule']
PREDS = ['hasNegativeEffectOn', 'hasPositiveEffectOn']

DATA = {
    th: pd.read_csv(os.path.join(FOLDER_RULES, th, "rules-100"), sep="\t").dropna() for th in TH
}
for th, data in DATA.items():
    data.columns = COLUMNS
    data["th"] = th

In [3]:
rules_effect = pd.DataFrame(columns=COLUMNS+["th"])
for th, data in DATA.items():
    data_f = data[data.rule.apply(lambda x: any(f in x for f in PREDS))]
    rules_effect = pd.concat([rules_effect, data_f]) if rules_effect.shape[0] > 0 else data_f
    to_p = [data.shape[0], data_f.shape[0]]
    print(f"{th}", " & ".join([f"{x:,}" for x in to_p]))
rules_effect["len_rule"] = rules_effect.rule.apply(lambda x: len(x.split(" <= ")[1].split(" ")))

print("======")
len_rules = sorted(rules_effect.len_rule.unique())
for th in TH:
    rule_n = [rules_effect[(rules_effect.th==th)& (rules_effect.len_rule==l)].shape[0] for l in len_rules]
    print(f"{th}", " & ".join([f"{x:,}" for x in rule_n]))

regular 2,482 & 1,095
study_mod 10,673 & 2,088
var_mod 2,517 & 659
regular 1,055 & 40
study_mod 2,040 & 48
var_mod 499 & 160


In [7]:
rules_effect


Unnamed: 0,nb1,nb2,score,rule,th,len_rule
1,1073,15,0.013979,https://data.cooperationdatabank.org/vocab/pro...,regular,1
2,25,5,0.200000,https://data.cooperationdatabank.org/vocab/pro...,regular,1
3,177,53,0.299435,https://data.cooperationdatabank.org/vocab/pro...,regular,1
4,248,43,0.173387,https://data.cooperationdatabank.org/vocab/pro...,regular,1
6,28,17,0.607143,https://data.cooperationdatabank.org/vocab/pro...,regular,1
...,...,...,...,...,...,...
2504,1715,100,0.058309,https://data.cooperationdatabank.org/vocab/pro...,var_mod,3
2508,1866,21,0.011254,https://data.cooperationdatabank.org/vocab/pro...,var_mod,3
2512,13,5,0.384615,https://data.cooperationdatabank.org/vocab/pro...,var_mod,1
2515,34,7,0.205882,https://data.cooperationdatabank.org/vocab/pro...,var_mod,1


In [8]:
import plotly.graph_objects as go
import pandas as pd

# Assuming rules_effect is a DataFrame with 'score' and 'th' columns

fig = go.Figure()

# Define color palette with distinct colors for each 'th' value
color_palette = px.colors.qualitative.Safe  # Using a safe qualitative color palette

# Create histogram for each unique 'th' value
for i, th_value in enumerate(rules_effect['th'].unique()):
    filtered_df = rules_effect[rules_effect['th'] == th_value]
    fig.add_trace(go.Histogram(
        x=filtered_df['score'],
        name=f"{th_value}",
        marker_color=color_palette[i % len(color_palette)],  # Assigning color from palette
        xbins=dict(start=0, end=1, size=0.1),
        histnorm='percent',
        opacity=0.75,  # Adjusting opacity for better visibility
    ))

# Update layout
fig.update_layout(
    barmode='group',
    width=600,
    height=400,
    title_text='Histogram of Rule Scores',
    xaxis=dict(title='Rule Score', dtick=0.1, range=[0, 1]),
    yaxis=dict(title='Percentage'),
    legend=dict(title='Group'),
)

# Save figure as PDF
fig.write_image("../visualisations/anyburl_rules_percent.pdf", format='pdf')

# Show figure
fig.show()


In [9]:
import plotly.graph_objects as go
import pandas as pd

# Assuming rules_effect is a DataFrame with 'score' and 'th' columns
# and filtering scores >= 0.1

fig = go.Figure()

# Define color palette with distinct colors for each 'th' value
color_palette = px.colors.qualitative.Safe  # Using a safe qualitative color palette

# Create histogram for each unique 'th' value
for i, th_value in enumerate(rules_effect['th'].unique()):
    filtered_df = rules_effect[(rules_effect['th'] == th_value) & (rules_effect['score'] >= 0.1)]
    fig.add_trace(go.Histogram(
        x=filtered_df['score'],
        name=f"{th_value}",
        marker_color=color_palette[i % len(color_palette)],  # Assigning color from palette
        xbins=dict(start=0.1, end=1, size=0.1),
        opacity=0.75,  # Adjusting opacity for better visibility
    ))

# Update layout
fig.update_layout(
    barmode='group',
    width=600,
    height=400,
    title_text='Histogram of Rule Scores',
    xaxis=dict(title='Rule Score', dtick=0.1, range=[0.1, 1]),
    yaxis=dict(title='Count'),
    legend=dict(title='Group'),
)

# Save figure as PDF
fig.write_image("../visualisations/anyburl_rules_count.pdf", format='pdf')

# Show figure
fig.show()


In [13]:
test = rules_effect[rules_effect.th == "study_mod"].sort_values(by="score", ascending=False)[:3].rule.values
test

array(['https://data.cooperationdatabank.org/vocab/prop/hasNegativeEffectOn(X,https://data.cooperationdatabank.org/id/dependentvariable/cooperation) <= http://www.w3.org/2000/01/rdf-schema#subPropertyOf(X,https://data.cooperationdatabank.org/vocab/prop/continuationProbabilityLevel)',
       'https://data.cooperationdatabank.org/vocab/prop/hasPositiveEffectOn(X,https://data.cooperationdatabank.org/id/dependentvariable/contributions) <= https://data.cooperationdatabank.org/vocab/prop/sivv1(X,https://data.cooperationdatabank.org/id/religiouslevel/high)',
       'https://data.cooperationdatabank.org/vocab/prop/hasNegativeEffectOn(X,https://data.cooperationdatabank.org/id/dependentvariable/cooperation) <= https://data.cooperationdatabank.org/vocab/prop/sivv2(X,https://data.cooperationdatabank.org/id/emotion/happiness)'],
      dtype=object)

In [15]:
replace = [
    ("https://data.cooperationdatabank.org/vocab/prop/", "cp:"),
    ("https://data.cooperationdatabank.org/id/dependentvariable/", ""),
    ("https://data.cooperationdatabank.org/id/", "id:"),
    ("_", "\\_"),
    ('http://www.w3.org/2000/01/rdf-schema#', 'rdfs:')
]

template = """
\\begin{figure}[h]
\\centering
\\begin{tcolorbox}[colback=white, colframe=black]
    <data>
\\end{tcolorbox}
\\caption{Top 3 Rules Learned for <th> Hypothesis}
\\label{fig:top-rules-<th>}
\\end{figure}
"""

def clean(text):
    for (old, new) in replace:
        text = text.replace(old, new)
    return text.replace("<=", "\\\\<=")

for th in TH:
    top_rules = rules_effect[rules_effect.th == th].sort_values(by="score", ascending=False)[:3]
    data = "\\\\\n".join(["\\texttt{" + clean(row.rule) + f"({str(round(row.score, 2))})" + "}" for _, row in top_rules.iterrows()])
    figure = template.replace("<th>", th.replace("_", "-")).replace("<data>", data)
    print(figure)


    


\begin{figure}[h]
\centering
\begin{tcolorbox}[colback=white, colframe=black]
    \texttt{cp:hasPositiveEffectOn(X,cooperation) \\<= cp:sivv2(X,id:intergroupcomp/intergroup\_prisoner's\_dilemma)(1.0)}\\
\texttt{cp:hasPositiveEffectOn(X,cooperation) \\<= cp:sivv2(X,id:motivationalorientation/competitive)(0.85)}\\
\texttt{cp:hasNegativeEffectOn(X,contributions) \\<= cp:sivv1(X,id:uncertaintytarget/threshold)(0.83)}
\end{tcolorbox}
\caption{Top 3 Rules Learned for regular Hypothesis}
\label{fig:top-rules-regular}
\end{figure}


\begin{figure}[h]
\centering
\begin{tcolorbox}[colback=white, colframe=black]
    \texttt{cp:hasNegativeEffectOn(X,cooperation) \\<= rdfs:subPropertyOf(X,cp:continuationProbabilityLevel)(1.0)}\\
\texttt{cp:hasPositiveEffectOn(X,contributions) \\<= cp:sivv1(X,id:religiouslevel/high)(1.0)}\\
\texttt{cp:hasNegativeEffectOn(X,cooperation) \\<= cp:sivv2(X,id:emotion/happiness)(1.0)}
\end{tcolorbox}
\caption{Top 3 Rules Learned for study-mod Hypothesis}
\label{fig:top-r

In [77]:
print(rules_effect.shape)
rules_effect[rules_effect["head"].isin(["cp:hasPositiveEffectOn", "cp:hasNegativeEffectOn"])]

(3842, 8)


Unnamed: 0,nb1,nb2,score,rule,th,len_rule,head,tail
2,25,5,0.200000,https://data.cooperationdatabank.org/vocab/pro...,regular,1,cp:hasPositiveEffectOn,[http://www.w3.org/2000/01/rdf-schema#subPrope...
3,177,53,0.299435,https://data.cooperationdatabank.org/vocab/pro...,regular,1,cp:hasNegativeEffectOn,[https://data.cooperationdatabank.org/vocab/pr...
4,248,43,0.173387,https://data.cooperationdatabank.org/vocab/pro...,regular,1,cp:hasNegativeEffectOn,[http://www.w3.org/2000/01/rdf-schema#subPrope...
6,28,17,0.607143,https://data.cooperationdatabank.org/vocab/pro...,regular,1,cp:hasPositiveEffectOn,[https://data.cooperationdatabank.org/vocab/pr...
9,25,6,0.240000,https://data.cooperationdatabank.org/vocab/pro...,regular,1,cp:hasPositiveEffectOn,[https://data.cooperationdatabank.org/vocab/pr...
...,...,...,...,...,...,...,...,...
2467,27,5,0.185185,https://data.cooperationdatabank.org/vocab/pro...,var_mod,1,cp:hasPositiveEffectOn,[http://www.w3.org/2000/01/rdf-schema#subPrope...
2478,29,10,0.344828,https://data.cooperationdatabank.org/vocab/pro...,var_mod,1,cp:hasPositiveEffectOn,[https://data.cooperationdatabank.org/vocab/pr...
2512,13,5,0.384615,https://data.cooperationdatabank.org/vocab/pro...,var_mod,1,cp:hasNegativeEffectOn,[https://data.cooperationdatabank.org/vocab/pr...
2515,34,7,0.205882,https://data.cooperationdatabank.org/vocab/pro...,var_mod,1,cp:hasPositiveEffectOn,[https://data.cooperationdatabank.org/vocab/pr...


In [65]:
rules_effect[['head', 'tail']] = rules_effect['rule'].str.split('<=', expand=True)

# Strip any leading/trailing whitespace from the new columns
rules_effect['head'] = rules_effect['head'].apply(lambda x: clean(text=x.strip().split('(')[0]))
rules_effect['tail'] = rules_effect['tail'].str.strip()
print(rules_effect.shape)
rules_effect['tail'] = rules_effect['tail'].str.split(r'\), ')

# Explode the 'tail' column to create separate rows for each condition
rules_effect_exploded = rules_effect.explode('tail').reset_index(drop=True)
rules_effect_exploded = rules_effect_exploded[rules_effect_exploded["tail"]!='']
rules_effect_exploded["tail"] = rules_effect_exploded["tail"].apply(lambda x: clean(text=x.split('(')[0]))

print(rules_effect_exploded.shape)
rules_effect_exploded.sample(10)

(3842, 8)
(4326, 8)


Unnamed: 0,nb1,nb2,score,rule,th,len_rule,head,tail
2812,2000,5,0.0025,https://data.cooperationdatabank.org/vocab/pro...,study_mod,1,cp:sivv1,cp:hasPositiveEffectOn
3914,81,29,0.358025,https://data.cooperationdatabank.org/vocab/pro...,var_mod,3,cp:hasPositiveEffectOn,cp:mod1
3562,31,17,0.548387,https://data.cooperationdatabank.org/vocab/pro...,var_mod,1,cp:hasNegativeEffectOn,cp:mod1
3734,63,5,0.079365,http://www.w3.org/2000/01/rdf-schema#subProper...,var_mod,1,rdfs:subPropertyOf,cp:hasNegativeEffectOn
229,1197,34,0.028404,https://data.cooperationdatabank.org/vocab/pro...,regular,1,cp:sivv2,cp:hasPositiveEffectOn
58,1197,66,0.055138,https://data.cooperationdatabank.org/vocab/pro...,regular,1,cp:sivv1,cp:hasPositiveEffectOn
3602,37,7,0.189189,https://data.cooperationdatabank.org/vocab/pro...,var_mod,1,cp:hasPositiveEffectOn,cp:sivv1
2706,680,327,0.480882,https://data.cooperationdatabank.org/vocab/pro...,study_mod,1,cp:hasNegativeEffectOn,cp:sivv1
1448,2000,42,0.021,https://data.cooperationdatabank.org/vocab/pro...,study_mod,1,cp:sivv1,cp:hasNegativeEffectOn
1170,38,14,0.368421,https://data.cooperationdatabank.org/vocab/pro...,regular,1,cp:hasNegativeEffectOn,cp:sivv2


In [121]:
df_violin = rules_effect_exploded[ \
    (rules_effect_exploded["head"].isin(["cp:hasPositiveEffectOn"])) & \
        (~rules_effect_exploded["tail"].isin(["cp:hasPositiveEffectOn", "cp:hasNegativeEffectOn"]))]

colorblind_palette = ['#1f77b4', '#ff7f0e']  # Blue and orange colors for two categories
color_palette = px.colors.qualitative.Safe
# Create the box plot using Plotly Express
fig = px.box(
    df_violin,
    x='tail',
    y='score',
    color='th',
    # points='all',
    # title='Box Plot with Colorblind-Friendly Palette',
    labels={
        'head': 'Effect Type',
        'score': 'Score',
        'th': 'Category'
    },
    # color_discrete_sequence=colorblind_palette  # Use colorblind-friendly palette
    color_discrete_sequence=color_palette

)

# Update layout for better readability
fig.update_layout(
    title_font_size=20,
    title_x=0.5,  # Center the title
    xaxis_title_font_size=16,
    yaxis_title_font_size=16,
    legend_title_font_size=16,
    legend_font_size=12,
    margin=dict(l=40, r=40, t=40, b=40),  # Adjust margins
    width=800,  # Set the width of the plot
    height=600  # Set the height of the plot
)

# Show the plot
fig.write_image("../visualisations/anyburl_boxplot_positive_effect.pdf", format='pdf')
fig.show()

In [122]:
df_violin = rules_effect_exploded[ \
    (rules_effect_exploded["head"].isin(["cp:hasNegativeEffectOn"])) & \
        (~rules_effect_exploded["tail"].isin(["cp:hasPositiveEffectOn", "cp:hasNegativeEffectOn"]))]

colorblind_palette = ['#1f77b4', '#ff7f0e']  # Blue and orange colors for two categories
color_palette = px.colors.qualitative.Safe
# Create the box plot using Plotly Express
fig = px.box(
    df_violin,
    x='tail',
    y='score',
    color='th',
    # points='all',
    # title='Box Plot with Colorblind-Friendly Palette',
    labels={
        'head': 'Effect Type',
        'score': 'Score',
        'th': 'Category'
    },
    # color_discrete_sequence=colorblind_palette  # Use colorblind-friendly palette
    color_discrete_sequence=color_palette

)

# Update layout for better readability
fig.update_layout(
    title_font_size=20,
    title_x=0.5,  # Center the title
    xaxis_title_font_size=16,
    yaxis_title_font_size=16,
    legend_title_font_size=16,
    legend_font_size=12,
    margin=dict(l=40, r=40, t=40, b=40),  # Adjust margins
    width=800,  # Set the width of the plot
    height=600  # Set the height of the plot
)

# Show the plot
fig.write_image("../visualisations/anyburl_boxplot_negative_effect.pdf", format='pdf')
fig.show()