<h1 style="color:#005F6A">Imports and Setup</h1>

In [None]:
# Imports: standard library, then third-party, then project-local helpers
import warnings

import pandas as pd

from src.data_cleaning import preprocessing_for_arm
from src.analysis import find_frequent_itemsets_and_rules
from src.eda import visualize_rules

# Suppress DeprecationWarning messages raised after this point.
warnings.filterwarnings("ignore", category=DeprecationWarning)

<h1 style="color:#005F6A">Read Data</h1>

In [None]:
# Load the two parquet inputs used by every section below:
#   ct             -> the "Events of All Grades" analyses
#   ct_high_grades -> the "Grade 3+ Events" analyses
# NOTE(review): these are absolute cluster paths; the notebook only runs where they exist.
ct_parquet_path = "/cluster/home/t128190uhn/datasets/clinical_trials/cleaning/ct.parquet"
ct_high_grades_parquet_path = "/cluster/home/t128190uhn/datasets/clinical_trials/cleaning/ct_high_grades.parquet"

ct = pd.read_parquet(ct_parquet_path)
ct_high_grades = pd.read_parquet(ct_high_grades_parquet_path)

<h1 style="color:#005F6A">Association Rule Mining</h1>

<h2 style="color:#5F9EA0">Association Rules Among AE Categories</h2>

<h3 style="color:#A98BC7">Events of All Grades</h3>

In [None]:
# Step 1: Preprocess the all-grades data at the "mapped_soc" level.
# The result is the input to the mining step that follows.
# (presumably a transaction/basket matrix, judging by the name -- confirm in src.data_cleaning)
ct_soc_basket_matrix = preprocessing_for_arm(
    ct,
    level="mapped_soc",
)

In [None]:
# Step 2: Mine frequent itemsets and association rules (AE categories, all grades).
# The helper returns two objects: the frequent itemsets and the derived rules.
ct_soc_mining_params = {
    "algo": "fpgrowth",      # "apriori" is the other option noted for this helper
    "min_support": 0.1,
    "min_threshold": 0.6,    # presumably the minimum rule metric -- confirm in src.analysis
}
ct_soc_freq_items, ct_soc_rules = find_frequent_itemsets_and_rules(
    ct_soc_basket_matrix, **ct_soc_mining_params
)

In [None]:
# Step 3: Plot the mined rules and write the figure to disk as a PNG
# (400 dpi, tight bounding box).
ct_soc_rules_png = "/cluster/home/t128190uhn/datasets/clinical_trials/association_rule_mining/ct_soc_rules.png"
fig1, ax1 = visualize_rules(
    ct_soc_rules,
    cmap_name="magma",
    clip_light_fraction=0.35,
)
fig1.savefig(ct_soc_rules_png, dpi=400, bbox_inches="tight")

<h3 style="color:#A98BC7">Grade 3+ Events</h3>

In [None]:
# Step 1: Preprocess the high-grade data at the "mapped_soc" level.
# The result is the input to the mining step that follows.
# (presumably a transaction/basket matrix, judging by the name -- confirm in src.data_cleaning)
cth_soc_basket_matrix = preprocessing_for_arm(
    ct_high_grades,
    level="mapped_soc",
)

In [None]:
# Step 2: Mine frequent itemsets and association rules (AE categories, grade 3+).
# min_support here (0.01) is lower than the 0.1 used for the all-grades category run.
cth_soc_mining_params = {
    "algo": "fpgrowth",      # "apriori" is the other option noted for this helper
    "min_support": 0.01,
    "min_threshold": 0.6,    # presumably the minimum rule metric -- confirm in src.analysis
}
cth_soc_freq_items, cth_soc_rules = find_frequent_itemsets_and_rules(
    cth_soc_basket_matrix, **cth_soc_mining_params
)

In [None]:
# Step 3: Plot the mined rules and write the figure to disk as a PNG
# (400 dpi, tight bounding box).
cth_soc_rules_png = "/cluster/home/t128190uhn/datasets/clinical_trials/association_rule_mining/cth_soc_rules.png"
fig2, ax2 = visualize_rules(
    cth_soc_rules,
    cmap_name="magma",
    clip_light_fraction=0.35,
)
fig2.savefig(cth_soc_rules_png, dpi=400, bbox_inches="tight")

<h2 style="color:#5F9EA0">Association Rules Among AE Terms</h2>

<h3 style="color:#A98BC7">Events of All Grades</h3>

In [None]:
# Step 1: Preprocess the all-grades data at the "mapped_term" level.
# The result is the input to the mining step that follows.
# (presumably a transaction/basket matrix, judging by the name -- confirm in src.data_cleaning)
ct_term_basket_matrix = preprocessing_for_arm(
    ct,
    level="mapped_term",
)

In [None]:
# Step 2: Mine frequent itemsets and association rules (AE terms, all grades).
# The helper returns two objects: the frequent itemsets and the derived rules.
ct_term_mining_params = {
    "algo": "fpgrowth",      # "apriori" is the other option noted for this helper
    "min_support": 0.05,
    "min_threshold": 0.6,    # presumably the minimum rule metric -- confirm in src.analysis
}
ct_term_freq_items, ct_term_rules = find_frequent_itemsets_and_rules(
    ct_term_basket_matrix, **ct_term_mining_params
)

In [None]:
# Step 3: Plot the mined rules and write the figure to disk as a PNG
# (400 dpi, tight bounding box).
ct_term_rules_png = "/cluster/home/t128190uhn/datasets/clinical_trials/association_rule_mining/ct_term_rules.png"
fig3, ax3 = visualize_rules(
    ct_term_rules,
    cmap_name="magma",
    clip_light_fraction=0.35,
)
fig3.savefig(ct_term_rules_png, dpi=400, bbox_inches="tight")

<h3 style="color:#A98BC7">Grade 3+ Events</h3>

In [None]:
# Step 1: Preprocess the high-grade data at the "mapped_term" level.
# The result is the input to the mining step that follows.
# (presumably a transaction/basket matrix, judging by the name -- confirm in src.data_cleaning)
cth_term_basket_matrix = preprocessing_for_arm(
    ct_high_grades,
    level="mapped_term",
)

In [None]:
# Step 2: Mine frequent itemsets and association rules (AE terms, grade 3+).
# min_support here (0.005) is lower than the 0.05 used for the all-grades term run.
cth_term_mining_params = {
    "algo": "fpgrowth",      # "apriori" is the other option noted for this helper
    "min_support": 0.005,
    "min_threshold": 0.6,    # presumably the minimum rule metric -- confirm in src.analysis
}
cth_term_freq_items, cth_term_rules = find_frequent_itemsets_and_rules(
    cth_term_basket_matrix, **cth_term_mining_params
)

In [None]:
# Step 3: Plot the mined rules and write the figure to disk as a PNG
# (400 dpi, tight bounding box).
cth_term_rules_png = "/cluster/home/t128190uhn/datasets/clinical_trials/association_rule_mining/cth_term_rules.png"
fig4, ax4 = visualize_rules(
    cth_term_rules,
    cmap_name="magma",
    clip_light_fraction=0.35,
)
fig4.savefig(cth_term_rules_png, dpi=400, bbox_inches="tight")