In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

Balanced dataset

In [None]:
# Read the balanced dataset CSV (path is relative to the notebook's directory).
dataset = pd.read_csv(filepath_or_buffer='../Datasets/balanced_data.csv')

In [None]:
# Remove 'Prev_Courses_Category' before encoding.
# Rebinding (instead of inplace=True) together with errors="ignore" keeps this
# cell idempotent: the in-place version raises KeyError when the cell is run a
# second time, because the column has already been removed from `dataset`.
dataset = dataset.drop(columns=['Prev_Courses_Category'], errors="ignore")

In [None]:
# One-hot encode the categorical columns into indicator columns.
# dtype=bool is requested explicitly: the indicators are later fed to mlxtend's
# apriori, which expects boolean one-hot data, and older pandas versions would
# otherwise emit uint8 dummies. Columns that are already numeric are passed
# through by get_dummies unchanged.
dataset_encoded = pd.get_dummies(dataset, dtype=bool)

In [None]:


# Hold out 30% of the encoded rows for evaluation; the fixed random_state makes
# the split reproducible across runs.
train_dataset, test_dataset = train_test_split(
    dataset_encoded,
    test_size=0.3,
    random_state=42,
)

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Sweep the rule-confidence threshold and, for each value, use the mined
# "antecedent -> Recommended" association rules as a classifier on the held-out
# split: a test row is predicted positive when at least one rule's antecedent is
# fully contained in the row's active items.

MIN_SUPPORT = 0.05
CONFIDENCE_THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]  # 0.1 -> 0.9
TARGET_ITEM = "Recommendation_Label_Recommended"

# Frequent itemsets depend only on the training data and the support threshold,
# not on the confidence threshold, so they are mined once instead of once per
# loop iteration.
frequent_itemsets = apriori(train_dataset, min_support=MIN_SUPPORT, use_colnames=True)

# Ground truth is also independent of the threshold, so it is built once.
label_cols = [c for c in test_dataset.columns if "Recommendation_Label" in c]
if TARGET_ITEM in label_cols:
    y_true = test_dataset[TARGET_ITEM].astype(int)
else:
    # NOTE(review): in this branch no rule can have TARGET_ITEM as its consequent,
    # so every prediction below is 0 and the metrics are computed against whichever
    # label column happens to come first -- confirm this fallback is intended.
    y_true = test_dataset[label_cols[0]].astype(int)

# Active items (columns whose value equals True) for every test row, computed
# once. The element-wise `== True` comparison is deliberate: it selects the
# one-hot columns that are set for the row.
test_item_sets = [set(row[row == True].index) for _, row in test_dataset.iterrows()]

results = []

for thresh in CONFIDENCE_THRESHOLDS:
    print(f"\n=== Running for min_threshold = {thresh:.1f} ===")

    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=thresh)
    print(rules)

    # Keep only rules whose right-hand side is exactly {TARGET_ITEM}.
    # .astype(bool) guarantees a boolean mask even when `rules` is empty; an empty
    # object-dtype Series would otherwise be interpreted as a list of column
    # labels and silently strip every column from the result.
    is_target_rule = rules["consequents"].apply(
        lambda x: len(x) == 1 and TARGET_ITEM in x
    ).astype(bool)
    rules_recommended = rules[is_target_rule].copy()

    print(f"Total Rules: {len(rules)} | RHS = 'Recommended': {len(rules_recommended)}")

    # Materialise the antecedents once per threshold rather than re-iterating the
    # rules DataFrame for every test row.
    antecedent_sets = [set(antecedent) for antecedent in rules_recommended["antecedents"]]

    # 1 when any rule fires for the row, 0 otherwise (any() stops at the first match).
    predictions_bal = [
        int(any(antecedent.issubset(row_items) for antecedent in antecedent_sets))
        for row_items in test_item_sets
    ]
    # A row is "covered" exactly when it is predicted positive.
    covered_bal = sum(predictions_bal)

    y_pred = pd.Series(predictions_bal)

    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    coverage = covered_bal / len(test_dataset)

    results.append({
        "Confidence_Threshold": thresh,
        "Accuracy": round(acc, 3),
        "Precision": round(prec, 3),
        "Recall": round(rec, 3),
        "F1": round(f1, 3),
        "Coverage": round(coverage, 3),
        "rules_recom": len(rules_recommended)
    })

results_df = pd.DataFrame(results)
print("\n=== Summary over all thresholds ===")
print(results_df)



=== Running for min_threshold = 0.1 ===
                                            antecedents  \
0                                      (Course_ID_C001)   
1                      (Interaction_Rate_Category_high)   
2                      (Interaction_Rate_Category_high)   
3                                      (Course_ID_C002)   
4                                      (Course_ID_C003)   
...                                                 ...   
1193  (Learning_Efficiency_Category_high, Resource_T...   
1194                              (Device_Type_desktop)   
1195             (Recommendation_Label_Not_Recommended)   
1196                (Learning_Efficiency_Category_high)   
1197                              (Resource_Type_video)   

                                            consequents  antecedent support  \
0                      (Interaction_Rate_Category_high)            0.210004   
1                                      (Course_ID_C001)            0.332782   
2            

In [None]:
# Persist the number of "Recommended" rules found at each confidence threshold.
results_df.loc[:, ['Confidence_Threshold', 'rules_recom']].to_csv(
    path_or_buf="Apriori_rules.csv"
)

In [None]:
# Mean of the positive-label indicator in the train and test splits, as a (train, test) pair.
tuple(
    split["Recommendation_Label_Recommended"].mean()
    for split in (train_dataset, test_dataset)
)


(np.float64(0.5961140967341877), np.float64(0.630665380906461))

In [None]:
# Mean of the positive-label indicator over the full encoded dataset.
dataset_encoded.loc[:, "Recommendation_Label_Recommended"].mean()

np.float64(0.6064814814814815)