In [2]:
# Re-import necessary libraries due to environment reset
import pandas as pd

# Reload the dataset
file_path = 'mooc_dataset/output_truncated.csv'
df = pd.read_csv(file_path)

# Step 1: Filter active users
active_users_df = df[(df["viewed"] == 1) | (df["explored"] == 1)]

# Step 2: Group transactions by user and include additional attributes
transactions_enriched = (
    active_users_df.groupby("userid_DI")
    .agg({
        "course_id": list,  # List of courses taken
        "age": "first",  # Age
        "final_cc_cname_DI": "first",  # Country
    })
    .reset_index()
)

# Step 3: Categorize age into age groups
transactions_enriched["age_group"] = pd.cut(
    transactions_enriched["age"], bins=[0, 18, 25, 35, 50, 100], labels=["<18", "18-25", "26-35", "36-50", "50+"]
)

In [6]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Step 1: Prepare the transactions list
transactions_list = transactions_enriched["course_id"].tolist()

# Step 2: One-hot encode the transactions
te = TransactionEncoder()
te_ary = te.fit(transactions_list).transform(transactions_list)
one_hot_encoded_df = pd.DataFrame(te_ary, columns=te.columns_)

# Step 3: Apply Apriori
frequent_itemsets = apriori(one_hot_encoded_df, min_support=0.01, use_colnames=True)

frequent_itemsets

Unnamed: 0,support,itemsets
0,0.06193,(CB22x)
1,0.401106,(CS50x)
2,0.106924,(ER22x)
3,0.455724,(PH207x)
4,0.075053,(PH278x)
5,0.013561,"(CS50x, CB22x)"
6,0.016154,"(CB22x, ER22x)"
7,0.021997,"(CS50x, ER22x)"
8,0.020404,"(PH278x, PH207x)"


In [7]:

# Step 4: Generate Association Rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.4, num_itemsets=len(frequent_itemsets))

# Step 5: Include demographic filters in the rules
rules_filtered = rules.copy()
rules_filtered["age_group"] = transactions_enriched["age_group"]
rules_filtered["country"] = transactions_enriched["final_cc_cname_DI"]

# Display the rules
rules_filtered


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski,age_group,country
0,,,,,,,,,,,,,,,18-25,India
1,,,,,,,,,,,,,,,26-35,India
2,,,,,,,,,,,,,,,18-25,Russian Federation
3,,,,,,,,,,,,,,,26-35,India
4,,,,,,,,,,,,,,,26-35,Other Africa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31999,,,,,,,,,,,,,,,26-35,Egypt
32000,,,,,,,,,,,,,,,36-50,Canada
32001,,,,,,,,,,,,,,,36-50,United States
32002,,,,,,,,,,,,,,,18-25,Mexico


In [8]:
def recommend_courses(age_group, country, enrolled_courses, rules):
    # Filter rules based on user demographics
    demographic_rules = rules[
        (rules["age_group"] == age_group) & (rules["country"] == country)
    ]

    # Find rules where the antecedents match enrolled courses
    recommendations = []
    for _, row in demographic_rules.iterrows():
        if set(row["antecedents"]).issubset(set(enrolled_courses)):
            recommendations.extend(row["consequents"])

    # Remove duplicates and already enrolled courses
    recommendations = list(set(recommendations) - set(enrolled_courses))

    return recommendations


In [9]:
user_age_group = "18-25"
user_country = "India"
user_enrolled_courses = ["CS50x", "6.002x"]

# Get recommendations
recommended_courses = recommend_courses(user_age_group, user_country, user_enrolled_courses, rules_filtered)

# Display recommendations
print("Recommended Courses:", recommended_courses)

TypeError: 'float' object is not iterable