In [1]:
import pandas as pd
import re
from mlxtend.frequent_patterns import apriori, association_rules
# Read the survey CSV (path is relative to the notebook's working directory)
# into the working DataFrame used by the cells below.
SURVEY_CSV = "Team_Sports_Survey_Preprocessed.csv"
data = pd.read_csv(SURVEY_CSV)
# Remove the parenthesised "e.g." examples from each item
def remove_parentheses(text):
    """Return ``text`` with every parenthesised fragment removed.

    Each ``(...)`` group is dropped together with any whitespace directly
    in front of it. The match is non-greedy, so it ends at the first closing
    parenthesis. Values that are not strings are handed back unchanged.
    """
    if not isinstance(text, str):
        return text
    parenthesised = re.compile(r"\s*\(.*?\)")
    return parenthesised.sub("", text)

In [2]:
# Tokenise the snack and beverage answers into labelled items, then one-hot
# encode them as one transaction (row) per respondent.
def _label_items(answer, label):
    """Turn one survey answer into a list of ``"<label>: <item>"`` strings.

    Parameters
    ----------
    answer : str, list, or missing value
        A comma-separated answer. A list is treated as the result of an
        earlier run of this cell and is returned unchanged.
    label : str
        Category prefix placed in front of every item, e.g. ``"Snack"``.

    Returns
    -------
    list of str
        The labelled items; an empty list for a missing answer.
    """
    if isinstance(answer, list):
        # Already tokenised: re-running the cell must not re-split the list's
        # string representation into garbage items.
        return answer
    if pd.isna(answer):
        # Converting a missing answer with astype(str) would create the
        # bogus item "nan"; a missing answer contributes no items instead.
        return []
    pieces = str(remove_parentheses(answer)).split(",")
    # Skip empty pieces (e.g. from a trailing comma) so no blank item appears.
    return [f"{label}: {piece.strip()}" for piece in pieces if piece.strip()]


# Split the values into single items and add the category labels
data["Preferred Snacks During Activities"] = data["Preferred Snacks During Activities"].apply(
    lambda answer: _label_items(answer, "Snack")
)
data["Preferred Beverage During Activities"] = data["Preferred Beverage During Activities"].apply(
    lambda answer: _label_items(answer, "Beverage")
)

# Combine snacks and beverages into a single column (list concatenation per row)
data["snack_beverage"] = data["Preferred Snacks During Activities"] + data["Preferred Beverage During Activities"]

# Convert the items into transactional data: one (respondent, item) pair per row
transactions = data["snack_beverage"].explode().str.strip().reset_index()

# One-hot encode: rows are respondents, columns are items, values are booleans.
# crosstab counts each (respondent, item) pair; astype(bool) collapses counts
# to presence/absence, which is the dtype mlxtend's apriori recommends.
# Reindexing keeps respondents without any item as all-False rows, so they
# still count towards the denominator when support is computed.
transactions_encoded = (
    pd.crosstab(transactions["index"], transactions["snack_beverage"])
    .astype(bool)
    .reindex(data.index, fill_value=False)
    .rename_axis(index="index")
)



In [3]:
# Mine frequent itemsets with Apriori, then derive association rules from them
frequent_items = apriori(transactions_encoded, min_support=0.3, use_colnames=True)
rules = association_rules(frequent_items, metric="confidence", min_threshold=0.5)


def _every_item_is(prefix):
    """Build a predicate that is true when all items of an itemset start with ``prefix``."""
    def check(itemset):
        for item in itemset:
            if not item.startswith(prefix):
                return False
        return True
    return check


# Keep only the rules whose antecedents are all snacks and whose consequents
# are all beverages
snack_antecedents = rules['antecedents'].apply(_every_item_is("Snack:"))
beverage_consequents = rules['consequents'].apply(_every_item_is("Beverage:"))
filtered_rules = rules[snack_antecedents & beverage_consequents]

# Display the final rules, highest confidence first
report_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
filtered_rules[report_columns].sort_values("confidence", ascending=False)



Unnamed: 0,antecedents,consequents,support,confidence,lift
33,"(Snack: Fast Food, Snack: Beverages)",(Beverage: Water),0.46,0.901961,1.202614
7,(Snack: Beverages),(Beverage: Water),0.64,0.888889,1.185185
8,(Snack: Fast Food),(Beverage: Water),0.52,0.866667,1.155556
10,(Snack: Healthy Options),(Beverage: Water),0.3,0.833333,1.111111
27,"(Snack: Fast Food, Snack: Beverages)",(Beverage: Non-Alcoholic),0.36,0.705882,1.411765
3,(Snack: Beverages),(Beverage: Non-Alcoholic),0.47,0.652778,1.305556
5,(Snack: Fast Food),(Beverage: Non-Alcoholic),0.38,0.633333,1.266667
45,"(Snack: Fast Food, Snack: Beverages)","(Beverage: Non-Alcoholic, Beverage: Water)",0.31,0.607843,1.482544
19,(Snack: Beverages),"(Beverage: Non-Alcoholic, Beverage: Water)",0.4,0.555556,1.355014
24,(Snack: Fast Food),"(Beverage: Non-Alcoholic, Beverage: Water)",0.32,0.533333,1.300813
