In [None]:
#imports
import pandas as pd 
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from collections import defaultdict



In [None]:
# 1. FACEPLATE TRANSACTIONS

# 1.1 Read the transactions and lower-case every header so that later cells
# can address the columns by lower-case name (e.g. 'red', 'white').
faceplate = pd.read_csv("Faceplate.csv").rename(columns=str.lower)
print("First 10 transactions:")
print(faceplate.head(10))

In [None]:
# 1.2 Support of {red, white}: the share of transactions that contain both.
has_red_and_white = faceplate['red'].eq(1) & faceplate['white'].eq(1)
support_red_white = has_red_and_white.sum() / len(faceplate)
print("Support of {red, white}:", support_red_white)

# Convert to a 0/1 incidence matrix (any count >= 1 becomes 1).
# A vectorised comparison replaces the per-element DataFrame.applymap lambda,
# which newer pandas releases deprecate.
# NOTE(review): if the CSV carries a 'transaction' id column it is not an item
# and would otherwise turn into a column of all 1s; it is dropped when present.
# Confirm the column name against Faceplate.csv.
faceplate = (
    faceplate
    .drop(columns=['transaction'], errors='ignore')
    .ge(1)
    .astype(int)
)


In [None]:
# 2.1 Frequent itemsets (support >= 0.2)
# apriori is given a boolean view of the 0/1 matrix: mlxtend emits a
# DeprecationWarning for non-bool frames and processes them more slowly.
frequent_itemsets = apriori(faceplate.astype(bool), min_support=0.2, use_colnames=True)
print(frequent_itemsets)

In [None]:
# 2.2 Association rules: keep rules whose confidence is at least 0.5,
# then order them from highest to lowest lift.
rules = association_rules(frequent_itemsets, min_threshold=0.5, metric="confidence")
rules_sorted = rules.sort_values("lift", ascending=False)

In [None]:
# 2.3 Top 6 rules by lift; the listed metric columns are hidden from the
# report (silently skipped if a column does not exist).
hidden_columns = ['antecedent support', 'consequent support', 'conviction']
top6 = rules_sorted.iloc[:6].drop(columns=hidden_columns, errors='ignore')
print(top6)

In [None]:
# 2.4 Translate the highest-lift rule into an "if ... then ..." sentence.
if top6.empty:
    # No rule passed the support/confidence thresholds, so there is no first row.
    print("No association rules met the thresholds; nothing to translate.")
else:
    best_rule = top6.iloc[0]
    # antecedents/consequents are frozensets; sort them so the printed order
    # is the same on every kernel restart (set iteration order is not stable).
    antecedents = sorted(best_rule['antecedents'])
    consequents = sorted(best_rule['consequents'])
    print(f"If {antecedents} then {consequents} "
          f"(confidence={best_rule['confidence']:.2f}, lift={best_rule['lift']:.2f})")


In [None]:
# 3. CHARLES BOOK CLUB

bookclub = pd.read_csv("CharlesBookClub.csv")

# 3.1 Binary incidence matrix
# Headers are stripped of surrounding whitespace before columns are selected.
bookclub.columns = bookclub.columns.str.strip()

# Columns that are not book-purchase counts must not become "items".
# errors='ignore' below means names absent from the file are simply skipped.
drop_cols = [
    'ID#', 'Seq#', 'Gender', 'M', 'R', 'F', 'FirstPurch', 'Related Purchase',
    # NOTE(review): the published CharlesBookClub.csv appears to also carry
    # these derived code / dummy columns; left in, the always-positive codes
    # would show up as items with support 1. Confirm against the file.
    'Mcode', 'Rcode', 'Fcode', 'Yes_Florence', 'No_Florence',
]
book_matrix = bookclub.drop(columns=drop_cols, errors='ignore')

# Any positive purchase count becomes 1, everything else 0.
book_matrix = (book_matrix > 0).astype(int)

print(" Binary Matrix:")
print(book_matrix.head(10))


In [None]:
# 3.2 Frequent itemsets
# An itemset is frequent when it occurs in at least MIN_TRANSACTIONS rows;
# apriori takes that threshold as a fraction of all transactions.
MIN_TRANSACTIONS = 200
# Boolean input avoids mlxtend's DeprecationWarning for non-bool frames.
frequent_books = apriori(
    book_matrix.astype(bool),
    min_support=MIN_TRANSACTIONS / len(book_matrix),
    use_colnames=True,
)
print("Number of frequent itemsets:", len(frequent_books))


In [None]:
# 3.3 Rules with confidence >= 0.5; report the 25 with the highest lift.
book_rules = association_rules(frequent_books, min_threshold=0.5, metric="confidence")
report_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift', 'leverage']
book_rules_sorted = book_rules.sort_values("lift", ascending=False).iloc[:25]
print(book_rules_sorted[report_columns])


In [None]:
summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

# 4.1 Rule with the highest support
highest_support_label = book_rules['support'].idxmax()
rule_high_support = book_rules.loc[highest_support_label]

# 4.2 Rule with the highest lift
highest_lift_label = book_rules['lift'].idxmax()
rule_high_lift = book_rules.loc[highest_lift_label]

# 4.3 Among the 10 highest-lift rules, the one with the lowest confidence
top10 = book_rules.sort_values("lift", ascending=False).iloc[:10]
lowest_conf_label = top10['confidence'].idxmin()
lowest_conf_rule = top10.loc[lowest_conf_label]

print(rule_high_support[summary_columns])
print(rule_high_lift[summary_columns])
# This rule is printed with every column, not only the summary subset.
print(lowest_conf_rule)


In [None]:
# 5.1 SYNTHETIC DATASET

# 50 random transactions over 9 items; every cell is drawn as 0 or 1.
# The seed is fixed so the same matrix is produced on every run.
np.random.seed(0)
item_names = ['item' + str(k) for k in range(1, 10)]
random_bits = np.random.randint(0, 2, size=(50, 9))
synthetic = pd.DataFrame(random_bits, columns=item_names)

print("\nQ5.1 Synthetic Dataset:")
print(synthetic.head())
print("Shape:", synthetic.shape)

In [None]:
# 5.2 Apriori on the random data
# Minimum support is "at least 2 transactions", expressed as a fraction of the
# actual row count instead of a hard-coded 50.
# Boolean input avoids mlxtend's DeprecationWarning for non-bool frames.
freq_syn = apriori(synthetic.astype(bool), min_support=2 / len(synthetic), use_colnames=True)
# Keep only rules with confidence >= 0.7.
rules_syn = association_rules(freq_syn, metric="confidence", min_threshold=0.7)


In [None]:
# 5.3 The six random-data rules with the highest lift
syn_report_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
top6_syn = rules_syn.sort_values("lift", ascending=False).iloc[:6]
print(top6_syn[syn_report_columns])


In [None]:
# 6.1 Generate synthetic ratings
# Seeded so the same ratings are produced on every run.
np.random.seed(0)
ratings = pd.DataFrame({
    'userID': np.random.randint(0, 1000, 5000),   # user ids 0..999
    'itemID': np.random.randint(0, 100, 5000),    # item ids 0..99
    'rating': np.random.randint(1, 6, 5000)       # ratings 1..5 (upper bound exclusive)
})

# Independent draws repeat some (userID, itemID) pairs, usually with
# conflicting ratings. Keep the first rating of each pair so a user rates an
# item at most once and the same pair cannot end up in both train and test.
# The frame therefore holds at most 5000 rows.
ratings = (
    ratings
    .drop_duplicates(subset=['userID', 'itemID'], keep='first')
    .reset_index(drop=True)
)

print("\nRatings sample:")
print(ratings.head(10))

In [None]:
# 6.2 Convert to Surprise format
# The reader declares the 1-5 rating scale; columns are passed in the
# order user, item, rating.
rating_columns = ['userID', 'itemID', 'rating']
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[rating_columns], reader)

# Hold out 25% of the ratings for testing, with a fixed seed for the split.
trainset, testset = train_test_split(data, random_state=42, test_size=0.25)
print("Train size:", trainset.n_ratings, "Test size:", len(testset))


In [None]:
# 6.3 Item-based collaborative filtering with cosine similarity
# user_based=False asks for item-item rather than user-user similarities.
sim_options = dict(name='cosine', user_based=False)
algo = KNNBasic(sim_options=sim_options)
algo.fit(trainset)


In [None]:
# 6.4 Predictions for the held-out test ratings (input to the top-N step)
predictions = algo.test(testset)
print("\nSample Predictions:", predictions[:10], sep="\n")


def get_top_n(predictions, n=5):
    """Group predictions by user and keep each user's n highest estimates.

    Args:
        predictions: iterable of 5-element records unpacked as
            (user id, item id, true rating, estimated rating, details).
        n: how many (item id, estimate) pairs to keep per user.

    Returns:
        A defaultdict mapping each user id to a list of (item id, estimate)
        pairs ordered by estimate, highest first, cut to the first n entries.
        Equal estimates keep the order in which they were encountered.
    """
    per_user = defaultdict(list)
    for prediction in predictions:
        user_id, item_id, _true_rating, estimate, _details = prediction
        per_user[user_id].append((item_id, estimate))

    # Re-assigning existing keys does not resize the dict, so this is safe
    # while iterating over its items.
    for user_id, scored_items in per_user.items():
        ranked = sorted(scored_items, key=lambda pair: pair[1], reverse=True)
        per_user[user_id] = ranked[:n]
    return per_user

# Rank the test-set predictions per user and show the first user's list.
# The candidates are limited to items present in that user's test ratings,
# because `predictions` was computed on the test set only.
top_n = get_top_n(predictions, n=5)
first_user = list(top_n)[0]
print(f"Top recommendations for user {first_user}:", top_n[first_user])