In [1]:
import pandas as pd

# Location of the goodbooks-10k ratings CSV; it is downloaded each time this cell runs.
url = 'https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv'
ratings = pd.read_csv(url)

# Preview the first rows. Later cells read the user_id, book_id and rating columns.
ratings.head()

Unnamed: 0,user_id,book_id,rating
0,1,258,5
1,2,4081,4
2,2,260,5
3,2,9296,5
4,2,2318,3


In [3]:
def compute_damped_mean(ratings, damping_factor=5):
    """Score every book by a damped mean of its ratings.

    Each book's mean rating is pulled toward the global mean rating, as if
    ``damping_factor`` additional ratings equal to the global mean had been
    observed for it::

        damped = (count * mean + damping_factor * global_mean)
                 / (count + damping_factor)

    Parameters
    ----------
    ratings : pandas.DataFrame
        Must contain ``'book_id'`` and ``'rating'`` columns.
    damping_factor : int or float, default 5
        Weight given to the global mean; larger values shrink books with
        few ratings more strongly toward it.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``book_id`` with a single ``'damped_mean'`` column,
        ordered from the highest to the lowest damped mean.
    """
    overall_avg = ratings['rating'].mean()

    per_book = ratings.groupby('book_id')['rating']
    n_ratings = per_book.count()
    avg_rating = per_book.mean()

    weighted_sum = n_ratings * avg_rating + damping_factor * overall_avg
    damped = (weighted_sum / (n_ratings + damping_factor)).rename('damped_mean')
    return damped.to_frame().sort_values('damped_mean', ascending=False)


In [4]:
def top_n_damped_recommendations(item_stats, n=10):
    """Return the ``n`` rows of ``item_stats`` with the highest damped mean.

    Parameters
    ----------
    item_stats : pandas.DataFrame
        Must contain a ``'damped_mean'`` column.
    n : int, default 10
        Number of rows to keep after ordering.

    Returns
    -------
    pandas.DataFrame
        The first ``n`` rows of ``item_stats`` once ordered by
        ``'damped_mean'`` from highest to lowest.
    """
    ranked = item_stats.sort_values(by='damped_mean', ascending=False)
    return ranked.iloc[:n]


In [6]:
# Damped-mean score for every book, using the default damping_factor of 5.
itm_sts = compute_damped_mean(ratings)

In [7]:
# Display the 10 books (the default n) with the highest damped mean rating.
top_n_damped_recommendations(itm_sts)

Unnamed: 0_level_0,damped_mean
book_id,Unnamed: 1_level_1
3628,4.820533
7947,4.769885
6920,4.747029
6361,4.744774
9566,4.740785
6590,4.74032
8978,4.738118
4483,4.736759
3275,4.722756
1788,4.722373


In [9]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# One transaction per user: the list of book_ids that user has rated.
transactions = (
    ratings
    .groupby('user_id')['book_id']
    .apply(list)
    .tolist()
)

# One-hot encode the transactions: one row per user, one column per book_id.
# NOTE(review): this builds a dense users x books matrix, which may be large
# for the full ratings file — confirm it fits in memory.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Mine itemsets with support of at least 0.2, then derive association rules
# whose lift is at least 1.0.
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)


In [12]:
def recommend_from_rules(product_id, rules_df, n=10):
    """Recommend items from association rules that involve ``product_id``.

    Rules whose antecedents contain ``product_id`` are visited from the
    highest lift to the lowest, and their consequents are collected in that
    order without duplicates. The returned list therefore keeps the lift
    ranking, and truncating it to ``n`` keeps the strongest recommendations.

    Parameters
    ----------
    product_id : hashable
        Item to look up in each rule's antecedents.
    rules_df : pandas.DataFrame
        Must contain ``'antecedents'`` and ``'consequents'`` columns holding
        collections of items, and a ``'lift'`` column.
    n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Up to ``n`` distinct consequent items, ordered by the lift of the
        first rule that produced them. Items coming from the same rule keep
        that rule's own iteration order. Empty if no rule matches or if
        ``n`` is not positive.
    """
    if n <= 0:
        return []

    # astype(bool) keeps the mask boolean even when rules_df has no rows;
    # an empty object-dtype mask would not be treated as a row filter.
    has_product = (
        rules_df['antecedents']
        .apply(lambda items: product_id in items)
        .astype(bool)
    )
    matching = rules_df[has_product].sort_values('lift', ascending=False)

    # A dict preserves insertion order, so it de-duplicates consequents
    # while remembering the lift-ranked order in which they were first seen.
    ranked = {}
    for consequents in matching['consequents']:
        for item in consequents:
            ranked.setdefault(item, None)
        if len(ranked) >= n:
            break
    return list(ranked)[:n]

In [14]:
# Recommendations derived from the mined rules for book_id 1 (at most the default n=10).
recommend_from_rules(1,rules)

[17, 2, 3, 20]

In [15]:
def precision_at_n(recommended, relevant, n=10):
    """Precision@n: share of the top-``n`` slots filled with relevant items.

    Parameters
    ----------
    recommended : sequence
        Recommended items, best first; only the first ``n`` are considered.
    relevant : iterable
        Items regarded as correct recommendations.
    n : int, default 10
        Cut-off; also the denominator, even if fewer than ``n`` items were
        recommended.

    Returns
    -------
    float
        Number of distinct relevant items among the first ``n``
        recommendations, divided by ``n``.
    """
    relevant_items = set(relevant)
    hits = sum(1 for item in set(recommended[:n]) if item in relevant_items)
    return hits / n

def recall_at_n(recommended, relevant, n=10):
    """Recall@n: share of the relevant items found in the top-``n`` list.

    Both the hits and the denominator are counted over *distinct* items, so
    duplicates in ``relevant`` do not understate the score.

    Parameters
    ----------
    recommended : sequence
        Recommended items, best first; only the first ``n`` are considered.
    relevant : iterable
        Items regarded as correct recommendations.
    n : int, default 10
        Cut-off applied to ``recommended``.

    Returns
    -------
    float
        Number of distinct relevant items among the first ``n``
        recommendations, divided by the number of distinct relevant items.
        Returns ``0.0`` when ``relevant`` is empty, since there is nothing
        to recall.
    """
    relevant_items = set(relevant)
    if not relevant_items:
        # Avoid ZeroDivisionError when there are no relevant items.
        return 0.0
    hits = relevant_items.intersection(recommended[:n])
    return len(hits) / len(relevant_items)


In [None]:
# Evaluate on the user with the most ratings.
user_id = ratings['user_id'].value_counts().idxmax()
user_books = ratings.loc[ratings['user_id'] == user_id, 'book_id'].tolist()

# Seed the rule-based recommender with the first book in that user's history.
target_book = user_books[0]
recommended_books = recommend_from_rules(product_id=target_book, rules_df=rules, n=10)

# NOTE(review): the relevant set is the user's full history, which was also
# part of the data the rules were mined from — this is an in-sample check,
# not a held-out evaluation.
precision = precision_at_n(recommended_books, user_books, n=10)
recall = recall_at_n(recommended_books, user_books, n=10)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")


Precision: 0.60
Recall: 0.03
