In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix


In [5]:
# Source of the ratings table (goodbooks-10k repository on GitHub).
url = 'https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv'

# Only the first 3000 ratings are used in this notebook. Asking the parser for
# exactly that many rows yields the same frame as reading everything and then
# calling .head(3000), without parsing the rest of the file.
ratings = pd.read_csv(url, nrows=3000)
ratings.head()


Unnamed: 0,user_id,book_id,rating
0,1,258,5
1,2,4081,4
2,2,260,5
3,2,9296,5
4,2,2318,3


In [6]:
# Build the user x book ratings matrix (rows: user_id, columns: book_id).
# User/book pairs that have no rating are filled with 0.
user_item_matrix = (
    ratings
    .pivot_table(values='rating', index='user_id', columns='book_id')
    .fillna(0)
)

# Keep the same values in sparse (CSR) form as well.
sparse_matrix = csr_matrix(user_item_matrix.to_numpy())

In [7]:
# Pairwise cosine similarity between the rows (users) of the ratings matrix.
user_similarity = cosine_similarity(sparse_matrix)

# Label both axes with user ids so similarities can be looked up by user.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

In [8]:
def get_user_based_recommendations(user_id, top_n=10, similarity_df=None, ratings_matrix=None):
    """Recommend books the user has not rated, scored from other users' ratings.

    Each book's score is the similarity-weighted sum of the other users'
    ratings divided by the total absolute similarity of those users. Entries
    equal to 0 in the ratings matrix are treated as "not rated" when deciding
    what the target user already knows, but they still take part in the sum,
    so the result is a ranking score rather than a predicted rating.

    Parameters
    ----------
    user_id : hashable
        Label of the target user in ``similarity_df`` and ``ratings_matrix``.
    top_n : int, default 10
        Maximum number of recommendations to return.
    similarity_df : pandas.DataFrame, optional
        Square user-by-user similarity table labelled by user id on both axes.
        Defaults to the notebook-level ``user_similarity_df``.
    ratings_matrix : pandas.DataFrame, optional
        User-by-book ratings table with 0 meaning "not rated".
        Defaults to the notebook-level ``user_item_matrix``.

    Returns
    -------
    pandas.Series or list
        Scores indexed by book id, highest first, without the books the user
        has already rated. An empty list when ``user_id`` is unknown; an empty
        Series when no other user has a non-zero similarity to ``user_id``.
    """
    if similarity_df is None:
        similarity_df = user_similarity_df
    if ratings_matrix is None:
        ratings_matrix = user_item_matrix

    if user_id not in similarity_df.index:
        return []

    # Similarity of every other user to the target user (self excluded).
    neighbour_weights = similarity_df[user_id].drop(user_id)
    weights = neighbour_weights.to_numpy()

    # The normaliser is the same for every book, so a scalar is enough.
    total_weight = np.abs(weights).sum()
    if total_weight == 0:
        # No neighbour carries any signal: dividing would give 0/0 = NaN
        # scores, so report "nothing to recommend" instead.
        return pd.Series(dtype=float, index=ratings_matrix.columns[:0])

    neighbour_ratings = ratings_matrix.loc[neighbour_weights.index].to_numpy()
    scores = pd.Series(
        (weights @ neighbour_ratings) / total_weight,
        index=ratings_matrix.columns,
    )

    # Drop the books the target user has already rated.
    own_ratings = ratings_matrix.loc[user_id]
    already_rated = own_ratings[own_ratings > 0].index
    return scores.drop(index=already_rated).sort_values(ascending=False).head(top_n)


In [9]:
# Pairwise cosine similarity between books: transposing the ratings matrix
# turns every book into a row made of the ratings it received.
item_similarity = cosine_similarity(user_item_matrix.T)

# Label both axes with book ids so similarities can be looked up by book.
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns,
)


In [10]:
def get_item_based_recommendations(user_id, top_n=10, similarity_df=None, ratings_matrix=None):
    """Recommend books the user has not rated, scored from the books they did rate.

    A candidate book's score is the sum, over every book the user rated, of
    (similarity between the two books) * (the user's rating). The sum is not
    normalised, so it is a ranking score and can exceed the rating scale.

    Parameters
    ----------
    user_id : hashable
        Label of the target user in ``ratings_matrix``.
    top_n : int, default 10
        Maximum number of recommendations to return.
    similarity_df : pandas.DataFrame, optional
        Square book-by-book similarity table labelled by book id on both axes.
        Defaults to the notebook-level ``item_similarity_df``.
    ratings_matrix : pandas.DataFrame, optional
        User-by-book ratings table with 0 meaning "not rated".
        Defaults to the notebook-level ``user_item_matrix``.

    Returns
    -------
    pandas.Series or list
        Scores indexed by book id, highest first, restricted to books whose
        rating for this user is 0. An empty list when ``user_id`` is unknown,
        matching ``get_user_based_recommendations``.
    """
    if similarity_df is None:
        similarity_df = item_similarity_df
    if ratings_matrix is None:
        ratings_matrix = user_item_matrix

    if user_id not in ratings_matrix.index:
        return []

    user_ratings = ratings_matrix.loc[user_id]
    rated = user_ratings[user_ratings > 0]

    # One matrix-vector product replaces the per-book accumulation loop:
    # only the similarity columns of the rated books contribute.
    scores = similarity_df.loc[:, rated.index].dot(rated)

    # Keep only the books the user has not rated yet.
    unseen_scores = scores[user_ratings == 0]
    return unseen_scores.sort_values(ascending=False).head(top_n)


In [11]:
# Demo: run both recommenders for the first user in the ratings matrix.
sample_user = user_item_matrix.index[0]

for header, recommend in (
    (f"\nUser-based рекомендації для користувача {sample_user}:", get_user_based_recommendations),
    (f"\nItem-based рекомендації для користувача {sample_user}:", get_item_based_recommendations),
):
    print(header)
    print(recommend(sample_user))



User-based рекомендації для користувача 1:
book_id
11     2.499633
8      2.025971
94     1.704583
325    1.677042
45     1.523114
33     1.518361
26     1.495014
24     1.386060
2      1.341543
27     1.281687
dtype: float64

Item-based рекомендації для користувача 1:
book_id
2079    5.437404
3560    5.005537
3703    4.521320
6944    4.521320
9606    4.521320
1853    4.521320
2559    4.521320
522     4.521320
325     4.412428
233     3.364760
dtype: float64
