In [14]:
import numpy as np
import pandas as pd

# Create a synthetic dataset (user_id, item_id, rating)
data = {'user_id': [1, 1, 1, 2, 2, 2, 3, 3, 3],
        'item_id': [1, 2, 3, 1, 2, 4, 1, 3, 4],
        'rating': [5, 4, 3, 4, 5, 3, 3, 2, 5]}
df = pd.DataFrame(data)

# Create a user-item matrix
user_item_matrix = df.pivot(index='user_id', columns='item_id', values='rating').fillna(0)
user_item_matrix_np = user_item_matrix.to_numpy()

def z_score_normalization(matrix):
    means = np.mean(matrix, axis=1).reshape(-1, 1)
    std_devs = np.std(matrix, axis=1).reshape(-1, 1)
    normalized_matrix = (matrix - means) / std_devs
    return normalized_matrix, means, std_devs

normalized_ratings, means, std_devs = z_score_normalization(user_item_matrix_np)

def cosine_similarity(matrix):
    dot_product = np.dot(matrix, matrix.T)
    norms = np.linalg.norm(matrix, axis=1)
    norms = np.outer(norms, norms)
    similarity = dot_product / norms
    return similarity

user_similarity = cosine_similarity(normalized_ratings)

def get_top_k_neighbors(similarity_matrix, k=30):
    neighbors = np.argsort(-similarity_matrix, axis=1)[:, :k]
    return neighbors

top_k_neighbors = get_top_k_neighbors(user_similarity, k=30)

def predict_ratings(user_item_matrix, normalized_ratings, user_similarity, top_k_neighbors):
    num_users, num_items = user_item_matrix.shape
    predictions = np.zeros((num_users, num_items))
    
    for user in range(num_users):
        neighbors = top_k_neighbors[user]
        similarity_scores = user_similarity[user, neighbors]
        
        for item in range(num_items):
            # Only consider ratings from neighbors who have rated this item
            rated_neighbors = normalized_ratings[neighbors, item] != 0
            if np.any(rated_neighbors):
                relevant_similarities = similarity_scores[rated_neighbors]
                relevant_ratings = normalized_ratings[neighbors, item][rated_neighbors]
                
                # Compute weighted average
                weighted_sum = np.dot(relevant_similarities, relevant_ratings)
                sum_of_weights = np.sum(relevant_similarities)
                
                if sum_of_weights > 0:
                    predictions[user, item] = weighted_sum / sum_of_weights
                else:
                    predictions[user, item] = 0
    
    # Denormalize predictions
    predictions = (predictions * std_devs) + means
    
    return predictions

predicted_ratings = predict_ratings(user_item_matrix_np, normalized_ratings, user_similarity, top_k_neighbors)


def get_recommendations(predicted_ratings, user_item_matrix, user_id, n=10):
    user_index = user_id - 1  # Adjust for zero indexing
    user_ratings = predicted_ratings[user_index]
    
    # Exclude items the user has already rated
    # already_rated = user_item_matrix.iloc[user_index] != 0
    recommendations = [(item_id, rating) for item_id, rating in enumerate(user_ratings, start=1) if not already_rated[item_id-1]]
    
    # Sort by predicted rating and get top N
    recommendations.sort(key=lambda x: x[1], reverse=True)
    return recommendations[:n]

print(user_item_matrix_np)
print(predicted_ratings[0])

# # Get top 10 recommendations for user 1
# user_id = 1
# top_recommendations = get_recommendations(predicted_ratings, user_item_matrix, user_id, n=10)
# print(f"Top recommendations for user {user_id}:")
# for item_id, rating in top_recommendations:
#     print(f"Item ID: {item_id}, Predicted Rating: {rating}")


[[5. 4. 3. 0.]
 [4. 5. 0. 3.]
 [3. 0. 2. 5.]]
[  6.13545442   8.33826659   3.         -11.21182273]
