In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error

In [2]:
import pandas as pd

# Read the Excel file
books = pd.read_excel("C:/Users/DELL/Desktop/Book1.xlsx")

# Read the CSV file
ratings = pd.read_csv("C:/Users/DELL/Desktop/Book2.csv")

In [3]:
# Step 2: Preprocessing
# Drop unnecessary columns (timestamp) in ratings
ratings = ratings.drop(columns=["timestamp"])

# Merge ratings with book details
data = pd.merge(ratings, books, on="movieId")

In [4]:
# Step 3: Create a User-Item Matrix
user_item_matrix = data.pivot_table(index="userId", columns="movieId", values="rating")


In [5]:
# Step 4: Calculate Similarity Matrices

# a) Cosine Similarity for User-Based and Item-Based CF

# User-based cosine similarity
user_similarity_cosine = cosine_similarity(user_item_matrix.fillna(0))
user_similarity_cosine = pd.DataFrame(user_similarity_cosine, index=user_item_matrix.index, columns=user_item_matrix.index)

# Item-based cosine similarity
item_similarity_cosine = cosine_similarity(user_item_matrix.fillna(0).T)
item_similarity_cosine = pd.DataFrame(item_similarity_cosine, index=user_item_matrix.columns, columns=user_item_matrix.columns)

In [6]:
# b) Pearson Correlation for User-Based and Item-Based CF

# User-based Pearson correlation
user_similarity_pearson = user_item_matrix.T.corr()

# Item-based Pearson correlation
item_similarity_pearson = user_item_matrix.corr()

In [7]:
# Step 5: Rating Prediction Function
def predict_rating(user_id, movie_id, similarity_matrix, user_item_matrix, type="user"):
    if type == "user":
        # Get similar users and their ratings
        similar_users = similarity_matrix[user_id].drop(user_id)
        ratings = user_item_matrix.loc[similar_users.index, movie_id]
    else:
        # Get similar items and their ratings
        similar_items = similarity_matrix[movie_id].drop(movie_id)
        ratings = user_item_matrix.loc[user_id, similar_items.index]
    
    # Calculate weighted average
    weighted_sum = (similar_items * ratings).sum() if type == "item" else (similar_users * ratings).sum()
    sum_of_weights = similar_items.sum() if type == "item" else similar_users.sum()
    
    if sum_of_weights == 0 or weighted_sum == 0:
        # Handle NaN by returning the mean rating for that item/user or a default value
        return user_item_matrix[movie_id].mean() if type == "item" else user_item_matrix.loc[user_id].mean()

    return weighted_sum / sum_of_weights


In [8]:
# Step 6: Top-N Recommendation

def get_top_n_recommendations(user_id, user_item_matrix, similarity_matrix, n=5, type="user"):
    # Predict ratings for all items not yet rated by the user
    unrated_movies = user_item_matrix.loc[user_id].isna()
    predictions = [predict_rating(user_id, movie, similarity_matrix, user_item_matrix, type) for movie in unrated_movies.index[unrated_movies]]
    predictions = pd.Series(predictions, index=unrated_movies.index[unrated_movies])
    
    # Return top-N recommendations
    return predictions.nlargest(n)

In [9]:
# Step 7: Evaluation Function

def evaluate_recommendations(true_ratings, predicted_ratings):
    # Calculate RMSE
    rmse = np.sqrt(mean_squared_error(true_ratings, predicted_ratings))
    return rmse

In [10]:
# Example Usage
# Assuming we have a user_id and movie_id to test
user_id = 1  # Replace with an actual userId from your data
movie_id = 1  # Replace with an actual movieId from your data

In [11]:
# Predict rating for a specific user and item
predicted_rating_user_cosine = predict_rating(user_id, movie_id, user_similarity_cosine, user_item_matrix, type="user")
predicted_rating_item_cosine = predict_rating(user_id, movie_id, item_similarity_cosine, user_item_matrix, type="item")

print("Predicted Rating (User-based, Cosine):", predicted_rating_user_cosine)
print("Predicted Rating (Item-based, Cosine):", predicted_rating_item_cosine)


Predicted Rating (User-based, Cosine): 2.706082654941994
Predicted Rating (Item-based, Cosine): 0.6666945052408226


In [12]:
# Predict rating for a specific user and item using Pearson correlation
predicted_rating_user_pearson = predict_rating(user_id, movie_id, user_similarity_pearson, user_item_matrix, type="user")
predicted_rating_item_pearson = predict_rating(user_id, movie_id, item_similarity_pearson, user_item_matrix, type="item")

print("Predicted Rating (User-based, Pearson):", predicted_rating_user_pearson)
print("Predicted Rating (Item-based, Pearson):", predicted_rating_item_pearson)


Predicted Rating (User-based, Pearson): 3.6666666666666665
Predicted Rating (Item-based, Pearson): 4.0


In [13]:
# Get Top-N recommendations for the user
top_n_user_recommendations = get_top_n_recommendations(user_id, user_item_matrix, user_similarity_cosine, n=5, type="user")
print("Top-N Recommendations (User-based, Cosine):", top_n_user_recommendations)

Top-N Recommendations (User-based, Cosine): movieId
3      3.666667
70     3.666667
142    3.666667
166    3.666667
169    3.666667
dtype: float64


In [14]:
# Get Top-N recommendations for the user using item-based collaborative filtering
top_n_item_recommendations = get_top_n_recommendations(user_id, user_item_matrix, item_similarity_cosine, n=5, type="item")
print("Top-N Recommendations (Item-based, Cosine):", top_n_item_recommendations)

Top-N Recommendations (Item-based, Cosine): movieId
70     5.0
166    5.0
180    5.0
189    5.0
3      4.0
dtype: float64


In [15]:
# Get Top-N recommendations for the user using user-based collaborative filtering with Pearson correlation
top_n_user_recommendations_pearson = get_top_n_recommendations(user_id, user_item_matrix, user_similarity_pearson, n=5, type="user")
print("Top-N Recommendations (User-based, Pearson):", top_n_user_recommendations_pearson)




Top-N Recommendations (User-based, Pearson): movieId
1     3.666667
3     3.666667
6     3.666667
10    3.666667
11    3.666667
dtype: float64


In [16]:
# Get Top-N recommendations for the user using item-based collaborative filtering with Pearson correlation
top_n_item_recommendations_pearson = get_top_n_recommendations(user_id, user_item_matrix, item_similarity_pearson, n=5, type="item")
print("Top-N Recommendations (Item-based, Pearson):", top_n_item_recommendations_pearson)

Top-N Recommendations (Item-based, Pearson): movieId
11     5.0
62     5.0
70     5.0
116    5.0
123    5.0
dtype: float64
