# Practical No 3: a) Implement the Item-Item based collaborative filtering and suggest recommendation for the users.
# b) Calculate Prediction for a particular user by using method of Cosine similarity , Euclidean distance and Pearson correlation on Item-Item based collaborative filtering.

### Item-Based Collaborative Filtering

This technique recommends items to a user based on their similarity to items the user has previously rated highly.

### Approach:

Find Similar Items: Calculate similarity between items using metrics like cosine similarity or correlation.
Recommend Similar Items: Recommend items similar to those the user has rated highly.

Example:

If a user has rated "The Lord of the Rings" highly, the system might recommend "The Hobbit" because these items are similar in terms of genre, author, and theme.



In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
movies = pd.read_csv('/content/drive/MyDrive/Recommendation System/movies.csv')
ratings = pd.read_csv('/content/drive/MyDrive/Recommendation System/ratings.csv')

In [None]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
item_user_matrix = ratings.pivot(index='movieId', columns='userId', values='rating')

In [None]:
item_user_matrix.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,,,4.0,,4.5,,,,...,4.0,,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,,,,,,4.0,,4.0,,,...,,4.0,,5.0,3.5,,,2.0,,
3,4.0,,,,,5.0,,,,,...,,,,,,,,2.0,,
4,,,,,,3.0,,,,,...,,,,,,,,,,
5,,,,,,5.0,,,,,...,,,,3.0,,,,,,


In [None]:
item_user_matrix_filled = item_user_matrix.fillna(0)
item_user_matrix_filled.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
item_similarity = cosine_similarity(item_user_matrix_filled)

In [None]:
item_correlation = item_user_matrix_filled.corr(method='pearson')

In [None]:
from scipy.spatial.distance import pdist, squareform
item_distance = pdist(item_user_matrix_filled, metric='euclidean')

In [None]:
item_similarity_df = pd.DataFrame(item_similarity, index=item_user_matrix.index, columns=item_user_matrix.index)

In [None]:
item_similarity_df

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.410562,0.296917,0.035573,0.308762,0.376316,0.277491,0.131629,0.232586,0.395573,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.410562,1.000000,0.282438,0.106415,0.287795,0.297009,0.228576,0.172498,0.044835,0.417693,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.296917,0.282438,1.000000,0.092406,0.417802,0.284257,0.402831,0.313434,0.304840,0.242954,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.035573,0.106415,0.092406,1.000000,0.188376,0.089685,0.275035,0.158022,0.000000,0.095598,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.308762,0.287795,0.417802,0.188376,1.000000,0.298969,0.474002,0.283523,0.335058,0.218061,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
193583,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
193585,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
193587,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [None]:
item_correlation_df = pd.DataFrame(item_correlation, index=item_user_matrix.index, columns=item_user_matrix.index)
item_correlation_df = item_correlation_df.fillna(0)

In [None]:
item_distance_df = pd.DataFrame(squareform(item_distance), index=item_user_matrix.index, columns=item_user_matrix.index)

In [None]:
item_predicted_ratings_1 = pd.DataFrame(index=item_user_matrix.index, columns=item_user_matrix.columns)
item_predicted_ratings_2 = pd.DataFrame(index=item_user_matrix.index, columns=item_user_matrix.columns)
item_predicted_ratings_3 = pd.DataFrame(index=item_user_matrix.index, columns=item_user_matrix.columns)

In [None]:
for user in item_user_matrix.index:
    sim_scores = item_similarity_df[user]
    weighted_sum = sim_scores.values @ item_user_matrix_filled
    sim_sum = np.abs(sim_scores).sum()
    item_predicted_ratings_1.loc[user] = weighted_sum / sim_sum

In [None]:
for user in item_user_matrix.index:
    sim_scores = item_correlation_df[user]
    weighted_sum = sim_scores.values @ item_user_matrix_filled
    sim_sum = np.abs(sim_scores).sum()
    item_predicted_ratings_2.loc[user] = weighted_sum / sim_sum

In [None]:
for user in item_user_matrix.index:
    sim_scores = item_distance_df[user]
    weighted_sum = sim_scores.values @ item_user_matrix_filled
    sim_sum = np.abs(sim_scores).sum()
    item_predicted_ratings_3.loc[user] = weighted_sum / sim_sum

In [None]:
def recommend_users_cosine_similarity(user_id, rating, num_recommendations=10):
    user_ratings = rating.loc[user_id].sort_values(ascending=False)
    already_rated = item_user_matrix.loc[user_id].dropna().index
    recommendations = user_ratings.drop(already_rated)
    return recommendations.head(num_recommendations)

In [None]:
recommend_users_cosine_similarity(1, item_predicted_ratings_1, 5)

Unnamed: 0_level_0,1
userId,Unnamed: 1_level_1
387,0.634556
305,0.600262
318,0.547063
489,0.513742
105,0.492343


In [None]:
recommend_users_cosine_similarity(1, item_predicted_ratings_2, 5)

Unnamed: 0_level_0,1
userId,Unnamed: 1_level_1
6,1.760473
84,0.960144
602,0.876858
58,0.833532
117,0.824498


In [None]:
recommend_users_cosine_similarity(1, item_predicted_ratings_3, 5)

Unnamed: 0_level_0,1
userId,Unnamed: 1_level_1
387,0.34275
318,0.33897
105,0.306704
305,0.272841
111,0.220724


# Conclusion

Item-based collaborative filtering is a popular technique for recommending items to users based on their similarity to items the user has previously rated highly. By leveraging similarity metrics like cosine similarity, Pearson correlation, or Euclidean distance, we can effectively identify similar items and provide personalized recommendations. This approach is computationally efficient compared to user-based filtering, making it suitable for large-scale recommendation systems.