In [3]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
import operator

In [4]:
movies = pd.read_csv('movies.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
ratings = pd.read_csv('ratings.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
ratings.shape

(100836, 4)

In [7]:
ratings=ratings.drop(columns = 'timestamp')
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [8]:
ratings = ratings[ratings.userId <= 10000]

In [9]:
ratings['rating'] = ratings['rating'].replace(-1, pd.NA)

In [10]:
ratings = ratings.drop_duplicates(['userId', 'movieId'])

In [11]:
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')

In [12]:
user_item_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,


In [13]:
user_similarity = cosine_similarity(user_item_matrix.fillna(0))

In [14]:
def user_based_recommendations(user_id, user_item_matrix, user_similarity, n=5):
    # Get the similarity scores for the user
    user_scores = user_similarity[user_id - 1]
    similar_users = sorted(enumerate(user_scores), key=lambda x: x[1], reverse=True)

    # Initialize a list
    recommended_movie = []

    for user, score in similar_users[1:]:  # Exclude the target user
        # Get the movie the similar user has rated positively
        rated_movie = user_item_matrix.loc[user_item_matrix.index[user - 1]]
        positively_rated_movie = rated_movie[rated_movie >= 2.5].index

        # Exclude movie the user has already rated
        target_user_rated_movie = user_item_matrix.loc[user_id].dropna().index
        recommended_movie.extend(set(positively_rated_movie) - set(target_user_rated_movie))

        if len(recommended_movie) >= n:
            break

    return recommended_movie[:n]

In [15]:
user_id = 9
user_recommendations = user_based_recommendations(user_id, user_item_matrix, user_similarity, n=5)
print(f"User-Based Recommendations for User {user_id}:")
for movieId in user_recommendations:
    movie_name = movies.loc[movies['movieId'] == movieId, 'title'].values[0]
    print(f"- {movie_name}")

User-Based Recommendations for User 9:
- Legends of the Fall (1994)
- Apollo 13 (1995)
- Batman Forever (1995)
- Babe (1995)
- Die Hard: With a Vengeance (1995)


# Item based

In [16]:
item_similarity = cosine_similarity(user_item_matrix.fillna(0).T)

In [19]:
def item_based_recommendations(user_id, user_item_matrix, item_similarity, n=5):
    # Get the movie the user has rated positively
    positively_rated_movie = user_item_matrix.loc[user_id][user_item_matrix.loc[user_id] >= 3].index

    # Initialize a list
    recommended_movie = []

    for movie_id in positively_rated_movie:
        # Get movie similar to the liked movie
        similar_movie = item_similarity[movie_id - 1]
        similar_movie_sorted = sorted(enumerate(similar_movie), key=lambda x: x[1], reverse=True)

        for movie, score in similar_movie_sorted[1:]:  # Exclude the liked movie
            # Exclude movie the user has already rated
            user_rated_movie = user_item_matrix.loc[user_id].dropna().index
            if movie not in user_rated_movie:
                recommended_movie.append(movie)
            if len(recommended_movie) >= n:
                break

    return recommended_movie[:n]

In [20]:
item_recommendations = item_based_recommendations(user_id, user_item_matrix, item_similarity, n=5)
print(f"\nItem-Based Recommendations for User {user_id}:")
for movieId in item_recommendations:
    movie_name = movies.loc[movies['movieId'] == movieId, 'title'].values[0]
    print(f"- {movie_name}")


Item-Based Recommendations for User 9:
- Bad Boys (1995)
- Relative Fear (1994)
- Jimmy Hollywood (1994)
- Man of No Importance, A (1994)
- Sudden Death (1995)
