In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns

In [16]:
# Read the raw ratings table and the movie metadata table from the working directory.
ratings_unfiltered, movies_unfiltered = (
    pd.read_csv('ratings.csv'),
    pd.read_csv('movies.csv'),
)

In [19]:
def filter_data(ratings, movies, user_limit=200, movie_limit=200):
    """Keep only the rows whose IDs fall at or below the given thresholds.

    Args:
        ratings: DataFrame with a ``userId`` column.
        movies: DataFrame with a ``movieId`` column.
        user_limit: Largest ``userId`` retained in ``ratings`` (inclusive).
        movie_limit: Largest ``movieId`` retained in ``movies`` (inclusive).

    Returns:
        Tuple ``(filtered_ratings, filtered_movies)``. ``ratings`` is filtered
        on ``userId`` only; its ``movieId`` values are not restricted here.
    """
    keep_user = ratings['userId'] <= user_limit
    keep_movie = movies['movieId'] <= movie_limit
    return ratings.loc[keep_user], movies.loc[keep_movie]

In [20]:
def standardize_ratings(df):
    """Min-max rescale the ``rating`` column to the range [0, 1].

    The input frame is left untouched; a new frame with the rescaled column is
    returned. The lowest rating maps to 0.0 and the highest to 1.0.

    Args:
        df: DataFrame with a numeric ``rating`` column.

    Returns:
        A copy of ``df`` whose ``rating`` column holds
        ``(rating - min) / (max - min)``. When every rating is identical
        (zero range) the column is set to 0.0 instead of dividing by zero.
    """
    ratings = df['rating']
    lowest = ratings.min()
    spread = ratings.max() - lowest
    shifted = ratings - lowest
    if spread == 0:
        # Constant column: 0/0 would produce NaN for every row, so pin it to 0.0.
        scaled = shifted.astype(float)
    else:
        scaled = shifted / spread
    # assign() builds a new frame, so the caller's DataFrame is not mutated.
    return df.assign(rating=scaled)

In [21]:
def create_user_movie_matrix(df):
    """Reshape long-format ratings into a wide user-by-movie table.

    Args:
        df: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.

    Returns:
        DataFrame indexed by ``userId`` with one column per ``movieId``.
        User/movie pairs with no rating are filled with 0, so a stored rating
        of exactly 0 cannot be told apart from a missing one.
    """
    wide = df.pivot(index='userId', columns='movieId', values='rating')
    return wide.fillna(0)

In [22]:
def compute_similarity_matrix(user_movie_matrix):
    """Compute pairwise cosine similarity between the columns of a matrix.

    Args:
        user_movie_matrix: DataFrame whose columns are the items to compare
            (movie IDs, when built by ``create_user_movie_matrix``).

    Returns:
        Square DataFrame labelled on both axes by the input's column labels,
        holding the cosine similarity of each column pair.
    """
    movie_ids = user_movie_matrix.columns
    # Transpose so that each row passed to cosine_similarity is one column of the input.
    pairwise = cosine_similarity(user_movie_matrix.T)
    return pd.DataFrame(pairwise, index=movie_ids, columns=movie_ids)

In [23]:
def get_top_n_similar_movies(movie_similarity_df, movie_id_to_title, movie_id, top_n=10):
    """Return the titles of the ``top_n`` movies most similar to ``movie_id``.

    Args:
        movie_similarity_df: Square DataFrame of similarity scores, labelled by
            movie ID on both axes.
        movie_id_to_title: Mapping from movie ID to title.
        movie_id: ID of the query movie; must be a column of
            ``movie_similarity_df``.
        top_n: Maximum number of titles to return.

    Returns:
        List of titles ordered from most to least similar, never containing
        the query movie itself.

    Raises:
        KeyError: If ``movie_id`` is not a column of ``movie_similarity_df`` or
            a ranked movie ID is missing from ``movie_id_to_title``.
    """
    ranked_ids = movie_similarity_df[movie_id].sort_values(ascending=False).index
    # Exclude the query movie by label, not by position: it is not guaranteed to
    # sort first (e.g. when another movie ties with it), and slicing off index 0
    # would then drop a real neighbour and keep the movie itself.
    neighbour_ids = [mid for mid in ranked_ids if mid != movie_id][:max(top_n, 0)]
    return [movie_id_to_title[mid] for mid in neighbour_ids]

In [24]:
# Pipeline: filter -> join ratings with movie metadata -> rescale -> pivot -> similarity
ratings, movies = filter_data(ratings_unfiltered, movies_unfiltered)
df = standardize_ratings(ratings.merge(movies, on="movieId"))
user_movie_matrix = create_user_movie_matrix(df)
movie_similarity_df = compute_similarity_matrix(user_movie_matrix)

# Lookup table: movieId -> title
movie_id_to_title = dict(zip(movies['movieId'], movies['title']))

# Similar-movie lists for two seed movies
toy_story_recommendations = get_top_n_similar_movies(
    movie_similarity_df, movie_id_to_title, movie_id=1  # Toy Story (1995)
)
waiting_to_exhale_recommendations = get_top_n_similar_movies(
    movie_similarity_df, movie_id_to_title, movie_id=4  # Waiting to Exhale (1995)
)

# Report the results
print("Top 10 similar movies to 'Toy Story (1995)':", toy_story_recommendations)
print("Top 10 similar movies to 'Waiting to Exhale (1995)':", waiting_to_exhale_recommendations)

Top 10 similar movies to 'Toy Story (1995)': ['Braveheart (1995)', 'Apollo 13 (1995)', 'Usual Suspects, The (1995)', 'Happy Gilmore (1996)', 'Die Hard: With a Vengeance (1995)', 'Seven (a.k.a. Se7en) (1995)', 'Babe (1995)', 'Taxi Driver (1976)', 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)', 'Batman Forever (1995)']
Top 10 similar movies to 'Waiting to Exhale (1995)': ['Before and After (1996)', 'Mighty Morphin Power Rangers: The Movie (1995)', 'Mad Love (1995)', 'Now and Then (1995)', 'Reckless (1995)', 'Georgia (1995)', 'How to Make an American Quilt (1995)', 'Something to Talk About (1995)', 'Scarlet Letter, The (1995)', 'Big Green, The (1995)']


In [18]:
# Item-based recommendations for one user, reported by title
def recommend_movies_for_user(user_id, top_n=3, n_neighbors=None):
    """Recommend movies the user has no rating for, scored from similar movies.

    Reads the module-level ``user_movie_matrix``, ``movie_similarity_df`` and
    ``movie_id_to_title``. A movie counts as unseen when the user's entry in
    ``user_movie_matrix`` is exactly 0. Each unseen movie is scored as the
    user's summed rating over its ``n_neighbors`` most similar other movies,
    divided by ``n_neighbors``.

    Args:
        user_id: Row label in ``user_movie_matrix``.
        top_n: Number of recommendations to return.
        n_neighbors: Number of most-similar movies used to score each
            candidate. Defaults to ``top_n``.

    Returns:
        List of up to ``top_n`` titles, best score first. Empty when ``top_n``
        or ``n_neighbors`` is not positive.

    Raises:
        KeyError: If ``user_id`` is not in ``user_movie_matrix`` or a
            recommended movie ID is missing from ``movie_id_to_title``.
    """
    if n_neighbors is None:
        n_neighbors = top_n
    if top_n <= 0 or n_neighbors <= 0:
        # Nothing to recommend, and this avoids dividing by zero below.
        return []

    user_ratings = user_movie_matrix.loc[user_id]
    unseen_movies = user_ratings[user_ratings == 0].index
    movie_scores = {}

    for movie in unseen_movies:
        ranked = movie_similarity_df[movie].sort_values(ascending=False).index
        # Exclude the candidate by label, not by position: it is not guaranteed
        # to sort first in its own similarity column (e.g. on tied scores).
        neighbours = [mid for mid in ranked if mid != movie][:n_neighbors]
        movie_scores[movie] = user_ratings.loc[neighbours].sum() / n_neighbors

    best_ids = sorted(movie_scores, key=movie_scores.get, reverse=True)[:top_n]
    return [movie_id_to_title[mid] for mid in best_ids]

# Default-sized recommendation list for user 200
user_200_recommendations = recommend_movies_for_user(user_id=200)

print("Top 3 recommendations for user 200:", user_200_recommendations)

Top 3 recommendations for user 200: ['Twelve Monkeys (a.k.a. 12 Monkeys) (1995)', 'Crimson Tide (1995)', 'Batman Forever (1995)']
