In [30]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from math import sqrt

In [31]:
# All three GroupLens tables share one on-disk layout: '::'-separated fields,
# no header row, latin1 text. The python parser engine is requested explicitly
# for the two-character delimiter.
_DAT_READ_OPTIONS = dict(delimiter='::', header=None, engine='python', encoding='latin1')

movie_data = pd.read_csv('data/movies_grouplens/movies.dat', **_DAT_READ_OPTIONS)
rating_data = pd.read_csv('data/movies_grouplens/ratings.dat', **_DAT_READ_OPTIONS)
user_data = pd.read_csv('data/movies_grouplens/users.dat', **_DAT_READ_OPTIONS)

In [32]:
# The files were read without a header row, so attach the column names here.
for frame, column_names in (
    (movie_data, ['MovieID', 'Title', 'Genres']),
    (rating_data, ['UserID', 'MovieID', 'Rating', 'Timestamp']),
    (user_data, ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']),
):
    frame.columns = column_names

In [33]:
# Movies without genre information get an empty string, which vectorises to an
# all-zero row.
movie_data['Genres'] = movie_data['Genres'].fillna('')

# Treat each '|'-separated genre label as ONE token. The default word-level
# token pattern uses punctuation as a separator, so a hyphenated label such as
# "Sci-Fi" would be split into two features that always co-occur and would
# therefore count double in the cosine similarity. Genre labels are categorical,
# so no English stop-word filtering is applied.
tfidf = TfidfVectorizer(token_pattern=r'[^|]+')

# Row i of tfidf_matrix is the genre vector of row i of movie_data.
tfidf_matrix = tfidf.fit_transform(movie_data['Genres'])

In [34]:
# Attach each movie's title and genres to every rating. Inner join on MovieID:
# ratings for movies absent from movie_data are dropped.
user_movie_data = pd.merge(rating_data, movie_data, how='inner', on='MovieID')

In [35]:
# Lookup table: MovieID -> positional row of that movie in movie_data, which is
# also its row in tfidf_matrix (the matrix was fitted on movie_data['Genres']).
# Positions are stored rather than index labels so the lookup stays valid even
# if movie_data does not carry a default 0..n-1 index.
movie_index = pd.Series(np.arange(len(movie_data)), index=movie_data['MovieID'])

# Keep only the first row for a repeated MovieID. De-duplication has to look at
# the index (the MovieIDs); Series.drop_duplicates() compares the values, which
# are all distinct, and so would never remove anything.
movie_index = movie_index[~movie_index.index.duplicated(keep='first')]

In [36]:
# UserID -> genre-preference vector; starts empty and is filled in later.
user_profiles = dict()

In [37]:
# Build one genre-preference vector per user: the rating-weighted mean of the
# TF-IDF genre vectors of the movies they rated, scaled to unit length.
n_genre_features = tfidf_matrix.shape[1]

# Every user that appears in rating_data gets an entry. Users for whom no
# usable profile can be computed (no merged ratings, zero rating total, or only
# movies with empty genre vectors) keep this all-zero vector instead of a NaN
# vector produced by a division by zero.
for user_id in rating_data['UserID'].unique():
    user_profiles[user_id] = np.zeros(n_genre_features)

# One groupby pass instead of re-filtering the whole merged table once per user.
for user_id, user_ratings in user_movie_data.groupby('UserID', sort=False):
    ratings = user_ratings['Rating'].to_numpy(dtype=float)
    rating_total = ratings.sum()
    if rating_total == 0:
        continue

    # Translate MovieIDs into row positions of tfidf_matrix.
    movie_rows = user_ratings['MovieID'].map(movie_index).to_numpy()
    genre_vectors = tfidf_matrix[movie_rows].toarray()

    user_profile = np.dot(ratings, genre_vectors) / rating_total

    profile_norm = np.linalg.norm(user_profile)
    if profile_norm > 0:
        user_profiles[user_id] = user_profile / profile_norm

In [38]:
def recommend_movies_for_user(user_id, top_n=10):
    """Return the titles of the movies whose genre vectors best match a user's profile.

    Args:
        user_id: Key into ``user_profiles`` identifying the user.
        top_n: Maximum number of titles to return; ``0`` gives an empty result.

    Returns:
        The ``Title`` entries of ``movie_data`` for the best-matching movies,
        ordered from most to least similar, with at most ``top_n`` entries.

    Raises:
        ValueError: If ``top_n`` is negative.
        KeyError: If ``user_id`` has no entry in ``user_profiles``.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    user_profile = user_profiles[user_id]

    # Cosine similarity between the single profile vector and every movie row.
    sim_scores = cosine_similarity([user_profile], tfidf_matrix)[0]

    # Reverse first, then truncate. Slicing with [-top_n:] before reversing
    # degenerates to [0:] when top_n == 0 and would return every movie.
    movie_indices = sim_scores.argsort()[::-1][:top_n]

    return movie_data['Title'].iloc[movie_indices]

In [39]:
# Example run: show the default number of recommendations for one user.
user_id = 54
print(f"Recommendations for User {user_id}:")
top_titles = recommend_movies_for_user(user_id)
print(top_titles)

Recommendations for User 54:
2703             Detroit Rock City (1999)
2349             Nothing in Common (1986)
3377                   Funny Bones (1995)
340     Ace Ventura: Pet Detective (1994)
943               Bringing Up Baby (1938)
344          Bullets Over Broadway (1994)
348                       Crooklyn (1994)
939                His Girl Friday (1940)
2367            Tea with Mussolini (1999)
935                 My Man Godfrey (1936)
Name: Title, dtype: object
