In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt



In [None]:
# Load the MovieLens movie and rating tables from the working directory
# (movies first, then ratings).
movies, ratings = (pd.read_csv(csv_path) for csv_path in ('movies.csv', 'ratings.csv'))

# Attach each movie's columns to every rating row via the shared movieId key.
data = ratings.merge(movies, on='movieId')


In [None]:
# Build the user x title rating matrix; a missing (user, title) pair
# becomes 0 so that cosine similarity can be computed on a dense matrix.
user_movie_matrix = (
    data
    .pivot_table(values='rating', index='userId', columns='title')
    .fillna(0)
)

# Item-based collaborative filtering: compare titles by their rating columns
# (the matrix is transposed so that each row passed to cosine_similarity is
# one title), then label both axes with the titles for lookup by name.
item_similarity = cosine_similarity(user_movie_matrix.T)
item_sim_df = pd.DataFrame(
    data=item_similarity,
    index=user_movie_matrix.columns,
    columns=user_movie_matrix.columns,
)


In [None]:
# Recommendation function
def recommend_movies(movie_title, n=5):
    """Return the ``n`` movies most similar to ``movie_title``.

    Similarities are read from the ``movie_title`` column of the module-level
    ``item_sim_df`` frame.

    Parameters
    ----------
    movie_title : str
        Column label to look up in ``item_sim_df``.
    n : int, default 5
        Maximum number of neighbours to return; values <= 0 give an empty
        result.

    Returns
    -------
    pandas.Series or str
        Similarity scores in descending order, labelled by the index of
        ``item_sim_df`` and never containing ``movie_title`` itself. If the
        title is unknown, a "not found" message string is returned instead.
    """
    if movie_title not in item_sim_df.columns:
        return f"Movie '{movie_title}' not found in the dataset."

    # Remove the query movie by label instead of assuming it sorts to the top:
    # another movie can tie with it at similarity 1.0, in which case slicing
    # off position 0 may discard the wrong row and return the query itself.
    scores = item_sim_df[movie_title].drop(labels=movie_title, errors='ignore')

    # A stable sort keeps tied scores in a deterministic order.
    ranked = scores.sort_values(ascending=False, kind='mergesort')
    return ranked.head(max(n, 0))


In [None]:
# Content-based filtering: Genres
# Turn the pipe-separated genre string into space-separated tokens.
# regex=False makes '|' a literal character on every pandas version; as a
# regular expression '|' is the alternation operator, and the default value
# of `regex` differs between pandas releases.
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)

# Bag-of-words over the genre tokens: one row per movie.
# NOTE(review): the default CountVectorizer tokenizer presumably splits
# hyphenated genres into two tokens -- confirm this is acceptable.
count_vectorizer = CountVectorizer()
genre_matrix = count_vectorizer.fit_transform(movies['genres'])

# Pairwise cosine similarity between the movies' genre vectors; row/column i
# corresponds to the i-th row (by position) of `movies`.
content_similarity = cosine_similarity(genre_matrix)

def recommend_based_on_genres(movie_title, n=5):
    """Return the ``n`` movies whose genre vectors are closest to ``movie_title``.

    Uses the module-level ``movies`` frame and the ``content_similarity``
    matrix, whose rows and columns follow the row order of ``movies``.

    Parameters
    ----------
    movie_title : str
        Exact value to match in ``movies['title']``; the first match is used.
    n : int, default 5
        Maximum number of recommendations; values <= 0 give an empty frame.

    Returns
    -------
    pandas.DataFrame or str
        The ``title`` and ``genres`` columns of the most similar movies, best
        match first and never including the query row. If the title is
        unknown, a "not found" message string is returned instead.
    """
    # Work with row *positions*, not index labels: content_similarity and
    # .iloc are both positional, so labels are only correct by coincidence
    # when `movies` happens to carry a default RangeIndex.
    positions = np.flatnonzero((movies['title'] == movie_title).to_numpy())
    if len(positions) == 0:
        return f"Movie '{movie_title}' not found in the dataset."
    query_pos = positions[0]

    scores = np.asarray(content_similarity[query_pos]).ravel()
    # Descending, stable order so ties are resolved deterministically.
    ranked = np.argsort(-scores, kind='stable')
    # Exclude the query explicitly: movies with identical genres also score
    # 1.0, so the query is not guaranteed to be the single top entry.
    ranked = ranked[ranked != query_pos][:max(n, 0)]

    recommendations = movies.iloc[ranked]
    return recommendations[['title', 'genres']]


In [None]:
# Evaluate item-based collaborative filtering with RMSE on held-out ratings
train, test = train_test_split(ratings, test_size=0.2, random_state=42)
user_movie_matrix_train = train.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
# Not used for scoring below (its shape differs from the train matrix and its
# zeros are "unrated", not real ratings); kept so the name remains available.
user_movie_matrix_test = test.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)

# Fit the item-item similarity on the training split only and key it by
# movieId, so that it lines up with user_movie_matrix_train's columns and no
# test rating leaks into the model. (The title-keyed `item_similarity` built
# from the full data has a different shape and ordering.)
item_similarity_train = cosine_similarity(user_movie_matrix_train.T)

# Prediction for (user u, movie j) = sum_i R[u, i] * S[i, j] / sum_i |S[i, j]|.
# The normaliser is one value per target movie j, i.e. a row vector that
# broadcasts across users.
similarity_totals = np.abs(item_similarity_train).sum(axis=0)
weighted_scores = user_movie_matrix_train.to_numpy() @ item_similarity_train
predicted_ratings = pd.DataFrame(
    np.divide(
        weighted_scores,
        similarity_totals,
        out=np.zeros_like(weighted_scores),
        where=similarity_totals > 0,  # guard against all-zero item columns
    ),
    index=user_movie_matrix_train.index,
    columns=user_movie_matrix_train.columns,
)

# Score only the ratings that were actually held out, and only for users and
# movies the model has seen in training (others have no prediction).
seen_in_train = (
    test['userId'].isin(predicted_ratings.index)
    & test['movieId'].isin(predicted_ratings.columns)
)
test_seen = test[seen_in_train]
user_pos = predicted_ratings.index.get_indexer(test_seen['userId'])
movie_pos = predicted_ratings.columns.get_indexer(test_seen['movieId'])
held_out_predictions = predicted_ratings.to_numpy()[user_pos, movie_pos]

rmse = np.sqrt(mean_squared_error(test_seen['rating'], held_out_predictions))
print(f"RMSE: {rmse}")

# Example usage
print(recommend_movies("Toy Story (1995)"))
print(recommend_based_on_genres("Toy Story (1995)"))
