In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Load the movie metadata and the user ratings from the working directory
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

# Preview both frames. A notebook only renders the value of a cell's LAST
# expression, so an earlier bare `movies.head()` would be silently discarded;
# display() (provided by the IPython/Jupyter kernel) renders it explicitly.
display(movies.head())
ratings.head()


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
# Attach movie metadata (including the title) to every rating row
movie_ratings = pd.merge(ratings, movies, on='movieId')

# Per-title statistics: mean rating and number of ratings received
ratings_by_title = movie_ratings.groupby('title')['rating']
movie_stats = pd.DataFrame({
    'average_rating': ratings_by_title.mean(),
    'rating_count': ratings_by_title.count(),
})

# Rank titles from highest to lowest average rating.
# NOTE(review): ranking on the plain mean lets titles with a single 5.0 rating
# fill the top of the list; consider requiring a minimum rating_count.
top_movies = movie_stats.sort_values(by='average_rating', ascending=False)
top_movies.head(10)  # Shows top 10 movies


Unnamed: 0_level_0,average_rating,rating_count
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Karlson Returns (1970),5.0,1
Zeitgeist: Moving Forward (2011),5.0,1
"Dream of Light (a.k.a. Quince Tree Sun, The) (Sol del membrillo, El) (1992)",5.0,1
Dragons: Gift of the Night Fury (2011),5.0,1
12 Angry Men (1997),5.0,1
Justice League: Doom (2012),5.0,1
Junior and Karlson (1968),5.0,1
Jump In! (2007),5.0,1
"Human Condition III, The (Ningen no joken III) (1961)",5.0,1
Louis Theroux: Law & Disorder (2008),5.0,1


In [None]:
# Fill empty genres with empty string so the vectorizer never sees NaN
movies['genres'] = movies['genres'].fillna('')

# Convert genres to numbers using TF-IDF.
# The default token pattern splits on punctuation, which would break a genre
# such as "Sci-Fi" into the unrelated tokens "sci" and "fi" and count it twice.
# Tokenising on the "|" delimiter keeps every genre as exactly one feature.
# NOTE(review): assumes genres are pipe-delimited (MovieLens layout) -- confirm
# against movies.csv.
tfidf = TfidfVectorizer(token_pattern=r'[^|]+')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Pairwise cosine similarity between all movies: similarity[i][j] compares the
# genre vectors of the movies at row positions i and j of `movies`.
similarity = cosine_similarity(tfidf_matrix)

# Recommendation function
def recommend(movie_name, top_n=5):
    """Print the movies whose genre profile is most similar to a given movie.

    The first row of the module-level ``movies`` frame whose title contains
    ``movie_name`` (case-insensitive, literal substring) is used as the query.
    Its row of the module-level ``similarity`` matrix is then ranked and the
    best ``top_n`` other movies are printed.

    Parameters
    ----------
    movie_name : str
        Full or partial movie title. It is matched literally, so characters
        such as parentheses in "Toy Story (1995)" need no escaping.
    top_n : int, default 5
        Maximum number of recommendations to print.

    Returns
    -------
    None
        Results are printed. If no title matches, a "Movie not found" message
        is printed instead.
    """
    if not isinstance(movie_name, str) or not movie_name.strip():
        print("Movie not found. Please check the name.")
        return

    # regex=False: titles routinely contain regex metacharacters.
    # na=False: a missing title simply does not match.
    is_match = movies['title'].str.contains(
        movie_name, case=False, regex=False, na=False
    )
    # Work with row POSITIONS (not index labels): both `similarity` and
    # `movies.iloc` are addressed positionally.
    match_positions = is_match.to_numpy(dtype=bool).nonzero()[0]
    if len(match_positions) == 0:
        print("Movie not found. Please check the name.")
        return
    movie_pos = int(match_positions[0])

    scores = similarity[movie_pos]
    # Exclude the queried movie explicitly. Dropping "the first sorted entry"
    # is wrong when other movies tie with it, because the stable sort may
    # place a different movie first.
    candidates = (pos for pos in range(len(scores)) if pos != movie_pos)
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)
    best = ranked[:max(int(top_n), 0)]

    print(f"\nBecause you liked '{movies.iloc[movie_pos]['title']}', you might also like:")
    for pos in best:
        print(movies.iloc[pos]['title'])


In [None]:
 # Try it out: look up the first title containing "Toy Story" and print up to
 # five movies with the most similar genre vectors.
recommend("Toy Story")


Because you liked 'Toy Story (1995)', you might also like:
Antz (1998)
Toy Story 2 (1999)
Adventures of Rocky and Bullwinkle, The (2000)
Emperor's New Groove, The (2000)
Monsters, Inc. (2001)
