In [11]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

# Load dataset from kaggle (IMDB Movies Dataset)
data = pd.read_csv("movies.csv")
data = data.fillna('')


data['features'] = data['Genre'] + ' ' + data['IMDB_Rating'].astype(str) + ' ' + \
                   data['Meta_score'].astype(str) + ' ' + data['No_of_Votes'].astype(str) + ' ' + \
                   data['Gross'].astype(str) + ' ' + data['Released_Year'].astype(str)

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['features'])

cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

if 'IMDB_Rating' in data.columns:
    user_item_matrix = data.pivot_table(index='Series_Title', values='IMDB_Rating', aggfunc='mean').fillna(0)
    collab_sim = cosine_similarity(user_item_matrix)
    collab_sim_df = pd.DataFrame(collab_sim, index=user_item_matrix.index, columns=user_item_matrix.index)

def get_recommendations(title, cosine_sim=cosine_sim):
    """Returns top 5 content-based recommendations for a given movie."""
    if title not in data['Series_Title'].values:
        return ["Movie not found in database"]

    idx = data[data['Series_Title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:6]  # Get top 5 similar movies
    movie_indices = [i[0] for i in sim_scores]
    return data['Series_Title'].iloc[movie_indices].tolist()

def get_collaborative_recommendations(title):
    """Returns top 5 collaborative-based recommendations based on IMDB rating."""
    if title not in collab_sim_df.index:
        return ["Movie not found in rating database"]

    similar_movies = collab_sim_df[title].sort_values(ascending=False)[1:6]  # Top 5 movies
    return similar_movies.index.tolist()

def get_genre_recommendations(genre):
    """Returns top 5 movies in a genre sorted by IMDB rating."""
    genre_movies = data[data['Genre'].str.contains(genre, case=False, na=False)]
    return genre_movies.sort_values(by='IMDB_Rating', ascending=False)['Series_Title'].head(5).tolist()

In [12]:
# Example Usage
movie_title = 'Inception'
user_genre = 'Comedy'

print(f"\nTop 5 Movies Similar to '{movie_title}' (Content-Based):")
print(get_recommendations(movie_title))

print(f"\nTop 5 Movies Similar to '{movie_title}' (Collaborative Filtering):")
print(get_collaborative_recommendations(movie_title))

print(f"\nTop 5 Movies in '{user_genre}' Genre (Based on IMDB Rating):")
print(get_genre_recommendations(user_genre))


Top 5 Movies Similar to 'Inception' (Content-Based):
['Serenity', 'Interstellar', 'G.O.R.A.', 'Twelve Monkeys', 'Madeo']

Top 5 Movies Similar to 'Inception' (Collaborative Filtering):
['(500) Days of Summer', '12 Angry Men', '12 Years a Slave', '1917', '2001: A Space Odyssey']

Top 5 Movies in 'Comedy' Genre (Based on IMDB Rating):
['Gisaengchung', 'La vita è bella', 'The Intouchables', 'Back to the Future', 'Modern Times']
