In [6]:
import pandas as pd
import numpy as np
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

In [7]:
#Loading
movies = pd.read_csv("tmdb_5000_movies.csv")

#Preprocessing
movies = movies[['id', 'title', 'overview', 'genres', 'vote_average', 'vote_count']]
movies.dropna(inplace=True)

#Convert genres from JSON format
movies['genres'] = movies['genres'].apply(lambda x: ' '.join([d['name'] for d in ast.literal_eval(x)]))

#Filtering Using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(movies['overview'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [13]:
movies

Unnamed: 0,id,title,overview,genres,vote_average,vote_count
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...",Action Adventure Fantasy Science Fiction,7.2,11800
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",Adventure Fantasy Action,6.9,4500
2,206647,Spectre,A cryptic message from Bond’s past sends him o...,Action Adventure Crime,6.3,4466
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,Action Crime Drama Thriller,7.6,9106
4,49529,John Carter,"John Carter is a war-weary, former military ca...",Action Adventure Science Fiction,6.1,2124
...,...,...,...,...,...,...
4798,9367,El Mariachi,El Mariachi just wants to play his guitar and ...,Action Crime Thriller,6.6,238
4799,72766,Newlyweds,A newlywed couple's honeymoon is upended by th...,Comedy Romance,5.9,5
4800,231617,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",Comedy Drama Romance TV Movie,7.0,6
4801,126186,Shanghai Calling,When ambitious New York attorney Sam is sent t...,,5.7,7


In [8]:
def recommend_movies_content(movie_title, top_n=10):
    idx = movies[movies['title'].str.lower() == movie_title.lower()].index
    if len(idx) == 0:
        return "Movie not found. Please check the title."
    idx = idx[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    movie_indices = [i[0] for i in sim_scores]
    return movies.iloc[movie_indices][['title', 'vote_average']]

# Collaborative Filtering Using SVD
ratings_data = {'userId': np.random.randint(1, 1000, size=len(movies)),
                'movieId': movies['id'],
                'rating': movies['vote_average']}
ratings = pd.DataFrame(ratings_data)
reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)
model = SVD()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13d4a83d0>

In [None]:
def recommend_movies_collaborative(user_id, top_n=10):
    movie_ids = movies['id'].values
    predictions = [model.predict(user_id, movie_id).est for movie_id in movie_ids]
    recommended_indices = np.argsort(predictions)[::-1][:top_n]
    return movies.iloc[recommended_indices][['title', 'vote_average']]

#Example Usage
y=input("Enter the movie Name: ")
print("Content-Based Recommendations for", y, " :")
print(recommend_movies_content(y))
print('\n')
print('\n')
print("Collaborative Filtering Recommendations for User 1:")
x=float(input("Enter the movie rating: "))
print(recommend_movies_collaborative(x))


Content-Based Recommendations for  Avatar  :
                             title  vote_average
3604                     Apollo 18           5.0
2130                  The American           5.8
634                     The Matrix           7.9
1341          The Inhabited Island           5.3
529               Tears of the Sun           6.4
1610                         Hanna           6.5
311   The Adventures of Pluto Nash           4.4
847                       Semi-Pro           5.4
775                      Supernova           4.9
2628           Blood and Chocolate           5.4




Collaborative Filtering Recommendations for User 1:
                            title  vote_average
4662               Little Big Top          10.0
4247        Me You and Five Bucks          10.0
4045        Dancer, Texas Pop. 81          10.0
3992                    Sardaarji           9.5
2970           There Goes My Baby           8.5
1881     The Shawshank Redemption           8.5
2760                    