# Movie Recommendation System

### Objective: To create a movie recommendation system

#### Import required libraries

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the movie dataset from 'cleaned_df.csv' (resolved relative to the
# current working directory) into the DataFrame used throughout the notebook.
df = pd.read_csv('cleaned_df.csv')

In [None]:

# Preview the first 60 rows of the dataset.
df.head(60)


In [None]:
# (number of rows, number of columns) of the dataset.
df.shape

In [None]:
# Show the last 10 rows of the dataset.
df.tail(10)

In [None]:
# Summary statistics (count, mean, spread, quartiles) of the dataset's columns.
df.describe()

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Names of the text columns that will be merged to describe each movie.
new_feature = [
    'genres',
    'keywords',
    'tagline',
    'director',
]
print(new_feature)

In [None]:
# A missing value would turn the later string concatenation into NaN, so every
# null in the selected text columns is swapped for an empty string.
df[new_feature] = df[new_feature].fillna('')

In [None]:
# new_feature.isnull().sum()

In [None]:
# Display the list of selected column names.
new_feature

In [None]:
# Build one space-separated text per row out of the four descriptive columns.
# From here on `new_feature` holds that combined Series, no longer the list
# of column names.
new_feature = (
    df['genres']
    + ' ' + df['keywords']
    + ' ' + df['tagline']
    + ' ' + df['director']
)

In [None]:
# Show the combined text built for each row.
print(new_feature)

In [None]:
# Convert the text data to numeric feature vectors using TF-IDF weighting.

from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorizer with default settings; it is fitted on the combined text in the next cell.
vectorizer = TfidfVectorizer()

In [None]:
# Learn the vocabulary from the combined text and turn each row's text into
# its TF-IDF vector (one row of `feature_vectors` per row of `df`).
feature_vectors = vectorizer.fit_transform(new_feature)

In [None]:
# Print the TF-IDF matrix produced above.
print(feature_vectors)

## To build Recommendation System

# i)

## Cosine Similarities

In [None]:
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Compute the cosine similarity between every pair of movies' TF-IDF vectors.
# Row i of `similarity` holds movie i's score against each row of `feature_vectors`.

similarity = cosine_similarity(feature_vectors)

In [None]:
# Print the pairwise similarity matrix.
print(similarity)

In [None]:
# Print the dimensions of the similarity matrix.
print(similarity.shape)

In [None]:
# Ask the user for a movie title. The spelling does not have to be exact:
# the text is fuzzy-matched against the dataset's titles in a later cell.

movie_name = input(' Enter your favourite movie name : ')

In [None]:
# Collect every value of the 'title' column into a plain Python list, the
# form difflib needs for fuzzy matching.

list_of_all_titles = df['title'].tolist()
# print(list_of_all_titles)

In [None]:
# Find the dataset titles that most resemble what the user typed. With its
# default arguments difflib returns at most three candidates, best first,
# and an empty list when nothing is similar enough.

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

In [None]:
# Keep the best candidate. difflib returns an empty list when no title is
# similar enough, so fail with a clear message instead of a bare IndexError.
if not find_close_match:
    raise ValueError(
        f'No movie title resembling {movie_name!r} was found in the dataset.'
    )
close_match = find_close_match[0]
print(close_match)

In [None]:
# Find the row position of the matched movie.
#
# The rows of `similarity` follow the row order of `df`, so the lookup has to
# be positional. The stored 'index' column is not used here: if rows were
# removed while the dataset was cleaned, its values would no longer line up
# with the rows of the similarity matrix.

index_of_the_movie = int(np.flatnonzero((df['title'] == close_match).to_numpy())[0])
print(index_of_the_movie)

In [None]:
# Pair each row position with that row's similarity to the chosen movie.

similarity_score = [
    (position, score)
    for position, score in enumerate(similarity[index_of_the_movie])
]
# print(similarity_score)

In [None]:
# Number of (position, score) pairs collected for the chosen movie.
len(similarity_score)

In [None]:
# Order the (position, score) pairs from the highest score to the lowest.

sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)
# print(sorted_similar_movies)

In [None]:
# Count the missing values per column. The bare attribute `df.isnull` only
# displayed the method object because it was never called.
df.isnull().sum()

In [None]:
print('Movies suggested for you : \n')

# Show the 24 best-scoring entries. Slicing first avoids walking the whole
# ranking, and the positional lookup avoids a full boolean-mask scan of `df`
# per movie. With the default index created by `read_csv`, the row position
# and the index label are the same value.
# NOTE: the chosen movie itself normally appears first, since a movie is
# maximally similar to itself.
for i, (index, _score) in enumerate(sorted_similar_movies[:24], start=1):
    title_from_index = df['title'].iloc[index]
    print(i, '.', title_from_index)
    
    

### Recommendation System

In [None]:
# End-to-end recommender: read a title, fuzzy-match it against the dataset,
# rank every movie by cosine similarity to the match, and print the top 10.
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = df['title'].tolist()

# difflib returns candidates best-first, or an empty list when no title is
# similar enough; report that clearly instead of failing with an IndexError.
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
if not find_close_match:
    raise ValueError(
        f'No movie title resembling {movie_name!r} was found in the dataset.'
    )

close_match = find_close_match[0]

# The rows of `similarity` follow the row order of `df`, so use the row
# position of the match and not the stored 'index' column, whose values may
# no longer line up with the rows if any were removed during cleaning.
index_of_the_movie = int(np.flatnonzero((df['title'] == close_match).to_numpy())[0])

similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

print('Movies suggested for you : \n')

# Only the 10 best-scoring entries are printed, so slice before looping and
# fetch each title by position.
# NOTE: the chosen movie itself normally appears first, since a movie is
# maximally similar to itself.
for i, (index, _score) in enumerate(sorted_similar_movies[:10], start=1):
    title_from_index = df['title'].iloc[index]
    print(i, '.', title_from_index)

### Conclusion:
The result is a list of recommended movies that are considered similar to the user's favorite movie, with the most similar ones appearing at the top of the list. The number of recommendations displayed is limited to the first 10 similar movies. The user can explore these recommendations to find movies that align with their preferences based on the similarity metric used.

In [None]:
# List the column names available in the dataset.
df.columns

# ii)

In [None]:
# from surprise import Dataset, Reader, SVD
# from surprise.model_selection import train_test_split
# from surprise import accuracy

In [None]:
# NOTE: this cell uses Reader, Dataset, SVD, train_test_split and accuracy
# from the `surprise` package. The import cell for them is currently
# commented out, so those imports must be enabled before running this cell.

# Create a Surprise Reader object to specify the rating scale (e.g., from 1 to 5).
# NOTE(review): confirm that `vote_average` really lies within 1-5; if it
# uses a different range, the scale below should be adjusted to match.
reader = Reader(rating_scale=(1, 5))

# Load the data into a Surprise Dataset object. The source is the movie
# DataFrame `df`; the columns are passed in (user, item, rating) order.
# NOTE(review): 'index' plays the role of the user and 'id' the item here.
# Verify that this is intended, since both look like per-movie identifiers.
data = Dataset.load_from_df(df[['index', 'id', 'vote_average']], reader)

# Split the data into training and testing sets (80% for training, 20% for testing).
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Initialize the SVD algorithm (you can tune hyperparameters here).
model = SVD(n_factors=100, n_epochs=20, random_state=42)

# Fit the model on the training data.
model.fit(trainset)

# Make predictions on the test set.
predictions = model.test(testset)

# Calculate RMSE (Root Mean Squared Error) as an evaluation metric.
rmse = accuracy.rmse(predictions)
print(f'RMSE: {rmse}')

# Let's make recommendations for a specific user (e.g., user with ID 1).
user_id_to_recommend_to = 1

# Get a list of all movie IDs.
all_movie_ids = data.df['id'].unique()

# For each movie ID, predict the rating the user would give and store it as
# a (movie_id, predicted_rating) pair.
movies_to_recommend = [
    (movie_id, model.predict(user_id_to_recommend_to, movie_id).est)
    for movie_id in all_movie_ids
]

# Sort the recommendations by predicted rating in descending order.
movies_to_recommend.sort(key=lambda x: x[1], reverse=True)

# Print the top N movie recommendations (e.g., top 10).
top_n = 10
print(f'Top {top_n} Movie Recommendations for User {user_id_to_recommend_to}:')
for i, (movie_id, predicted_rating) in enumerate(movies_to_recommend[:top_n], start=1):
    print(f'{i}. Movie ID: {movie_id}, Predicted Rating: {predicted_rating}')

In [None]:
# Create a user-item matrix where rows represent users and columns represent movies.
# Rows are keyed by the 'index' column, columns by 'id', and each cell holds
# the 'vote_average' for that pair (NaN where no such pair exists).
# NOTE(review): 'index' is standing in for a user id here; confirm that the
# dataset really contains per-user ratings.
user_item_matrix = pd.pivot_table(df, values='vote_average', index='index', columns='id')

In [None]:
# Fill missing values with 0 (assuming missing ratings mean no interaction).
# Later code treats a 0 in this matrix as "not rated".
user_item_matrix = user_item_matrix.fillna(0)

In [None]:
# Convert the user-item matrix to a NumPy array. The row and column labels
# are dropped, so from here on rows and columns are addressed by position.
user_item_matrix = user_item_matrix.values

In [None]:
# Calculate the similarity matrix (e.g., using Pearson correlation).
# Each row of the input is treated as one variable, so entry (i, j) is the
# correlation between row i and row j of `user_item_matrix`.
# NOTE(review): a row whose values are all equal has zero variance and
# produces NaN correlations; check the result for NaNs before relying on it.
similarity_matrix = np.corrcoef(user_item_matrix)

In [None]:
# Define a function to get movie recommendations for a user.
def get_recommendations(user_id, user_item_matrix, similarity_matrix, top_n=10):
    """Rank the items a user has not rated by similarity-weighted score.

    Args:
        user_id: Row position of the target user in both ``user_item_matrix``
            and ``similarity_matrix``.
        user_item_matrix: 2-D array of ratings with one row per user and one
            column per item; a value of 0 means "not rated".
        similarity_matrix: 2-D array whose row ``user_id`` holds the weight
            given to every user's ratings when scoring items.
        top_n: Maximum number of items to return.

    Returns:
        A list of at most ``top_n`` column positions of unrated items, ordered
        from the highest to the lowest predicted score. The list is empty
        when the user has rated every item.
    """
    user_ratings = np.asarray(user_item_matrix[user_id])

    # Score every item as the similarity-weighted sum of all users' ratings.
    predicted_ratings = np.dot(similarity_matrix[user_id], user_item_matrix)

    # Item positions ordered from the highest predicted score to the lowest.
    ranked_items = np.argsort(predicted_ratings)[::-1]

    # Keep only the items the user has not rated. The boolean mask preserves
    # the ranking order and avoids an O(n) membership scan per candidate.
    unrated_ranked = ranked_items[user_ratings[ranked_items] == 0]

    return list(unrated_ranked[:top_n])

# Example usage: Get recommendations for a user (user with ID 1).
# The id is used as a row position in `user_item_matrix`, and the printed
# values are column positions in that matrix, not movie titles or ids.
user_id_to_recommend_to = 1
recommendations = get_recommendations(user_id_to_recommend_to, user_item_matrix, similarity_matrix)
print(f'Top 10 Movie Recommendations for User {user_id_to_recommend_to}:')
print(recommendations)

### Conclusion :
The result of running the code is the list of indices of the top N movie recommendations for the specified user. This list of indices represents the movies that are most likely to be of interest to the user based on their past ratings and the ratings of similar users.