# Importing required libraries

In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Data Collection and Pre-Processing

In [None]:
# Load the movie metadata into a DataFrame. The path is relative, so
# 'movies.csv' must be in the notebook's working directory.
movies_data = pd.read_csv('movies.csv')

# Printing the first 5 rows of the dataframe

In [None]:
# Preview the first rows; kept as the last expression so the notebook renders it.
movies_data.head()

# Number of rows and columns in the data frame

In [None]:
# (number of rows, number of columns) of the loaded DataFrame.
movies_data.shape

# selecting the relevant features for recommendation

In [None]:
# Text columns that describe a movie's content; these are the columns that get
# null-filled and combined into the text used for similarity.
selected_features = [
    'genres',
    'keywords',
    'tagline',
    'cast',
    'director',
]
print(selected_features)

# Replacing the null values with empty strings

In [None]:
# Replace missing values in every selected text column with an empty string so
# the later string concatenation does not propagate NaN.
movies_data[selected_features] = movies_data[selected_features].fillna('')

# Combining all the 5 selected features

In [None]:
# Build one space-separated text string per movie from the selected columns.
# The columns come from `selected_features` rather than a second hard-coded
# list, so the columns that were null-filled and the columns that are
# vectorized cannot drift apart. Concatenation runs left to right in list
# order, which for the current list is exactly
# genres + ' ' + keywords + ' ' + tagline + ' ' + cast + ' ' + director.
combined_features = movies_data[selected_features[0]]
for feature in selected_features[1:]:
    combined_features = combined_features + ' ' + movies_data[feature]

In [None]:
# Inspect the combined per-movie text that will be vectorized.
print(combined_features)

# converting the text data to feature vectors

In [None]:
# TF-IDF vectorizer with default settings; it is fitted on the combined text below.
vectorizer = TfidfVectorizer()

In [None]:
# Fit the vectorizer on the combined text and convert each movie's text into a
# numeric feature vector (one row per entry of combined_features).
feature_vectors = vectorizer.fit_transform(combined_features)

In [None]:
# Inspect the vectorized features.
print(feature_vectors)

# getting the similarity scores using cosine similarity

In [None]:
# Pairwise cosine similarity between all movies' feature vectors. The result is
# indexed by row later (similarity[index_of_the_movie]) to get one movie's
# similarity to every movie.
# NOTE(review): this is presumably a dense movies x movies array, so memory
# grows quadratically with the dataset size — confirm for larger datasets.
similarity = cosine_similarity(feature_vectors)

In [None]:
# Inspect the similarity matrix.
print(similarity)

In [None]:
# Sanity check: the matrix is expected to be square with one row/column per
# movie — compare against movies_data.shape[0].
print(similarity.shape)

In [None]:
# getting the movie name from the user

In [None]:
# Ask the user for a movie title. It does not need to be exact: it is
# fuzzy-matched against the dataset titles with difflib afterwards.
movie_name = input(' Enter your favourite movie name : ')

In [None]:
# creating a list with all the movie names given in the dataset

In [None]:
# Plain Python list of every title in the dataset, in row order; used as the
# candidate pool for fuzzy matching.
list_of_all_titles = movies_data['title'].tolist()
# Prints every title in the dataset (one long list).
print(list_of_all_titles)

In [None]:
# finding the close match for the movie name given by the user

In [None]:
# Fuzzy-match the user's input against the known titles. The result is a list
# of the best candidates, best first, and may be empty. n=3 and cutoff=0.6 are
# difflib's defaults, spelled out here for clarity.
find_close_match = difflib.get_close_matches(
    word=movie_name,
    possibilities=list_of_all_titles,
    n=3,
    cutoff=0.6,
)
print(find_close_match)

In [None]:
# Take the best fuzzy match. get_close_matches returns an empty list when no
# title is similar enough, in which case indexing [0] would fail with an
# uninformative "list index out of range". Raise the same exception type with
# a message that says what actually went wrong.
if not find_close_match:
    raise IndexError(
        f'No movie title in the dataset is close enough to {movie_name!r}'
    )
close_match = find_close_match[0]
print(close_match)

In [None]:
# finding the index of the movie with title

In [None]:
# Find the row *position* of the matched title. The similarity matrix has one
# row per DataFrame row in positional order, so the position is what must be
# used to index it. Computing it directly avoids relying on the CSV having a
# column literally named 'index' whose values happen to equal the row number.
# If several rows share the title, the first one is used (same as before).
title_matches = (movies_data['title'] == close_match).to_numpy()
index_of_the_movie = np.flatnonzero(title_matches)[0]
print(index_of_the_movie)

In [None]:
# getting a list of similar movies

In [None]:
# Take the chosen movie's row of the similarity matrix and pair every score
# with its position, giving a list of (movie position, similarity score).
scores_for_movie = similarity[index_of_the_movie]
similarity_score = [*enumerate(scores_for_movie)]
print(similarity_score)

In [None]:
# Number of (position, score) pairs — one per entry in the chosen similarity row.
len(similarity_score)

In [None]:
# sorting the movies based on their similarity score

In [None]:
def _score_of(pair):
    """Return the similarity score from an (index, score) pair."""
    return pair[1]


# Rank all (index, score) pairs from most to least similar. sorted() returns a
# new list, so similarity_score itself is left in its original order.
sorted_similar_movies = sorted(similarity_score, key=_score_of, reverse=True)
print(sorted_similar_movies)

In [None]:
# print the name of similar movies based on the index

In [None]:
print('Movies suggested for you : \n')

# Print the top-ranked titles, numbered from 1.
# Only the first 29 entries are visited: the original `i < 30` counter printed
# ranks 1..29 but kept scanning (and filtering the whole DataFrame for) every
# remaining movie afterwards. Slicing up front gives the same output without
# that wasted work.
# The first element of each pair is a row position produced by enumerate(), so
# the title is looked up positionally with .iloc.
# NOTE(review): the highest-scoring entry is normally the selected movie itself
# (self-similarity) — confirm whether it should be listed as a suggestion.
for i, (index, _score) in enumerate(sorted_similar_movies[:29], start=1):
    title_from_index = movies_data['title'].iloc[index]
    print(i, '.', title_from_index)

# Movie recommendation System

In [None]:
# End-to-end recommender: read a title from the user, fuzzy-match it to a
# dataset title, rank all movies by cosine similarity to it, and print the top
# suggestions. Relies on `movies_data` and `similarity` built in earlier cells.
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = movies_data['title'].tolist()

# May be empty when nothing in the dataset resembles the input.
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

# Fail with an explanatory message instead of a bare "list index out of range";
# the exception type is unchanged.
if not find_close_match:
    raise IndexError(
        f'No movie title in the dataset is close enough to {movie_name!r}'
    )

close_match = find_close_match[0]

# Row *position* of the matched title. The similarity matrix is positional, so
# compute the position directly rather than trusting a CSV column named
# 'index' to equal the row number. The first matching row wins, as before.
title_matches = (movies_data['title'] == close_match).to_numpy()
index_of_the_movie = np.flatnonzero(title_matches)[0]

# (row position, similarity score) for every movie, then ranked best-first.
similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

print('Movies suggested for you : \n')

# The original `i < 30` counter printed ranks 1..29 while still looping over
# (and filtering the DataFrame for) every remaining movie. Slice the top 29 up
# front for identical output without the wasted work.
# NOTE(review): the top entry is normally the selected movie itself — confirm
# whether it should appear among the suggestions.
for i, (index, _score) in enumerate(sorted_similar_movies[:29], start=1):
    title_from_index = movies_data['title'].iloc[index]
    print(i, '.', title_from_index)