In [1]:
import numpy as np
import pandas as pd
import difflib 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



# Data Collection and Pre-processing

In [2]:
# Load the movie dataset from its CSV file
DATASET_PATH = 'sample/movies_dataset.csv'
movie_data = pd.read_csv(DATASET_PATH)

In [None]:
# Preview the first five rows of the dataset
movie_data.head(n=5)

In [None]:
# Dataset dimensions as (rows, columns)

row_count, column_count = movie_data.shape
(row_count, column_count)

In [None]:
# Count the missing values in every column
movie_data.isna().sum()

In [None]:
# Columns whose text is used to compare movies with each other

selected_feature = [
    'genres',
    'keywords',
    'tagline',
    'cast',
    'director',
]
print(selected_feature)

In [7]:
# Fill missing values in the selected feature columns with empty strings

movie_data[selected_feature] = movie_data[selected_feature].fillna('')

In [8]:
# Join the five selected text columns into one space-separated string per movie

feature_columns = ('genres', 'keywords', 'tagline', 'cast', 'director')

# Start from the first column and append the rest left to right,
# separating consecutive columns with a single space.
combined_feature = movie_data[feature_columns[0]].astype(str)
for column_name in feature_columns[1:]:
    combined_feature = combined_feature + ' ' + movie_data[column_name].astype(str)


In [None]:
# Print the combined text of all selected features

print(combined_feature)

In [10]:
# Create a TF-IDF vectorizer (default settings) used to convert the text data
# into numeric feature vectors

vectorizer = TfidfVectorizer()

In [11]:
# Fit the vectorizer on the combined text and transform that text into feature vectors
feature_vector = vectorizer.fit_transform(combined_feature)

In [None]:
# Print the TF-IDF feature vectors produced by fit_transform
print(feature_vector)

Cosine Similarity

In [None]:
# Getting the similarity scores using cosine similarity
# NOTE: the variable name `similiarity` is misspelled, but later cells reference it
# under this spelling, so it is kept unchanged.

similiarity = cosine_similarity(feature_vector)
print(similiarity)

In [None]:
# Shape of the similarity result returned by cosine_similarity

similiarity.shape

In [15]:
# Ask the user which movie the recommendations should be based on

prompt_text = 'Enter your favourite movie name: '
movie_name = input(prompt_text)

In [None]:
# Display the movie name entered by the user

movie_name

In [None]:
# Creating a list with all the movie titles given in the dataset

list_of_all_titles = movie_data['title'].tolist()

# Show only a short preview: printing every title floods the notebook output
# without telling the reader anything more.
print(f'{len(list_of_all_titles)} titles in total; first 10:')
print(list_of_all_titles[:10])

In [None]:
# Finding the close matches for the movie name given by the user

find_close_match = difflib.get_close_matches(
    word=movie_name,
    possibilities=list_of_all_titles,
)
print(find_close_match)

In [None]:
# Take the best match. difflib returns an empty list when no title is close
# enough to the input, so fail with a clear message instead of a bare IndexError.
if not find_close_match:
    raise ValueError(
        f'No movie title close to {movie_name!r} was found in the dataset; '
        'try a different spelling.'
    )

close_match = find_close_match[0]
print(close_match)

In [None]:
# Find the row position of the matched title

# The similarity matrix is addressed by row position, so use the position of the
# title in the title list (built from movie_data['title'] in row order) rather than
# whatever value happens to be stored in the dataset's 'index' column.
index_of_the_movie = list_of_all_titles.index(close_match)
print(index_of_the_movie)

In [None]:
# Getting the similarity scores of the selected movie as (row position, score) pairs

similiarity_score = list(enumerate(similiarity[index_of_the_movie]))

# Preview only the first few pairs; the full list has one entry per movie
print(similiarity_score[:10])

In [None]:
# Number of (position, score) pairs collected for the selected movie
len(similiarity_score)

In [None]:
# Sorting the movies by their similarity score, highest score first

sorted_similar_movies = sorted(similiarity_score, key=lambda pair: pair[1], reverse=True)

# Preview only the top of the ranking instead of printing every pair
print(sorted_similar_movies[:10])

In [None]:
# Print the titles of the most similar movies, best match first

print('Movies suggested for you: \n')

MAX_SUGGESTIONS = 29  # ranks 1-29 are shown

# Slice the ranking first so the loop ends after the last printed title instead of
# walking (and boolean-mask filtering) every remaining row of the dataset.
for i, (index, _score) in enumerate(sorted_similar_movies[:MAX_SUGGESTIONS], start=1):
    # `index` is a position produced by enumerate() over a similarity row,
    # so the title is looked up by row position.
    title_from_index = movie_data['title'].iloc[index]
    print(f'{i}. {title_from_index}')

# Movie Recommendation System

In [None]:
def recommend_movies(movie_name, movie_data, similarity, top_n=29):
    """Suggest titles similar to the dataset movie that best matches ``movie_name``.

    Parameters
    ----------
    movie_name : str
        Free-text name typed by the user; it is fuzzy-matched against the titles.
    movie_data : pandas.DataFrame
        Frame with a 'title' column.
    similarity : 2-D array-like
        Score matrix; row ``r`` is read as the scores of the movie in row ``r`` of
        ``movie_data`` against every movie, in the same row order.
    top_n : int, default 29
        Maximum number of titles to return.

    Returns
    -------
    tuple
        ``(matched_title, suggestions)`` where ``suggestions`` is a list of titles
        ordered from highest to lowest score. ``(None, [])`` is returned when no
        title is close enough to ``movie_name``.
    """
    titles = movie_data['title'].tolist()

    # Only the best match is used, so ask difflib for a single candidate.
    matches = difflib.get_close_matches(movie_name, titles, n=1)
    if not matches:
        return None, []
    matched_title = matches[0]

    # Work with row positions throughout: the similarity matrix is positional,
    # so neither an 'index' column nor the frame's index labels are consulted.
    row = titles.index(matched_title)
    ranked = sorted(enumerate(similarity[row]), key=lambda pair: pair[1], reverse=True)

    return matched_title, [titles[position] for position, _score in ranked[:top_n]]


# In a notebook kernel __name__ is '__main__', so this part runs as usual; the guard
# only keeps the prompt from firing if the cell is exported and imported as a module.
if __name__ == '__main__':
    movie_name = input('Enter your favourite movie name: ')

    close_match, suggested_titles = recommend_movies(movie_name, movie_data, similiarity)

    if close_match is None:
        print(f'No movie title close to {movie_name!r} was found in the dataset.')
    else:
        print('Movies suggested for you: \n')
        for i, title in enumerate(suggested_titles, start=1):
            print(f'{i}. {title}')