In [27]:
#importing the required libraries 
import numpy as np # linear algebra
import pandas as pd # data processing, read CSV file 
import ast
from sklearn.feature_extraction.text import CountVectorizer #Convert a collection of text to a matrix of token counts
from sklearn.metrics.pairwise import cosine_similarity 

# function to extract the 'name' value from every dictionary in a stringified list of dictionaries
def convert(text):
    """Return the 'name' value of every dictionary in a stringified list.

    Args:
        text: A string containing a Python literal list of dictionaries,
            each of which has a 'name' key.

    Returns:
        A list with the 'name' value of each dictionary, in order.
    """
    # ast.literal_eval turns the string representation back into a real list
    parsed_entries = ast.literal_eval(text)
    return [entry['name'] for entry in parsed_entries]

#function to get the director of the movie 
def get_movie_director(text):
    """Return the names of all crew entries whose job is 'Director'.

    Args:
        text: A string containing a Python literal list of dictionaries,
            each of which has 'job' and 'name' keys.

    Returns:
        A list of the 'name' values of the entries with job == 'Director'
        (empty when there is none).
    """
    crew_members = ast.literal_eval(text)
    return [
        member['name']
        for member in crew_members
        if member['job'] == 'Director'
    ]


#function to remove the spaces between words
def remove(text):
    """Return a new list with every space removed from each string.

    Removing the spaces turns a multi-word value such as "Sam Worthington"
    into the single token "SamWorthington".

    Args:
        text: An iterable of strings.

    Returns:
        A list of the same strings with all " " characters removed.
    """
    # A comprehension avoids the local variable that shadowed the builtin `list`
    return [word.replace(" ", "") for word in text]

#function to remove unnecessary data from the dataset for further calculation
def preprocess(movies):
    """Build a compact frame with one free-text 'details' column per movie.

    Rows containing any missing value are dropped. The 'genres', 'keywords'
    and 'cast' columns are parsed with convert(), the 'crew' column is reduced
    to director names with get_movie_director(), spaces inside each value are
    removed with remove(), and the 'overview' text is split into words. The
    resulting word lists are concatenated and joined into a single string.

    The input frame is not modified, and the returned frame has a fresh
    0..n-1 index so that an index label always equals the row position.

    Args:
        movies: DataFrame with at least the columns 'overview', 'genres',
            'keywords', 'cast' and 'crew'.

    Returns:
        A new DataFrame holding the remaining input columns plus 'details'.
    """
    # dropna() without inplace returns a new frame, so the caller's data is
    # left untouched; resetting the index keeps labels aligned with the rows
    # of anything computed positionally from the result.
    movies = movies.dropna().reset_index(drop=True)

    # Parse the stringified lists of dictionaries, then glue multi-word values
    # together so each value becomes a single token
    for column in ('genres', 'keywords', 'cast'):
        movies[column] = movies[column].apply(convert).apply(remove)

    # Only the director names are kept from the crew column
    movies['crew'] = movies['crew'].apply(get_movie_director).apply(remove)

    # The overview becomes a list of words so it can be concatenated with the
    # other list-valued columns
    movies['overview'] = movies['overview'].apply(lambda x: x.split())

    # Concatenating the columns into a single column in the dataframe
    movies['details'] = (
        movies['overview']
        + movies['genres']
        + movies['keywords']
        + movies['cast']
        + movies['crew']
    )

    # Removing the unnecessary columns from the dataframe
    new_movies = movies.drop(columns=['overview', 'genres', 'keywords', 'cast', 'crew'])

    # Turn the list of tokens into one space-separated string
    new_movies['details'] = new_movies['details'].apply(" ".join)

    return new_movies

#Using panda library to read the csv files
movies = pd.read_csv('movies.csv')
credits = pd.read_csv('credits.csv') 

#Merging the two dataframes into one using the common attribute (title)
# NOTE(review): titles are not guaranteed to be unique; movies sharing a title
# would be paired with each other's credits - confirm whether an id column
# should be used as the merge key instead.
movies = movies.merge(credits,on='title')

#Extracting only the important attributes from the dataframe
movies = movies[['movie_id','title','overview','genres','keywords','cast','crew']]

#Dropping incomplete rows and collapsing the text columns into a single 'details' column
new_movies = preprocess(movies)    

#Bag-of-words vectorizer limited to 5000 features, with English stop words removed
cv = CountVectorizer(max_features=5000,stop_words='english')

#Token-count matrix for the 'details' column, converted to a dense array
vector = cv.fit_transform(new_movies['details']).toarray()

#Pairwise cosine similarity between the count vectors of all movies
movie_similarity = cosine_similarity(vector)

# function to find ten similar movies to the movie entered
def recommend(movie, movies_df=None, similarity=None, top_n=10):
    """Print the movies most similar to the given title.

    Args:
        movie: Exact title to look up in the 'title' column.
        movies_df: DataFrame with a 'title' column. Defaults to the
            module-level `new_movies`.
        similarity: Square similarity matrix whose row/column i corresponds
            to the i-th row of `movies_df`. Defaults to the module-level
            `movie_similarity`.
        top_n: Maximum number of recommendations to print (default 10).

    Returns:
        None. The result, or a message when the title is not found, is
        printed to standard output.
    """
    # Fall back to the data built at module level when none is passed in
    if movies_df is None:
        movies_df = new_movies
    if similarity is None:
        similarity = movie_similarity

    # Row POSITIONS of the matching titles. The similarity matrix is indexed
    # by position, so index labels must not be used here: they stop matching
    # positions as soon as rows have been dropped from the frame.
    matches = np.flatnonzero((movies_df['title'] == movie).to_numpy())
    if matches.size == 0:
        print("\nThe movie name you entered is incorrect")
        return

    row = matches[0]
    ranked = sorted(enumerate(similarity[row]), key=lambda pair: pair[1], reverse=True)

    # Exclude the queried movie by position instead of assuming it is always
    # the first element after sorting (ties would break that assumption)
    best = [position for position, _score in ranked if position != row][:top_n]

    print('\n\n--------------------------------------')
    print('The recommended movies for you are:')
    print('--------------------------------------\n')

    for rank, position in enumerate(best, start=1):
        print(rank, ". ", movies_df['title'].iloc[position])

    print('\n--------------------------------------')

movie = input('Enter the name of movie you want recommendation for \n')
recommend(movie)


# Dumping the movies list and the similarity matrix as pickle files for displaying in the UI
import pickle

# `with` guarantees each file is flushed and closed once the dump has finished
with open('movies_list.pkl', 'wb') as movies_file:
    pickle.dump(new_movies, movies_file)
with open('movie_similarity.pkl', 'wb') as similarity_file:
    pickle.dump(movie_similarity, similarity_file)

Enter the name of movie you want recommendation for 
The Dark Knight Rises


--------------------------------------
The recommended movies for you are:
--------------------------------------

1 .  The Dark Knight
2 .  Batman Begins
3 .  Batman
4 .  Batman Returns
5 .  Batman
6 .  Batman Forever
7 .  Batman & Robin
8 .  Batman: The Dark Knight Returns, Part 2
9 .  Slow Burn
10 .  Nighthawks

--------------------------------------
