# IMPORTING THE DATASET

In [5]:
#All import statements
import random
import pandas as pd
import numpy as np
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
# Read the two downloaded CSV files (the credits table and the movies table)
# from the notebook's working directory.
credits = pd.read_csv("credits.csv")
movies = pd.read_csv("movies.csv")

In [22]:
#Merging the dataset with the required columns
# The credits columns are renamed by position so that the join key is called
# "id", matching the "id" column of movies.
credits.columns = ["id", "title", "cast", "crew"]

# Merge only once: running this cell again on an already-merged frame would
# join the credits columns a second time (suffixed/clashing "cast", "crew" and
# "title" columns) and break the cells that read movies["cast"] etc.
if not {"cast", "crew"}.issubset(movies.columns):
    movies = movies.merge(credits, on="id")

# CAST, DIRECTOR, KEYWORDS & GENRES WILL BE THE FEATURES OF OUR MODEL

In [8]:
# These columns hold their values as text; ast.literal_eval (which only
# evaluates Python literals, never arbitrary code) turns each cell back into
# the Python object it spells out.
features = ["cast", "crew", "keywords", "genres"]
for column in features:
    movies[column] = movies[column].apply(literal_eval)

In [9]:
#Function to find director name
def director(x):
    """Return the name of the first entry in x whose job is "Director".

    x is an iterable of mappings that carry "job" and "name" keys.
    np.nan is returned when no entry has the job "Director".
    """
    director_names = (member["name"] for member in x if member["job"] == "Director")
    return next(director_names, np.nan)

#Function to find the first 2 names of a list-valued feature (e.g. the 2 main actors)
def get2(x):
    """Return the "name" of at most the first two entries of x.

    Parameters:
        x: a list of mappings with a "name" key, or any other value.

    Returns:
        A list with up to two names. Lists holding only one or two entries
        keep their names (previously they were discarded and [] was returned,
        because the return sat inside the "more than two" branch). Anything
        that is not a list yields [].
    """
    if isinstance(x, list):
        return [entry["name"] for entry in x[:2]]
    return []
    

# Pull the director's name out of the crew list into its own column.
movies["director"] = movies["crew"].apply(director)

# Reduce each list-valued feature column to a short list of names via get2.
features = ["cast", "keywords", "genres"]
for column in features:
    movies[column] = movies[column].apply(get2)

In [11]:
# Expose the "original_title" column under the name "title", which the later
# title-based lookups read.
movies = movies.rename(columns={"original_title": "title"})

# CLEANING AND PREPROCESSING THE DATA

In [13]:
#Creating a function which will convert all strings to lower case and without spaces
def clean(r):
    """Lower-case r and remove every space character from it.

    A list is cleaned element by element, a string is cleaned directly, and
    any other value (for example NaN or None) becomes the empty string.
    """
    if isinstance(r, list):
        return [item.replace(" ", "").lower() for item in r]
    if isinstance(r, str):
        return r.replace(" ", "").lower()
    return ""

# Normalise every feature column that is combined into the recommendation
# metadata, so that equal names always produce equal tokens.
features = ["cast", "keywords", "director", "genres"]
for column in features:
    movies[column] = movies[column].apply(clean)

In [14]:
#Creating a metadata column 
def create_meta(features):
    """Build the space-separated metadata string for one movie row.

    features is indexed by "keywords", "cast" and "genres" (iterables of
    strings) and by "director" (a single string). The pieces are emitted in
    the order keywords, cast, director, genres.
    """
    parts = [
        " ".join(features["keywords"]),
        " ".join(features["cast"]),
        features["director"],
        " ".join(features["genres"]),
    ]
    return " ".join(parts)


# Build the metadata string row by row: axis="columns" hands each row to create_meta.
movies["meta"] = movies.apply(create_meta, axis="columns")

In [15]:
#This model recommends movies based on the features metadata
#The metadata is converted into token-count vectors, and the cosine similarity between those vectors
#gives a similarity score for every pair of movies; recommendations are ranked by that score

vectorizer = CountVectorizer(stop_words="english")
matrix = vectorizer.fit_transform(movies["meta"])

cosine_sim2 = cosine_similarity(matrix,matrix) 

movies = movies.reset_index()
# Map each title to its row position. Series.drop_duplicates() compares the
# VALUES (the row positions, which are all unique), so it never removed
# repeated titles. De-duplicate on the index instead, so that indices[title]
# is always a single position (the first movie carrying that title).
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep="first")]

(4803, 8469)
(4803, 4803)


# COLLECTING THE RECOMMENDATIONS AND PRINTING THEM

In [17]:
#This function takes the title of a movie and the similarity matrix as input

def get_rec(title, cosine_sim, top_n=5):
    """Return the titles of the movies most similar to the given one.

    Parameters:
        title: movie title to look up in the module-level `indices` Series.
        cosine_sim: square similarity matrix; row i holds the scores of
            movie i against every movie.
        top_n: number of recommendations to return (default 5).

    Returns:
        A list of up to top_n titles, most similar first. The queried movie
        itself is never part of the result.

    Raises:
        KeyError: if title is not present in `indices`.
    """
    idx = indices[title]
    # A title shared by several movies makes the lookup return a Series;
    # use the first movie carrying that title.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Drop the queried movie by position rather than assuming it sorts first:
    # with tied (or all-zero) scores another movie can outrank it, and slicing
    # off element 0 would then discard a real match and keep the query itself.
    similarity_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # list.sort is stable, so tied scores keep their original row order.
    similarity_scores.sort(key=lambda pair: pair[1], reverse=True)

    movies_indices = [position for position, _ in similarity_scores[:top_n]]
    movierecs = movies["title"].iloc[movies_indices]
    return movierecs.tolist()


In [21]:
#Inputting the 20 movies and returning 5 recommendations
print("-------------------------------Contend Based Movie Recommendation System-------------------------------")
print("Enter 20 movie names based off which the recommendations will be given")

set1=set()
set2=set()

#Input the movie names (blank entries are ignored)
for i in range (20):
    s=input("Enter movie name: ").strip()
    if s:
        set1.add(s)

#Save the recommendations in a set
#The lookup must use the entered titles; passing the loop counter (as before)
#ignored the user's input and recommended from the first rows of the dataset
for s in set1:
    try:
        l=get_rec(s,cosine_sim2)
    except KeyError:
        #Title is not in the dataset: skip it instead of aborting the whole run
        print("Movie not found in the dataset, skipping:", s)
        continue
    set2.update(l)

#Do set2-set1 to remove the inputted movie names just incase
setfinal=set2.difference(set1)

#Now we select up to 5 random movies from this final set of recommendations
#random.sample needs a sequence (sets raise TypeError on Python 3.11+), and the
#sample size is capped so that a small candidate set does not raise ValueError
candidates=sorted(setfinal)
finalrec=random.sample(candidates,min(5,len(candidates)))
print("\n")

print("\033[1mThese are movies we recommend to you-\033[0m")
for i in finalrec:
    print(i)

-------------------------------Contend Based Movie Recommendation System-------------------------------
Enter 20 movie names based off which the recommendations will be given
Enter movie name: The Dark Knight Rises
Enter movie name: The Avengers
Enter movie name: Spectre
Enter movie name: Avatar
Enter movie name: Takers






















[1mThese are movies we recommend to you-[0m
John Carter
The Losers
Batman Begins
Pirates of the Caribbean: The Curse of the Black Pearl
The Martian
