## Loading Libraries

In [2]:
import pandas as pd
import numpy as np

## Loading Preprocessed data

In [49]:
# Read the preprocessed movie table from disk into a DataFrame.
processed_csv_path = r"D:\Projects\Content-Based_Movie_Recommendation_System-main\Backend\Data\processed_movie_data.csv"
movie_data = pd.read_csv(processed_csv_path)

# Quick sanity checks. Only the last expression of a cell is displayed, so the
# values the author observed are recorded in the trailing comments.
movie_data.head()
movie_data.isna().sum()  # 0
movie_data.shape         # (4809, 3)
movie_data.columns

Index(['movie_id', 'title', 'tags'], dtype='object')

## Recommender System

### Make a Word-dictionary

In [19]:
## Using CountVectorizer (disabled alternative, kept for reference; the TF-IDF cell below is what is actually used)

# from sklearn.feature_extraction.text import CountVectorizer
# cv=CountVectorizer(max_features=10000)
# word_vectors=cv.fit_transform(movie_data["tags"]).toarray()
# cv.get_feature_names()

In [54]:
## Using TFIDF

from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a TF-IDF vocabulary on the "tags" column and turn every movie into a
# weighted term vector: one row per movie, one column per vocabulary word.
tfidf = TfidfVectorizer()
# NOTE(review): .toarray() densifies the sparse TF-IDF matrix. It is kept so
# that word_vector stays a plain ndarray for any code that relies on that.
word_vector = tfidf.fit_transform(movie_data["tags"]).toarray()    # Convert words to arr[vectors]

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is its replacement. Fall back to the old method on
# releases that predate the new one, and keep `word` a plain list as before.
if hasattr(tfidf, "get_feature_names_out"):
    word = list(tfidf.get_feature_names_out())
else:
    word = tfidf.get_feature_names()
#word                                                              # 33313 of word dictionary

### Calculating Similarity or distance-metrics
* Here we need to calculate the similarity of each movie with every other movie.
* In a high-dimensional space, "Euclidean distance" is not a good measure, so we use "cosine similarity" instead.

In [55]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of word_vector:
# entry [i, j] is the similarity score between movie row i and movie row j.
similarity_matrix = cosine_similarity(word_vector)

similarity_matrix.shape  # (4809, 4809)
similarity_matrix

(4809, 4809)

In [57]:
 # Persist the similarity matrix as a .npy file so it can be loaded later
 # instead of being recomputed.
similarity_npy_path = r'D:\Projects\Content-Based_Movie_Recommendation_System-main\Backend\Data\similarity_matrix.npy'
np.save(similarity_npy_path, similarity_matrix)

### Recommendation Engine

In [78]:
def recommend(movie_name, top_n=3):
    """Print the movies most similar to ``movie_name``.

    Looks the title up in the global ``movie_data`` frame, reads that movie's
    row of the global ``similarity_matrix`` and prints the ``top_n``
    highest-scoring *other* movies, best first. If the title is not present,
    an apology message is printed instead. A blank line is always printed last.

    Parameters
    ----------
    movie_name : str
        Exact title to look up (compared with ``==`` against the ``title``
        column). If several rows share the title, the first one is used.
    top_n : int, default 3
        How many recommendations to print. The default keeps the three
        results this function has always produced; values below zero are
        treated as zero.

    Returns
    -------
    None
        Results are printed, not returned.

    Notes
    -----
    The number printed as ``Movie-ID`` is the row position inside
    ``movie_data`` / ``similarity_matrix``, not the ``movie_id`` column.
    """
    # Positional lookup: similarity_matrix is addressed by row position, so
    # resolve the title to a position rather than to an index label.
    title_matches = np.flatnonzero(movie_data["title"].to_numpy() == movie_name)
    if title_matches.size == 0:
        print("Sorry!!! Try with another Movie name")
        print()
        return

    movie_pos = int(title_matches[0])
    scores = similarity_matrix[movie_pos]

    # Exclude the queried movie explicitly by position. Dropping "the first
    # sorted entry" is wrong whenever another row ties with it at the top
    # score (e.g. a duplicate entry): the stable sort would then keep the
    # query itself in the results and discard a genuine match.
    candidates = (
        (pos, score) for pos, score in enumerate(scores) if pos != movie_pos
    )
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    ranked = ranked[:max(int(top_n), 0)]

    print("Your Recommended Movies:-")
    for pos, score in ranked:
        print("Movie-ID: {} : '{}' ; Similarity:{}".format(
            pos, movie_data["title"].iloc[pos], round(score, 2)))
    print()

In [79]:
## Predict Your Next Movie
# Run the recommender once per sample title, in the order listed.
sample_titles = [
    "Avatar",
    "The Avengers",
    "Titanic",
    "Toy Story 3",
    "Furious 7",
    "World War Z",
    "The Dark Knight",
]
for sample_title in sample_titles:
    recommend(sample_title)

Your Recommended Movies:-
Movie-ID: 2409 : 'Aliens' ; Similarity:0.3
Movie-ID: 838 : 'Alien³' ; Similarity:0.28
Movie-ID: 3163 : 'Alien' ; Similarity:0.25

Your Recommended Movies:-
Movie-ID: 7 : 'Avengers: Age of Ultron' ; Similarity:0.54
Movie-ID: 85 : 'Captain America: The Winter Soldier' ; Similarity:0.39
Movie-ID: 26 : 'Captain America: Civil War' ; Similarity:0.39

Your Recommended Movies:-
Movie-ID: 2149 : 'Ghost Ship' ; Similarity:0.12
Movie-ID: 818 : 'Captain Phillips' ; Similarity:0.12
Movie-ID: 104 : 'Poseidon' ; Similarity:0.11

Your Recommended Movies:-
Movie-ID: 343 : 'Toy Story 2' ; Similarity:0.55
Movie-ID: 1547 : 'Toy Story' ; Similarity:0.52
Movie-ID: 1194 : 'Small Soldiers' ; Similarity:0.36

Your Recommended Movies:-
Movie-ID: 204 : 'Fast Five' ; Similarity:0.37
Movie-ID: 99 : 'The Fast and the Furious' ; Similarity:0.35
Movie-ID: 405 : 'The Fast and the Furious: Tokyo Drift' ; Similarity:0.23

Your Recommended Movies:-
Movie-ID: 1710 : 'The Big Short' ; Similarity:

In [81]:
## The recommendation engine only needs (movie_id, title) plus similarity_matrix.npy,
## so export just those two columns, without the DataFrame index.
engine_csv_path = r"D:\Projects\Content-Based_Movie_Recommendation_System-main\Backend\Data\movie_data_recommendation_engine.csv"
df = movie_data.loc[:, ['movie_id', 'title']]
df.to_csv(engine_csv_path, index=False)