In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# Load the new-movies CSV and preview its first five rows.
new_movies = pd.read_csv(filepath_or_buffer='new_movies.csv')
new_movies.head(n=5)

Unnamed: 0,title,combined
0,Avengers: Infinity War,the avengers and their allies must be willing ...
1,Black Panther,"t'challa, heir to the hidden but advanced king..."
2,Deadpool 2,foul-mouthed mutant mercenary wade wilson (a.k...
3,Bohemian Rhapsody,the story of the legendary british rock band q...
4,A Quiet Place,"in a post-apocalyptic world, a family is force..."


In [3]:
from tmdbv3api import TMDb,Movie
import os

# Configure the TMDb API wrapper used for the title lookups below.
tmdb = TMDb()
tmdb_movie = Movie()
# Read the key from the TMDB_API_KEY environment variable so a real credential
# never has to be saved inside the notebook; the original placeholder string
# remains the fallback when the variable is not set.
tmdb.api_key = os.environ.get("TMDB_API_KEY", "API Key")
tmdb.language = 'en'
tmdb.debug = True

In [4]:
def get_movie_id(x, client=None):
    """Return the id of the first search result for a movie title.

    Parameters
    ----------
    x : str
        Movie title handed to the client's ``search`` method.
    client : object, optional
        Any object exposing ``search(title)`` that returns an iterable of
        results carrying an ``id`` attribute. When omitted, the notebook-level
        ``tmdb_movie`` client is used.

    Returns
    -------
    The ``id`` attribute of the first search result, or ``np.nan`` when the
    search yields no results.
    """
    if client is None:
        client = tmdb_movie
    # Only the first hit matters, so stop iterating as soon as it is available.
    first_hit = next(iter(client.search(x)), None)
    if first_hit is None:
        # Lowercase np.nan also exists on NumPy >= 2.0, where np.NaN was removed.
        return np.nan
    return first_hit.id
    

In [5]:
# Look up an id for every new movie title via get_movie_id.
new_movies['movie_id'] = new_movies['title'].map(get_movie_id)

In [6]:
# Remove, in place, every row that has a missing value in any column.
new_movies.dropna(axis=0, how='any', inplace=True)

In [7]:
# Store the movie ids as integers.
new_movies['movie_id'] = new_movies['movie_id'].astype('int64')

In [8]:
# Load the existing movie data and preview its first five rows.
old_movies = pd.read_csv(filepath_or_buffer='new_data.csv')
old_movies.head(n=5)

Unnamed: 0,movie_id,title,combined
0,19995,Avatar,"in the 22nd century, a paraplegic marine is di..."
1,285,Pirates of the Caribbean: At World's End,"captain barbossa, long believed to be dead, ha..."
2,206647,Spectre,a cryptic message from bond’s past sends him o...
3,49026,The Dark Knight Rises,following the death of district attorney harve...
4,49529,John Carter,"john carter is a war-weary, former military ca..."


In [9]:
# Combine both dataframes and save the result as a dictionary to be used in Flask.
# Keep only the columns needed from the new movies before stacking.
new_movies = new_movies[['movie_id','title','combined']]
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent. ignore_index=True renumbers the rows 0..n-1.
total_movies = pd.concat([old_movies, new_movies], ignore_index=True)
movie_dict = total_movies.to_dict()

with open('movie_dict5.pkl','wb') as f:
    pickle.dump(movie_dict,f)

## Text preprocessing

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF vectorizer over word-level 1- to 3-grams with English stop words
# removed; min_df=3 is the minimum document frequency for a term to be kept.
tfv = TfidfVectorizer(
    analyzer='word',
    token_pattern=r'\w{1,}',
    ngram_range=(1, 3),
    stop_words='english',
    strip_accents='unicode',
    min_df=3,
    max_features=None,
)

In [11]:
# Fit the vectorizer on the 'combined' column and build the TF-IDF matrix.
matrix = tfv.fit_transform(raw_documents=total_movies['combined'])

## Model building

In [13]:
from sklearn.metrics.pairwise import sigmoid_kernel
# Pairwise sigmoid-kernel scores between every pair of rows of the matrix.
sig = sigmoid_kernel(X=matrix, Y=matrix)

In [14]:
def recommender(movie, n=6):
    """Print the titles of the movies scoring highest against ``movie``.

    Parameters
    ----------
    movie : str
        Exact title to look up in ``total_movies['title']``. When several rows
        share the title, the first one is used.
    n : int, optional
        Number of titles to print (default 6).

    Raises
    ------
    IndexError
        If no row of ``total_movies`` has the given title.
    """
    # Work with row positions so they line up with the rows of ``sig``
    # regardless of the dataframe's index labels.
    hits = (total_movies['title'] == movie).to_numpy().nonzero()[0]
    if len(hits) == 0:
        raise IndexError(f"movie {movie!r} not found in total_movies")
    movie_pos = hits[0]

    scores = sig[movie_pos]
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    # Exclude the queried movie by position instead of assuming it is ranked
    # first; with tied scores it may not be, and would otherwise be recommended
    # to itself while a genuine match is dropped.
    top = [pos for pos, _ in ranked if pos != movie_pos][:n]

    for pos in top:
        print(total_movies['title'].iloc[pos])

In [48]:
# Try the recommender engine on a sample title.
recommender(movie='Man of Steel')

Batman v Superman: Dawn of Justice
Zack Snyder's Justice League
Dragonball Evolution
Beastmaster 2: Through the Portal of Time
The Chronicles of Riddick
X-Men: Days of Future Past
