In [1]:
import pandas as pd
import requests
from  decouple import config
import ast
import time
import scipy.sparse as sp
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [15]:
def get_data(path='../all_movies_data.csv'):
    """Load the movie catalogue CSV and lower-case the ``name`` column.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the location this notebook
        has always read from, so existing ``get_data()`` calls are unchanged.

    Returns
    -------
    pandas.DataFrame
        The rows of the file with ``name`` converted to lower case so that
        titles can be matched case-insensitively.
    """
    movie_data = pd.read_csv(path)
    movie_data['name'] = movie_data['name'].str.lower()
    return movie_data

In [16]:
# Load the catalogue (get_data lower-cases the titles) and preview the first rows.
df=get_data()
df.head()

Unnamed: 0,tmdb_id,imdb_id,year,name,rating,description,directors,cast,genres
0,274.0,tt0102926,1991,the silence of the lambs,8.6,A young F.B.I. cadet must receive the help of ...,Jonathan Demme,"Jodie Foster, Anthony Hopkins, Lawrence A. Bon...","Crime, Drama, Thriller"
1,280.0,tt0103064,1991,terminator 2: judgment day,8.6,"A cyborg, identical to the one who failed to k...",James Cameron,"Arnold Schwarzenegger, Linda Hamilton, Edward ...","Action, Sci-Fi"
2,10020.0,tt0101414,1991,beauty and the beast,8.0,A prince cursed to spend his days as a hideous...,"Gary Trousdale, Kirk Wise","Paige O'Hara, Robby Benson, Jesse Corti, Rex E...","Animation, Family, Fantasy"
3,879.0,tt0102057,1991,hook,6.8,"When Captain James Hook kidnaps his children, ...",Steven Spielberg,"Dustin Hoffman, Robin Williams, Julia Roberts,...","Adventure, Comedy, Family"
4,8367.0,tt0102798,1991,robin hood: prince of thieves,6.9,Robin Hood decides to fight back as an outlaw ...,Kevin Reynolds,"Kevin Costner, Morgan Freeman, Mary Elizabeth ...","Action, Adventure, Drama"


In [17]:
# (rows, columns) of the frame as loaded.
df.shape

(9499, 9)

In [39]:
# Count of missing values in each column.
df.isna().sum()

tmdb_id        160
imdb_id        165
year             0
name             0
rating           0
description      0
directors        3
cast             2
genres           0
dtype: int64

In [41]:
# Drop every row that has at least one missing value, then re-check the size.
# NOTE(review): `df` is rebound here, so re-running the inspection cells above
# afterwards shows the cleaned frame rather than the raw one. The surviving rows
# also keep their original index labels (no reset_index), so labels no longer
# equal row positions.
df=df.dropna(how='any')
df.shape

(9330, 9)

In [66]:
# Persist the cleaned frame without writing the index as a column.
# NOTE(review): this writes 'all_movie_data.csv' in the working directory, whereas
# get_data() reads '../all_movies_data.csv' — confirm the different name/location
# is intended.
df.to_csv('all_movie_data.csv',index=False)

In [51]:
def combine_data(data):
    """Merge the descriptive text columns of each movie into one string.

    The ``description``, ``directors``, ``cast`` and ``genres`` columns are
    selected by name (not by position), so extra or reordered columns in
    ``data`` cannot change which fields end up in the combined text.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the four text columns listed above.

    Returns
    -------
    pandas.DataFrame
        A single-column frame named ``combine`` that shares ``data``'s index.
        Each value is the row's non-missing text fields joined with ``','``.

    Raises
    ------
    KeyError
        If any of the four text columns is absent from ``data``.
    """
    text_columns = ['description', 'directors', 'cast', 'genres']
    combined = [
        # Missing fields are skipped rather than rendered as the text "nan".
        ','.join(str(value) for value in row if pd.notna(value))
        for row in data[text_columns].itertuples(index=False, name=None)
    ]
    return pd.DataFrame({'combine': combined}, index=data.index)

In [52]:
# Merge the text columns of the cleaned frame into a single 'combine' column.
combine_res=combine_data(df)

In [53]:
# Inspect the combined text stored under index label 0.
combine_res.loc[0]

combine    A young F.B.I. cadet must receive the help of ...
Name: 0, dtype: object

In [56]:
def transform_data(data_combine, data):
    """Turn the combined movie text into a pairwise cosine-similarity matrix.

    Parameters
    ----------
    data_combine : pandas.DataFrame
        Frame with a ``combine`` column holding one text string per movie.
    data : pandas.DataFrame
        Accepted for interface compatibility; this function does not read it.

    Returns
    -------
    The value returned by ``cosine_similarity`` for the TF-IDF matrix of
    ``data_combine['combine']``.
    """
    # Tokens are runs of ASCII letters, '-' and '/'; English stop words are dropped.
    vectorizer = TfidfVectorizer(
        stop_words='english',
        token_pattern=u'([a-zA-Z-/]{1,})',
    )
    weighted_terms = vectorizer.fit_transform(data_combine['combine'])
    return cosine_similarity(weighted_terms)

In [57]:
# Build the pairwise similarity matrix for the cleaned frame and display it.
# combine_data(df) is recomputed here instead of reusing combine_res.
transform_res=transform_data(combine_data(df),df)
transform_res

array([[1.        , 0.        , 0.00738633, ..., 0.00688723, 0.00943751,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.00562545, 0.        ,
        0.        ],
       [0.00738633, 0.        , 1.        , ..., 0.00489646, 0.        ,
        0.02174332],
       ...,
       [0.00688723, 0.00562545, 0.00489646, ..., 1.        , 0.02691821,
        0.02433261],
       [0.00943751, 0.        , 0.        , ..., 0.02691821, 1.        ,
        0.02903748],
       [0.        , 0.        , 0.02174332, ..., 0.02433261, 0.02903748,
        1.        ]])

In [117]:
def recommend_movies(title, data, combine, transform, top_n=11):
    """Return the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie name to look up in ``data['name']`` (exact match).
    data : pandas.DataFrame
        Movie table with ``imdb_id``, ``tmdb_id``, ``year``, ``name`` and
        ``genres`` columns. Row *i* must correspond to row *i* of ``transform``.
    combine : object
        Accepted for interface compatibility; not used by this function.
    transform : 2-D array-like
        Pairwise similarity scores, indexed by row position.
    top_n : int, optional
        Number of recommendations to return (default 11, the count the
        original ``[1:12]`` slice produced).

    Returns
    -------
    pandas.DataFrame
        Columns ``imdb_id``, ``tmdb_id``, ``year``, ``title``, ``genres`` for
        the ``top_n`` highest-scoring other movies, best match first.

    Raises
    ------
    KeyError
        If ``title`` does not occur in ``data['name']``.
    """
    # Work with row positions, not index labels: the similarity matrix is
    # positional, and the two differ whenever rows were dropped without a
    # reset_index.
    matches = (data['name'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise KeyError(title)
    # Several movies can share a title (e.g. remakes); use the first occurrence
    # instead of failing on an ambiguous lookup.
    query_pos = int(matches[0])

    scores = transform[query_pos]
    # Exclude the queried movie explicitly rather than assuming it sorts first,
    # which is not guaranteed when another row ties its self-similarity score.
    ranked = sorted(
        (pos for pos in range(len(scores)) if pos != query_pos),
        key=lambda pos: scores[pos],
        reverse=True,
    )
    movie_indices = ranked[:top_n]

    columns = ['imdb_id', 'tmdb_id', 'year', 'name', 'genres']
    return data.iloc[movie_indices][columns].rename(columns={'name': 'title'})

In [67]:
def get_poster(id):
    """Return the full poster image URL for a TMDB movie id.

    Parameters
    ----------
    id : int, float or str
        TMDB movie id. Whole-number floats (how the id column arrives from the
        CSV, e.g. ``274.0``) are converted to ``int`` before building the URL.

    Returns
    -------
    str or None
        ``'https://image.tmdb.org/t/p/original' + poster_path``, or ``None``
        when the id is missing, the movie is not found (HTTP 404), or the
        response carries no poster path.

    Raises
    ------
    requests.HTTPError
        For any non-404 error status (e.g. a rejected API key).
    """
    if pd.isna(id):
        return None
    if isinstance(id, float) and id.is_integer():
        id = int(id)

    response = requests.get(
        'https://api.themoviedb.org/3/movie/{}?api_key={}'.format(id, config('API_KEY')),
        timeout=10,  # seconds; without it a stalled connection blocks forever
    )
    if response.status_code == 404:
        return None
    # Surface auth/quota/server problems instead of failing later on a missing key.
    response.raise_for_status()

    poster_path = response.json().get('poster_path')
    if not poster_path:
        return None
    return 'https://image.tmdb.org/t/p/original' + poster_path


In [69]:
def results(movie_name):
    """Recommend movies similar to ``movie_name`` and attach poster URLs.

    Parameters
    ----------
    movie_name : str
        Title to look up; matched case-insensitively against the catalogue.

    Returns
    -------
    list of dict or str
        One record per recommendation with the keys ``imdb_id``, ``tmdb_id``,
        ``year``, ``title``, ``genres`` and ``poster``; or the string ``'NA'``
        when the title is not in the catalogue.
    """
    movie_name = movie_name.lower()
    movie_df = get_data()

    # Check membership first: building the TF-IDF and similarity matrices is
    # the expensive step and is pointless for an unknown title.
    if not (movie_df['name'] == movie_name).any():
        return 'NA'

    combine_result = combine_data(movie_df)
    transform_result = transform_data(combine_result, movie_df)
    recommendations = recommend_movies(movie_name, movie_df, combine_result, transform_result)

    # reset_index yields a fresh frame, so adding a column does not touch movie_df.
    records = recommendations.reset_index(drop=True)
    records['poster'] = records['tmdb_id'].apply(get_poster)
    return records.to_dict(orient='records')

In [118]:
# Run the full pipeline for one title (results() lower-cases its input).
results('avatar')

[{'imdb_id': 'tt2262227',
  'tmdb_id': 228326.0,
  'year': 2014,
  'title': 'the book of life',
  'genres': 'Animation, Adventure, Comedy',
  'poster': 'https://image.tmdb.org/t/p/original/aotTZos5KswgCryEzx2rlOjFsm1.jpg'},
 {'imdb_id': 'tt0325703',
  'tmdb_id': 1996.0,
  'year': 2003,
  'title': 'lara croft tomb raider: the cradle of life',
  'genres': 'Action, Adventure, Fantasy',
  'poster': 'https://image.tmdb.org/t/p/original/vzWqkXbqs3EEMi3jgFpiRPgFGlG.jpg'},
 {'imdb_id': 'tt0373024',
  'tmdb_id': 25350.0,
  'year': 2004,
  'title': 'imaginary heroes',
  'genres': 'Comedy, Drama',
  'poster': 'https://image.tmdb.org/t/p/original/tHMgJGkovOFpHi2lnQ9xTrIaR7N.jpg'},
 {'imdb_id': 'tt2015381',
  'tmdb_id': 118340.0,
  'year': 2014,
  'title': 'guardians of the galaxy',
  'genres': 'Action, Adventure, Comedy',
  'poster': 'https://image.tmdb.org/t/p/original/r7vmZjiyZw9rpJMQJdXpjgiCOk9.jpg'},
 {'imdb_id': 'tt4899370',
  'tmdb_id': 424488.0,
  'year': 2017,
  'title': 'megan leavey',
  

### TMDB API recommendations: compare my model's results with the actual API results

In [88]:
# URL of TMDB's "similar movies" listing for one movie id (first page, en-US).
# NOTE(review): 19995 is presumably the TMDB id of the title queried above — confirm.
similar_template = 'https://api.themoviedb.org/3/movie/{}/similar?api_key={}&language=en-US&page=1'
url = similar_template.format(19995, config('API_KEY'))

In [89]:
# Fetch the similar-movies listing. The timeout stops a stalled connection from
# hanging the kernel, and raise_for_status reports an HTTP error here instead of
# as a confusing KeyError when `resp['results']` is read later.
similar_response = requests.get(url, timeout=10)
similar_response.raise_for_status()
resp = similar_response.json()

In [90]:
# List the title of every movie in the fetched response, one per line.
for title in (entry['title'] for entry in resp['results']):
    print(title)

The Iron Giant
The X Files: I Want to Believe
DragonHeart
The 6th Day
Species
Universal Soldier
Mad Max Beyond Thunderdome
Cannibal Holocaust
Cannibal Ferox
Antz
The Tree of Life
The Reader
Alien³
Alien Resurrection
The Faculty
The Longest Day
The Gods Must Be Crazy II
Legend
The Deer Hunter
Midway


In [87]:
# Same title-listing loop as the previous cell.
# NOTE(review): this cell's execution count (87) is lower than those of the cells
# that build `url` and `resp` (88-89), and its saved output differs from the
# previous cell's, so it appears to have been run against an earlier `resp`.
# Re-run it in order or remove it.
for movie in resp['results']:
    print(movie['title'])

The Avengers
The Dark Knight Rises
The Hobbit: An Unexpected Journey
The Hunger Games
Inception
Iron Man
The Lord of the Rings: The Fellowship of the Ring
Skyfall
Pirates of the Caribbean: The Curse of the Black Pearl
Iron Man 3
Despicable Me
The Lord of the Rings: The Return of the King
Titanic
2012
The Dark Knight
The Lord of the Rings: The Two Towers
The Amazing Spider-Man
Pirates of the Caribbean: On Stranger Tides
Iron Man 2
Harry Potter and the Philosopher's Stone
Pirates of the Caribbean: Dead Man's Chest
