In [37]:
import pandas as pd
import requests
from decouple import config
import scipy.sparse as sp
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#### get_data() loads the movie dataset from `./datasets/english_movies.csv`, lower-cases the titles, and returns the dataset with its attributes for further preprocessing.
The dataset columns are:
- tmdb_id
- title
- description
- director
- cast
- genres
- keywords

In [38]:
def get_data(path='./datasets/english_movies.csv'):
    """Load the movie dataset from a CSV file and lower-case its titles.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the notebook's bundled
        ``./datasets/english_movies.csv`` so existing ``get_data()`` calls
        keep working.

    Returns
    -------
    pandas.DataFrame
        The file's contents with the ``title`` column converted to lower
        case, so titles can be matched case-insensitively later on.

    Raises
    ------
    KeyError
        If the file has no ``title`` column.
    """
    movie_data = pd.read_csv(path)
    # Titles are normalised once here; lookups lower-case the query to match.
    movie_data['title'] = movie_data['title'].str.lower()
    return movie_data

In [39]:
# Load the dataset once and preview its first rows.
df=get_data()
df.head()

Unnamed: 0,tmdb_id,title,description,director,cast,genres,keywords
0,19995,avatar,"In the 22nd century, a paraplegic Marine is di...",James Cameron,"Sam Worthington,Zoe Saldana,Sigourney Weaver","Action,Adventure,Fantasy,Sci-Fi","culture clash,future,space war,space colony,so..."
1,285,pirates of the caribbean: at world's end,"Captain Barbossa, long believed to be dead, ha...",Gore Verbinski,"Johnny Depp,Orlando Bloom,Keira Knightley","Adventure,Fantasy,Action","ocean,drug abuse,exotic island,east india trad..."
2,206647,spectre,A cryptic message from Bond’s past sends him o...,Sam Mendes,"Daniel Craig,Christoph Waltz,Léa Seydoux","Action,Adventure,Crime","spy,based on novel,secret agent,sequel,mi6,bri..."
3,49026,the dark knight rises,Following the death of District Attorney Harve...,Christopher Nolan,"Christian Bale,Michael Caine,Gary Oldman","Action,Crime,Drama,Thriller","dc comics,crime fighter,terrorist,secret ident..."
4,49529,john carter,"John Carter is a war-weary, former military ca...",Andrew Stanton,"Taylor Kitsch,Lynn Collins,Samantha Morton","Action,Adventure,Sci-Fi","based on novel,mars,medallion,space travel,pri..."


In [40]:
# (number of rows, number of columns) of the loaded dataset.
df.shape

(3455, 7)

#### combine_data() discards the columns not required for feature extraction (tmdb_id, title, description, keywords) and then joins the director, cast and genres into a single comma-separated `combine` column, which is returned as the result of this function.

In [52]:
def combine_data(data):
    """Build the combined metadata feature used for count vectorisation.

    The ``director``, ``cast`` and ``genres`` values of every row are joined
    into one comma-separated string; missing values are skipped.

    Parameters
    ----------
    data : pandas.DataFrame
        Movie dataset containing at least the ``director``, ``cast`` and
        ``genres`` columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A frame with the same index as ``data`` and a single ``combine``
        column.

    Raises
    ------
    KeyError
        If any of the three feature columns is missing.
    """
    # Select the feature columns by name rather than by position, so extra
    # columns in the input (e.g. a saved index column) cannot shift which
    # fields get combined or leak into the result.
    feature_columns = ['director', 'cast', 'genres']
    features = data[feature_columns]

    combined = [
        ','.join(str(value) for value in row if pd.notna(value))
        for row in features.itertuples(index=False, name=None)
    ]
    return pd.DataFrame({'combine': combined}, index=data.index)

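#### transform_data() takes the value returned by combine_data() and the dataset returned by get_data(), applies CountVectorizer to the `combine` column and TfidfVectorizer to the `description` column, stacks the two feature matrices side by side, and computes the pairwise cosine similarity between all movies.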

In [42]:
def transform_data(data_combine, data):
    """Compute the movie-to-movie cosine similarity matrix.

    Parameters
    ----------
    data_combine : pandas.DataFrame
        Frame with a ``combine`` text column (as produced by
        ``combine_data``).
    data : pandas.DataFrame
        Movie dataset with a ``description`` column. It must have the same
        number of rows, in the same order, as ``data_combine`` because the
        two feature matrices are stacked column-wise.

    Returns
    -------
    numpy.ndarray
        Square matrix where entry ``[i, j]`` is the cosine similarity between
        the feature vectors of rows ``i`` and ``j``.
    """
    # Token counts over the combined director/cast/genres string.
    count = CountVectorizer(stop_words='english')
    count_matrix = count.fit_transform(data_combine['combine'])

    # TF-IDF over the description. The custom token pattern keeps runs of
    # ASCII letters, '-' and '/' (single letters included, digits excluded).
    tfidf = TfidfVectorizer(stop_words='english', token_pattern=u'([a-zA-Z-/]{1,})')
    # A missing description is treated as empty text; the vectorizer rejects
    # NaN documents outright, which would abort the whole computation.
    descriptions = data['description'].fillna('')
    tfidf_matrix = tfidf.fit_transform(descriptions)

    # One row per movie: [count features | tf-idf features].
    combine_sparse = sp.hstack([count_matrix, tfidf_matrix], format='csr')

    return cosine_similarity(combine_sparse, combine_sparse)

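#### recommend_movies() takes four parameters and returns the 10 most similar movies (movie_id, title, genres).
- title : Name of the movie (lower-case, as stored by get_data())
- data : Return value of get_data()
- combine : Return value of combine_data() (currently unused inside the function)
- transform : Return value of transform_data()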

In [43]:
def recommend_movies(title, data, combine, transform, top_n=10):
    """Return the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie title exactly as stored in ``data['title']``.
    data : pandas.DataFrame
        Movie dataset with ``tmdb_id``, ``title`` and ``genres`` columns.
        Row ``i`` must correspond to row/column ``i`` of ``transform``.
    combine : object
        Unused; kept so existing callers do not need to change.
    transform : 2-D array-like
        Square similarity matrix (as produced by ``transform_data``).
    top_n : int, optional
        Number of recommendations to return (default 10).

    Returns
    -------
    pandas.DataFrame
        Columns ``movie_id``, ``title`` and ``genres`` for the ``top_n`` most
        similar movies, most similar first, indexed by their labels in
        ``data``.

    Raises
    ------
    KeyError
        If ``title`` does not occur in ``data['title']``.
    """
    # Locate the movie by row *position*: the similarity matrix is positional,
    # so index labels must not be used to address it. When several movies
    # share the title, the first occurrence is used.
    title_matches = (data['title'] == title).tolist()
    if True not in title_matches:
        raise KeyError(title)
    position = title_matches.index(True)

    # Rank every other movie by similarity. The query movie is excluded
    # explicitly instead of assuming it sorts first, which ties could break.
    ranked = sorted(
        ((i, score) for i, score in enumerate(transform[position]) if i != position),
        key=lambda pair: pair[1],
        reverse=True,
    )
    movie_indices = [i for i, _ in ranked[:top_n]]

    selected = data.iloc[movie_indices]
    return selected[['tmdb_id', 'title', 'genres']].rename(columns={'tmdb_id': 'movie_id'})

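#### results() takes a movie’s title as input and returns the top 10 recommendations, each with a poster image URL, or the message 'Movie not in Database' when the title is not in the dataset.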

In [44]:
def results(movie_name):
    """Return the top recommendations for a movie, including poster URLs.

    Parameters
    ----------
    movie_name : str
        Movie title; matching is case-insensitive because both the query and
        the stored titles are lower-cased.

    Returns
    -------
    list of dict or str
        One record per recommended movie with the keys ``movie_id``,
        ``title``, ``genres`` and ``movie_image``; or the string
        ``'Movie not in Database'`` when the title is unknown.
    """
    movie_name = movie_name.lower()
    movie_df = get_data()

    # Check for the title before doing any feature extraction: building the
    # similarity matrix is by far the most expensive step and is pointless
    # for an unknown title.
    if movie_name not in movie_df['title'].unique():
        return 'Movie not in Database'

    combine_result = combine_data(movie_df)
    transform_result = transform_data(combine_result, movie_df)
    recommendations = recommend_movies(movie_name, movie_df, combine_result, transform_result)

    # Attach a poster URL to every recommendation (one HTTP request each).
    recommendations = recommendations.reset_index(drop=True)
    recommendations = recommendations.assign(
        movie_image=recommendations['movie_id'].apply(get_movie_image)
    )
    return recommendations.to_dict(orient='records')

In [45]:
def get_movie_image(movie_id):
    """Look up the poster image URL of a movie on TMDB.

    Parameters
    ----------
    movie_id : int or str
        TMDB identifier of the movie.

    Returns
    -------
    str or None
        Full URL of the original-size poster, or ``None`` when the response
        contains no poster path (for example the movie has no poster, or the
        API answered with an error payload).

    Notes
    -----
    The API key is read from the ``API_KEY`` setting via ``decouple.config``.
    """
    response = requests.get(
        'https://api.themoviedb.org/3/movie/{}'.format(movie_id),
        # Passed as a query parameter so requests handles the URL encoding.
        params={'api_key': config('API_KEY')},
        # Without a timeout a stalled connection would hang the caller forever.
        timeout=10,
    )
    data_dict = response.json()

    # 'poster_path' can be null or absent; concatenating it blindly would
    # raise and abort the whole recommendation list for one missing image.
    poster_path = data_dict.get('poster_path')
    if not poster_path:
        return None
    return 'https://image.tmdb.org/t/p/original' + poster_path

In [53]:
# Example: recommendations for 'interstellar' (each record carries a poster URL).
results('interstellar')

[{'movie_id': 286217,
  'title': 'the martian',
  'genres': 'Drama,Adventure,Sci-Fi',
  'movie_image': 'https://image.tmdb.org/t/p/original/5BHuvQ6p9kfc091Z8RiFNhCwL4b.jpg'},
 {'movie_id': 686,
  'title': 'contact',
  'genres': 'Drama,Sci-Fi,Mystery',
  'movie_image': 'https://image.tmdb.org/t/p/original/bCpMIywuNZeWt3i5UMLEIc0VSwM.jpg'},
 {'movie_id': 27205,
  'title': 'inception',
  'genres': 'Action,Thriller,Sci-Fi,Mystery,Adventure',
  'movie_image': 'https://image.tmdb.org/t/p/original/edv5CZvWj09upOsy2Y6IwDhK8bt.jpg'},
 {'movie_id': 11170,
  'title': 'we are marshall',
  'genres': 'Drama',
  'movie_image': 'https://image.tmdb.org/t/p/original/5PSiExbg6Fm8MiPJOikBCOcZFnd.jpg'},
 {'movie_id': 13001,
  'title': 'stargate: the ark of truth',
  'genres': 'Adventure,Sci-Fi',
  'movie_image': 'https://image.tmdb.org/t/p/original/wsKsWoRxhCuw2dczg1rx9fUAhIp.jpg'},
 {'movie_id': 11411,
  'title': 'superman iv: the quest for peace',
  'genres': 'Action,Adventure,Sci-Fi',
  'movie_image': '

In [51]:
# NOTE(review): same call as the previous cell but with an earlier execution
# count (In [51] vs In [53]) and a different output — presumably produced
# before combine_data was last redefined (In [52]). Re-run the notebook
# top-to-bottom to confirm which output is current.
results('interstellar')

[{'movie_id': 811,
  'title': 'silent running',
  'genres': 'Adventure,Drama,Sci-Fi',
  'movie_image': 'https://image.tmdb.org/t/p/original/uWoj7EfHBprcssXUzCCWeI383Tx.jpg'},
 {'movie_id': 8981,
  'title': 'dear frankie',
  'genres': 'Drama,Family',
  'movie_image': 'https://image.tmdb.org/t/p/original/5YRDvaBWBbhTKBYYc7BoOWdW01N.jpg'},
 {'movie_id': 95,
  'title': 'armageddon',
  'genres': 'Action,Thriller,Sci-Fi,Adventure',
  'movie_image': 'https://image.tmdb.org/t/p/original/eTM3qtGhDU8cvjpoa6KEt5E2auU.jpg'},
 {'movie_id': 39254,
  'title': 'real steel',
  'genres': 'Action,Sci-Fi,Drama',
  'movie_image': 'https://image.tmdb.org/t/p/original/4GIeI5K5YdDUkR3mNQBoScpSFEf.jpg'},
 {'movie_id': 2900,
  'title': "the astronaut's wife",
  'genres': 'Drama,Sci-Fi,Thriller',
  'movie_image': 'https://image.tmdb.org/t/p/original/54RGJ6INW9ERG8IaalmMKbbDgwH.jpg'},
 {'movie_id': 869,
  'title': 'planet of the apes',
  'genres': 'Thriller,Sci-Fi,Action,Adventure',
  'movie_image': 'https://imag