In [None]:
import numpy as np
import pandas as pd

In [None]:
movies=pd.read_csv('../input/tmdb-movie-metadata/tmdb_5000_movies.csv')
credits=pd.read_csv('../input/tmdb-movie-metadata/tmdb_5000_credits.csv')

In [None]:
movies.head()

In [None]:
credits.head()

In [None]:
movies=movies.merge(credits, on='title')

In [None]:
movies.head()

In [None]:
# UNWANTED COLUMNS
# budget
# homepage
# id
# original_language
# original_title
# popularity
# production_companies
# production_countries
# release_date (not sure)

In [None]:
movies = movies[['movie_id','title','overview','genres','keywords','cast','crew']]

In [None]:
movies.head()

In [None]:
movies.isnull().sum()

In [None]:
movies.dropna(inplace=True)

In [None]:
movies.duplicated().sum()

In [None]:
movies.iloc[0].genres

In [None]:
import ast
def convert(object):
    """Extract every ``name`` from a stringified list of dictionaries.

    ``object`` is text holding a Python-literal list of dicts. The function
    parses it with ``ast.literal_eval`` and returns the ``'name'`` value of
    each entry, in the order the entries appear.
    """
    return [entry['name'] for entry in ast.literal_eval(object)]

In [None]:
movies['genres']=movies['genres'].apply(convert)

In [None]:
movies.head()

In [None]:
movies['keywords']=movies['keywords'].apply(convert)

In [None]:
movies.head()

In [None]:
import ast
def convert3(obj, limit=3):
    """Return the ``name`` of the first ``limit`` entries of a stringified list.

    Parameters
    ----------
    obj : str
        Text holding a Python-literal list of dictionaries, each of which has
        a ``'name'`` key.
    limit : int, optional
        Maximum number of names to keep. Defaults to 3, the cap that was
        previously hard-coded.

    Returns
    -------
    list
        At most ``limit`` names, in their original order.
    """
    # Slicing replaces the manual counter/break loop and avoids shadowing the
    # built-in ``list`` with a local variable.
    entries = list(ast.literal_eval(obj))[:limit]
    return [entry['name'] for entry in entries]

In [None]:
movies['cast'] = movies['cast'].apply(convert3)

In [None]:
movies.head()

In [None]:
def director(text):
    """Return the names of all crew entries whose ``job`` is ``'Director'``.

    ``text`` is a Python-literal list of dictionaries in string form; each
    entry is expected to carry ``'job'`` and ``'name'`` keys. The result is a
    (possibly empty) list of names in their original order.
    """
    crew_entries = ast.literal_eval(text)
    return [member['name'] for member in crew_entries if member['job'] == 'Director']

In [None]:
movies['crew'] = movies['crew'].apply(director)

In [None]:
movies.head()

In [None]:
movies['overview']=movies['overview'].apply(lambda x:x.split())

In [None]:
movies.head()

In [None]:
movies['genres']=movies['genres'].apply(lambda x:[i.replace(" ","") for i in x])
movies['keywords']=movies['keywords'].apply(lambda x:[i.replace(" ","") for i in x])
movies['crew']=movies['crew'].apply(lambda x:[i.replace(" ","") for i in x])
movies['cast']=movies['cast'].apply(lambda x:[i.replace(" ","") for i in x])

In [None]:
movies.head()

In [None]:
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']

In [None]:
movies.head()

In [None]:
movies=movies[['movie_id','title','tags']]

In [None]:
movies.head()

In [None]:
movies['tags']=movies['tags'].apply(lambda x:" ".join(x))

In [None]:
movies.head()

In [None]:
movies['tags']=movies['tags'].apply(lambda x:x.lower())

In [None]:
movies.head()

# Vectorization 
### Bag of Words Technique

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
v = CountVectorizer(max_features =5000, stop_words='english')

In [None]:
vectors=v.fit_transform(movies['tags']).toarray()

In [None]:
vectors

In [None]:
vectors[0]

In [None]:
v.get_feature_names()

# Stemming

In [None]:
import nltk
from nltk.stem.porter import PorterStemmer

In [None]:
ps=PorterStemmer()

In [None]:
def stem(text):
    li=[]
    for i in text.split():
        li.append(ps.stem(i))
    string=" ".join(li)
    return string
    

In [None]:
movies['tags']=movies['tags'].apply(stem)

# Measuring Cosine Distance b/w vectors

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
similarity =cosine_similarity(vectors)

In [None]:
similarity[0]

In [None]:
def recommend(movie):
    """Print the titles of the five movies most similar to ``movie``.

    Parameters
    ----------
    movie : str
        Exact title to look up in ``movies['title']``. If several rows share
        the title, the first one is used.

    Raises
    ------
    IndexError
        If no row of ``movies`` has that title.
    """
    # `similarity` is indexed by row *position*, so resolve the title to a
    # position. A row's index *label* equals its position only while the
    # index is a gap-free 0..n-1 range, which is no longer true once rows
    # have been dropped from the frame.
    positions = (movies['title'] == movie).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError("no movie titled {!r} in movies['title']".format(movie))
    movie_index = int(positions[0])
    distances = similarity[movie_index]
    # Exclude the query movie by position rather than assuming it sorts
    # first: another row with an equal score could otherwise take its place.
    ranked = sorted(
        ((i, score) for i, score in enumerate(distances) if i != movie_index),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for i, _score in ranked[:5]:
        print(movies.iloc[i].title)
       
    

In [None]:
recommend('Avatar')

In [None]:
movies.head()

# Exporting to Create GUI


In [None]:
import pickle

In [None]:
pickle.dump(movies.to_dict(),open('movies.pkl','wb'))

In [None]:
pickle.dump(similarity,open('similarity.pkl','wb'))

## Creating a Streamlit App Using the Movies DataFrame and Cosine Similarity

### Create a Python file, copy the code below into it, and run it (e.g. `streamlit run app.py`) to see the web app
#### Please use your own API key to fetch the posters; the key below is the author's own and has deliberately not been hidden.

In [None]:
!pip install streamlit
import streamlit as st
import requests
import pickle

# `st.cache` is deprecated in newer Streamlit releases in favour of
# `st.cache_data`; use the newer decorator when this Streamlit provides it
# and fall back to the old one otherwise.
_cache_posters = getattr(st, "cache_data", None) or st.cache

@_cache_posters
def get_posters(movie_id):
    """Return the w500 poster URL for the TMDB movie ``movie_id``.

    Queries the TMDB movie endpoint and appends the ``poster_path`` field of
    the JSON response to the TMDB image base URL.

    Raises
    ------
    requests.HTTPError
        If TMDB answers with an error status (bad key, unknown id, ...).
    """
    import os  # local import: the app's import header does not include `os`

    # NOTE(review): a real API key is committed here as the fallback value.
    # Prefer setting TMDB_API_KEY in the environment and removing the literal.
    api_key = os.environ.get("TMDB_API_KEY", "8265bd1679663a7ea12ac168da84d2e8")
    data = requests.get(
        "https://api.themoviedb.org/3/movie/{}".format(movie_id),
        params={"api_key": api_key, "language": "en-US"},
        timeout=10,  # without a timeout a stalled connection hangs the app
    )
    # Fail on HTTP errors here instead of with a confusing KeyError below.
    data.raise_for_status()
    response = data.json()
    # NOTE(review): if the response carries a null poster_path, the
    # concatenation below raises TypeError - decide on a placeholder.
    poster_path = response['poster_path']

    return "https://image.tmdb.org/t/p/w500/" + poster_path

def recommend(movie):
    """Return the five most similar movies to ``movie`` and their posters.

    Parameters
    ----------
    movie : str
        Exact title to look up in ``movies['title']``. If several rows share
        the title, the first one is used.

    Returns
    -------
    tuple of (list, list)
        Recommended titles and, in the same order, the poster URL that
        ``get_posters`` returns for each of them.

    Raises
    ------
    IndexError
        If no row of ``movies`` has that title.
    """
    # `similarity` is indexed by row *position*; the frame rebuilt from the
    # pickled dict keeps its original index labels, which need not match
    # positions, so resolve the title to a position explicitly.
    positions = (movies['title'] == movie).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError("no movie titled {!r} in movies['title']".format(movie))
    movie_index = int(positions[0])
    distances = similarity[movie_index]
    # Exclude the query movie by position rather than assuming it sorts
    # first: another row with an equal score could otherwise take its place.
    ranked = sorted(
        ((i, score) for i, score in enumerate(distances) if i != movie_index),
        key=lambda pair: pair[1],
        reverse=True,
    )[:5]
    recommended = []
    movie_posters = []
    for i, _score in ranked:
        row = movies.iloc[i]
        recommended.append(row.title)
        movie_posters.append(get_posters(row.movie_id))
    return recommended, movie_posters

# `pd.DataFrame` is used below, but the app's import header does not import
# pandas, so a stand-alone copy of this script would fail with NameError.
import pandas as pd

st.title('Movies Recommendation System')

# NOTE: unpickling can execute arbitrary code; only load files that were
# produced by the export step of this notebook.
with open('./movies.pkl','rb') as movies_file:
    movies=pickle.load(movies_file)
movies=pd.DataFrame(movies)
selected = st.selectbox('Enter or Select Movie Name',
movies['title'].values)

with open('./similarity.pkl','rb') as similarity_file:
    similarity=pickle.load(similarity_file)

if st.button('Recommend'):
    names,posters= recommend(selected)
    # One column per recommendation. `zip` stops at the shortest input, so a
    # result with fewer than five entries renders what is available instead
    # of raising IndexError on a hard-coded position.
    for column, name, poster in zip(st.columns(5), names, posters):
        with column:
            st.text(name)
            st.image(poster)
