In [1]:
import numpy as np
import pandas as pd

In [2]:
credits_df = pd.read_csv("credits.csv")
movies_df = pd.read_csv("movies.csv")

In [3]:
pd.set_option("display.max_columns",None)
pd.set_option("display.max_rows",None)

In [4]:
movies_df=movies_df.merge(credits_df,on="title")

In [5]:
movies_df.shape

(4808, 23)

In [6]:
movies_df=movies_df[['movie_id','title','overview','genres','keywords','cast','crew']]

In [7]:
movies_df.isnull().sum()

movie_id    0
title       0
overview    3
genres      0
keywords    0
cast        0
crew        0
dtype: int64

In [8]:
movies_df.dropna(inplace=True)

In [9]:
movies_df.duplicated().sum()

0

In [10]:
movies_df.iloc[0].genres

'[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]'

In [11]:
import ast

In [12]:
def convert(obj):
    L = []
    if isinstance(obj, list):
        L = obj
    else:
        for i in ast.literal_eval(obj):
            L.append(i['name'])
    return L

In [13]:
movies_df["genres"]=movies_df["genres"].apply(convert)
movies_df["keywords"]=movies_df["keywords"].apply(convert)

In [14]:
def convert3(obj):
    L=[]
    counter=0
    for i in ast.literal_eval(obj):
        if counter!=3:
            L.append(i['name'])
            counter+=1
        else:
            break;
    return L

In [15]:
movies_df['cast']=movies_df['cast'].apply(convert3)

In [16]:
def fetch_director(obj):
    L=[]
    for i in ast.literal_eval(obj):
        if i['job']=="Director":
            L.append(i['name'])
    return L

In [17]:
movies_df['crew']=movies_df['crew'].apply(fetch_director)

In [18]:
movies_df['overview'][0]

'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization.'

In [19]:
movies_df['overview'] = movies_df['overview'].apply(lambda x: x.split() if isinstance(x, str) else x)

In [20]:
movies_df['genres']=movies_df['genres'].apply(lambda x:[i.replace(" ","_") for i in x])
movies_df['keywords']=movies_df['keywords'].apply(lambda x:[i.replace(" ","_") for i in x])
movies_df['cast']=movies_df['cast'].apply(lambda x:[i.replace(" ","_") for i in x])
movies_df['crew']=movies_df['crew'].apply(lambda x:[i.replace(" ","_") for i in x])

In [21]:
movies_df['tags']=movies_df['overview']+movies_df['genres']+movies_df['keywords']+movies_df['cast']+movies_df['crew']

In [22]:
new_df = movies_df[['movie_id','title','tags']]

In [23]:
new_df['tags']=new_df['tags'].apply(lambda x:' '.join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags']=new_df['tags'].apply(lambda x:' '.join(x))


In [24]:
new_df['tags']=new_df['tags'].apply(lambda x:x.lower())
new_df['tags'][0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags']=new_df['tags'].apply(lambda x:x.lower())


'in the 22nd century, a paraplegic marine is dispatched to the moon pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. action adventure fantasy science_fiction culture_clash future space_war space_colony society space_travel futuristic romance space alien tribe alien_planet cgi marine soldier battle love_affair anti_war power_relations mind_and_soul 3d sam_worthington zoe_saldana sigourney_weaver james_cameron'

In [25]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000, stop_words='english')

In [26]:
cv.fit_transform(new_df['tags']).toarray().shape

(4805, 5000)

In [27]:
vectors = cv.fit_transform(new_df['tags']).toarray()
vectors[0]

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [28]:
len(cv.get_feature_names_out())

5000

In [29]:
import nltk

In [30]:
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

In [31]:
def stem(text):
    y=[]
    for i in text.split():
        y.append(ps.stem(i))
    return " ".join(y)

In [32]:
new_df['tags']=new_df['tags'].apply(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags']=new_df['tags'].apply(stem)


In [33]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

tfidf = TfidfVectorizer(stop_words='english')

tfidf_matrix = tfidf.fit_transform(new_df['tags'])

cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

indices = pd.Series(new_df.index, index=new_df['title']).drop_duplicates()

def recommend(title):
    idx = indices[title]

    sim_scores = list(enumerate(cosine_sim[idx]))

    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    sim_scores = sim_scores[1:5]

    movie_indices = [i[0] for i in sim_scores]

    return new_df['title'].iloc[movie_indices]


********************************************        ACT BELOW        **********************************************

In [34]:
recommendations = recommend('Iron Man')
for x in recommendations:
    print(x)

Iron Man 2
Iron Man 3
Avengers: Age of Ultron
The Avengers


In [35]:
import requests
from bs4 import BeautifulSoup
def urls(recommendations):
    dd={}
    lt=[]
    for movie in recommendations:
        url = f'https://www.google.com/search?q={movie}%20movie%20poster&tbm=isch'
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        img = soup.find_all("img")
        img_url = img[1]['src']
        dd.update({movie:img_url})
        lt.append(img_url)
        #print(f'{movie}: {img_url}')
    return lt
url=urls(recommendations)
#print(url)
#print(lt)
dd = {rec: p for rec, p in zip(recommendations, url)}
print(dd)

{'Iron Man 2': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSLQnTurXUpA9SJll8p-kjO9h2Qaw2OOMhENMZTIKk91vFbJTn55o7pykjB8Q&s', 'Iron Man 3': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS6ZCT1aQq8lS2wg99npJF6WuZFQbGRjztRLSx3jRGSZZNBRlGSo0tmqPbRezE&s', 'Avengers: Age of Ultron': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRJlFGy4VxG5JwbkUBu2ao-O9935ZlKOYsQx23X1WPOBx9chn3sBaR_Abbeui4&s', 'The Avengers': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSn0Urlx4GSlLheiv8AMkcmK9TT6PmbrWGts4JLAHpkhEefmBAMxnTteh7MgPQ&s'}


In [44]:
from flask import Flask, request, render_template, jsonify, flash
app=Flask(__name__)

@app.route('/', methods=['POST','GET'])
def main():
    if request.method == 'POST':
        title = request.form['inputTitle']
        recommendations = recommend(title)
        pic = urls(recommendations)
    return render_template("mpg.html")

@app.route('/data', methods=['POST','GET'])
def recommended():
    if request.method == 'POST':
        title = request.form['inputTitle']
        recommendations = recommend(title)
        pic = urls(recommendations)
        dd = {rec: p for rec, p in zip(recommendations, pic)}
        return render_template("mpg.html", data=dd,title=title)
    #else:
        #return jsonify({error:'method not supported'})

app.run(host="0.0.0.0")

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.2.3:5000
Press CTRL+C to quit
127.0.0.1 - - [30/Aug/2023 15:44:28] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [30/Aug/2023 15:44:28] "GET /static/style/style.css HTTP/1.1" 304 -
127.0.0.1 - - [30/Aug/2023 15:44:28] "GET /static/scripts/script.js HTTP/1.1" 304 -
127.0.0.1 - - [30/Aug/2023 15:44:28] "GET /static/images/pirates%20of%20the%20caribbean%20-%20at%20worlds%20end.jpg HTTP/1.1" 304 -
127.0.0.1 - - [30/Aug/2023 15:44:28] "GET /static/images/avatar.jpeg HTTP/1.1" 304 -
127.0.0.1 - - [30/Aug/2023 15:44:28] "GET /static/images/spider-man.jpeg HTTP/1.1" 304 -
127.0.0.1 - - [30/Aug/2023 15:44:28] "GET /static/images/tangled.jpg HTTP/1.1" 304 -
127.0.0.1 - - [30/Aug/2023 15:44:28] "GET /static/images/background.jpeg HTTP/1.1" 304 -
127.0.0.1 - - [30/Aug/2023 15:44:35] "POST /data HTTP/1.1" 200 -
127.0.0.1 - - [30/Aug/2023 15:44:35] "GET /static/style/style.css HTTP/1.1" 304 -
127.0.0.1 - - 