In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as py
%matplotlib inline

In [63]:
df=pd.read_csv("Updated_Movie_Dataset.csv")
df = df.rename(columns={df.columns[0]: "index_movie"})


In [64]:
df.columns

Index(['index_movie', 'movie_id', 'movie_name', 'year', 'genre', 'overview',
       'director', 'cast', 'Poster_URL'],
      dtype='object')

In [65]:
df.isna().sum()

index_movie      0
movie_id         0
movie_name       0
year            65
genre            0
overview         0
director         0
cast             0
Poster_URL     884
dtype: int64

In [66]:
df=df[["index_movie","movie_name","genre","overview","Poster_URL"]]


In [67]:
df.head()

Unnamed: 0,index_movie,movie_name,genre,overview,Poster_URL
0,0,Jawan,"Action, Thriller",A high-octane action thriller which outlines t...,https://m.media-amazon.com/images/M/MV5BMGExNG...
1,1,Jaane Jaan,"Crime, Drama, Mystery",A single mother and her daughter who commit a ...,https://m.media-amazon.com/images/M/MV5BMGIzNT...
2,2,Jailer,"Action, Comedy, Crime",A retired jailer goes on a manhunt to find his...,https://m.media-amazon.com/images/M/MV5BMmM3YT...
3,3,Rocky Aur Rani Kii Prem Kahaani,"Comedy, Drama, Family",Flamboyant Punjabi Rocky and intellectual Beng...,https://m.media-amazon.com/images/M/MV5BNjViNW...
4,4,OMG 2,"Comedy, Drama",An unhappy civilian asks the court to mandate ...,https://m.media-amazon.com/images/M/MV5BYzRmMD...


In [68]:
df['tag']=df['overview']+df['genre']
df.drop(columns=["overview","genre"],inplace=True)

In [69]:
df['tag']=df['tag'].apply(lambda x:x.lower())

In [70]:
df.head()

Unnamed: 0,index_movie,movie_name,Poster_URL,tag
0,0,Jawan,https://m.media-amazon.com/images/M/MV5BMGExNG...,a high-octane action thriller which outlines t...
1,1,Jaane Jaan,https://m.media-amazon.com/images/M/MV5BMGIzNT...,a single mother and her daughter who commit a ...
2,2,Jailer,https://m.media-amazon.com/images/M/MV5BMmM3YT...,a retired jailer goes on a manhunt to find his...
3,3,Rocky Aur Rani Kii Prem Kahaani,https://m.media-amazon.com/images/M/MV5BNjViNW...,flamboyant punjabi rocky and intellectual beng...
4,4,OMG 2,https://m.media-amazon.com/images/M/MV5BYzRmMD...,an unhappy civilian asks the court to mandate ...


In [71]:
import nltk

from nltk.stem.porter import PorterStemmer

ps=PorterStemmer()

In [72]:
   def stemming(text):
       y=[]
       for i in text.split():           
           y.append(ps.stem(i))

       return " ".join(y)
       

In [73]:
stemming(df['tag'][0])

'a high-octan action thriller which outlin the emot journey of a man who is set to rectifi the wrong in the society.action, thriller'

In [74]:
stemming(df['tag'][64])

"a modern adapt of the indian epic ramayana which follow the exil princ raghav' journey to rescu hi wife janaki from the raakshash king lankesh.action, adventure, drama"

In [75]:
df.head()

Unnamed: 0,index_movie,movie_name,Poster_URL,tag
0,0,Jawan,https://m.media-amazon.com/images/M/MV5BMGExNG...,a high-octane action thriller which outlines t...
1,1,Jaane Jaan,https://m.media-amazon.com/images/M/MV5BMGIzNT...,a single mother and her daughter who commit a ...
2,2,Jailer,https://m.media-amazon.com/images/M/MV5BMmM3YT...,a retired jailer goes on a manhunt to find his...
3,3,Rocky Aur Rani Kii Prem Kahaani,https://m.media-amazon.com/images/M/MV5BNjViNW...,flamboyant punjabi rocky and intellectual beng...
4,4,OMG 2,https://m.media-amazon.com/images/M/MV5BYzRmMD...,an unhappy civilian asks the court to mandate ...


In [76]:
df['tag'][45]

'an eight-year-old boy is thought to be a lazy trouble-maker, until the new art teacher has the patience and compassion to discover the real problem behind his struggles in school.drama, family'

In [77]:
import string

df['tag'] = df['tag'].str.replace(f"[{string.punctuation}]", "", regex=True)



In [78]:
df['tag'][56]

'a tough police officer sets out to track down and kill an equally tough gangsteraction crime drama'

In [79]:
from sklearn.feature_extraction.text import CountVectorizer
cv=CountVectorizer(max_features=5000,stop_words='english')

In [80]:
vectors=cv.fit_transform(df['tag']).toarray()

In [81]:
cv.get_feature_names_out()

array(['10', '100', '10000', ..., 'zee5', 'zone', 'zoya'],
      shape=(5000,), dtype=object)

In [82]:
from sklearn.metrics.pairwise import cosine_similarity

similarity=cosine_similarity(vectors)


In [83]:
def recommend(movie):
    index = df[df['movie_name'] == movie].index[0]

    distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])
    for i in distances[1:6]:
        print(df.iloc[i[0]].movie_name)

    

In [84]:
recommend("Jailer")

Bhuddah Hoga Tera Baap
Dishoom
Radhe
De Dana Dan
Khosla Ka Ghosla!


In [85]:
recommend("3 Idiots")

Auzaar
Fukrey
Chhichhore
Haasil
Ta Ra Rum Pum


In [86]:
import pickle

In [87]:
pickle.dump(df, open("movies.pkl", "wb"))
pickle.dump(similarity, open("similarity.pkl", "wb"))