# Movie recommendation

In [1]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
import textblob
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
url = 'C:/Users/Hp/OneDrive/Desktop/PYDS/Movie_recommender/imdb_raw.csv'
df = pd.read_csv(url)
df.head(10)

Unnamed: 0,title,director,release_year,runtime,genre,rating,metascore,gross
0,The Shawshank Redemption,Frank Darabont,(1994),142 min,Drama,9.3,82,$28.34M
1,The Godfather,Francis Ford Coppola,(1972),175 min,"Crime, Drama",9.2,100,$134.97M
2,The Dark Knight,Christopher Nolan,(2008),152 min,"Action, Crime, Drama",9.0,84,$534.86M
3,Schindler's List,Steven Spielberg,(1993),195 min,"Biography, Drama, History",9.0,95,$96.90M
4,12 Angry Men,Sidney Lumet,(1957),96 min,"Crime, Drama",9.0,97,$4.36M
5,The Lord of the Rings: The Return of the King,Peter Jackson,(2003),201 min,"Action, Adventure, Drama",9.0,94,$377.85M
6,The Godfather Part II,Francis Ford Coppola,(1974),202 min,"Crime, Drama",9.0,90,$57.30M
7,Spider-Man: Across the Spider-Verse,Joaquim Dos Santos,(2023),140 min,"Animation, Action, Adventure",8.9,86,0
8,Pulp Fiction,Quentin Tarantino,(1994),154 min,"Crime, Drama",8.9,95,$107.93M
9,Inception,Christopher Nolan,(2010),148 min,"Action, Adventure, Sci-Fi",8.8,74,$292.58M


# Top movies according to rating in IMBD

In [3]:
nd= df[['title','release_year','rating']]
top_df = nd.head(10)
top_df
stop_df = top_df.sort_values(by='rating',ascending=False)
stop_df.head()

Unnamed: 0,title,release_year,rating
0,The Shawshank Redemption,(1994),9.3
1,The Godfather,(1972),9.2
2,The Dark Knight,(2008),9.0
3,Schindler's List,(1993),9.0
4,12 Angry Men,(1957),9.0


In [4]:
df.shape

(1000, 8)

# Movie Recommendation based on entered movie

In [5]:
df['data'] = df['title'] + ' ' + df['director'] + ' ' +  df['genre']
df['data'].head()

0        The Shawshank Redemption Frank Darabont Drama
1      The Godfather Francis Ford Coppola Crime, Drama
2    The Dark Knight Christopher Nolan Action, Crim...
3    Schindler's List Steven Spielberg Biography, D...
4               12 Angry Men Sidney Lumet Crime, Drama
Name: data, dtype: object

In [6]:
#remove punctuations - anything that is not a word or a space
df['data'] = df['data'].str.replace('[^\w\s]','')
# Lower case
df['data'] = df['data'].str.lower()

In [7]:
def remove_stopwords(text):
    words = text.split()
    return" ".join(word for word in words if word not in stopwords.words('english'))
df['data'] = df['data'].apply(remove_stopwords)
df['data']

0              shawshank redemption frank darabont drama
1            godfather francis ford coppola crime, drama
2      dark knight christopher nolan action, crime, d...
3      schindler's list steven spielberg biography, d...
4                 12 angry men sidney lumet crime, drama
                             ...                        
995    long engagement jean-pierre jeunet drama, myst...
996            shine scott hicks biography, drama, music
997    philomena stephen frears biography, comedy, drama
998             invisible man james whale horror, sci-fi
999          cell 211 daniel monzón action, crime, drama
Name: data, Length: 1000, dtype: object

In [8]:
#vectorize the data
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['data']).toarray()
X.shape

(1000, 2267)

In [9]:
# similarity matrix
similarity = cosine_similarity(X, X)
similarity.shape

(1000, 1000)

In [10]:
def get_index_from_title(title):
    try: 
        return df[df.title == title].index[0]
    except:
        return None


def recommend_movie(title, Limit = 10):
    index = get_index_from_title(title)
    if index is None:
        return []
    else:
        movie_scores = []
        for i in range(similarity.shape[0]):
            movie_scores.append((df['title'][i],similarity[index][i]))
        movie_scores.sort(key=lambda x:x[1],reverse = True)
        return movie_scores[1:Limit+1]

In [None]:
movie = input('The the name of the movie: ') 
nr = int(input('Enter the number of recomendations: '))
recommend_movie(movie,nr)

In [None]:
rm= recommend_movie('The Godfather',5)
rm

[('The Godfather Part II', 0.8660254037844387),
 ('Apocalypse Now', 0.6172133998483676),
 ('The Conversation', 0.6172133998483676),
 ('Casino', 0.36514837167011077),
 ('Z', 0.36514837167011077)]

In [None]:
drm=pd.DataFrame(rm)
drm

Unnamed: 0,0,1
0,The Godfather Part II,0.866025
1,Apocalypse Now,0.617213
2,The Conversation,0.617213
3,Casino,0.365148
4,Z,0.365148


In [None]:
drm.rename(
    mapper={'0':'Title',
    '1':'Movie Scores'},
    axis=1,
    inplace=True
)
drm

In [None]:
movie_title = df[['title']]
df.set_index('title',inplace=True)
movie_list=movie_title.index.tolist()
movie_list

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [None]:

df.head()

Unnamed: 0,title,director,release_year,runtime,genre,rating,metascore,gross,data
0,The Shawshank Redemption,Frank Darabont,(1994),142 min,Drama,9.3,82,$28.34M,shawshank redemption frank darabont drama
1,The Godfather,Francis Ford Coppola,(1972),175 min,"Crime, Drama",9.2,100,$134.97M,"godfather francis ford coppola crime, drama"
2,The Dark Knight,Christopher Nolan,(2008),152 min,"Action, Crime, Drama",9.0,84,$534.86M,"dark knight christopher nolan action, crime, d..."
3,Schindler's List,Steven Spielberg,(1993),195 min,"Biography, Drama, History",9.0,95,$96.90M,"schindler's list steven spielberg biography, d..."
4,12 Angry Men,Sidney Lumet,(1957),96 min,"Crime, Drama",9.0,97,$4.36M,"12 angry men sidney lumet crime, drama"


In [None]:

movie_list

['The Shawshank Redemption',
 'The Godfather',
 'The Dark Knight',
 "Schindler's List",
 '12 Angry Men',
 'The Lord of the Rings: The Return of the King',
 'The Godfather Part II',
 'Spider-Man: Across the Spider-Verse',
 'Pulp Fiction',
 'Inception',
 'The Lord of the Rings: The Fellowship of the Ring',
 'Fight Club',
 'Forrest Gump',
 'The Good, the Bad and the Ugly',
 'The Lord of the Rings: The Two Towers',
 'Jai Bhim',
 '777 Charlie',
 'Interstellar',
 'Goodfellas',
 'The Matrix',
 "One Flew Over the Cuckoo's Nest",
 'Star Wars: Episode V - The Empire Strikes Back',
 'Rocketry: The Nambi Effect',
 'Soorarai Pottru',
 'The Green Mile',
 'Terminator 2: Judgment Day',
 'Se7en',
 'Saving Private Ryan',
 'The Silence of the Lambs',
 'Star Wars: Episode IV - A New Hope',
 'Spirited Away',
 'City of God',
 'Life Is Beautiful',
 'Seven Samurai',
 "It's a Wonderful Life",
 'Harakiri',
 'Sita Ramam',
 'Back to the Future',
 'The Departed',
 'Gladiator',
 'Alien',
 'Parasite',
 'The Prestige