In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
movies_data = pd.read_csv('movies_final.csv')

In [3]:
movies_data.head()

Unnamed: 0.1,Unnamed: 0,index,Title,Details
0,0,0,Aquaman and the Lost Kingdom,Aquaman and the Lost Kingdom Action Adventure ...
1,1,1,The Hunger Games: The Ballad of Songbirds & Sn...,The Hunger Games: The Ballad of Songbirds & Sn...
2,2,2,The Family Plan,The Family Plan Action Comedy Mark Wahlberg M...
3,3,3,Wonka,Wonka Comedy Family Fantasy Every good thing i...
4,4,4,Chicken Run: Dawn of the Nugget,Chicken Run: Dawn of the Nugget Animation Adve...


In [4]:
movies_data.shape

(14490, 4)

In [5]:
movies_data.drop('Unnamed: 0', axis=1, inplace=True)

In [6]:
movies_data

Unnamed: 0,index,Title,Details
0,0,Aquaman and the Lost Kingdom,Aquaman and the Lost Kingdom Action Adventure ...
1,1,The Hunger Games: The Ballad of Songbirds & Sn...,The Hunger Games: The Ballad of Songbirds & Sn...
2,2,The Family Plan,The Family Plan Action Comedy Mark Wahlberg M...
3,3,Wonka,Wonka Comedy Family Fantasy Every good thing i...
4,4,Chicken Run: Dawn of the Nugget,Chicken Run: Dawn of the Nugget Animation Adve...
...,...,...,...
14485,14485,El Mariachi,El Mariachi Action Crime Thriller united state...
14486,14486,Newlyweds,Newlyweds Comedy Romance A newlywed couple's ...
14487,14487,"Signed, Sealed, Delivered","Signed, Sealed, Delivered Comedy Drama Romance..."
14488,14488,Shanghai Calling,Shanghai Calling A New Yorker in Shanghai Da...


In [7]:
combined_features = movies_data['Details']

In [8]:
print(combined_features)

0        Aquaman and the Lost Kingdom Action Adventure ...
1        The Hunger Games: The Ballad of Songbirds & Sn...
2        The Family Plan Action Comedy  Mark Wahlberg M...
3        Wonka Comedy Family Fantasy Every good thing i...
4        Chicken Run: Dawn of the Nugget Animation Adve...
                               ...                        
14485    El Mariachi Action Crime Thriller united state...
14486    Newlyweds Comedy Romance  A newlywed couple's ...
14487    Signed, Sealed, Delivered Comedy Drama Romance...
14488    Shanghai Calling   A New Yorker in Shanghai Da...
14489    My Date with Drew Documentary obsession camcor...
Name: Details, Length: 14490, dtype: object


In [9]:
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)

In [10]:
similarity = cosine_similarity(feature_vectors)

In [11]:
movie_name = input('Enter your favourite movie name - ')

Enter your favourite movie name - RRR


In [12]:
list_of_all_titles = movies_data['Title'].tolist()
print(list_of_all_titles)

['Aquaman and the Lost Kingdom', 'The Hunger Games: The Ballad of Songbirds & Snakes', 'The Family Plan', 'Wonka', 'Chicken Run: Dawn of the Nugget', 'Oppenheimer', 'Trolls Band Together', 'Freelance', 'Killers of the Flower Moon', 'The Jester', 'Leave the World Behind', 'Megaboa', 'Leo', "Five Nights at Freddy's", 'Thanksgiving', 'Fast X', 'Silent Night', 'The Creator', 'Expend4bles', 'Barbie', 'Aquaman', 'How the Grinch Stole Christmas', 'The Super Mario Bros. Movie', 'Rebel Moon - Part One: A Child of Fire', 'The Grinch', 'Transformers: Rise of the Beasts', 'Good Boy', 'Mission: Impossible - Dead Reckoning Part One', 'Migration', 'Shrapnel', 'Godzilla Minus One', 'Krampus', 'Escaping Paradise', 'The Marvels', 'Avatar: The Way of Water', 'Elemental', 'The Dark Kingdom', 'Reign of Chaos', 'Christmas Bloody Christmas', 'Spider-Man: Across the Spider-Verse', 'Meg 2: The Trench', 'The Advent Calendar', 'The Equalizer 3', 'Wish', 'A Man of Reason', 'Spider-Man: No Way Home', 'Guimoon: The

In [13]:
find_close_match = difflib.get_close_matches(movie_name,list_of_all_titles)
print(find_close_match)

['RRR']


In [14]:
close_match = find_close_match[0]
index_of_the_movie = movies_data[movies_data.Title == close_match]['index'].values[0]
print(index_of_the_movie)

1333


In [15]:
similarity_score = list(enumerate(similarity[index_of_the_movie]))
print(similarity_score)

[(0, 0.00607621387534458), (1, 0.003957896297685158), (2, 0.010346948256045183), (3, 0.010344161405110868), (4, 0.005668814443286585), (5, 0.008030627587944877), (6, 0.0), (7, 0.012928784062985775), (8, 0.01654458059017637), (9, 0.005498425668295568), (10, 0.0010365837292382332), (11, 0.008406403947171332), (12, 0.013746293223797777), (13, 0.0), (14, 0.009791976907762793), (15, 0.009325623173183124), (16, 0.0), (17, 0.004338748784796254), (18, 0.006013299056715705), (19, 0.012787643294907844), (20, 0.005457020037931288), (21, 0.0019887455623949315), (22, 0.002553035945238804), (23, 0.009058544979563562), (24, 0.0), (25, 0.015124407867914942), (26, 0.0), (27, 0.006900311196650149), (28, 0.0), (29, 0.006765050544758635), (30, 0.0017224751556980814), (31, 0.0), (32, 0.002452426877929919), (33, 0.001261702607574902), (34, 0.014689430076165109), (35, 0.00737177881198404), (36, 0.006141355968635628), (37, 0.01133228240324577), (38, 0.005759385963139037), (39, 0.003551501026263671), (40, 0.00

In [16]:
sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
print(sorted_similar_movies)

[(1333, 1.0), (3506, 0.10331725357341302), (6495, 0.08463024984482323), (8802, 0.06885955943397053), (9488, 0.067923583223161), (10977, 0.06573209891806717), (5971, 0.06506604631936874), (2752, 0.05772498722754696), (4349, 0.05731876963492849), (1190, 0.05629014076750476), (214, 0.05618459190443376), (6944, 0.05557301295704709), (2935, 0.0528331747902291), (1487, 0.045455148575207424), (13897, 0.04501699603310349), (12043, 0.043657591810305325), (9253, 0.04333980317240984), (410, 0.04108318038912656), (10693, 0.04081300403069036), (9620, 0.040556367933112764), (3959, 0.03961806892093824), (7011, 0.03900108214604412), (3216, 0.03846100618164503), (6389, 0.037307377804288486), (6501, 0.03709170105990643), (13714, 0.037014341222337134), (341, 0.03680080812126128), (13475, 0.036616360310914746), (8611, 0.03646681957177848), (9708, 0.03645666535268012), (7803, 0.03532114561874602), (5432, 0.03527948120294598), (92, 0.035194091085683915), (14071, 0.035116086246605935), (4196, 0.0347383441129

In [17]:
print('Movies suggested for you: \n')
i = 1
suggested_movies = []
for movie in sorted_similar_movies:

    index = movie[0]
    title_from_index = movies_data[movies_data.index==index]['Title'].values[0]
    
    if i < 21:
        if title_from_index not in suggested_movies:
            suggested_movies.append(title_from_index)
        print(i,'.',title_from_index)
        i = i + 1

Movies suggested for you: 

1 . RRR
2 . Bāhubali: The Beginning
3 . K.G.F: Chapter 2
4 . The Rising Hawk
5 . Khufiya
6 . Baahubali: The Beginning
7 . Pushpa: The Rise - Part 1
8 . War
9 . Tiger 3
10 . Jai Lava Kusa
11 . Heart of Stone
12 . Kama Sutra: A Tale of Love
13 . Bāhubali 2: The Conclusion
14 . Dry Day
15 . दिल जो भी कहे
16 . Youth in Revolt
17 . Main Hoon Na
18 . Adipurush
19 . Indiana Jones and the Last Crusade
20 . The Indian Fighter


In [18]:
suggested_movies

['RRR',
 'Bāhubali: The Beginning',
 'K.G.F: Chapter 2',
 'The Rising Hawk',
 'Khufiya',
 'Baahubali: The Beginning',
 'Pushpa: The Rise - Part 1',
 'War',
 'Tiger 3',
 'Jai Lava Kusa',
 'Heart of Stone',
 'Kama Sutra: A Tale of Love',
 'Bāhubali 2: The Conclusion',
 'Dry Day',
 'दिल जो भी कहे',
 'Youth in Revolt',
 'Main Hoon Na',
 'Adipurush',
 'Indiana Jones and the Last Crusade',
 'The Indian Fighter']