In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ipywidgets as widgets
from IPython.display import display
import datetime

In [2]:
Movie = widgets.Text(description="Movie:")
n = widgets.IntText(description="No.of.recommendations:")
button= widgets.Button(description="Recommendation")

Inputs = widgets.VBox ([Movie, n])
Interface= widgets.HBox([Inputs, button])


db=pd.DataFrame(columns={'Movie','n','Time'})

In [3]:
def StoretoDb(response):
  global db
  current_timestamp=datetime.datetime.now()
  response.append(current_timestamp)
  new_row=pd.DataFrame(response, index=['Movie','n','Time']).T
  db = db.append(new_row,ignore_index=True)
  return("Stored successfully")

In [4]:
def on_button_clicked(b):
  response=[Movie.value,n.value]
  print(StoretoDb(response))

button.on_click(on_button_clicked)

In [5]:
Interface

HBox(children=(VBox(children=(Text(value='', description='Movie:'), IntText(value=0, description='No.of.recomm…

Stored successfully


In [6]:
db

Unnamed: 0,Time,n,Movie
0,2022-08-27 05:25:06.500194,10,Stalker (1979)


In [7]:
movie_data= pd.read_csv("movies.csv")
rating_data= pd.read_csv("ratings.csv")
df=pd.merge(rating_data,movie_data, how='left',on='movieId')
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,16,4.0,1217897793,Casino (1995),Crime|Drama
1,1,24,1.5,1217895807,Powder (1995),Drama|Sci-Fi
2,1,32,4.0,1217896246,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Mystery|Sci-Fi|Thriller
3,1,47,4.0,1217896556,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,4.0,1217896523,"Usual Suspects, The (1995)",Crime|Mystery|Thriller


In [8]:
movie_data.isnull().sum()

movieId    0
title      0
genres     0
dtype: int64

In [9]:
movie_data.shape

(10329, 3)

In [10]:
movie_data.describe()

Unnamed: 0,movieId
count,10329.0
mean,31924.282893
std,37734.741149
min,1.0
25%,3240.0
50%,7088.0
75%,59900.0
max,149532.0


In [11]:
selected_features = ['title','genres']
selected_features

['title', 'genres']

In [12]:
for feature in selected_features:
  movie_data[feature] = movie_data[feature].fillna('')

In [13]:
combined_features = movie_data['genres']+' '+movie_data['title']

In [14]:
print(combined_features)

0        Adventure|Animation|Children|Comedy|Fantasy To...
1                Adventure|Children|Fantasy Jumanji (1995)
2                   Comedy|Romance Grumpier Old Men (1995)
3            Comedy|Drama|Romance Waiting to Exhale (1995)
4                Comedy Father of the Bride Part II (1995)
                               ...                        
10324    Animation|Children|Comedy Cosmic Scrat-tastrop...
10325                    Comedy Le Grand Restaurant (1966)
10326                Comedy A Very Murray Christmas (2015)
10327                           Drama The Big Short (2015)
10328    (no genres listed) Marco Polo: One Hundred Eye...
Length: 10329, dtype: object


In [15]:
vectorizer = TfidfVectorizer()

In [16]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [17]:
print(feature_vectors)

  (0, 111)	0.3362698867204709
  (0, 8108)	0.43265717692986444
  (0, 8624)	0.6128595248435812
  (0, 3010)	0.2707878348138484
  (0, 1895)	0.1506742792993607
  (0, 1695)	0.2831287189871249
  (0, 493)	0.3079404316718635
  (0, 282)	0.2304681375927779
  (1, 4578)	0.7746211424208029
  (1, 111)	0.3761309147202813
  (1, 3010)	0.30288669912427474
  (1, 1695)	0.3166904568672631
  (1, 282)	0.2577875534799532
  (2, 5457)	0.41683593991106527
  (2, 6068)	0.49744684518089693
  (2, 3687)	0.6508918226536827
  (2, 7148)	0.18759152435485202
  (2, 111)	0.3160519681577258
  (2, 1895)	0.14161512643234506
  (3, 2924)	0.6452233972204463
  (3, 8547)	0.3226061062636555
  (3, 9110)	0.5605803893909848
  (3, 2601)	0.11365604308767434
  (3, 7148)	0.18595784494653286
  (3, 111)	0.3132995645290772
  :	:
  (10324, 493)	0.22994250312518305
  (10325, 7007)	0.6165987088179458
  (10325, 3629)	0.5231200278026358
  (10325, 82)	0.41024276778508634
  (10325, 4921)	0.397773340816688
  (10325, 1895)	0.14010249680635486
  (10326,

In [18]:
similarity = cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.35757551 0.12761652 ... 0.02041226 0.         0.        ]
 [0.35757551 1.         0.11887692 ... 0.         0.         0.        ]
 [0.12761652 0.11887692 1.         ... 0.01918499 0.         0.        ]
 ...
 [0.02041226 0.         0.01918499 ... 1.         0.16400297 0.08303514]
 [0.         0.         0.         ... 0.16400297 1.         0.10730401]
 [0.         0.         0.         ... 0.08303514 0.10730401 1.        ]]


In [19]:
print(similarity.shape)

(10329, 10329)


In [20]:
movie_name = db['Movie'][0]
n= db['n'][0]

In [21]:
list_of_all_titles = movie_data['title'].tolist()
print(list_of_all_titles)

['Toy Story (1995)', 'Jumanji (1995)', 'Grumpier Old Men (1995)', 'Waiting to Exhale (1995)', 'Father of the Bride Part II (1995)', 'Heat (1995)', 'Sabrina (1995)', 'Tom and Huck (1995)', 'Sudden Death (1995)', 'GoldenEye (1995)', 'American President, The (1995)', 'Dracula: Dead and Loving It (1995)', 'Balto (1995)', 'Nixon (1995)', 'Cutthroat Island (1995)', 'Casino (1995)', 'Sense and Sensibility (1995)', 'Four Rooms (1995)', 'Ace Ventura: When Nature Calls (1995)', 'Money Train (1995)', 'Get Shorty (1995)', 'Copycat (1995)', 'Assassins (1995)', 'Powder (1995)', 'Leaving Las Vegas (1995)', 'Othello (1995)', 'Now and Then (1995)', 'Persuasion (1995)', 'City of Lost Children, The (Cité des enfants perdus, La) (1995)', 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)', 'Dangerous Minds (1995)', 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)', 'Wings of Courage (1995)', 'Babe (1995)', 'Carrington (1995)', 'Dead Man Walking (1995)', 'Clueless (1995)', 'Cry, the Beloved Country (1995)', '

In [22]:
# finding the close match for the movie name given by the user

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

['Stalker (1979)', 'Stakeout (1987)', 'Sneakers (1992)']


In [23]:
close_match = find_close_match[0]
print(close_match)

Stalker (1979)


In [24]:
# finding the index of the movie with title

index_of_the_movie = movie_data[movie_data.title == close_match].index.values[0]
print(index_of_the_movie)

992


In [25]:
# getting a list of similar movies

similarity_score = list(enumerate(similarity[index_of_the_movie]))
print(similarity_score)

[(0, 0.0), (1, 0.0), (2, 0.0), (3, 0.014771622705831312), (4, 0.0), (5, 0.0), (6, 0.0), (7, 0.0), (8, 0.0), (9, 0.0), (10, 0.017647714680317814), (11, 0.0), (12, 0.0), (13, 0.021987772792055955), (14, 0.0), (15, 0.021069779579688138), (16, 0.014990578320181917), (17, 0.0), (18, 0.0), (19, 0.017416859243759648), (20, 0.0), (21, 0.10282493012033428), (22, 0.0), (23, 0.18218866742239712), (24, 0.013578229157026285), (25, 0.021042583988636263), (26, 0.015941081253581357), (27, 0.02119094237635333), (28, 0.13624555972870547), (29, 0.006116235823669301), (30, 0.0173616379643685), (31, 0.12316935713683906), (32, 0.0), (33, 0.02046657491699423), (34, 0.019704819689414836), (35, 0.01758661817078545), (36, 0.0), (37, 0.014614635504877538), (38, 0.018166241668434082), (39, 0.016962605838520866), (40, 0.02034057715396281), (41, 0.0), (42, 0.01880790248622296), (43, 0.01190790443966479), (44, 0.07830842259641135), (45, 0.016562662346288174), (46, 0.014511723182232449), (47, 0.06904888188330391), (4

In [26]:
# sorting the movies based on their similarity score

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True) 
print(sorted_similar_movies)

[(992, 1.0), (3006, 0.43419777053078007), (975, 0.3987636937152703), (2320, 0.38684944661021836), (2876, 0.3485171031057315), (1596, 0.3416191159733562), (2924, 0.3318788673478723), (6151, 0.3262030305902643), (1077, 0.3061897289183982), (8089, 0.2955409828857224), (4922, 0.294455874209912), (4238, 0.2785645464305756), (1003, 0.2757928975018178), (5176, 0.2754286241826567), (2719, 0.262740492039558), (2430, 0.2626970715273094), (4511, 0.2617274027102), (4876, 0.2605657271615079), (5862, 0.2598115549832417), (2285, 0.2598068675838447), (6748, 0.2598068675838447), (6474, 0.2571432335829951), (6372, 0.2565692015450401), (5834, 0.25640302326901476), (4882, 0.2560467738187509), (8309, 0.2552137411451718), (4266, 0.2552105615122382), (9146, 0.25495999119571305), (6332, 0.254113661154831), (1112, 0.2539097059745359), (10032, 0.2491213644727271), (1918, 0.2490015304406468), (8183, 0.24711142574842698), (7179, 0.24707848007851455), (6154, 0.24628804070176608), (8051, 0.2460021357817067), (3803,

In [27]:
# print the name of similar movies based on the index

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_from_index = movie_data[movie_data.index==index]['title'].values[0]
  if (i<(n+1)):
    print(i, '.',title_from_index)
    i+=1

Movies suggested for you : 

1 . Stalker (1979)
2 . H.O.T.S. (1979)
3 . Alien (1979)
4 . Phantasm (1979)
5 . Moonraker (1979)
6 . Black Hole, The (1979)
7 . Mad Max (1979)
8 . Clonus Horror, The (1979)
9 . Amityville Horror, The (1979)
10 . Moon (2009)
