In [37]:
# we start by importing the packages needed  

import pandas as pd
import hashlib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import ipywidgets as widgets
from IPython.display import display

# Let's take a look on our movies dataset

movies = pd.read_csv('base1.csv')
movies

Unnamed: 0,Title,Year,Link,Genre
0,The Shawshank Redemption,(1994),https://www.imdb.com/title/tt0111161/?ref_=adv...,Drama
1,The Godfather,(1972),https://www.imdb.com/title/tt0068646/?ref_=adv...,"Crime, Drama"
2,Ramayana: The Legend of Prince Rama,(1993),https://www.imdb.com/title/tt0259534/?ref_=adv...,"Animation, Action, Adventure"
3,The Chaos Class,(1975),https://www.imdb.com/title/tt0252487/?ref_=adv...,"Comedy, Drama"
4,The Dark Knight,(2008),https://www.imdb.com/title/tt0468569/?ref_=adv...,"Action, Crime, Drama"
...,...,...,...,...
4995,Pocahontas,(I) (1995),https://www.imdb.com/title/tt0114148/?ref_=adv...,"Animation, Adventure, Drama"
4996,Hot Shots!,(1991),https://www.imdb.com/title/tt0102059/?ref_=adv...,"Action, Comedy"
4997,Safe House,(2012),https://www.imdb.com/title/tt1599348/?ref_=adv...,"Action, Thriller"
4998,High Tension,(2003),https://www.imdb.com/title/tt0338095/?ref_=adv...,Horror


In [38]:
# Create a function to generate unique IDs from movie names

def generate_movie_id(movie_name):
    return 1 + (int(hashlib.md5(movie_name.encode()).hexdigest(), 16) % 999999)

movies['MovieID'] = movies['Title'].apply(generate_movie_id)

In [39]:
movies

Unnamed: 0,Title,Year,Link,Genre,MovieID
0,The Shawshank Redemption,(1994),https://www.imdb.com/title/tt0111161/?ref_=adv...,Drama,368823
1,The Godfather,(1972),https://www.imdb.com/title/tt0068646/?ref_=adv...,"Crime, Drama",437384
2,Ramayana: The Legend of Prince Rama,(1993),https://www.imdb.com/title/tt0259534/?ref_=adv...,"Animation, Action, Adventure",742312
3,The Chaos Class,(1975),https://www.imdb.com/title/tt0252487/?ref_=adv...,"Comedy, Drama",218267
4,The Dark Knight,(2008),https://www.imdb.com/title/tt0468569/?ref_=adv...,"Action, Crime, Drama",929223
...,...,...,...,...,...
4995,Pocahontas,(I) (1995),https://www.imdb.com/title/tt0114148/?ref_=adv...,"Animation, Adventure, Drama",166040
4996,Hot Shots!,(1991),https://www.imdb.com/title/tt0102059/?ref_=adv...,"Action, Comedy",699325
4997,Safe House,(2012),https://www.imdb.com/title/tt1599348/?ref_=adv...,"Action, Thriller",206229
4998,High Tension,(2003),https://www.imdb.com/title/tt0338095/?ref_=adv...,Horror,786529


In [40]:
# use the vectorizer to turn a set of title to a matrix (set of number)

vectorizer = TfidfVectorizer(ngram_range=(1,2))
tfidf = vectorizer.fit_transform(movies["Title"])

In [41]:
# search function to compute the similarity between what we enter as a movie name and all the movies

def search(title):
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()     #numpy vector
    indices = np.argpartition(similarity, -5)[-5:]                 #define the titles that have the greatest similarity to our search term, find the most five similars, get their indices 
    results = movies.iloc[indices].iloc[::-1]                      #index the data to get the titles, the last one is the moste similar reverse       
    
    return results

In [42]:
# Let's take a look on our ratings dataset

ratings = pd.read_csv('rating4.csv')
ratings

Unnamed: 0,Movie_Title,Rating,User_Name
0,The Shawshank Redemption,10/10,hitchcockthelegend
1,The Shawshank Redemption,10/10,Sleepin_Dragon
2,The Shawshank Redemption,10/10,EyeDunno
3,The Shawshank Redemption,10/10,alexkolokotronis
4,The Shawshank Redemption,10/10,kaspen12
...,...,...,...
38665,Hondo,6/10,jpdoherty
38666,Hondo,7/10,hitchcockthelegend
38667,Hondo,8/10,Nazi_Fighter_David
38668,Hondo,8/10,NewEnglandPat


In [43]:
# Create a function to generate unique IDs from usernames

def generate_user_id(username):
    return 1 + int(hashlib.md5(username.encode()).hexdigest(), 16) % (10**6)  

# Apply the function to the 'username' column to generate 'user_id' column

ratings['user_id'] = ratings['User_Name'].apply(generate_user_id)
ratings["MovieID"] = ratings["Movie_Title"].apply(generate_movie_id)

In [44]:
ratings

Unnamed: 0,Movie_Title,Rating,User_Name,user_id,MovieID
0,The Shawshank Redemption,10/10,hitchcockthelegend,338010,368823
1,The Shawshank Redemption,10/10,Sleepin_Dragon,324655,368823
2,The Shawshank Redemption,10/10,EyeDunno,441153,368823
3,The Shawshank Redemption,10/10,alexkolokotronis,626718,368823
4,The Shawshank Redemption,10/10,kaspen12,145446,368823
...,...,...,...,...,...
38665,Hondo,6/10,jpdoherty,717618,184449
38666,Hondo,7/10,hitchcockthelegend,338010,184449
38667,Hondo,8/10,Nazi_Fighter_David,733101,184449
38668,Hondo,8/10,NewEnglandPat,166047,184449


In [45]:
#cleaning the Rating column 
ratings['Rating'] = ratings['Rating'].str.replace('/10', '')
ratings["Rating"] = ratings["Rating"].astype(int)
ratings

Unnamed: 0,Movie_Title,Rating,User_Name,user_id,MovieID
0,The Shawshank Redemption,10,hitchcockthelegend,338010,368823
1,The Shawshank Redemption,10,Sleepin_Dragon,324655,368823
2,The Shawshank Redemption,10,EyeDunno,441153,368823
3,The Shawshank Redemption,10,alexkolokotronis,626718,368823
4,The Shawshank Redemption,10,kaspen12,145446,368823
...,...,...,...,...,...
38665,Hondo,6,jpdoherty,717618,184449
38666,Hondo,7,hitchcockthelegend,338010,184449
38667,Hondo,8,Nazi_Fighter_David,733101,184449
38668,Hondo,8,NewEnglandPat,166047,184449


In [46]:
# Create the main function for our recommandation system

def find_similar_movies(movie_id):
    
    # the pepole that liked the the same movie as us
    
    similar_users = ratings[(ratings["MovieID"] == movie_id) & (ratings["Rating"] >= 8)]["user_id"].unique()
    
    # similar_users what else they liked ?
    
    similar_user_recs = ratings[(ratings["user_id"].isin(similar_users)) & (ratings["Rating"] >= 8)]["MovieID"]
    
    #movies spicifique to our niche, people like us liked more than general movies # find only the movies that 10% or more of the users similar to us liked 
    
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    
    # find only the movies that 10% or more of the users similar to us liked  
    
    similar_user_recs = similar_user_recs[similar_user_recs > .10]
    
    
    
    #what percentage of regular people outside of similar liked this movies, find movies that define kind of similarity to the movies we liked
    
    all_users = ratings[(ratings["MovieID"].isin(similar_user_recs.index)) & (ratings["Rating"] >= 8)]  #des gens ils ont aimé les films qu'on a recommandé et rated highly
    
    # what percontage of all user recommande each of this movies
    
    # the % of all users that liked this movies
    all_user_recs = all_users["MovieID"].value_counts() / len(all_users["user_id"].unique())
    
    rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
    
    rec_percentages.columns = ["similar", "all"]  
   
    # creating our score, we want movies that have big differentiel in how they are recommanded between pepole similar to us and the set of all users
   
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    
    rec_percentages = rec_percentages.sort_values("score", ascending=False)
   
    # use the index to merge
    return rec_percentages.head(10).merge(movies, left_index=True, right_on="MovieID")[["score", "Title","Year","Genre"]]

In [47]:
# this widget is just to see the search results, for our project we will be using Tkinter as a interface

movie_name_input = widgets.Text(
    value='Fight Club',
    description='Movie Title:',
    disabled=False
)
recommendation_list = widgets.Output()

def on_type(data):
    with recommendation_list:
        recommendation_list.clear_output()
        title = data["new"]
        if len(title) > 1:
            results = search(title)
            movie_id = results.iloc[0]["MovieID"]
            display(find_similar_movies(movie_id))

movie_name_input.observe(on_type, names='value')

display(movie_name_input, recommendation_list)

Text(value='Fight Club', description='Movie Title:')

Output()

In [49]:
#Tkiter Interface 


# Importing the modules

import pandas as pd
import hashlib
import tkinter as tk
from tkinter import *
from PIL import ImageTk,Image
import webbrowser  

# Create a Tkinter window
root = tk.Tk()
root.title("Movie Recommendation System")
root.geometry("1300x200")  # Set the initial window size

# Load our background image
bg_image = ImageTk.PhotoImage(Image.open("pics/cinema.jpg"))

# Create a canvas that fills the entire window and set the background image
canvas = tk.Canvas(root, width=1300, height=200)
canvas.pack()
canvas.create_image(0, 0, image=bg_image, anchor='nw')

# Define custom colors
bg_color = "#8b0000"  # Dark red background color
button_color = "#e74c3c"  # Red button color
text_color = "#ffffff"  # White text color

# Configure the style
style = ttk.Style()
style.configure("TLabel", padding=10, font=("Helvetica", 14), background=bg_color, foreground=text_color)
style.configure("TEntry", padding=10, font=("Helvetica", 14))
style.configure("TListbox", padding=10, font=("Helvetica", 12), background=bg_color, foreground=text_color)

# Create a StringVar to track changes in the movie title entry
movie_title_var = StringVar()

# Function to handle movie title input and display recommendations
def on_title_change(*args):
    title = movie_title_var.get()
    if not title:  # Check if title is empty
        recommendation_listbox.delete(0, END)  # Clear recommendations when the title is empty
    else:
        recommendation_listbox.delete(0, END)  # Clear previous recommendations
        if len(title) > 5:
            results = search(title)
            if not results.empty:
                movie_id = results.iloc[0]["MovieID"]
                recommendations = find_similar_movies(movie_id)
                for _, row in recommendations.iterrows():
                    recommendation_listbox.insert(END, row['Title'])

# Bind the function to the StringVar's trace method
movie_title_var.trace_add("write", on_title_change)

# Function to open the IMDb page when a movie is clicked
def open_imdb_page(event):
    selected_movie = recommendation_listbox.get(recommendation_listbox.curselection())
    # the IMDb URL for each movie in 'movies' database
    imdb_url = movies[movies['Title'] == selected_movie]['Link'].values[0]
    if imdb_url:
        webbrowser.open(imdb_url)  # Open the IMDb page in the default web browser

# Create and configure widgets with custom styling
movie_name_label = ttk.Label(root, text="Movie Recommendation System")
movie_name_label.pack(pady=10)

movie_name_entry = ttk.Entry(root, textvariable=movie_title_var, width=30)  # Set the width to 30 characters
movie_name_entry.insert(0, "Search for your Movie Title here...")
movie_name_entry.pack(padx=30, pady=10)

# Create a frame to contain the recommendations
recommendation_frame = tk.Frame(root, bg="#8b0000")  # Dark red background color
recommendation_frame.pack(padx=10, pady=10, fill=tk.BOTH, expand=False)

# Create a Listbox to display the recommendations
recommendation_listbox = Listbox(recommendation_frame, height=10, width=50)
recommendation_listbox.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

# Create a scrollbar for the Listbox
scrollbar = Scrollbar(recommendation_frame, command=recommendation_listbox.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
recommendation_listbox.config(yscrollcommand=scrollbar.set)

# Bind the function to open IMDb pages when a movie is clicked
recommendation_listbox.bind("<<ListboxSelect>>", open_imdb_page)

# Apply custom background color
root.configure(background=bg_color)

# Start the Tkinter main loop
root.mainloop()


Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\EMCCd\anaconda3\Lib\tkinter\__init__.py", line 1948, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "C:\Users\EMCCd\AppData\Local\Temp\ipykernel_24672\1330743026.py", line 46, in on_title_change
    recommendation_listbox.delete(0, END)  # Clear previous recommendations
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\EMCCd\anaconda3\Lib\tkinter\__init__.py", line 3245, in delete
    self.tk.call(self._w, 'delete', first, last)
_tkinter.TclError: invalid command name ".!frame.!listbox"
Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\EMCCd\anaconda3\Lib\tkinter\__init__.py", line 1948, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "C:\Users\EMCCd\AppData\Local\Temp\ipykernel_24672\1330743026.py", line 60, in open_imdb_page
    selected_movie = recommendation_listbox.get(recommendation_listbox.curselection()