# by using tkinter with GUI (Manual Input)

# In [18]:
import pandas as pd
from collections import defaultdict
import tkinter as tk
from tkinter import ttk

# LOAD THE MOVIE DATASET FROM A HARD-CODED LOCAL CSV PATH
df = pd.read_csv("C:/Users/91797/Downloads/top_250_cleaned.csv")

# BUILD THE SUGGESTION LIST FOR EACH COMMA-SEPARATED COLUMN (USED BY THE DROPDOWNS)
multi_value_columns = ["genre", "casts", "writers", "directors", "tagline", "overview", "keywords"]
unique_words = {}
for col in multi_value_columns:
    # REPLACE NULLS WITH AN EMPTY STRING SO THE STRING OPERATIONS BELOW (AND LATER
    # ROW-BY-ROW MATCHING) NEVER SEE NaN
    df[col] = df[col].fillna('')
    tokens = set()
    for pieces in df[col].str.split(','):
        # TEST THE *STRIPPED* TOKEN: WHITESPACE-ONLY PIECES (E.G. FROM "a, ,b")
        # WOULD OTHERWISE BE ADDED AS AN EMPTY SUGGESTION
        tokens.update(piece.strip().lower() for piece in pieces if piece.strip())
    # SORTED, DE-DUPLICATED, LOWER-CASED SUGGESTIONS FOR THIS COLUMN
    unique_words[col] = sorted(tokens)

def recommend_movies():  # COLLECT USER INPUT FROM THE GUI FIELDS AND DISPLAY RECOMMENDATIONS
    """Score every movie in ``df`` against the GUI inputs and show the top 10.

    Every text field is read as a comma-separated list of search terms; a
    term matches a movie when it occurs (case-insensitively) as a substring
    of the corresponding column. A matching genre adds 3 points, matching
    casts/writers/directors add 2 points each, and matching
    tagline/overview/keywords add 1 point each. Movies outside the optional
    year/rating limits are skipped.

    If nothing scores and a genre was entered, the highest-rated movies of
    that genre are shown instead (labelled with their rating). The result
    text is written to ``results_var``; nothing is returned.
    """
    import re  # LOCAL IMPORT: ONLY NEEDED TO ESCAPE THE FALLBACK GENRE PATTERN

    def split_terms(raw):
        # "a, b,," -> ['a', 'b']; BLANK FIELDS MUST YIELD NO TERMS, BECAUSE AN
        # EMPTY STRING IS A SUBSTRING OF EVERYTHING AND WOULD MATCH EVERY MOVIE
        return [part.strip().lower() for part in raw.split(',') if part.strip()]

    def parse_limit(raw, convert):
        # BLANK FIELD -> NO LIMIT; OTHERWISE CONVERT (MAY RAISE ValueError)
        raw = raw.strip()
        return convert(raw) if raw else None

    weights = {
        'genre': 3,   # GENRE IS THE MAJOR FACTOR
        'casts': 2, 'writers': 2, 'directors': 2,
        'tagline': 1, 'overview': 1, 'keywords': 1,
    }
    fields = {
        'genre': genre_var,
        'casts': cast_var,
        'writers': writer_var,
        'directors': director_var,
        'tagline': tagline_var,
        'overview': overview_var,
        'keywords': keyword_var,
    }
    wanted = {name: split_terms(var.get()) for name, var in fields.items()}

    try:
        min_year = parse_limit(min_year_var.get(), int)
        max_year = parse_limit(max_year_var.get(), int)
        min_rating = parse_limit(min_rating_var.get(), float)
        max_rating = parse_limit(max_rating_var.get(), float)
    except ValueError:
        # REPORT BAD INPUT IN THE GUI INSTEAD OF RAISING INSIDE THE TK CALLBACK
        results_var.set("Year and rating limits must be numeric.")
        return

    scores = {}   # TITLE -> MATCH SCORE, IN DATASET ORDER
    for _, row in df.iterrows():
        # FILTERING BASED ON YEAR AND RATING
        if min_year is not None and row['year'] < min_year:
            continue
        if max_year is not None and row['year'] > max_year:
            continue
        if min_rating is not None and row['rating'] < min_rating:
            continue
        if max_rating is not None and row['rating'] > max_rating:
            continue

        score = 0
        for name, terms in wanted.items():
            if not terms:
                continue
            cell = row[name]
            haystack = '' if pd.isna(cell) else str(cell).lower()
            if any(term in haystack for term in terms):
                score += weights[name]
        if score > 0:
            scores[row['title']] = score

    # SORT BY SCORE, HIGHEST FIRST (STABLE: TIES KEEP DATASET ORDER)
    sorted_movies = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    if sorted_movies:
        lines = [f"{title} (matchScore: {score})" for title, score in sorted_movies[:10]]
    elif wanted['genre']:
        # NO MATCH: SUGGEST HIGH-RATED MOVIES FROM THE SELECTED GENRE. THE TERMS ARE
        # ESCAPED SO USER TEXT LIKE "(" OR "+" IS MATCHED LITERALLY, NOT AS A REGEX
        pattern = '|'.join(re.escape(term) for term in wanted['genre'])
        genre_filtered = df[df['genre'].str.contains(pattern, case=False, na=False)]
        top_rated = genre_filtered.nlargest(10, 'rating')[['title', 'rating']].values.tolist()
        # THESE NUMBERS ARE RATINGS, NOT MATCH SCORES, SO LABEL THEM ACCORDINGLY
        lines = [f"{title} (rating: {rating})" for title, rating in top_rated]
    else:
        lines = []

    # DISPLAY RESULTS IN THE GUI
    results_var.set("\n".join(lines) if lines else "No matching movies found.")

# GUI INTERFACE SETUP: CREATE THE MAIN TK WINDOW (PARENT OF ALL WIDGETS BELOW) AND SET ITS TITLE
root = tk.Tk()
root.title("Movie Recommender")

# BUILD ONE LABELLED COMBOBOX ROW WITH TYPE-TO-FILTER SUGGESTIONS
def create_combobox(label, variable, values, row):
    """Place a caption and a suggestion combobox on grid row ``row`` of ``root``.

    ``label`` is the caption text, ``variable`` the Tk variable bound to the
    combobox text, and ``values`` the full list of suggestions. Each key
    release re-filters the dropdown through ``update_suggestions``.
    """
    caption = ttk.Label(root, text=label)
    caption.grid(row=row, column=0)

    combo = ttk.Combobox(root, textvariable=variable, values=values, width=50)
    combo.grid(row=row, column=1)

    def on_key_release(event):
        # ALWAYS FILTER AGAINST THE FULL ORIGINAL LIST, NOT THE CURRENT SUBSET
        update_suggestions(combo, values)

    combo.bind("<KeyRelease>", on_key_release)

# NARROW THE DROPDOWN TO THE ENTRIES THAT CONTAIN THE TYPED TEXT
def update_suggestions(combo, values):
    """Refresh ``combo``'s dropdown from ``values`` based on its current text.

    With an empty entry the full ``values`` list is restored; otherwise only
    the entries containing the typed text (case-insensitive substring) are
    kept. The insertion cursor is then moved to the end of the typed text.
    """
    typed = combo.get()
    needle = typed.lower()
    combo['values'] = (
        values   # NOTHING TYPED: RESET TO ALL VALUES
        if typed == ""
        else [candidate for candidate in values if needle in candidate.lower()]
    )
    # PUT THE CURSOR AFTER THE LAST TYPED CHARACTER SO THE USER CAN KEEP TYPING
    combo.icursor(len(typed))

# ONE TK STRING VARIABLE PER INPUT FIELD, PLUS ONE FOR THE RESULTS TEXT
(
    genre_var,
    cast_var,
    writer_var,
    director_var,
    tagline_var,
    overview_var,
    keyword_var,
    min_year_var,
    max_year_var,
    min_rating_var,
    max_rating_var,
    results_var,
) = (tk.StringVar() for _ in range(12))

# ROWS 0-3: COMBOBOXES WITH AUTOCOMPLETE, FED FROM THE UNIQUE-WORD LISTS
combobox_rows = [
    ("Genre", genre_var, "genre"),
    ("Cast", cast_var, "casts"),
    ("Writer", writer_var, "writers"),
    ("Director", director_var, "directors"),
]
for row_index, (caption, variable, column) in enumerate(combobox_rows):
    create_combobox(caption, variable, unique_words[column], row_index)

# ROWS 4-10: PLAIN TEXT ENTRIES FOR THE REMAINING INPUTS
entry_rows = [
    ("Tagline", tagline_var),
    ("Overview", overview_var),
    ("Keyword", keyword_var),
    ("Min Year", min_year_var),
    ("Max Year", max_year_var),
    ("Min Rating", min_rating_var),
    ("Max Rating", max_rating_var),
]
for row_index, (caption, variable) in enumerate(entry_rows, start=4):
    ttk.Label(root, text=caption).grid(row=row_index, column=0)
    ttk.Entry(root, textvariable=variable, width=50).grid(row=row_index, column=1)

# ROW 11: BUTTON THAT TRIGGERS THE MOVIE RECOMMENDATION
recommend_button = ttk.Button(root, text="Recommend", command=recommend_movies)
recommend_button.grid(row=11, column=0, columnspan=2)

# ROW 12: OUTPUT AREA, BOUND TO results_var
results_label = ttk.Label(root, textvariable=results_var, wraplength=400)
results_label.grid(row=12, column=0, columnspan=2)

# START THE GUI EVENT LOOP (BLOCKS UNTIL THE WINDOW IS CLOSED)
root.mainloop()


# by using dictionary (Manual Input)

# In [16]:
import pandas as pd
from collections import defaultdict

# LOAD DATASET FROM A HARD-CODED LOCAL CSV PATH INTO THE MODULE-LEVEL DATAFRAME `df`,
# WHICH recommend_movies BELOW READS
df = pd.read_csv("C:/Users/91797/Downloads/top_250_cleaned.csv")

# FUNCTION TO RECOMMEND MOVIES
def recommend_movies(user_input, data=None):
    """Return up to 10 movies ranked by how well they match ``user_input``.

    Args:
        user_input: Mapping with optional keys ``genre``, ``casts``,
            ``writers``, ``directors``, ``tagline``, ``overview`` and
            ``keywords`` (each a list of terms or a comma-separated string)
            and ``min_year``, ``max_year``, ``min_rating``, ``max_rating``
            (a number, or ``None`` for no limit). Missing keys are treated
            as empty / no limit.
        data: DataFrame to search. It must have ``title``, ``year`` and
            ``rating`` columns plus a column for every text field that is
            queried. Defaults to the module-level ``df``.

    Returns:
        A list of ``(title, score)`` tuples sorted by score, highest first.
        A term matches when it is a case-insensitive substring of the
        column; genre adds 3, casts/writers/directors add 2 each, and
        tagline/overview/keywords add 1 each. If nothing scores and a genre
        was given, the result is instead a list of ``[title, rating]`` pairs
        for the highest-rated movies of that genre.
    """
    import re  # LOCAL IMPORT: ONLY NEEDED TO ESCAPE THE FALLBACK GENRE PATTERN

    movies = df if data is None else data

    def as_terms(value):
        # ACCEPT A LIST OF TERMS OR ONE COMMA-SEPARATED STRING. A BARE STRING MUST
        # NOT BE ITERATED DIRECTLY (THAT WOULD MATCH CHARACTER BY CHARACTER), AND
        # EMPTY TERMS MUST BE DROPPED (AN EMPTY STRING MATCHES EVERY MOVIE)
        if value is None:
            return []
        if isinstance(value, str):
            value = value.split(',')
        return [str(item).strip().lower() for item in value if str(item).strip()]

    # DIFFERENT WEIGHTS FOR FIELDS: GENRE COUNTS MOST, PEOPLE NEXT, FREE TEXT LEAST
    weights = {
        'genre': 3,
        'casts': 2, 'writers': 2, 'directors': 2,
        'tagline': 1, 'overview': 1, 'keywords': 1,
    }
    wanted = {name: as_terms(user_input.get(name)) for name in weights}

    min_year = user_input.get('min_year')
    max_year = user_input.get('max_year')
    min_rating = user_input.get('min_rating')
    max_rating = user_input.get('max_rating')

    scores = {}   # TITLE -> SCORE, IN DATASET ORDER

    # ITERATE THROUGH MOVIES AND CALCULATE SCORES
    for _, row in movies.iterrows():
        # FILTER MOVIES BASED ON YEAR AND RATING CONSTRAINTS
        if min_year is not None and row['year'] < min_year:
            continue
        if max_year is not None and row['year'] > max_year:
            continue
        if min_rating is not None and row['rating'] < min_rating:
            continue
        if max_rating is not None and row['rating'] > max_rating:
            continue

        score = 0
        for name, terms in wanted.items():
            if not terms:
                continue
            cell = row[name]
            # A MISSING (NaN) CELL MATCHES NOTHING INSTEAD OF CRASHING ON .lower()
            haystack = '' if pd.isna(cell) else str(cell).lower()
            if any(term in haystack for term in terms):
                score += weights[name]

        # ADD MOVIE TO SCORES DICTIONARY IF IT HAS A POSITIVE SCORE
        if score > 0:
            scores[row['title']] = score

    # SORT MOVIES BASED ON SCORE IN DESCENDING ORDER (STABLE: TIES KEEP DATASET ORDER)
    sorted_movies = sorted(scores.items(), key=lambda x: x[1], reverse=True)

    # IF NO MATCHES, RETURN TOP-RATED MOVIES FROM THE SAME GENRE
    if not sorted_movies and wanted['genre']:
        # ESCAPE THE TERMS SO TEXT LIKE "(" OR "+" IS MATCHED LITERALLY, NOT AS A REGEX
        pattern = '|'.join(re.escape(term) for term in wanted['genre'])
        genre_filtered = movies[movies['genre'].str.contains(pattern, case=False, na=False)]
        sorted_movies = genre_filtered.nlargest(10, 'rating')[['title', 'rating']].values.tolist()

    return sorted_movies[:10]  # RETURN TOP 10 MATCHING MOVIES

# EXAMPLE QUERY (MODIFY AS NEEDED): ACTION MOVIES FROM 2000 ONWARDS RATED AT LEAST 7.5,
# WITH A CAST MEMBER WHOSE NAME CONTAINS "Robert Do"
user_input = dict(
    genre=['Action'],
    casts=['Robert Do'],
    writers=[],
    directors=[],
    tagline='',
    overview='',
    keywords='',
    min_year=2000,
    max_year=None,
    min_rating=7.5,
    max_rating=None,
)

# RUN THE RECOMMENDER ON THE EXAMPLE QUERY
recommendations = recommend_movies(user_input)

# PRINT ONE LINE PER RECOMMENDED MOVIE
for title, score in recommendations:
    print(f"{title} (SCORE: {score})")


# Output of the cell above:
# Avengers: Infinity War (SCORE: 5)
# Avengers: Endgame (SCORE: 5)
# The Dark Knight (SCORE: 3)
# The Lord of the Rings: The Return of the King (SCORE: 3)
# The Lord of the Rings: The Fellowship of the Ring (SCORE: 3)
# The Lord of the Rings: The Two Towers (SCORE: 3)
# Inception (SCORE: 3)
# Gladiator (SCORE: 3)
# Spider-Man: Into the Spider-Verse (SCORE: 3)
# The Dark Knight Rises (SCORE: 3)
