In [1]:
import os
import tkinter as tk
from tkinter import ttk, messagebox

import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors

In [2]:
# STEP 1: Load Data

In [3]:
# Load the ratings table. The file location can be overridden with the
# MOVIE_RATINGS_CSV environment variable so the notebook runs on other
# machines; the original local path remains the default.
ratings = pd.read_csv(
    os.environ.get("MOVIE_RATINGS_CSV", r"C:\Users\admin\Downloads\archive\Dataset.csv")
)
ratings

Unnamed: 0,user_id,item_id,rating,timestamp
0,0,50,5,881250949
1,0,172,5,881250949
2,0,133,1,881250949
3,196,242,3,881250949
4,186,302,3,891717742
...,...,...,...,...
99998,880,476,3,880175444
99999,716,204,5,879795543
100000,276,1090,1,874795795
100001,13,225,2,882399156


In [4]:
# Load the item_id -> title lookup table. The file location can be overridden
# with the MOVIE_TITLES_CSV environment variable so the notebook runs on other
# machines; the original local path remains the default.
movies = pd.read_csv(
    os.environ.get("MOVIE_TITLES_CSV", r"C:\Users\admin\Downloads\archive\Movie_Id_Titles.csv")
)
movies

Unnamed: 0,item_id,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)
...,...,...
1677,1678,Mat' i syn (1997)
1678,1679,B. Monkey (1998)
1679,1680,Sliding Doors (1998)
1680,1681,You So Crazy (1994)


In [5]:
# Attach the movie title to every rating row by joining on the shared
# `item_id` key (default inner join).
df = ratings.merge(movies, on='item_id')
df

Unnamed: 0,user_id,item_id,rating,timestamp,title
0,0,50,5,881250949,Star Wars (1977)
1,0,172,5,881250949,"Empire Strikes Back, The (1980)"
2,0,133,1,881250949,Gone with the Wind (1939)
3,196,242,3,881250949,Kolya (1996)
4,186,302,3,891717742,L.A. Confidential (1997)
...,...,...,...,...,...
99998,880,476,3,880175444,"First Wives Club, The (1996)"
99999,716,204,5,879795543,Back to the Future (1985)
100000,276,1090,1,874795795,Sliver (1993)
100001,13,225,2,882399156,101 Dalmatians (1996)


In [6]:
# STEP 2: Popularity-Based Recommendation

In [7]:
def popular_movies(top_n=10, min_ratings=100):
    """Return the highest-rated titles among those with enough ratings.

    Reads the module-level ``df`` (one row per rating, with ``title`` and
    ``rating`` columns).

    Parameters
    ----------
    top_n : int, default 10
        Maximum number of titles to return.
    min_ratings : int, default 100
        A title is kept only if it has strictly more than this many ratings,
        so that a handful of 5-star votes cannot dominate the ranking.

    Returns
    -------
    pandas.DataFrame
        Indexed by title with columns ``AvgRating`` and ``RatingCount``,
        sorted by ``AvgRating`` in descending order.
    """
    popular = df.groupby('title').agg({'rating': ['mean', 'count']})
    # Flatten the (rating, mean) / (rating, count) MultiIndex columns.
    popular.columns = ['AvgRating', 'RatingCount']
    popular = popular[popular['RatingCount'] > min_ratings]
    return popular.sort_values('AvgRating', ascending=False).head(top_n)


In [8]:
# STEP 3: Prepare Pivot Table

In [9]:
# Wide rating matrix: one row per user_id, one column per title.
# User/title pairs without a rating are filled with 0.
movie_matrix = pd.pivot_table(
    df,
    values='rating',
    index='user_id',
    columns='title',
).fillna(0)

In [10]:
# Display the user_id x title rating matrix built in the previous cell.
movie_matrix

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,2.0,5.0,0.0,0.0,3.0,4.0,0.0,0.0,...,0.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
940,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
941,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
942,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# STEP 4: User-Based Collaborative Filtering (cosine similarity between movies, computed from their user-rating vectors)

In [12]:
def user_based_recommend(movie_name, k=10):
    """Recommend the ``k`` titles whose user-rating vectors are most similar.

    Similarity is the cosine similarity between columns of the module-level
    ``movie_matrix`` (rows = users, columns = titles).

    Parameters
    ----------
    movie_name : str
        Title to find neighbours for; must be a column of ``movie_matrix``.
    k : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    list of str or None
        Titles ordered from most to least similar, never including
        ``movie_name`` itself. ``None`` if ``movie_name`` is not a known title.
    """
    if movie_name not in movie_matrix.columns:
        return None
    # Rows = titles, columns = users.
    item_vectors = movie_matrix.T
    # Only the selected title's row of the similarity matrix is needed, so
    # compare that single row against all titles instead of building N x N.
    scores = cosine_similarity(item_vectors.loc[[movie_name]], item_vectors)[0]
    sim_scores = pd.Series(scores, index=item_vectors.index)
    # Drop the query title by label: it is not guaranteed to sort first when
    # another title has an identical rating vector (similarity 1.0 tie).
    sim_scores = sim_scores.drop(movie_name)
    return sim_scores.sort_values(ascending=False).head(k).index.tolist()

In [13]:
# STEP 5: Item-Based Collaborative Filtering (KNN)

In [14]:
def item_based_recommend(movie_name, k=10):
    """Recommend the ``k`` nearest titles using a brute-force cosine KNN model.

    The model is fitted on the transpose of the module-level ``movie_matrix``
    so that each sample is one title described by its ratings from all users.

    Parameters
    ----------
    movie_name : str
        Title to find neighbours for; must be a column of ``movie_matrix``.
    k : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    list of str or None
        Titles ordered from nearest to farthest, never including
        ``movie_name`` itself. ``None`` if ``movie_name`` is not a known title.
    """
    if movie_name not in movie_matrix.columns:
        return None
    # Plain array of shape (n_titles, n_users); row i belongs to
    # movie_matrix.columns[i].
    item_vectors = movie_matrix.T.to_numpy()
    model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
    model_knn.fit(item_vectors)
    movie_idx = movie_matrix.columns.get_loc(movie_name)
    # Select the title's ROW by position. (Indexing the transposed DataFrame
    # with [movie_idx] would look up a user_id column label instead.)
    query = item_vectors[movie_idx].reshape(1, -1)
    # Ask for one extra neighbour because the query title is part of the
    # fitted data, but never more than the number of fitted samples.
    n_neighbors = min(k + 1, item_vectors.shape[0])
    _, indices = model_knn.kneighbors(query, n_neighbors=n_neighbors)
    # Remove the query title by index rather than assuming it is returned first.
    similar_movies = [movie_matrix.columns[i] for i in indices.flatten() if i != movie_idx]
    return similar_movies[:k]

In [15]:
# STEP 6: Matrix Factorization using SVD

In [16]:
def svd_recommend(movie_name, k=10, n_components=20, random_state=42):
    """Recommend titles whose latent (SVD) representations correlate most.

    The transpose of the module-level ``movie_matrix`` (one row per title) is
    reduced with ``TruncatedSVD`` and titles are ranked by the Pearson
    correlation of their reduced vectors with that of ``movie_name``.

    Parameters
    ----------
    movie_name : str
        Title to find neighbours for; must be a column of ``movie_matrix``.
    k : int, default 10
        Maximum number of titles to return.
    n_components : int, default 20
        Number of latent dimensions kept by the SVD.
    random_state : int or None, default 42
        Seed passed to ``TruncatedSVD`` so repeated calls return the same
        recommendations.

    Returns
    -------
    list of str or None
        Titles ordered from most to least correlated, never including
        ``movie_name`` itself. ``None`` if ``movie_name`` is not a known title.
    """
    if movie_name not in movie_matrix.columns:
        return None
    svd = TruncatedSVD(n_components=n_components, random_state=random_state)
    matrix_reduced = svd.fit_transform(movie_matrix.T)
    corr = np.corrcoef(matrix_reduced)
    movie_idx = movie_matrix.columns.get_loc(movie_name)
    row = corr[movie_idx]
    # NaN correlations (zero-variance vectors) would sort to the FRONT after
    # the reversed argsort below, so push them to the bottom of the ranking.
    corr_scores = np.where(np.isnan(row), -np.inf, row)
    # Exclude the query title explicitly instead of assuming it ranks first.
    corr_scores[movie_idx] = -np.inf
    top = min(k, len(corr_scores) - 1)
    similar_indices = np.argsort(corr_scores)[::-1][:top]
    similar_movies = movie_matrix.columns[similar_indices]
    return similar_movies.tolist()

In [17]:
# STEP 7: GUI Callback (runs the selected recommender and shows the results)

In [18]:
def recommend_movies():
    """Button callback: run the selected recommender and show its results.

    Reads the current selections from the ``movie_combo`` and ``algo_combo``
    widgets, computes the recommendations, and replaces the contents of the
    ``result_text`` widget with one line per recommended title.

    A movie selection is required for every choice except
    "Popularity-Based", whose ranking does not depend on a movie.
    """
    movie = movie_combo.get()
    algo = algo_combo.get()

    # Popularity ranking ignores the selected movie, so only the other
    # choices need one.
    if algo != "Popularity-Based" and not movie:
        messagebox.showwarning("Input Error", "Please select a movie!")
        return

    # Recommenders that take a movie title and return a list of titles,
    # or None when the title is unknown.
    title_recommenders = {
        "User-Based CF": user_based_recommend,
        "Item-Based CF": item_based_recommend,
        "Matrix Factorization (SVD)": svd_recommend,
    }

    if algo == "Popularity-Based":
        results = popular_movies()
        output = list(results.index)
    elif algo in title_recommenders:
        results = title_recommenders[algo](movie)
        output = results if results else ["Movie not found."]
    else:
        output = ["Invalid algorithm."]

    # Clear previous results ("1.0" = line 1, character 0) before writing.
    result_text.delete("1.0", tk.END)
    for m in output:
        result_text.insert(tk.END, f"🎬 {m}\n")

In [19]:
# STEP 8: Build GUI

In [20]:
# --- Main window -----------------------------------------------------------
root = tk.Tk()
root.title("🎥 Movie Recommendation System")
root.geometry("600x500")
root.configure(bg="#121212")

# --- Movie selector --------------------------------------------------------
movie_label = tk.Label(
    root, text="Select a Movie", fg="white", bg="#121212", font=("Arial", 14)
)
movie_label.pack(pady=10)
movie_titles = sorted(movie_matrix.columns)  # alphabetical list of all titles
movie_combo = ttk.Combobox(root, values=movie_titles, width=50)
movie_combo.pack(pady=5)

# --- Algorithm selector ----------------------------------------------------
algo_label = tk.Label(
    root, text="Select Algorithm", fg="white", bg="#121212", font=("Arial", 14)
)
algo_label.pack(pady=10)
algo_names = [
    "Popularity-Based",
    "User-Based CF",
    "Item-Based CF",
    "Matrix Factorization (SVD)",
]
algo_combo = ttk.Combobox(root, values=algo_names, width=50)
algo_combo.pack(pady=5)

# --- Action button: runs recommend_movies on click -------------------------
recommend_button = tk.Button(
    root,
    text="Recommend",
    command=recommend_movies,
    bg="#4CAF50",
    fg="white",
    font=("Arial", 12),
)
recommend_button.pack(pady=15)

# --- Results area ----------------------------------------------------------
result_text = tk.Text(root, height=15, width=60, wrap=tk.WORD)
result_text.pack(pady=10)

# Hand control to Tk; this call blocks until the window is closed.
root.mainloop()