In [1]:
import pandas as pd
from lenskit import batch, topn
from lenskit import crossfold as xf
from lenskit.metrics import topn as tnmetrics
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser
from lenskit import topn
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsRegressor

In [2]:
import pandas as pd
preprocessed_dataset_folder = "../Data/PreprocessedDataset"
ratings_df = pd.read_csv(preprocessed_dataset_folder+"/ratings.csv")
movies_df = pd.read_csv(preprocessed_dataset_folder+"/movies.csv")
user_plots_ratings_df = pd.read_csv(preprocessed_dataset_folder+"/user_plots.csv") #first run notebook algorithm_experiments to get this data

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsRegressor
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Select movies rated by the user
def getRecommendations(user_plots_ratings_df, user_id):
    user_rated_movies = user_plots_ratings_df[user_plots_ratings_df['user_id'] == user_id]
    X = user_rated_movies['plot + title + genres']
    X_labels = user_rated_movies['item']
    y = user_rated_movies['rating']
    X_not_rated_labels = user_plots_ratings_df[user_plots_ratings_df['user_id'] != user_id]['item'].unique()
    X_not_rated = user_plots_ratings_df[user_plots_ratings_df['user_id'] != user_id]['plot + title + genres'].unique()
    vectorizer = TfidfVectorizer()
    X_tfidf = vectorizer.fit_transform(X)
    X_not_rated_tfidf = vectorizer.transform(X_not_rated)
    neigh = KNeighborsRegressor(n_neighbors=3, metric='cosine')
    neigh.fit(X_tfidf, y)
    y_pred = neigh.predict(X_not_rated_tfidf)
    not_rated_movies_predictions = pd.DataFrame({
        'item': user_plots_ratings_df[user_plots_ratings_df['user_id'] != user_id]['item'].unique(),
        'predicted_rating': y_pred
    })
    recommended_movies = pd.merge(not_rated_movies_predictions, movies_df, on='item')
    recommended_movies = recommended_movies.sort_values(by='predicted_rating', ascending=False).head(5)
    print(recommended_movies[['item', 'predicted_rating']])

    cosine_sim_matrix = cosine_similarity(X_not_rated_tfidf, X_tfidf)
    cosine_sim_df = pd.DataFrame(cosine_sim_matrix, index=X_not_rated_labels, columns=X_labels)
    return cosine_sim_df, recommended_movies


In [4]:
user_id = 66926#ratings_df['user'].sample(1, random_state=42).iloc[0]

print(f"Recommendations for user {user_id}:")

cosine_sim_df,recommended_movies = getRecommendations(user_plots_ratings_df, user_id)

Recommendations for user 66926:
     item  predicted_rating
955  1283          4.166667
10    923          4.166667
129  4014          4.166667
328  2294          4.000000
213  4388          4.000000


In [5]:
def getExplanations(recommended_movies, cosine_sim_df,user_plots_ratings_df, user_id,k=3):
    explanations = []
    for index, row in recommended_movies.iterrows():
        recommended_item_id = row['item']
        item_title = row['title']
        string = f"Recommended item \" {item_title}\" because you previously watched items with similar plot:"
        explanations.append(string)
        print(string)
        cosine_sim_values = cosine_sim_df.loc[recommended_item_id].sort_values(ascending=False)
        
        similar_items = cosine_sim_values.index[0:k]
        
        similar_items_info = []
        for similar_item_id in similar_items:
            similar_item_title = movies_df[movies_df['item'] == similar_item_id]['title'].values[0]
            similar_item_rating = user_plots_ratings_df[(user_plots_ratings_df['item'] == similar_item_id) & (user_plots_ratings_df['user_id'] == user_id)]['rating'].values[0]
            similar_items_info.append((similar_item_id, similar_item_title, similar_item_rating))
        
        for i, (similar_item_id, similar_item_title, similar_item_rating) in enumerate(similar_items_info):
            string2 = f" {i+1})  \"{similar_item_title}\" (ID: {similar_item_id}) and gave rating: {similar_item_rating}"
            explanations.append(string2)
            print(string2)
    return explanations


In [6]:
expl = getExplanations(recommended_movies, cosine_sim_df,user_plots_ratings_df, user_id)

Recommended item " high noon" because you previously watched items with similar plot:
 1)  "citizen kane" (ID: 923) and gave rating: 5.0
 2)  "saving private ryan" (ID: 2028) and gave rating: 3.0
 3)  "blazing saddles" (ID: 3671) and gave rating: 4.5
Recommended item " citizen kane" because you previously watched items with similar plot:
 1)  "citizen kane" (ID: 923) and gave rating: 5.0
 2)  "bringing up baby" (ID: 955) and gave rating: 4.0
 3)  "big" (ID: 2797) and gave rating: 3.5
Recommended item " chocolat" because you previously watched items with similar plot:
 1)  "willy wonka & the chocolate factory" (ID: 1073) and gave rating: 4.0
 2)  "blazing saddles" (ID: 3671) and gave rating: 4.5
 3)  "happiness" (ID: 2318) and gave rating: 4.0
Recommended item " antz" because you previously watched items with similar plot:
 1)  "glory" (ID: 1242) and gave rating: 3.5
 2)  "blazing saddles" (ID: 3671) and gave rating: 4.5
 3)  "paths of glory" (ID: 1178) and gave rating: 4.0
Recommended 

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import numpy as np

def preprocess_plot_text(plot_text):
    tokens = word_tokenize(plot_text.replace("|"," ")) # to split genres
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word.lower() for word in tokens if word.isalnum() and word.lower() not in stop_words]
    return " ".join(filtered_tokens)

movies_df['plot + title + genres'] = (movies_df['plot']+" "+movies_df['title']+" "+movies_df['genres']).apply(preprocess_plot_text)
def get_user_rated_movies_plots(new_user_df, movies):
    rated_item_ids = list(movies)
    print(rated_item_ids)
    rated_movies_df = movies_df[movies_df['item'].isin(rated_item_ids)]
    rated_movies_df = pd.concat([rated_movies_df[['item', 'plot + title + genres']], new_user_df],axis=1)
    rated_movies_df['user_id'] = 0
    return rated_movies_df

In [8]:
import tkinter as tk
from tkinter import messagebox

class MovieRatingGUI:
    def __init__(self, root, movies):
        self.root = root
        self.movies = movies
        self.ratings = {}
        self.create_movie_rating_form(submit_button_size=(20, 5), submit_button_position=(600, 300))

    def create_movie_rating_form(self, submit_button_size, submit_button_position):
        # Determine the size of the enlarged window
        enlarged_width = 800  # 4 times the original width
        enlarged_height = 600  # 4 times the original height

        # Set the size of the window
        self.root.geometry(f"{enlarged_width}x{enlarged_height}")

        # Create a larger label with larger font size
        title_label = tk.Label(self.root, text="Pick at least 15 movies which you watched:", font=("Arial", 20))
        title_label.pack()

        # Create a scrollbar for the listbox
        scrollbar = tk.Scrollbar(self.root, orient=tk.VERTICAL)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        # Create a larger scrollable listbox with separation between cells
        self.movie_listbox = tk.Listbox(self.root, yscrollcommand=scrollbar.set, selectmode=tk.MULTIPLE, font=("Arial", 15), height=20, width=50)
        for movie in self.movies:
            self.movie_listbox.insert(tk.END, movie)
        self.movie_listbox.pack(side=tk.LEFT, padx=20, pady=20)
        scrollbar.config(command=self.movie_listbox.yview)

        # Create a larger Submit button with specified size and position
        submit_button = tk.Button(self.root, text="Submit", command=self.submit_ratings, width=submit_button_size[0], height=submit_button_size[1], bg="red")
        submit_button.place(x=submit_button_position[0], y=submit_button_position[1])

    def submit_ratings(self):
        selected_movies_indices = self.movie_listbox.curselection()
        if len(selected_movies_indices) < 15:
            messagebox.showerror("Error", "Please select at least 15 movies.")
        else:
            selected_movies = [self.movies[index] for index in selected_movies_indices]
            if len(selected_movies) > 15:
                selected_movies = selected_movies[:15]  # Take only the first 15 movies if more than 15 are selected
            self.show_selected_movies(selected_movies)

    def show_selected_movies(self, movies):
        new_window = tk.Tk()
        new_window.title("Rate Selected Movies")
        # Dictionary to store movie ratings
        movie_ratings = {}

        for movie in movies:
            label = tk.Label(new_window, text=movie, font=("Arial", 10))
            label.grid(row=movies.index(movie), column=0, sticky="w", padx=10, pady=1)
            
            # Create a Scale widget for rating from 0 to 5
            rating_scale = tk.Scale(new_window, from_=0, to=5, orient=tk.HORIZONTAL, resolution=0.1)
            rating_scale.grid(row=movies.index(movie), column=1, padx=10, pady=1)
            
            # Store the movie and its corresponding rating scale in the dictionary
            movie_ratings[movie] = rating_scale

        submit_button = tk.Button(new_window, text="Submit Ratings", command=lambda: self.submit_ratings_with_ratings(new_window, movie_ratings))
        submit_button.grid(row=len(movies), columnspan=2, pady=10)


    def submit_ratings_with_ratings(self, window, movie_ratings):
        new_user = [{'title': movie, 'rating': float(rating.get())} for movie, rating in movie_ratings.items()]
        new_user_df = pd.DataFrame(new_user)
        
        # Filter movies_df based on selected titles
        selected_titles = new_user_df['title'].tolist()
        filtered_movies_df = movies_df[movies_df['title'].isin(selected_titles)]
        user_plot = get_user_rated_movies_plots(new_user_df['rating'], filtered_movies_df['item'])
        user_plot_all = pd.concat([user_plots_ratings_df, user_plot], axis=0)
        cosine, recomm = getRecommendations(user_plot_all, 0)
        explanations = getExplanations(recomm, cosine, user_plot_all, 0)

        
        explanation_window = tk.Tk()
        explanation_window.title("Rate explanations")
        window_width = 1000
        window_height = 800
        screen_width = explanation_window.winfo_screenwidth()
        screen_height = explanation_window.winfo_screenheight()

        # Calculate the position for the explanations window to be centered on the screen
        x_coordinate = (screen_width - window_width) // 2
        y_coordinate = (screen_height - window_height) // 2
    
        explanation_window.geometry(f"{window_width}x{window_height}+{x_coordinate}+{y_coordinate}")
        explanations_listbox = tk.Listbox(explanation_window, font=("Arial", 12), width=80, height=20)
        explanations_listbox.grid(row=0, column=0, padx=20, pady=20)

        # Insert explanations into the Listbox
        for explanation in explanations:
            explanations_listbox.insert(tk.END, explanation)

        # Create a Scale widget for each explanation and rating from 0 to 5
        rating_scales = []
        for _ in explanation:
            rating_scale = tk.Scale(explanation_window, from_=0, to=5, orient=tk.HORIZONTAL, resolution=0.1, length=200)
            rating_scale.pack(padx=20, pady=5)
            rating_scales.append(rating_scale)  # Store rating scales for later use

        def submit_ratings_and_ratings():
            # Handle logic for rating scales and explanations here
            ratings = [scale.get() for scale in rating_scales]
            print("Explanations Ratings:", ratings)

        # Create a Submit button for ratings
        submit_button_2 = tk.Button(explanation_window, text="Submit Ratings", command=explanation_window.destroy)
        submit_button_2.grid(row=1, column=0, pady=10)



# List of movies (replace this with your actual list of movies)
movies_list =  movies_df['title'].values.tolist()

root = tk.Tk()
root.title("Movie Rating App")
app = MovieRatingGUI(root, movies_list)
root.mainloop()


[1, 2, 3, 5, 6, 7, 10, 14, 16, 17, 18, 19, 21, 22, 24]
     item  predicted_rating
0     111          0.933333
333  2985          0.933333
737  5015          0.933333
348  6440          0.933333
739  6503          0.933333
Recommended item " taxi driver" because you previously watched items with similar plot:
 1)  "copycat" (ID: 22) and gave rating: 2.8
 2)  "father of the bride part ii" (ID: 5) and gave rating: 0.0
 3)  "sense and sensibility" (ID: 17) and gave rating: 0.0
Recommended item " robocop" because you previously watched items with similar plot:
 1)  "copycat" (ID: 22) and gave rating: 2.8
 2)  "heat" (ID: 6) and gave rating: 0.0
 3)  "jumanji" (ID: 2) and gave rating: 0.0
Recommended item " monster's ball" because you previously watched items with similar plot:
 1)  "four rooms" (ID: 18) and gave rating: 0.0
 2)  "father of the bride part ii" (ID: 5) and gave rating: 0.0
 3)  "copycat" (ID: 22) and gave rating: 2.8
Recommended item " barton fink" because you previously watc

Exception in Tkinter callback
Traceback (most recent call last):
  File "c:\Users\grado\AppData\Local\Programs\Python\Python310\lib\tkinter\__init__.py", line 1921, in __call__
    return self.func(*args)
  File "C:\Users\grado\AppData\Local\Temp\ipykernel_23896\3260700059.py", line 65, in <lambda>
    submit_button = tk.Button(new_window, text="Submit Ratings", command=lambda: self.submit_ratings_with_ratings(new_window, movie_ratings))
  File "C:\Users\grado\AppData\Local\Temp\ipykernel_23896\3260700059.py", line 105, in submit_ratings_with_ratings
    rating_scale.pack(padx=20, pady=5)
  File "c:\Users\grado\AppData\Local\Programs\Python\Python310\lib\tkinter\__init__.py", line 2425, in pack_configure
    self.tk.call(
_tkinter.TclError: cannot use geometry manager pack inside . which already has slaves managed by grid
