In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load the data: wine catalogue, user ratings and the precomputed
# wine-to-wine similarity matrix.
wines = pd.read_csv('wines.csv')
ratings = pd.read_csv('XWines_Slim_150K_ratings.csv', low_memory=False)
wine_similarities = np.load('wine_similarities_w2v.npy')

# Mapping between WineID and matrix index. The index is the row position of
# the wine in wines.csv; the code below uses it for both axes of
# wine_similarities (assumes the matrix was built in that same order --
# TODO confirm against the script that produced the .npy file).
wine_id_to_idx = {wine_id: idx for idx, wine_id in enumerate(wines['WineID'])}
idx_to_wine_id = dict(enumerate(wines['WineID']))

# Candidate wines for recommendation: rated at least once AND present in the
# catalogue. Rated wines that are missing from wines.csv have neither a
# similarity row nor a name, so recommending them would fail at name lookup.
all_wine_ids = set(wines['WineID']).intersection(set(ratings['WineID']))

def get_unrated_wines(user_id, train_ratings, all_wine_ids):
    """Return the wines from ``all_wine_ids`` that ``user_id`` has not rated.

    Args:
        user_id: Value matched against the ``UserID`` column.
        train_ratings: DataFrame with ``UserID`` and ``WineID`` columns.
        all_wine_ids: Set of candidate wine IDs.

    Returns:
        A list (in no particular order) of the candidate IDs that do not
        appear among the user's rows in ``train_ratings``.
    """
    belongs_to_user = train_ratings['UserID'] == user_id
    already_rated = set(train_ratings.loc[belongs_to_user, 'WineID'].unique())
    remaining = all_wine_ids - already_rated
    return list(remaining)

def get_similarities(unrated_wine, rated_wines, similarity_matrix):
    """Pair each rated wine with its similarity to ``unrated_wine``.

    Wine IDs are translated to matrix positions through the module-level
    ``wine_id_to_idx`` mapping; IDs that are not in the mapping are skipped.

    Args:
        unrated_wine: ID of the wine to compare against.
        rated_wines: Iterable of wine IDs the user has rated.
        similarity_matrix: 2-D array indexed as ``[unrated_idx, rated_idx]``.

    Returns:
        A list of ``(rated_wine_id, similarity)`` tuples sorted by similarity,
        highest first. Empty when ``unrated_wine`` itself is unknown.
    """
    if unrated_wine not in wine_id_to_idx:
        return []

    target_row = wine_id_to_idx[unrated_wine]
    scored = [
        (wine_id, similarity_matrix[target_row, wine_id_to_idx[wine_id]])
        for wine_id in rated_wines
        if wine_id in wine_id_to_idx
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

def predict_rating(user_id, unrated_wine, train_ratings):
    """Predict the rating ``user_id`` would give to ``unrated_wine``.

    The prediction is a similarity-weighted average of the user's own
    ratings: ``sum(sim * rating) / sum(|sim|)`` over every wine the user has
    rated that is known to the similarity matrix.

    Args:
        user_id: Value matched against the ``UserID`` column.
        unrated_wine: ID of the wine to score.
        train_ratings: DataFrame with ``UserID``, ``WineID`` and ``Rating``
            columns.

    Returns:
        ``np.nan`` when the user has no ratings; the user's mean rating when
        the similarity weights sum to zero (including when no similarity is
        available at all); otherwise the weighted average.
    """
    user_ratings = train_ratings.loc[
        train_ratings['UserID'] == user_id, ['WineID', 'Rating']
    ]
    if user_ratings.empty:
        return np.nan

    # Build the WineID -> Rating lookup once instead of scanning the frame
    # for every rated wine. ``setdefault`` keeps the first rating when a wine
    # was rated more than once; ``to_numpy`` keeps numpy scalar types so the
    # arithmetic below is carried out in the same precision as before.
    rating_by_wine = {}
    for wine_id, rating in zip(user_ratings['WineID'].to_numpy(),
                               user_ratings['Rating'].to_numpy()):
        rating_by_wine.setdefault(wine_id, rating)

    similar_wines = get_similarities(
        unrated_wine, user_ratings['WineID'].tolist(), wine_similarities
    )

    numerator = 0
    denominator = 0
    for wine, similarity in similar_wines:
        numerator += similarity * rating_by_wine[wine]
        denominator += abs(similarity)

    if denominator == 0:
        # No usable similarity information: fall back to the user's mean.
        return user_ratings['Rating'].mean()

    return numerator / denominator

def predict_all_ratings(user_id):
    """Predict a rating for every candidate wine ``user_id`` has not rated.

    Args:
        user_id: Value matched against the ``UserID`` column of the
            module-level ``ratings`` table.

    Returns:
        A dict mapping each unrated wine ID (taken from the module-level
        ``all_wine_ids``) to the value returned by ``predict_rating``.
    """
    # Select the user's rows once. Both helpers filter on UserID again, so
    # handing them this small subset yields the same results while avoiding
    # a scan of the full ratings table for every candidate wine.
    user_ratings = ratings[ratings['UserID'] == user_id]

    unrated_wines = get_unrated_wines(user_id, user_ratings, all_wine_ids)
    return {
        wine: predict_rating(user_id, wine, user_ratings)
        for wine in unrated_wines
    }

def recommend_wines(user_id, N=10):
    """Return up to ``N`` recommendations with distinct wine names.

    Args:
        user_id: User to recommend for.
        N: Maximum number of recommendations to return.

    Returns:
        A list of ``(wine_id, predicted_rating, wine_name)`` tuples ordered
        by predicted rating, highest first. Wines without a usable
        prediction or without an entry in ``wines`` are left out.
    """
    predicted_ratings = predict_all_ratings(user_id)

    # NaN predictions compare false against everything, which would leave
    # the sort order undefined, so drop them before ranking.
    ranked = [
        (wine_id, score)
        for wine_id, score in predicted_ratings.items()
        if pd.notna(score)
    ]
    ranked.sort(key=lambda item: item[1], reverse=True)

    # Filter out duplicates based on wine name
    unique_recommendations = []
    seen_names = set()
    for wine_id, predicted_rating in ranked:
        names = wines.loc[wines['WineID'] == wine_id, 'WineName'].values
        if len(names) == 0:
            # Rated wine that is missing from the catalogue: it has no name
            # to show, so skip it rather than fail on an empty lookup.
            continue
        wine_name = names[0]
        if wine_name not in seen_names:
            unique_recommendations.append((wine_id, predicted_rating, wine_name))
            seen_names.add(wine_name)
        if len(unique_recommendations) == N:
            break

    return unique_recommendations

def add_new_user():
    """Create a new user by asking for ratings of five randomly sampled wines.

    The new ID is one more than the largest ``UserID`` in the module-level
    ``ratings`` table. Each rating is read from standard input and re-asked
    until it parses as a number between 1 and 5. The collected rows are
    appended to ``ratings`` (the global is rebound to the new DataFrame).

    Returns:
        The ID assigned to the new user.
    """
    global ratings

    new_user_id = ratings['UserID'].max() + 1
    print(f"New user created with ID: {new_user_id}")

    collected = []
    for _, row in wines.sample(5).iterrows():
        print(f"\nWine: {row['WineName']}")
        print(f"Type: {row['Type']}")
        print(f"Country: {row['Country']}")

        # Keep prompting until a number in the accepted range is entered.
        score = None
        while score is None:
            try:
                candidate = float(input("Please rate this wine (1-5): "))
            except ValueError:
                print("Please enter a valid number.")
                continue
            if 1 <= candidate <= 5:
                score = candidate
            else:
                print("Rating must be between 1 and 5.")

        collected.append({
            'UserID': new_user_id,
            'WineID': row['WineID'],
            'Rating': score,
        })

    ratings = pd.concat([ratings, pd.DataFrame(collected)], ignore_index=True)
    print("\nThank you for rating these wines!")
    return new_user_id

def display_recommendations(user_id):
    """Print a numbered list of the recommendations for ``user_id``.

    The heading is printed before the recommendations are computed; each
    following line shows the wine name and its predicted rating rounded to
    two decimals.
    """
    print(f"\nTop 10 wine recommendations for user {user_id}:")
    for rank, (_, score, name) in enumerate(recommend_wines(user_id), start=1):
        print(f"{rank}. {name} (Predicted rating: {score:.2f})")

def main():
    """Run the text menu until the user chooses to exit.

    Option 1 registers a new user and shows recommendations for them,
    option 2 shows recommendations for an existing user ID, option 3 quits.
    Any other input prints an error and shows the menu again.
    """
    menu_lines = (
        "\n1. Add new user",
        "2. Get recommendations for existing user",
        "3. Exit",
    )

    while True:
        for line in menu_lines:
            print(line)

        choice = input("Enter your choice (1-3): ")

        if choice == '3':
            print("Thank you for using the Wine Recommender System. Goodbye!")
            return

        if choice == '1':
            display_recommendations(add_new_user())
            continue

        if choice != '2':
            print("Invalid choice. Please try again.")
            continue

        # Option 2: look up an existing user.
        try:
            user_id = int(input("Enter user ID: "))
            if user_id in ratings['UserID'].unique():
                display_recommendations(user_id)
            else:
                print("User not found.")
        except ValueError:
            print("Invalid user ID. Please enter a number.")

# Start the interactive text menu only when this code is run directly,
# not when it is imported.
if __name__ == "__main__":
    main()


1. Add new user
2. Get recommendations for existing user
3. Exit
Invalid choice. Please try again.

1. Add new user
2. Get recommendations for existing user
3. Exit
New user created with ID: 2062619

Wine: Four Sons Fraternity
Type: Red
Country: United States

Wine: Porto Colheita
Type: Dessert Port
Country: Portugal

Wine: Quartet Brut
Type: Sparkling
Country: United States

Wine: E   E Black Pepper Shiraz
Type: Red
Country: Australia

Wine: Zweigeltrebe
Type: Red
Country: Czech Republic

Thank you for rating these wines!

Top 10 wine recommendations for user 2062619:
1. Shiraz Rosé (Predicted rating: 3.42)
2. Sparkling Cuvée Brut (Predicted rating: 3.41)
3. Brut (Predicted rating: 3.41)
4. Vicar s Choice Sauvignon Blanc Bubbles (Predicted rating: 3.40)
5. Lightly Sparkling Sauvignon Blanc (Predicted rating: 3.40)
6. The Soloist Pinot Rosé (Predicted rating: 3.38)
7. L Ermitage Brut (Predicted rating: 3.38)
8. Anderson Valley Rosé of Pinot Noir (Predicted rating: 3.37)
9. Pinotage Ro

In [5]:
import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

class WineRecommenderGUI:
    """Tkinter front end for the similarity-based wine recommender.

    The window holds two tabs. "New User" asks for ratings of five randomly
    sampled wines and then shows recommendations for a freshly created user;
    "Existing User" shows recommendations for a user ID typed by the
    operator.

    A prediction for an unrated wine is the similarity-weighted average of
    the user's own ratings, using the precomputed matrix loaded from
    ``wine_similarities_w2v.npy``. Matrix positions are the row positions of
    the wines in ``wines.csv`` (assumes the matrix was built in that order
    -- TODO confirm against the script that produced it).
    """

    def __init__(self, master):
        """Load the data files and build the tabbed interface in ``master``."""
        self.master = master
        master.title("Wine Recommender System")
        master.geometry("600x400")

        # Load data
        self.wines = pd.read_csv('wines.csv')
        self.ratings = pd.read_csv('XWines_Slim_150K_ratings.csv', low_memory=False)
        self.wine_similarities = np.load('wine_similarities_w2v.npy')

        # Create sets of valid wine IDs. Only wines that are both in the
        # catalogue and rated at least once are candidates.
        self.valid_wine_ids = set(self.wines['WineID'])
        self.valid_rated_wine_ids = set(self.ratings['WineID'])
        self.all_valid_wine_ids = self.valid_wine_ids.intersection(self.valid_rated_wine_ids)

        # WineID -> row/column position in the similarity matrix.
        self.wine_id_to_idx = {wine_id: idx for idx, wine_id in enumerate(self.wines['WineID'])}

        # Create notebook (tabbed interface)
        self.notebook = ttk.Notebook(master)
        self.notebook.pack(expand=True, fill="both", padx=10, pady=10)

        # Create tabs
        self.create_new_user_tab()
        self.create_existing_user_tab()

    def create_new_user_tab(self):
        """Build the "New User" tab: five wines to rate, a button, an output box."""
        new_user_frame = ttk.Frame(self.notebook)
        self.notebook.add(new_user_frame, text="New User")

        # Instructions
        ttk.Label(new_user_frame, text="Rate 5 random wines to get started:").grid(row=0, column=0, columnspan=2, pady=10)

        # Create widgets for rating wines. Each entry of rating_widgets pairs
        # a WineID with the StringVar that holds the rating chosen for it.
        self.rating_widgets = []
        self.sample_wines = self.wines[self.wines['WineID'].isin(self.all_valid_wine_ids)].sample(5)
        for i, (_, wine) in enumerate(self.sample_wines.iterrows()):
            ttk.Label(new_user_frame, text=f"{wine['WineName']} ({wine['Type']}, {wine['Country']})").grid(row=i+1, column=0, sticky="w", padx=5)
            rating_var = tk.StringVar()
            rating_combo = ttk.Combobox(new_user_frame, textvariable=rating_var, values=[1, 2, 3, 4, 5], width=5)
            rating_combo.grid(row=i+1, column=1, padx=5)
            self.rating_widgets.append((wine['WineID'], rating_var))

        # Button to submit ratings
        ttk.Button(new_user_frame, text="Get Recommendations", command=self.submit_new_user_ratings).grid(row=6, column=0, columnspan=2, pady=10)

        # Recommendations display
        self.new_user_recommendations = tk.Text(new_user_frame, height=10, width=70)
        self.new_user_recommendations.grid(row=7, column=0, columnspan=2, padx=5, pady=5)

    def create_existing_user_tab(self):
        """Build the "Existing User" tab: ID entry, a button, an output box."""
        existing_user_frame = ttk.Frame(self.notebook)
        self.notebook.add(existing_user_frame, text="Existing User")

        ttk.Label(existing_user_frame, text="Enter User ID:").grid(row=0, column=0, padx=5, pady=10)
        self.user_id_entry = ttk.Entry(existing_user_frame, width=10)
        self.user_id_entry.grid(row=0, column=1, padx=5)

        ttk.Button(existing_user_frame, text="Get Recommendations", command=self.get_existing_user_recommendations).grid(row=1, column=0, columnspan=2, pady=10)

        self.existing_user_recommendations = tk.Text(existing_user_frame, height=15, width=70)
        self.existing_user_recommendations.grid(row=2, column=0, columnspan=2, padx=5, pady=5)

    def submit_new_user_ratings(self):
        """Validate the entered ratings, register a new user and show results.

        Every rating must parse as a number between 1 and 5; otherwise an
        error dialog is shown and nothing is stored. The new user ID is one
        more than the largest ``UserID`` currently in ``self.ratings``.
        """
        new_ratings = []
        new_user_id = self.ratings['UserID'].max() + 1

        for wine_id, rating_var in self.rating_widgets:
            try:
                rating = float(rating_var.get())
                if 1 <= rating <= 5:
                    new_ratings.append({'UserID': new_user_id, 'WineID': wine_id, 'Rating': rating})
                else:
                    # Out-of-range values share the handler for unparsable ones.
                    raise ValueError
            except ValueError:
                messagebox.showerror("Invalid Input", "Please enter valid ratings (1-5) for all wines.")
                return

        self.ratings = pd.concat([self.ratings, pd.DataFrame(new_ratings)], ignore_index=True)
        recommendations = self.recommend_wines(new_user_id)
        self.display_recommendations(recommendations, self.new_user_recommendations)

    def get_existing_user_recommendations(self):
        """Show recommendations for the user ID typed into the entry field.

        Shows an error dialog when the text is not an integer or when the ID
        does not occur in ``self.ratings``.
        """
        # Only the integer conversion is guarded, so a ValueError raised
        # while computing recommendations is not misreported as bad input.
        try:
            user_id = int(self.user_id_entry.get())
        except ValueError:
            messagebox.showerror("Invalid Input", "Please enter a valid User ID (integer).")
            return

        if user_id not in self.ratings['UserID'].unique():
            messagebox.showerror("User Not Found", "The entered User ID does not exist.")
            return

        recommendations = self.recommend_wines(user_id)
        self.display_recommendations(recommendations, self.existing_user_recommendations)

    def recommend_wines(self, user_id, N=10):
        """Return up to ``N`` recommendations with distinct wine names.

        Args:
            user_id: User to recommend for.
            N: Maximum number of recommendations to return.

        Returns:
            A list of ``(wine_id, predicted_rating, wine_name)`` tuples
            ordered by predicted rating, highest first. Wines without a
            usable (non-NaN) prediction are left out.
        """
        predicted_ratings = self.predict_all_ratings(user_id)

        # NaN compares false against everything, which would leave the sort
        # order undefined, so drop such predictions before ranking.
        ranked = [
            (wine_id, score)
            for wine_id, score in predicted_ratings.items()
            if pd.notna(score)
        ]
        ranked.sort(key=lambda item: item[1], reverse=True)

        unique_recommendations = []
        seen_names = set()
        for wine_id, predicted_rating in ranked:
            if wine_id in self.valid_wine_ids:
                wine_name = self.wines[self.wines['WineID'] == wine_id]['WineName'].values[0]
                if wine_name not in seen_names:
                    unique_recommendations.append((wine_id, predicted_rating, wine_name))
                    seen_names.add(wine_name)
                if len(unique_recommendations) == N:
                    break

        return unique_recommendations

    def predict_all_ratings(self, user_id):
        """Predict a rating for every candidate wine the user has not rated.

        Returns:
            A dict mapping wine ID to the value from ``predict_rating``.
        """
        # Select the user's rows once rather than once per candidate wine;
        # scanning the whole ratings table inside the loop blocks the GUI.
        user_ratings = self._ratings_for(user_id)
        return {
            wine: self.predict_rating(user_id, wine, user_ratings=user_ratings)
            for wine in self.get_unrated_wines(user_id)
        }

    def get_unrated_wines(self, user_id):
        """Return the candidate wine IDs that ``user_id`` has not rated."""
        rated_wines = set(self.ratings[self.ratings['UserID'] == user_id]['WineID'].unique())
        unrated_wines = self.all_valid_wine_ids - rated_wines
        return list(unrated_wines)

    def _ratings_for(self, user_id):
        """Return the ``WineID``/``Rating`` rows of ``self.ratings`` for one user."""
        return self.ratings.loc[self.ratings['UserID'] == user_id, ['WineID', 'Rating']]

    def predict_rating(self, user_id, unrated_wine, user_ratings=None):
        """Predict the rating ``user_id`` would give to ``unrated_wine``.

        The prediction is ``sum(sim * rating) / sum(|sim|)`` over the wines
        the user has rated that are known to the similarity matrix.

        Args:
            user_id: User whose ratings are used.
            unrated_wine: ID of the wine to score.
            user_ratings: Optional pre-selected ``WineID``/``Rating`` rows of
                this user; looked up from ``self.ratings`` when omitted.

        Returns:
            ``np.nan`` when the user has no ratings; the user's mean rating
            when the similarity weights sum to zero; otherwise the weighted
            average.
        """
        if user_ratings is None:
            user_ratings = self._ratings_for(user_id)

        if user_ratings.empty:
            return np.nan

        # WineID -> Rating lookup built once. ``setdefault`` keeps the first
        # rating when a wine was rated more than once; ``to_numpy`` keeps
        # numpy scalar types for the arithmetic below.
        rating_by_wine = {}
        for wine_id, rating in zip(user_ratings['WineID'].to_numpy(),
                                   user_ratings['Rating'].to_numpy()):
            rating_by_wine.setdefault(wine_id, rating)

        similar_wines = self.get_similarities(unrated_wine, user_ratings['WineID'].tolist())

        numerator = 0
        denominator = 0
        for wine, similarity in similar_wines:
            numerator += similarity * rating_by_wine[wine]
            denominator += abs(similarity)

        if denominator == 0:
            # No usable similarity information: fall back to the user's mean.
            return user_ratings['Rating'].mean()

        return numerator / denominator

    def get_similarities(self, unrated_wine, rated_wines):
        """Pair each rated wine with its similarity to ``unrated_wine``.

        Wines missing from ``self.wine_id_to_idx`` are skipped.

        Returns:
            A list of ``(rated_wine_id, similarity)`` tuples sorted by
            similarity, highest first; empty when ``unrated_wine`` is unknown.
        """
        similarities = []
        if unrated_wine not in self.wine_id_to_idx:
            return similarities

        unrated_wine_idx = self.wine_id_to_idx[unrated_wine]

        for rated_wine in rated_wines:
            if rated_wine not in self.wine_id_to_idx:
                continue
            rated_wine_idx = self.wine_id_to_idx[rated_wine]
            similarity = self.wine_similarities[unrated_wine_idx, rated_wine_idx]
            similarities.append((rated_wine, similarity))

        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities

    def display_recommendations(self, recommendations, text_widget):
        """Replace the contents of ``text_widget`` with a numbered list.

        Args:
            recommendations: Iterable of ``(wine_id, predicted_rating,
                wine_name)`` tuples, already in display order.
            text_widget: Text widget to write into.
        """
        text_widget.delete('1.0', tk.END)
        text_widget.insert(tk.END, "Top 10 Wine Recommendations:\n\n")
        for i, (_, predicted_rating, wine_name) in enumerate(recommendations, 1):
            text_widget.insert(tk.END, f"{i}. {wine_name} (Predicted rating: {predicted_rating:.2f})\n")

# Open the recommender window and enter the Tk event loop only when this
# code is run directly, not when it is imported.
if __name__ == "__main__":
    root = tk.Tk()
    app = WineRecommenderGUI(root)
    root.mainloop()