In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from collections import Counter

In [None]:
# Load the game catalogue and discard rows that have no title.
games = pd.read_csv("games_full.csv").dropna(subset=['title'])

# Unique titles in ascending order; used for search suggestions and validation.
all_titles = games['title'].drop_duplicates().tolist()
all_titles.sort()

#### WIDGETS ####
# Search field whose suggestions are the known game titles.
search_box = widgets.Combobox(
    options=all_titles,
    ensure_option=True,
    placeholder='Type a game title here...',
    description='Search:',
    layout=widgets.Layout(width='70%'),
)

# Action buttons; their click handlers are attached further down.
add_button = widgets.Button(description='Add Game')
reset_button = widgets.Button(button_style='danger', description='Reset')
confirm_button = widgets.Button(
    button_style='success',
    description='Get Recommendations',
    layout=widgets.Layout(width='70%'),
)

# Running list of the games picked so far (starts empty).
liked_list = widgets.SelectMultiple(
    description='Liked Games:',
    options=[],
    rows=8,
    layout=widgets.Layout(width='70%'),
)

# Area that receives everything the callbacks print.
output = widgets.Output()

#### CALLBACK: ADD GAME ####
def on_add_click(b):
    """Append the title typed in the search box to the liked-games list.

    The title is added only if it is a known title and is not already in the
    list; on success the search box is emptied. Otherwise nothing changes.

    Args:
        b: The clicked button (unused).
    """
    candidate = search_box.value.strip()

    # Ignore anything that is not an exact, known title.
    if candidate not in all_titles:
        return

    current = list(liked_list.options)
    # Ignore titles that were already added.
    if candidate in current:
        return

    liked_list.options = current + [candidate]
    search_box.value = ""

#### CALLBACK: RESET ####
def on_reset_click(b):
    """Return the UI to its initial state.

    Empties the liked-games list and the search box, then clears whatever was
    previously printed to the output widget.

    Args:
        b: The clicked button (unused).
    """
    liked_list.options = []
    search_box.value = ""
    # Remove any earlier messages, metrics, or recommendations.
    with output:
        output.clear_output()

#### CALLBACK: RECOMMEND ####
def on_confirm_click(b):
    """Fit a like/not-like text classifier and print the top recommendations.

    Reads the titles currently in ``liked_list``, labels the matching rows of
    the module-level ``games`` frame, fits TF-IDF + Multinomial Naive Bayes on
    a down-sampled subset (at most three not-liked rows per liked row), prints
    hold-out metrics when a stratified split is possible, and finally prints
    the ten highest-scoring titles that the user has not already liked.
    Everything is printed into the ``output`` widget.

    Recommendations are always produced once both classes are present: the
    scoring model is fitted on every labelled row, independently of whether
    the evaluation step could run.

    Side effects: fills NaNs in the text columns of ``games`` and adds or
    overwrites its ``liked``, ``text_features`` and ``predicted_like`` columns.

    Args:
        b: The clicked button (unused).
    """
    with output:
        output.clear_output()
        liked_titles = list(liked_list.options)

        if not liked_titles:
            print("ERROR: Please add at least one game before getting recommendations.")
            return

        print("You selected the following games:")
        for title in liked_titles:
            print(f"• {title}")

        is_liked = games['title'].isin(liked_titles)
        games['liked'] = is_liked.astype(int)

        text_cols = ['genres', 'supported_languages', 'developers', 'publishers']
        games[text_cols] = games[text_cols].fillna("")
        games['text_features'] = games[text_cols].agg(' '.join, axis=1)

        # BALANCING: keep every liked row and sample up to 3x as many others.
        liked = games[is_liked]
        not_liked_pool = games[~is_liked]
        if liked.empty or not_liked_pool.empty:
            # A single-class model has no "liked" probability column to rank by.
            print("ERROR: Need both liked and not-liked games to build recommendations.")
            return
        not_liked = not_liked_pool.sample(
            n=min(len(not_liked_pool), len(liked) * 3), random_state=42
        )
        games_balanced = pd.concat([liked, not_liked])

        # TF-IDF Vectorizer
        vectorizer = TfidfVectorizer()
        X = vectorizer.fit_transform(games_balanced['text_features'])
        y = games_balanced['liked']

        # Evaluation is informational only and may be skipped on small inputs.
        _print_holdout_metrics(X, y)

        # Fit the scoring model on all labelled rows so that it exists even
        # when evaluation was skipped and no liked game is withheld from it.
        model = MultinomialNB()
        model.fit(X, y)

        X_all = vectorizer.transform(games['text_features'])
        games['predicted_like'] = model.predict_proba(X_all)[:, 1]

        # EXCLUDE LIKED FROM RECOMMENDATIONS; list each title at most once,
        # keeping its best-scoring row.
        recommendations = (
            games[~is_liked]
            .sort_values('predicted_like', ascending=False)
            .drop_duplicates(subset='title')
        )

        print("\nTop 10 Game Recommendations")
        top_ten = recommendations[['title', 'predicted_like']].head(10)
        for row in top_ten.itertuples(index=False):
            print(f"• {row.title} (Score: {row.predicted_like:.2f})")


def _print_holdout_metrics(X, y):
    """Print metrics from a stratified 75/25 hold-out split of ``(X, y)``.

    Evaluation is skipped with an explanatory message when a class has fewer
    than two samples, when the stratified split cannot be created, or when the
    test fold ends up with a single class.

    Args:
        X: Feature matrix for the labelled rows.
        y: Binary labels aligned with ``X`` (1 = liked, 0 = not liked).
    """
    class_counts = Counter(y)
    if any(count < 2 for count in class_counts.values()):
        print("Not enough samples in one or more classes to perform a train/test split. "
              "Skipping evaluation; add more games for better results.")
        print(f"Class distribution: {class_counts}")
        return

    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.25, random_state=42, stratify=y
        )
    except ValueError as err:
        # e.g. the test fold would hold fewer samples than there are classes.
        print(f"Could not create a stratified train/test split ({err}). Skipping evaluation.")
        return

    eval_model = MultinomialNB()
    eval_model.fit(X_train, y_train)

    y_pred = eval_model.predict(X_test)
    y_prob = eval_model.predict_proba(X_test)[:, 1]

    if len(set(y_test)) < 2:
        print("Test set contains only one class!!! Skipping evaluation.")
        return

    print("Evaluation Metrics on Test Data")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")
    print(f"Precision: {precision_score(y_test, y_pred, zero_division=0):.3f}")
    print(f"Recall: {recall_score(y_test, y_pred, zero_division=0):.3f}")
    print(f"F1 Score: {f1_score(y_test, y_pred, zero_division=0):.3f}")
    print(f"ROC AUC: {roc_auc_score(y_test, y_prob):.3f}")
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

#### PREVENT DUPLICATE CALLBACKS ####
# Attach exactly one handler per button. Any handlers already registered are
# cleared first, one button at a time, so a failure on one button does not
# skip the others. `_click_callbacks` is a private ipywidgets attribute and
# may be missing in some versions; only that case is tolerated, so unrelated
# errors are no longer silently swallowed.
for button, handler in (
    (add_button, on_add_click),
    (reset_button, on_reset_click),
    (confirm_button, on_confirm_click),
):
    try:
        button._click_callbacks.callbacks.clear()
    except AttributeError:
        pass
    button.on_click(handler)

#### DISPLAY UI ####
# Top row: search field with the add and reset buttons beside it.
controls_row = widgets.HBox([search_box, add_button, reset_button])

# Stack the row, the liked-games list, the confirm button and the output area.
ui = widgets.VBox([controls_row, liked_list, confirm_button, output])

display(ui)


VBox(children=(HBox(children=(Combobox(value='', description='Search:', ensure_option=True, layout=Layout(widt…