In [11]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from itertools import combinations
from collections import Counter
from ast import literal_eval

def load_data(games_path="games_full.csv", purchases_path="PSpurchased_games.csv"):
    """Load game metadata and purchase libraries, keeping only purchased games.

    Parameters
    ----------
    games_path : str or path-like, optional
        CSV with game metadata. Must provide ``title`` and ``gameid`` columns.
        Rows without a title are dropped.
    purchases_path : str or path-like, optional
        CSV with one row per player. Must provide a ``library`` column whose
        cells are the string form of a list of game IDs (e.g. ``"[1, 2, 3]"``).

    Returns
    -------
    tuple
        ``(valid_games, df, all_titles, title_to_id, id_to_title)`` where
        ``valid_games`` is the metadata restricted to purchased games, ``df``
        is the purchase table with ``library`` parsed into lists of ints,
        ``all_titles`` is the sorted list of unique titles, and the two dicts
        map title -> gameid and gameid -> title.
    """
    # .assign builds a new frame instead of writing into the dropna() result,
    # which avoids pandas' SettingWithCopyWarning.
    games = (
        pd.read_csv(games_path)
        .dropna(subset=['title'])
        .assign(gameid=lambda frame: frame['gameid'].astype(int))
    )
    df = pd.read_csv(purchases_path)

    def parse_library(x):
        """Turn one raw ``library`` cell into a list of int game IDs."""
        # Missing cells arrive as NaN (a float), not as a string.
        if not isinstance(x, str):
            return []
        try:
            # literal_eval only evaluates Python literals, never arbitrary code.
            return [int(i) for i in literal_eval(x) if str(i).isdigit()]
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Malformed or non-iterable cell: treat it as an empty library
            # rather than aborting the whole load. Unlike a bare `except`,
            # this lets KeyboardInterrupt and genuine bugs propagate.
            return []

    df['library'] = df['library'].apply(parse_library)
    # explode() turns empty libraries into NaN, hence the dropna().
    purchased_ids = set(df['library'].explode().dropna())

    # Filter metadata to only games that appear in purchase data
    valid_games = games[games['gameid'].isin(purchased_ids)]

    all_titles = sorted(valid_games['title'].unique().tolist())
    # NOTE: if several rows share a title, the last gameid wins in title_to_id.
    title_to_id = dict(zip(valid_games['title'], valid_games['gameid']))
    id_to_title = dict(zip(valid_games['gameid'], valid_games['title']))

    return valid_games, df, all_titles, title_to_id, id_to_title

def compute_pair_stats(df, valid_game_ids):
    """Count how often each pair of valid games appears in the same library.

    Parameters
    ----------
    df : pandas.DataFrame
        Purchase table with a ``library`` column holding lists of game IDs.
    valid_game_ids : set
        Game IDs to consider; any other ID in a library is ignored.

    Returns
    -------
    tuple
        ``(pair_df, game_counts)``. ``pair_df`` has the columns ``game1``,
        ``game2``, ``pair_count`` and ``pair_percentage`` and is sorted by
        ``pair_count`` descending; ``game1`` is always the smaller ID of the
        pair. ``pair_percentage`` is ``pair_count`` divided by the number of
        libraries containing ``game1``. ``game_counts`` is a ``Counter`` of
        libraries per game.
    """
    pair_counts = Counter()
    game_counts = Counter()

    for library in df['library']:
        # De-duplicate so a library that lists the same game twice counts once
        # and never produces a (g, g) self-pair or a double-counted pair.
        owned = sorted({g for g in library if g in valid_game_ids})
        game_counts.update(owned)
        # combinations() yields nothing for fewer than two games.
        pair_counts.update(combinations(owned, 2))

    # Compute the percentage while building the records: this avoids a slow
    # row-wise DataFrame.apply and also works when there are no pairs at all.
    records = [
        (g1, g2, count, count / game_counts[g1])
        for (g1, g2), count in pair_counts.items()
    ]
    pair_df = pd.DataFrame(
        records,
        columns=['game1', 'game2', 'pair_count', 'pair_percentage'],
    )
    return pair_df.sort_values('pair_count', ascending=False), game_counts

def _score_recommendations(owned_ids, pair_df, game_counts):
    """Score games that were co-purchased with any of ``owned_ids``.

    For each owned game, every pair in ``pair_df`` that involves it adds
    ``pair_count / game_counts[owned game]`` to the score of the other game in
    the pair. Games that are themselves owned are never scored.
    Returns a ``Counter`` mapping game ID -> accumulated score.
    """
    owned = set(owned_ids)  # O(1) membership instead of scanning a list
    rec_scores = Counter()

    for gid in owned_ids:
        # Games bought with this game
        involved = (pair_df['game1'] == gid) | (pair_df['game2'] == gid)
        pairs = pair_df.loc[involved, ['game1', 'game2', 'pair_count']]
        # itertuples keeps each column's dtype; iterrows would upcast the
        # integer IDs to float64 because pair_df also has a float column.
        for game1, game2, pair_count in pairs.itertuples(index=False, name=None):
            other_id = game2 if game1 == gid else game1
            if other_id not in owned:
                rec_scores[other_id] += pair_count / game_counts[gid]

    return rec_scores


def create_ui(all_titles, pair_df, game_counts, title_to_id, id_to_title):
    """Build and display the interactive recommendation widget.

    Parameters
    ----------
    all_titles : list of str
        Titles offered by the search box.
    pair_df : pandas.DataFrame
        Co-purchase table with ``game1``, ``game2`` and ``pair_count`` columns.
    game_counts : mapping
        Game ID -> number of libraries containing it; used to normalise scores.
    title_to_id, id_to_title : dict
        Lookups between titles and game IDs.

    The function returns nothing; it displays a search box, the list of
    selected games, Add/Reset/Get Recommendations buttons and an output area.
    """
    search_box = widgets.Combobox(
        placeholder='Start typing a game name...',
        options=all_titles,
        ensure_option=True,
        description='Search:',
        layout=widgets.Layout(width='500px')
    )

    liked_list = widgets.Select(
        options=[],
        description='Selected:',
        rows=8,
        layout=widgets.Layout(width='500px')
    )

    add_button = widgets.Button(description="Add Game", button_style='success')
    reset_button = widgets.Button(description="Reset", button_style='warning')
    confirm_button = widgets.Button(description="Get Recommendations", button_style='primary')

    output = widgets.Output()

    def add_game(b):
        """Append the searched title to the selection (no duplicates), then clear the box."""
        if search_box.value and search_box.value not in liked_list.options:
            liked_list.options = list(liked_list.options) + [search_box.value]
        search_box.value = ""

    def reset(b):
        """Empty the selection and wipe any printed recommendations."""
        liked_list.options = []
        output.clear_output()

    def get_recommendations(b):
        """Print the selected games and the ten highest-scoring co-purchased games."""
        with output:
            output.clear_output()
            if not liked_list.options:
                print("Please add at least one game")
                return

            print("Your selected games:")
            for title in liked_list.options:
                print(f"- {title}")

            owned_ids = [title_to_id[title] for title in liked_list.options]
            rec_scores = _score_recommendations(owned_ids, pair_df, game_counts)

            if not rec_scores:
                print("\nNo recommendations found")
                return

            print("\nTop recommendations:")
            for game_id, score in rec_scores.most_common(10):
                print(f"- {id_to_title[game_id]} (score: {score:.2f})")

    add_button.on_click(add_game)
    reset_button.on_click(reset)
    confirm_button.on_click(get_recommendations)

    display(widgets.VBox([
        widgets.HBox([search_box, add_button]),
        liked_list,
        widgets.HBox([reset_button, confirm_button]),
        output
    ]))

# Notebook entry point: load the data, build the co-purchase table, show the UI.
# The names bound here stay notebook-level globals.
if __name__ == "__main__":
    try:
        (
            games,
            df,
            all_titles,
            title_to_id,
            id_to_title,
        ) = load_data()
        pair_df, game_counts = compute_pair_stats(
            df=df,
            valid_game_ids=set(title_to_id.values()),
        )
        create_ui(
            all_titles=all_titles,
            pair_df=pair_df,
            game_counts=game_counts,
            title_to_id=title_to_id,
            id_to_title=id_to_title,
        )
    except Exception as e:
        # Any failure is reported as a one-line message; no traceback is shown.
        print(f"Error: {e}")

VBox(children=(HBox(children=(Combobox(value='', description='Search:', ensure_option=True, layout=Layout(widt…

In [5]:
import pandas as pd

# Load the PS, STEAM and XBOX purchase files, in that order, one dataframe each
ps_df, steam_df, xbox_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        'PSpurchased_games.csv',
        'STEAMpurchased_games.csv',
        'XBOXpurchased_games.csv',
    )
)

# Stack the three tables on top of each other with a fresh 0..n-1 index
combined_df = pd.concat((ps_df, steam_df, xbox_df), ignore_index=True)

# Persist the stacked table (without the index column) for later use
combined_df.to_csv(path_or_buf='combined_purchased_games.csv', index=False)

# Show the first rows as a quick sanity check
print(combined_df.head())

   playerid                                            library
0    268071  [14972, 417905, 14693, 7742, 20162, 461983, 12...
1   2218485  [410618, 17456, 16034, 138931, 175202, 331007,...
2    253885  [417808, 706747, 619583, 492711, 555631, 18167...
3   1911732  [670209, 694794, 658551, 582894, 602666, 65531...
4   3098524  [19362, 170350, 12653, 20230, 12750, 18141, 41...


NameError: name 'search_box' is not defined