In [1]:
import pandas as pd

In [2]:
reviews_df = pd.read_json("../../data/raw/reviews.json", lines=True)
reviews_df.head()

Unnamed: 0,username,hours,products,product_id,page_order,date,text,early_access,page,found_funny,compensation,user_id
0,The A S C E N D E D man,4.3,18.0,10,7,2017-12-31,For an old game it is still very relevant and ...,False,16,,,7.65612e+16
1,jej,0.1,20.0,10,1,2017-12-26,Don't play this game not even good,False,35,1.0,,
2,🎉2018 🎊,2.1,6.0,10,5,2017-12-27,Awesome!!,False,32,,Product received for free,7.65612e+16
3,MaG1k,40.2,26.0,10,4,2017-12-29,Exelente juego!.,False,24,,Product received for free,
4,raymd.,64.3,15.0,10,5,2018-01-01,7/10...,False,16,,,


In [3]:
sentiment_df = pd.read_parquet("../../data/raw/reviews_item_cleaned.parquet")
sentiment_df.head()

Unnamed: 0,genres,Game,Game_ID,sentiment,num_genres
0,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,761140,4.0,5
1,"[Free to Play, Indie, RPG, Strategy]",Ironbound,643980,4.5,4
2,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,670290,4.5,5
3,"[Action, Adventure, Casual]",弹炸人2222,767400,4.0,3
4,"[Action, Adventure, Simulation]",Battle Royale Trainer,772540,3.0,3


In [4]:
item_df = pd.read_json("../../data/raw/item_metadata.json", lines=True)
item_df.head()

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment,metascore
0,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"[Strategy, Action, Indie, Casual, Simulation]",4.49,http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,False,761140.0,Kotoshiro,,
1,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...",,http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free To Play,False,643980.0,Secret Level SRL,Mostly Positive,
2,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...",,http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play,False,670290.0,Poolians.com,Mostly Positive,
3,彼岸领域,"[Action, Adventure, Casual]",弹炸人2222,弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,"[Action, Adventure, Casual]",0.83,http://steamcommunity.com/app/767400/reviews/?...,[Single-player],0.99,False,767400.0,彼岸领域,,
4,,,Log Challenge,,http://store.steampowered.com/app/773570/Log_C...,,"[Action, Indie, Casual, Sports]",1.79,http://steamcommunity.com/app/773570/reviews/?...,"[Single-player, Full controller support, HTC V...",2.99,False,773570.0,,,


In [5]:
from sklearn.preprocessing import LabelEncoder

# Drop duplicates based on 'username' and 'product_id'
user_df = reviews_df.drop_duplicates(subset=['username', 'product_id']).copy()

# Reset the index to create a user_id column starting from 1
# user_df.reset_index(drop=True, inplace=True)
# user_df['user_id'] = user_df.index + 1

# Rename 'product_id' to 'game_id'
user_df.rename(columns={'product_id': 'game_id'}, inplace=True)

# Encoding user_id and game_id
user_encoder = LabelEncoder()
# game_encoder = LabelEncoder()

user_df['user_id'] = user_encoder.fit_transform(user_df['username'])
# user_df['game_id'] = game_encoder.fit_transform(user_df['product_id'])

# # user_df = user_df.dropna()

# Select relevant columns
user_df = user_df[['user_id', 'username', 'game_id', 'hours']]
user_df = user_df.dropna()

# Count unique game_id for each user_id
user_df['count_games'] = user_df.groupby('user_id')['game_id'].transform('nunique')

user_df.head()

Unnamed: 0,user_id,username,game_id,hours,count_games
0,461436,The A S C E N D E D man,10,4.3,1
1,614648,jej,10,0.1,3
2,739626,🎉2018 🎊,10,2.1,1
3,285193,MaG1k,10,40.2,1
4,664099,raymd.,10,64.3,1


In [6]:
import pandas as pd
import numpy as np


def split_train_test(user_df):
    # Step 1: Get users with count_games >= 6 (only unique user_ids)
    eligible_users = user_df[user_df['count_games'] >= 6]['user_id'].unique()
    
    # Step 2: Select 1% of these users randomly
    num_users_to_pick = max(1, int(len(eligible_users) * 0.05))  # Ensure at least 1 user is picked
    selected_users = np.random.choice(eligible_users, size=num_users_to_pick, replace=False)

    test_set = []
    
    # Step 3: For each selected user, pick 1 to 3 random entries
    for user_id in selected_users:
        user_entries = user_df[user_df['user_id'] == user_id]
        num_test_entries = min(len(user_entries), np.random.randint(1, 4))  # Min 1, Max 3
        test_rows = user_entries.sample(n=num_test_entries, random_state=42)
        test_set.append(test_rows)

    # Step 4: Create test_df
    test_df = pd.concat(test_set) if test_set else pd.DataFrame(columns=user_df.columns)

    # Step 5: Create train_df (original user_df minus test_df entries)
    train_df = user_df[~user_df.index.isin(test_df.index)]

    return train_df, test_df

# Apply function
train_df, test_df = split_train_test(user_df)

# Display results
print(f"Train DataFrame: {len(train_df)} rows")
print(f"Test DataFrame: {len(test_df)} rows")
print(test_df.head())  # Preview test dataset


Train DataFrame: 1139566 rows
Test DataFrame: 1612 rows
        user_id username  game_id   hours  count_games
866567   609369  imelman   344040     2.4           13
712432   609369  imelman   289130   121.8           13
127279   609369  imelman    17710     2.6           13
182207   377954    Rapid    45410     1.0            8
457954   377954    Rapid   230410  3717.5            8


In [7]:
train_df.head()

Unnamed: 0,user_id,username,game_id,hours,count_games
0,461436,The A S C E N D E D man,10,4.3,1
1,614648,jej,10,0.1,3
2,739626,🎉2018 🎊,10,2.1,1
3,285193,MaG1k,10,40.2,1
4,664099,raymd.,10,64.3,1


In [8]:
test_df.head()

Unnamed: 0,user_id,username,game_id,hours,count_games
866567,609369,imelman,344040,2.4,13
712432,609369,imelman,289130,121.8,13
127279,609369,imelman,17710,2.6,13
182207,377954,Rapid,45410,1.0,8
457954,377954,Rapid,230410,3717.5,8


In [None]:
def genre_based_recommendation(input_genres, items_df, k=10):
    """
    Recommend games based on genre similarity and sentiment score.
    """
    filtered_items = items_df[items_df['genres'].apply(lambda x: any(genre in x for genre in input_genres))]

    if filtered_items.empty:
        return []  # Return empty list instead of string for consistency

    recommended_games = filtered_items.sort_values(by='sentiment', ascending=False)['Game_ID'].head(k).tolist()
    return recommended_games

In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from tqdm.notebook import tqdm

def build_sparse_matrices(df):
    # Get unique users and games
    users = df['user_id'].unique()
    games = df['game_id'].unique()
    
    print(f"Number of unique users: {len(users)}")
    print(f"Number of unique games: {len(games)}")
    
    # Create mappings
    user_to_idx = {user: i for i, user in enumerate(users)}
    game_to_idx = {game: i for i, game in enumerate(games)}
    idx_to_game = {i: game for game, i in game_to_idx.items()}
    idx_to_user = {i: user for user, i in user_to_idx.items()}
    
    # Create matrix data
    user_indices = [user_to_idx[user] for user in df['user_id']]
    game_indices = [game_to_idx[game] for game in df['game_id']]
    hours = df['hours'].values
    
    # Create user-game matrix (users as rows, games as columns)
    shape = (len(users), len(games))
    user_game_matrix = csr_matrix((hours, (user_indices, game_indices)), shape=shape)
    
    # Create game-user matrix (transpose)
    game_user_matrix = user_game_matrix.T.tocsr()
    
    print(f"User-game matrix shape: {user_game_matrix.shape}")
    print(f"Game-user matrix shape: {game_user_matrix.shape}")
    
    return user_game_matrix, game_user_matrix, user_to_idx, game_to_idx, idx_to_user, idx_to_game

def build_models(user_game_matrix, game_user_matrix):
    # User-based model (finds similar users)
    user_model = NearestNeighbors(n_neighbors=20, metric='cosine', algorithm='brute')
    user_model.fit(user_game_matrix)
    
    # Game-based model (finds similar games)
    game_model = NearestNeighbors(n_neighbors=10, metric='cosine', algorithm='brute')
    game_model.fit(game_user_matrix)
    
    return user_model, game_model

def hybrid_recommendations(user_id, input_game_ids, df, user_model, game_model, 
                          user_game_matrix, user_to_idx, game_to_idx, idx_to_user,
                          idx_to_game, game_user_matrix):
    # Check if user exists
    if user_id not in user_to_idx:
        print(f"User {user_id} not found in the dataset")
        return []
    
    user_idx = user_to_idx[user_id]
    
    # Check if input games exist
    valid_game_ids = [g for g in input_game_ids if g in game_to_idx]
    if not valid_game_ids:
        print(f"None of the input games {input_game_ids} found in the dataset")
        return []
    
    # 1. Find similar users based on user's profile
    user_vector = user_game_matrix[user_idx:user_idx+1]
    user_distances, user_indices = user_model.kneighbors(user_vector)
    
    # 2. Find similar games for each input game
    game_recommendations = {}
    for game_id in valid_game_ids:
        game_idx = game_to_idx[game_id]
        game_vector = game_user_matrix[game_idx:game_idx+1]
        game_distances, game_indices = game_model.kneighbors(game_vector)
        
        # Convert indices to game IDs (exclude the input game)
        similar_games = [(idx_to_game[idx], 1.0 - dist) for idx, dist 
                         in zip(game_indices[0], game_distances[0]) 
                         if idx_to_game[idx] != game_id]
        
        for game, score in similar_games:
            if game in game_recommendations:
                game_recommendations[game] = max(game_recommendations[game], score)
            else:
                game_recommendations[game] = score
    
    # 3. Get games played by similar users
    similar_users = [idx_to_user[idx] for idx in user_indices[0] if idx_to_user[idx] != user_id]
    
    # Get games played by similar users (excluding input games)
    user_based_recs = {}
    for sim_user in similar_users:
        # Get games played by this similar user
        user_games = df[df['user_id'] == sim_user]
        
        for _, row in user_games.iterrows():
            game_id = row['game_id']
            hours = row['hours']
            
            # Skip input games
            if game_id in valid_game_ids:
                continue
                
            # Calculate score based on hours played (normalize between 0-1)
            # Log transform to reduce impact of extremely high hours
            score = np.log1p(hours) / 10  # Adjust divisor as needed
            score = min(score, 1.0)  # Cap at 1.0
            
            if game_id in user_based_recs:
                user_based_recs[game_id] = max(user_based_recs[game_id], score)
            else:
                user_based_recs[game_id] = score
    
    # 4. Combine recommendations (hybrid approach)
    # Weight: 60% user-based, 40% game-based
    hybrid_recs = {}
    
    # Add game-based recommendations
    for game, score in game_recommendations.items():
        hybrid_recs[game] = 0.4 * score
    
    # Add user-based recommendations
    for game, score in user_based_recs.items():
        if game in hybrid_recs:
            hybrid_recs[game] += 0.6 * score
        else:
            hybrid_recs[game] = 0.6 * score
    
    # Sort by score and return top recommendations
    sorted_recs = sorted(hybrid_recs.items(), key=lambda x: x[1], reverse=True)
    return [game for game, _ in sorted_recs[:10]]

def run_hybrid_recommendation_system(train_df):
    # Build matrices and models
    user_game_matrix, game_user_matrix, user_to_idx, game_to_idx, idx_to_user, idx_to_game = build_sparse_matrices(train_df)
    user_model, game_model = build_models(user_game_matrix, game_user_matrix)
    
    # Function to get recommendations
    def get_recommendations(user_id, input_game_ids):
        return hybrid_recommendations(
            user_id, 
            input_game_ids, 
            train_df, 
            user_model, 
            game_model,
            user_game_matrix,
            user_to_idx, 
            game_to_idx, 
            idx_to_user, 
            idx_to_game,
            game_user_matrix,
        )
    
    return get_recommendations, user_to_idx, game_to_idx, idx_to_user, idx_to_game


Number of unique users: 738851
Number of unique games: 13477
User-game matrix shape: (738851, 13477)
Game-user matrix shape: (13477, 738851)

Hybrid recommendations for user 461436 based on game(s) [10]:
1. Game ID: 730, Name: Unknown
2. Game ID: 65540, Name: Unknown
3. Game ID: 323850, Name: Unknown
4. Game ID: 440, Name: Unknown
5. Game ID: 49800, Name: Unknown
6. Game ID: 299360, Name: Unknown
7. Game ID: 231430, Name: Unknown
8. Game ID: 372260, Name: Unknown
9. Game ID: 510240, Name: Unknown

Hybrid recommendations for user 622033 based on game(s) [248820, 285900, 359550]:
1. Game ID: 239160, Name: Unknown
2. Game ID: 409370, Name: Unknown
3. Game ID: 395850, Name: Unknown
4. Game ID: 622230, Name: Unknown
5. Game ID: 373420, Name: Unknown
6. Game ID: 527820, Name: Unknown
7. Game ID: 514050, Name: Unknown
8. Game ID: 350310, Name: Unknown
9. Game ID: 342350, Name: Unknown
10. Game ID: 217750, Name: Unknown


In [None]:

# Initialize the hybrid recommendation system
get_recommendations, user_to_idx, game_to_idx, idx_to_user, idx_to_game = run_hybrid_recommendation_system(train_df)

# Example usage
user_id = 461436
input_games = [10]
recommendations = get_recommendations(user_id, input_games)

print(f"\nHybrid recommendations for user {user_id} based on game(s) {input_games}:")
for i, game_id in enumerate(recommendations, 1):
    game_name = train_df[train_df['game_id'] == game_id]['game_name'].iloc[0] if 'game_name' in train_df.columns else "Unknown"
    print(f"{i}. Game ID: {game_id}, Name: {game_name}")

# Example with multiple games
user_id = 622033
input_games = [248820, 285900, 359550]
recommendations = get_recommendations(user_id, input_games)

print(f"\nHybrid recommendations for user {user_id} based on game(s) {input_games}:")
for i, game_id in enumerate(recommendations, 1):
    game_name = train_df[train_df['game_id'] == game_id]['game_name'].iloc[0] if 'game_name' in train_df.columns else "Unknown"
    print(f"{i}. Game ID: {game_id}, Name: {game_name}")

In [None]:
def genre_based_recommendation(genres, sentiment_df, k=10, exclude_games=None):
    """
    Recommend games based on genres.
    
    Parameters:
    -----------
    genres : list
        List of genres to base recommendations on
    sentiment_df : DataFrame
        DataFrame containing game information including genres
    k : int
        Number of recommendations to return
    exclude_games : list
        List of game IDs to exclude from recommendations
        
    Returns:
    --------
    list
        List of game IDs recommended based on genres
    """
    if not genres or len(genres) == 0:
        print("No genres provided for recommendation")
        return []
    
    # Ensure sentiment_df has a 'genres' column
    if 'genres' not in sentiment_df.columns:
        print("No genres column in the sentiment dataframe")
        return []
    
    # Initialize exclude_games if None
    if exclude_games is None:
        exclude_games = []
    
    # Create a dictionary to store game scores
    game_scores = {}
    
    # Iterate through the dataframe
    for _, row in sentiment_df.iterrows():
        game_id = row['game_id']
        
        # Skip if this game should be excluded
        if game_id in exclude_games:
            continue
            
        game_genres = row['genres']
        
        # Skip if game has no genres
        if not isinstance(game_genres, list) or len(game_genres) == 0:
            continue
        
        # Calculate genre match score (number of matching genres)
        matching_genres = set(genres).intersection(set(game_genres))
        match_score = len(matching_genres) / len(genres)  # Normalized by requested genres
        
        # Add additional weight based on sentiment if available
        sentiment_weight = 1.0
        if 'sentiment_score' in row and not pd.isna(row['sentiment_score']):
            # Normalize sentiment score to be between 0 and 1
            sentiment_weight = (row['sentiment_score'] + 1) / 2  # Assuming sentiment is between -1 and 1
        
        # Calculate final score
        if len(matching_genres) > 0:
            game_scores[game_id] = match_score * sentiment_weight
    
    # Sort games by score
    sorted_games = sorted(game_scores.items(), key=lambda x: x[1], reverse=True)
    
    # Return top k game IDs
    return [game_id for game_id, _ in sorted_games[:k]]

def get_recommendations_with_fallback(user_id, input_game_list, missing_game_genres, sentiment_df, k=10):
    """
    Get recommendations with fallback to genre-based recommendations if needed.
    
    Parameters:
    -----------
    user_id : int
        User ID to get recommendations for
    input_game_list : list
        List of game IDs the user is interested in
    missing_game_genres : list
        List of genres for games that might not be in the dataset
    sentiment_df : DataFrame
        DataFrame containing game information
    k : int
        Number of recommendations to return
        
    Returns:
    --------
    list
        List of recommended game IDs
    """
    # Check which games from the input list are missing in sentiment_df
    missing_games = [game_id for game_id in input_game_list if game_id not in game_to_idx]
    
    # Get recommendations using the hybrid system for games that exist
    existing_games = [game_id for game_id in input_game_list if game_id in game_to_idx]
    
    if existing_games:
        recommendations = hybrid_recommendations(
            user_id,
            existing_games,
            sentiment_df,
            user_model,
            game_model,
            user_game_matrix,
            user_to_idx,
            game_to_idx,
            idx_to_user,
            idx_to_game,
            game_user_matrix
        )
    else:
        recommendations = []
    
    # If we have missing games or not enough recommendations, use genre-based approach
    if missing_games or len(recommendations) < k:
        # Use the genres provided by the user
        if missing_game_genres:
            genre_recommendations = genre_based_recommendation(
                missing_game_genres, 
                sentiment_df, 
                k=k*2,
                exclude_games=existing_games  # Exclude games we already used for hybrid recommendations
            )
            
            # Add genre recommendations to the list, avoiding duplicates
            for game_id in genre_recommendations:
                if game_id not in recommendations:
                    recommendations.append(game_id)
    
    return recommendations[:k]  # Ensure we return only k recommendations


In [10]:
sample_user = None
sample_user_test_games = None
sample_user_train_games = None
sample_user_recommended_games = None
def evaluate_genre_recommendations(get_recommendations, train_df, test_df, sentiment_df, k=10, n_users=None):
    """
    Evaluate recommendations based on genre similarity between user's played games and recommendations
    
    Parameters:
    -----------
    get_recommendations : function
        Function that takes user_id and input_games and returns recommendations
    train_df : DataFrame
        Training data with user_id and game_id
    test_df : DataFrame
        Test data with user_id and game_id
    sentiment_df : DataFrame
        DataFrame containing Game_ID and genres (as lists)
    k : int
        Number of recommendations to evaluate
    n_users : int, optional
        Number of users to evaluate (None = all users)
    """
    
    # Create a mapping of game_id to genres
    game_genre_mapping = {}
    for index, row in sentiment_df.iterrows():
        game_id = row['Game_ID']
        genres = row['genres']  # Assuming genres is a list in the dataframe
        game_genre_mapping[game_id] = genres
    
    # Extract unique users and their respective games from both dataframes
    unique_users_games_test = test_df.groupby('user_id')['game_id'].apply(list).to_dict()
    unique_users_games_train = train_df.groupby('user_id')['game_id'].apply(list).to_dict()
    
    # Get list of users to evaluate
    test_users = list(unique_users_games_test.keys())
    
    # Limit number of users if specified
    if n_users is not None:
        import numpy as np
        # np.random.seed(42)  # For reproducibility
        test_users = np.random.choice(test_users, size=min(n_users, len(test_users)), replace=False)
    
    print(f"Evaluating on {len(test_users)} users out of {len(unique_users_games_test)} total users")
    
    # Training metrics to track
    train_genre_precision = []
    train_genre_recall = []
    train_genre_hit_rate = 0
    
    # Testing metrics to track
    test_genre_precision = []
    test_genre_recall = []
    test_genre_hit_rate = 0
    
    evaluated_users = 0
    
    for user_id in test_users:
        # Skip users not in training set
        if user_id not in unique_users_games_train:
            continue
            
        # Get games from training and test sets for this user
        train_games = unique_users_games_train[user_id]
        test_games = unique_users_games_test[user_id]
        
        # Skip users with no games in train set or test set
        if len(train_games) == 0 or len(test_games) == 0:
            continue
        
        # Get all genres from user's training games
        train_genres = set()
        for game in train_games:
            if game in game_genre_mapping:
                train_genres.update(game_genre_mapping[game])
        
        # Get all genres from user's test games
        test_genres = set()
        for game in test_games:
            if game in game_genre_mapping:
                test_genres.update(game_genre_mapping[game])
        
        # Skip if no genres found in either train or test
        if len(train_genres) == 0 or len(test_genres) == 0:
            continue
            
        # Get recommendations
        try:
            recommendations = get_recommendations(user_id, train_games)
            recommendations = recommendations[:k]  # Limit to top-k
        except Exception as e:
            print(f"Error getting recommendations for user {user_id}: {e}")
            continue
        
        # Get genres from recommended games
        recommended_genres = set()
        for game in recommendations:
            if game in game_genre_mapping:
                recommended_genres.update(game_genre_mapping[game])
        
        # Skip if no genres found in recommendations
        if len(recommended_genres) == 0:
            continue
        
        # Calculate TRAINING metrics (train games vs recommendations)
        train_relevant_genres = train_genres.intersection(recommended_genres)
        
        if len(recommended_genres) > 0:
            train_precision = len(train_relevant_genres) / len(recommended_genres)
            train_genre_precision.append(train_precision)
            
        if len(train_genres) > 0:
            train_recall = len(train_relevant_genres) / len(train_genres)
            train_genre_recall.append(train_recall)
            
        # Hit rate (1 if at least one genre matches)
        if len(train_relevant_genres) > 0:
            train_genre_hit_rate += 1
        
        # Calculate TESTING metrics (test games vs recommendations)
        test_relevant_genres = test_genres.intersection(recommended_genres)
        
        if len(recommended_genres) > 0:
            test_precision = len(test_relevant_genres) / len(recommended_genres)
            test_genre_precision.append(test_precision)
            
        if len(test_genres) > 0:
            test_recall = len(test_relevant_genres) / len(test_genres)
            test_genre_recall.append(test_recall)
            
        # Hit rate (1 if at least one genre matches)
        if len(test_relevant_genres) > 0:
            test_genre_hit_rate += 1
            
        evaluated_users += 1
        sample_user = user_id
        sample_user_test_games = test_games
        sample_user_train_games = train_games
        sample_user_recommended_games = recommendations
    
    # Calculate final metrics
    metrics = {
        # Training metrics
        'train_genre_precision': np.mean(train_genre_precision) if train_genre_precision else 0,
        'train_genre_recall': np.mean(train_genre_recall) if train_genre_recall else 0,
        'train_genre_hit_rate': train_genre_hit_rate / evaluated_users if evaluated_users else 0,
        
        # Testing metrics
        'test_genre_precision': np.mean(test_genre_precision) if test_genre_precision else 0,
        'test_genre_recall': np.mean(test_genre_recall) if test_genre_recall else 0,
        'test_genre_hit_rate': test_genre_hit_rate / evaluated_users if evaluated_users else 0,
        
        'num_evaluated_users': evaluated_users
    }
    
    # Print the training metrics
    print("\nTraining Genre-Based Evaluation Metrics (Train Games vs Recommendations):")
    print(f"Genre Precision: {metrics['train_genre_precision']:.4f}")
    print(f"Genre Recall: {metrics['train_genre_recall']:.4f}")
    print(f"Genre Hit Rate: {metrics['train_genre_hit_rate']:.4f}")
    
    # Print the testing metrics
    print("\nTesting Genre-Based Evaluation Metrics (Test Games vs Recommendations):")
    print(f"Genre Precision: {metrics['test_genre_precision']:.4f}")
    print(f"Genre Recall: {metrics['test_genre_recall']:.4f}")
    print(f"Genre Hit Rate: {metrics['test_genre_hit_rate']:.4f}")
    
    print(f"\nNumber of Evaluated Users: {metrics['num_evaluated_users']}")

    # Print the last user's data
    print("\nLast User Evaluated:")
    print(f"User ID: {sample_user}")
    print(f"Train Games: {sample_user_train_games}")
    print(f"Test Games: {sample_user_test_games}")
    print(f"Recommended Games: {sample_user_recommended_games}")
    
    return metrics

# Test with only 100 users
metrics = evaluate_genre_recommendations(get_recommendations, train_df, test_df, sentiment_df, k=10, n_users=10)

Evaluating on 10 users out of 829 total users

Training Genre-Based Evaluation Metrics (Train Games vs Recommendations):
Genre Precision: 0.8211
Genre Recall: 0.7918
Genre Hit Rate: 1.0000

Testing Genre-Based Evaluation Metrics (Test Games vs Recommendations):
Genre Precision: 0.3944
Genre Recall: 0.9350
Genre Hit Rate: 1.0000

Number of Evaluated Users: 10

Last User Evaluated:
User ID: 638048
Train Games: [8930, 13210, 211820, 218230, 228380, 236870, 242860, 285900, 291010, 389160, 463210, 466560, 547960]
Test Games: [348620]
Recommended Games: [339800, np.int64(306040), 47790, np.int64(12520), np.int64(389160), np.int64(291010), np.int64(381640), np.int64(683670), np.int64(515230), np.int64(570380)]
