# Baseline Model

Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from scipy.sparse import hstack
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
import random
from collections import defaultdict
from tqdm.notebook import tqdm, trange
from tqdm.contrib import tzip
import numpy as np
from sklearn.metrics import ndcg_score

Datasets

In [2]:
games_df = pd.read_pickle('games_processed.pkl')
recommendations_df = pd.read_pickle('recommendations_processed.pkl')

print("Games DataFrame Shape:", games_df.shape)
print("Recommendations DataFrame Shape:", recommendations_df.shape)

Games DataFrame Shape: (4146, 7)
Recommendations DataFrame Shape: (1034570, 8)


Matrix Formation

In [3]:
content_features = games_df['tags'].fillna('')
numeric_features = games_df[['price_final', 'rating', 'user_reviews']]

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(content_features)
combined_features = hstack([tfidf_matrix, numeric_features])
similarity_matrix = cosine_similarity(combined_features)

Simple Content-Based System

In [4]:
def get_content_recommendations(game_id, similarity_matrix, games_df, n_recommendations=5):
    game_idx = games_df[games_df['app_id'] == game_id].index[0]
    similarity_scores = similarity_matrix[game_idx]
    similar_indices = similarity_scores.argsort()[::-1][1:n_recommendations+1]
    
    recommendations = []
    for idx in similar_indices:
        game_id = games_df.iloc[idx]['app_id']
        title = games_df.iloc[idx]['title']
        similarity = similarity_scores[idx]
        recommendations.append({
            'game_id': game_id,
            'title': title,
            'similarity_score': similarity
        })
    
    return recommendations

Simple Collaborative System

In [5]:
def get_collaborative_recommendations(user_id, recommendations_df, games_df, n_recommendations=5):
    user_games = recommendations_df[recommendations_df['user_id'] == user_id]
    user_positive_games = user_games[user_games['is_recommended'] == 1]['app_id'].tolist()
    
    if not user_positive_games:
        return []
    
    similar_users = recommendations_df[
        (recommendations_df['app_id'].isin(user_positive_games)) & 
        (recommendations_df['is_recommended'] == 1) &
        (recommendations_df['user_id'] != user_id)
    ]['user_id'].value_counts().head(5).index
    
    game_scores = {}
    for similar_user in similar_users:
        similar_user_games = recommendations_df[
            (recommendations_df['user_id'] == similar_user) & 
            (recommendations_df['is_recommended'] == 1)
        ]['app_id'].tolist()
        
        for game_id in similar_user_games:
            if game_id not in user_positive_games:
                if game_id not in game_scores:
                    game_scores[game_id] = 0
                game_scores[game_id] += 1
    
    recommendations = []
    for game_id, score in sorted(game_scores.items(), key=lambda x: x[1], reverse=True)[:n_recommendations]:
        game_title = games_df[games_df['app_id'] == game_id]['title'].values[0]
        recommendations.append({
            'game_id': game_id,
            'title': game_title,
            'collab_score': score
        })
    
    return recommendations

Combining Content-Based and Collaborative Systems to build a Hybrid System

In [6]:
def get_hybrid_recommendations(user_id, game_id, similarity_matrix, games_df, recommendations_df, 
                             n_recommendations=5, content_weight=0.6, collab_weight=0.4):

    # Get content-based recommendations
    content_recs = get_content_recommendations(game_id, similarity_matrix, games_df, n_recommendations*2)
    content_scores = {rec['game_id']: rec['similarity_score'] for rec in content_recs}
    
    # Get collaborative recommendations
    collab_recs = get_collaborative_recommendations(user_id, recommendations_df, games_df, n_recommendations*2)
    collab_scores = {rec['game_id']: rec['collab_score'] for rec in collab_recs}
    
    # Combine scores
    all_game_ids = set(content_scores.keys()) | set(collab_scores.keys())
    hybrid_scores = {}
    
    # Min-max scaling for each set of scores
    if content_scores:
        max_content = max(content_scores.values())
        min_content = min(content_scores.values())
        content_range = max_content - min_content
    
    if collab_scores:
        max_collab = max(collab_scores.values())
        min_collab = min(collab_scores.values())
        collab_range = max_collab - min_collab
    
    for game_id in all_game_ids:
        # Normalize content score
        if game_id in content_scores:
            norm_content = (content_scores[game_id] - min_content) / content_range if content_range > 0 else 0
        else:
            norm_content = 0
            
        # Normalize collab score
        if game_id in collab_scores:
            norm_collab = (collab_scores[game_id] - min_collab) / collab_range if collab_range > 0 else 0
        else:
            norm_collab = 0
            
        # Calculate hybrid score
        hybrid_scores[game_id] = (content_weight * norm_content + collab_weight * norm_collab)
    
    # Sort by hybrid score and get top N
    sorted_games = sorted(hybrid_scores.items(), key=lambda x: x[1], reverse=True)[:n_recommendations]
    
    # Format recommendations
    recommendations = []
    for game_id, score in sorted_games:
        title = games_df[games_df['app_id'] == game_id]['title'].iloc[0]
        recommendations.append({
            'game_id': game_id,
            'title': title
        })
    
    return recommendations

In [None]:
def create_train_val_test_split(recommendations_df, test_size=0.15, val_size=0.15, timestamp_col='date'):

    if timestamp_col in recommendations_df.columns:
        recommendations_df = recommendations_df.sort_values(timestamp_col)
    
    # Group by user
    user_groups = recommendations_df.groupby('user_id')
    
    train_data = []
    val_data = []
    test_data = []
    
    for user_id, user_data in user_groups:
        n_interactions = len(user_data)
        
        if n_interactions < 3:  # Need at least 3 interactions
            continue
            
        # Split chronologically
        n_test = max(1, int(n_interactions * test_size))
        n_val = max(1, int(n_interactions * val_size))
        
        user_train = user_data.iloc[:-n_test-n_val]
        user_val = user_data.iloc[-n_test-n_val:-n_test]
        user_test = user_data.iloc[-n_test:]
        
        train_data.append(user_train)
        val_data.append(user_val)
        test_data.append(user_test)
    
    return pd.concat(train_data), pd.concat(val_data), pd.concat(test_data)

train_df, val_df, test_df = create_train_val_test_split(recommendations_df)

print(f"Train set: {len(train_df)} recommendations")
print(f"Validation set: {len(val_df)} recommendations")
print(f"Test set: {len(test_df)} recommendations")

Train set: 78304 recommendations
Validation set: 36177 recommendations
Test set: 36177 recommendations


In [8]:
# Get a random user who has recommended games (is_recommended == 1)
users_with_recommendations = test_df[test_df['is_recommended'] == 1]['user_id'].unique()
sample_user = random.choice(users_with_recommendations)

# Get a random game that this user has recommended
user_recommended_games = test_df[(test_df['user_id'] == sample_user) & 
                               (test_df['is_recommended'] == 1)]['app_id'].values
sample_game = random.choice(user_recommended_games)
sample_game_title = games_df[games_df['app_id'] == sample_game]['title'].iloc[0]

print(f"Random User ID: {sample_user}")
print(f"Random Game ID: {sample_game}")
print(f"Random Game Title: {sample_game_title}\n")

print("Content-Based Recommendations:")
content_recs = get_content_recommendations(sample_game, similarity_matrix, games_df)
for i, rec in enumerate(content_recs, 1):
    print(f"{i}. {rec['title']} (Similarity: {rec['similarity_score']:.4f})")

print("\nCollaborative Recommendations:")
collab_recs = get_collaborative_recommendations(sample_user, recommendations_df, games_df)
for i, rec in enumerate(collab_recs, 1):
    print(f"{i}. {rec['title']} (Score: {rec['collab_score']})")

print("\nHybrid Recommendations:")
hybrid_recs = get_hybrid_recommendations(sample_user, sample_game, similarity_matrix, games_df, recommendations_df)
for i, rec in enumerate(hybrid_recs, 1):
    print(f"{i}. {rec['title']}")

Random User ID: 9282334
Random Game ID: 972660
Random Game Title: Spiritfarer®: Farewell Edition

Content-Based Recommendations:
1. Spiritfarer®: Farewell Edition - Digital Artbook (Similarity: 0.8998)
2. HipWitch (Similarity: 0.7094)
3. With You (Similarity: 0.7073)
4. Chicory: A Colorful Tale (Similarity: 0.7063)
5. Big Farm Story (Similarity: 0.7025)

Collaborative Recommendations:
1. Goose Goose Duck (Score: 2)
2. Eastward (Score: 2)
3. Superliminal (Score: 2)
4. Unrailed! (Score: 2)
5. When The Past Was Around (Score: 2)

Hybrid Recommendations:
1. Spiritfarer®: Farewell Edition - Digital Artbook
2. Superliminal
3. Goose Goose Duck
4. When The Past Was Around
5. Eastward


In [9]:
def calculate_metrics(recommended_items, relevant_items, k):

    recommended_items = recommended_items[:k]
    relevant_items = set(relevant_items)  # Convert to set 
    
    # Precision@k
    hits = len(set(recommended_items) & relevant_items)
    precision = hits / k if k > 0 else 0
    
    # Recall@k
    recall = hits / len(relevant_items) if len(relevant_items) > 0 else 0
    
    # F1@k
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    # NDCG@k calculation
    dcg = 0
    idcg = 0
    
    for i, item in enumerate(recommended_items):
        rel = 1 if item in relevant_items else 0
        dcg += rel / np.log2(i + 2)  
    
    n_rel = min(len(relevant_items), k)
    for i in range(n_rel):
        idcg += 1 / np.log2(i + 2)
    
    ndcg = dcg / idcg if idcg > 0 else 0
    
    return {
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'ndcg': ndcg
    }

In [10]:
def evaluate_content_based_system(df, similarity_matrix, games_df, k=5):

    metrics_sum = {'precision': 0, 'recall': 0, 'f1': 0, 'ndcg': 0}
    total_users = 0
    
    # Get total number of valid users for progress bar
    valid_users = sum(1 for _, user_data in df.groupby('user_id') 
                     if len(user_data[user_data['is_recommended'] == 1]['app_id']) >= 0)
    
    pbar = tqdm(total=valid_users, desc='Content-Based Evaluation', position=0)
    
    for user_id, user_data in df.groupby('user_id'):
        user_liked_games = user_data[user_data['is_recommended'] == 1]['app_id'].tolist()
        if len(user_liked_games) < 2:
            continue
            
        input_game = user_liked_games[0]
        test_games = set(user_liked_games[1:])
        
        try:
            recs = get_content_recommendations(input_game, similarity_matrix, games_df, n_recommendations=k)
            rec_ids = [rec['game_id'] for rec in recs]
            
            metrics = calculate_metrics(rec_ids, test_games, k)
            for metric, value in metrics.items():
                metrics_sum[metric] += value
            total_users += 1
            pbar.update(1)
            
        except Exception as e:
            continue
    
    pbar.close()
    
    avg_metrics = {
        metric: value/total_users if total_users > 0 else 0 
        for metric, value in metrics_sum.items()
    }
    
    print("\nContent-Based Metrics:")
    for metric, value in avg_metrics.items():
        print(f"{metric.upper()}@{k}: {value:.4f}")
    
    return avg_metrics

In [11]:
def evaluate_collaborative_system(train_df, test_df, games_df, k=5):
 
    metrics_sum = {'precision': 0, 'recall': 0, 'f1': 0, 'ndcg': 0}
    total_users = 0
    
    # Get test set ground truth
    print("Building test set ground truth...")
    test_user_likes = defaultdict(set)
    for _, row in tqdm(test_df[test_df['is_recommended'] == 1].iterrows(), 
                      desc='Processing test data', total=len(test_df[test_df['is_recommended'] == 1])):
        test_user_likes[row['user_id']].add(row['app_id'])

    users_in_test = test_df['user_id'].unique()
    pbar = tqdm(total=len(users_in_test), desc='Collaborative Evaluation')

    for user_id in users_in_test:
        recs = get_collaborative_recommendations(user_id, train_df, games_df, n_recommendations=k)
        recommended_games = [rec['game_id'] for rec in recs]

        if not recommended_games:
            pbar.update(1)
            continue

        relevant_games = test_user_likes[user_id]
        if len(relevant_games) > 0:
            metrics = calculate_metrics(recommended_games, relevant_games, k)
            for metric, value in metrics.items():
                metrics_sum[metric] += value
            total_users += 1
        
        pbar.update(1)

    pbar.close()

    avg_metrics = {
        metric: value/total_users if total_users > 0 else 0 
        for metric, value in metrics_sum.items()
    }
    
    print("\nCollaborative Filtering Metrics:")
    for metric, value in avg_metrics.items():
        print(f"{metric.upper()}@{k}: {value:.4f}")
    
    return avg_metrics

In [12]:
def evaluate_hybrid_system(test_df, similarity_matrix, games_df, recommendations_df, k=5, 
                         content_weight=0.6, collab_weight=0.4):

    metrics_sum = {'precision': 0, 'recall': 0, 'f1': 0, 'ndcg': 0}
    total_users = 0
    
    # Get test set ground truth
    test_user_likes = defaultdict(set)
    for _, row in test_df[test_df['is_recommended'] == 1].iterrows():
        test_user_likes[row['user_id']].add(row['app_id'])
    
    # Only evaluate users with sufficient test data
    valid_users = [user_id for user_id, games in test_user_likes.items() if len(games) >= 0]
    
    for user_id in tqdm(valid_users, desc=f'Hybrid Evaluation (w_content={content_weight:.1f})'):
        # Get a random game from user's liked games in training set
        user_train_games = recommendations_df[
            (recommendations_df['user_id'] == user_id) & 
            (recommendations_df['is_recommended'] == 1)
        ]['app_id'].tolist()
        
        if not user_train_games:
            continue
            
        input_game = random.choice(user_train_games)
        relevant_games = test_user_likes[user_id]
        
        try:
            recommendations = get_hybrid_recommendations(
                user_id=user_id,
                game_id=input_game,
                similarity_matrix=similarity_matrix,
                games_df=games_df,
                recommendations_df=recommendations_df,
                n_recommendations=k,
                content_weight=content_weight,
                collab_weight=collab_weight
            )
            
            rec_ids = [rec['game_id'] for rec in recommendations]
            metrics = calculate_metrics(rec_ids, relevant_games, k)
            
            for metric, value in metrics.items():
                metrics_sum[metric] += value
            total_users += 1
            
        except Exception as e:
            print(f"Error for user {user_id}: {str(e)}")
            continue
    
    # Calculate averages
    avg_metrics = {
        metric: value/total_users if total_users > 0 else 0 
        for metric, value in metrics_sum.items()
    }
    
    return avg_metrics

def tune_hybrid_weights(val_df, similarity_matrix, games_df, recommendations_df, k=5):

    best_metrics = {'precision': 0, 'recall': 0, 'f1': 0, 'ndcg': 0}
    best_weights = (0.5, 0.5)
    results = []
    
    weight_options = np.arange(0.1, 1.0, 0.1)
    
    print("Starting grid search for hybrid weights...")
    pbar_outer = tqdm(total=len(weight_options), desc='Grid Search Progress', position=0)
    
    for content_weight in weight_options:
        collab_weight = 1 - content_weight
        
        metrics = evaluate_hybrid_system(
            val_df, similarity_matrix, games_df, recommendations_df,
            k=k, content_weight=content_weight, collab_weight=collab_weight
        )
        
        results.append({
            'content_weight': content_weight,
            'collab_weight': collab_weight,
            **metrics
        })
        
        if metrics['f1'] > best_metrics['f1']:
            best_metrics = metrics
            best_weights = (content_weight, collab_weight)
        
        pbar_outer.update(1)
    
    pbar_outer.close()
    
    results_df = pd.DataFrame(results)
    print("\nGrid Search Results:")
    print(results_df)
    print(f"\nBest weights found: Content={best_weights[0]:.2f}, Collaborative={best_weights[1]:.2f}")
    print("\nBest validation metrics:")
    for metric, value in best_metrics.items():
        print(f"{metric.upper()}@{k}: {value:.4f}")
    
    return best_weights, best_metrics

In [13]:
print("Starting comprehensive evaluation of all recommendation systems...")

print("\n1. Evaluating Content-Based System...")
content_metrics = evaluate_content_based_system(test_df, similarity_matrix, games_df, k=5)

print("\n2. Evaluating Collaborative Filtering System...")
collab_metrics = evaluate_collaborative_system(train_df, test_df, games_df, k=5)

print("\n3. Tuning and Evaluating Hybrid System...")
best_weights, best_val_metrics = tune_hybrid_weights(val_df, similarity_matrix, games_df, recommendations_df, k=5)

print("\n4. Final Hybrid System Evaluation...")
content_weight, collab_weight = best_weights
hybrid_metrics = evaluate_hybrid_system(
    test_df, similarity_matrix, games_df, recommendations_df,
    k=5, content_weight=content_weight, collab_weight=collab_weight
)

# Print final comparison
print("\nFinal Results Summary:")
print("=" * 50)
metrics = ['precision', 'recall', 'f1', 'ndcg']
systems = {
    'Content-Based': content_metrics,
    'Collaborative': collab_metrics,
    'Hybrid': hybrid_metrics
}

for metric in metrics:
    print(f"\n{metric.upper()}@5:")
    for system, results in systems.items():
        print(f"{system}: {results[metric]:.4f}")

print("\nBest Hybrid Configuration:")
print(f"Content Weight: {content_weight:.2f}")
print(f"Collaborative Weight: {collab_weight:.2f}")

Starting comprehensive evaluation of all recommendation systems...

1. Evaluating Content-Based System...


Content-Based Evaluation:   0%|          | 0/507 [00:00<?, ?it/s]


Content-Based Metrics:
PRECISION@5: 0.0047
RECALL@5: 0.0155
F1@5: 0.0069
NDCG@5: 0.0106

2. Evaluating Collaborative Filtering System...
Building test set ground truth...


Processing test data:   0%|          | 0/32744 [00:00<?, ?it/s]

Collaborative Evaluation:   0%|          | 0/34975 [00:00<?, ?it/s]


Collaborative Filtering Metrics:
PRECISION@5: 0.0084
RECALL@5: 0.0416
F1@5: 0.0140
NDCG@5: 0.0273

3. Tuning and Evaluating Hybrid System...
Starting grid search for hybrid weights...


Grid Search Progress:   0%|          | 0/9 [00:00<?, ?it/s]

Hybrid Evaluation (w_content=0.1):   0%|          | 0/503 [00:00<?, ?it/s]

Hybrid Evaluation (w_content=0.2):   0%|          | 0/503 [00:00<?, ?it/s]

Hybrid Evaluation (w_content=0.3):   0%|          | 0/503 [00:00<?, ?it/s]

Hybrid Evaluation (w_content=0.4):   0%|          | 0/503 [00:00<?, ?it/s]

Hybrid Evaluation (w_content=0.5):   0%|          | 0/503 [00:00<?, ?it/s]

Hybrid Evaluation (w_content=0.6):   0%|          | 0/503 [00:00<?, ?it/s]

Hybrid Evaluation (w_content=0.7):   0%|          | 0/503 [00:00<?, ?it/s]

Hybrid Evaluation (w_content=0.8):   0%|          | 0/503 [00:00<?, ?it/s]

Hybrid Evaluation (w_content=0.9):   0%|          | 0/503 [00:00<?, ?it/s]


Grid Search Results:
   content_weight  collab_weight  precision    recall        f1      ndcg
0             0.1            0.9   0.001988  0.003214  0.002337  0.002209
1             0.2            0.8   0.002386  0.004887  0.003075  0.003732
2             0.3            0.7   0.003976  0.007522  0.005100  0.005326
3             0.4            0.6   0.002783  0.004873  0.003257  0.003958
4             0.5            0.5   0.002386  0.004639  0.003124  0.003354
5             0.6            0.4   0.006362  0.012577  0.008020  0.011407
6             0.7            0.3   0.002783  0.004876  0.003361  0.004647
7             0.8            0.2   0.006362  0.012894  0.008232  0.012592
8             0.9            0.1   0.005169  0.009711  0.006291  0.008328

Best weights found: Content=0.80, Collaborative=0.20

Best validation metrics:
PRECISION@5: 0.0064
RECALL@5: 0.0129
F1@5: 0.0082
NDCG@5: 0.0126

4. Final Hybrid System Evaluation...


Hybrid Evaluation (w_content=0.8):   0%|          | 0/507 [00:00<?, ?it/s]


Final Results Summary:

PRECISION@5:
Content-Based: 0.0047
Collaborative: 0.0084
Hybrid: 0.0032

RECALL@5:
Content-Based: 0.0155
Collaborative: 0.0416
Hybrid: 0.0069

F1@5:
Content-Based: 0.0069
Collaborative: 0.0140
Hybrid: 0.0043

NDCG@5:
Content-Based: 0.0106
Collaborative: 0.0273
Hybrid: 0.0067

Best Hybrid Configuration:
Content Weight: 0.80
Collaborative Weight: 0.20
