# Game Recommendation System

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from fuzzywuzzy import process

### Data Loading and Preparation

In [2]:
# Load the raw CSV exports.
games_df = pd.read_csv('../data/external/games.csv')
# Only these five recommendation columns are used by the rest of the notebook,
# so the remaining columns of the file are never read into memory.
recommendations_df = pd.read_csv(
    '../data/external/recommendations.csv',
    usecols=['app_id', 'user_id', 'helpful', 'is_recommended', 'hours'],
)

### Popular Games

In [3]:
# Show wide columns (such as the image URLs) in full instead of truncating them.
pd.set_option('display.max_colwidth', None)

# Keep only the columns this notebook works with.
kept_columns = ['app_id', 'title', 'date_release', 'positive_ratio', 'user_reviews', 'price_final']
games_df = games_df[kept_columns]

# Popularity score = 0.4 * number of user reviews + 0.6 * positive-review ratio.
# NOTE(review): in the table displayed below, review counts are in the millions
# while ratios stay under 100, so the ranking is driven almost entirely by the
# review count — confirm that this weighting is intended.
popularity_score = games_df['user_reviews'] * 0.4 + games_df['positive_ratio'] * 0.6
popular_games_df = games_df.assign(popularity=popularity_score)

# The 1000 highest-scoring games, best first.
ranked_games = popular_games_df.sort_values(by='popularity', ascending=False)
top_games = ranked_games.head(1000)
top_games_app_ids = top_games['app_id']

# Discard the old row index and index the table by app_id instead.
top_games = top_games.reset_index(drop=True).set_index('app_id')

# Add the Steam header-image URL for every game.
top_games['image_url'] = [
    f"https://cdn.akamai.steamstatic.com/steam/apps/{app_id}/header.jpg"
    for app_id in top_games.index
]

top_games.head(20)

Unnamed: 0_level_0,title,date_release,positive_ratio,user_reviews,price_final,popularity,image_url
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
730,Counter-Strike: Global Offensive,2012-08-21,88,7494460,15.0,2997836.8,https://cdn.akamai.steamstatic.com/steam/apps/730/header.jpg
578080,PUBG: BATTLEGROUNDS,2017-12-21,57,2217226,0.0,886924.6,https://cdn.akamai.steamstatic.com/steam/apps/578080/header.jpg
570,Dota 2,2013-07-09,82,2045628,0.0,818300.4,https://cdn.akamai.steamstatic.com/steam/apps/570/header.jpg
271590,Grand Theft Auto V,2015-04-13,86,1484122,0.0,593700.4,https://cdn.akamai.steamstatic.com/steam/apps/271590/header.jpg
359550,Tom Clancy's Rainbow Six® Siege,2015-12-01,86,993312,20.0,397376.4,https://cdn.akamai.steamstatic.com/steam/apps/359550/header.jpg
440,Team Fortress 2,2007-10-10,93,985819,0.0,394383.4,https://cdn.akamai.steamstatic.com/steam/apps/440/header.jpg
105600,Terraria,2011-05-16,97,943413,10.0,377423.4,https://cdn.akamai.steamstatic.com/steam/apps/105600/header.jpg
4000,Garry's Mod,2006-11-29,96,853733,10.0,341550.8,https://cdn.akamai.steamstatic.com/steam/apps/4000/header.jpg
252490,Rust,2018-02-08,87,786668,40.0,314719.4,https://cdn.akamai.steamstatic.com/steam/apps/252490/header.jpg
1172470,Apex Legends™,2020-11-04,80,713182,0.0,285320.8,https://cdn.akamai.steamstatic.com/steam/apps/1172470/header.jpg


### Top 50 Games


In [4]:
# The 50 most popular games.
# `top_games` is already sorted by popularity (descending), indexed by app_id
# and carries the image_url column, so its first 50 rows are exactly the top 50.
# Reusing it avoids a second full sort and a duplicated URL-building step.
# `.copy()` keeps the two frames independent of each other.
top_50_games = top_games.head(50).copy()
top_50_games.head()

Unnamed: 0_level_0,title,date_release,positive_ratio,user_reviews,price_final,popularity,image_url
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
730,Counter-Strike: Global Offensive,2012-08-21,88,7494460,15.0,2997836.8,https://cdn.akamai.steamstatic.com/steam/apps/730/header.jpg
578080,PUBG: BATTLEGROUNDS,2017-12-21,57,2217226,0.0,886924.6,https://cdn.akamai.steamstatic.com/steam/apps/578080/header.jpg
570,Dota 2,2013-07-09,82,2045628,0.0,818300.4,https://cdn.akamai.steamstatic.com/steam/apps/570/header.jpg
271590,Grand Theft Auto V,2015-04-13,86,1484122,0.0,593700.4,https://cdn.akamai.steamstatic.com/steam/apps/271590/header.jpg
359550,Tom Clancy's Rainbow Six® Siege,2015-12-01,86,993312,20.0,397376.4,https://cdn.akamai.steamstatic.com/steam/apps/359550/header.jpg


### Table Formatting

In [5]:
# Keep only the columns needed from recommendations_df, then filter the reviews.
recommendations_df = recommendations_df[['app_id', 'user_id', 'helpful', 'is_recommended', 'hours']]

# A review is kept when all three conditions hold:
#   - strictly more than 50 hours played,
#   - at least 3 "helpful" votes,
#   - the reviewer recommended the game.
keep_review = (
    (recommendations_df['hours'] > 50)
    & (recommendations_df['helpful'] >= 3)
    & recommendations_df['is_recommended']
)
recommendations_df = recommendations_df[keep_review]

In [6]:
# Keep only the reviews whose game is among the top-games app ids.
is_top_game_review = recommendations_df['app_id'].isin(top_games_app_ids)
selected_recommendations_df = recommendations_df[is_top_game_review]

In [7]:
# Show the first few rows.
selected_recommendations_df.head()

Unnamed: 0,app_id,user_id,helpful,is_recommended,hours
179,292030,8581786,5,True,145.9
269,270880,5772308,7,True,165.9
316,975370,11202425,3,True,177.0
322,570,11564023,3,True,513.5
389,570,3037714,12,True,103.7


In [8]:
# Number of reviews that survived the filters.
len(selected_recommendations_df)

1153001

In [9]:
# Attach each game's title by joining on app_id against the top_games table
# (inner join, so only reviews with a matching app_id are kept).
title_by_app_id = top_games[['title']].reset_index()
selected_recommendations_df = selected_recommendations_df.merge(
    title_by_app_id,
    on='app_id',
    how='inner',
)

In [10]:
# Collapse repeated (user, game title) pairs into a single row by summing their hours.
selected_recommendations_df = (
    selected_recommendations_df
    .groupby(['user_id', 'title'], as_index=False)['hours']
    .sum()
)

## Collaborative Filtering

In [11]:
# Build the game x user hours matrix: one row per game title, one column per
# user_id, each cell holding the summed hours.
hours_by_title_and_user = selected_recommendations_df.pivot_table(
    index='title',
    columns='user_id',
    values='hours',
    aggfunc='sum',
)
# Pairs with no review are missing after the pivot; treat them as 0 hours.
user_game_matrix_hours = hours_by_title_and_user.fillna(0)

In [12]:
# Preview the first rows of the matrix (rows are game titles, columns are user ids).
user_game_matrix_hours.head()

user_id,0,13,14,34,39,85,92,154,158,176,...,14305851,14305857,14305869,14305888,14305913,14305942,14305966,14305984,14306000,14306057
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100% Orange Juice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20 Minutes Till Dawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7 Days to Die,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
911 Operator,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Dance of Fire and Ice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Convert the (mostly zero) hours matrix to SciPy's compressed sparse row format.
dense_hours = user_game_matrix_hours.to_numpy()
user_game_matrix_sparse = csr_matrix(dense_hours)

In [14]:
# Compute pairwise cosine similarity between the rows of the sparse matrix.
# Each row is one game title, so the result is a title x title array.
similarity_scores = cosine_similarity(user_game_matrix_sparse)

In [15]:
# Inspect the dimensions of the similarity array.
similarity_scores.shape

(952, 952)

In [16]:
# Recommendation function
def recommend(game_title, user_game_matrix_hours, similarity_scores, top_n=10, games_catalog=None):
    """Return details of the games most similar to ``game_title``.

    The query is lower-cased and fuzzy-matched against the lower-cased titles in
    ``user_game_matrix_hours.index``. The best match selects one row of
    ``similarity_scores``, and every other game is ranked by its score in that row.

    Args:
        game_title: Title typed by the user; matching is case-insensitive and fuzzy.
        user_game_matrix_hours: DataFrame whose index holds the game titles. Row
            ``i`` of ``similarity_scores`` is expected to belong to the ``i``-th title.
        similarity_scores: Square array of pairwise similarities between titles.
        top_n: Maximum number of recommendations to return.
        games_catalog: DataFrame with ``title``, ``date_release``,
            ``positive_ratio``, ``price_final`` and ``image_url`` columns, used to
            describe each recommendation. Defaults to the notebook-level
            ``top_games`` table.

    Returns:
        A list of ``[title, date_release, positive_ratio, price_final, image_url]``
        lists ordered from most to least similar, or the string
        ``"Game not found in the dataset."`` when no title reaches the fuzzy-match
        score cutoff of 80. Titles missing from ``games_catalog`` are skipped.
    """
    if games_catalog is None:
        games_catalog = top_games

    query = game_title.lower()
    normalized_titles = user_game_matrix_hours.index.str.lower()

    # Fuzzy matching to find the closest title.
    closest_match = process.extractOne(query, normalized_titles, score_cutoff=80)
    if closest_match is None:
        return "Game not found in the dataset."

    index = np.where(normalized_titles == closest_match[0])[0][0]

    # Rank all games by similarity (stable, so ties keep their original order)
    # and drop the queried game by position. Simply skipping the first ranked
    # entry is not safe: a tie can put another game ahead of the query itself.
    scores = np.asarray(similarity_scores[index])
    ranking = np.argsort(-scores, kind='stable')
    neighbours = [int(pos) for pos in ranking if pos != index][:max(top_n, 0)]

    # Build the lower-cased title lookup once rather than once per recommendation.
    catalog_by_title = (
        games_catalog
        .assign(_title_key=games_catalog['title'].str.lower())
        .drop_duplicates('_title_key')
        .set_index('_title_key')
    )

    data = []
    for pos in neighbours:
        title_key = normalized_titles[pos]
        if title_key not in catalog_by_title.index:
            continue
        row = catalog_by_title.loc[title_key]
        fields = [
            row['title'],
            row['date_release'],
            int(row['positive_ratio']),
            float(row['price_final']),
            row['image_url'],
        ]
        # Convert any remaining NumPy scalars to plain Python values.
        data.append([x.item() if isinstance(x, np.generic) else x for x in fields])

    return data

In [17]:
# Example usage
game_title = "Elden Ring"  # Game title entered by the user
recommended_games = recommend(game_title, user_game_matrix_hours, similarity_scores)

# recommend() returns a plain message string when no title matches; print it
# as-is instead of iterating over it character by character.
if isinstance(recommended_games, str):
    print(recommended_games)
else:
    print(f"Recommended games for '{game_title}':")
    for game in recommended_games:
        print(game)

Recommended games for 'Elden Ring':
['DARK SOULS™ III', '2016-04-11', 94, 30.0, 'https://cdn.akamai.steamstatic.com/steam/apps/374320/header.jpg']
['Sekiro™: Shadows Die Twice - GOTY Edition', '2019-03-21', 95, 60.0, 'https://cdn.akamai.steamstatic.com/steam/apps/814380/header.jpg']
['DARK SOULS™: REMASTERED', '2018-05-23', 89, 20.0, 'https://cdn.akamai.steamstatic.com/steam/apps/570940/header.jpg']
['DARK SOULS™ II: Scholar of the First Sin', '2015-04-01', 85, 20.0, 'https://cdn.akamai.steamstatic.com/steam/apps/335300/header.jpg']
['Nioh 2 – The Complete Edition', '2021-02-05', 88, 49.99, 'https://cdn.akamai.steamstatic.com/steam/apps/1325200/header.jpg']
['The Witcher® 3: Wild Hunt', '2015-05-18', 96, 40.0, 'https://cdn.akamai.steamstatic.com/steam/apps/292030/header.jpg']
['Cyberpunk 2077', '2020-12-09', 80, 60.0, 'https://cdn.akamai.steamstatic.com/steam/apps/1091500/header.jpg']
['Nioh: Complete Edition', '2017-11-07', 79, 49.99, 'https://cdn.akamai.steamstatic.com/steam/apps/485

In [18]:
import os
import pickle
# Define file paths
user_game_matrix_hours_path = '../data/processed/user_game_matrix_hours.pkl'
similarity_scores_path = '../data/processed/similarity_scores.pkl'
top_games_path = '../data/processed/top_games.pkl'
top_50_games_path = '../data/processed/top_50_games.pkl'

# Map each output path to the object that should be stored there.
artifacts_to_save = {
    user_game_matrix_hours_path: user_game_matrix_hours,
    similarity_scores_path: similarity_scores,
    top_games_path: top_games,
    top_50_games_path: top_50_games,
}

# Write each artifact only if its file does not exist yet.
# NOTE(review): an existing file is never refreshed, so a stale pickle is kept
# even when the objects above were recomputed — delete the file to regenerate it.
for artifact_path, artifact in artifacts_to_save.items():
    if os.path.exists(artifact_path):
        print(f"{artifact_path} already exists.")
        continue
    # Create the output directory on first use so open() cannot fail on it.
    os.makedirs(os.path.dirname(artifact_path), exist_ok=True)
    # The context manager closes (and flushes) the file handle after the dump.
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)
    print(f"{artifact_path} has been created and saved.")

../data/processed/user_game_matrix_hours.pkl already exists.
../data/processed/similarity_scores.pkl already exists.
../data/processed/top_games.pkl already exists.
../data/processed/top_50_games.pkl already exists.


# Test ve Performans

### Veriyi Eğitim ve Test Setlerine Ayırma

In [52]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

In [53]:
# Random train/test split of the user-game matrix.
def split_data(matrix, test_size=0.1, random_state=42):
    """Randomly split ``matrix`` into a train part and a test part.

    Args:
        matrix: Array-like or DataFrame accepted by ``train_test_split``.
        test_size: Test share passed through to ``train_test_split``
            (default 0.1).
        random_state: Seed passed through to ``train_test_split`` so the
            split is reproducible (default 42).

    Returns:
        A ``(train, test)`` tuple as produced by ``train_test_split``.
    """
    train, test = train_test_split(matrix, test_size=test_size, random_state=random_state)
    return train, test

In [54]:
# Split the hours matrix into train and test parts.
# NOTE(review): the rows of user_game_matrix_hours are game titles, so this
# holds out whole games rather than users or individual reviews — confirm
# that this is the intended evaluation split.
train_matrix, test_matrix = split_data(user_game_matrix_hours)

### MAE Hesaplama


In [55]:
# Mean absolute error between the true values and the predicted values.
def calculate_mae(y_true, y_pred):
    """Return ``mean_absolute_error(y_true, y_pred)``.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, aligned with ``y_true``.
    """
    return mean_absolute_error(y_true, y_pred)

### R² Hesaplama

In [56]:
# R² score between the true values and the predicted values.
def calculate_r2(y_true, y_pred):
    """Return ``r2_score(y_true, y_pred)``.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, aligned with ``y_true``.
    """
    return r2_score(y_true, y_pred)

### Simülasyon: Gerçek değerler ve model tahminleri

In [57]:
# True hours from the test part of the matrix, flattened to one dimension.
true_hours = test_matrix.values.flatten()
# Simulated predictions: the true hours plus Gaussian noise (standard deviation 10).
# The generator is seeded so the metrics reported below are reproducible on re-run.
# NOTE(review): these values are not produced by the recommender, so the MAE and
# R² computed from them reflect only the injected noise.
rng = np.random.default_rng(42)
predicted_hours = true_hours + rng.normal(scale=10, size=true_hours.shape)  # Simulated model predictions

In [58]:
# MAE performance measurement: compare the true hours with the simulated predictions.
mae = calculate_mae(true_hours, predicted_hours)
print(f"MAE (Ortalama Mutlak Hata): {mae:.2f}")

MAE (Ortalama Mutlak Hata): 7.98


In [59]:
# R² performance measurement: compare the true hours with the simulated predictions.
r2 = calculate_r2(true_hours, predicted_hours)
print(f"R² (R-kare): {r2:.2f}")

R² (R-kare): 0.38
