# Game Recommendation System

In [134]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix


### Data Loading and Preparation

In [135]:
def reduce_memory(df):
    """Downcast 64-bit numeric columns of ``df`` to 32-bit to save memory.

    ``float64`` columns become ``float32`` (this halves their size at the
    cost of float precision). ``int64`` columns become ``int32`` only when
    every value fits in the int32 range; columns holding larger values are
    left as ``int64`` because a narrower type cannot represent them.
    Columns of any other dtype are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for call-chaining convenience.
    """
    int32_limits = np.iinfo(np.int32)
    for col in df.columns:
        col_dtype = df[col].dtype
        if col_dtype == 'float64':
            df[col] = df[col].astype('float32')
        elif col_dtype == 'int64':
            values = df[col]
            # Only narrow when the whole value range is representable in int32.
            fits_int32 = values.empty or (
                values.min() >= int32_limits.min and values.max() <= int32_limits.max
            )
            if fits_int32:
                df[col] = values.astype('int32')
    return df

### Data Loading and Preparation
# Read the raw CSV exports and immediately downcast their 64-bit numeric
# columns to 32-bit (via reduce_memory) to lower memory usage.
games_df = reduce_memory(pd.read_csv('../data/external/games.csv'))
recommendations_df = reduce_memory(
    pd.read_csv('../data/external/recommendations.csv')
)

### Popular Games

In [136]:
# Add a popularity score: 40% weight on review count, 60% on positive ratio.
# NOTE(review): user_reviews is a raw count while positive_ratio looks like a
# 0-100 percentage, so the review count dominates this score in practice.
# Consider normalising both terms before weighting -- confirm intent.
popular_games_df = games_df.assign(
    popularity=lambda frame: (frame['user_reviews'] * 0.4) + (frame['positive_ratio'] * 0.6)
)

# Keep the 50 highest-scoring games and remember their app ids.
top_50_games = popular_games_df.sort_values('popularity', ascending=False).iloc[:50]
top_50_games_app_ids = top_50_games['app_id']

### Filtering the Tables to the Popular Games

In [165]:
# Catalogue rows for the games that made it into the top-50 list.
selected_games_df = games_df.loc[games_df['app_id'].isin(top_50_games_app_ids)]

# Show the selection as the cell output.
selected_games_df

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
3372,322170,Geometry Dash,2014-12-22,True,True,False,Very Positive,93,239081,4.0,0.0,0.0,True
6904,433850,Z1 Battle Royale,2018-02-28,True,False,False,Mixed,55,207328,0.0,0.0,0.0,True
11718,242760,The Forest,2018-04-30,True,False,False,Overwhelmingly Positive,95,416113,20.0,0.0,0.0,True
12688,582010,Monster Hunter: World,2018-08-09,True,False,False,Very Positive,87,233430,30.0,0.0,0.0,True
12689,1063730,New World,2021-09-28,True,False,False,Mostly Positive,70,222345,40.0,0.0,0.0,True
12711,550,Left 4 Dead 2,2009-11-16,True,True,True,Overwhelmingly Positive,97,574470,10.0,0.0,0.0,True
12712,1174180,Red Dead Redemption 2,2019-12-05,True,False,False,Very Positive,90,410074,20.0,0.0,0.0,True
12717,271590,Grand Theft Auto V,2015-04-13,True,False,False,Very Positive,86,1484122,0.0,0.0,0.0,True
12800,292030,The Witcher® 3: Wild Hunt,2015-05-18,True,False,False,Overwhelmingly Positive,96,668455,40.0,0.0,0.0,True
13035,105600,Terraria,2011-05-16,True,True,True,Overwhelmingly Positive,97,943413,10.0,0.0,0.0,True


In [138]:
# Keep only the columns of recommendations_df that are needed downstream.
recommendations_df = recommendations_df.loc[
    :, ['app_id', 'user_id', 'review_id', 'helpful', 'hours']
]

In [139]:
# Restrict the reviews to the top-50 popular games.
selected_recommendations_df = recommendations_df.loc[
    lambda frame: frame['app_id'].isin(top_50_games_app_ids)
]

In [140]:
# Preview the first five rows.
selected_recommendations_df.iloc[:5]

Unnamed: 0,app_id,user_id,review_id,helpful,hours
2,1085660,253880,2,2,336.5
7,730,63209,7,0,157.5
10,431960,199725,10,0,30.299999
11,1086940,85822,11,0,50.0
12,1938090,161081,12,0,46.700001


In [141]:
# Number of review rows that survived the top-50 filter.
len(selected_recommendations_df)

6615653

In [142]:
# Merge with games_df to get game titles.
# - Any 'title' column left over from a previous run of this cell is dropped
#   first, so re-running the cell does not produce title_x / title_y columns.
# - validate='many_to_one' makes the merge fail loudly if app_id is not unique
#   in the lookup table; duplicate lookup rows would duplicate reviews and
#   inflate the per-user hour totals computed afterwards.
selected_recommendations_df = (
    selected_recommendations_df
    .drop(columns='title', errors='ignore')
    .merge(
        games_df[['app_id', 'title']],
        on='app_id',
        how='inner',
        validate='many_to_one',
    )
)

In [143]:
# Collapse repeated (user, game) pairs into one row by summing their hours.
selected_recommendations_df = (
    selected_recommendations_df
    .groupby(['user_id', 'title'])['hours']
    .sum()
    .reset_index()
)

In [144]:
# Build the game x user matrix of played hours (rows: titles, columns: users);
# pairs without a review are filled with 0.
# NOTE(review): this materialises a dense frame with one column per user
# before it is converted to a sparse matrix later -- may be memory heavy.
user_game_matrix_hours = (
    selected_recommendations_df
    .set_index(['title', 'user_id'])['hours']
    .unstack('user_id')
    .fillna(0)
)

In [145]:
# Preview the first five game rows of the matrix.
user_game_matrix_hours.iloc[:5]

user_id,2,4,6,9,12,13,14,15,22,24,...,14306021,14306024,14306025,14306032,14306036,14306041,14306044,14306046,14306049,14306056
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ARK: Survival Evolved,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Among Us,18.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,112.0,0.0,0.0,0.0
Apex Legends™,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Baldur's Gate 3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,234.199997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bloons TD 6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.6,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [146]:
# Convert the game x user matrix to a SciPy CSR sparse matrix.
user_game_matrix_sparse = csr_matrix(user_game_matrix_hours.to_numpy())

In [147]:
# Show the sparse matrix summary (dtype, stored-element count, shape).
user_game_matrix_sparse

<Compressed Sparse Row sparse matrix of dtype 'float32'
	with 6615287 stored elements and shape (50, 4689682)>

In [148]:
# Pairwise cosine similarity between the game rows; the result is a dense
# square array with one row and one column per game.
similarity_scores = cosine_similarity(user_game_matrix_sparse, dense_output=True)

In [149]:
# Check the dimensions of the similarity matrix.
similarity_scores.shape

(50, 50)

In [169]:
# Game recommendation function
def recommend(game_title, user_game_matrix_hours, similarity_scores, top_n=5, games=None):
    """Return metadata for the games most similar to ``game_title``.

    Parameters
    ----------
    game_title : str
        Title to look up in ``user_game_matrix_hours.index``.
    user_game_matrix_hours : pandas.DataFrame
        Frame whose index holds the game titles, in the same order as the
        rows/columns of ``similarity_scores``.
    similarity_scores : array-like
        Square matrix; ``similarity_scores[i][j]`` is the similarity between
        the games at index positions ``i`` and ``j``.
    top_n : int, default 5
        Maximum number of recommendations. Values <= 0 yield an empty list.
    games : pandas.DataFrame, optional
        Metadata table with ``title``, ``date_release``, ``positive_ratio``
        and ``price_final`` columns. Defaults to the notebook-level
        ``games_df`` so existing calls keep working.

    Returns
    -------
    list of list, or str
        One ``[title, date_release, positive_ratio, price_final]`` entry per
        recommended game, most similar first. The string
        ``"Game not found in the dataset."`` is returned when ``game_title``
        is not in the matrix index.
    """
    if game_title not in user_game_matrix_hours.index:
        return "Game not found in the dataset."

    if games is None:
        games = games_df

    # Position of the queried game in the matrix / similarity rows.
    index = np.where(user_game_matrix_hours.index == game_title)[0][0]

    # Rank every *other* game by similarity. The queried game is excluded by
    # position rather than by assuming it sorts first: with tied scores (or a
    # zero self-similarity row) it is not guaranteed to be the top entry.
    ranked = sorted(
        (
            (position, score)
            for position, score in enumerate(similarity_scores[index])
            if position != index
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    limit = max(top_n, 0)
    data = []
    for position, _score in ranked:
        if len(data) >= limit:
            break
        matches = games[games['title'] == user_game_matrix_hours.index[position]]
        if matches.empty:
            # No metadata for this title: skip it instead of emitting an empty row.
            continue
        # Several rows may share a title; use the first one.
        record = [
            matches['title'].values[0],
            matches['date_release'].values[0],
            int(matches['positive_ratio'].values[0]),
            float(matches['price_final'].values[0]),
        ]
        # Convert NumPy scalars to plain Python values.
        data.append([x.item() if isinstance(x, np.generic) else x for x in record])

    return data

In [170]:
# Example usage
game_title = "ELDEN RING"  # Game title entered by the user
recommended_games = recommend(
    game_title,
    user_game_matrix_hours=user_game_matrix_hours,
    similarity_scores=similarity_scores,
)
print(f"Recommended games for '{game_title}':\n{recommended_games}")

Recommended games for 'ELDEN RING':
[['DARK SOULS™ III', '2016-04-11', 94, 30.0], ['Cyberpunk 2077', '2020-12-09', 80, 60.0], ['Monster Hunter: World', '2018-08-09', 87, 30.0], ['The Witcher® 3: Wild Hunt', '2015-05-18', 96, 40.0], ['Hades', '2020-09-17', 98, 25.0]]
