# Game Recommendation System

In [38]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from fuzzywuzzy import process

### Data Loading and Preparation

In [2]:
# Read the games table and the recommendations table from their CSV files.
games_df, recommendations_df = map(
    pd.read_csv,
    ('../data/external/games.csv', '../data/external/recommendations.csv'),
)

### Popular Games

In [3]:
# Select the required columns.
games_df = games_df.loc[:, ['app_id', 'title', 'date_release', 'positive_ratio', 'user_reviews', 'price_final']]

# Add the popularity column: 0.4 * user_reviews + 0.6 * positive_ratio.
# NOTE(review): the two inputs are on very different scales (e.g. 7,494,460
# reviews versus a positive_ratio of 88 for the top-ranked game), so the score
# is driven almost entirely by the review count - confirm this is intended.
popular_games_df = games_df.assign(
    popularity=lambda frame: (frame['user_reviews'] * 0.4) + (frame['positive_ratio'] * 0.6)
)

# Rank by popularity (highest first) and keep the first 1000 games.
top_games = popular_games_df.sort_values('popularity', ascending=False).iloc[:1000]
top_games_app_ids = top_games.loc[:, 'app_id']

In [4]:
# Preview the 20 highest-ranked games.
top_games.iloc[:20]

Unnamed: 0,app_id,title,date_release,positive_ratio,user_reviews,price_final,popularity
14398,730,Counter-Strike: Global Offensive,2012-08-21,88,7494460,15.0,2997836.8
47770,578080,PUBG: BATTLEGROUNDS,2017-12-21,57,2217226,0.0,886924.6
13176,570,Dota 2,2013-07-09,82,2045628,0.0,818300.4
12717,271590,Grand Theft Auto V,2015-04-13,86,1484122,0.0,593700.4
14535,359550,Tom Clancy's Rainbow Six® Siege,2015-12-01,86,993312,20.0,397376.4
47380,440,Team Fortress 2,2007-10-10,93,985819,0.0,394383.4
13035,105600,Terraria,2011-05-16,97,943413,10.0,377423.4
15363,4000,Garry's Mod,2006-11-29,96,853733,10.0,341550.8
13173,252490,Rust,2018-02-08,87,786668,40.0,314719.4
14376,1172470,Apex Legends™,2020-11-04,80,713182,0.0,285320.8


### Table Formatting

In [18]:
# Select the required columns from recommendations_df.
recommendations_df = recommendations_df.loc[:, ['app_id', 'user_id', 'helpful', 'is_recommended', 'hours']]
# Keep only rows with more than 30 hours played, at least 3 helpful votes,
# and is_recommended set to True.
recommendations_df = recommendations_df.loc[
    lambda reviews: (reviews['hours'] > 30)
    & (reviews['helpful'] >= 3)
    & reviews['is_recommended']
]

In [19]:
# Keep only the recommendations whose app_id belongs to one of the top games.
selected_recommendations_df = recommendations_df.loc[
    lambda reviews: reviews['app_id'].isin(top_games_app_ids)
]

In [20]:
# Show the first five rows.
selected_recommendations_df.iloc[:5]

Unnamed: 0,app_id,user_id,helpful,is_recommended,hours
179,292030,8581786,5,True,145.9
269,270880,5772308,7,True,165.9
330,686810,12537537,76,True,35.8
389,570,3037714,12,True,103.7
422,435150,6334645,83,True,199.9


In [21]:
# Number of recommendation rows that survived the filters.
len(selected_recommendations_df)

777569

In [22]:
# Attach each game's title to its recommendation rows (joined on app_id).
# NOTE(review): this rebinds selected_recommendations_df, so re-running this
# cell on its own merges onto an already-merged frame; re-run from the cell
# that first creates selected_recommendations_df instead.
selected_recommendations_df = pd.merge(
    selected_recommendations_df,
    games_df[['app_id', 'title']],
    on='app_id',
)

In [23]:
# Remove repeated (user, game) combinations by summing their hours.
selected_recommendations_df = (
    selected_recommendations_df
    .groupby(['user_id', 'title'], as_index=False)['hours']
    .sum()
)

## Collaborative Filtering

In [24]:
# Build the hours matrix: one row per game title, one column per user_id,
# with 0 where a (title, user) pair has no recorded hours.
# Note that, despite the variable name, games are the rows and users the columns.
user_game_matrix_hours = pd.pivot_table(
    selected_recommendations_df,
    values='hours',
    index='title',
    columns='user_id',
    aggfunc='sum',
).fillna(0)

In [25]:
# Preview the first five game rows of the hours matrix.
user_game_matrix_hours.iloc[:5]

user_id,0,34,92,154,158,200,252,293,297,307,...,14305774,14305792,14305824,14305851,14305857,14305869,14305888,14305966,14305984,14306046
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100% Orange Juice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20 Minutes Till Dawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7 Days to Die,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
911 Operator,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Dance of Fire and Ice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
# Convert the dense hours matrix to a SciPy CSR sparse matrix.
user_game_matrix_sparse = csr_matrix(user_game_matrix_hours.to_numpy())

In [27]:
# Compute the pairwise cosine similarity between the rows (games) of the sparse matrix.
similarity_scores = cosine_similarity(X=user_game_matrix_sparse)

In [28]:
# Dimensions of the game-to-game similarity matrix.
np.shape(similarity_scores)

(954, 954)

In [39]:
# Recommendation function
def recommend(game_title, user_game_matrix_hours, similarity_scores, top_n=10, games=None):
    """Return catalogue details for the games most similar to ``game_title``.

    The query is lower-cased and fuzzy-matched (score cutoff 80) against the
    lower-cased index of ``user_game_matrix_hours``. The row of
    ``similarity_scores`` at the matched position is ranked from highest to
    lowest score, the queried game itself is removed, and the first ``top_n``
    remaining games are looked up (case-insensitively, by title) in the
    catalogue.

    Parameters
    ----------
    game_title : str
        Game name typed by the user; matching is case-insensitive and fuzzy.
    user_game_matrix_hours : pandas.DataFrame
        Frame whose index holds the game titles. Only the index is read.
    similarity_scores : array-like
        ``similarity_scores[i]`` must be the scores of game ``i`` against every
        game. Assumed to be in the same order as the index of
        ``user_game_matrix_hours`` - confirm against the caller.
    top_n : int, default 10
        Maximum number of recommendations; values <= 0 yield an empty list.
    games : pandas.DataFrame, optional
        Catalogue with ``title``, ``date_release``, ``positive_ratio`` and
        ``price_final`` columns. Defaults to the notebook-level ``games_df``.

    Returns
    -------
    list of list, or str
        One ``[title, date_release, positive_ratio, price_final]`` row per
        recommended game, best match first. Games without a catalogue entry
        are skipped. If no title reaches the fuzzy-match cutoff, the string
        ``"Game not found in the dataset."`` is returned instead.
    """
    query = game_title.lower()
    normalized_titles = user_game_matrix_hours.index.str.lower()

    # Fuzzy matching to find the closest title. A plain list is passed so the
    # matcher iterates ordinary strings rather than a pandas Index.
    closest_match = process.extractOne(query, list(normalized_titles), score_cutoff=80)
    if closest_match is None:
        return "Game not found in the dataset."

    # First position whose lower-cased title equals the matched title.
    index = int(np.flatnonzero(normalized_titles == closest_match[0])[0])

    # Rank every game by similarity (stable, so ties keep matrix order) and
    # drop the queried game by position. Slicing off the first entry instead
    # would return the queried game itself whenever another game ties with it
    # at the top of the ranking.
    scores = np.asarray(similarity_scores[index], dtype=float)
    ranked = np.argsort(-scores, kind='stable')
    neighbours = ranked[ranked != index][:max(top_n, 0)]
    wanted_titles = [normalized_titles[position] for position in neighbours]

    # Lower-case the catalogue titles once (not once per neighbour) and keep a
    # single row per lower-cased title so every output row has four fields.
    catalogue = games_df if games is None else games
    keyed = catalogue.assign(_title_key=catalogue['title'].str.lower())
    metadata = (
        keyed[keyed['_title_key'].isin(wanted_titles)]
        .drop_duplicates('_title_key')
        .set_index('_title_key')
    )

    data = []
    for title_key in wanted_titles:
        if title_key not in metadata.index:
            # No catalogue entry for this title: skip it rather than emit an empty row.
            continue
        row = metadata.loc[title_key]
        item = [
            row['title'],
            row['date_release'],
            int(row['positive_ratio']),
            float(row['price_final']),
        ]
        # Convert any remaining NumPy scalars to plain Python values.
        data.append([x.item() if isinstance(x, np.generic) else x for x in item])

    return data

In [45]:
# Example usage
game_title = "Elden Ring"  # game name entered by the user
recommended_games = recommend(game_title, user_game_matrix_hours, similarity_scores)

if isinstance(recommended_games, str):
    # recommend() returns a plain message when no title matches; print it as
    # is instead of iterating over it character by character.
    print(recommended_games)
else:
    print(f"Recommended games for '{game_title}':")
    for game in recommended_games:
        print(game)

Recommended games for 'Elden Ring':
['Sekiro™: Shadows Die Twice - GOTY Edition', '2019-03-21', 95, 60.0]
['DARK SOULS™ III', '2016-04-11', 94, 30.0]
['DARK SOULS™: REMASTERED', '2018-05-23', 89, 20.0]
['XCOM®: Chimera Squad', '2020-04-23', 72, 2.99]
['Cyberpunk 2077', '2020-12-09', 80, 60.0]
['DARK SOULS™ II: Scholar of the First Sin', '2015-04-01', 85, 20.0]
['Nioh: Complete Edition', '2017-11-07', 79, 49.99]
['The Witcher® 3: Wild Hunt', '2015-05-18', 96, 40.0]
['Nioh 2 – The Complete Edition', '2021-02-05', 88, 49.99]
['DEATHLOOP', '2021-09-13', 77, 59.99]
