In [6]:
import requests
from dotenv import load_dotenv
import os

# Smoke test of the RAWG API: search for "cyberpunk" and print the first hit's
# background image URL.

# `load_dotenv` was imported but never called, so a key stored in a .env file
# was never loaded. Existing environment variables are not overridden.
load_dotenv()

API_KEY = os.getenv("RAWG_APIKEY")
if not API_KEY:
    raise RuntimeError("RAWG_APIKEY is not set; export it or add it to a .env file.")

# Pass query parameters via `params` so they are URL-encoded and the key is not
# spliced into the URL string by hand; always bound the request with a timeout.
r = requests.get(
    "https://api.rawg.io/api/games",
    params={"key": API_KEY, "search": "cyberpunk"},
    timeout=10,
)
r.raise_for_status()  # fail loudly on 4xx/5xx instead of a KeyError further down
data = r.json()

results = data.get("results") or []
print(results[0]["background_image"] if results else "No results returned.")

https://media.rawg.io/media/screenshots/4fe/4fe64c4575199aa9533e8222afc18a8a.jpg


In [62]:
import pandas as pd
import os

def load_csv_or_none(path):
    """Read the CSV at `path` into a DataFrame, or report it missing.

    Parameters
    ----------
    path : str
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame, or None (after printing a notice) when the file
        does not exist.
    """
    if os.path.exists(path):
        return pd.read_csv(path, low_memory=False)
    print(f"File '{path}' not found.")
    return None


# Each name is None when its file is absent; later cells use these frames directly.
recommendation = load_csv_or_none("./Dataset/recommendations.csv")
games = load_csv_or_none("./Dataset/games.csv")
users = load_csv_or_none("./Dataset/users.csv")
steam_games = load_csv_or_none("./Dataset/steam_games.csv")

if steam_games is not None:
    display(steam_games.head())

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,...,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,['English'],...,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,0,4.99,0,Jolt Project: The army now has a new robotics ...,"['English', 'Portuguese - Brazil']",...,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.0,0,ABOUT THE GAME Play as a hacker who has arrang...,"['English', 'Spanish - Spain']",...,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...


In [63]:
# Keep only the identifying and categorical columns of the Steam metadata.
steam_games_clean = steam_games[["AppID", "Name", "Categories", "Genres"]]

# Only the last expression of a cell is rendered, so the first preview has to
# be displayed explicitly; previously its result was silently discarded.
display(steam_games_clean.head())
games.head()

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
0,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True
1,22364,BRINK: Agents of Change,2011-08-03,True,False,False,Positive,85,21,2.99,2.99,0.0,True
2,113020,Monaco: What's Yours Is Mine,2013-04-24,True,True,True,Very Positive,92,3722,14.99,14.99,0.0,True
3,226560,Escape Dead Island,2014-11-18,True,False,False,Mixed,61,873,14.99,14.99,0.0,True
4,249050,Dungeon of the ENDLESS™,2014-10-27,True,True,False,Very Positive,88,8784,11.99,11.99,0.0,True


In [64]:
# games = pd.merge(games,steam_games_clean,left_on="app_id", right_on="AppID")

# games = games.drop(columns=["AppID","Name"])
# games.head()

# Popularity Based Filtering:

In [65]:
# Popularity ranking: the best-rated games that have enough reviews to be trusted.
MIN_USER_REVIEWS = 300   # a game needs strictly more reviews than this to qualify
TOP_N_POPULAR = 250      # size of the popularity shortlist

sorted_df = games.sort_values('positive_ratio', ascending=False)

# Filtering the sorted frame directly replaces the previous
# drop -> self-merge -> drop `_y` -> rename round trip, which re-attached the
# columns it had just dropped and left `rating_x` / `rating_y` behind.
top_games = (
    sorted_df[sorted_df['user_reviews'] > MIN_USER_REVIEWS]
    .head(TOP_N_POPULAR)
    .reset_index(drop=True)
)
top_games['title'].head(5)

0              Endless Monday: Dreams and Deadlines
1                          South Scrimshaw Part One
2                                      祈風 Inorikaze
3    Aokana - Four Rhythms Across the Blue - EXTRA2
4                                  Ever Seen A Cat?
Name: title, dtype: object

# Collaborative Filtering

In [66]:
# Attach each recommendation row to its game's metadata.
# The review columns that are never used are dropped *before* the merge so
# they are not materialised across every merged row and then thrown away.
merged_data = games.merge(
    recommendation.drop(columns=['helpful', 'funny', 'date', 'hours', 'review_id']),
    on='app_id',
)

# Seeded so the preview row is the same on every run.
merged_data.sample(1, random_state=42)

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck,is_recommended,user_id
34050559,424030,War of Rights,2018-12-03,True,False,False,Very Positive,87,8897,30.0,0.0,0.0,True,True,7697807


In [67]:
# Number of rows in `merged_data` per user_id, most frequent first.
user_count= merged_data['user_id'].value_counts()
user_count

user_id
11764552    6045
5112758     4152
11656130    3840
5669734     3479
11553593    3392
            ... 
2123464        1
10957644       1
3637291        1
4338591        1
5429380        1
Name: count, Length: 13781059, dtype: int64

In [68]:
# Number of rows in `merged_data` per game title, most frequent first.
game_count= merged_data['title'].value_counts()
game_count

title
Team Fortress 2                     319492
Rust                                270684
Cyberpunk 2077                      226414
Counter-Strike: Global Offensive    219737
Dota 2                              216914
                                     ...  
Inner Tao                                1
MITTIN: Keyboard/Mouse Version           1
Life is Paine                            1
Scroll Set                               1
Another World Quest                      1
Name: count, Length: 37518, dtype: int64

## Keeping users with at least 400 reviews and games with at least 250 reviews

In [69]:
# Users and titles that clear the activity thresholds (inclusive).
active_users = user_count.loc[user_count >= 400].index
popular_titles = game_count.loc[game_count >= 250].index

# A row survives only if both its user and its game qualify.
by_active_user = merged_data['user_id'].isin(active_users)
for_popular_title = merged_data['title'].isin(popular_titles)
filtered_data = merged_data.loc[by_active_user & for_popular_title]

filtered_data

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck,is_recommended,user_id
75,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True,True,748899
154,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True,True,1988901
215,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True,False,4266016
318,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True,True,14066411
357,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True,True,11283041
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41153014,1361510,Teenage Mutant Ninja Turtles: Shredder's Revenge,2022-06-16,True,False,True,Very Positive,94,10250,17.00,0.00,0.0,True,True,7889648
41154064,1361510,Teenage Mutant Ninja Turtles: Shredder's Revenge,2022-06-16,True,False,True,Very Positive,94,10250,17.00,0.00,0.0,True,True,12130450
41154307,1361510,Teenage Mutant Ninja Turtles: Shredder's Revenge,2022-06-16,True,False,True,Very Positive,94,10250,17.00,0.00,0.0,True,True,11250992
41154370,1361510,Teenage Mutant Ninja Turtles: Shredder's Revenge,2022-06-16,True,False,True,Very Positive,94,10250,17.00,0.00,0.0,True,True,11766424


## Making a pivot table:

In [70]:
# Title x user matrix of `is_recommended`; cells with no review are filled with 0.
# NOTE(review): `fill_value=0` coincides with the numeric value of False, so
# "not recommended" and "never reviewed" look the same here; confirm that is intended.
pt = pd.pivot_table(
    filtered_data,
    values='is_recommended',
    index='title',
    columns='user_id',
    fill_value=0,
)
pt.shape

(7750, 414)

In [71]:
from scipy.sparse import csr_matrix
import numpy as np

# Dense values of the title x user pivot and their sparse (CSR) counterpart.
dense_values = pt.values
sparse_user_item = csr_matrix(dense_values)

# Sparsity = share of cells in the pivot that are zero.
filled_cells = np.count_nonzero(dense_values)
sparsity = 1 - filled_cells / pt.size
print(f"Sparsity: {sparsity:.2%}")

Sparsity: 95.57%


In [72]:
from sklearn.decomposition import TruncatedSVD

# Dimensionality reduction: compress each title's user-vector to 50 latent factors.
# `random_state` is pinned because the default solver is randomized; without it
# the factors (and hence the recommendations) change between runs.
svd = TruncatedSVD(n_components=50, random_state=42)

# Fit on the CSR matrix built earlier (same values as `pt`) instead of the
# dense frame; it was previously constructed but never used.
reduced_matrix = svd.fit_transform(sparse_user_item)

In [73]:
from sklearn.preprocessing import normalize

# Rescale every row (one row per title) to unit L2 length.
reduced_matrix = normalize(reduced_matrix, norm='l2', axis=1)

In [74]:
from sklearn.metrics.pairwise import cosine_similarity

# Title-to-title cosine similarity in the reduced latent space; read by
# `recommend_based_on_collaborative`.
# NOTE(review): the content-based section later assigns to this same name, so
# the collaborative recommender must not be called after that cell has run.
similarity_matrix= cosine_similarity(reduced_matrix)

In [75]:
similarity_matrix.shape

(7750, 7750)

In [76]:
def recommend_based_on_collaborative(game, top_n=5, similarity=None):
    """Return the titles most similar to `game` in the collaborative model.

    Parameters
    ----------
    game : str
        Title to look up in the index of the pivot table `pt`.
    top_n : int, default 5
        Number of suggestions to return.
    similarity : array-like, optional
        Square title-to-title similarity matrix aligned with `pt.index`.
        Defaults to the notebook-level `similarity_matrix`.

    Returns
    -------
    list
        Up to `top_n` titles, most similar first; empty if `game` is unknown.

    Raises
    ------
    ValueError
        If the similarity matrix does not have one row per title in `pt`,
        which happens when `similarity_matrix` has been overwritten by the
        content-based section of the notebook.
    """
    if similarity is None:
        similarity = similarity_matrix
    similarity = np.asarray(similarity)

    if similarity.shape[0] != len(pt.index):
        raise ValueError(
            f"Similarity matrix has {similarity.shape[0]} rows but the pivot table has "
            f"{len(pt.index)} titles; re-run the collaborative similarity cell."
        )

    matches = np.flatnonzero(pt.index == game)
    if matches.size == 0:
        print(f"Game '{game}' not found in the dataset.")
        return []
    index = matches[0]

    # Stable descending order; the queried game is removed explicitly rather
    # than assuming it always sorts into first place (ties can displace it).
    ranked = np.argsort(-similarity[index], kind="stable")
    ranked = ranked[ranked != index][:top_n]

    return [pt.index[i] for i in ranked]

In [77]:
recommend_based_on_collaborative("Just Cause™ 3")

['Borderlands 2',
 'Hitman: Absolution™',
 'Tomb Raider',
 'Saints Row IV: Re-Elected',
 'Middle-earth™: Shadow of Mordor™']

# Content Based Filtering:

In [78]:
!pip install langdetect



In [79]:
import pandas as pd
from langdetect import detect, DetectorFactory
import warnings

# Suppress warnings that might occur if a language cannot be reliably detected
# NOTE(review): this filter silences every UserWarning raised for the rest of
# the session, not only ones coming from langdetect.
warnings.filterwarnings("ignore", category=UserWarning)

# Optional: To ensure consistent results across runs for langdetect
DetectorFactory.seed = 0

In [80]:
def get_language(text):
    """Detect the language of `text` with langdetect.

    Parameters
    ----------
    text : object
        Usually a string; anything else (None, NaN, numbers) is treated as
        undetectable.

    Returns
    -------
    str or None
        The language code returned by `detect`, or None when the input is not
        a non-blank string or langdetect cannot classify it.
    """
    # Missing values and blank strings carry no signal; skip the detector.
    if not isinstance(text, str) or not text.strip():
        return None

    # Local import keeps this cell self-contained; module imports are cached,
    # so repeating it on every call is cheap.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        return detect(text)
    except LangDetectException:
        # Raised when the text has no usable features (e.g. digits only).
        # Anything else is a real bug and should surface, unlike the previous
        # bare `except`, which also hid NameError, KeyboardInterrupt, etc.
        return None

In [81]:
# Guess a language for every title and store it alongside the game.
# Titles are short, so guesses can be off (see 'Escape Dead Island' in the
# preview below, which is tagged 'es').
title_languages = games['title'].map(get_language)
games['detected_language'] = title_languages

print("\nDataFrame with detected languages:")
games.head()


DataFrame with detected languages:


Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck,detected_language
0,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True,en
1,22364,BRINK: Agents of Change,2011-08-03,True,False,False,Positive,85,21,2.99,2.99,0.0,True,en
2,113020,Monaco: What's Yours Is Mine,2013-04-24,True,True,True,Very Positive,92,3722,14.99,14.99,0.0,True,en
3,226560,Escape Dead Island,2014-11-18,True,False,False,Mixed,61,873,14.99,14.99,0.0,True,es
4,249050,Dungeon of the ENDLESS™,2014-10-27,True,True,False,Very Positive,88,8784,11.99,11.99,0.0,True,en


In [82]:
# Independent copy of the games whose title was detected as English.
is_english = games['detected_language'] == 'en'
games_data_en = games.loc[is_english].copy()

In [83]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words over the titles: English stop words removed, vocabulary capped
# at 5000 terms.
title_texts = games_data_en['title']
cv = CountVectorizer(stop_words='english', max_features=5000)
vector = cv.fit_transform(title_texts)

vector

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 80360 stored elements and shape (28813, 5000)>

In [84]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from scipy.sparse import hstack, csr_matrix

# Derive the release year from the release date.
games_data_en['date_release'] = pd.to_datetime(games_data_en['date_release'])
games_data_en['release_year'] = games_data_en['date_release'].dt.year

# Scale the numeric features to [0, 1].
# MinMaxScaler works column by column, so one fit over all three columns gives
# the same values as three separate fits. The fitted `scaler` then keeps the
# min/max of every feature instead of only the last one it was re-fitted on.
numeric_cols = ['release_year', 'positive_ratio', 'price_final']
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(games_data_en[numeric_cols])
for position, column in enumerate(numeric_cols):
    games_data_en[column] = scaled_values[:, position]

In [85]:
# Sparse copy of the three scaled numeric features, ready for stacking.
scaled_numeric = games_data_en[['positive_ratio', 'price_final', 'release_year']].to_numpy()
scaled_features_sparse = csr_matrix(scaled_numeric)

In [86]:
from sklearn.preprocessing import OrdinalEncoder

# Review-summary labels in the order used for the ordinal encoding
# (position 0 is the lowest rank).
# NOTE(review): 'Mostly Negative' is ranked below 'Negative' and 'Positive'
# below 'Mostly Positive'; confirm this is the intended ordering.
ratings_categories = [
    'Overwhelmingly Negative',
    'Very Negative',
    'Mostly Negative',
    'Negative',
    'Mixed',
    'Positive',
    'Mostly Positive',
    'Very Positive',
    'Overwhelmingly Positive'
]

# Replace each rating label with its position in `ratings_categories`.
oe = OrdinalEncoder(categories=[ratings_categories])

# Plain column assignment swaps the column for the encoder's float output.
# `.loc[:, 'rating'] = ...` writes into the existing string column and,
# depending on the pandas version, can leave it with object dtype.
games_data_en['rating'] = oe.fit_transform(games_data_en[['rating']]).ravel()

In [87]:
# The `rating` column after ordinal encoding, as a Series.
ratings_encoded= games_data_en['rating']

In [88]:
# Turn the encoded ratings into a single-column 2-D array so they can be
# stacked next to the other feature matrices.
ratings_encoded_array = ratings_encoded.to_numpy()
ratings_encoded_reshaped = ratings_encoded_array[:, None]

In [89]:
ratings_encoded_reshaped.shape

(28813, 1)

In [90]:
# Platform availability flags as 0/1 integers.
platform_flags = ['win', 'mac', 'linux']
for flag in platform_flags:
    games_data_en[flag] = games_data_en[flag].astype(int)

binary_features_matrix = csr_matrix(games_data_en[platform_flags])

In [91]:
# Sanity check: every feature block must have the same number of rows.
shape_report = {
    'vector': vector,
    'ratings_encoded': ratings_encoded,
    'scaled_features_sparse': scaled_features_sparse,
    'binary_features_matrix': binary_features_matrix,
}
for label, block in shape_report.items():
    print(f"Shape of '{label}': {block.shape}")

Shape of 'vector': (28813, 5000)
Shape of 'ratings_encoded': (28813,)
Shape of 'scaled_features_sparse': (28813, 3)
Shape of 'binary_features_matrix': (28813, 3)


In [92]:
# Column-wise concatenation of all feature blocks into one sparse matrix.
# NOTE(review): the encoded rating spans 0-8 while every other numeric column
# was scaled to [0, 1]; check whether it should be scaled too before cosine
# similarity is computed.
ratings_sparse = csr_matrix(ratings_encoded_reshaped.astype(float))
feature_blocks = [
    vector,
    ratings_sparse,
    scaled_features_sparse,
    binary_features_matrix,
]
combined_features = hstack(feature_blocks)

In [93]:
from sklearn.metrics.pairwise import cosine_similarity

# Game-to-game cosine similarity over the combined content features.
# NOTE(review): this rebinds `similarity_matrix`, the name that
# `recommend_based_on_collaborative` reads, so that function no longer sees
# the collaborative matrix once this cell has run.
# NOTE(review): with 28813 rows the dense result has 28813 x 28813 entries
# (roughly 6-7 GB if float64); confirm the memory budget.
similarity_matrix= cosine_similarity(combined_features)

In [94]:
# Remove columns that are not needed by the content-based recommender.
# `errors='ignore'` keeps the cell re-runnable: a second execution no longer
# raises KeyError because the columns are already gone.
games_data_en = games_data_en.drop(
    columns=['date_release', 'user_reviews', 'price_original',
             'discount', 'steam_deck', 'detected_language'],
    errors='ignore',
)

In [95]:
games_data_en[games_data_en["title"]== "Monaco: What's Yours Is Mine"]

Unnamed: 0,app_id,title,win,mac,linux,rating,positive_ratio,price_final,release_year
2,113020,Monaco: What's Yours Is Mine,1,1,1,7.0,0.92,0.049968,0.615385


In [96]:
# Same frame keyed by app_id (the column itself is moved into the index).
final_df = games_data_en.set_index(keys='app_id')

In [97]:
# Label the similarity matrix with app_id on both axes so that
# `recommend_based_on_content` can look scores up by game id.
# Uses whatever `similarity_matrix` currently holds; it must be the
# content-based one, with one row/column per row of `final_df`.
similarity_df= pd.DataFrame(similarity_matrix, index= final_df.index, columns= final_df.index)

In [98]:
# app_id -> title lookup table (single `title` column, indexed by app_id).
title_df = games_data_en.set_index('app_id')[['title']]

In [101]:
title_df.head()

Unnamed: 0_level_0,title
app_id,Unnamed: 1_level_1
13500,Prince of Persia: Warrior Within™
22364,BRINK: Agents of Change
113020,Monaco: What's Yours Is Mine
249050,Dungeon of the ENDLESS™
271850,Men of War: Assault Squad 2 - Deluxe Edition u...


In [99]:
def recommend_based_on_content(title, top_n=5):
    """Return the titles whose content features are closest to `title`.

    Parameters
    ----------
    title : str
        Exact game title to look up in `title_df`.
    top_n : int, default 5
        Number of suggestions to return.

    Returns
    -------
    list of str
        Up to `top_n` titles, most similar first. Empty (after printing a
        hint) when `title` is not in the dataset.

    Notes
    -----
    Assumes the app_id index shared by `title_df` and `similarity_df` is
    unique. If several games share `title`, the first one is used.
    """
    matching_games = title_df[title_df['title'] == title]

    if matching_games.empty:
        print(f"Game '{title}' not found in the dataset.")
        print("Available games (first 10):")
        print(title_df['title'].head(10).tolist())
        return []

    game_id = matching_games.index[0]

    # Drop the game itself, then take the highest scores directly instead of
    # sorting the whole row.
    scores = similarity_df.loc[game_id]
    recommendations = scores[scores.index != game_id].nlargest(top_n)

    # One label-based lookup for all suggestions rather than a full boolean
    # scan of `title_df` per suggestion.
    return title_df.loc[recommendations.index, 'title'].tolist()

In [103]:
recommend_based_on_content('BRINK: Agents of Change')

['The Brink 尘与土',
 'Swipecart',
 'Mahjongg Investigations: Under Suspicion',
 'ChargeShot',
 'Megatect']