In [2]:
import ast
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [24]:
# Load the games table from CSV; the path is relative to the working directory.
path = 'data/csv/df_games_cl.csv'

df = pd.read_csv(path)

In [25]:
# Keep only the id -> title mapping; this is the table the recommender uses
# to turn a game id into a row and neighbour rows back into titles.
df_model = df[['id', 'app_name']]
df_model

Unnamed: 0,id,app_name
0,761140,Lost Summoner Kitty
1,643980,Ironbound
2,670290,Real Pool 3D - Poolians
3,767400,弹炸人2222
4,773570,Log Challenge
...,...,...
32127,773640,Colony On Mars
32128,733530,LOGistICAL: South Africa
32129,610660,Russian Roads
32130,658870,EXIT 2 - Directions


In [26]:
# Write the lookup table to the location it is read back from later in this
# notebook ('data/ml-model/df_model.csv'), creating the folder if it is missing,
# so a fresh run does not depend on moving the file by hand.
model_csv = Path('data/ml-model/df_model.csv')
model_csv.parent.mkdir(parents=True, exist_ok=True)
df_model.to_csv(model_csv, index=False)

In [3]:
# Columns that feed the similarity model.
selected_cols = ['app_name', 'genres', 'tags', 'specs', 'developer', 'price', 'early_access']
# .copy() gives the following cells their own frame to write into; assigning
# columns on a bare column selection triggers pandas' SettingWithCopyWarning.
df = df[selected_cols].copy()
df

Unnamed: 0,app_name,genres,tags,specs,developer,price,early_access
0,Lost Summoner Kitty,"['Action', 'Casual', 'Indie', 'Simulation', 'S...","['Strategy', 'Action', 'Indie', 'Casual', 'Sim...",['Single-player'],Kotoshiro,4.99,False
1,Ironbound,"['Free to Play', 'Indie', 'RPG', 'Strategy']","['Free to Play', 'Strategy', 'Indie', 'RPG', '...","['Single-player', 'Multi-player', 'Online Mult...",Secret Level SRL,0.00,False
2,Real Pool 3D - Poolians,"['Casual', 'Free to Play', 'Indie', 'Simulatio...","['Free to Play', 'Simulation', 'Sports', 'Casu...","['Single-player', 'Multi-player', 'Online Mult...",Poolians.com,0.00,False
3,弹炸人2222,"['Action', 'Adventure', 'Casual']","['Action', 'Adventure', 'Casual']",['Single-player'],彼岸领域,0.99,False
4,Log Challenge,No data,"['Action', 'Indie', 'Casual', 'Sports']","['Single-player', 'Full controller support', '...",No data,2.99,False
...,...,...,...,...,...,...,...
32127,Colony On Mars,"['Casual', 'Indie', 'Simulation', 'Strategy']","['Strategy', 'Indie', 'Casual', 'Simulation']","['Single-player', 'Steam Achievements']","Nikita ""Ghost_RUS""",1.99,False
32128,LOGistICAL: South Africa,"['Casual', 'Indie', 'Strategy']","['Strategy', 'Indie', 'Casual']","['Single-player', 'Steam Achievements', 'Steam...",Sacada,4.99,False
32129,Russian Roads,"['Indie', 'Racing', 'Simulation']","['Indie', 'Simulation', 'Racing']","['Single-player', 'Steam Achievements', 'Steam...",Laush Dmitriy Sergeevich,1.99,False
32130,EXIT 2 - Directions,"['Casual', 'Indie']","['Indie', 'Casual', 'Puzzle', 'Singleplayer', ...","['Single-player', 'Steam Achievements', 'Steam...","xropi,stev3ns",4.99,False


In [4]:
def _flatten_list_text(column):
    """Turn "['A', 'B']"-style strings into plain text.

    Single quotes are removed, surrounding brackets are stripped, and the
    comma-separated pieces are re-joined with spaces.
    """
    pieces = column.str.replace("'", "").str.strip('[]').str.split(',')
    return pieces.apply(' '.join)


for list_col in ('genres', 'tags', 'specs'):
    df[list_col] = _flatten_list_text(df[list_col])

# Cast the flag to an integer so it can be used as a numeric feature.
df['early_access'] = df['early_access'].astype(int)
df

Unnamed: 0,app_name,genres,tags,specs,developer,price,early_access
0,Lost Summoner Kitty,Action Casual Indie Simulation Strategy,Strategy Action Indie Casual Simulation,Single-player,Kotoshiro,4.99,0
1,Ironbound,Free to Play Indie RPG Strategy,Free to Play Strategy Indie RPG Card Game ...,Single-player Multi-player Online Multi-Play...,Secret Level SRL,0.00,0
2,Real Pool 3D - Poolians,Casual Free to Play Indie Simulation Sports,Free to Play Simulation Sports Casual Indi...,Single-player Multi-player Online Multi-Play...,Poolians.com,0.00,0
3,弹炸人2222,Action Adventure Casual,Action Adventure Casual,Single-player,彼岸领域,0.99,0
4,Log Challenge,No data,Action Indie Casual Sports,Single-player Full controller support HTC Vi...,No data,2.99,0
...,...,...,...,...,...,...,...
32127,Colony On Mars,Casual Indie Simulation Strategy,Strategy Indie Casual Simulation,Single-player Steam Achievements,"Nikita ""Ghost_RUS""",1.99,0
32128,LOGistICAL: South Africa,Casual Indie Strategy,Strategy Indie Casual,Single-player Steam Achievements Steam Cloud...,Sacada,4.99,0
32129,Russian Roads,Indie Racing Simulation,Indie Simulation Racing,Single-player Steam Achievements Steam Tradi...,Laush Dmitriy Sergeevich,1.99,0
32130,EXIT 2 - Directions,Casual Indie,Indie Casual Puzzle Singleplayer Atmospher...,Single-player Steam Achievements Steam Cloud,"xropi,stev3ns",4.99,0


In [5]:
# One vectorizer instance with English stop words removed. It is re-fitted on
# every text column in the next cell, so afterwards it only holds the
# vocabulary of the last column it was fitted on.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

In [6]:
def _tfidf_dense(column_name):
    """Re-fit the shared vectorizer on one text column and return its dense TF-IDF matrix."""
    return tfidf_vectorizer.fit_transform(df[column_name]).toarray()


# Fit order matters for the vectorizer's final state: it ends up fitted on 'developer'.
tfidf_appname = _tfidf_dense('app_name')
tfidf_genres = _tfidf_dense('genres')
tfidf_tags = _tfidf_dense('tags')
tfidf_specs = _tfidf_dense('specs')
tfidf_developer = _tfidf_dense('developer')

# Numeric columns are taken as they are, without any scaling.
numerical_ft = df[['price', 'early_access']].to_numpy()

In [7]:
# Column-wise concatenation: one row per game, the five TF-IDF blocks side by side.
text_blocks = [tfidf_appname, tfidf_genres, tfidf_tags, tfidf_specs, tfidf_developer]
textual_ft = np.concatenate(text_blocks, axis=1)
textual_ft

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
# (rows, combined TF-IDF vocabulary size of the five text columns)
textual_ft.shape

(32132, 31266)

In [9]:
# Compress the stacked TF-IDF matrix down to 5 components per game.
# random_state is fixed so that re-running the cell gives the same projection.
svd = TruncatedSVD(n_components=5, random_state=42)
reduced_features = svd.fit_transform(textual_ft)

# Check the shape of the reduced feature matrix
print(reduced_features.shape)

(32132, 5)


In [10]:
# Append the numeric columns (price, early_access) to the SVD components.
# NOTE(review): the numeric columns go in unscaled; if prices are much larger than
# the SVD components, price will dominate the cosine similarity - consider rescaling.
combined_ft = np.concatenate([reduced_features, numerical_ft], axis=1)
combined_ft.shape

(32132, 7)

In [11]:
# Pairwise similarity of every game against every other game (Y defaults to X).
# The result is a dense n_games x n_games matrix, so memory grows quadratically.
cosine_sim = cosine_similarity(combined_ft)

In [12]:
# Expect a square matrix: one row and one column per game.
cosine_sim.shape

(32132, 32132)

In [12]:
# Number of neighbours kept per game (capped for very small matrices).
N_RECOMMENDATIONS = min(5, max(cosine_sim.shape[0] - 1, 0))

# Shortlist one extra candidate per row so the game itself can be removed explicitly.
# Simply skipping column 0 is not safe: with tied or rounded similarities (or an
# all-zero feature row) a game is not guaranteed to rank first in its own row, in
# which case it would be recommended to itself and a real neighbour would be dropped.
candidates = np.argsort(-cosine_sim, axis=1)[:, :N_RECOMMENDATIONS + 1]

own_row = np.arange(candidates.shape[0])[:, None]
drop = candidates == own_row
# Rows whose own index did not make the shortlist give up their weakest candidate,
# so every row loses exactly one entry.
drop[~drop.any(axis=1), -1] = True

# For each game, the row positions of its most similar other games, best first.
indices = candidates[~drop].reshape(-1, N_RECOMMENDATIONS)

In [41]:
# NOTE(review): the output recorded for this cell, (32132, 5), disagrees with the
# (32132, 32132) recorded for the same expression earlier. Given its execution
# count it presumably comes from an out-of-order run in which this name held the
# top-5 index array - re-run the notebook top to bottom to refresh it.
cosine_sim.shape

(32132, 5)

In [13]:
# Neighbour rows for row 32103, the row a later cell finds for 'Counter-Strike'.
indices[32103]

array([32024, 32011, 30821, 32114,   222], dtype=int64)

In [17]:
# Persist the neighbour-index table where game_recommendation() loads it from
# ('data/ml-model/cosine_sim.npy'), creating the folder if it is missing.
# Despite the file name, what is stored is the index array, not the similarity matrix.
neighbours_file = Path('data/ml-model/cosine_sim.npy')
neighbours_file.parent.mkdir(parents=True, exist_ok=True)
np.save(neighbours_file, indices)

In [21]:
# Row label of the first game whose title is exactly 'Counter-Strike'.
is_counter_strike = df['app_name'] == 'Counter-Strike'
idx = df.index[is_counter_strike][0]
idx

32103

In [60]:
# Reload the id/title lookup table that game_recommendation() reads.
# NOTE: this rebinds `df`, which until this point held the feature frame; earlier
# cells that expect the feature columns will fail if re-run after this one.
path = 'data/ml-model/df_model.csv'

df = pd.read_csv(path)
df

Unnamed: 0,id,app_name
0,761140,Lost Summoner Kitty
1,643980,Ironbound
2,670290,Real Pool 3D - Poolians
3,767400,弹炸人2222
4,773570,Log Challenge
...,...,...
32127,773640,Colony On Mars
32128,733530,LOGistICAL: South Africa
32129,610660,Russian Roads
32130,658870,EXIT 2 - Directions


In [37]:
# Exact-match filter on the title.
# NOTE(review): the recorded output (a 'Factorio' row) cannot have been produced by
# this filter, so it is presumably left over from an earlier version of the cell.
df[df['app_name'] == 'Counter-Stri']

Unnamed: 0,id,app_name
24120,427520,Factorio


In [61]:
def game_recommendation(id: int):
    """Return the titles of the games most similar to the game with the given id.

    Reads the precomputed neighbour-index table from
    'data/ml-model/cosine_sim.npy' on every call and resolves ids and titles
    through the notebook-level ``df`` lookup table (columns 'id' and 'app_name').

    Parameters
    ----------
    id : int
        Value to look up in the 'id' column of ``df``.

    Returns
    -------
    dict
        ``{'recommendations': [title, ...]}`` in the order stored in the table.

    Raises
    ------
    IndexError
        If no row of ``df`` has this id.
    """
    path = 'data/ml-model/cosine_sim.npy'
    neighbours = np.load(path)

    # Work with row positions rather than index labels: the neighbour table is a
    # plain array, so a label is only a valid row number while df keeps its
    # default 0..n-1 index.
    positions = np.flatnonzero((df['id'] == id).to_numpy())
    if positions.size == 0:
        raise IndexError(f'no game with id {id!r} in the lookup table')

    rec_games = df['app_name'].iloc[neighbours[positions[0]]]

    return {'recommendations': list(rec_games)}

In [63]:
# Smoke test with id 10 (presumably Counter-Strike, judging by the recorded titles - confirm).
game_recommendation(10)

{'recommendations': ['Half-Life Deathmatch: Source',
  'Zombie Panic',
  'Cannon Fodder 3',
  'Counter-Strike: Condition Zero',
  'Zombie Master']}