## Install Library dan Import Library yang diperlukan

In [38]:
# Install library berikut jika belum ada
%pip install pandas
%pip install numpy
%pip install scipy
%pip install implicit
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [39]:
import pandas as pd
import numpy as np
import scipy.sparse as sparse
import implicit as ip
import re
import os
import json
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [40]:
df = pd.read_csv('Data/steam_users_purchase_play.csv')

## Collaborative Filtering

### Baca file csv dan analisa data

In [41]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88338 entries, 0 to 88337
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   user_id    88338 non-null  int64  
 1   game_name  88338 non-null  object 
 2   hours      88338 non-null  float64
 3   purchase   88338 non-null  int64  
 4   play       88338 non-null  int64  
 5   ID         88338 non-null  object 
dtypes: float64(1), int64(3), object(2)
memory usage: 4.0+ MB


Data Bersih, tidak ada NULL value, data yang diperlukan juga sudah dalam data numerik

In [42]:
df.head()

Unnamed: 0,user_id,game_name,hours,purchase,play,ID
0,151603712,Fallout 4,87.0,1,1,fallout4
1,151603712,Spore,14.9,1,1,spore
2,151603712,Fallout New Vegas,12.1,1,1,falloutnewvegas
3,151603712,Left 4 Dead 2,8.9,1,1,left4dead2
4,151603712,HuniePop,8.5,1,1,huniepop


Purchase dan play bernilai 1 atau 0 yang menyatakan apakah user membeli dan memainkan game. Nilainya hampir selalu 1, sehingga kurang cocok dijadikan measure untuk rekomendasi. Total waktu bermain (hours) lebih cocok dijadikan measure rekomendasi.

### Processing dan Membuat Lookup Table

In [43]:
# Keep the column names in variables so the code below is shorter and clearer.
# `users`, `items` and `measure` are the source columns of the play data;
# the `internal_*` names are the derived columns added/renamed in the next cells.
users, items, measure = "user_id", "game_name", "hours"
internal_users, internal_items, internal_measures = "user_internal", "item_internal", "measure_internal"

In [44]:
# For internal use, convert the user and item values into numeric IDs
# (the pandas category code of each distinct value).
df[internal_users] = df[users].astype("category").cat.codes
df[internal_items] = df[items].astype("category").cat.codes

In [45]:
df.head()

Unnamed: 0,user_id,game_name,hours,purchase,play,ID,user_internal,item_internal
0,151603712,Fallout 4,87.0,1,1,fallout4,5057,950
1,151603712,Spore,14.9,1,1,spore,5057,2341
2,151603712,Fallout New Vegas,12.1,1,1,falloutnewvegas,5057,951
3,151603712,Left 4 Dead 2,8.9,1,1,left4dead2,5057,1452
4,151603712,HuniePop,8.5,1,1,huniepop,5057,1260


In [46]:
# Build the lookup table that maps internal user ids to the original user ids
user_lookup_table = df[[internal_users, users]].drop_duplicates() # also removes duplicate rows
user_lookup_table[internal_users] = user_lookup_table[internal_users].astype(str) # store the internal id as a string
user_lookup_table[users] = user_lookup_table[users].astype(int).astype(str) # cast to int first so no float decimal ends up in the string
user_lookup_table.head()

Unnamed: 0,user_internal,user_id
0,5057,151603712
28,6895,187131847
29,1518,59945701
62,1304,53875128
440,4580,140954425


In [47]:
# Build the lookup table that maps internal item ids to game names
game_lookup_table = df[[internal_items, items]].drop_duplicates() # also removes duplicate rows
game_lookup_table[internal_items] = game_lookup_table[internal_items].astype(str) # store the internal id as a string
game_lookup_table.head()


Unnamed: 0,item_internal,game_name
0,950,Fallout 4
1,2341,Spore
2,951,Fallout New Vegas
3,1452,Left 4 Dead 2
4,1260,HuniePop


In [48]:
# Rename the measure column (hours) to its internal name; this mutates `df` in place
df.rename(columns = {measure : internal_measures}, inplace = True)
df.head()

Unnamed: 0,user_id,game_name,measure_internal,purchase,play,ID,user_internal,item_internal
0,151603712,Fallout 4,87.0,1,1,fallout4,5057,950
1,151603712,Spore,14.9,1,1,spore,5057,2341
2,151603712,Fallout New Vegas,12.1,1,1,falloutnewvegas,5057,951
3,151603712,Left 4 Dead 2,8.9,1,1,left4dead2,5057,1452
4,151603712,HuniePop,8.5,1,1,huniepop,5057,1260


In [49]:
# The original user ids and game names now live in the lookup tables,
# so keep only the internal columns in df.
# NOTE: after this cell `df` no longer has the `game_name` / `ID` columns.
df = df[[internal_users, internal_items, internal_measures]]
df.head()

Unnamed: 0,user_internal,item_internal,measure_internal
0,5057,950,87.0
1,5057,2341,14.9
2,5057,951,12.1
3,5057,1452,8.9
4,5057,1260,8.5


### Membuat model

In [50]:
# Build the sparse CSR matrices of play hours from (value, (row, column)) triples
item_user = sparse.csr_matrix((df[internal_measures].astype(float), (df[internal_items], df[internal_users]))) # item-user CSR table: rows are internal item ids, columns are internal user ids
user_item = sparse.csr_matrix((df[internal_measures].astype(float), (df[internal_users], df[internal_items]))) # user-item CSR table: rows are internal user ids, columns are internal item ids

In [51]:
# Build the collaborative-filtering model with the ALS algorithm
model = ip.als.AlternatingLeastSquares(factors = 20, regularization = 0.1, iterations = 20, random_state = 42)

# Confidence: scale the user-item play-hours matrix by alpha before fitting
alpha = 15
data_confidence = (user_item * alpha).astype("double")

# Fit the model on the scaled user-item matrix
model.fit(data_confidence)

  0%|          | 0/20 [00:00<?, ?it/s]

### Membuat sistem rekomendasi

In [52]:
def similar(item, n_similar):
    """Return the games the ALS model ranks as most similar to each requested game.

    Parameters
    ----------
    item : iterable
        Game names as they appear in the ``game_name`` column of
        ``game_lookup_table``.
    n_similar : int
        Number of similar games to return per requested game.

    Returns
    -------
    pandas.DataFrame
        One row per requested game with columns ``game_name, "1", ..., "n_similar"``.
        The row holds the ``n_similar + 1`` results of ``model.similar_items`` in
        ranked order, so ``game_name`` is the top-ranked result (the queried game
        itself in the demo below). A requested name that is not in the lookup
        table yields a row filled with the placeholder ``-1e4``.
    """
    # One extra result: the first one fills the game_name column.
    n_results = n_similar + 1

    # Build both lookup directions once instead of scanning the table per result.
    internal_by_name = dict(zip(game_lookup_table[items], game_lookup_table[internal_items]))
    name_by_internal = dict(zip(game_lookup_table[internal_items], game_lookup_table[items]))

    rows = []
    for requested in item:
        internal_id = internal_by_name.get(str(requested))
        if internal_id is None:
            # Unknown game: keep the numeric placeholder row callers already expect.
            rows.append([-1e4] * n_results)
            continue
        similar_ids, _scores = model.similar_items(int(internal_id), n_results)
        # Internal ids are stored as strings in the lookup table.
        rows.append([name_by_internal[str(result_id)] for result_id in similar_ids])

    return pd.DataFrame(rows, columns = [items] + list(map(str, range(1, n_results))))

similar(["Fallout 3 - Game of the Year Edition"], 10).head()

Unnamed: 0,game_name,1,2,3,4,5,6,7,8,9,10
0,Fallout 3 - Game of the Year Edition,YOU DON'T KNOW JACK,Assassin's Creed Revelations,Dragon Age Origins - Ultimate Edition,Star Wars Knights of the Old Republic,BioShock,Penny Arcade's On the Rain-Slick Precipice of ...,The Banner Saga,Fable - The Lost Chapters,FTL Faster Than Light,Kingdoms of Amalur Reckoning


## Content Based Filtering

### Baca file CSV dan analisa data

In [53]:
df2 = pd.read_csv("Data/steam_games.csv")

In [54]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40833 entries, 0 to 40832
Data columns (total 20 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   url                       40833 non-null  object 
 1   types                     40831 non-null  object 
 2   name                      40817 non-null  object 
 3   desc_snippet              27612 non-null  object 
 4   recent_reviews            2706 non-null   object 
 5   all_reviews               28470 non-null  object 
 6   release_date              37654 non-null  object 
 7   developer                 40490 non-null  object 
 8   publisher                 35733 non-null  object 
 9   popular_tags              37888 non-null  object 
 10  game_details              40313 non-null  object 
 11  languages                 40797 non-null  object 
 12  achievements              12194 non-null  float64
 13  genre                     40395 non-null  object 
 14  game_d

In [55]:
df2.head()

Unnamed: 0,url,types,name,desc_snippet,recent_reviews,all_reviews,release_date,developer,publisher,popular_tags,game_details,languages,achievements,genre,game_description,mature_content,minimum_requirements,recommended_requirements,original_price,discount_price
0,https://store.steampowered.com/app/379720/DOOM/,app,DOOM,Now includes all three premium DLC packs (Unto...,"Very Positive,(554),- 89% of the 554 user revi...","Very Positive,(42,550),- 92% of the 42,550 use...","May 12, 2016",id Software,"Bethesda Softworks,Bethesda Softworks","FPS,Gore,Action,Demons,Shooter,First-Person,Gr...","Single-player,Multi-player,Co-op,Steam Achieve...","English,French,Italian,German,Spanish - Spain,...",54.0,Action,"About This Game Developed by id software, the...",,"Minimum:,OS:,Windows 7/8.1/10 (64-bit versions...","Recommended:,OS:,Windows 7/8.1/10 (64-bit vers...",$19.99,$14.99
1,https://store.steampowered.com/app/578080/PLAY...,app,PLAYERUNKNOWN'S BATTLEGROUNDS,PLAYERUNKNOWN'S BATTLEGROUNDS is a battle roya...,"Mixed,(6,214),- 49% of the 6,214 user reviews ...","Mixed,(836,608),- 49% of the 836,608 user revi...","Dec 21, 2017",PUBG Corporation,"PUBG Corporation,PUBG Corporation","Survival,Shooter,Multiplayer,Battle Royale,PvP...","Multi-player,Online Multi-Player,Stats","English,Korean,Simplified Chinese,French,Germa...",37.0,"Action,Adventure,Massively Multiplayer",About This Game PLAYERUNKNOWN'S BATTLEGROUND...,Mature Content Description The developers de...,"Minimum:,Requires a 64-bit processor and opera...","Recommended:,Requires a 64-bit processor and o...",$29.99,
2,https://store.steampowered.com/app/637090/BATT...,app,BATTLETECH,Take command of your own mercenary outfit of '...,"Mixed,(166),- 54% of the 166 user reviews in t...","Mostly Positive,(7,030),- 71% of the 7,030 use...","Apr 24, 2018",Harebrained Schemes,"Paradox Interactive,Paradox Interactive","Mechs,Strategy,Turn-Based,Turn-Based Tactics,S...","Single-player,Multi-player,Online Multi-Player...","English,French,German,Russian",128.0,"Action,Adventure,Strategy",About This Game From original BATTLETECH/Mec...,,"Minimum:,Requires a 64-bit processor and opera...","Recommended:,Requires a 64-bit processor and o...",$39.99,
3,https://store.steampowered.com/app/221100/DayZ/,app,DayZ,The post-soviet country of Chernarus is struck...,"Mixed,(932),- 57% of the 932 user reviews in t...","Mixed,(167,115),- 61% of the 167,115 user revi...","Dec 13, 2018",Bohemia Interactive,"Bohemia Interactive,Bohemia Interactive","Survival,Zombies,Open World,Multiplayer,PvP,Ma...","Multi-player,Online Multi-Player,Steam Worksho...","English,French,Italian,German,Spanish - Spain,...",,"Action,Adventure,Massively Multiplayer",About This Game The post-soviet country of Ch...,,"Minimum:,OS:,Windows 7/8.1 64-bit,Processor:,I...","Recommended:,OS:,Windows 10 64-bit,Processor:,...",$44.99,
4,https://store.steampowered.com/app/8500/EVE_On...,app,EVE Online,EVE Online is a community-driven spaceship MMO...,"Mixed,(287),- 54% of the 287 user reviews in t...","Mostly Positive,(11,481),- 74% of the 11,481 u...","May 6, 2003",CCP,"CCP,CCP","Space,Massively Multiplayer,Sci-fi,Sandbox,MMO...","Multi-player,Online Multi-Player,MMO,Co-op,Onl...","English,German,Russian,French",,"Action,Free to Play,Massively Multiplayer,RPG,...",About This Game,,"Minimum:,OS:,Windows 7,Processor:,Intel Dual C...","Recommended:,OS:,Windows 10,Processor:,Intel i...",Free,


Kita menentukan atribut yang relevan untuk rekomendasi adalah
 - genre
 - game_details
 - popular_tags
 - publisher
 - developer
 - all_reviews (yang ini akan diproses nanti)

### Preprocessing Data

In [56]:
### Preprocessing Data
def remove_spaces(x):
    """Return ``x`` with every space removed when it is a string; other values are returned unchanged."""
    if isinstance(x, str):
        return x.replace(" ", "")
    return x


def _name_to_id(names):
    """Turn a Series of game names into lowercase alphanumeric keys used to join the two datasets."""
    return names.fillna("").astype(str).str.replace("[^A-Za-z0-9]+", "", regex = True).str.lower()


if os.path.exists("Data/steam_games_preprocessed.csv"):
    df_games = pd.read_csv("Data/steam_games_preprocessed.csv")
else:
    # Re-read the raw play data: by this point `df` only holds the internal
    # collaborative-filtering columns, so it no longer has `game_name`.
    users_raw = pd.read_csv("Data/steam_users_purchase_play.csv")
    users_raw["ID"] = _name_to_id(users_raw["game_name"])

    # Work on a copy so the catalogue frame `df2` displayed in other cells is not mutated.
    games_raw = df2.copy()
    games_raw["name"] = games_raw["name"].fillna("")  # games without a name get an empty name
    games_raw["ID"] = _name_to_id(games_raw["name"])

    # Keep only catalogue games that appear in the play data, and only play
    # rows whose game exists in the catalogue.
    text_columns = ["genre", "game_details", "popular_tags", "publisher", "developer"]
    in_play_data = games_raw["ID"].isin(users_raw["ID"].unique())
    df_games = games_raw.loc[in_play_data, ["ID", "name"] + text_columns].copy()
    users_filtered = users_raw[users_raw["ID"].isin(df_games["ID"].unique())]

    # Fill nulls with "" and remove the spaces inside multi-word values so
    # each value becomes a single token for the vectorizer.
    for column in text_columns:
        df_games[column] = df_games[column].fillna("").map(remove_spaces)

    # Combined feature columns. The parts are joined with "," so the last
    # token of one attribute does not fuse with the first token of the next
    # (e.g. "Action" + "BethesdaSoftworks" must stay two tokens).
    combined_columns = {
        "genre_publisher_developer": ["genre", "publisher", "developer"],
        "genre_popular_tags_developer": ["genre", "popular_tags", "developer"],
        "genre_popular_tags_game_details": ["genre", "popular_tags", "game_details"],
        "genre_publisher_developer_game_details": ["genre", "publisher", "developer", "game_details"],
    }
    for combined, parts in combined_columns.items():
        df_games[combined] = df_games[parts[0]].str.cat(df_games[parts[1:]], sep = ",")

    # Remove duplicate games; the result must be assigned back because
    # drop_duplicates returns a new frame.
    df_games = df_games.drop_duplicates("name")

    # Make sure no null is left in any column
    df_games = df_games.fillna("")

    # Cache the results to speed up later runs.
    # NOTE: the play-data CSV is overwritten in place with the filtered rows
    # plus the ID column; later cells read it back from this same path.
    df_games.to_csv("Data/steam_games_preprocessed.csv", index = False)
    users_filtered.to_csv("Data/steam_users_purchase_play.csv", index = False)

### Preprocessing untuk kolom review

In [57]:
# Review merupakan faktor penting untuk rekomendasi
df2[["recent_reviews", "all_reviews"]].head()

Unnamed: 0,recent_reviews,all_reviews
0,"Very Positive,(554),- 89% of the 554 user revi...","Very Positive,(42,550),- 92% of the 42,550 use..."
1,"Mixed,(6,214),- 49% of the 6,214 user reviews ...","Mixed,(836,608),- 49% of the 836,608 user revi..."
2,"Mixed,(166),- 54% of the 166 user reviews in t...","Mostly Positive,(7,030),- 71% of the 7,030 use..."
3,"Mixed,(932),- 57% of the 932 user reviews in t...","Mixed,(167,115),- 61% of the 167,115 user revi..."
4,"Mixed,(287),- 54% of the 287 user reviews in t...","Mostly Positive,(11,481),- 74% of the 11,481 u..."


Tetapi terlihat bahwa data reviews sangat acak-acakan dan sulit dibaca

Dalam kasus ini kita akan menggunakan all_reviews sebagai patokan rekomendasi karena kolom recent_reviews terlalu sedikit datanya

In [58]:
df2.head()

Unnamed: 0,url,types,name,desc_snippet,recent_reviews,all_reviews,release_date,developer,publisher,popular_tags,game_details,languages,achievements,genre,game_description,mature_content,minimum_requirements,recommended_requirements,original_price,discount_price
0,https://store.steampowered.com/app/379720/DOOM/,app,DOOM,Now includes all three premium DLC packs (Unto...,"Very Positive,(554),- 89% of the 554 user revi...","Very Positive,(42,550),- 92% of the 42,550 use...","May 12, 2016",id Software,"Bethesda Softworks,Bethesda Softworks","FPS,Gore,Action,Demons,Shooter,First-Person,Gr...","Single-player,Multi-player,Co-op,Steam Achieve...","English,French,Italian,German,Spanish - Spain,...",54.0,Action,"About This Game Developed by id software, the...",,"Minimum:,OS:,Windows 7/8.1/10 (64-bit versions...","Recommended:,OS:,Windows 7/8.1/10 (64-bit vers...",$19.99,$14.99
1,https://store.steampowered.com/app/578080/PLAY...,app,PLAYERUNKNOWN'S BATTLEGROUNDS,PLAYERUNKNOWN'S BATTLEGROUNDS is a battle roya...,"Mixed,(6,214),- 49% of the 6,214 user reviews ...","Mixed,(836,608),- 49% of the 836,608 user revi...","Dec 21, 2017",PUBG Corporation,"PUBG Corporation,PUBG Corporation","Survival,Shooter,Multiplayer,Battle Royale,PvP...","Multi-player,Online Multi-Player,Stats","English,Korean,Simplified Chinese,French,Germa...",37.0,"Action,Adventure,Massively Multiplayer",About This Game PLAYERUNKNOWN'S BATTLEGROUND...,Mature Content Description The developers de...,"Minimum:,Requires a 64-bit processor and opera...","Recommended:,Requires a 64-bit processor and o...",$29.99,
2,https://store.steampowered.com/app/637090/BATT...,app,BATTLETECH,Take command of your own mercenary outfit of '...,"Mixed,(166),- 54% of the 166 user reviews in t...","Mostly Positive,(7,030),- 71% of the 7,030 use...","Apr 24, 2018",Harebrained Schemes,"Paradox Interactive,Paradox Interactive","Mechs,Strategy,Turn-Based,Turn-Based Tactics,S...","Single-player,Multi-player,Online Multi-Player...","English,French,German,Russian",128.0,"Action,Adventure,Strategy",About This Game From original BATTLETECH/Mec...,,"Minimum:,Requires a 64-bit processor and opera...","Recommended:,Requires a 64-bit processor and o...",$39.99,
3,https://store.steampowered.com/app/221100/DayZ/,app,DayZ,The post-soviet country of Chernarus is struck...,"Mixed,(932),- 57% of the 932 user reviews in t...","Mixed,(167,115),- 61% of the 167,115 user revi...","Dec 13, 2018",Bohemia Interactive,"Bohemia Interactive,Bohemia Interactive","Survival,Zombies,Open World,Multiplayer,PvP,Ma...","Multi-player,Online Multi-Player,Steam Worksho...","English,French,Italian,German,Spanish - Spain,...",,"Action,Adventure,Massively Multiplayer",About This Game The post-soviet country of Ch...,,"Minimum:,OS:,Windows 7/8.1 64-bit,Processor:,I...","Recommended:,OS:,Windows 10 64-bit,Processor:,...",$44.99,
4,https://store.steampowered.com/app/8500/EVE_On...,app,EVE Online,EVE Online is a community-driven spaceship MMO...,"Mixed,(287),- 54% of the 287 user reviews in t...","Mostly Positive,(11,481),- 74% of the 11,481 u...","May 6, 2003",CCP,"CCP,CCP","Space,Massively Multiplayer,Sci-fi,Sandbox,MMO...","Multi-player,Online Multi-Player,MMO,Co-op,Onl...","English,German,Russian,French",,"Action,Free to Play,Massively Multiplayer,RPG,...",About This Game,,"Minimum:,OS:,Windows 7,Processor:,Intel Dual C...","Recommended:,OS:,Windows 10,Processor:,Intel i...",Free,


In [59]:
# Build the positive-review percentage and review-qualification columns
if os.path.exists("Data/steam_user_review.csv"):
    df_review = pd.read_csv("Data/steam_user_review.csv")
else:
    all_reviews = df2["all_reviews"]

    # First "- NN%" occurrence, digits only. The previous character class
    # "[0, 1, ...]" also accepted commas and spaces and allowed zero digits.
    percentage = all_reviews.str.extract(r"- (\d+)%", expand = False)

    # The first comma-separated field is the qualification ("Very Positive",
    # "Mixed", ...) unless it is a "... user reviews" count, in which case the
    # qualification is left empty. Rows without a review string stay null.
    first_field = all_reviews.str.split(",").str[0]
    is_review_count = first_field.str.contains("user reviews", regex = False, na = False)
    qualification = first_field.mask(is_review_count, "")

    df_review = df2[["name", "all_reviews"]].assign(
        percentage_positive_review = percentage,
        review_qualification = qualification,
    )[["name", "percentage_positive_review", "review_qualification", "all_reviews"]]
    df_review.to_csv("Data/steam_user_review.csv", index = False)
    

### Membuat model content based

In [60]:
# Load the files written (or found cached) by the preprocessing cells above
df_games = pd.read_csv("Data/steam_games_preprocessed.csv")
df_users = pd.read_csv("Data/steam_users_purchase_play.csv")
df_reviews = pd.read_csv("Data/steam_user_review.csv")

In [61]:
# Reverse index for df_games: maps a game name to its row label in df_games
# NOTE(review): drop_duplicates() here de-duplicates the Series *values* (the row
# labels), not the game names in the index — confirm whether de-duplicating by
# name was the intent.
indices = pd.Series(df_games.index, index = df_games["name"]).drop_duplicates()
print(indices)

name
DayZ                                     0
EVE Online                               1
TERA                                     2
Stonehearth                              3
Call of Duty®: Black Ops                 4
                                      ... 
Black Ink                             3031
The Deer                              3032
Tank Operations: European Campaign    3033
King-Dom                              3034
GRID                                  3035
Length: 3036, dtype: int64


In [62]:
# Collect the distinct game names available to the content-based model
game_list = df_games["name"].unique()
print(game_list)

['DayZ' 'EVE Online' 'TERA' ... 'Tank Operations: European Campaign'
 'King-Dom' 'GRID']


In [63]:
def recommend(title, sim, n):
    """Return the names of the ``n`` games most similar to ``title``.

    Parameters
    ----------
    title : str or sequence of str
        Game name as found in ``df_games["name"]``. A sequence is accepted for
        backward compatibility (callers in this notebook pass a one-element
        list); only its first element is used.
    sim : array-like
        Square similarity matrix whose rows/columns follow the row order of
        ``df_games``.
    n : int
        Maximum number of names to return.

    Returns
    -------
    list of str
        Up to ``n`` game names ordered by decreasing similarity, never
        including ``title`` itself. An empty list (after printing
        "Game not found") when the title is unknown.
    """
    # Unwrap a list-like title so the comparisons below work on the plain name.
    if not isinstance(title, str):
        wrapped = list(title)
        title = wrapped[0] if wrapped else None

    if title is None or title not in game_list:
        print("Game not found")
        return []

    # A name can map to several rows; use the first one.
    row_positions = np.atleast_1d(indices[title])
    similarity_row = np.asarray(sim[row_positions[0]]).ravel()

    # Stable sort on the negated scores: highest first, ties keep row order.
    ranked = np.argsort(-similarity_row, kind = "stable")
    ranked_names = df_games["name"].iloc[ranked]

    # Drop the queried game before truncating so exactly n other games remain.
    return ranked_names[ranked_names != title].head(n).tolist()

In [64]:
def recommend_by_rating(n, game_list, game_name):
    """Order candidate games by positive-review percentage and return the top ``n`` as one row.

    Parameters
    ----------
    n : int
        Number of recommendation slots in the result.
    game_list : list of str
        Candidate game names (shadows the notebook-level ``game_list`` inside
        this function). Anything that is not a non-empty list produces a row
        of empty slots.
    game_name : sequence of str
        The queried game; ``game_name[0]`` labels the row and every name in the
        sequence is excluded from the recommendations.

    Returns
    -------
    pandas.DataFrame
        A single row with columns ``game_name, "1", ..., "n"``. Unused slots
        are filled with ``""``.
    """
    columns = ["game_name"] + list(map(str, range(1, n + 1)))

    # No usable candidates. The type is checked first so a non-sized value
    # never reaches len().
    if not isinstance(game_list, list) or len(game_list) == 0:
        return pd.DataFrame(data = [[game_name[0]] + [""] * n], columns = columns)

    # Mask and frame must be the same table (df_reviews).
    reviews = df_reviews.loc[df_reviews["name"].isin(game_list)]
    reviews = reviews.loc[~reviews["name"].isin(game_name)]
    reviews = reviews.sort_values(by = "percentage_positive_review", ascending = False)
    # A name may have several review rows; keep the best-rated one so a game
    # is never recommended twice.
    reviews = reviews.drop_duplicates("name")

    top_names = reviews["name"].tolist()[:n]
    padding = [""] * (n - len(top_names))
    return pd.DataFrame(data = [[game_name[0]] + top_names + padding], columns = columns)

In [65]:
def recommend_init(n, game_name):
    """Content-based recommendation: ``n`` games similar to ``game_name``, ordered by rating.

    Similarity is the cosine similarity of token counts over the combined
    genre / publisher / developer column of ``df_games``.

    Parameters
    ----------
    n : int
        Number of recommendation slots.
    game_name : str or list of str
        The queried game. A plain string is wrapped into a one-element list,
        which is the form the helper functions expect.

    Returns
    -------
    pandas.DataFrame
        A single row with columns ``game_name, "1", ..., "n"`` as produced by
        ``recommend_by_rating``.
    """
    if isinstance(game_name, str):
        game_name = [game_name]

    # Fill nulls on a local Series so the shared df_games is not mutated on every call.
    features = df_games["genre_publisher_developer"].fillna("")

    # Cosine similarity between the token-count vectors of every pair of games
    count_matrix = CountVectorizer(stop_words = "english").fit_transform(features)
    cosine_similarity_matrix = cosine_similarity(count_matrix, count_matrix)

    suggestions = recommend(game_name, cosine_similarity_matrix, n)
    return recommend_by_rating(n, suggestions, game_name)

In [66]:
recommend_init(10, ["Counter-Strike"])

Unnamed: 0,game_name,1,2,3,4,5,6,7,8,9,10
0,Counter-Strike,Portal,Left 4 Dead 2,Half-Life 2,Counter-Strike: Source,Half-Life,Half-Life 2: Episode Two,Left 4 Dead,Half-Life 2: Episode One,Team Fortress Classic,Half-Life: Source


## Hybrid Recommender System

### Gabungkan model content-based dan collaborative

In [67]:
def get_content_based_name(game_name):
    """Translate a play-data game name into the matching name in ``df_games``.

    The two tables are joined through their shared ``ID`` column; the first
    match on each side is used. Raises ``IndexError`` when either lookup finds
    no row.
    """
    user_ids = df_users.loc[df_users["game_name"] == game_name, "ID"]
    join_key = user_ids.values[0]
    catalogue_names = df_games.loc[df_games["ID"] == str(join_key), "name"]
    return catalogue_names.values[0]

In [68]:
def get_collaborative_name(game_name):
    """Translate a ``df_games`` name into the matching game name in the play data.

    The two tables are joined through their shared ``ID`` column; the first
    match on each side is used. Raises ``IndexError`` when either lookup finds
    no row.
    """
    catalogue_ids = df_games.loc[df_games["name"] == game_name, "ID"]
    join_key = catalogue_ids.values[0]
    play_names = df_users.loc[df_users["ID"] == str(join_key), "game_name"]
    return play_names.values[0]

In [69]:
def hybrid_recommender(n, game_name):
    """Combine content-based and collaborative recommendations into one row of ``n`` games.

    The first ``int(0.7 * n)`` slots come from ``recommend_init`` (content
    based). The remaining slots are filled, in ranked order, with collaborative
    results from ``similar`` that are not already in the row.

    Parameters
    ----------
    n : int
        Total number of recommendation slots.
    game_name : str
        Game name as found in ``df_games["name"]``.

    Returns
    -------
    pandas.DataFrame
        A single row with columns ``game_name, "1", ..., "n"``. Slots that
        cannot be filled hold ``""``.
    """
    n_content = int(0.7 * n)
    rec = recommend_init(n_content, [game_name])
    collaborative_name = get_collaborative_name(game_name)

    # Names already in the row (plus the queried game) must not be repeated.
    taken = {game_name} | set(rec.drop(columns = "game_name").iloc[0])

    # Ask for more candidates than slots so overlaps with the content-based
    # part can be skipped without querying the model again.
    df_collaborative = similar([collaborative_name], 2 * n)

    cur_len = n_content
    for col in df_collaborative.columns:
        if cur_len >= n:
            break
        if col == "game_name":
            continue
        try:
            candidate = get_content_based_name(df_collaborative[col].values[0])
        except IndexError:
            # No counterpart in the content-based catalogue: skip this candidate.
            continue
        if candidate in taken:
            continue
        cur_len += 1
        rec[str(cur_len)] = candidate  # string label, like the existing columns
        taken.add(candidate)

    # Pad instead of looping forever when the candidates run out.
    while cur_len < n:
        cur_len += 1
        rec[str(cur_len)] = ""

    return rec

hybrid_recommender(10, "Counter-Strike")

Unnamed: 0,game_name,1,2,3,4,5,6,7,8,9,10
0,Counter-Strike,Portal,Left 4 Dead 2,Half-Life 2,Counter-Strike: Source,Half-Life,Left 4 Dead,Half-Life: Source,Total War™: ROME II - Emperor Edition,Sid Meier's Civilization® V,Half-Life 2: Deathmatch


Jelaskan topik <br>
jelaskan metode yang dipakai <br>
demoin cara kerjanya <br>

## Setup Flask Server For Deployment

### Create list of games for game searching

In [70]:
# Build static/data/game_list.json (name, header image and store link per game)
# once; skipped when the file already exists.
if not os.path.exists("static/data/game_list.json"):
    game_list_data = df_games["name"].dropna().unique()
    df_game_original = pd.read_csv("Data/steam_games.csv")

    # Map each name to the url of its first catalogue row, built once instead
    # of filtering the whole frame for every game.
    url_by_name = (
        df_game_original.dropna(subset = ["name"])
        .drop_duplicates("name")
        .set_index("name")["url"]
    )

    df_game_image = []
    for game in game_list_data:
        game_link = url_by_name.get(game)
        if game_link is None:
            # Name not present in the raw catalogue: nothing to link to.
            continue
        # Store urls look like https://store.steampowered.com/app/<id>/<slug>/,
        # so the third element from the end is the app id.
        steam_game_id = game_link.split("/")[-3]
        image_link = f"https://shared.fastly.steamstatic.com/store_item_assets/steam/apps/{steam_game_id}/header.jpg"
        df_game_image.append({
            "game_name" : game,
            "game_image_link" : image_link,
            "game_link" : game_link,
        })

    # Make sure the target directory exists before writing.
    os.makedirs("static/data", exist_ok = True)
    with open("static/data/game_list.json", "w") as f:
        json.dump(df_game_image, f)

### Imports

In [71]:
%pip install Flask

Note: you may need to restart the kernel to use updated packages.


In [72]:
from flask import Flask, render_template, request, redirect, url_for

### App

In [None]:
app = Flask(__name__)

@app.route("/game_details")
def game_details():
    """Render the detail page for one game from the values passed in the query string."""
    # Read the fields in a fixed order; each one must be present in the query string.
    fields = ("name", "image_link", "genre", "rating", "game_link")
    context = {field: request.args[field] for field in fields}
    return render_template("game_details.html", **context)

@app.route("/recommendation")
def recommendation():
    """Render the recommendation page for the game named in the ``game_name`` query argument.

    An unknown game is answered with the index page and the same
    "Game not Found" error the search form uses, instead of failing inside the
    recommender.
    """
    game_name = request.args["game_name"]
    if game_name not in game_list:
        return render_template("index.html", error="Game not Found")

    recommendations = hybrid_recommender(10, game_name)
    df_game_original = pd.read_csv("Data/steam_games.csv")

    data = []
    for col in recommendations.columns:
        if col == 'game_name':
            continue
        name = recommendations[col].values[0]
        game = df_game_original[df_game_original["name"] == name]
        if game.empty:
            # Empty padding slot or a name missing from the raw catalogue.
            continue
        game_link = game["url"].values[0]
        # Store urls look like https://store.steampowered.com/app/<id>/<slug>/,
        # so the third element from the end is the app id.
        steam_game_id = game_link.split("/")[-3]
        image_link = f"https://shared.fastly.steamstatic.com/store_item_assets/steam/apps/{steam_game_id}/header.jpg"
        data.append({
            "game_name" : name,
            "game_data" : game,
            "image_link" : image_link
        })

    return render_template("recommendation.html", game_name = game_name, recommendations = data)

@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the search page; on POST, redirect a known game to its recommendations."""
    # Anything other than a form submission just shows the page.
    if request.method != "POST":
        return render_template("index.html", error=False)

    game_name = request.form.get("game-name")
    if game_name not in game_list:
        return render_template("index.html", error="Game not Found")

    return redirect(url_for("recommendation", game_name = game_name))

In [None]:
if __name__ == "__main__":
    app.run(debug = True, use_reloader = False)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [14/Mar/2025 11:33:50] "GET /game_details?recommendations={'game_name':+'Axiom+Verge+Original+Soundtrack',+'game_data':++++++++++++++++++++++++++++++++++++++++++++++++++++++url+types++\\%0A14540++https://store.steampowered.com/app/368920/Axio...+++app+++%0A%0A++++++++++++++++++++++++++++++++++name+desc_snippet+recent_reviews++\\%0A14540++Axiom+Verge+Original+Soundtrack++++++++++NaN++++++++++++NaN+++%0A%0A+++++++++++++++++++++++++++++++++++++++++++++all_reviews++release_date++\\%0A14540++Positive,(11),-+100%25+of+the+11+user+reviews+fo...++May+14,+2015+++%0A%0A+++++++++++++++++++developer+publisher++++++++++++popular_tags++\\%0A14540++Thomas+Happ+Games+LLC+++++++NaN++Action,Adventure,Indie+++%0A%0A++++++++++++++++++++++++++++++++++++++++++++game_details++\\%0A14540++Single-player,Downloadable+Content,Steam+Achie...+++%0A%0A+++++++++++++++++++++++++++++++++++++++++++++++languages++achievements++\\%0A14540++English,Fre