In [1]:
from docarray import BaseDoc
from docarray.typing import NdArray

class ToyDoc(BaseDoc):
  text: str = ''
  embedding: NdArray[128]

In [None]:
from docarray import DocList
import numpy as np
from vectordb import InMemoryExactNNVectorDB, HNSWVectorDB

# Specify your workspace path
db = InMemoryExactNNVectorDB[ToyDoc](workspace='./workspace_path')

# Index a list of documents with random embeddings
doc_list = [ToyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
db.index(inputs=DocList[ToyDoc](doc_list))

In [4]:
# Perform a search query
query = ToyDoc(text='query', embedding=np.random.rand(128))
results = db.search(inputs=DocList[ToyDoc]([query]), limit=1)

# Print out the matches
for m in results[0].matches:
  print(m)
  m.e

[1;35mToyDoc[0m[1m([0m
    [33mid[0m=[32m'f7529bd8d0dfb267df1421be52f26814'[0m,
    [33mtext[0m=[32m'toy doc 697'[0m,
    [33membedding[0m=[1;35mNdArray[0m[1m([0m[1m[[0m[1;36m0.32330185[0m, [1;36m0.89269434[0m, [1;36m0.23539347[0m, [1;36m0.65961198[0m, [1;36m0.78324998[0m,
         [1;36m0.70321443[0m, [1;36m0.79247765[0m, [1;36m0.8529012[0m , [1;36m0.15774017[0m, [1;36m0.77760242[0m,
         [1;36m0.04551047[0m, [1;36m0.59361679[0m, [1;36m0.29219951[0m, [1;36m0.88514965[0m, [1;36m0.50778523[0m,
         [1;36m0.9627889[0m , [1;36m0.38824495[0m, [1;36m0.33033576[0m, [1;36m0.1830116[0m , [1;36m0.54596467[0m,
         [1;36m0.87485307[0m, [1;36m0.66322805[0m, [1;36m0.88585726[0m, [1;36m0.14107379[0m, [1;36m0.47982615[0m,
         [1;36m0.10457807[0m, [1;36m0.35763992[0m, [1;36m0.47532271[0m, [1;36m0.45497114[0m, [1;36m0.07295483[0m,
         [1;36m0.34520915[0m, [1;36m0.8933615[0m , [1;36m0.46144065[0m

In [5]:
import pandas as pd
import numpy as np
from pathlib import Path

dataset_path = Path("../data/processed") / "games_with_vectors.pickle"

df = pd.read_pickle(dataset_path)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85103 entries, 0 to 85102
Data columns (total 41 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   AppID                       85103 non-null  int64  
 1   Name                        85097 non-null  object 
 2   Release date                85103 non-null  object 
 3   Estimated owners            85103 non-null  object 
 4   Peak CCU                    85103 non-null  int64  
 5   Required age                85103 non-null  int64  
 6   Price                       85103 non-null  float64
 7   DLC count                   85103 non-null  int64  
 8   About the game              85103 non-null  object 
 9   Supported languages         85103 non-null  object 
 10  Full audio languages        85103 non-null  object 
 11  Reviews                     9743 non-null   object 
 12  Header image                85103 non-null  object 
 13  Website                     394

In [7]:
len(df.iloc[0]["Tags_vector"])

448

In [8]:
len(df.iloc[0]["Description_vector"])

50

In [9]:
# game_id is the index in the dataframe

class GameDescriptionDoc(BaseDoc):
  game_id: int = 0
  embedding: NdArray[50]

class GameTagDoc(BaseDoc):
  game_id: int = 0
  embedding: NdArray[448]

In [10]:
description_db = HNSWVectorDB[GameDescriptionDoc](workspace='./game_description_db', space="cosine")
tags_db = HNSWVectorDB[GameTagDoc](workspace='./game_tags_db', space="cosine")

In [11]:
"""
for index, row in df.iterrows():
    print(row['c1'], row['c2'])
"""


descriptions_list = [GameDescriptionDoc(game_id=index, embedding=row["Description_vector"]) for index, row in df.iterrows()]
tags_list = [GameTagDoc(game_id=index, embedding=row["Tags_vector"]) for index, row in df.iterrows()]

In [12]:
description_db.index(inputs=DocList[GameDescriptionDoc](descriptions_list))

<DocList[GameDescriptionDoc] (length=85103)>

In [13]:
tags_db.index(inputs=DocList[GameTagDoc](tags_list))

<DocList[GameTagDoc] (length=85103)>

In [15]:
def get_desc_vector_by_app_id(app_id: int):
    description_vector = df[df['AppID']==app_id]['Description_vector'].values[0] # This can fail if the app id is not found
    return description_vector

def get_tags_vector_by_app_id(app_id: int):
    tags_vector = df[df['AppID']==app_id]['Tags_vector'].values[0] # This can fail if the app id is not found
    return tags_vector

In [53]:
def query_games_by_description(app_id: int, no_results=10):
    description_vector = get_desc_vector_by_app_id(app_id)
    query = GameDescriptionDoc(text='query', embedding=description_vector)
    results = description_db.search(inputs=DocList[GameDescriptionDoc]([query]), limit=no_results)
    searched_game_name = df.iloc[results[0].matches[0].game_id]["Name"]
    print(f"Games similar to {searched_game_name} (by description):\n")
    
    for m in results[0].matches[1:]:
      match_game_id = m.game_id
      print(df.iloc[match_game_id]["Name"])

In [54]:
query_games_by_description(570940) # Dark Souls Remastered

Games similar to DARK SOULS™: REMASTERED (by description):

DARK SOULS™ III
DARK SOULS™ II
DARK SOULS™ II: Scholar of the First Sin
.hack//G.U. Last Recode
Beyond Hanwell Teaser: The Royal Hallamshire
Delusions of a Lost Soul
The Eternal Castle [REMASTERED]
Shadow of Valhalla
HeXen: Deathkings of the Dark Citadel


In [55]:
def query_games_by_tags(app_id: int, no_results=10):
    tags_vector = get_tags_vector_by_app_id(app_id)
    query = GameTagDoc(text='query', embedding=tags_vector)
    results = tags_db.search(inputs=DocList[GameTagDoc]([query]), limit=no_results)
    
    searched_game_name = df.iloc[results[0].matches[0].game_id]["Name"]
    print(f"Games similar to {searched_game_name} (by tags):\n")
    
    for m in results[0].matches[1:]:
      match_game_id = m.game_id
      print(df.iloc[match_game_id]["Name"])

In [56]:
query_games_by_tags(570940)

Games similar to DARK SOULS™: REMASTERED (by tags):

DARK SOULS™ III
DARK SOULS™: Prepare To Die™ Edition
DARK SOULS™ II: Scholar of the First Sin
Lords Of The Fallen™
DARK SOULS™ II
Kingdom Of Rhea
Monster Hunter: World
Shattered - Tale of the Forgotten King
Dragon's Dogma: Dark Arisen


In [57]:
query_games_by_tags(1901370)

Games similar to Ib (by tags):

Tide Up
Alicemare
Cat in the Box
The Witch's House MV
The Sand Man
Mad Father
Angels of Death
Viviette
Tales of the Black Forest


In [58]:
query_games_by_description(1901370)

Games similar to Ib (by description):

Mojo 2: Mia
Root Letter Last Answer
Annie and the Art Gallery
Paper Bride 2 Zangling Village
RHEM I SE: The Mysterious Land
Agatha Christie - Hercule Poirot: The London Case
Darkness Within 2: The Dark Lineage
Atelier Ryza 2: Lost Legends & the Secret Fairy
VR Nostalgia 5


In [59]:
query_games_by_description(1901370, no_results=30)

Games similar to Ib (by description):

Mojo 2: Mia
Root Letter Last Answer
Annie and the Art Gallery
Paper Bride 2 Zangling Village
RHEM I SE: The Mysterious Land
Agatha Christie - Hercule Poirot: The London Case
Darkness Within 2: The Dark Lineage
Atelier Ryza 2: Lost Legends & the Secret Fairy
VR Nostalgia 5
Atelier Marie Remake: The Alchemist of Salburg
Atelier Sophie 2: The Alchemist of the Mysterious Dream
Plum Bun Reformatted
Forgotten Places: Regained Castle
Mojo: Hanako
匿名信：隐匿者 / Anonymous Letter ：Prowler
√Letter - Root Letter -
Phantom Thief Sylphy
烛梦灯  The Dreams of Candlelight
The Walsingham Files - Chapter 1
Demon Hunter 2: New Chapter
Mojo
Alice's Warped Wonderland:REcollection
The Farmthis Gallery
Strange Investigations: Becoming Collector's Edition
Escape : Lia
Faraway: Arctic Escape
The Uncertain: VR Experience
Myst
The Abbey - Director's cut


In [60]:
query_games_by_tags(1901370, no_results=30)

Games similar to Ib (by tags):

Tide Up
Alicemare
Cat in the Box
The Witch's House MV
The Sand Man
Mad Father
Angels of Death
Viviette
Tales of the Black Forest
Chloé’s Requiem -encore-
Rakuen
Psicose?
UNREAL LIFE
Fausts Alptraum
Cursed Mansion
Changed
The Hanged Man
OneShot
Confess My Love
The Crooked Man
Retrace
Clea
MIDNIGHT Remastered
Angels of Death Episode.Eddie
Misao: Definitive Edition
Corpse Party
MEMENTO
Night Loops
White Day: A Labyrinth Named School


In [61]:
query_games_by_tags(65540, no_results=30)

Games similar to Gothic 1 (by tags):

Gothic II: Gold Edition
Risen
Gothic® 3
The Witcher: Enhanced Edition Director's Cut
Fable - The Lost Chapters
The Elder Scrolls IV: Oblivion® Game of the Year Edition
Risen 3 - Titan Lords
The Elder Scrolls IV: Oblivion® Game of the Year Edition Deluxe
Bound By Flame
Fable Anniversary
SpellMaster: The Saga
The Witcher® 3: Wild Hunt
Summoner
ArcaniA
Gothic 3: Forsaken Gods Enhanced Edition
Risen 2: Dark Waters
Middle-earth™: Shadow of War™
The Elder Scrolls III: Morrowind® Game of the Year Edition
Lords Of The Fallen™
Divinity II: Developer's Cut
Dragon's Dogma: Dark Arisen
Enclave
The Witcher 2: Assassins of Kings Enhanced Edition
Two Worlds Epic Edition
Gothic Playable Teaser
Demonicon
Hogwarts Legacy
Two Worlds II HD
ELEX


In [62]:
query_games_by_description(65540, no_results=5)

Games similar to Gothic 1 (by description):

Head Over Heels
Raiders! Forsaken Earth
Refuge
Orc Colony


In [64]:
query_games_by_tags(574740, no_results=30)

Games similar to Fausts Alptraum (by tags):

Tide Up
Tales of the Black Forest
Cat in the Box
Angels of Death
The Witch's House MV
Higurashi When They Cry Hou - Ch.6 Tsumihoroboshi
Re:Turn - One Way Trip
Cursed Mansion
The Crooked Man
Chloé’s Requiem -encore-
异化之恶〇Abnormal Treatment
Higurashi When They Cry Hou - Ch.7 Minagoroshi
Higurashi When They Cry Hou - Ch. 5 Meakashi
永冻之壳 The Shell of Permafrost
Rakuen
MIND SWITCH
Facility 386
Confess My Love
Viviette
Reflection of Mine
Mad Father
Alicemare
The Hanged Man
Coffin of Ashes
KOEWOTAYORINI / 声之寄托
Stitched
The Sand Man
Angels of Death Episode.Eddie
Night Loops


In [68]:
query_games_by_tags(1307710, no_results=10)


Games similar to GRID Legends (by tags):

Bump and Run Racing
Bounty: Drag Racing
Real Drift Multiplayer 2
NASCAR 21: Ignition
Concept Destruction
Need For Speed: Hot Pursuit
Strike Cars
F1 2015
Oval Racer Series - Sandbox


In [69]:
query_games_by_description(1307710, no_results=10)


Games similar to GRID Legends (by description):

Torque Drift
New Star GP
GRID
Monster Truck Destruction
Forza Motorsport
Riptide GP2
DIRT 5
Tony Stewart's All-American Racing
F1® 22
