## Game Recommendation System with Vector Search

In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
import pandas as pd

games = pd.read_csv('../data/processed/games_cleaned.csv')
games.head()

Unnamed: 0,app_id,name,release_date,peak_ccu,required_age,price,dlc_count,about_the_game,supported_languages,full_audio_languages,...,median_playtime_forever,median_playtime_two_weeks,developers,publishers,categories,genres,tags,estimated_owners_min,estimated_owners_max,tagged_about
0,20200,Galactic Bowling,2008-10-21,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,['English'],[],...,0,0,Perpetual FX Creative,Perpetual FX Creative,"['Single-player', 'Multi-player', 'Steam Achie...","['Casual', 'Indie', 'Sports']","['Indie', 'Casual', 'Sports', 'Bowling']",0,20000,20200 Galactic Bowling is an exaggerated and s...
1,655370,Train Bandit,2017-10-12,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"['English', 'French', 'Italian', 'German', 'Sp...",[],...,0,0,Rusty Moyher,Wild Rooster,"['Single-player', 'Steam Achievements', 'Full ...","['Action', 'Indie']","['Indie', 'Action', 'Pixel Graphics', '2D', 'R...",0,20000,655370 THE LAW!! Looks to be a showdown atop a...
2,1732930,Jolt Project,2021-11-17,0,0,4.99,0,Jolt Project: The army now has a new robotics ...,"['English', 'Portuguese - Brazil']",[],...,0,0,Campião Games,Campião Games,['Single-player'],"['Action', 'Adventure', 'Indie', 'Strategy']",[''],0,20000,1732930 Jolt Project: The army now has a new r...
3,1355720,Henosis™,2020-07-23,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"['English', 'French', 'Italian', 'German', 'Sp...",[],...,0,0,Odd Critter Games,Odd Critter Games,"['Single-player', 'Full controller support']","['Adventure', 'Casual', 'Indie']","['2D Platformer', 'Atmospheric', 'Surreal', 'M...",0,20000,1355720 HENOSIS™ is a mysterious 2D Platform P...
4,1139950,Two Weeks in Painland,2020-02-03,0,0,0.0,0,ABOUT THE GAME Play as a hacker who has arrang...,"['English', 'Spanish - Spain']",[],...,0,0,Unusual Games,Unusual Games,"['Single-player', 'Steam Achievements']","['Adventure', 'Indie']","['Indie', 'Adventure', 'Nudity', 'Violent', 'S...",0,20000,1139950 ABOUT THE GAME Play as a hacker who ha...


In [4]:
games['tagged_about'].to_csv(
    '../data/processed/tagged_about.txt',
    sep='\n',
    index=False,
    header=False
)

In [5]:
raw_documents = TextLoader('../data/processed/tagged_about.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator='\n')
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1049, which is longer than the specified 0
Created a chunk of size 498, which is longer than the specified 0
Created a chunk of size 636, which is longer than the specified 0
Created a chunk of size 623, which is longer than the specified 0
Created a chunk of size 680, which is longer than the specified 0
Created a chunk of size 1843, which is longer than the specified 0
Created a chunk of size 794, which is longer than the specified 0
Created a chunk of size 699, which is longer than the specified 0
Created a chunk of size 990, which is longer than the specified 0
Created a chunk of size 1376, which is longer than the specified 0
Created a chunk of size 2425, which is longer than the specified 0
Created a chunk of size 2086, which is longer than the specified 0
Created a chunk of size 664, which is longer than the specified 0
Created a chunk of size 1245, which is longer than the specified 0
Created a chunk of size 1726, which is longer than the specified 0
Cre

In [6]:
# Commenting out to avoid re-embedding and extra OpenAI API costs.
# Embeddings are already stored in '../data/chroma_storage'.

# stored in '../data/chroma_storage', so we can simply load them instead.
# db_games = Chroma.from_documents(
#     documents,
#     embedding=OpenAIEmbeddings(),
#     persist_directory='../data/chroma_storage'
# )

In [7]:
db_games = Chroma(
    persist_directory="../data/chroma_storage",  
    embedding_function=OpenAIEmbeddings()
)

print("✅ Chroma DB loaded successfully!")

✅ Chroma DB loaded successfully!


In [8]:
def semantic_game_rec(query: str, top_k: int = 10) -> pd.DataFrame:
    """
    Retrieve the top-k most similar games based on a semantic search query.

    Parameters
    ----------
    query : str
        The input search query.
    top_k : int, optional
        The number of top similar games to retrieve (default is 10).

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the recommended games with matching `app_id`s.
    """
    recs = db_games.similarity_search(query=query, k=top_k)
    game_ids = [int(recs[i].page_content.split()[0]) for i in range(len(recs))]
    game_recs = games[games['app_id'].isin(game_ids)]
    
    return game_recs

In [14]:
semantic_game_rec('A game related to nature and animals')

Unnamed: 0,app_id,name,release_date,peak_ccu,required_age,price,dlc_count,about_the_game,supported_languages,full_audio_languages,...,median_playtime_forever,median_playtime_two_weeks,developers,publishers,categories,genres,tags,estimated_owners_min,estimated_owners_max,tagged_about
1146,1779710,SEARCH ALL - ANIMALS,2021-10-19,1,0,0.99,0,SEARCH ALL - ANIMALS is a game in the genre of...,"['English', 'French', 'Italian', 'German', 'Sp...","['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,Laush Dmitriy Sergeevich,Laush Studio,"['Single-player', 'Steam Achievements', 'Full ...","['Casual', 'Indie']","['Exploration', 'Clicker', 'Collectathon', 'Hi...",0,20000,1779710 SEARCH ALL - ANIMALS is a game in the ...
4729,922700,Hidden Animals: Photo Hunt. Seek and Find Obje...,2018-10-18,2,0,2.99,0,Discover what it feels like to be a wildlife p...,"['English', 'French', 'Italian', 'German', 'Sp...",[],...,0,0,Crisp App Studio,Crisp App Studio,"['Single-player', 'Steam Achievements', 'Steam...","['Adventure', 'Casual', 'Indie']","['Indie', 'Adventure', 'Casual', 'Hidden Objec...",0,20000,922700 Discover what it feels like to be a wil...
17146,1296810,Naturalealia: Forest Determination,2020-05-22,0,0,6.99,0,Based on 1930's cartoon style made with hand-d...,['English'],[],...,0,0,Butnariu Ovidiu Mihai,Butnariu Ovidiu Mihai,"['Single-player', 'Full controller support']","['Action', 'Casual', 'Indie']","['Action', 'Casual', 'Indie', 'Platformer', 'S...",0,20000,1296810 Based on 1930's cartoon style made wit...
19623,1036200,Song Animals,2019-03-25,0,0,0.0,1,"Song Animals is a short adventure puzzle game,...",['English'],[],...,0,0,"Edwin Montgomery,Myshkin Entertainment",Myshkin Entertainment,"['Single-player', 'Steam Achievements']",['Indie'],"['Indie', 'Free to Play', 'Point & Click', 'Pu...",0,20000,1036200 Song Animals is a short adventure puzz...
24881,906160,Repeat the image: Animals,2018-08-04,0,0,3.99,1,Repeat the image: Animals - is a game for very...,['English'],['English'],...,0,0,Laush Dmitriy Sergeevich,Laush Dmitriy Sergeevich,"['Single-player', 'Steam Achievements']","['Casual', 'Indie']","['Indie', 'Casual', 'Puzzle', 'Relaxing', 'Atm...",0,20000,906160 Repeat the image: Animals - is a game f...
37331,1428150,SAVAGE,2020-12-21,0,0,0.99,0,This is an adventure game in the wild. You can...,"['English', 'French', 'German', 'Spanish - Spa...",[],...,0,0,Lightning Cop,HM.GAMES,"['Single-player', 'Steam Achievements', 'Parti...","['Action', 'Adventure', 'Indie', 'RPG', 'Simul...",[''],0,20000,1428150 This is an adventure game in the wild....
42293,1028590,The WILDS,2022-02-15,1,0,9.99,0,Prepare to enter The WILDS! Become the animal ...,['English'],[],...,0,0,Gluten Free Games,Gluten Free Games,['Single-player'],"['Simulation', 'Early Access']","['Early Access', 'Simulation', 'Nature', 'Surv...",0,20000,1028590 Prepare to enter The WILDS! Become the...
57745,1260340,Naturalist Stories,2020-08-02,0,0,9.99,0,We move on the planet with great speed and do ...,"['English', 'Russian']",[],...,0,0,MiL_Puzzle,MiL_Puzzle,['Single-player'],"['Adventure', 'Indie', 'Early Access']","['Adventure', 'Indie', 'Early Access', 'Nature...",0,20000,1260340 We move on the planet with great speed...
59626,1923720,Wildlife Planet: The Incremental,2022-08-25,37,0,2.69,0,Tired and exhausted from your daily life? You ...,"['English', 'French', 'German', 'Korean', 'Sim...",[],...,0,0,CapPlay Interactive Inc.,CapPlay Interactive Inc.,"['Single-player', 'In-App Purchases', 'Steam C...","['Casual', 'Indie', 'Simulation', 'Strategy']","['Simulation', 'Creature Collector', 'Nature',...",0,20000,1923720 Tired and exhausted from your daily li...
83306,1733110,Of Life and Land,2024-04-02,130,0,22.49,1,Take the role of a leader delegated to build n...,"['English', 'German', 'French', 'Italian', 'Sp...",[],...,0,0,Kerzoven,"Kerzoven,Metaroot","['Single-player', 'Steam Achievements', 'Steam...","['Indie', 'Simulation', 'Strategy', 'Early Acc...","['Early Access', 'Base-Building', 'City Builde...",0,20000,1733110 Take the role of a leader delegated to...
