## Game Feature Recommendation System
Recommends to the user on prompting game features extracted from game database information

In [21]:
# Library imports

import numpy as np
import spacy
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
nlp = spacy.load("en_core_web_sm")

In [22]:
# External data imports
def loadFeatData():
    DAT = {}
    with open("../data/game_datfeat.txt", "r") as f:
        lines = [l.strip() for l in f.readlines()]
        CUR_GAME = ""
        for l in lines:
            # empty line (between entries)
            if l == "":
                continue
            #new entry
            else:
                if l[0] == "+":
                    CUR_GAME = l[2:]
                    DAT[CUR_GAME] = {"tags":[],"entities":[],"features":[]}
                elif l[0] == "#":
                    DAT[CUR_GAME]["tags"] = l[2:].split(",")
                elif l[0] == "@":
                    DAT[CUR_GAME]["entities"] = l[2:].split(",")
                else:
                    DAT[CUR_GAME]["features"].append(l)
    return DAT

GAME_DATA = loadFeatData()

### Note: Embedding doesn't work with unbalanced dataset - try something else

In [28]:
###   EMBED THE TAGS AND ENTITIES   ###

# get all of the tags and entities
ALL_TAGS = []
ALL_ENTITIES = []
for g in GAME_DATA:
    ALL_TAGS += [t.lower() for t in GAME_DATA[g]["tags"]]
    ALL_ENTITIES += [e.lower() for e in GAME_DATA[g]["entities"]]

# remove duplicates
ALL_TAGS = list(set(ALL_TAGS))
ALL_ENTITIES = list(set(ALL_ENTITIES))

#make a tag embedding
def tagEmbed(tags):
    vec = [0]*len(ALL_TAGS)
    for t in tags:
        tt = t.lower()
        if tt in ALL_TAGS:
            vec[ALL_TAGS.index(tt)] = 1
    return vec

#make a entity embedding
def entEmbed(entities):
    vec = [0]*len(ALL_ENTITIES)
    for e in entities:
        ee = e.lower()
        if ee in ALL_ENTITIES:
            vec[ALL_ENTITIES.index(ee)] = 1
    return vec


# make the embeddings for each game
TAG_EMBEDDINGS = {}
ENTITY_EMBEDDINGS = {}
for g in GAME_DATA:
    TAG_EMBEDDINGS[g] = tagEmbed(GAME_DATA[g]["tags"])
    ENTITY_EMBEDDINGS[g] = entEmbed(GAME_DATA[g]["entities"])

#make a combined embedding
FULL_EMVEDDINGS = {}
for g in GAME_DATA:
    FULL_EMVEDDINGS[g] = TAG_EMBEDDINGS[g]+ENTITY_EMBEDDINGS[g]
    

In [39]:
## GET CLOSEST GAME FROM TEXT BASED ON EMBEDDINGS ##
def getClosestGames(txt,k=3):
    # tokenize and remove all stop words
    raw_toks = word_tokenize(txt)
    toks = [w.lower() for w in raw_toks if w.lower() not in stopwords.words("english") and w.isalpha()]
    print(toks)

    # get the embedding for the text
    txt_embed = tagEmbed(toks)+entEmbed(toks)

    # get the closest games using distance metrics
    dists = {}
    for g in GAME_DATA:
        dists[g] = np.linalg.norm(np.array(txt_embed) - np.array(FULL_EMVEDDINGS[g]))
    dists = sorted(dists.items(), key=lambda x: x[1])
    print(dists)
    return dists[:k]


### Steps: 
1. Load the data (games => tags, entities, features)
2. Get a user prompt for a game and its genre
3. Recommend some random features based on the text similarity and closest tags

* Note: Keep in mind, there are over 150k games with tags, features, and entities - the search cannot be too large


In [40]:
# recommend some features to the user based on a prompt
def recommendFeatures(userPrompt):
    # get the closest game
    closestGames = getClosestGames(userPrompt)

    # get the features for that game
    for g in closestGames:
        print(g[0])
        print(GAME_DATA[g[0]]["features"])

In [41]:
user_txt = "top-down multiplayer action game about knights"
recommendFeatures(user_txt)

['multiplayer', 'action', 'game', 'knights']
[('THE RAMP', 4.242640687119285), ('BATMAN: ARKHAM CITY', 4.47213595499958), ('KINDERGARTEN', 4.898979485566356), ('AMONG US', 5.0), ('ELDEN RING', 5.0), ('HADES', 5.291502622129181), ('SUPER FANCY PANTS ADVENTURE', 5.477225575051661), ('VAMPIRE SURVIVORS', 5.5677643628300215), ('BABA IS YOU', 5.656854249492381), ('A SHORT HIKE', 5.656854249492381), ('OUTER WILDS', 5.744562646538029), ('ESCAPE SIMULATOR', 5.830951894845301), ('NIDHOGG', 5.830951894845301), ('SPELUNKY', 5.916079783099616), ('ULTRAKILL', 5.916079783099616), ('DONUT COUNTY', 6.0), ('OVERCOOKED! 2', 6.0), ('SCRIBBLENAUTS UNLIMITED', 6.082762530298219), ('CASTLE CRASHERS', 6.164414002968976), ('THE ELDER SCROLLS V: SKYRIM', 6.244997998398398), ('DOWNWELL', 6.4031242374328485), ('OMORI', 6.4031242374328485), ('CYBERPUNK 2077', 6.48074069840786), ('KINGSWAY', 6.557438524302), ('STARDEW VALLEY', 6.557438524302), ('THE BINDING OF ISAAC', 6.6332495807108), ('CUPHEAD', 6.70820393249936