# GAME FEATURE GENERATOR (POOR MAN'S NLP)

Generates new game features using N-Grams and W2V and grammar structures of existing feature sets

In [1]:
# Library imports

import numpy as np
import spacy
import math
import re
import random
from nltk.tokenize import word_tokenize
from tqdm import tqdm
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
import requests
import json
from nltk.corpus import wordnet as wn
# from nltk.stem.wordnet import WordNetLemmatizer as wnl


# other files
import sys
sys.path.append('../')
import Python.utils as utils

# setup
nlp = spacy.load("en_core_web_sm")

  from .autonotebook import tqdm as notebook_tqdm


### Make the Game Recommender (as a class for easy access and modularization)

In [2]:
'''
    This class is used to generate the game features for the game recommender system
'''
class GameRecModel():
    # init function
    def __init__(self):
        # big variables
        self.GAME_DATA = {}
        self.ALL_TAGS = []
        self.ALL_ENTITIES = []
        self.GAME_DAT_TFIDF = {}
        self.GLOVE_DAT = {}

        #helper variables
        self.custom_stopwords = ["game", ",", ".", "!"]

    # creates everything needed for the game recommender
    def setup(self, full=True, glove_size=50):
        self.importGameData(full=full)
        self.getTagsEntities()
        self.importGlove(size=glove_size)
        self.importGameDatTfidf()


    #import the game data from the game_datfeat.txt file
    def importGameData(self,full=True):
        #select which file to import
        if full:
            print("Importing full game data...")
            game_import_file = "../data/game_datfeat_FULL.txt"
        else:
            print("Importing subset of game data...")
            game_import_file = "../data/game_datfeat.txt"

        #import the data
        self.GAME_DATA = {}
        with open(game_import_file, "r") as f:
            lines = [l.strip() for l in f.readlines()]
            CUR_GAME = ""
            for l in lines:
                # empty line (between entries)
                if l == "":
                    continue
                #new entry
                else:
                    if l[0] == "+":
                        CUR_GAME = l[2:].upper()
                        self.GAME_DATA[CUR_GAME] = {"tags":[],"entities":[],"features":[]}
                    elif l[0] == "#":
                        self.GAME_DATA[CUR_GAME]["tags"] = [t.lower() for t in l[2:].split(",")]
                    elif l[0] == "@":
                        self.GAME_DATA[CUR_GAME]["entities"] = [e.lower() for e in l[2:].split(",")]
                    elif l[0] == "-":
                        self.GAME_DATA[CUR_GAME]["features"].append(l[2:])


    #get all of the tags and the entities from the game data
    def getTagsEntities(self):
        # get all of the tags and entities
        ALL_TAGS = []
        ALL_ENTITIES = []
        for g in self.GAME_DATA:
            ALL_TAGS += [t.lower() for t in self.GAME_DATA[g]["tags"]]
            ALL_ENTITIES += [e.lower() for e in self.GAME_DATA[g]["entities"]]

        # remove duplicates and set the value
        self.ALL_TAGS = list(set(ALL_TAGS))
        self.ALL_ENTITIES = list(set(ALL_ENTITIES))


    #import the word embedding data from the GloVe dataset
    def importGlove(self,size=50):
        GLOVE_DAT = {}
        with open(f"../data/glove.6B/glove.6B.{size}d.txt", "r", encoding="utf-8") as f:
            lines = f.readlines()
            for line in tqdm(lines,desc='Importing GloVe'):
                line = line.split()
                self.GLOVE_DAT[line[0]] = np.array([float(x) for x in line[1:]])

    
    # get the tfidf scores for each game
    def importGameDatTfidf(self):
        GAME_DOCS = {}
        for g in self.GAME_DATA:
            GAME_DOCS[g] = list(set(self.GAME_DATA[g]["tags"]+self.GAME_DATA[g]["entities"]))
        self.GAME_DAT_TFIDF = utils.idf_docs(GAME_DOCS)



    # return the similarity score between 2 game theme word sets
    # takes as input 2 lists of tokens from each game
    def gameSim(self,prompt_game,comp_game): 
        # Get the cosine similarity between each word in g1 and g2 to make a matrix
        d = cosine_similarity([self.GLOVE_DAT[w] for w in prompt_game if w in self.GLOVE_DAT],[self.GLOVE_DAT[w] for w in comp_game if w in self.GLOVE_DAT if w in self.GLOVE_DAT])

        #return the average of the max similarity score for each word in the prompt game
        return sum([max(x) for x in d])


    #tokenizes the text
    def tokenize(self, txt):
        raw_toks = word_tokenize(txt)
        toks = [w.lower() for w in raw_toks if w.lower() not in stopwords.words("english") and w.lower() not in self.custom_stopwords]
        #add the custom tag words (can be compound words)
        for t in self.ALL_TAGS:
            if t in txt and t not in toks:
                toks.append(t)
        return toks


    ### MAIN FUNCTIONS ###

    # turns the prompt into a dataset item [tags, entities]
    def toDataStr(self, txt):
        prompt_toks = self.tokenize(txt)
        tags = [t for t in prompt_toks if t in self.ALL_TAGS]
        entities = [e for e in prompt_toks if e in self.ALL_ENTITIES]
        return f"{','.join(tags)}\n{','.join(entities)}"

    # get the closest games to the prompt game
    def getClosestGames(self, prompt_txt, num_games=3):
        # get the prompt game tokens
        prompt_toks = self.tokenize(prompt_txt)

        # get the similarity scores for each game
        sim_scores = {}
        for g in self.GAME_DATA.keys():
            other_toks = self.GAME_DATA[g]['tags'] + self.GAME_DATA[g]['entities']
            sim_scores[g] = self.gameSim(prompt_toks,other_toks)

        #add tf-idf scores where found
        tot_scores = {}
        for g in sim_scores:
            for t in prompt_toks:
                if t in self.GAME_DAT_TFIDF[g]:
                    tot_scores[g] = sim_scores[g]+self.GAME_DAT_TFIDF[g][t]

        # sort the games by similarity score
        sorted_games = sorted(tot_scores.items(), key=lambda x: x[1], reverse=True)

        # return the top num_games and their distances (for debugging)
        best = sorted_games[:num_games]
        return [b[0] for b in best], [b[1] for b in best]

    # returns the list of features for a game
    def getGameFeats(self,game):
        return self.GAME_DATA[game]["features"]

    # returns the list of entities for a game
    def getGameEntities(self,game):
        return self.GAME_DATA[game]["entities"]

    # returns some features from the top games recommended
    def getTopGameFeats(self, prompt_txt, num_games=3):
        top_games, _ = self.getClosestGames(prompt_txt,num_games=num_games)
        feats = []
        for g in top_games:
            feats += self.getGameFeats(g)
        return feats

    # returns the first n game features from the top selected games
    def getNumGameFeats(self, prompt_txt, num_feats=30):
        top_games, _ = self.getClosestGames(prompt_txt,num_games=num_feats)  #assume each game has at least 1 feature
        feats = []
        for g in top_games:
            feats += self.getGameFeats(g)
            if len(feats) >= num_feats:
                break
        return feats[:num_feats]

    # returns some entities from the top games recommended
    def getTopGameEntities(self, prompt_txt, num_games=3):
        top_games, _ = self.getClosestGames(prompt_txt,num_games=num_games)
        ents = []
        for g in top_games:
            ents += self.getGameEntities(g)
        return list(set(ents))



In [3]:
GameRec = GameRecModel()
GameRec.setup(False)

Importing subset of game data...


Importing GloVe: 100%|██████████| 400000/400000 [00:06<00:00, 64338.79it/s]


In [4]:
# test out the game recommendation
prompt = "an rpg about a princess who collects swords and flowers to turn into potions and is secretly a frog"
print(GameRec.toDataStr(prompt))
print(GameRec.getClosestGames(prompt, num_games=5))
print(GameRec.getTopGameFeats(prompt, num_games=5))
print(GameRec.getNumGameFeats(prompt, num_feats=50))
print(GameRec.getTopGameEntities(prompt, num_games=5))

rpg
rpg,princess,swords,turn
(['CASTLE CRASHERS', 'CHROMA SQUAD', 'UNDERTALE', 'NIDHOGG', 'THE BINDING OF ISAAC'], [9.396202793135762, 9.019775167666664, 8.619144312145528, 8.232175826270373, 7.0947548845780135])
['defend your kingdom', 'smash your way to victory', 'play locally or online to save your princess', 'crash some castles', 'destroy the city', 'hire actors', 'ensure further combats', 'upgrade your gear', 'change the marketing', 'buy or craft improvements', 'conquer the hearts', 'save the city', 'combine your heroes', 'provide cash', 'produce ( actors', 'deal more damage', 'do this with than others', 'need to perform certain actions', 'show mercy to the opponent in order', 'resolve conflict without bloodshed', 'attack monsters', 'try to find peaceful resolutions', 'choose to perform friendly actions in battle', 'finish every encounter', 'run towards their opponent', 'reach the end', 'throw their swords', 'run, jump', 'find bizarre treasures', 'discover secrets and fight his wa

## Generate new features from the set given

1. Mix and match words to generate to new grammars
2. Mix and match with random grammar structure probabilities extracted from the features
3. N-grams on the words 
4. #2 but with fill in with synonyms and related words (WordNet)
5. #2 but with fill in related thematic words (ConceptNet)
6. Gamify features from the CapableOf and UsedFor descriptions from ConceptNet


### Experiment 1: Mix and Match
Note: Not terrible - could be better

-> ['try the way', 'run to the conflict', 'choose every victory or perform their secrets']

In [9]:
class FeatureGenerator():
    def __init__(self, train_feat_set):
        self.grammar_groups = self.breakDown(train_feat_set)

    # breaks down a set of texts to their grammar groups
    def breakDown(self, txt_set):
        gram_groups = {}
        for txt in txt_set:
            # get the part of speech
            doc = nlp(txt)
            texts = [t.text for t in doc]
            pos = [t.pos_ for t in doc]

            # add the grammar groups
            for i in range(len(texts)):
                if pos[i] not in gram_groups:
                    gram_groups[pos[i]] = []
                gram_groups[pos[i]].append(texts[i])
        return gram_groups

    # generates a feature set from a grammar setup
    def generate(self, grammar):
        feats = []
        for g in grammar:
            if g in self.grammar_groups:
                feats.append(random.choice(self.grammar_groups[g]))
        return " ".join(feats)
        
    

In [10]:
# test it out lol
prompt = "an rpg about a princess who collects swords and flowers to turn into potions and is secretly a frog"
rec_feats = GameRec.getTopGameFeats(prompt, num_games=5)

FG = FeatureGenerator(rec_feats)
print(FG.grammar_groups)

{'VERB': ['defend', 'smash', 'play', 'save', 'crash', 'destroy', 'hire', 'ensure', 'upgrade', 'change', 'buy', 'craft', 'conquer', 'save', 'combine', 'provide', 'produce', 'deal', 'do', 'need', 'perform', 'show', 'resolve', 'try', 'find', 'choose', 'perform', 'finish', 'run', 'reach', 'throw', 'run', 'jump', 'find', 'discover', 'fight'], 'PRON': ['your', 'your', 'your', 'your', 'your', 'this', 'their', 'their', 'his'], 'NOUN': ['kingdom', 'way', 'victory', 'princess', 'castles', 'city', 'actors', 'combats', 'gear', 'marketing', 'improvements', 'hearts', 'city', 'heroes', 'cash', 'actors', 'damage', 'others', 'actions', 'mercy', 'opponent', 'order', 'conflict', 'bloodshed', 'attack', 'monsters', 'resolutions', 'actions', 'battle', 'encounter', 'opponent', 'end', 'swords', 'treasures', 'secrets', 'way', 'safety'], 'ADP': ['to', 'with', 'than', 'to', 'in', 'without', 'in', 'towards', 'to'], 'ADV': ['locally', 'online'], 'CCONJ': ['or', 'or', 'and'], 'PART': ['to', 'to', 'to', 'to'], 'DET'

In [11]:
test_grams = [["VERB", "DET", "NOUN"], ["VERB", "ADP", "DET", "NOUN"], ["VERB", "DET", "NOUN", "CCONJ", "VERB", "PRON", "NOUN"]]

for g in test_grams:
    for i in range(5):
        print(FeatureGenerator(rec_feats).generate(g))

print([FeatureGenerator(rec_feats).generate(g) for g in test_grams])


smash the encounter
choose the treasures
fight the safety
produce the cash
ensure the opponent
hire to the heroes
craft in the marketing
do to some heroes
reach to the victory
jump without the actions
jump the city and conquer their damage
find the actions or combine your resolutions
change the hearts or need his swords
need the resolutions or smash your conflict
play the city or reach your gear
['throw the damage', 'perform to the princess', 'fight the improvements or smash your way']


### Experiment 2: Mix and Match+
Note: Better results and structuring and faster - still needs diversity in word choice and grammar fixes 

-> ['change to defend bizarre princess', 'destroy your kingdom', 'provide bizarre city', 'save friendly monsters', 'buy way towards every opponent to way']



In [12]:
class FeatureGeneratorPlus():
    def __init__(self, train_feat_set):
        self.grammar_groups, self.grammar_structs = self.breakDown(train_feat_set)

    # breaks down a set of texts to their grammar groups and get their structures
    def breakDown(self, txt_set):
        gram_groups = {}
        gram_struct = {}
        for txt in txt_set:
            # get the part of speech
            doc = nlp(txt)
            texts = [t.text for t in doc]
            pos = [t.pos_ for t in doc]

            # add the grammar structure
            pos_struct = " ".join(pos)
            if pos_struct not in gram_struct:
                gram_struct[pos_struct] = 0
            gram_struct[pos_struct] += 1
            
            # add the grammar groups
            for i in range(len(texts)):
                if pos[i] not in gram_groups:
                    gram_groups[pos[i]] = []
                gram_groups[pos[i]].append(texts[i])

        #count up the structures and convert to probabilities
        tot = sum(gram_struct.values())
        for g in gram_struct:
            gram_struct[g] /= tot
            
        return gram_groups, gram_struct

    # generates a feature set from a grammar setup
    def generate(self):
        feats = []
        # select a structure probabilistically
        struct = random.choices(list(self.grammar_structs.keys()), list(self.grammar_structs.values()))[0]
        grammar = struct.split(" ")

        for g in grammar:
            if g in self.grammar_groups:
                feats.append(random.choice(self.grammar_groups[g]))
        return " ".join(feats)

In [13]:
# test it out lol again
prompt = "an rpg about a princess who collects swords and flowers to turn into potions and is secretly a frog"
rec_feats = GameRec.getTopGameFeats(prompt, num_games=5)

FGP = FeatureGeneratorPlus(rec_feats)
print(FGP.grammar_groups)
print(FGP.grammar_structs)

{'VERB': ['defend', 'smash', 'play', 'save', 'crash', 'destroy', 'hire', 'ensure', 'upgrade', 'change', 'buy', 'craft', 'conquer', 'save', 'combine', 'provide', 'produce', 'deal', 'do', 'need', 'perform', 'show', 'resolve', 'try', 'find', 'choose', 'perform', 'finish', 'run', 'reach', 'throw', 'run', 'jump', 'find', 'discover', 'fight'], 'PRON': ['your', 'your', 'your', 'your', 'your', 'this', 'their', 'their', 'his'], 'NOUN': ['kingdom', 'way', 'victory', 'princess', 'castles', 'city', 'actors', 'combats', 'gear', 'marketing', 'improvements', 'hearts', 'city', 'heroes', 'cash', 'actors', 'damage', 'others', 'actions', 'mercy', 'opponent', 'order', 'conflict', 'bloodshed', 'attack', 'monsters', 'resolutions', 'actions', 'battle', 'encounter', 'opponent', 'end', 'swords', 'treasures', 'secrets', 'way', 'safety'], 'ADP': ['to', 'with', 'than', 'to', 'in', 'without', 'in', 'towards', 'to'], 'ADV': ['locally', 'online'], 'CCONJ': ['or', 'or', 'and'], 'PART': ['to', 'to', 'to', 'to'], 'DET'

In [14]:
for i in range(15):
    print(FGP.generate())

print([FGP.generate() for i in range(5)])

ensure online or locally to do their actions
destroy , crash
city treasures
destroy your end
conquer more marketing
need than your castles
perform , way
crash every actors
change your damage
resolve friendly way
run the actors
destroy with this actions
save your to towards monsters
try the hearts
show , provide
['resolve the mercy', 'find in your opponent', 'crash every castles', 'finish to crash friendly battle', 'crash online or online to crash their combats']


### Experiment 3: N-Grams
Note: Fast, but no diversity with the subset of top 5 games and n=3; too many copies from original featureset (80-90% from original); grammar is a little off

-> ['find bizarre treasures', 'produce actors', 'play locally or online to save your princess', 'save the city', 'crash some castles']

In [15]:
# makes an n-grams generator from the dataset
class FeatureGeneratorNGrams():
    def __init__(self, train_feat_set,n=3):
        self.n = n
        self.makeNGramSet(train_feat_set)
        
    # setup the n-grams
    def makeNGramSet(self, dataset):
        gram_set = {}  #key = gram set (array), value = { ct = how many times the context shows, next_word = list of words occuring after and their counts}
        n = self.n

        for f in dataset:
            tokens = f.split(" ")
            tokens = [t.lower() for t in tokens]

            #add starts and end
            tokens = ["__START__"]*(n-1) + tokens + ["__END__"]

            for i in range(len(tokens)-(n-1)):
                    g = tuple(tokens[i:i+n-1])   #get the gram
                    nw = tokens[i+n-1]         #get the next word
                    #print(f"{g} -> {nw}")
                    
                    #add to the gram set
                    if g not in gram_set:
                        gram_set[g] = {"ct":0,"next_word":{}}
                    gram_set[g]['ct']+=1
                    
                    #add count of seeing next word
                    if nw not in gram_set[g]['next_word']:
                        gram_set[g]['next_word'][nw] = 0
                    gram_set[g]['next_word'][nw]+=1
        self.gram_set = gram_set

    # return the bayesian probability of seeing a particular token given context
    def prob(self,context,token):
        tc = tuple(context)
        if tc in self.gram_set and token in self.gram_set[tc]['next_word']:
            return self.gram_set[tc]['next_word'][token] / self.gram_set[tc]["ct"]
        else:
            return 0.0
        
    # randomly select the next token given context
    def rand_token(self,context,auto_end=False):
        # assume context is in the gram set
        nd = self.gram_set[context]['next_word']
        words = []
        probs = []
        for w in nd.keys():
            words.append(w)
            probs.append(self.prob(context,w))
        if auto_end and "__END__" in words:
            return "__END__"
        #return random.choices(words,weights=probs,k=1)[0]   # weighted probability
        return random.choice(words)                          # normal probability


    # generate a feature from the n-gram set
    def generate(self,mintok=5):
        context = tuple(["__START__"] * (self.n-1))
        out_set = []
        next_token = self.rand_token(context)
        while next_token != "__END__":
            # add to output
            out_set.append(next_token)
            
            # extend context
            cl = list(context)
            cl.append(next_token)
            cl.pop(0)
            context = tuple(cl)
            
            # get the next context
            next_token = self.rand_token(context,(len(out_set) >= abs(mintok)))
        
        
        #return the combined text
        out_text = " ".join(out_set)
        out_text = re.sub(r'\s*(,|\.|\?)\s*', r'\1 ', out_text)
        out_text = re.sub(r'\s+(\“|\’)\s*',r'\1', out_text)
        out_text = re.sub(r'(\(|\))\s*',r'', out_text)
        
        return out_text
        

In [16]:
# test it out again again lol
prompt = "an rpg about a princess who collects swords and flowers to turn into potions and is secretly a frog"
rec_feats = GameRec.getTopGameFeats(prompt, num_games=10)

N = 3
FGNG = FeatureGeneratorNGrams(rec_feats, N)
print(FGNG.gram_set)

{('__START__', '__START__'): {'ct': 52, 'next_word': {'defend': 1, 'smash': 1, 'play': 1, 'crash': 1, 'destroy': 1, 'hire': 1, 'ensure': 1, 'upgrade': 1, 'change': 1, 'buy': 1, 'conquer': 1, 'save': 1, 'combine': 1, 'provide': 1, 'produce': 1, 'deal': 1, 'do': 1, 'need': 2, 'show': 1, 'resolve': 1, 'attack': 1, 'try': 1, 'choose': 2, 'finish': 1, 'run': 1, 'reach': 1, 'throw': 1, 'run,': 1, 'find': 2, 'discover': 3, 'encounter': 2, 'explore': 1, 'go': 1, 'take': 1, 'explore,': 1, 'feature': 1, 'manage': 1, 'defeat': 1, 'steal': 1, 'suit': 1, 'organize': 1, 'progress,': 1, 'quiet': 1, 'learn': 1, 'restore': 1, 'turn': 1}}, ('__START__', 'defend'): {'ct': 1, 'next_word': {'your': 1}}, ('defend', 'your'): {'ct': 1, 'next_word': {'kingdom': 1}}, ('your', 'kingdom'): {'ct': 1, 'next_word': {'__END__': 1}}, ('__START__', 'smash'): {'ct': 1, 'next_word': {'your': 1}}, ('smash', 'your'): {'ct': 1, 'next_word': {'way': 1}}, ('your', 'way'): {'ct': 2, 'next_word': {'to': 1, '__END__': 1}}, ('way

In [17]:
for i in range(10):
    print(FGNG.generate())

print([FGNG.generate() for i in range(5)])

smash your way to victory
restore stardew valley to greatness
provide cash
defeat enemies and plunder crypts and caves
buy or craft improvements
deal more damage
try to find peaceful resolutions
upgrade your gear
progress, the player
deal more damage
['feature a set', 'discover a strange land of monsters', 'deal more damage', 'encounter ai characters', 'choose from various character']


In [18]:
# count repetitions of features generated
def countRepeats(GENERATOR, og_feats, gen_num=10, trials=5):
    repeat_ct = []
    for i in range(trials):
        tct = 0
        for j in range(gen_num):
            feat = GENERATOR.generate()
            if feat in og_feats:
                tct+=1
        repeat_ct.append(tct)

    perc_repeat = sum(repeat_ct) / (gen_num*trials)
    return perc_repeat

print(f"{(countRepeats(FGNG, rec_feats, gen_num=10, trials=5)*100)}%")

82.0%


### Experiment 4: #2 but use synonyms and related thematic words (using WordNet)
Note: Garbage - only gets synonyms and high - low level word roots for the same word

castle -> [Synset('mansion.n.02'), Synset('fortification.n.01'), Synset('chessman.n.01'), Synset('chess_move.n.01')]

In [21]:
# TEST WORDNET

# get the related thematic words for a word (use same POS)
def getHyperWords(word):
    syns = wn.synsets(word, pos='n')
    other_words = []
    if len(syns) > 0:
        other_words += [[h for h in word.hypernyms()] for word in syns]
    return sum(other_words, [])




In [22]:
tests = ["dungeon", "castle", "alien", "skateboard", "rabbit", "sword", "wizard", "pistol", "brain"]
for t in tests:
    print(f"{t} -> {getHyperWords(t)}")

dungeon -> [Synset('stronghold.n.01'), Synset('cell.n.07')]
castle -> [Synset('mansion.n.02'), Synset('fortification.n.01'), Synset('chessman.n.01'), Synset('chess_move.n.01')]
alien -> [Synset('traveler.n.01'), Synset('intruder.n.01'), Synset('hypothetical_creature.n.01')]
skateboard -> [Synset('board.n.03'), Synset('wheeled_vehicle.n.01')]
rabbit -> [Synset('leporid.n.01'), Synset('fur.n.01'), Synset('game.n.07')]
sword -> [Synset('weapon.n.01')]
wizard -> [Synset('expert.n.01'), Synset('occultist.n.01')]
pistol -> [Synset('firearm.n.01')]
brain -> [Synset('neural_structure.n.01'), Synset('intelligence.n.01'), Synset('cognition.n.01'), Synset('intellectual.n.01'), Synset('variety_meat.n.01')]


### Experiment 5: #2 but with fill in related thematic words (ConceptNet)
Note: Gets way more words and verbs, but the generating is very lacking - too many options

--> ['smell to smash friendly end', 'be show on the victory', 'a princess', 'cover to your swords', 'rotate to be a hearts']

In [23]:
obj = requests.get('http://api.conceptnet.io/c/en/castle').json()
print(set([(e['rel']['label'], e['end']['label']) for e in obj['edges']]))

{('RelatedTo', 'fortress'), ('AtLocation', 'Germany'), ('RelatedTo', 'kings'), ('RelatedTo', 'castle'), ('IsA', 'a large, richly decorated building'), ('AtLocation', 'Europe'), ('RelatedTo', 'building'), ('RelatedTo', 'palace'), ('RelatedTo', 'home'), ('HasContext', 'chess'), ('RelatedTo', 'house'), ('AtLocation', 'a castle'), ('Synonym', 'castle'), ('RelatedTo', 'moat')}


In [61]:
def getNextWords(word):
    obj = requests.get(f'http://api.conceptnet.io/c/en/{word}').json()
    rel_set = ['RelatedTo','PartOf','IsA','HasA','MadeOf','Synonym','AtLocation']
    save_set = []
    del_set = []
    for e in obj['edges']:
        if 'language' in e['end'] and e['end']['language'] != 'en':
            continue

        if e['rel']['label'] in rel_set:
            save_set.append(e['end']['label'])
        else:
            del_set.append((e['rel']['label'],e['end']['label']))
    return list(set(save_set)), del_set


tests = ["dungeon", "castle", "alien", "skateboard", "rabbit", "sword", "wizard", "pistol", "brain",'ninja','fantasy','rpg', 'frog']
for t in tests:
    s,d = getNextWords(t)
    print(f"{t} -> {s}")

dungeon -> ['keep', 'cell', 'dungeon']
castle -> ['Germany', 'a large, richly decorated building', 'house', 'castle', 'moat', 'building', 'fortress', 'palace', 'Europe', 'kings', 'a castle', 'home']
alien -> ['foreigner', 'stranger', 'foreign', 'alien', 'extraterrestrial being', 'person', 'exotic']
skateboard -> ['wheeled vehicle', 'skateboard', 'board']
rabbit -> ['big', 'small', 'bunny', 'rodent', 'ears', 'animal', 'furry', 'tail', 'big ears', 'an animal', 'pet', 'hare', 'long ears', 'mammal', 'long', 'bugs', 'carrots', 'easter', 'a mammal', 'fluffy']
sword -> ['steel', 'sword', 'a sheath']
wizard -> ['sorcerer', 'wizard', 'ace']
pistol -> ['a gun', 'a police station', 'a handheld gun', 'a weapon, that is', 'pistol']
brain -> ['a person', 'your brain', 'brain', 'your body', 'the brain', 'a skull', 'neurons', 'a human body', 'biological organ', 'a brain']
ninja -> ['class', 'mercenary', 'ninja']
fantasy -> ['fantasy', 'fiction', 'fantasize']
rpg -> ['larp', 'rocket propelled grenade',

In [62]:
# get everything a frog is capable of
frog_obj = requests.get(f'http://api.conceptnet.io/c/en/frog').json()
for e in frog_obj['edges']:
    if e['rel']['label'] == 'CapableOf':
        print(e['end']['label'])


In [77]:
class ConceptNetGenerator():
    def __init__(self, train_entity_set, train_feat_set):
        self.noun_rel = ['RelatedTo','PartOf','IsA','HasA','MadeOf','Synonym']
        self.verb_rel = ['CapableOf','UsedFor']

        # get related thematic words and features from the entity set
        self.cnet_nouns = self.getThemeNouns(train_entity_set)
        self.cnet_feats = self.getThemeFeats(train_entity_set)
        self.gram_groups, self.gram_structs = self.breakDown(train_feat_set+self.cnet_feats)
        self.addThemeWords()

    #use concept net to get the new entities related to it
    def getThemeNouns(self, word_set):
        noun_set = []

        # go through each word's connections
        for word in word_set:
            # get the related thematic words for a word (use same POS)
            obj = requests.get(f'http://api.conceptnet.io/c/en/{word}').json()
            for e in obj['edges']:

                #skip non-english words
                if 'language' in e['end'] and e['end']['language'] != 'en':
                    continue

                if e['rel']['label'] in self.noun_rel:
                    noun_set.append(e['end']['label'])
        
        #clean up
        return list(set(noun_set))

    #get more features from the entity set using the relational set (capableof, usedfor)
    def getThemeFeats(self, word_set):
        feat_set = []

        # go through each word's connections
        for word in word_set:
            for rel in self.verb_rel:
                # get the related thematic words for a word (use same POS)
                obj = requests.get(f'https://api.conceptnet.io/query?node=/c/en/{word}&rel=/r/{rel}&offset=0&limit=30').json()
                for e in obj['edges']:

                    #skip non-english words
                    if 'language' in e['end'] and e['end']['language'] != 'en':
                        continue

                    feat_set.append(e['end']['label'])
        
        #clean up
        return list(set(feat_set))


    # breaks down a set of texts to their grammar groups and get their structures
    def breakDown(self, txt_set, base_verb=True):
        gram_groups = {}
        gram_struct = {}
        for txt in txt_set:
            # get the part of speech
            doc = nlp(txt)
            texts = [t.text for t in doc]
            pos = [t.pos_ for t in doc]

            #convert all verb to their base form if an option
            if base_verb:
                for i in range(len(pos)):
                    if pos[i] == "VERB":
                        texts[i] = doc[i].lemma_

            # add the grammar structure
            pos_struct = " ".join(pos)
            if pos_struct not in gram_struct:
                gram_struct[pos_struct] = 0
            gram_struct[pos_struct] += 1
            
            # add the grammar groups
            for i in range(len(texts)):
                if pos[i] not in gram_groups and len(pos[i]) > 1:
                    gram_groups[pos[i]] = []
                gram_groups[pos[i]].append(texts[i])

        #count up the structures and convert to probabilities
        tot = sum(gram_struct.values())
        for g in gram_struct:
            gram_struct[g] /= tot
            
        return gram_groups, gram_struct

    # combine the theme words and features to the grammar groups
    def addThemeWords(self):
        #add the nouns
        doc = nlp(" ".join(self.cnet_nouns))
        texts = [t.text for t in doc]
        pos = [t.pos_ for t in doc]

        for i in range(len(texts)):
            if pos[i] not in self.gram_groups:
                self.gram_groups[pos[i]] = []
            if pos[i] == "VERB":
                texts[i] = doc[i].lemma_
            self.gram_groups[pos[i]].append(texts[i])
        
        #add the feats' words
        gg, _ = self.breakDown(self.cnet_feats)
        for k in gg:
            if k in self.gram_groups:
                self.gram_groups[k] += gg[k]


    # generates a feature set from a grammar setup
    def generate(self):
        feats = []
        # select a structure probabilistically
        struct = random.choices(list(self.gram_structs.keys()), list(self.gram_structs.values()))[0]
        grammar = struct.split(" ")

        for g in grammar:
            if g in self.gram_groups:
                feats.append(random.choice(self.gram_groups[g]))
        return " ".join(feats)

            


In [78]:
prompt = "an rpg about a princess who collects swords and flowers to turn into potions and is secretly a frog"
rec_feats = GameRec.getTopGameFeats(prompt, num_games=5)
tok_prompt = utils.tokenize(prompt)

CNG = ConceptNetGenerator(tok_prompt, rec_feats)

In [79]:
print(tok_prompt)
print(CNG.gram_groups)
print(CNG.cnet_nouns)
print(CNG.cnet_feats)

['rpg', 'princess', 'collects', 'swords', 'flowers', 'turn', 'potions', 'secretly', 'frog']
{'VERB': ['defend', 'smash', 'play', 'save', 'crash', 'destroy', 'hire', 'ensure', 'upgrade', 'change', 'buy', 'craft', 'conquer', 'save', 'combine', 'provide', 'produce', 'deal', 'do', 'need', 'perform', 'show', 'resolve', 'try', 'find', 'choose', 'perform', 'finish', 'run', 'reach', 'throw', 'run', 'jump', 'find', 'discover', 'fight', 'smell', 'continue', 'grow', 'decorate', 'spre', 'turn', 'plant', 'wave', 'add', 'return', 'jump', 'smell', 'position', 'face', 'smell', 'jump', 'leap', 'grow', 'jump', 'win', 'catch', 'jump', 'wish', 'cover', 'grow', 'melt', 'rpg', 'rotate', 'turn', 'drive', 'roleplay', 'leave', 'change', 'leave', 'go', 'propel', 'stem', 'corner', 'smell', 'continue', 'grow', 'decorate', 'spre', 'turn', 'plant', 'wave', 'add', 'return', 'jump', 'smell', 'position', 'face', 'smell', 'jump', 'leap', 'grow', 'jump', 'win', 'catch', 'jump', 'wish', 'cover', 'grow', 'melt'], 'PRON': 

In [84]:
for i in range(10):
    print(CNG.generate())

print([CNG.generate() for i in range(5)])

craft to jump thorny catch
jump to jump certain prince
turn good flowers
leap with out the sun
decorate your opponent
pet
catch this into to pond
melt castles
roleplay their office
melt 's win peaceful pond
['smell to smash friendly end', 'be show on the victory', 'a princess', 'cover to your swords', 'rotate to be a hearts']


### Experiment 6: Use randomly selected features from the CapableOf and UsedFor descriptions from ConceptNet
Note: Pretty good! Maybe need to swap out some verbs or mix in with the recommended game set features. 
Use only the prompt nouns, combining with the games takes a long time - searching for adjacent neighbor entities takes even longer.
Starts to deviate with the features and too many to choose from

-> ['make boats', 'return to the pond', 'look cool', 'have roots', 'kill people']

In [28]:
class ConceptNetFeatGamifier():
    def __init__(self, train_entity_set,get_related=True,debug=False):
        self.noun_rel = ['RelatedTo','PartOf','IsA','HasA','MadeOf','Synonym']
        self.verb_rel = ['CapableOf','UsedFor']
        
        # get more entities that are related to the entity set
        if get_related:
            if debug:
                print(f"> Extracting related entities from ConceptNet...")
            cnet_nouns = self.getThemeNouns(train_entity_set)
            self.full_entity_set = train_entity_set + cnet_nouns
            print(f"> Extracted [ {len(cnet_nouns)} ] entities from ConceptNet!")
        else:
            self.full_entity_set = train_entity_set

        if debug:
            print(f"> Total [ {len(self.full_entity_set)} ] entities in the entity set!")

        # get the features based on the entity sets
        cnet_feats = self.getThemeFeats(self.full_entity_set)

        #process features (turn into base verbs)
        if debug:
            print(f"> Processing [ {len(cnet_feats)} ] features...")

        self.cnet_feats = []
        for f in cnet_feats:
            doc = nlp(f)    
            has_verb = False
            nf = []
            for t in doc:
                if t.pos_ == "VERB":
                    has_verb = True
                    nf.append(t.lemma_)
                else:
                    nf.append(t.text)
            if has_verb and len(nf) > 1:
                self.cnet_feats.append(" ".join(nf))

        if debug:
            print(f"> FINISHED!")


    
    #use concept net to get the new entities related to it
    def getThemeNouns(self, word_set):
        noun_set = []

        # go through each word's connections
        for word in tqdm(word_set, desc="Getting related nouns"):
            # get the related thematic words for a word (use same POS)
            obj = requests.get(f'http://api.conceptnet.io/c/en/{word}').json()
            for e in obj['edges']:

                #skip non-english words
                if 'language' in e['end'] and e['end']['language'] != 'en':
                    continue

                if e['rel']['label'] in self.noun_rel:
                    noun_set.append(e['end']['label'])
        
        #clean up
        return list(set(noun_set))

    #get more features from the entity set using the relational set (capableof, usedfor)
    def getThemeFeats(self, word_set):
        feat_set = []

        # go through each word's connections
        for word in tqdm(word_set, desc="Getting related features"):
            for rel in self.verb_rel:
                # get the related thematic words for a word (use same POS)
                obj = requests.get(f'https://api.conceptnet.io/query?node=/c/en/{word}&rel=/r/{rel}&offset=0&limit=30').json()
                for e in obj['edges']:

                    #skip non-english words
                    if 'language' in e['end'] and e['end']['language'] != 'en':
                        continue

                    feat_set.append(e['end']['label'])
        
        #clean up
        return list(set(feat_set))


    # generates a feature set from a grammar setup
    def generate(self):
        return random.choice(self.cnet_feats)

            


In [47]:
# get prompt entities
prompt = "an rpg about a princess who collects swords and flowers to turn into potions and is secretly a frog"
tok = utils.tokenize(prompt)
noun_toks = utils.single_noun(prompt)
print(tok_prompt)
CNFG = ConceptNetFeatGamifier(noun_toks, True, True)

# get rec game entities
# rec_entities = GameRec.getTopGameEntities(prompt, num_games=3)
# print(rec_entities)
# CNFG = ConceptNetFeatGamifier(tok_prompt+rec_entities, False, True)


['rpg', 'princess', 'sword', 'flower', 'potion', 'frog']
> Extracting related entities from ConceptNet...


Getting related nouns: 100%|██████████| 6/6 [00:00<00:00,  9.20it/s]


> Extracted [ 20 ] entities from ConceptNet!
> Total [ 26 ] entities in the entity set!


Getting related features: 100%|██████████| 26/26 [00:03<00:00,  6.65it/s]


> Processing [ 80 ] features...
> FINISHED!


In [48]:
print(CNFG.full_entity_set)
print(CNFG.cnet_feats)

['rpg', 'princess', 'sword', 'flower', 'potion', 'frog', 'a part of a plant', 'larp', 'flower', 'steel', 'roleplaying game', 'pretty', 'bloom', 'rocket propelled grenade', 'plant', 'sword', 'rpg', 'rose', 'prince', 'blossom', 'frog', 'rebound', 'petals', 'princess', 'potion', 'a flower']
['kill people', 'have roots', 'be jump across a stream', 'be hold a bee', 'look cool', 'jump into the air', 'cast a shadow', 'produce fruit or seed', 'catch a fly', 'smell nice', 'jump in the pond', 'flower when water', 'make boats', 'use a hammer', 'chop up enemies', 'wave her hand', 'play a fairy tale character', 'give nectar to hummingbirds', 'delight a person', 'build a bridge', 'jump very high', "cut off someone 's hand", 'spre out of a pond', 'feel cold', 'govern a country', 'win a princess', 'build things', 'be suffer dehydration', 'succeed a king', 'become a king', 'extinguish a fire', 'kill a person', 'return to the pond', 'wish to be a prince', 'train a king', 'grow in a garden', 'kill a live

In [49]:
for i in range(10):
    print(CNFG.generate())

print([CNFG.generate() for i in range(5)])

jump very high
train a king
attract pollinators
cut off someone 's hand
make boats
use a hammer
build a bridge
use a hammer
cast a shadow
spre out of a pond
['have roots', 'spre out of a pond', 'become a king', 'wave her hand', 'look cool']
