These are sample queries that can be run against Word2Vec embeddings to compare words in various ways. Most of these examples are included in the project paper.

In [2]:
import gensim
from pprint import pprint


# Path of the saved Word2Vec model whose vectors the cells below report
# under the 'Late 2022' label.
model_dir = r"PATH/TO/MODELS/twitch_500_20e/model"

# Load the model and retain only its `.wv` attribute; no reference to the
# full model object is kept around afterwards.
my_embed = gensim.models.word2vec.Word2Vec.load(model_dir).wv

In [1]:
# Path of the saved Word2Vec model whose vectors the cells below report
# under the 'Early 2018' label.
model_dir = r"PATH/TO/EmoteControlled/embedding/embedding"

# Only the `.wv` attribute of the loaded model is kept; the model object
# itself is not bound to any name.
ec_embed = gensim.models.word2vec.Word2Vec.load(model_dir).wv

# Odd Word Out

In [123]:
# Odd word out: ask each embedding which of the three words does not match.
word1, word2, word3 = 'youtube', 'twitch', 'instagram'

print(f"{word1}, {word2}, {word3}:")
for label, embedding in (('Early 2018:', ec_embed), ('\nLate 2022:', my_embed)):
    print(label)
    pprint(embedding.doesnt_match([word1, word2, word3]))

youtube, twitch, instagram:
Early 2018:
'instagram'

Late 2022:
'instagram'


In [120]:
# Odd word out for three game titles, evaluated on both embeddings.
word1, word2, word3 = 'halo', 'destiny', 'minecraft'

print(f"{word1}, {word2}, {word3}:")

print('Early 2018:')
early_outlier = ec_embed.doesnt_match([word1, word2, word3])
pprint(early_outlier)

print('\nLate 2022:')
late_outlier = my_embed.doesnt_match([word1, word2, word3])
pprint(late_outlier)

halo, destiny, minecraft:
Early 2018:
'minecraft'

Late 2022:
'minecraft'


# Other words in same context

In [77]:
# Other words within this context: the three words are passed together as
# positive examples and the top 5 results are shown per embedding.
word1, word2, word3 = 'monday', 'tuesday', 'wednesday'

print(f"{word1}, {word2}, {word3}:")
for label, embedding in (('Early 2018:', ec_embed), ('\nLate 2022:', my_embed)):
    print(label)
    context_hits = embedding.most_similar_cosmul([word1, word2, word3], topn=5)
    pprint(context_hits)

monday, tuesday, wednesday:
Early 2018:
[('thursday', 0.9577841758728027),
 ('saturday', 0.9165282249450684),
 ('sunday', 0.907137393951416),
 ('friday', 0.8335966467857361),
 ('tomorrow', 0.7105387449264526)]

Late 2022:
[('thursday', 0.7666406631469727),
 ('saturday', 0.7345103621482849),
 ('sunday', 0.712386965751648),
 ('friday', 0.6353601813316345),
 ('thrusday', 0.47483497858047485)]


In [86]:
# Other words within this context (three game titles as positive examples).
word1, word2, word3 = 'halo', 'battlefield', 'cod'

print(f"{word1}, {word2}, {word3}:")
for heading, vectors in zip(('Early 2018:', '\nLate 2022:'), (ec_embed, my_embed)):
    print(heading)
    pprint(vectors.most_similar_cosmul([word1, word2, word3], topn=5))

halo, battlefield, cod:
Early 2018:
[('titanfall', 0.7009791135787964),
 ('battlefront', 0.7005841732025146),
 ('destiny', 0.6946454644203186),
 ('gow', 0.6763774156570435),
 ('borderlands', 0.6754363775253296)]

Late 2022:
[('mw2', 0.42584699392318726),
 ('warzone', 0.4224894046783447),
 ('wz', 0.40713760256767273),
 ('pubg', 0.40599527955055237),
 ('overwatch', 0.39819079637527466)]


In [87]:
# Other words within this context (three names as positive examples).
word1, word2, word3 = 'hasan', 'xqc', 'mizkif'

print(f"{word1}, {word2}, {word3}:")

print('Early 2018:')
early_hits = ec_embed.most_similar_cosmul([word1, word2, word3], topn=5)
pprint(early_hits)

print('\nLate 2022:')
late_hits = my_embed.most_similar_cosmul([word1, word2, word3], topn=5)
pprint(late_hits)

hasan, xqc, mizkif:
Early 2018:
[('poki', 0.3933767080307007),
 ('tyler1', 0.37368881702423096),
 ('caden', 0.3709830641746521),
 ('alex', 0.3645510673522949),
 ('tyler', 0.3628367483615875)]

Late 2022:
[('miz', 0.4279533922672272),
 ('erobb', 0.4117581844329834),
 ('ludwig', 0.4075615704059601),
 ('adin', 0.40582212805747986),
 ('esfand', 0.4053257703781128)]


# A -> B as C -> ___

In [60]:
## Word relations
# Analogy query: word1 is to word11 as word2 is to ?
# word2 and word11 are the positive examples, word1 is the negative one.
word1, word11 = 'man', 'woman'
word2 = 'king'

print(f"{word1} relates to {word11} as {word2} to _____")
for label, embedding in (('Early 2018:', ec_embed), ('\nLate 2022:', my_embed)):
    print(label)
    analogy_hits = embedding.most_similar_cosmul(
        positive=[word2, word11],
        negative=[word1],
        topn=5,
    )
    pprint(analogy_hits)

man relates to woman as king to _____
Early 2018:
[('queen', 0.9875078201293945),
 ('princess', 0.9762540459632874),
 ('prince', 0.9511711597442627),
 ('murderer', 0.9334140419960022),
 ('shepard', 0.9321715831756592)]

Late 2022:
[('queen', 0.8871447443962097),
 ('queens', 0.8037395477294922),
 ('goddess', 0.7729969024658203),
 ("king's", 0.7706203460693359),
 ('prince', 0.7612332105636597)]


In [73]:
## Word relations
# Analogy query on emoticons/emotes: word1 is to word11 as word2 is to ?
word1, word11 = ':)', ':('
word2 = 'FeelsGoodMan'

print(f"{word1} relates to {word11} as {word2} to _____")

print('Early 2018:')
early_analogy = ec_embed.most_similar_cosmul(
    positive=[word2, word11], negative=[word1], topn=5
)
pprint(early_analogy)

print('\nLate 2022:')
late_analogy = my_embed.most_similar_cosmul(
    positive=[word2, word11], negative=[word1], topn=5
)
pprint(late_analogy)

:) relates to :( as FeelsGoodMan to _____
Early 2018:
[('FeelsBadMan', 0.9389158487319946),
 ('roofeels', 0.8735018968582153),
 ('NotLikeThis', 0.8671714663505554),
 ('#notmycloud9', 0.8551284670829773),
 ('feelsbadman', 0.8447734713554382)]

Late 2022:
[('FeelsBadMan', 0.9865471124649048),
 ('PepeHands', 0.85306316614151),
 ('sadge', 0.8340296149253845),
 ('peeposad', 0.8284163475036621),
 ('smoge', 0.8052573204040527)]


In [116]:
## Word relations
# Analogy query: word1 is to word11 as word2 is to ?
word1, word11 = 'sekiro', 'singleplayer'
word2 = 'halo'

print(f"{word1} relates to {word11} as {word2} to _____")
for heading, vectors in zip(('Early 2018:', '\nLate 2022:'), (ec_embed, my_embed)):
    print(heading)
    pprint(
        vectors.most_similar_cosmul(
            positive=[word2, word11],
            negative=[word1],
            topn=5,
        )
    )

sekiro relates to singleplayer as halo to _____
Early 2018:
[('multiplayer', 1.0128192901611328),
 ('frontlines', 0.993749737739563),
 ('co-op', 0.9850131273269653),
 ('mcc', 0.9829264879226685),
 ('sandbox', 0.9821550250053406)]

Late 2022:
[('multiplayer', 0.831718921661377),
 ('mulitplayer', 0.7799060940742493),
 ('cod', 0.7712476849555969),
 ('4v4', 0.7586734890937805),
 ('co-op', 0.7517169713973999)]


In [113]:
## Word relations
# Analogy query: word1 is to word11 as word2 is to ?
word1, word11 = 'witcher', 'geralt'
word2 = 'dbz'

print(f"{word1} relates to {word11} as {word2} to _____")
for label, embedding in (('Early 2018:', ec_embed), ('\nLate 2022:', my_embed)):
    print(label)
    relation_hits = embedding.most_similar_cosmul(
        positive=[word2, word11], negative=[word1], topn=5
    )
    pprint(relation_hits)

witcher relates to geralt as dbz to _____
Early 2018:
[('krillin', 1.1277506351470947),
 ('gohan', 1.1147927045822144),
 ('yamcha', 1.0909305810928345),
 ('hinata', 1.0903247594833374),
 ('bakugo', 1.0854500532150269)]

Late 2022:
[('goku', 0.906746506690979),
 ('vegeta', 0.8936079144477844),
 ('frieza', 0.8789056539535522),
 ('kakashi', 0.8565698862075806),
 ('piccolo', 0.8423339128494263)]


# Word Similarity

New words

In [35]:
# Top-5 most similar entries for `word`, shown for each embedding in turn.
word = 'tiktok'
print(f'Word = {word}')
for label, embedding in (('Early 2018:', ec_embed), ('\nLate 2022:', my_embed)):
    print(label)
    pprint(embedding.most_similar(word, topn=5))

Word = tiktok
Early 2018:
[('🕚', 0.6714262366294861),
 ('🕣', 0.6463980078697205),
 ('🕢', 0.6357600688934326),
 ('🕞', 0.6341347694396973),
 ('🕠', 0.6330246329307556)]

Late 2022:
[('tik', 0.7353744506835938),
 ('tok', 0.7287505865097046),
 ('twitter', 0.7192186713218689),
 ('youtube', 0.7173717021942139),
 ('facebook', 0.6878344416618347)]


In [34]:
# Top-5 most similar entries for `word` in each embedding.
word = 'covid'
print(f'Word = {word}')

print('Early 2018:')
early_neighbours = ec_embed.most_similar(word, topn=5)
pprint(early_neighbours)

print('\nLate 2022:')
late_neighbours = my_embed.most_similar(word, topn=5)
pprint(late_neighbours)

Word = covid
Early 2018:
[('shenkiz', 0.8626444935798645),
 ('spedwithmeds', 0.8553473353385925),
 ('purplecraze1', 0.8546385765075684),
 ('prophetstorm', 0.854583203792572),
 ('kuningasest', 0.8535624742507935)]

Late 2022:
[('flu', 0.6742793321609497),
 ('rona', 0.6223848462104797),
 ('pandemic', 0.5910007357597351),
 ('bronchitis', 0.5708175897598267),
 ('strep', 0.5706968307495117)]


In [62]:
# Compare the top-5 most similar entries for `word` across both embeddings.
word = 'sadge'
print(f'Word = {word}')
for heading, vectors in zip(('Early 2018:', '\nLate 2022:'), (ec_embed, my_embed)):
    print(heading)
    pprint(vectors.most_similar(word, topn=5))

Word = sadge
Early 2018:
[('gjore', 0.9402377605438232),
 ('gjorder', 0.9352293610572815),
 ('mumler', 0.9276406168937683),
 ('misforstår', 0.9257854223251343),
 ('failede', 0.9248456954956055)]

Late 2022:
[(':(', 0.798589289188385),
 ('PepeHands', 0.7551366090774536),
 ('smoge', 0.7036895155906677),
 ('peeposad', 0.6771226525306702),
 ('FeelsBadMan', 0.6268635988235474)]


In [63]:
# Top-5 most similar entries for `word`, first Early 2018 then Late 2022.
word = 'ong'
print(f'Word = {word}')
for label, embedding in (('Early 2018:', ec_embed), ('\nLate 2022:', my_embed)):
    print(label)
    similar_entries = embedding.most_similar(word, topn=5)
    pprint(similar_entries)

Word = ong
Early 2018:
[('tran', 0.5921935439109802),
 ('rea', 0.5333951115608215),
 ('kent', 0.5145621299743652),
 ('phill', 0.5145235061645508),
 ('maud', 0.5090656280517578)]

Late 2022:
[('fr', 0.6783689856529236),
 ('frfr', 0.6468927264213562),
 ('ngl', 0.6333709359169006),
 ('bruh', 0.5841929912567139),
 ('nahh', 0.5813834071159363)]


In [93]:
# Top-5 most similar entries for `word` in each embedding.
word = 'tft'
print(f'Word = {word}')

print('Early 2018:')
early_similar = ec_embed.most_similar(word, topn=5)
pprint(early_similar)

print('\nLate 2022:')
late_similar = my_embed.most_similar(word, topn=5)
pprint(late_similar)

Word = tft
Early 2018:
[('ftm', 0.6835049390792847),
 ('fnatic', 0.6691223382949829),
 ('valiance', 0.6687248945236206),
 ('vg', 0.6564218997955322),
 ("na'vi", 0.6558435559272766)]

Late 2022:
[('valorant', 0.6490270495414734),
 ('hearthstone', 0.6373392939567566),
 ('league', 0.624655544757843),
 ('valo', 0.6167577505111694),
 ('dbd', 0.6100278496742249)]


In [56]:
# Top-5 most similar entries for `word` in each embedding, tolerating a word
# that is missing from an embedding's vocabulary.
#
# gensim signals an unknown key with KeyError, so only that exception is
# reported as "Not present in vocabulary.". The previous bare `except:` also
# swallowed unrelated failures (e.g. a NameError from an unloaded embedding,
# or a KeyboardInterrupt) and mislabeled them as a missing word.
word = 'nazi'
print(f'Word = {word}')
for label, embedding in (('Early 2018:', ec_embed), ('\nLate 2022:', my_embed)):
    print(label)
    try:
        # Keep the try body to the lookup itself so printing errors surface.
        neighbours = embedding.most_similar(word, topn=5)
    except KeyError:
        print('Not present in vocabulary.')
    else:
        pprint(neighbours)

Word = nazi
Early 2018:
[('fascist', 0.6940041184425354),
 ('hitler', 0.6691939830780029),
 ('neo-nazi', 0.6678686738014221),
 ('communist', 0.652311384677887),
 ('xenophobe', 0.6463825702667236)]

Late 2022:
[('fascist', 0.6345600485801697),
 ('communist', 0.5377800464630127),
 ('liberal', 0.5354568958282471),
 ('pedo', 0.5251925587654114),
 ('pedophile', 0.5056630969047546)]


New meanings

In [31]:
# Top-5 most similar entries for `word`, shown for each embedding in turn.
word = 'bruh'
print(f'Word = {word}')
for heading, vectors in zip(('Early 2018:', '\nLate 2022:'), (ec_embed, my_embed)):
    print(heading)
    pprint(vectors.most_similar(word, topn=5))

Word = bruh
Early 2018:
[('lmaoo', 0.8270270824432373),
 ('lmao', 0.8264647126197815),
 ('lma\\1\\1', 0.8260526061058044),
 ('lmfao', 0.8187956213951111),
 ('dude', 0.8046270608901978)]

Late 2022:
[('bro', 0.7603374123573303),
 ('broo', 0.7248337864875793),
 ('bruhh', 0.6970949769020081),
 ('lmao', 0.6763647198677063),
 ('lmfao', 0.6665611267089844)]


In [33]:
# Compare the top-5 most similar entries for `word` across both embeddings.
word = 'gay'
print(f'Word = {word}')
for label, embedding in (('Early 2018:', ec_embed), ('\nLate 2022:', my_embed)):
    print(label)
    closest = embedding.most_similar(word, topn=5)
    pprint(closest)

Word = gay
Early 2018:
[('bisexual', 0.7963554263114929),
 ('homosexual', 0.7890292406082153),
 ('horny', 0.7743268013000488),
 ('lesbian', 0.7575783133506775),
 ('asexual', 0.7413439154624939)]

Late 2022:
[('racist', 0.6475604772567749),
 ('homophobic', 0.6261984705924988),
 ('bisexual', 0.6246201992034912),
 ('lesbian', 0.6082661151885986),
 ('trans', 0.6082121133804321)]


In [40]:
# Top-5 most similar entries for `word` in each embedding.
word = 'mandate'
print(f'Word = {word}')

print('Early 2018:')
early_matches = ec_embed.most_similar(word, topn=5)
pprint(early_matches)

print('\nLate 2022:')
late_matches = my_embed.most_similar(word, topn=5)
pprint(late_matches)

Word = mandate
Early 2018:
[('magnate', 0.5362988710403442),
 ('genoa', 0.5304369330406189),
 ('invitate', 0.5278923511505127),
 ('dedichi', 0.517254114151001),
 ('annexation', 0.5139479637145996)]

Late 2022:
[('liandries', 0.39909812808036804),
 ('rylai', 0.3920638859272003),
 ('abysall', 0.3883359432220459),
 ('qss', 0.38536104559898376),
 ('rylais', 0.38165608048439026)]


Unchanged meanings

In [61]:
# Top-5 most similar entries for `word`, first Early 2018 then Late 2022.
word = 'debate'
print(f'Word = {word}')
for label, embedding in (('Early 2018:', ec_embed), ('\nLate 2022:', my_embed)):
    print(label)
    pprint(embedding.most_similar(word, topn=5))

Word = debate
Early 2018:
[('discussion', 0.7672012448310852),
 ('conversation', 0.7255319952964783),
 ('argument', 0.7147690653800964),
 ('disagreement', 0.708280622959137),
 ('convo', 0.7039156556129456)]

Late 2022:
[('discussion', 0.6500672101974487),
 ('conversation', 0.5961875915527344),
 ('argument', 0.5685128569602966),
 ('convo', 0.5614274144172668),
 ('rant', 0.49557769298553467)]


In [49]:
# Compare the top-5 most similar entries for `word` across both embeddings.
word = 'year'
print(f'Word = {word}')
for heading, vectors in zip(('Early 2018:', '\nLate 2022:'), (ec_embed, my_embed)):
    print(heading)
    ranked = vectors.most_similar(word, topn=5)
    pprint(ranked)

Word = year
Early 2018:
[('week', 0.8601747155189514),
 ('years', 0.8299892544746399),
 ('month', 0.8273553252220154),
 ('yr', 0.8206229209899902),
 ('day', 0.798405110836029)]

Late 2022:
[('week', 0.7725321054458618),
 ('month', 0.7188349366188049),
 ('yr', 0.7083409428596497),
 ('years', 0.695040762424469),
 ('decade', 0.6368394494056702)]


In [48]:
# Top-5 most similar entries for `word` in each embedding.
word = 'internet'
print(f'Word = {word}')

print('Early 2018:')
early_ranked = ec_embed.most_similar(word, topn=5)
pprint(early_ranked)

print('\nLate 2022:')
late_ranked = my_embed.most_similar(word, topn=5)
pprint(late_ranked)

Word = internet
Early 2018:
[('wifi', 0.8583606481552124),
 ('isp', 0.8039506077766418),
 ('computer', 0.8027212023735046),
 ('connection', 0.7891113758087158),
 ('router', 0.767777144908905)]

Late 2022:
[('wifi', 0.6884805560112),
 ('isp', 0.5936806201934814),
 ('internets', 0.572384238243103),
 ('connection', 0.5558504462242126),
 ('wi-fi', 0.554517924785614)]
