# Note

This is the notebook version of the recommender, intended for testing. The implementation here will differ from the actual recommender file, `Recommender.py`.

If necessary, make sure you also update the model path in `Recommender.py`.

In [None]:
import gensim.models.word2vec
import numpy as np

# Path of the saved Word2Vec model that is handed to gensim's loader
model_dir = r"PATH\TO\MODELS\MODEL_NAME\model" # ex: C:\Emote_Recommender\models\twitch_500_20e\model

# Only the keyed vectors (.wv) are used by the rest of the notebook, so the
# full model object is never bound to a name and can be reclaimed right away.
embeddings = gensim.models.word2vec.Word2Vec.load(model_dir).wv

In [2]:
# A relative import only resolves when the code runs inside a package. Notebook
# cells run as top-level code with no parent package, where the relative form
# raises ImportError, so fall back to an absolute import in that case.
# NOTE(review): the fallback assumes Tokenizer.py is importable from the
# notebook's working directory / sys.path - confirm.
try:
    from .Tokenizer import TwitchTokenizer
except ImportError:
    from Tokenizer import TwitchTokenizer

# Single tokenizer instance shared by the cells below
tokenizer = TwitchTokenizer()

# Recommender

In [3]:
import pickle

def _load_emote_table(path, transpose=False):
    """
    Unpickle one emote table and normalise its missing-value markers.

    Param: path - file name of the pickled table
    Param: transpose - when True, the unpickled table is transposed (.T) first
    Return: the table with the strings 'NaN' / 'nan' replaced by np.nan
    """
    with open(path, 'rb') as handle:
        table = pickle.load(handle)
    if transpose:
        table = table.T
    table.replace(['NaN', 'nan'], np.nan, inplace = True)
    return table


# Load registered Emotes
emotes = _load_emote_table('emote_dict')
bttv_emotes = _load_emote_table('bttv_dict')
# The FFZ table is transposed after loading, unlike the other two
ffz_emotes = _load_emote_table('ffz_dict', transpose=True)

In [4]:
# Global emote sets that are appended to the user's subscriptions for lookup
base_twitch = ['GLOBAL_TWITCH', 'GLOBAL_FFZ', 'GLOBAL_7TV']

In [6]:
def _non_empty(table):
    """Return every non-null, non-empty value of a Series/DataFrame as a flat list."""
    return [x for x in table.fillna('').values.flatten() if x != '']


# Channel the user is currently chatting in
channel = 'HasanAbi'

# Channels the user is actively subscribed to
subs = [
    'CohhCarnage',
    'xQc',
    'pokimane'
]

# Subscriptions plus the global sets. A new list is built each time so that
# base_twitch is never mutated and re-running this cell cannot grow it.
twitch_subs = subs + base_twitch

# Emotes native to the current channel
emotes_ch = _non_empty(emotes[channel])

# Emotes the user has access to
emotes_user = _non_empty(emotes[twitch_subs])

# Handle BTTV
bttv_subs = subs + ['GLOBAL_BTTV']
emotes_ch.extend(_non_empty(bttv_emotes[channel]))
emotes_user.extend(_non_empty(bttv_emotes[bttv_subs]))

# Handle FFZ
# The global FFZ set belongs in ffz_subs (mirroring the BTTV handling above);
# it was previously appended to base_twitch, which left it out of the FFZ lookup.
ffz_subs = subs + ['GLOBAL_FFZ']
if channel in ffz_emotes.columns: # Less channel support on FFZ than BTTV
    emotes_ch.extend(_non_empty(ffz_emotes[channel]))
# Only index columns that exist so a channel without FFZ data is skipped
# instead of raising KeyError.
ffz_available = [name for name in ffz_subs if name in ffz_emotes.columns]
if ffz_available:
    emotes_user.extend(_non_empty(ffz_emotes[ffz_available]))

# All relevant emotes for lookup
emotes_all = emotes_ch + emotes_user

In [5]:
def recommendEmotes(msg: str, top_n: int = 3) -> dict:
    """
    Returns a dict of the most similar channel emotes for each emote found in input.

    Reads the notebook-level names `tokenizer`, `emotes_all` (every emote that
    is recognised), `emotes_ch` (candidate emotes of the current channel) and
    `embeddings` (the word vectors used for the similarity scores).

    Param: msg - chat message as a string
    Param: top_n - maximum number of recommendations per detected emote (default 3)
    Return: dict mapping each detected emote to a list of (emote, similarity)
            tuples sorted by descending similarity, at most top_n long; the list
            is empty when no similarity could be computed for that emote
    """
    rec = dict()
    tok = tokenizer.tokenize(text=msg, emotes=emotes_all)

    # Build the lookup set once instead of scanning the list for every token
    known_emotes = set(emotes_all)
    limit = max(top_n, 0)

    # For each word in the chat message
    for token in tok:
        if token not in known_emotes:
            # Emote from unrecognized channel (or just text)
            continue

        # Embedding keys are looked up in lower case
        token_key = token.lower()
        if token_key not in embeddings:
            # No vector for the detected emote itself: every comparison would
            # fail, so record it with no recommendations and move on.
            rec[token] = []
            continue

        # Calculate similarity between the current emote and each emote in the current channel
        sim_scores = {}
        for emote in emotes_ch:
            try:
                sim_scores[emote] = embeddings.similarity(token_key, emote.lower())
            except KeyError:
                # No embedding data found for an emote
                # Either embeddings are out of date or emote just isn't used enough
                continue

        # Sort by similarity and keep the top_n (emote, score) pairs
        ranked = sorted(sim_scores.items(), key=lambda x: x[1], reverse=True)
        rec[token] = ranked[:limit]

    # Return a dict of detected emotes as keys with recommendations as values
    return rec

In [None]:
# Sample chat message that is fed to the recommender below
msg = "AYAYA here's a haHAA chat message forsenPls good luck cohhLUL remember to sub <3" # Example message from user to process

# recommendEmotes returns a dict keyed by each detected emote; the values are
# lists of (emote, similarity) pairs sorted from most to least similar.
rec_emotes = recommendEmotes(msg=msg)
rec_emotes # Print the detected emotes in the message and their most similar emotes

In [None]:
import re

# Rewrite user's message with new emotes for the current channel
# Best recommendation per detected emote; emotes without any recommendation
# are left out and therefore stay unchanged in the message.
best_match = {emote: recs[0][0] for emote, recs in rec_emotes.items() if recs}

new_msg = msg
if best_match:
    # One regex pass over the ORIGINAL message:
    #  - the (?<!\w) / (?!\w) guards stop an emote name from being rewritten
    #    inside a longer token (e.g. 'LUL' inside 'cohhLUL'), which a plain
    #    str.replace would do;
    #  - a single pass means a freshly inserted emote is never replaced again
    #    by a later substitution;
    #  - longest names first so the alternation prefers the longest match.
    names = sorted(best_match, key=len, reverse=True)
    alternation = '|'.join(re.escape(name) for name in names)
    pattern = re.compile(r'(?<!\w)(?:' + alternation + r')(?!\w)')
    # A function replacement avoids backslash/group interpretation of emote text
    new_msg = pattern.sub(lambda match: best_match[match.group(0)], msg)

new_msg

In [14]:
# Function to find which channels an emote is registered to (can be multiple channels for emotes from 3rd party extensions)
def find_channel(emote: str):
    """
    Print where `emote` is registered.

    The native Twitch table is checked first; on a hit only the first matching
    column is printed. Otherwise the BTTV and FFZ tables are checked in turn
    and each hit prints its label followed by the list of matching columns.
    Nothing is printed when the emote is found nowhere. Returns None.
    """
    def _columns_with_emote(table):
        # Columns of `table` holding at least one cell equal to the emote
        hits = table.isin([emote]).any()
        return [column for column, found in hits.items() if found]

    # Search native Twitch emotes
    native = _columns_with_emote(emotes)
    if native:
        print(native[0])
        return

    # Search 3rd-party emotes
    for label, table in (('BTTV:', bttv_emotes), ('FFZ:', ffz_emotes)):
        if emote in table.values:
            print(label, _columns_with_emote(table))

In [None]:
# Example lookup: prints the channel column(s) in which 'POGGERS' is registered
find_channel('POGGERS')

In [None]:
# Tokenize a lone emoticon, passing an empty list as the second argument
# (presumably the known-emote list, as in the keyword call tokenize(text=..., emotes=...) - confirm)
tokenizer.tokenize(':^)',[])

# Vector Reconstruction

This section is not implemented in `Recommender.py`.

In [6]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def reconstruct_vec_from_3(word: str):
    """
    Rebuild the vector of `word` from its 3 nearest neighbours and score it.

    The vectors of the three most similar tokens are averaged and compared to
    the stored vector of `word` with cosine similarity. The neighbour list and
    the resulting score are both printed.

    Param: word - token to look up in `embeddings`
    Return: cosine similarity between the averaged vector and the real one
    """
    neighbours = embeddings.most_similar(word, topn=3)
    print(neighbours)

    # Each neighbour is a (token, similarity) pair; fetch the vector for each token
    vectors = [embeddings[neighbours[i][0]] for i in range(3)]

    # A = reconstructed vector, B = ground truth
    A = (vectors[0] + vectors[1] + vectors[2]) / 3
    B = embeddings[word]

    # cosine_similarity expects 2-D inputs, so both vectors become single-row matrices
    score = cosine_similarity(A.reshape(1, -1), B.reshape(1, -1))[0][0]
    print(score)
    return score

In [7]:
import random

N_RECONSTRUCT = 400                 # number of distinct emotes to collect
MAX_DRAWS = 100 * N_RECONSTRUCT     # safety cap so the cell cannot loop forever

reconstruct_emotes = []
_seen = set()                       # O(1) duplicate check
_columns = list(emotes.columns)

# Randomly reconstruct 400 emotes
for _ in range(MAX_DRAWS):
    if len(reconstruct_emotes) >= N_RECONSTRUCT:
        break

    # Random column, then a random cell of that column
    # (.iloc[0, 0] replaces the deprecated positional Series[0] access)
    r = emotes[[random.choice(_columns)]].sample(1).iloc[0, 0]

    # Skip empty cells (NaN / None) and anything else that is not text
    if not isinstance(r, str):
        continue

    # The lower-cased name is what gets stored and looked up later, so it is
    # also the form that must be tested against the vocabulary.
    r = r.lower()
    if r in _seen or r not in embeddings:
        continue

    _seen.add(r)
    reconstruct_emotes.append(r)

if len(reconstruct_emotes) < N_RECONSTRUCT:
    print('Only found', len(reconstruct_emotes), 'eligible emotes after', MAX_DRAWS, 'draws')

In [None]:
# Reconstruction similarity for every sampled emote
graph_data = []
for e in reconstruct_emotes:
    try:
        score = reconstruct_vec_from_3(e)
    except (KeyError, IndexError):
        # KeyError: the token has no vector; IndexError: fewer than 3 neighbours
        # came back. A bare `except:` would also swallow KeyboardInterrupt and
        # make the cell impossible to stop.
        print(e,'not found')
    else:
        graph_data.append(score)

In [None]:
# Plot the reconstructed vector similarities
from matplotlib import pyplot as plt

# Ten equal-width bins spanning 0.0 to 1.0 (edges 0, 0.1, ..., 1.0)
bin_edges = [edge / 10 for edge in range(11)]

fig, ax = plt.subplots(figsize=(5,5))
ax.set_title('Reconstruction Similarity of 400 Emotes')
ax.set_xlim(0,1)

ax.set_xlabel('Similarity')
ax.set_ylabel('Frequency')
ax.hist(graph_data, color='Green', bins=bin_edges)
plt.show()

Some sample reconstructions:

In [None]:
# Sample reconstruction: prints the 3 nearest neighbours and the cosine
# similarity between their averaged vector and the real vector of the word.
# NOTE(review): other cells lower-case emote names before the embedding lookup -
# confirm that this mixed-case key exists in the vocabulary.
word = 'WutFace'
reconstruct_vec_from_3(word)

In [None]:
# Sample reconstruction for 'LUL' (prints neighbours and similarity score).
# NOTE(review): other cells lower-case emote names before the embedding lookup -
# confirm that this upper-case key exists in the vocabulary.
word = 'LUL'
reconstruct_vec_from_3(word)

In [None]:
# Sample reconstruction for 'BabyRage' (prints neighbours and similarity score).
# NOTE(review): other cells lower-case emote names before the embedding lookup -
# confirm that this mixed-case key exists in the vocabulary.
word = 'BabyRage'
reconstruct_vec_from_3(word)

In [None]:
# Sample reconstruction for 'aceofjam' (prints neighbours and similarity score)
word = 'aceofjam'
reconstruct_vec_from_3(word)