## Get the emoji package

In [1]:
import emoji

In [2]:
emoji_dictionary = {'0': '\u2764\uFE0F',
                    '1': ':baseball:',
                    '2': ':grinning_face_with_big_eyes:',
                    '3': ':disappointed_face:',
                    '4': ':fork_and_knife:',
                    '5': ':hundred_points:',
                    '6': ':fire:',
                    '7': ':face_blowing_a_kiss:',
                    '8': ':chestnut:',
                    '9': ':flexed_biceps:'}

In [3]:
for e in emoji_dictionary.values(): 
    print(emoji.emojize(e))

❤️
⚾
😃
😞
🍴
💯
🔥
😘
🌰
💪


## Processing a Custom Dataset

In [4]:
import pandas as pd
import numpy as np

In [5]:
train = pd.read_csv('train_emoji.csv', header=None)
test = pd.read_csv('test_emoji.csv', header=None)

In [6]:
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [7]:
#Printing sentences with emojis
data = train.values
print(data.shape)

(132, 4)


In [8]:
X_train = train[0]
Y_train = train[1]

X_test = test[0]
Y_test = test[1]

In [11]:
for i in range(5):
    print(X_train[i], emoji.emojize(emoji_dictionary[str(Y_train[i])]))

never talk to me again 😞
I am proud of your achievements 😃
It is the worst day in my life 😞
Miss you so much ❤️
food is life 🍴


## Getting the Glove Vectors

In [15]:
f = open('glove.6B.50d.txt', encoding='utf-8')

In [16]:
embeddings_index = {}

for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float')
    embeddings_index[word] = coefs
    
f.close()

In [18]:
emb_dim = embeddings_index['eat'].shape[0]
print(emb_dim)

50


## Converting sentences into vectors (Embedding Layer Output)

In [19]:
def embedding_output(X):
    maxLen = 10
    embedding_out = np.zeros((X.shape[0], maxLen, emb_dim))
    
    for ix in range(X.shape[0]):
        X[ix] = X[ix].split()
        
        for ij in range(len(X[ix])):
            try:
                embedding_out[ix][ij] = embeddings_index[X[ix][ij].lower()]
            except:
                embedding_out[ix][ij] = np.zeros((50,))
    
    return embedding_out

In [20]:
embeddings_matrix_train = embedding_output(X_train)
embeddings_matrix_test = embedding_output(X_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [21]:
print(embeddings_matrix_train.shape)
print(embeddings_matrix_test.shape)

(132, 10, 50)
(56, 10, 50)
