In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.wrappers import Bidirectional
from keras.layers import Embedding
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from gensim.models.keyedvectors import KeyedVectors
import math
from sklearn.metrics import classification_report, confusion_matrix

Using TensorFlow backend.


In [2]:
# importing the traning data
train_data = pd.read_csv("Train.csv")
train_data.head()

Unnamed: 0,TEXT,Label
0,Vacation wasted ! #vacation2017 #photobomb #ti...,0
1,"Oh Wynwood, you’re so funny! : @user #Wynwood ...",1
2,Been friends since 7th grade. Look at us now w...,2
3,This is what it looks like when someone loves ...,3
4,RT @user this white family was invited to a Bl...,3


In [3]:
# import the testing data
test_data = pd.read_csv("Test.csv")
test_data.head()

Unnamed: 0,TEXT,Label
0,A little throwback with my favourite person @ ...,0
1,glam on @user yesterday for #kcon makeup using...,7
2,Democracy Plaza in the wake of a stunning outc...,11
3,Then &amp; Now. VILO @ Walt Disney Magic Kingdom,0
4,Who never... @ A Galaxy Far Far Away,2


In [4]:
# import the mappings file
mappings = pd.read_csv("Mapping.csv")
mappings.head()

Unnamed: 0.1,Unnamed: 0,emoticons,number
0,0,😜,0
1,1,📸,1
2,2,😍,2
3,3,😂,3
4,4,😉,4


In [5]:
train_data.shape, test_data.shape, mappings.shape

((70000, 2), (50006, 2), (20, 3))

In [6]:
train_length = train_data.shape[0]
test_length = test_data.shape[0]
train_length, test_length

(70000, 50006)

In [7]:
stop_words = stopwords.words("english")
stop_words

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [8]:
# tokenize the sentences
def tokenize(tweets):
    stop_words = stopwords.words("english")
    tokenized_tweets = []
    for tweet in tweets:
        # split all words in the tweet
        words = tweet.split(" ")
        tokenized_string = ""
        for word in words:
            # remove @handles -> useless -> no information
            if word[0] != '@' and word not in stop_words:
                # if a hashtag, remove # -> adds no new information
                if word[0] == "#":
                    word = word[1:]
                tokenized_string += word + " "
        tokenized_tweets.append(tokenized_string)
    return tokenized_tweets

In [11]:
tokenize(['This is a good day. @css #coding'])

['This good day. coding ']

In [12]:
# translate tweets to a sequence of numbers
def encod_tweets(tweets):
    tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', split=" ", lower=True)
    tokenizer.fit_on_texts(tweets)
    return tokenizer, tokenizer.texts_to_sequences(tweets)

In [15]:
a= tokenize(['This is a good day. @css #coding'])
a, encod_tweets(a)

(['This good day. coding '],
 (<keras_preprocessing.text.Tokenizer at 0x1394b7d0e80>, [[1, 2, 3, 4]]))

In [176]:
# apply padding to dataset and convert labels to bitmaps
def format_data(encoded_tweets, max_length, labels):
    x = pad_sequences(encoded_tweets, maxlen= max_length, padding='post')
    y = []
    for emoji in labels:
        bit_vec = np.zeros(20)
        bit_vec[emoji] = 1
        y.append(bit_vec)
    y = np.asarray(y)
    return x, y

In [177]:
# create weight matrix from pre trained embeddings
def create_weight_matrix(vocab, raw_embeddings):
    vocab_size = len(vocab) + 1
    weight_matrix = np.zeros((vocab_size, 300))
    for word, idx in vocab.items():
        if word in raw_embeddings:
            weight_matrix[idx] = raw_embeddings[word]
    return weight_matrix

In [195]:
# final model
def final_model(weight_matrix, vocab_size, max_length, x, y):
    embedding_layer = Embedding(vocab_size, 300, weights=[weight_matrix], input_length=max_length, trainable=True, mask_zero=True)
    model = Sequential()
    model.add(embedding_layer)
    model.add(Bidirectional(LSTM(128, dropout=0.2, return_sequences=True)))
    model.add(Bidirectional(LSTM(128, dropout=0.2)))
    model.add(Dense(20, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(x, y, epochs = 1, validation_split = 0.25)
    score, acc = model.evaluate(x_test, y_test)
    return model, score, acc

In [184]:
tokenized_tweets = tokenize(train_data['TEXT'])
tokenized_tweets += tokenize(test_data['TEXT'])
max_length = math.ceil(sum([len(s.split(" ")) for s in tokenized_tweets])/len(tokenized_tweets))
tokenizer, encoded_tweets = encod_tweets(tokenized_tweets)
max_length, len(tokenized_tweets)

(10, 120000)

In [188]:
x, y = format_data(encoded_tweets[:train_length], max_length, train_data['Label'])
len(x), len(y)

(70000, 70000)

In [190]:
x_test, y_test = format_data(encoded_tweets[train_length:], max_length, test_data['Label'])
len(x_test), len(y_test)

(50000, 50000)

In [192]:
vocab = tokenizer.word_index
vocab, len(vocab)

({'i': 1,
  'love': 2,
  '️': 3,
  'the': 4,
  '…': 5,
  'new': 6,
  'amp': 7,
  'happy': 8,
  'day': 9,
  'my': 10,
  'night': 11,
  'one': 12,
  'beach': 13,
  'york': 14,
  'time': 15,
  'today': 16,
  'this': 17,
  'good': 18,
  'park': 19,
  'best': 20,
  "i'm": 21,
  'you': 22,
  'christmas': 23,
  'like': 24,
  'birthday': 25,
  'get': 26,
  'california': 27,
  'we': 28,
  'city': 29,
  'got': 30,
  'last': 31,
  'university': 32,
  'a': 33,
  'beautiful': 34,
  'little': 35,
  'great': 36,
  'see': 37,
  'thanks': 38,
  'back': 39,
  'family': 40,
  'so': 41,
  'thank': 42,
  'life': 43,
  'center': 44,
  'much': 45,
  'fun': 46,
  'favorite': 47,
  'me': 48,
  'it': 49,
  'first': 50,
  'when': 51,
  'home': 52,
  'always': 53,
  'go': 54,
  'morning': 55,
  'tonight': 56,
  "it's": 57,
  'us': 58,
  'school': 59,
  'amazing': 60,
  'friends': 61,
  'girl': 62,
  'florida': 63,
  'texas': 64,
  'weekend': 65,
  'lake': 66,
  'year': 67,
  'state': 68,
  "can't": 69,
  'know': 

In [71]:
raw_embeddings = KeyedVectors.load_word2vec_format('model_swm_300-6-10-low.w2v', binary=False)
raw_embeddings

<gensim.models.keyedvectors.Word2VecKeyedVectors at 0x1f043d0a898>

In [193]:
weight_matrix = create_weight_matrix(vocab, raw_embeddings)
len(weight_matrix)

97463

In [196]:
model, score, acc = final_model(weight_matrix, len(vocab)+1, max_length, x, y)
model, score, acc

Train on 52500 samples, validate on 17500 samples
Epoch 1/1


(<keras.engine.sequential.Sequential at 0x1f019314390>,
 3.7563875875854493,
 0.0687)

In [197]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (None, 10, 300)           29238900  
_________________________________________________________________
bidirectional_19 (Bidirectio (None, 10, 256)           439296    
_________________________________________________________________
bidirectional_20 (Bidirectio (None, 256)               394240    
_________________________________________________________________
dense_10 (Dense)             (None, 20)                5140      
Total params: 30,077,576
Trainable params: 30,077,576
Non-trainable params: 0
_________________________________________________________________


In [203]:
y_pred = model.predict(x_test)
y_pred

array([[0.00569975, 0.01949204, 0.25266057, ..., 0.03280429, 0.05922674,
        0.005699  ],
       [0.01819765, 0.08561526, 0.16418862, ..., 0.02105953, 0.03014265,
        0.00755177],
       [0.00180057, 0.00821676, 0.01861308, ..., 0.00745926, 0.00281304,
        0.00339441],
       ...,
       [0.00849353, 0.04380578, 0.12488709, ..., 0.02229321, 0.02466554,
        0.02386905],
       [0.00639585, 0.00540844, 0.23108616, ..., 0.01010101, 0.07053875,
        0.00586585],
       [0.01582223, 0.02276355, 0.17639498, ..., 0.05261671, 0.02992874,
        0.01783151]], dtype=float32)

In [205]:
for pred in y_pred:
    print(np.argmax(pred))

2
13
11
13
3
2
9
9
9
9
9
2
2
9
9
2
3
9
12
11
9
2
7
7
3
3
3
3
9
2
9
11
2
9
11
5
9
2
3
2
7
2
15
9
9
2
3
2
3
5
9
3
9
2
3
3
13
2
3
5
12
3
9
2
9
11
15
2
3
2
9
11
11
2
2
9
7
15
9
2
5
2
2
11
2
3
2
9
9
3
3
9
9
3
3
9
9
5
9
9
2
16
5
2
2
3
2
9
5
9
13
5
3
2
7
2
3
9
7
11
3
11
11
3
3
9
2
13
9
5
3
2
9
9
2
13
9
3
2
9
9
5
3
2
9
3
11
11
3
3
9
9
2
5
9
7
2
3
13
9
9
9
9
2
9
13
2
3
3
11
16
3
9
9
15
2
9
2
9
11
9
9
9
9
3
3
2
9
9
9
2
11
12
9
9
9
1
3
9
3
2
2
7
11
3
17
1
16
7
3
3
3
9
2
3
2
3
2
3
9
5
9
9
2
9
2
17
5
9
15
2
12
2
16
11
3
3
9
13
12
9
13
2
14
2
9
2
9
2
9
3
11
9
3
9
9
9
2
3
9
3
9
5
3
12
9
2
2
3
2
9
16
3
13
2
11
11
5
2
2
11
3
5
9
3
2
2
3
9
9
3
9
9
9
7
2
9
13
9
9
3
9
2
9
9
11
2
9
11
9
9
11
7
9
19
9
9
9
2
7
9
3
9
2
3
7
12
5
2
2
3
2
7
13
9
9
2
3
2
11
7
2
3
9
5
2
1
3
2
3
2
11
9
2
3
16
9
9
9
9
9
7
3
3
9
9
11
9
15
2
9
11
3
9
3
3
2
3
7
7
9
12
3
3
3
3
3
9
9
9
7
14
3
3
3
3
12
3
3
2
12
9
3
3
3
9
11
7
7
17
9
2
2
9
3
2
9
3
9
5
9
19
12
9
9
11
11
3
3
3
3
11
2
5
3
2
15
3
2
2
11
3
9
3
13
1
12
2
16
2
3
3
9
7
9
13
9
2
3


12
9
16
9
9
3
9
2
3
3
12
2
3
2
19
7
2
15
9
3
2
3
5
12
11
3
9
12
9
9
9
19
7
9
2
3
2
3
3
2
9
3
9
2
3
3
9
2
11
2
9
2
2
15
3
5
2
9
9
3
9
3
9
9
15
9
3
3
3
9
9
3
11
9
7
13
3
3
9
1
19
7
3
7
2
9
2
9
11
3
3
9
9
5
9
12
11
3
2
9
5
9
3
2
2
2
3
9
1
7
2
9
11
7
2
3
3
3
5
14
3
1
3
1
9
2
2
9
13
12
5
3
2
2
9
7
15
3
11
9
2
12
9
9
14
9
2
3
7
9
3
2
3
9
2
3
9
2
9
2
2
2
3
13
13
11
9
9
9
9
9
7
9
2
9
9
7
9
11
2
2
9
2
3
9
11
12
13
7
9
3
9
19
3
7
3
9
3
9
11
3
9
7
3
2
9
3
2
15
3
9
9
3
5
11
13
9
11
2
9
3
9
3
9
9
3
5
9
9
9
9
2
12
9
9
2
9
3
2
2
7
2
5
3
3
9
16
2
9
3
12
13
7
1
9
1
9
3
13
9
9
9
2
13
15
11
9
11
5
3
12
3
9
9
2
9
2
1
3
9
12
3
2
2
2
9
11
9
2
9
3
2
9
9
2
11
13
9
3
3
9
3
11
3
2
12
9
15
3
9
11
13
1
11
2
3
9
11
9
2
17
3
9
16
9
11
2
3
2
3
9
9
3
3
2
9
9
2
3
3
12
3
9
9
5
2
6
9
3
2
9
9
16
17
3
9
11
11
17
3
11
3
9
9
3
3
3
9
3
12
14
9
11
1
2
5
9
9
2
12
16
2
1
1
3
2
7
2
12
9
5
3
9
3
2
9
11
13
3
2
9
3
1
9
11
2
9
2
9
3
9
3
9
9
2
9
9
2
2
9
9
9
3
9
9
9
2
9
2
9
9
13
9
9
11
3
5
3
2
9
2
3
9
13
9
2
11
2
9
3
16
9
9
13
3
2
3
1

9
3
12
9
11
9
2
9
1
3
14
9
9
14
3
14
9
9
7
2
3
9
3
7
2
9
12
3
3
3
3
3
15
9
3
3
11
9
2
3
11
1
2
15
3
2
3
9
3
3
2
9
9
5
7
13
2
7
3
5
9
9
3
11
5
3
14
3
13
12
2
9
2
9
12
3
9
9
5
3
5
3
3
2
2
3
9
2
9
12
9
3
9
5
3
13
3
9
2
9
2
7
9
9
11
3
9
7
2
1
11
9
9
7
13
9
5
11
5
2
9
2
5
2
9
9
13
7
3
9
2
17
9
9
2
3
9
3
9
16
9
3
13
7
9
9
3
9
2
3
5
5
2
9
9
9
2
2
13
9
9
2
9
9
11
5
3
9
2
11
9
7
3
9
3
11
9
12
3
2
9
3
9
9
9
3
9
9
2
13
3
12
11
3
11
7
3
9
12
9
12
5
9
2
5
1
9
2
16
7
5
3
3
2
3
12
9
9
15
2
12
5
16
9
9
9
9
3
5
9
9
3
12
9
9
2
3
9
2
3
5
9
9
3
2
2
5
2
3
2
3
3
2
9
9
3
9
5
9
3
5
2
9
3
9
11
7
9
3
9
15
5
7
5
2
9
2
3
3
1
9
9
9
11
9
3
7
3
7
12
14
16
9
3
9
9
9
9
9
9
9
9
7
7
9
9
2
12
2
11
3
9
9
3
9
9
1
2
11
13
9
7
3
2
2
3
3
9
3
2
5
3
3
9
3
11
1
2
2
2
12
5
2
2
2
2
9
16
2
9
9
9
3
12
3
2
1
3
11
16
3
2
7
9
2
16
2
9
2
3
2
3
9
9
9
3
3
2
9
9
12
3
19
9
9
14
3
3
12
2
9
13
11
3
9
3
9
3
9
3
1
7
7
5
9
3
7
9
3
5
16
9
9
5
9
9
5
9
3
9
16
2
9
9
9
3
3
3
2
3
3
3
7
9
9
3
7
9
7
3
9
2
3
3
5
2
9
11
3
9
9
13
2
3
9
11
16
15
9
11
9
3
9


11
9
9
9
5
3
3
9
7
2
3
9
9
3
15
2
9
3
2
5
12
9
2
9
12
7
14
9
1
3
9
13
3
9
2
5
2
9
11
13
2
9
11
2
7
3
9
3
3
1
9
11
2
1
3
7
9
2
14
9
9
15
7
5
9
9
2
19
9
9
3
9
3
3
2
3
3
6
3
3
5
3
9
16
2
9
5
2
16
9
3
2
2
13
2
3
9
6
5
2
2
2
1
9
2
2
9
9
2
3
11
9
2
2
3
3
9
2
3
16
9
11
2
9
3
2
9
3
3
7
9
3
3
3
9
3
9
7
2
2
9
2
13
9
3
9
2
15
7
13
9
9
9
2
2
3
12
2
15
2
11
9
5
3
9
9
5
5
3
9
3
16
12
9
5
9
9
1
5
9
9
3
2
2
3
3
2
3
9
3
2
2
3
3
3
5
12
9
3
5
3
3
9
3
9
2
2
11
2
2
9
9
2
11
11
9
2
5
3
16
2
9
9
12
2
9
9
9
2
2
9
2
3
11
2
3
2
2
7
3
9
2
2
9
9
9
9
2
2
9
16
9
9
2
3
3
9
13
7
9
3
9
12
9
2
15
2
14
11
9
7
7
9
9
3
12
9
13
2
3
9
14
3
11
3
7
15
11
3
14
5
9
9
11
2
11
2
2
9
9
2
9
9
3
3
5
9
13
1
2
3
9
5
2
9
7
9
3
9
9
2
2
3
19
2
3
7
9
9
11
3
9
2
5
3
3
9
2
9
12
3
9
9
7
1
3
9
3
9
9
3
9
2
11
7
9
3
9
9
7
2
9
2
9
13
3
9
2
3
9
3
3
7
11
3
2
9
2
9
3
9
9
16
14
13
2
9
3
9
3
12
3
9
3
9
11
16
11
2
3
2
7
3
9
12
2
2
9
7
3
2
2
14
1
9
11
9
11
3
15
9
9
13
2
9
15
9
3
9
9
3
5
2
9
9
2
5
11
9
2
5
13
3
7
3
1
2
2
9
3
2
9
2
14
7
9
3
1
3
11
2
9
3


3
7
11
1
2
2
3
2
3
9
3
9
2
17
8
17
3
13
5
11
2
3
9
5
12
9
1
1
5
9
2
9
9
9
3
3
9
9
12
9
2
9
14
9
3
12
9
3
15
7
3
9
7
13
9
11
11
2
12
2
1
2
9
11
3
3
9
2
17
9
3
9
2
9
3
9
9
3
5
15
12
13
2
5
2
9
16
13
3
9
2
9
3
5
2
3
9
3
2
9
2
9
1
16
9
2
9
11
2
13
7
11
1
2
3
2
11
16
2
9
2
3
3
3
2
9
3
9
11
9
3
5
9
12
11
9
3
9
9
3
9
9
3
2
9
3
9
9
2
3
9
16
3
7
9
5
1
9
3
3
3
9
1
9
9
9
2
2
3
2
9
3
3
1
11
9
9
11
3
9
9
2
3
9
9
9
3
9
2
14
5
11
12
3
3
3
3
3
7
19
12
15
3
3
3
19
11
2
9
3
11
3
9
16
2
9
3
11
3
5
2
15
9
2
3
2
11
3
12
7
9
17
2
3
3
9
5
9
3
15
11
9
9
12
17
9
3
5
9
3
3
3
9
19
1
9
3
12
9
9
5
3
9
3
9
3
14
2
9
9
15
9
2
9
3
17
9
3
3
9
9
9
13
2
3
9
5
3
2
9
3
13
7
3
3
9
3
12
2
3
9
5
12
9
9
11
3
9
13
9
2
3
9
3
9
3
2
9
2
2
3
7
9
9
9
3
2
9
9
3
1
2
7
12
9
9
9
2
9
2
11
9
3
5
3
7
3
9
3
3
12
3
9
11
3
9
3
9
9
2
9
13
9
3
16
2
3
3
2
9
9
9
13
1
3
9
9
9
16
7
2
11
0
1
12
2
9
17
3
15
7
9
9
3
2
13
9
3
9
2
2
9
9
11
7
9
3
9
3
11
3
9
9
7
1
2
3
3
9
11
2
3
3
9
9
3
11
9
3
11
3
9
3
16
2
1
2
3
2
3
9
3
9
9
9
3
2
11
2
9
3
7
3
9
3
9
9
3
9

17
11
2
9
3
5
9
3
5
3
11
2
3
9
9
3
3
6
2
3
9
2
9
2
3
9
3
3
3
3
2
17
2
1
11
3
2
2
9
12
9
9
3
2
3
2
12
9
2
9
2
5
9
9
9
3
9
9
9
9
9
1
2
3
9
3
9
2
2
2
3
1
3
9
9
9
2
3
2
3
9
9
12
2
11
3
2
3
9
11
11
9
8
9
2
9
9
3
3
12
2
9
2
9
3
7
7
13
1
9
2
3
3
9
2
9
2
2
7
1
8
3
3
9
2
3
2
9
19
2
9
7
9
3
5
2
1
13
2
3
1
9
9
3
2
2
2
2
3
9
9
3
7
2
9
2
11
3
2
2
9
3
3
9
2
9
9
3
3
9
2
9
12
7
13
9
3
15
9
9
9
9
3
9
9
2
9
9
1
11
9
3
3
9
13
3
3
16
9
9
9
19
9
9
9
2
3
3
2
9
3
11
3
12
2
2
13
19
9
2
3
9
9
2
9
9
9
11
9
3
12
16
7
9
9
3
3
2
3
9
7
2
3
3
11
9
13
3
3
7
2
9
3
11
2
9
3
14
3
5
1
3
11
9
13
7
2
3
2
3
7
9
3
3
13
15
9
2
11
2
3
3
2
7
3
2
2
2
2
3
3
1
2
9
9
2
2
3
9
3
15
3
9
3
2
2
9
9
3
9
19
3
3
5
2
7
9
3
9
11
11
3
11
11
3
14
16
9
9
7
9
17
9
6
7
11
9
9
9
1
9
11
2
9
7
9
13
9
3
9
3
3
2
12
9
9
9
2
9
11
9
2
3
2
2
3
9
3
9
5
9
2
3
9
2
3
11
9
7
15
7
3
2
2
3
9
2
9
9
11
9
13
2
9
9
5
3
3
11
2
9
12
3
15
9
3
11
9
9
3
11
2
9
11
3
2
7
1
3
7
3
13
1
16
2
1
2
9
9
1
9
3
9
7
16
12
3
1
9
3
3
2
11
3
9
9
9
9
2
7
9
2
9
3
11
9
9
9
5
5
1
3
3
11
2


3
2
12
2
2
3
3
3
9
9
1
2
2
7
5
9
9
7
3
11
3
9
11
2
5
2
2
15
9
9
3
7
9
3
3
9
12
1
12
9
9
3
3
9
7
13
9
3
11
11
2
9
2
3
16
9
3
9
11
3
3
2
9
16
3
3
3
2
7
9
2
16
15
7
3
9
11
9
2
9
9
1
3
2
3
2
9
3
9
2
9
9
3
2
9
9
9
9
9
3
9
7
9
9
3
3
2
3
2
5
2
2
9
9
3
9
2
9
9
9
15
9
11
3
2
2
11
2
11
2
2
3
9
9
5
1
9
9
2
15
7
9
9
11
3
17
9
11
3
9
3
3
9
12
7
9
9
16
2
9
3
2
3
3
9
3
3
9
9
9
7
7
9
2
9
3
9
7
2
11
9
12
11
2
5
3
9
1
9
3
3
2
3
12
9
9
16
9
1
2
11
1
9
7
3
9
2
9
3
11
7
7
9
7
11
11
3
2
3
9
7
9
9
2
19
7
3
9
16
11
3
5
5
7
2
11
9
12
7
3
9
2
2
9
2
9
9
12
9
3
3
11
19
11
16
9
5
9
2
9
7
7
3
9
11
3
16
11
3
15
3
11
3
2
2
9
7
9
9
9
11
2
3
2
6
9
9
9
2
7
9
9
3
7
2
9
9
3
11
11
2
1
9
16
9
2
17
3
9
1
3
3
2
3
9
9
9
12
2
11
9
3
3
3
13
1
9
13
9
9
19
9
7
5
2
2
9
9
2
9
15
3
15
3
2
3
9
2
2
9
9
9
3
9
3
2
9
2
11
1
9
2
2
9
3
7
9
3
11
2
3
3
2
3
9
2
14
3
9
3
3
3
3
3
9
3
9
5
3
5
16
16
13
9
2
11
3
11
2
13
3
12
3
1
2
9
9
9
7
2
9
9
2
2
3
6
9
12
7
2
1
7
3
2
9
3
3
3
9
3
13
9
9
3
9
3
1
2
2
9
2
3
3
2
9
9
3
14
9
2
9
12
1
7
2
3
9
9
3
3
16
3


9
9
3
3
3
9
9
9
3
9
2
3
9
11
2
3
9
2
3
11
3
3
9
12
3
3
9
7
11
12
11
7
3
16
11
2
2
5
3
12
12
9
3
2
9
9
9
15
9
2
2
2
3
2
2
9
3
11
11
9
2
16
11
2
11
3
2
5
9
1
16
11
13
2
12
2
2
2
3
3
9
2
3
2
9
3
9
3
16
9
9
9
2
5
2
1
2
9
2
3
9
12
2
5
2
9
2
9
5
2
2
9
3
13
9
2
7
9
2
9
13
2
3
3
2
9
7
2
7
9
14
3
5
9
14
11
2
3
9
9
9
11
1
9
12
3
2
3
9
9
3
9
11
12
2
5
3
2
3
5
3
3
9
1
2
9
3
11
9
2
9
9
12
9
13
7
2
11
5
1
9
7
3
5
2
19
2
9
2
3
15
2
9
3
3
9
7
7
3
2
2
7
9
7
2
3
17
2
11
9
12
2
7
11
9
9
12
2
7
2
2
9
9
2
2
2
13
9
9
3
3
12
9
17
3
9
9
3
9
2
3
7
9
2
7
3
3
5
2
3
9
3
3
9
3
2
11
12
3
19
9
11
3
3
9
3
3
3
9
11
5
3
2
15
2
11
3
9
5
2
3
3
3
3
3
3
3
2
3
5
3
3
12
11
9
9
3
13
2
14
2
11
9
3
3
2
1
16
3
14
5
2
2
9
3
3
2
5
9
2
9
15
1
9
3
9
2
3
9
3
3
9
9
9
7
2
3
3
3
9
9
3
2
3
7
2
3
3
7
9
9
9
2
16
3
9
11
7
7
11
2
3
9
9
2
3
9
2
9
7
9
14
9
7
9
3
9
9
2
3
3
12
15
9
3
9
3
3
3
14
9
2
2
7
9
9
11
3
7
2
3
7
2
13
7
2
9
2
2
3
3
15
9
2
9
3
9
9
3
3
3
9
9
9
9
9
9
5
9
9
12
14
3
9
3
9
9
9
9
3
9
3
3
2
9
9
14
9
13
17
2
2
12
2
3
9
3
9
3
15
2
1

3
2
9
3
11
7
9
9
9
2
9
19
9
2
3
2
2
3
2
8
13
11
13
9
9
3
9
9
2
16
9
3
11
3
11
2
5
9
3
19
2
9
3
17
9
2
11
7
5
3
7
11
13
3
3
3
7
12
9
9
3
3
11
9
2
2
9
9
9
3
7
2
1
2
16
5
13
9
13
3
5
9
15
3
12
3
3
3
3
3
19
9
3
11
3
7
9
9
16
2
9
9
9
9
12
2
9
12
9
9
7
3
7
9
9
5
7
2
9
11
2
2
2
9
2
2
9
9
2
3
3
3
2
2
11
13
2
3
9
9
9
2
7
12
9
13
15
1
12
9
9
11
1
2
15
11
2
9
9
2
2
2
9
9
3
3
9
9
3
9
9
9
2
3
11
9
9
7
2
13
2
3
1
9
9
9
2
5
2
2
13
3
12
3
9
3
16
11
3
9
11
2
7
11
3
2
2
9
7
2
12
11
1
2
9
9
9
9
5
9
9
9
16
9
11
11
3
7
9
3
1
9
3
9
3
9
3
2
9
3
12
9
3
9
9
9
9
9
3
2
2
3
11
2
9
2
9
3
3
9
15
2
7
11
9
3
9
9
3
3
3
14
11
11
9
1
9
9
5
9
2
2
11
17
3
9
3
2
3
9
3
9
9
3
2
2
2
13
9
3
9
16
2
9
2
2
11
2
2
9
7
3
9
9
2
2
9
5
9
2
2
3
9
9
5
9
2
9
9
9
9
2
9
11
3
3
2
13
11
9
12
14
13
9
2
2
2
2
15
9
2
7
2
7
9
2
3
9
7
9
2
2
2
12
9
9
2
3
3
2
9
1
7
3
7
9
9
16
9
16
9
3
13
2
9
9
11
9
3
7
9
9
16
9
3
7
13
2
7
2
9
11
9
2
3
3
13
2
9
9
3
13
11
3
3
9
17
3
2
3
9
2
9
9
3
9
3
2
9
3
6
9
3
9
2
2
12
9
13
5
16
2
11
9
9
9
17
3
11
19
9
9
2
9
3
3
2


2
9
9
9
2
9
2
9
11
2
15
3
9
2
2
7
9
9
9
9
11
9
13
3
9
2
11
9
2
13
9
9
9
9
2
9
2
9
9
3
9
9
9
9
9
15
12
9
19
3
9
9
12
2
9
9
5
12
1
3
9
5
9
3
9
3
3
9
9
9
3
2
11
9
1
9
11
9
3
2
11
2
11
3
3
9
9
11
11
9
3
9
3
9
2
2
3
9
5
9
11
9
9
9
9
11
9
2
12
3
3
9
5
5
3
13
9
3
7
9
2
13
9
11
3
2
9
3
2
2
3
7
2
15
3
3
9
2
3
9
11
9
9
2
2
11
5
11
9
9
9
3
3
3
11
2
2
11
13
2
16
3
9
12
3
9
9
3
5
3
3
7
9
9
5
12
16
2
2
9
17
11
2
2
3
9
3
9
9
3
2
3
13
3
9
2
2
9
2
1
2
3
2
2
17
7
2
3
3
2
1
9
2
2
9
12
9
9
9
9
9
9
3
3
5
9
2
7
2
11
3
2
2
15
2
9
3
1
3
9
2
3
9
2
3
9
11
9
11
9
2
2
3
2
2
9
9
3
2
3
1
9
9
9
2
9
9
9
3
9
9
3
15
11
3
2
9
9
9
7
17
9
13
7
2
15
5
2
3
19
14
9
3
9
3
3
7
3
9
3
9
9
9
9
2
2
3
12
3
16
2
9
3
9
11
9
3
7
5
9
2
15
12
3
3
2
3
2
11
2
11
3
3
9
9
11
7
3
3
3
9
5
7
9
2
17
3
9
3
2
3
2
2
9
7
1
9
9
2
2
9
3
3
9
2
9
3
5
17
9
3
13
3
12
5
3
5
12
2
11
3
3
9
9
3
3
9
2
2
3
5
2
9
3
15
9
5
2
9
3
12
16
9
3
3
9
3
3
3
1
2
3
3
12
9
16
12
2
11
16
9
3
3
3
3
1
3
2
7
12
19
9
2
9
14
13
9
3
3
11
9
9
9
2
9
9
9
3
3
12
9
3
9
13
9
9
3
9
5
9
1

7
13
9
3
15
9
9
15
5
16
9
9
9
2
11
15
9
2
9
2
2
2
15
2
3
3
2
2
11
2
9
9
11
2
2
13
11
2
7
2
9
9
9
16
11
3
9
9
9
3
2
2
5
13
1
11
2
3
5
9
9
3
13
2
11
2
3
3
13
2
3
5
2
2
9
9
5
9
9
9
3
12
2
14
2
5
9
2
2
9
9
3
3
16
3
2
9
9
2
7
7
7
2
9
3
7
9
16
2
7
2
3
12
2
11
7
9
9
9
3
15
9
3
9
1
11
2
2
5
12
9
1
9
3
9
5
9
9
9
9
2
12
13
9
9
2
3
12
2
3
16
2
9
12
9
3
9
9
9
7
2
3
7
12
2
5
9
13
1
3
3
9
9
5
2
3
3
5
3
2
12
9
16
9
3
9
17
3
2
1
2
1
2
9
3
1
1
2
13
2
13
16
9
2
3
5
16
9
2
9
2
3
2
9
9
3
7
9
15
3
12
9
9
11
1
11
2
9
3
3
9
9
9
2
9
12
2
9
9
9
3
9
2
7
13
3
3
9
11
11
5
2
12
2
9
11
9
3
11
12
9
9
9
9
2
3
9
9
9
9
2
9
9
9
9
9
9
9
9
12
2
9
9
3
9
3
7
3
2
11
9
9
2
2
12
7
2
16
9
2
9
9
2
12
11
9
2
9
9
11
9
1
7
16
9
12
2
9
9
9
1
9
9
3
9
2
15
11
3
7
11
3
3
2
11
9
9
9
7
1
3
7
11
7
3
9
9
13
7
5
11
9
3
5
2
3
11
9
2
17
9
1
11
5
2
9
2
16
9
7
3
1
7
7
9
2
3
9
3
13
9
3
2
17
9
2
9
19
9
5
2
3
3
3
11
13
9
2
2
9
9
11
7
2
2
14
2
3
9
3
2
3
13
11
5
3
3
14
14
2
3
3
11
3
16
9
9
3
3
2
1
5
12
13
3
9
9
9
17
3
9
3
3
9
5
3
9
11
15
9
9
9
2
9
2

9
9
3
12
13
9
9
2
2
13
15
9
2
11
2
3
9
3
3
9
3
12
9
2
9
2
9
2
3
3
2
2
2
7
3
9
3
3
2
9
19
14
9
9
3
2
7
3
2
2
9
3
5
9
2
9
3
12
5
2
5
3
1
11
7
3
2
9
3
3
13
19
12
9
9
3
9
9
3
9
12
9
3
9
9
9
2
17
2
9
9
3
3
2
3
9
7
3
3
9
9
3
9
9
3
9
9
15
2
9
13
9
9
9
2
1
7
13
3
9
12
9
3
2
2
9
3
11
3
15
3
9
16
9
9
7
3
9
3
3
12
7
1
9
9
3
9
2
2
1
3
9
2
3
13
9
2
13
11
2
3
15
11
13
7
2
2
11
9
11
9
9
5
1
13
17
9
15
9
7
12
2
9
3
2
2
9
9
9
3
3
9
2
3
3
14
11
2
2
2
2
3
3
9
12
7
2
9
7
11
9
2
9
5
2
9
9
5
9
3
2
3
9
9
2
9
3
12
2
2
9
11
9
3
7
9
5
2
2
9
3
9
11
13
9
19
19
3
9
11
15
9
16
11
5
3
17
2
2
5
3
2
7
2
9
12
2
11
3
15
2
2
3
3
2
9
9
3
9
2
3
3
3
7
11
12
3
3
12
1
17
3
2
11
7
16
9
2
11
9
13
9
7
3
9
3
9
2
13
9
9
13
9
2
16
2
7
2
3
3
3
5
12
2
9
2
2
12
2
1
5
12
2
7
2
9
2
3
2
3
5
3
9
9
11
1
9
9
9
2
9
9
7
3
9
2
11
9
9
2
2
2
2
1
3
3
2
2
3
2
5
3
9
9
9
9
3
9
3
3
3
2
11
9
7
3
2
7
3
3
9
9
11
9
3
3
12
3
3
3
9
11
3
15
3
9
3
9
3
13
3
13
3
9
2
2
9
11
3
12
9
9
13
3
9
12
3
3
3
7
2
11
15
5
9
9
2
2
2
3
12
2
3
3
7
3
7
12
2
3
3
1
9
3
2
3
7
13

2
12
2
2
1
3
9
11
9
2
7
7
7
2
2
3
3
7
2
2
3
9
9
2
9
3
2
3
9
3
2
3
9
3
3
2
2
16
2
11
3
3
9
7
6
14
2
3
3
9
3
7
16
9
2
1
9
13
6
7
5
9
9
14
3
9
3
2
2
9
14
9
2
3
2
2
2
3
9
16
2
2
2
9
3
9
3
9
9
11
9
9
3
3
3
7
2
2
3
1
17
9
11
2
1
9
2
9
3
3
3
9
2
3
2
9
3
2
7
9
7
9
3
7
9
3
1
9
3
2
11
5
5
2
9
11
3
7
9
2
13
3
9
3
8
9
2
3
12
12
11
9
9
9
3
9
9
13
7
9
2
13
9
16
9
3
3
9
9
3
7
7
7
7
2
16
3
11
9
3
9
3
2
2
2
12
2
3
3
9
9
3
9
2
9
9
2
12
3
3
3
2
19
3
9
3
2
9
2
3
5
2
15
11
1
2
3
9
9
3
3
3
7
9
5
3
2
5
2
12
9
9
9
9
7
9
3
9
9
5
9
3
3
3
12
7
7
9
9
7
1
16
2
9
3
3
7
3
9
9
9
3
5
5
2
9
3
9
3
9
11
13
7
2
3
9
2
9
3
2
2
12
3
9
9
3
9
3
3
2
9
1
2
9
9
9
1
3
9
9
9
7
9
9
2
9
9
9
17
16
3
9
11
13
3
1
9
9
2
9
11
11
9
13
9
9
1
9
7
2
2
9
9
5
9
8
3
2
11
2
2
9
11
11
5
2
9
7
3
9
3
9
2
2
1
2
11
3
5
5
9
12
9
7
11
11
3
13
9
7
9
3
9
9
9
12
11
2
17
2
12
2
11
2
9
3
9
9
9
9
3
9
11
9
9
12
9
16
5
3
3
2
11
3
2
2
2
9
7
2
9
2
2
3
16
2
3
11
9
1
9
12
9
9
3
9
5
3
9
9
9
5
7
11
9
3
9
9
9
9
2
9
9
3
3
3
11
9
2
9
9
11
9
11
9
2
3
3
3
7
3
3
9
3
1
9
9


In [210]:
y_pred = np.array([np.argmax(pred) for pred in y_pred])
y_true = np.array(test_data['Label'])
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00     10760
           1       0.04      0.01      0.01      5279
           2       0.04      0.08      0.06      5241
           3       0.03      0.10      0.04      2885
           4       0.00      0.00      0.00      2517
           5       0.03      0.02      0.02      2317
           6       0.04      0.00      0.00      2049
           7       0.04      0.05      0.05      1894
           8       0.04      0.00      0.00      1796
           9       0.05      0.47      0.09      1671
          10       0.00      0.00      0.00      1544
          11       0.35      0.67      0.46      1528
          12       0.44      0.44      0.44      1462
          13       0.02      0.02      0.02      1346
          14       0.02      0.00      0.01      1377
          15       0.01      0.01      0.01      1249
          16       0.04      0.03      0.03      1306
          17       0.00    

In [211]:
emoji_pred = [mappings[mappings['number'] == pred]['emoticons'] for pred in y_pred]
emoji_pred

[2    😍
 Name: emoticons, dtype: object, 13    ✨
 Name: emoticons, dtype: object, 11    🇺🇸
 Name: emoticons, dtype: object, 13    ✨
 Name: emoticons, dtype: object, 3    😂
 Name: emoticons, dtype: object, 2    😍
 Name: emoticons, dtype: object, 9    ❤
 Name: emoticons, dtype: object, 9    ❤
 Name: emoticons, dtype: object, 9    ❤
 Name: emoticons, dtype: object, 9    ❤
 Name: emoticons, dtype: object, 9    ❤
 Name: emoticons, dtype: object, 2    😍
 Name: emoticons, dtype: object, 2    😍
 Name: emoticons, dtype: object, 9    ❤
 Name: emoticons, dtype: object, 9    ❤
 Name: emoticons, dtype: object, 2    😍
 Name: emoticons, dtype: object, 3    😂
 Name: emoticons, dtype: object, 9    ❤
 Name: emoticons, dtype: object, 12    ☀
 Name: emoticons, dtype: object, 11    🇺🇸
 Name: emoticons, dtype: object, 9    ❤
 Name: emoticons, dtype: object, 2    😍
 Name: emoticons, dtype: object, 7    🔥
 Name: emoticons, dtype: object, 7    🔥
 Name: emoticons, dtype: object, 3    😂
 Name: emoticons, dtype: 

In [241]:
for i in range(100, 150):
    test_tweet = test_data['TEXT'][i]
    pred_label = y_pred[i]
    pred_emoji = emoji_pred[i]
    print('tweet: ', test_tweet)
    print('pred emoji: ', pred_label, pred_emoji)
    print('-'*50)

tweet:  New York ain't ready... #BuffaloState #HWS @ New York Strip Clubs
pred emoji:  2 2    😍
Name: emoticons, dtype: object
--------------------------------------------------
tweet:  Just livin man @ South Beach Miami
pred emoji:  16 16    😎
Name: emoticons, dtype: object
--------------------------------------------------
tweet:  It's the most wonderful time of the year -#rockefellercenter #christmas #christmastree…
pred emoji:  5 5    🎄
Name: emoticons, dtype: object
--------------------------------------------------
tweet:  Oh I think that I found myself a cheerleader @ University of Iowa Rugby…
pred emoji:  2 2    😍
Name: emoticons, dtype: object
--------------------------------------------------
tweet:  Gotta take care of the feet. #pedicure #atlanta #atl #friends #pampered #feet #feelinggood…
pred emoji:  2 2    😍
Name: emoticons, dtype: object
--------------------------------------------------
tweet:  I think I need a hat like this #cutefilter #snapchat @ Georgetown,…
pred emo