In [0]:
import pandas as pd
!pip install tensorflow==2.0.0

import tensorflow as tf
print(tf.__version__)

import nltk
import gensim
import numpy as np
from gensim import corpora, models, similarities
from keras.utils import to_categorical
from numpy import argmax
nltk.download('punkt')
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Softmax, LSTM, Embedding
#tf.enable_eager_execution()


2.0.0


Using TensorFlow backend.


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [0]:
def read_glove_vecs(glove_file):
    with open(glove_file, 'r') as f:
        words = set()
        word_to_vec_map = {}
        
        for line in f:
            line = line.strip().split()
            curr_word = line[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(line[1:], dtype=np.float64)
            
    return words, word_to_vec_map

In [0]:
def read_training_and_test_files():
  X_train = pd.read_csv('train_emoji.csv', names = ['X', 'Y'])
  X_test = pd.read_csv('test_emoji.csv', names = ['X', 'Y'])  
  return X_train, X_test
  
def read_files_and_retrieve_wordtovec_map(filename):
  X_train = pd.read_csv('train_emoji.csv', names = ['X', 'Y'])
  X_test = pd.read_csv('test_emoji.csv', names = ['X', 'Y'])
  words, word_to_vec_map = read_glove_vecs(filename)
  return X_train, X_test, word_to_vec_map

def process_training_and_test_values(X_train, X_test):
  X_train_X = X_train['X'].values.tolist()
  X_train_Y = X_train['Y'].values.tolist()

  X_test_X = X_test['X'].values.tolist()
  X_test_Y = X_test['Y'].values.tolist()

  X_train_lower = [x.lower() for x in X_train_X]
  X_test_lower = [x.lower() for x in X_test_X]
  return X_train_lower, X_test_lower, X_train_Y, X_test_Y
  
def tokenize_values(values):
  return [nltk.word_tokenize(item) for item in values]

def convert_to_nparray(values, value_type):
  return np.asarray(values).astype(value_type)



In [0]:
""" Converts sentences to average word embeddings. """
def sentence_to_avg(corpus, word_to_vec_map, vector_dim):
    avg = np.zeros((vector_dim,))
    averages = []
    for sentence in corpus:
      sum_val = 0
      for word in sentence:
        try:
          sum_val += word_to_vec_map[word]
        except:
          pass
      avg = sum_val/len(sentence)
      averages.append(avg)
    return averages

""" Converts sentences to min word embeddings """
def sentence_to_min(corpus, word_to_vec_map, vector_dim):
  minimum = []
  for sentence in corpus:
    min_val = np.ones((vector_dim,)) * np.inf
    for word in sentence:
      try:
        min_val = np.minimum(min_val, word_to_vec_map[word])
      except:
        pass
    minimum.append(min_val)
  return minimum

""" Converts sentences to max word embeddings"""
def sentence_to_max(corpus, word_to_vec_map, vector_dim):
  maximum = []
  for sentence in corpus:
    max_val = np.zeros((vector_dim,))
    for word in sentence:
      try:
        max_val = np.maximum(max_val, word_to_vec_map[word])
      except:
        pass
    maximum.append(max_val)
  return maximum

""" Converts sentences to word average embeddings"""
def combine_min_max_avg(corpus, word_to_vec_map, vector_dim):
  avg = sentence_to_avg(corpus, word_to_vec_map, vector_dim)
  minimum = sentence_to_min(corpus, word_to_vec_map, vector_dim)
  maximum = sentence_to_max(corpus, word_to_vec_map, vector_dim)    
  return list(zip(avg, minimum, maximum))
  
  

In [0]:
def Model_to_Fit(hidden_layers, classes):
  hidden_layer_size, num_classes = hidden_layers, classes
  layers = [
          tf.keras.layers.Dense(num_classes, activation='softmax')
      ]

  model = tf.keras.Sequential(layers)
  optimizer = tf.keras.optimizers.Adam(0.001)
  model.compile(optimizer= optimizer, loss='sparse_categorical_crossentropy', validation_split = 0.2,  metrics = ['accuracy'])
  return model

In [0]:
def CreateConfusionMatrixandReturnPredictions(actual_labels, predicted_labels):
  max_indices = []
  for row in predicted_labels:
    maximum_val = 0
    max_index = 0
    for index, value in enumerate(row):
      if (value > maximum_val):
        max_index =  index
        maximum_val = value
    max_indices.append(max_index)
  print(tf.math.confusion_matrix(actual_labels, max_indices, num_classes = 5))
  return max_indices

In [0]:
def ShowCorrectandIncorrectPredictions(test_label_set, pred_val):
  correctness_of_predictions = test_label_dataset == pred_val
  incorrect_values = []
  correct_values = []
  for i, w in enumerate(correctness_of_predictions):
    if(w == False):
      incorrect_values.append((test_label_dataset[i], pred_val[i], i))
    if(w == True):
      correct_values.append((test_label_dataset[i], pred_val[i], i))

  print("""----------------------------
Incorrect Predictions""")
  for i in incorrect_values:
    print(X_test_lower[i[2]], i[0], i[1])
  
  print("""-------------------------- 
Correct Predictions""")
  for i in correct_values:
    print(X_test_lower[i[2]], i[0], i[1])  

In [0]:
# Glove vector of 50 Dimensions for Twitter using Avg
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.twitter.27B.50d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

avg = sentence_to_avg(tokenized_training_data, word_to_vec_map, 50) 
test_avg = sentence_to_avg(tokenized_test_labels, word_to_vec_map, 50)


training_dataset = convert_to_nparray(avg, 'float32')
testing_dataset = convert_to_nparray(test_avg, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
model.get_weights()
print(model.evaluate(testing_dataset, test_label_dataset))

pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [0]:
# Glove vector of 50 Dimensions for Twitter using Min 
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.twitter.27B.50d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

minimum = sentence_to_min(tokenized_training_data, word_to_vec_map, 50) 
test_minimum = sentence_to_min(tokenized_test_labels, word_to_vec_map, 50)


training_dataset = convert_to_nparray(minimum, 'float32')
testing_dataset = convert_to_nparray(test_minimum, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
pred_val = np.amax(convert_to_nparray(predictions, 'float32'), axis = 1)
model.get_weights()
print(model.evaluate(testing_dataset, test_label_dataset))

pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [0]:
# Glove vector of 50 Dimensions for Twitter using Max
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.twitter.27B.50d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

maximum = sentence_to_max(tokenized_training_data, word_to_vec_map, 50) 
test_maximum = sentence_to_max(tokenized_test_labels, word_to_vec_map, 50)


training_dataset = convert_to_nparray(maximum, 'float32')
testing_dataset = convert_to_nparray(test_maximum, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
pred_val = np.amax(convert_to_nparray(predictions, 'float32'), axis = 1)
model.get_weights()
model.evaluate(testing_dataset, test_label_dataset)

pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


<tf.Tensor: id=7443, shape=(5, 5), dtype=int32, numpy=
array([[ 7,  0,  0,  0,  0],
       [ 8,  0,  0,  0,  0],
       [18,  0,  0,  0,  0],
       [16,  0,  0,  0,  0],
       [ 7,  0,  0,  0,  0]], dtype=int32)>

In [0]:
# Glove vector of 50 Dimensions for Twitter using Max-Min-Avg 
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.twitter.27B.50d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

combined_dataset = combine_min_max_avg(tokenized_training_data, word_to_vec_map, 50)
test_combined_dataset = combine_min_max_avg(tokenized_test_labels, word_to_vec_map, 50)


training_dataset = convert_to_nparray(combined_dataset, 'float32')
testing_dataset = convert_to_nparray(test_combined_dataset, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

train = []
for i in training_dataset:
  train.append(i.flatten())
  
test = []
for i in testing_dataset:
  test.append(i.flatten())
  
test_val = convert_to_nparray(test, 'float32')

train_val = convert_to_nparray(train, 'float32')
print(train_val.shape)

model = Model_to_Fit(512, 5)
model.fit(x = train_val, y = training_label_dataset, epochs = 100)

predictions = model.predict(test_val)
print(model.evaluate(test_val, test_label_dataset))

pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

(132, 150)
Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Ep

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Word2Vec of 300 dimensions on Google News using avg
model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)  
X_train, X_test = read_training_and_test_files()
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)


avg = sentence_to_avg(tokenized_training_data, model, 300) 
test_avg = sentence_to_avg(tokenized_test_labels, model, 300)

training_dataset = convert_to_nparray(avg, 'float32')
testing_dataset = convert_to_nparray(test_avg, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
pred_val = np.amax(convert_to_nparray(predictions, 'float32'), axis = 1)
model.get_weights()
model.evaluate(testing_dataset, test_label_dataset)

#tf.math.confusion_matrix(test_label_dataset, pred_val, num_classes = 5)
pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

tf.Tensor(
[[ 5  0  0  2  0]
 [ 0  6  0  2  0]
 [ 0  0 16  2  0]
 [ 0  0  2 14  0]
 [ 0  0  0  2  5]], shape=(5, 5), dtype=int32)
----------------------------
Incorrect Predictions
work is hard	 3.0 2
work is horrible	 3.0 2
you brighten my day	 2.0 3
will you be my valentine	 2.0 3
he can pitch really well	 1.0 3
see you at the restaurant	 4.0 3
i will  run 1.0 3
i like your jacket 	 0.0 3
family is all i have	 0.0 3
i did not have breakfast  4.0 3
-------------------------- 
Correct Predictions
i want to eat	 4.0 4
he did not answer	 3.0 3
he got a very nice raise	 2.0 2
she got me a nice present	 2.0 2
ha ha ha it was so funny	 2.0 2
he is a good friend	 2.0 2
i am upset	 3.0 3
we had such a lovely dinner tonight	 2.0 2
where is the food	 4.0 4
stop making this joke ha ha ha	 2.0 2
where is the ball	 1.0 1
this girl is messing with me	 3.0 3
are you serious 3.0 3
let us go play baseball	 1.0 1
this stupid grader is not working 	 3.0 3
congratulation for having a baby	 2.0 2
stop pis

In [18]:
# Glove vector of 300 Dimensions on Wikipedia using Avg
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.6B.300d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

avg = sentence_to_avg(tokenized_training_data, word_to_vec_map, 300) 
test_avg = sentence_to_avg(tokenized_test_labels, word_to_vec_map, 300)


training_dataset = convert_to_nparray(avg, 'float32')
testing_dataset = convert_to_nparray(test_avg, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
pred_val = np.amax(convert_to_nparray(predictions, 'float32'), axis = 1)
model.get_weights()
model.evaluate(testing_dataset, test_label_dataset)

#tf.math.confusion_matrix(test_label_dataset, pred_val, num_classes = 5)
pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [0]:
# Glove vector of 50 Dimensions on Wikipedia using avg
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.6B.50d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

avg = sentence_to_avg(tokenized_training_data, word_to_vec_map, 50) 
test_avg = sentence_to_avg(tokenized_test_labels, word_to_vec_map, 50)


training_dataset = convert_to_nparray(avg, 'float32')
testing_dataset = convert_to_nparray(test_avg, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
pred_val = convert_to_nparray(predictions, 'float32')
#model.get_weights()
model.evaluate(testing_dataset, test_label_dataset)

#tf.math.confusion_matrix(test_label_dataset, pred_val, num_classes = 5)
pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [0]:
# Glove vector of 100 Dimensions on Wikipedia using avg
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.6B.100d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

avg = sentence_to_avg(tokenized_training_data, word_to_vec_map, 100) 
test_avg = sentence_to_avg(tokenized_test_labels, word_to_vec_map, 100)


training_dataset = convert_to_nparray(avg, 'float32')
testing_dataset = convert_to_nparray(test_avg, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
pred_val = np.max(convert_to_nparray(predictions, 'float32'), axis = 1)

p = np.around(pred_val)
model.get_weights()
model.evaluate(testing_dataset, test_label_dataset)

#tf.math.confusion_matrix(test_label_dataset, p, num_classes = 5)
pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


<tf.Tensor: id=54999, shape=(5, 5), dtype=int32, numpy=
array([[ 2,  5,  0,  0,  0],
       [ 0,  8,  0,  0,  0],
       [ 0, 18,  0,  0,  0],
       [ 0, 16,  0,  0,  0],
       [ 0,  7,  0,  0,  0]], dtype=int32)>

In [19]:
# Glove vector of 100 Dimensions for Twitter using Avg
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.twitter.27B.100d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

avg = sentence_to_avg(tokenized_training_data, word_to_vec_map, 100) 
test_avg = sentence_to_avg(tokenized_test_labels, word_to_vec_map, 100)


training_dataset = convert_to_nparray(avg, 'float32')
testing_dataset = convert_to_nparray(test_avg, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
model.get_weights()
print(model.evaluate(testing_dataset, test_label_dataset))

pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [0]:
# Glove vector of 200 Dimensions on Wikipedia using avg
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.6B.200d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

avg = sentence_to_avg(tokenized_training_data, word_to_vec_map, 200) 
test_avg = sentence_to_avg(tokenized_test_labels, word_to_vec_map, 200)

training_dataset = convert_to_nparray(avg, 'float32')
testing_dataset = convert_to_nparray(test_avg, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
model.get_weights()
print(model.evaluate(testing_dataset, test_label_dataset))

pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [22]:
# Glove vector of 200 Dimensions for Twitter using Avg
X_train, X_test, word_to_vec_map = read_files_and_retrieve_wordtovec_map('glove.twitter.27B.200d.txt')
X_train_lower, X_test_lower, X_train_Y, X_test_Y = process_training_and_test_values(X_train, X_test)
tokenized_training_data = tokenize_values(X_train_lower)
tokenized_test_labels = tokenize_values(X_test_lower)

avg = sentence_to_avg(tokenized_training_data, word_to_vec_map, 200) 
test_avg = sentence_to_avg(tokenized_test_labels, word_to_vec_map, 200)


training_dataset = convert_to_nparray(avg, 'float32')
testing_dataset = convert_to_nparray(test_avg, 'float32')

training_label_dataset = convert_to_nparray(X_train_Y, 'float32')
test_label_dataset = convert_to_nparray(X_test_Y, 'float32')

model = Model_to_Fit(512, 5)
model.fit(x = training_dataset, y = training_label_dataset, epochs = 100)

predictions = model.predict(testing_dataset)
model.get_weights()
print(model.evaluate(testing_dataset, test_label_dataset))

pred_val = CreateConfusionMatrixandReturnPredictions(test_label_dataset, convert_to_nparray(predictions, 'float32'))
ShowCorrectandIncorrectPredictions(test_label_dataset, pred_val)

Train on 132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
