In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras.preprocessing.text as text
from tensorflow.keras import layers, losses, metrics, initializers, regularizers
from math import floor
import matplotlib.pyplot as plt
import keras_tuner as kt

# In this cell I import all of the libraries that I will be using




In [2]:
# Remove unncecesary data from dataset, and remove very unclear values
data = pd.concat([pd.read_csv("data/full_dataset/goemotions_1.csv"), pd.read_csv("data/full_dataset/goemotions_2.csv"), pd.read_csv("data/full_dataset/goemotions_3.csv")])
data = data.drop(data[data.example_very_unclear == True].index)
data = data.drop(data[data.neutral == True].index)
data = data.drop(labels=['id', "author", "subreddit", "link_id", "parent_id", "created_utc", "rater_id", "example_very_unclear", "neutral"], axis=1)

data.sum(axis=1, numeric_only=True) >1
data = data[data.sum(axis=1, numeric_only=True) == 1]
data = data.reset_index().dropna()

# In this cell I remove unncecesary data from dataset, and remove very unclear values

In [4]:
num_words_median = np.median([len(words.split()) for words in data["text"]])
len(data) / num_words_median, num_words_median

# In this cell I calculate some staticstics about the data

(5097.0, 12.0)

In [5]:
def array_to_label(array):
    for i in range(0, 27):
        if array[i] == 1:
            return data.columns[i]

def array_to_num(array):
    for i in range(1, 29):
        if array[i] == 1:
            return i - 1
array_to_label(data.iloc[0]), data.iloc[0], 

# In this cell I define some functions which allow me to convert the formats in which the results are being stored

(None,
 index                           0
 text              That game hurt.
 admiration                      0
 amusement                       0
 anger                           0
 annoyance                       0
 approval                        0
 caring                          0
 confusion                       0
 curiosity                       0
 desire                          0
 disappointment                  0
 disapproval                     0
 disgust                         0
 embarrassment                   0
 excitement                      0
 fear                            0
 gratitude                       0
 grief                           0
 joy                             0
 love                            0
 nervousness                     0
 optimism                        0
 pride                           0
 realization                     0
 relief                          0
 remorse                         0
 sadness                         1
 surprise    

In [3]:
training_size = floor(len(data)*0.8)
testing_data = data.iloc[training_size:]
training_data = data.iloc[:training_size]
print(len(training_data), len(testing_data))

# In this cell I split the data into testing and training data

48931 12233


In [4]:
vectorization_layer = layers.TextVectorization(
    max_tokens=30000,
    output_mode="int",
    output_sequence_length=30,
)

# In this cell I create a layer which can be used to vectorize text

2022-11-25 13:37:32.747214: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-25 13:37:32.760392: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [5]:
training_text = training_data.text
type(training_text)
vectorization_layer.adapt(training_text)

# In this cell I adapt the vectorization layer to the vocabulary used in the data

In [6]:
def vectorize_text(text):
  text = tf.expand_dims(text, -1)
  return vectorization_layer(text)

# In this cell I create a function which makes it easier to use the vectorization layer

In [10]:
first_text = training_text.iloc[3]
vectorize_text(first_text), first_text

# In this cell I test the text vectorization

(<tf.Tensor: shape=(1, 30), dtype=int64, numpy=
 array([[12117, 15676,  7658, 12795,   212, 14313,   764,  8638,  4806,
             8,    59,   216,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0]])>,
 '"Sponge Blurb Pubs Quaw Haha GURR ha AAa!" finale is too real')

In [16]:
vocabulary = dict(zip(list(range(20000)), vectorization_layer.get_vocabulary()))

# In this cell I export the vocabulary of the vectorization layer to a separate file

In [12]:
from tqdm import tqdm
tqdm.pandas()
preprocessed_training_data = training_data.text.progress_map(lambda x: vectorize_text(x))
preprocessed_testing_data = testing_data.text.progress_map(lambda x: vectorize_text(x))

# In this cell I apply the vectorization layer to both the testing and the training data

100%|██████████| 48931/48931 [01:32<00:00, 530.35it/s]
100%|██████████| 12233/12233 [00:21<00:00, 569.24it/s]


In [13]:
preprocessed_training_data = tf.stack(list(preprocessed_training_data))
preprocessed_testing_data = tf.stack(list(preprocessed_testing_data))

# In this cell I convert the training and testing data to a tensor

In [None]:
def build_sepcnn_model(hp):
    if hp:
        filters = hp.Int("filters", min_value=20, max_value=100, step=20)
        blocks = hp.Int("blocks", min_value=1, max_value=4, step=1)
        learning_rate = hp.Choice("learning_rate", [1e-2, 1e-3, 1e-4])
        dropout_rate = hp.Float("dropout_rate", min_value=0.2, max_value=0.5, step=0.1)
        kernel_size = hp.Int("kernel_size", min_value=2, max_value=6)
        pool_size = hp.Int("pool_size", min_value=2, max_value=6)


        conv_activations = hp.Choice("conv_activations", ["tanh", "relu"])

        embedding_dim = hp.Int("embedding_dim", min_value=50, max_value=300, step=50)
        
    sepcnn_model = tf.keras.Sequential()
    sepcnn_model.add(layers.Embedding(30001, output_dim=embedding_dim, input_length=50))
    for i in range(blocks-1):
        sepcnn_model.add(layers.Dropout(rate=dropout_rate))
        sepcnn_model.add(layers.SeparableConv1D(filters=filters,
                                  kernel_size=kernel_size,
                                  activation=conv_activations,
                                  bias_initializer='random_uniform',
                                  depthwise_initializer='random_uniform',
                                  padding='same'))
        sepcnn_model.add(layers.SeparableConv1D(filters=filters,
                                  kernel_size=kernel_size,
                                  activation=conv_activations,
                                  bias_initializer='random_uniform',
                                  depthwise_initializer='random_uniform',
                                  padding='same'))
        sepcnn_model.add(layers.MaxPooling1D(pool_size=pool_size, padding="same"))
    sepcnn_model.add(layers.SeparableConv1D(filters=filters * 2,
                          kernel_size=kernel_size,
                          activation=conv_activations,
                          bias_initializer='random_uniform',
                          depthwise_initializer='random_uniform',
                          padding='same'))
    sepcnn_model.add(layers.SeparableConv1D(filters=filters * 2,
                              kernel_size=kernel_size,
                              activation=conv_activations,
                              bias_initializer='random_uniform',
                              depthwise_initializer='random_uniform',
                              padding='same'))
    sepcnn_model.add(layers.GlobalAveragePooling1D())
    sepcnn_model.add(layers.Dropout(rate=dropout_rate))
    
    dense_activation = hp.Choice("dense_activations", ["tanh", "relu"])
    first_dense_units = hp.Int("first_dense_units", min_value = 20, max_value = 370, step=50)
    sepcnn_model.add(layers.Dense(first_dense_units, activation=dense_activation))
    sepcnn_model.add(layers.Dropout(rate=dropout_rate))
    
    sepcnn_model.add(layers.Dense(27, activation="softmax"))

    
    sepcnn_model.compile(loss=losses.CategoricalCrossentropy(from_logits=False),
                         optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                         metrics=["accuracy", tf.keras.metrics.CategoricalCrossentropy(from_logits=False)])
    return sepcnn_model

# In this cell I define the sepcnn_model to be used with the hyperparameter tuner

In [None]:
def build_sepcnn_for_training(embedding_dim, filters, blocks, learning_rate, dropout_rate, kernel_size, pool_size, conv_activations, dense_activation, first_dense_units, second_dense_units):
    sepcnn_model = tf.keras.Sequential()
    sepcnn_model.add(layers.Embedding(30001, output_dim=embedding_dim, input_length=30))
    for i in range(blocks-1):
        sepcnn_model.add(layers.Dropout(rate=dropout_rate))
        sepcnn_model.add(layers.SeparableConv1D(filters=filters,
                                  kernel_size=kernel_size,
                                  activation=conv_activations,
                                  bias_initializer='random_uniform',
                                  depthwise_initializer='random_uniform',
                                  padding='same'))
        sepcnn_model.add(layers.SeparableConv1D(filters=filters,
                                  kernel_size=kernel_size,
                                  activation=conv_activations,
                                  bias_initializer='random_uniform',
                                  depthwise_initializer='random_uniform',
                                  padding='same'))
        sepcnn_model.add(layers.MaxPooling1D(pool_size=pool_size, padding="same"))
    sepcnn_model.add(layers.SeparableConv1D(filters=filters * 2,
                          kernel_size=kernel_size,
                          activation=conv_activations,
                          bias_initializer='random_uniform',
                          depthwise_initializer='random_uniform',
                          padding='same'))
    sepcnn_model.add(layers.SeparableConv1D(filters=filters * 2,
                              kernel_size=kernel_size,
                              activation=conv_activations,
                              bias_initializer='random_uniform',
                              depthwise_initializer='random_uniform',
                              padding='same'))
    sepcnn_model.add(layers.GlobalAveragePooling1D())
    sepcnn_model.add(layers.Dropout(rate=dropout_rate))
    
    #dense_activation = hp.Choice("dense_activations", ["tanh", "relu"])
    #first_dense_units = hp.Int("first_dense_units", min_value = 20, max_value = 370, step=50)
    sepcnn_model.add(layers.Dense(first_dense_units, activation=dense_activation))
    sepcnn_model.add(layers.Dropout(rate=dropout_rate))
    
    if second_dense_units > 0:
        sepcnn_model.add(layers.Dense(second_dense_units, activation=dense_activation)),
        sepcnn_model.add(layers.Dropout(rate=dropout_rate))
    
    sepcnn_model.add(layers.Dense(27, activation="softmax"))

    
    sepcnn_model.compile(loss=losses.CategoricalCrossentropy(from_logits=False),
                         optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                         metrics=["accuracy"])
    return sepcnn_model

# In this cell I define the sepCNN model for the final model

In [None]:
def build_ann_model(hp):

    ann_model = tf.keras.Sequential([
        layers.Embedding(30001, hp.Int("embedding_dim", min_value=50, max_value=250, step=50), input_length=30),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(hp.Float("dropout_rate", min_value = 0.05, max_value=0.4, step=0.05)),
        layers.Dense(units=hp.Int("units", min_value=32, max_value=512, step=32), activation=hp.Choice("activation", ["relu", "tanh"])),
        layers.Dropout(hp.Float("dropout_rate", min_value = 0.05, max_value=0.4, step=0.05)),
        layers.Dense(27, activation="softmax"),
    ])
    ann_model.compile(loss=losses.CategoricalCrossentropy(from_logits=False),
              optimizer=tf.keras.optimizers.Adam(1e-3),
              metrics = ["accuracy", tf.metrics.categorical_crossentropy])
    return ann_model

# In this cell I define the feed-forward model for hyperparameter tuning             

In [None]:
def build_ann_model_for_training(embedding_dim, first_layer_units, second_layer_units, dropout_rate, activation):

    ann_model = tf.keras.Sequential([
        layers.Embedding(30001, embedding_dim, input_length=30),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(dropout_rate),
        layers.Dense(first_layer_units, activation),
        layers.Dropout(dropout_rate),
        layers.Dense(second_layer_units, activation),
        layers.Dropout(dropout_rate),
        layers.Dense(27, activation="softmax"),
    ])
    ann_model.compile(loss=losses.CategoricalCrossentropy(from_logits=False),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics = ["accuracy", tf.metrics.categorical_crossentropy])
    return ann_model
# In this cell I define the feed-forward model for the final model

In [17]:
training_labels = training_data.drop(labels=["text"], axis=1, inplace=False)
testing_labels = testing_data.drop(labels=["text"], axis=1, inplace=False)

# In this cell I separate the training and testing labels from the rest of the data

In [None]:
preprocessed_training_labels = list(training_labels.progress_apply(lambda x: array_to_num(x), axis=1))
preprocessed_training_labels

processed_training_labels = list(training_labels.progress_apply(lambda x: array_to_label(x), axis=1))



100%|██████████| 48931/48931 [00:00<00:00, 51662.39it/s]
100%|██████████| 48931/48931 [00:01<00:00, 45976.46it/s]


In [14]:
#preprocessed_training_data = tf.expand_dims(preprocessed_training_data, -1)
#reprocessed_training_data_copy = tf.reshape(preprocessed_training_data, [55096, 50])
#reprocessed_training_labels = tf.reshape(training_labels, [55096,1,28])
#reprocessed_training_data_copy.shape, preprocessed_training_data.shape, training_labels.shape, preprocessed_training_labels.shape
#testing_labels.shape, preprocessed_testing_data.shape
preprocessed_testing_data_copy = tf.reshape(preprocessed_testing_data, [12233, 30])






In [39]:

preprocessed_training_data_copy = tf.reshape(preprocessed_training_data, [48931, 30])


In [None]:

training_labels = training_labels.drop(columns="index")   
training_labels_copy = tf.convert_to_tensor(training_labels)
training_labels_copy

# In this cell I convert the training labels to a tensor

<tf.Tensor: shape=(48931, 27), dtype=int64, numpy=
array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0]])>

In [None]:
preprocessed_training_labels_copy = tf.reshape(preprocessed_training_labels_copy, [133081, 1])

NameError: name 'preprocessed_training_labels_copy' is not defined

In [None]:
callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)]

# In this cell I define the callbacks which stop the training early if the accuracy decreases

In [58]:
sepcnn_tuner = kt.Hyperband(
    build_sepcnn_model,
    max_epochs=20,
    factor=3,
    directory="sepcnn_dir_2",
    project_name="sepcnn_classifier_2", 
    objective="val_accuracy",
    overwrite=False
)
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)    

INFO:tensorflow:Reloading Oracle from existing project sepcnn_dir_2/sepcnn_classifier_2/oracle.json
INFO:tensorflow:Reloading Tuner from sepcnn_dir_2/sepcnn_classifier_2/tuner0.json


In [None]:
sepcnn_tuner.search(
    x=preprocessed_training_data_copy,
    y=training_labels_copy,
    epochs=15,
    validation_split=0.15,
    callbacks=[stop_early], 
    batch_size=32
)
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]


In [61]:
best_hps=sepcnn_tuner.get_best_hyperparameters(num_trials=1)[0]
best_hps.get("filters")


60

In [None]:
final_sepcnn_model = build_sepcnn_for_training(
    filters=60,
    blocks=1,
    learning_rate=0.001,
    dropout_rate=0.25,
    kernel_size=3,
    pool_size=5,
    conv_activations="relu",
    embedding_dim=200,
    dense_activation="tanh",
    first_dense_units=100,
    second_dense_units=50
)

# In this cell I build the final sepCNN model

In [None]:
sepcnn_history = final_sepcnn_model.fit(
    x=preprocessed_training_data_copy,
    y=training_labels_copy,
    epochs=80,
    validation_split=0.2,
    callbacks=callbacks
)

# In this cell I train the final sepCNN model

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80


In [None]:
final_sepcnn_model2.evaluate(x=preprocessed_testing_data_copy, y=preprocessed_testing_labels)

# In this cell I test the final sepCNN model

In [None]:
ann_history = ann_model.fit(
    x=preprocessed_training_data_copy,
    y=training_labels_copy,
    epochs=10,
    validation_split=0.1,
    callbacks=callbacks
)

In [None]:
tuner = kt.Hyperband(
    build_ann_model,
    objective="val_accuracy",
    max_epochs=10,
    factor=3,
    directory="my_dir",
    project_name="hpq_ann",
    overwrite="true"
)
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# In this cell I define the parameters for the hyperparameter tuning for the feed-forward model

In [7]:
trained_ann = tf.keras.models.load_model("ann")
trained_cnn = tf.keras.models.load_model("cnn")

# In this cell I load the trained models

In [None]:
tuner.search(preprocessed_training_data_copy, training_labels_copy, epochs=50, validation_split=0.15, callbacks=[stop_early])
optimal_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
                                                                                                                
# In this cell I search for the optimal hyperparameters                                                                                                                 

In [None]:
optimal_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# In this cell I return the optimal hyperparameters

In [None]:
ann_model = build_ann_model_for_training(
    embedding_dim=150, 
    dropout_rate=0.2, 
    first_layer_units=128, 
    second_layer_units=64, 
    activation="tanh")

# In this cell I build the final feed-forward model

In [None]:
ann_model.fit(
    x=preprocessed_training_data_copy, 
    y=training_labels_copy, 
    epochs=10, 
    validation_split=0.15, 
    callbacks=callbacks)

# In this cell I train the final feed-forward model

In [None]:
ann_model.evaluate(preprocessed_testing_data_copy, preprocessed_testing_labels)

# In this cell I test the final feed-forward model

In [None]:
ann_model.save("ann")

# In this cell I save the feed-forward model

In [18]:
preprocessed_testing_labels = tf.convert_to_tensor(testing_labels.drop(columns="index"))

# In this cell I convert the testing labels to a tensor

In [21]:
trained_cnn.evaluate(x=preprocessed_testing_data_copy, y=preprocessed_testing_labels), trained_ann.evaluate(x=preprocessed_testing_data_copy, y=preprocessed_testing_labels)



([2.6585614681243896, 0.3461129665374756],
 [2.3985812664031982, 0.37178125977516174, 2.3985812664031982])

In [11]:
preprocessed_testing_data_copy = tf.reshape(preprocessed_testing_data, [12233, 30])

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type tensorflow.python.framework.ops.EagerTensor).

In [None]:
sepcnn_model = build_sepcnn_model(embedding_dim=200, filters=32, blocks=2, dropout_rate=0.2, kernel_size=3, pool_size=3, learning_rate=1e-3)
sepcnn_model.fit(
    x=preprocessed_training_data_copy,
    y=training_labels_copy,
    epochs=10,
    validation_split=0.1,
    callbacks=callbacks)

In [None]:
sepcnn_model.evaluate(x=preprocessed_testing_data_copy,
              y=preprocessed_testing_labels)



In [42]:
trained_cnn.evaluate(x=preprocessed_testing_data_copy,
              y=preprocessed_testing_labels), trained_ann.evaluate(x=preprocessed_testing_data_copy,
              y=preprocessed_testing_labels)




([2.6585614681243896, 0.3461129665374756],
 [2.3985812664031982, 0.37178125977516174, 2.3985812664031982])

In [None]:
preprocessed_training_data_copy, preprocessed_training_labels

In [8]:
labels = data.columns.drop(["text", "index"])
labels

Index(['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring',
       'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval',
       'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
       'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
       'relief', 'remorse', 'sadness', 'surprise'],
      dtype='object')

In [None]:
final_sepcnn_model2.save("actualfinal_sepcnn_model")

In [None]:
final_sepcnn_model2.save("cnn")

In [None]:
final_model_loaded = tf.keras.models.load_model("final_sepcnn_model")

In [None]:
final_model_loaded.summary()

In [None]:
final_model_loaded.evaluate(
    x=preprocessed_testing_data_copy,
    y=preprocessed_testing_labels
)

In [57]:
trained_cnn.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 30, 200)           6000200   
                                                                 
 separable_conv1d_2 (Separab  (None, 30, 40)           8640      
 leConv1D)                                                       
                                                                 
 separable_conv1d_3 (Separab  (None, 30, 40)           1760      
 leConv1D)                                                       
                                                                 
 global_average_pooling1d_1   (None, 40)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dropout_3 (Dropout)         (None, 40)                0         
                                                      

In [9]:
def ann_predict(text):

    vectorized = vectorize_text(text)
    output = trained_ann.predict(vectorized)
    return output

#output = ann_predict(testing_data.text.iloc[76])
output = ann_predict("")
output = tf.reshape(output, [27]).numpy().tolist()
output_dict = dict(zip(labels, output))

#highest_output = max(ouput_dict, key=output_dict.get)
#output_dict, highest_output
max(output_dict, key=output_dict.get), output_dict
sorted(output_dict.values(), reverse=True)[:3]
sorted_output_dict = dict(sorted(output_dict.items(), key=lambda item:item[1], reverse=True))
sorted_output_dict#, testing_data.text.iloc[76]


{'approval': 0.10927341878414154,
 'confusion': 0.09982244670391083,
 'curiosity': 0.08398647606372833,
 'annoyance': 0.059897832572460175,
 'realization': 0.057530708611011505,
 'sadness': 0.05199885368347168,
 'excitement': 0.04941577464342117,
 'admiration': 0.04929938167333603,
 'disapproval': 0.0482577383518219,
 'disappointment': 0.04453521966934204,
 'surprise': 0.03903232514858246,
 'anger': 0.03443717211484909,
 'joy': 0.029241276904940605,
 'disgust': 0.028340954333543777,
 'love': 0.028020750731229782,
 'amusement': 0.025000836700201035,
 'embarrassment': 0.021047083660960197,
 'caring': 0.018760673701763153,
 'optimism': 0.01589212194085121,
 'fear': 0.015567841939628124,
 'nervousness': 0.015440806746482849,
 'remorse': 0.01499853003770113,
 'gratitude': 0.014954703859984875,
 'pride': 0.014403589069843292,
 'desire': 0.012958898209035397,
 'relief': 0.011080865748226643,
 'grief': 0.006803733296692371}

In [16]:
def cnn_predict(text):

    vectorized = vectorize_text(text)
    output = trained_cnn.predict(vectorized)
    return output

output = cnn_predict("I feel scared")
output = tf.reshape(output, [27]).numpy().tolist()
output_dict = dict(zip(labels, output))

#highest_output = max(ouput_dict, key=output_dict.get)
#output_dict, highest_output
max(output_dict, key=output_dict.get), output_dict
sorted(output_dict.values(), reverse=True)[:3]
sorted_output_dict = dict(sorted(output_dict.items(), key=lambda item:item[1], reverse=True))
sorted_output_dict, testing_data.text.iloc[70]

({'fear': 0.42594608664512634,
  'sadness': 0.21000884473323822,
  'nervousness': 0.0901598408818245,
  'embarrassment': 0.05420936271548271,
  'disappointment': 0.04663660004734993,
  'realization': 0.04185948148369789,
  'remorse': 0.029163120314478874,
  'grief': 0.02765926904976368,
  'disgust': 0.02709975279867649,
  'surprise': 0.015983998775482178,
  'relief': 0.006445922423154116,
  'annoyance': 0.003559886710718274,
  'caring': 0.003495303215458989,
  'approval': 0.002597693121060729,
  'desire': 0.0022432920522987843,
  'pride': 0.0021839728578925133,
  'anger': 0.0018672236474230886,
  'confusion': 0.0017620096914470196,
  'disapproval': 0.0015190384583547711,
  'excitement': 0.0011410551378503442,
  'optimism': 0.0010064503876492381,
  'curiosity': 0.0009361167321912944,
  'amusement': 0.000843517598696053,
  'joy': 0.0007211748161353171,
  'gratitude': 0.0006001463043503463,
  'admiration': 0.00018055542022921145,
  'love': 0.00017029346781782806},
 "Every time I see a som

In [239]:
def ann_results():
    results = []
    predicted_values = ann_predict(preprocessed_testing_data_copy)
    return predicted_values 

In [240]:
annresults = ann_results()

In [305]:
np.argsort(annresults).shape, preprocessed_testing_labels.numpy().shape
count = 0
for i in range(len(preprocessed_testing_labels.numpy())):
    if np.argsort(annresults[i])[np.argmax(preprocessed_testing_labels.numpy()[i])] > 23:
        count+=1

In [307]:
count

1883

In [233]:
np.ndarray(annresults)

array(15.)

In [216]:
ann_predict(preprocessed_testing_data_copy).shape

(12233, 27)

In [192]:
preprocessed_testing_labels[1]

<tf.Tensor: shape=(27,), dtype=int64, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0])>

In [None]:
for i in preprocessed_testing_data_copy:
    print(ann_predict(tf.expand_dims(i, -1)))

In [None]:
ann_results()

In [None]:
for i in preprocessed_testing_data_copy:
    print(i)

In [None]:
preprocessed_testing_data_copy[1], array_to_label(preprocessed_testing_labels[1])

In [None]:
#tf.unique_with_counts(tf.map_fn(elems=training_labels_copy, fn=lambda x: array_to_label(list(x))))
values = np.array([])
for i in training_labels_copy.numpy():
    values = np.append(values, array_to_label(i))


In [None]:
np.unique(values, return_counts=True, )

In [None]:
np.unique(training_labels_copy, axis=0, return_counts=True)