# Load Libraries

In [1]:
import numpy as np
import pandas as pd

from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

import pickle



# Mount the drive and change the directory

In [2]:
import os

# Resolve the project root only on the first execution of this cell.
# A bare relative os.chdir("../../../") climbs three more levels every time
# the cell is re-run, which silently breaks the relative paths used below.
# Caching the absolute path makes the cell safe to re-run.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("../../../")
os.chdir(PROJECT_ROOT)

# Google Colab alternative (kept for reference, disabled):
# from google.colab import drive
# drive.mount('/content/drive/')
# %cd drive/MyDrive/poleval_emotion/

# Define constants

In [3]:
# Sequence length passed to pad_sequences; must be the exact value used
# when training the model.
MAX_LEN = 259

# Base name shared by the model file, the tokenizer file and the
# prediction output files.
MODEL_NAME = 'lstm_baseline'

In [4]:
# Root folder of the input data (relative to the working directory).
DATA_DIR = 'data/'

# Input TSV files, one per split, given relative to DATA_DIR.
TRAIN_FILENAME = 'train/in_baseline.tsv'
TESTA_FILENAME = 'testA/in_baseline.tsv'
TESTB_FILENAME = 'testB/in_baseline.tsv'

In [5]:
# The model and its tokenizer are stored under the same base name.
MODEL_FILENAME = MODEL_NAME

# Folders holding the saved model (.h5) and the pickled tokenizer.
MODELS_DIR = 'models/'
TOKENIZERS_DIR = 'models/tokenizers/'

In [6]:
# Root folder for the generated prediction files.
PREDS_DIR = 'predictions/'

# Per-split subfolders inside PREDS_DIR.
TRAIN_OUTPUT_SUBDIR = 'train/'
TESTA_OUTPUT_SUBDIR = 'testA/'
TESTB_OUTPUT_SUBDIR = 'testB/'

# Load the model

In [7]:
# Path of the saved Keras model: <MODELS_DIR><MODEL_FILENAME>.h5
model_path = f'{MODELS_DIR}{MODEL_FILENAME}.h5'

# Restore the model from disk for inference.
model = load_model(model_path)



# Load the tokenizer

In [8]:
# Path of the pickled tokenizer: <TOKENIZERS_DIR><MODEL_FILENAME>.pickle
tokenizer_path = f'{TOKENIZERS_DIR}{MODEL_FILENAME}.pickle'

# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load tokenizer files that come from a trusted source.
with open(tokenizer_path, 'rb') as handle:
    tokenizer = pickle.load(handle)

# Load & prepare data

In [9]:
def load_data(file_path):
    """Read a tab-separated file into a DataFrame.

    Parameters
    ----------
    file_path : str or file-like
        Location of the TSV file; forwarded unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, with the first row of the file used as the header
        (``pd.read_csv`` default).
    """
    return pd.read_csv(file_path, sep='\t')

In [10]:
def load_and_prepare_data(file_path, tokenizer, max_len):
    """Load a TSV file and turn its ``text`` column into padded sequences.

    Parameters
    ----------
    file_path : str
        Path of the tab-separated input file; it must contain a ``text``
        column.
    tokenizer : object
        Tokenizer exposing ``texts_to_sequences``.
    max_len : int
        Sequence length forwarded to ``pad_sequences`` as ``maxlen``.

    Returns
    -------
    The result of ``pad_sequences(sequences, maxlen=max_len)``, with one
    entry per row of the input file.

    Raises
    ------
    KeyError
        If the file has no ``text`` column.
    """
    data = load_data(file_path)
    # Empty cells are parsed as NaN (a float), which the tokenizer cannot
    # process as text. Replace them with an empty string instead of dropping
    # the rows, so predictions stay aligned one-to-one with the input lines.
    texts = data['text'].fillna('').astype(str)
    sequences = tokenizer.texts_to_sequences(texts)
    # Only maxlen is set; padding/truncating use the pad_sequences defaults.
    return pad_sequences(sequences, maxlen=max_len)

# Prepare the test and training datasets

In [11]:
# Build the padded model inputs for test A, test B and the training split,
# in that order, using the same tokenizer and sequence length for all three.
X_a, X_b, X_t = (
    load_and_prepare_data(DATA_DIR + split_filename, tokenizer, MAX_LEN)
    for split_filename in (TESTA_FILENAME, TESTB_FILENAME, TRAIN_FILENAME)
)

# Generate predictions

In [12]:
def generate_predictions(model, X, threshold=0.5):
    """Run the model on ``X`` and binarize its outputs.

    Parameters
    ----------
    model : object
        Any object exposing ``predict(X)`` whose result supports an
        element-wise ``>`` comparison (e.g. a NumPy array).
    X : array-like
        Model inputs, passed unchanged to ``model.predict``.
    threshold : float, optional
        Decision cut-off. An output counts as positive only when it is
        strictly greater than this value. Defaults to 0.5.

    Returns
    -------
    Boolean array with the same shape as the model output.
    """
    scores = model.predict(X)
    # The comparison already produces a boolean array, so no cast is needed.
    return scores > threshold

In [13]:
# Boolean predictions for test A, test B and the training split, in that order.
predictions_a, predictions_b, predictions_t = (
    generate_predictions(model, split_inputs)
    for split_inputs in (X_a, X_b, X_t)
)

[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 30ms/step


# Save predictions

In [14]:
def save_predictions(predictions, file_path):
    """Write predictions to a headerless, index-free TSV file.

    Parameters
    ----------
    predictions : array-like
        Two-dimensional data with exactly 11 columns, one per label listed
        below.
    file_path : str, os.PathLike or file-like
        Destination passed to ``DataFrame.to_csv``. For path-like targets,
        missing parent directories are created first.

    Raises
    ------
    ValueError
        If ``predictions`` does not have 11 columns (raised by pandas when
        the frame is built).
    """
    import os  # local import keeps this cell self-contained

    label_columns = [
        'Joy', 'Trust', 'Anticipation', 'Surprise', 'Fear', 'Sadness',
        'Disgust', 'Anger', 'Positive', 'Negative', 'Neutral',
    ]
    # The names are not written out (header=False), but building the frame
    # with them rejects arrays with a different number of columns.
    df = pd.DataFrame(predictions, columns=label_columns)

    # to_csv does not create missing folders, so make sure the target
    # directory exists. Skipped for file-like objects.
    if isinstance(file_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(file_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    df.to_csv(file_path, sep='\t', index=False, header=False)

In [15]:
# Output files follow the pattern <PREDS_DIR><split subdir><MODEL_NAME>.tsv
testA_output_name = f'{PREDS_DIR}{TESTA_OUTPUT_SUBDIR}{MODEL_NAME}.tsv'
testB_output_name = f'{PREDS_DIR}{TESTB_OUTPUT_SUBDIR}{MODEL_NAME}.tsv'
train_output_name = f'{PREDS_DIR}{TRAIN_OUTPUT_SUBDIR}{MODEL_NAME}.tsv'

In [16]:
# Write one TSV per split: test A, test B, then the training split.
save_predictions(predictions=predictions_a, file_path=testA_output_name)
save_predictions(predictions=predictions_b, file_path=testB_output_name)
save_predictions(predictions=predictions_t, file_path=train_output_name)