In [22]:
# Run this notebook to evaluate the pre-trained model performance
# Majid Moghadam (Student ID: 1708800)
import os
import sys

import numpy as np
import pandas as pd

import tensorflow.keras.backend as K
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import model_from_json
from tensorflow.keras.preprocessing.text import Tokenizer

In [28]:
# Training file -- columns: ID, UTTERANCE, CORE RELATIONS
train_csv = 'dataset/hw1_train.csv'
trainFile = pd.read_csv(train_csv)

# Test file -- columns: ID, UTTERANCE
test_csv = 'dataset/hw1_test.csv'
testFile = pd.read_csv(test_csv)

In [29]:
# Data preparation: derive the raw inputs and the multi-label target matrix
# from the training dataframe.

# input features (raw utterance strings)
X = list(trainFile['UTTERANCE'])

# Each CORE RELATIONS cell holds one or more space-separated relation names.
# Split every row once and reuse the result for both the label vocabulary and
# the indicator matrix below.
relation_lists = [relation_str.split(' ') for relation_str in trainFile['CORE RELATIONS']]

# Distinct relations in first-appearance order, with a dict giving each
# relation its column position (constant-time lookup instead of a list scan).
# NOTE(review): the column order presumably has to match the label order the
# saved model was trained with, so first-appearance order is kept -- confirm.
relation_index = {}
unique_relations = []
for relations in relation_lists:
    for relation in relations:
        if relation not in relation_index:
            relation_index[relation] = len(unique_relations)
            unique_relations.append(relation)

# Distinct dot-separated components of the relation names, first-appearance order.
seen_nodes = set()
unique_nodes = []
for path in unique_relations:
    for node in path.split('.'):
        if node not in seen_nodes:
            seen_nodes.add(node)
            unique_nodes.append(node)

# Build the 0/1 indicator matrix in one pass. Rows are addressed by position,
# so the result does not depend on the dataframe's index labels (the previous
# per-cell `.loc[idx, relation] = 1` writes used the enumerate counter as a label).
label_matrix = np.zeros((len(relation_lists), len(unique_relations)), dtype=np.int64)
for row, relations in enumerate(relation_lists):
    for relation in relations:
        label_matrix[row, relation_index[relation]] = 1

# add label columns to dataframe (one int64 column per relation)
for col, relation in enumerate(unique_relations):
    trainFile[relation] = label_matrix[:, col]

# separate label columns
labels = trainFile[unique_relations]

# target values
y = labels.values

In [30]:
def recall_m(y_true, y_pred):
    """Recall of `y_pred` against `y_true`, computed with Keras backend ops.

    Both the element-wise product and `y_true` are clipped to [0, 1] and
    rounded before being summed, so the result is
    (rounded true positives) / (rounded actual positives + K.epsilon()).
    The epsilon term keeps the denominator non-zero when there are no positives.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    hit_count = K.sum(hit_mask)
    actual_count = K.sum(actual_mask)
    return hit_count / (actual_count + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision of `y_pred` against `y_true`, computed with Keras backend ops.

    Both the element-wise product and `y_pred` are clipped to [0, 1] and
    rounded before being summed, so the result is
    (rounded true positives) / (rounded predicted positives + K.epsilon()).
    The epsilon term keeps the denominator non-zero when nothing is predicted positive.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    hit_count = K.sum(hit_mask)
    predicted_count = K.sum(predicted_mask)
    return hit_count / (predicted_count + K.epsilon())

def f1_score(y_true, y_pred):
    """F1 metric: twice precision*recall over (precision + recall + K.epsilon()).

    Precision and recall come from `precision_m` and `recall_m`; the epsilon
    term keeps the denominator non-zero when both are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [31]:
def create_model(hyperParams, input_dim=5000, n_outputs=46):
    """Build and compile a fully connected Keras `Sequential` classifier.

    Parameters
    ----------
    hyperParams : dict
        Must contain:
        'hidden_layers'     -- non-empty sequence of hidden-layer widths, in order;
        'activation'        -- activation used by every hidden layer;
        'dropout'           -- rate passed to the Dropout layer after each hidden layer;
        'output_activation' -- activation of the output layer;
        'loss'              -- loss passed to `model.compile`.
    input_dim : int, optional
        Width of the input vectors. Defaults to 5000, the value that was
        previously hard-coded.
    n_outputs : int, optional
        Number of output units. Defaults to 46, the value that was previously
        hard-coded.

    Returns
    -------
    The compiled model (optimizer 'adam', metrics 'accuracy' and `f1_score`).

    Raises
    ------
    KeyError
        If a required key is missing from `hyperParams`.
    ValueError
        If 'hidden_layers' is empty.
    """
    hidden_layers = hyperParams['hidden_layers']
    activation = hyperParams['activation']
    dropout = hyperParams['dropout']
    output_activation = hyperParams['output_activation']
    loss = hyperParams['loss']

    if len(hidden_layers) == 0:
        raise ValueError("hyperParams['hidden_layers'] must contain at least one layer width")

    model = Sequential()
    # One Dense + Dropout pair per configured width. Every width is used exactly
    # once (the earlier loop re-read index 0 and never reached the last entry).
    for position, units in enumerate(hidden_layers):
        if position == 0:
            # Only the first layer declares the input shape.
            model.add(Dense(units, input_shape=(input_dim,), activation=activation))
        else:
            model.add(Dense(units, activation=activation))
        model.add(Dropout(dropout))
    model.add(Dense(n_outputs, activation=output_activation))
    # Original author's note on loss choices: categorical_crossentropy,
    # binary_crossentropy, f1_loss (the latter for tensorflow 1.14).
    model.compile(loss=loss, optimizer='adam', metrics=['accuracy', f1_score])

    return model

In [32]:
# Convert the training and test utterances to fixed-width matrices.

# training data
# Read the raw utterances from the dataframe instead of reusing `X`: this cell
# rebinds `X` to the vectorised matrix, so fitting on `X` would fail if the
# cell were run a second time.
train_texts = list(trainFile['UTTERANCE'])
# num_words=5000 is the same width as the 5000-wide input layer in create_model.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_texts)
X = tokenizer.texts_to_matrix(train_texts)

# test data (encoded with the tokenizer fitted on the training utterances)
X_t = list(testFile['UTTERANCE'])
X_pred = tokenizer.texts_to_matrix(X_t)

In [33]:
# Load model:

# load json and create model
# The context manager closes model.json even if read() raises.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
# load weights into new model
model.load_weights("model.h5")
# Compile with the loss and metrics that evaluate() should report.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', f1_score])

In [38]:
# NOTE(review): X and y are both derived from dataset/hw1_train.csv, so this
# scores the model on the training file even though the label below says
# "Validation" -- confirm that this is intended.
# With metrics=['accuracy', f1_score] at compile time, index 2 of the returned
# list is the F1 value (index 0 is the loss, index 1 the accuracy).
scores = model.evaluate(X, y, verbose=1)
f1_value = scores[2]

print('Validation F1 score: ', f1_value)

Validation F1 score:  0.9993001476171275
