# ACD Model Training

In [1]:
import os
import sys
# Put the notebook's parent directory on the import path (the project-local
# modules imported in the next cell presumably live there — TODO confirm).
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# NOTE(review): "-1" presumably hides every CUDA device so that the notebook
# runs on CPU only — confirm this is intended before a long training run.
os.environ.update(CUDA_VISIBLE_DEVICES="-1")

In [2]:
from glove import *
from myLayers import CustomAttention

import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, KFold
from sklearn import metrics
from sklearn.utils import class_weight

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## Load dataset and set data types

In [4]:
# Read the training reviews; the first CSV column becomes the index.
reviews = pd.read_csv('../data/reviews_revision_train.csv', index_col=0)

# The list-valued columns come back from the CSV as plain strings, so they are
# rebuilt here by stripping the brackets and splitting on the separator.
# NOTE(review): this slicing assumes the exact "[1, 2]" / "['a', 'b']" layout
# (an empty list "[]" would not round-trip) — verify against the CSV writer.

# "[1, 2, 3]" -> [1, 2, 3]
reviews['ids'] = reviews['ids'].apply(
    lambda raw: [int(token) for token in raw[1:-1].split(', ')]
)

# "['a', 'b']" -> ['a', 'b']
for list_column in ('meta_review_pros', 'meta_review_so-so',
                    'meta_review_cons', 'meta_review_labels'):
    reviews[list_column] = reviews[list_column].apply(
        lambda raw: raw[2:-2].split('\', \'')
    )

## Load GloVe model

In [3]:
# Load the pretrained GloVe vectors from a text file through the project-local
# `GloveModel` helper (presumably provided by `from glove import *` — TODO confirm).
# Later cells use `glove_model.emb_norm` as the (frozen) embedding matrix.
glove_model = GloveModel.from_pretrained('../trained_models/el.glove.300.txt')

## Pad ids of each entry to match max length

In [9]:
# Right-pad every id sequence with 0 up to the length of the longest review.
# 0 is also the value the embedding layers below mask out (mask_zero=True).
id_sequences = reviews['ids'].to_numpy()
padded_ids = keras.preprocessing.sequence.pad_sequences(
    sequences=id_sequences, padding="post", value=0
)

## Encode target labels as a binary (0/1) indicator per aspect

In [10]:
def encode_category(prod_type, label):
    """Build a row-wise encoder for one (product type, label) aspect.

    The returned callable takes a row (anything indexable by
    'meta_product_type' and 'meta_review_labels') and returns 1 when the
    aspect applies to that row, 0 otherwise. The aspect applies when `label`
    is among the row's labels and either the row's product type equals
    `prod_type` or `prod_type` is the catch-all 'Γενικά'.
    """
    def category_function(row):
        # The product type has to match first; 'Γενικά' matches every row.
        type_matches = prod_type == row['meta_product_type'] or prod_type == 'Γενικά'
        if not type_matches:
            return 0
        return 1 if label in row['meta_review_labels'] else 0

    return category_function

## Train a model for each aspect

In [None]:
# Per-aspect results, keyed by the raw "<product_type>#<label>" line from
# top_labels.txt. `models` holds the selected model of each aspect in the same
# order as the keys of `loss_per_label` (the merge cell relies on that order).
f1_per_label = {}
loss_per_label = {}
models = []

# Read the aspect list up front so the file handle is closed before the
# long-running training loop starts.
with open('../data/top_labels.txt', 'r', encoding='utf-8') as f:
    f_lines = f.readlines()

for aspect_key in f_lines:
    aspect_key = aspect_key.strip()
    if not aspect_key:
        # Blank (e.g. trailing) lines carry no aspect and would break the split below.
        continue
    if aspect_key in loss_per_label:
        # A repeated aspect would overwrite its dict entry but still append to
        # `models`, breaking the positional pairing of keys and models.
        continue

    # Each line is "<product_type>#<label>" with '_' standing in for spaces.
    prod_type, label = aspect_key.split('#')
    prod_type = prod_type.replace('_', ' ')
    label = label.replace('_', ' ')

    print('------------------------------------------------------------------------')
    print(f'Now training for aspect "{prod_type} - {label}"\n')

    # Encode target: 1 if the aspect applies to the review, else 0
    target = reviews.apply(encode_category(prod_type, label), axis='columns')

    # Define the K-fold Cross Validator
    kfold = KFold(n_splits=5, shuffle=True, random_state=0)

    fold_number = 1
    f1_per_fold = []
    loss_per_fold = []
    models_per_fold = []
    # for every fold
    for train_idx, test_idx in kfold.split(padded_ids, target):
        y_train = target.iloc[train_idx]
        y_test = target.iloc[test_idx]

        # Build model: frozen GloVe embeddings -> LSTM -> dropout -> attention -> sigmoid
        embeddings_layer = keras.layers.Embedding(
            input_dim=glove_model.emb_norm.shape[0], output_dim=glove_model.emb_norm.shape[1],
            weights=[glove_model.emb_norm], name='embeddings', trainable=False,
            mask_zero=True
        )
        inputs = keras.layers.Input(shape=(padded_ids.shape[1], ), dtype='int32', name='inputs')
        embeddings = embeddings_layer(inputs)
        lstm_embeddings = keras.layers.LSTM(128, return_sequences=True, name='lstm',
                                            kernel_regularizer=keras.regularizers.l2(l=4e-6)
                                           )(embeddings)
        lstm_embeddings = keras.layers.Dropout(0.5, name='dropout')(lstm_embeddings)
        after_attention = CustomAttention(name='attention', trainable=True
                                         )([lstm_embeddings, lstm_embeddings])
        output = keras.layers.Dense(1, activation='sigmoid', name='linear_sigmoid',
                                    kernel_regularizer=keras.regularizers.l2(l=4e-6)
                                   )(after_attention)

        model = keras.Model(inputs=inputs, outputs=output)

        model.compile(loss=keras.losses.BinaryCrossentropy(),
                      optimizer=keras.optimizers.Adam(1e-3),
                      metrics=['accuracy'])

        # Fit model; early stopping watches the training loss (no validation split is used)
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10, verbose=1,
                                                          restore_best_weights=True)
        # Adjust class weights in case of unbalanced data.
        # Key each weight by its actual class label rather than by position:
        # if a fold contains only class 1, the weight must land on key 1, not 0.
        present_classes = np.unique(y_train)
        balanced_weights = class_weight.compute_class_weight('balanced',
                                                             classes=present_classes,
                                                             y=y_train)
        class_weights = {int(cls): float(w)
                         for cls, w in zip(present_classes, balanced_weights)}

        history = model.fit(
            x=padded_ids[train_idx], y=y_train, epochs=50, batch_size=16,
            callbacks=[early_stopping],
            class_weight=class_weights,
            verbose=0
        )

        # Evaluate model on the held-out fold (predictions thresholded at 0.5 via rounding)
        y_pred = np.round(model.predict(padded_ids[test_idx]))

        # F1 of the negative class, F1 of the positive class, weighted F1
        f1_per_fold.append([metrics.f1_score(y_test, y_pred, pos_label=0, average='binary'),
                            metrics.f1_score(y_test, y_pred, pos_label=1, average='binary'),
                            metrics.f1_score(y_test, y_pred, average='weighted')])

        # evaluate() returns [loss, accuracy]; only the loss is kept
        loss_per_fold.append(model.evaluate(padded_ids[test_idx], y_test, verbose=0)[0])

        models_per_fold.append(model)

        print(f'* Fold {fold_number}: loss: {loss_per_fold[-1]:.3f} - f1: {f1_per_fold[-1][-1]:.3f}')
        fold_number += 1

    # Find best fold (lowest held-out loss) and keep its results and model
    best_fold = np.argmin(np.array(loss_per_fold))

    f1_per_label[aspect_key] = f1_per_fold[best_fold]
    loss_per_label[aspect_key] = loss_per_fold[best_fold]
    models.append(models_per_fold[best_fold])

    print(f'\nBest fold number: {best_fold + 1} - loss: {loss_per_label[aspect_key]:.3f}\n')
    print(pd.DataFrame(
        f1_per_label[aspect_key],
        index=['negative class', 'positive class', 'weighted'],
        columns=['F1-score:']).transpose())
    print('------------------------------------------------------------------------')

## Merge all models into a single one

In [12]:
from unidecode import unidecode

In [13]:
# Create common first layers
embeddings_layer = keras.layers.Embedding(
    input_dim=glove_model.emb_norm.shape[0], output_dim=glove_model.emb_norm.shape[1],
    weights=[glove_model.emb_norm], name='embeddings', trainable=False,
    mask_zero=True
)
inputs = keras.layers.Input(shape=(padded_ids.shape[1], ), dtype='int32', name='inputs')
embeddings = embeddings_layer(inputs)

# Create layers for each aspect
outputs = []
for prod_type in loss_per_label.keys():
    prod_type = unidecode(prod_type.strip().replace('#', '_'))
    
    lstm_embeddings = keras.layers.LSTM(128, return_sequences=True, name='lstm_'+prod_type,
                                        kernel_regularizer=keras.regularizers.l2(l=4e-6)
                                       )(embeddings)
    lstm_embeddings = keras.layers.Dropout(0.5, name='dropout_'+prod_type)(lstm_embeddings)
    
    after_attention = CustomAttention(name='attention_'+prod_type)([lstm_embeddings, lstm_embeddings])
    
    output = keras.layers.Dense(1, activation='sigmoid', name='linear_sigmoid_'+prod_type,
                                kernel_regularizer=keras.regularizers.l2(l=4e-6)
                               )(after_attention)
    outputs.append(output)

# Create merged model
merged_model = keras.Model(inputs=inputs, outputs=keras.layers.Concatenate()(outputs))

merged_model.compile(loss=keras.losses.CategoricalCrossentropy(),
                     optimizer=keras.optimizers.Adam(1e-3),
                     metrics=['accuracy'])

# Copy weights from training
for prod_type, model in zip(loss_per_label.keys(), models):
    prod_type = unidecode(prod_type.strip().replace('#', '_'))
    
    merged_model.get_layer('lstm_'+prod_type).set_weights(model.get_layer('lstm').get_weights())
    merged_model.get_layer('attention_'+prod_type).set_weights(model.get_layer('attention').get_weights())
    merged_model.get_layer('linear_sigmoid_'+prod_type).set_weights(model.get_layer('linear_sigmoid').get_weights())

## Save model

In [14]:
# Persist the merged model as a single .h5 file, relative to the notebook's
# working directory.
# NOTE(review): every other path in this notebook points at '../…' (e.g.
# '../trained_models/'); confirm that a local 'models/' folder is intended.
merged_model.save('models/ACD_model.h5')