In [1]:
import os
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [2]:
path_prefix = '/media/juan/Juan/NLP/datasets/'

# emb_type = 'bert'
emb_type = 'doc2vec'

# lang = 'es'
lang = 'en'
# lang = 'fr'

X_train_path = os.path.join(path_prefix, 'X_train_' + emb_type + '_' + lang + '.npy')
y_train_path = os.path.join(path_prefix, 'y_train_' + emb_type + '_' + lang + '.npy')

X_val_path = os.path.join(path_prefix, 'X_val_' + emb_type + '_' + lang + '.npy')
y_val_path = os.path.join(path_prefix, 'y_val_' + emb_type + '_' + lang + '.npy')

X_test_path = os.path.join(path_prefix, 'X_test_' + emb_type + '_' + lang + '.npy')
y_test_path = os.path.join(path_prefix, 'y_test_' + emb_type + '_' + lang + '.npy')

In [3]:
X_train = np.load(X_train_path)
y_train = np.load(y_train_path)

X_val = np.load(X_val_path)
y_val = np.load(y_val_path)

X_test = np.load(X_test_path)
y_test = np.load(y_test_path)

In [None]:
model = keras.Sequential([
        layers.Dense(150, activation="relu", input_shape=(len(X_train), len(X_train[0]))),
        layers.Dropout(0.5),
        layers.Dense(3, activation='softmax')
        ])
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=[tf.keras.metrics.Accuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)
history = model.fit(np.array(X_train), np.array(y_train), 
                    validation_data=(np.array(X_val), np.array(y_val)), 
                    epochs=2000, verbose=2, batch_size=256, class_weight={0:1,1:1,2:1})
metrics = model.evaluate(X_test, y_test)
metrics.append(2*metrics[2]*metrics[3]/(metrics[2] + metrics[3]))
metrics_df = pd.DataFrame()
metrics_dict = {'lang': [lang], 'embedding': [emb_type], 'accuracy': [metrics[1]], 'precision': [metrics[2]], 'recall': [metrics[3]], 'f1_score': [metrics[4]]}
metrics_df = metrics_df.append([pd.DataFrame(data = metrics_dict)])
metrics_df.to_csv('./../../results/Classification/results' + lang + emb_type + '.csv')

Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
Epoch 1/2000
470/470 - 2s - loss: 0.4462 - accuracy: 6.0685e-04 - precision: 0.8254 - recall: 0.8156 - val_loss: 0.3690 - val_accuracy: 6.6596e-04 - val_precision: 0.8490 - val_recall: 0.8487
Epoch 2/2000
470/470 - 1s - loss: 0.3721 - accuracy: 0.0016 - precision: 0.8499 - recall: 0.8494 - val_loss: 0.3485 - val_accuracy: 0.0017 - val_precision: 0.8585 - val_recall: 0.8584
Epoch 3/2000
470/470 - 1s - loss: 0.3531 - accuracy: 0.0029 - precision: 0.8572 - recall: 0.8569 - val_loss: 0.3363 - val_accuracy: 0.0033 - val_precision: 0.8641 - val_recall: 0.8638
Epoch 4/2000
470/470 - 1s - loss: 0.3395 - accuracy: 0.0046 - precision: 0.8617 - recall: 0.8612 - val_loss: 0.3301 - val_accuracy: 0.0061 - val_precision: 0.8642 - val_recall: 0.8640
Epoch 5/2000
470/470 - 1s - loss: 0.3280 - accuracy: 0.0061 - precision: 0.8673 - recall: 0.8668 - val_loss: 0.3226 - 

In [None]:
plt.plot(history.history['val_loss'])
plt.plot(history.history['loss'])

In [None]:
plt.plot(history.history['val_accuracy'])
plt.plot(history.history['accuracy'])

In [None]:
y_pred = model.predict(X_test)
sns.heatmap(confusion_matrix(np.argmax(y_pred, axis =1),np.argmax(y_test, axis =1)), cmap='Blues')
plt.savefig('./../../results/Classification/' + lang + '_' + emb_type + '.png')

In [None]:
from collections import Counter
print(dict(Counter(np.argmax(y_train, axis=1))))
print(dict(Counter(np.argmax(y_val, axis=1))))
print(dict(Counter(np.argmax(y_test, axis=1))))