## Acceso a Google Drive

In [1]:
# Mount Google Drive at /content/drive (Colab-only API) so the rest of the
# notebook can read and write files under that path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Bibliotecas necesarias

In [2]:
!pip3 install pickle5



In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adadelta
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.utils import to_categorical
from gensim.models import KeyedVectors
import tensorflow.keras.backend as K
import numpy as np
import io
import datetime
import pickle5 as pickle
from sklearn.metrics import classification_report

## Definición del modelo

In [4]:
class CNN:
  """1D convolutional text classifier on top of frozen pre-trained embeddings.

  Architecture: Embedding (non-trainable, initialised from ``embedding_matrix``)
  -> Conv1D(128 filters, kernel size 5, ReLU) -> GlobalMaxPooling1D
  -> Dense(num_classes, softmax). The compiled Keras model is stored in
  ``self.model``.
  """

  def __init__(self, input_dim, embedding_matrix, input_length=300, num_classes=23,
               plot_path='/content/drive/My Drive/dicode/Backtranslation/cnn_model.png'):
    """Build, compile, plot and summarise the network.

    Args:
      input_dim: Vocabulary size; the embedding table gets ``input_dim + 1`` rows.
      embedding_matrix: Initial (frozen) embedding weights. Must match the
        embedding layer's weight shape, i.e. ``(input_dim + 1, 300)``.
      input_length: Number of token ids in each input sequence.
      num_classes: Number of units in the softmax output layer.
      plot_path: Where to save the architecture diagram; pass ``None`` to
        skip plotting (e.g. when Drive is not mounted).
    """
    output_dim = 300
    # Named `token_ids` so the builtin `input` is not shadowed.
    token_ids = Input(shape=(input_length,), name='input_1')
    embeddings = layers.Embedding(input_dim+1, output_dim, input_length=input_length, weights=[embedding_matrix], trainable=False)
    conv1D = layers.Conv1D(128, 5, activation='relu', name='Conv1D_layer')

    embedding_output = embeddings(token_ids)
    conv1D_output = conv1D(embedding_output)
    pooled_output = layers.GlobalMaxPooling1D()(conv1D_output)

    predictions = layers.Dense(num_classes, activation='softmax', name='dense_layer')(pooled_output)
    self.model = Model([token_ids], predictions)
    self.__compile()
    if plot_path is not None:
      plot_model(self.model, to_file=plot_path, show_shapes=True, show_dtype=True)
    # summary() prints the table itself and returns None; wrapping it in
    # print() only appended a stray "None" line to the output.
    self.model.summary()

  def __compile(self):
    """Compile the model with categorical cross-entropy, Adam and accuracy."""
    self.model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer='adam', metrics=['accuracy'])

  def fit(self, input, targets, epochs=5, batch_size=128, validation_split=0.0, validation_data=None):
    """Train the model with early stopping.

    Args:
      input: Training sequences, fed to the model's single input.
      targets: One-hot encoded labels (the loss is categorical cross-entropy).
      epochs: Maximum number of training epochs.
      batch_size: Mini-batch size.
      validation_split: Fraction of the training data held out for validation,
        forwarded to ``Model.fit``.
      validation_data: Explicit validation set, forwarded to ``Model.fit``.

    Returns:
      The Keras ``History`` object produced by ``Model.fit``.
    """
    # `val_loss` only exists when validation data is supplied. Without it the
    # callback could never trigger and warned on every epoch, so fall back to
    # monitoring the training loss.
    has_validation = validation_data is not None or validation_split > 0
    monitored_metric = 'val_loss' if has_validation else 'loss'
    early_stopping_monitor = EarlyStopping(monitor=monitored_metric, patience=20, min_delta=0.00001)
    callbacks = [early_stopping_monitor]
    history = self.model.fit([input], targets, epochs=epochs, batch_size=batch_size, callbacks=callbacks,
                             validation_split=validation_split, validation_data=validation_data)
    return history

  def predict(self, input):
    """Return the model's class-probability predictions for ``input``."""
    return self.model.predict([input])

  def evaluate(self, input, targets, batch_size=128):
    """Return the result of ``Model.evaluate`` (loss and accuracy as compiled) on the given data."""
    return self.model.evaluate([input], targets, batch_size=batch_size)


## Entrenamiento y evaluación

In [5]:
def load_data(path, key='arr_0'):
  """Load a single array from a NumPy ``.npz`` archive.

  Args:
    path: Path to the ``.npz`` file.
    key: Name of the array inside the archive. ``'arr_0'`` is the name
      ``np.savez`` gives to the first positional array.

  Returns:
    The array stored under ``key``.
  """
  # np.load on an .npz returns a lazy NpzFile that keeps the file open.
  # The context manager closes it once the array has been read into memory.
  with np.load(path) as archive:
    return archive[key]

In [6]:
# Train and evaluate one CNN per cross-validation fold (folds 1 to 5) and save
# each fold's classification report next to that fold's data.
for fold in range(1,6):
  # Drop the Keras global state left by the previous fold's model so memory
  # does not grow with every model built in this loop.
  K.clear_session()

  fold_dir = '/content/drive/My Drive/dicode/Backtranslation/K-Folds/{}-fold'.format(fold)
  x_train = load_data('{}/x_train.npz'.format(fold_dir))
  y_train = load_data('{}/y_train.npz'.format(fold_dir))
  x_test = load_data('{}/x_test.npz'.format(fold_dir))
  y_test = load_data('{}/y_test.npz'.format(fold_dir))
  embedding_matrix = load_data('{}/embedding_matrix.npz'.format(fold_dir))

  # NOTE(review): pickle.load can execute arbitrary code; only load tokenizer
  # files that come from a trusted source.
  with open('{}/tokenizer.pickle'.format(fold_dir), 'rb') as handle:
    tokenizer = pickle.load(handle)
  input_dim = len(tokenizer.word_index)

  cnn_model = CNN(input_dim=input_dim, embedding_matrix=embedding_matrix)
  cnn_model.fit(x_train, y_train, epochs=10)
  cnn_model.evaluate(x_test, y_test)

  # Labels are one-hot encoded; convert predictions and targets to class ids.
  y_pred = cnn_model.predict(x_test)
  y_predicted = np.argmax(y_pred, axis=-1)
  y_true = np.argmax(y_test, axis=-1)
  report = classification_report(y_true, y_predicted)
  print(report)

  # The original opened this file but never wrote to it or closed it, leaving
  # an empty report on disk. Write the report and let `with` close the file.
  with open('{}/classification_report_cnn.txt'.format(fold_dir), 'w', encoding='utf8') as output_file:
    output_file.write(report)


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 300)]             0         
_________________________________________________________________
embedding (Embedding)        (None, 300, 300)          44141400  
_________________________________________________________________
Conv1D_layer (Conv1D)        (None, 296, 128)          192128    
_________________________________________________________________
global_max_pooling1d (Global (None, 128)               0         
_________________________________________________________________
dense_layer (Dense)          (None, 23)                2967      
Total params: 44,336,495
Trainable params: 195,095
Non-trainable params: 44,141,400
_________________________________________________________________
None
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch