In [1]:
import sys
# Put the directory two levels above the working directory on the import path.
# NOTE(review): the project-local imports in the next cell (`utils`, `models`,
# `env`) presumably resolve from there; the path is relative, so it depends on
# where the kernel was started — confirm.
sys.path.append('../../')

In [2]:
import random
random.seed(4444)  # seed Python's built-in RNG
import pickle
import numpy as np
np.random.seed(5555)  # seed NumPy's global RNG
import tensorflow as tf
tf.set_random_seed(6666)  # seed TensorFlow's graph-level RNG (TF 1.x API)
import pandas as pd
from utils.dataset import DataSet, LABELS
from models.own import OwnSingleModel
from keras.utils import to_categorical
from sklearn.metrics import f1_score, accuracy_score
from keras.utils import plot_model

# NOTE(review): no other import here provides `os`, CACHES_PATH,
# SAVED_MODELS_PATH or RESULTS_PATH, which later cells use, so they presumably
# come from this star import — confirm, and prefer explicit imports.
from env import *

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Dimension used to pick the embedding pickle file name.
VECTOR_DIM = 300

# Directory holding the cached inputs read by the loading cell.
WORD_SEQS_PATH = os.path.join(CACHES_PATH, 'word_seqs_sw', 'simple')

# One pickle per split, all inside WORD_SEQS_PATH.
TRAIN_SEQS_PADDED_PKL, VAL_SEQS_PADDED_PKL, TEST_SEQS_PADDED_PKL = (
    os.path.join(WORD_SEQS_PATH, file_name)
    for file_name in (
        'train_seqs_padded.pkl',
        'val_seqs_padded.pkl',
        'test_seqs_padded.pkl',
    )
)
EMBEDDING_PKL = os.path.join(WORD_SEQS_PATH, 'wem_%d.pkl' % VECTOR_DIM)

# Output locations: the model file passed to fit/load_weights, and a results CSV.
MODEL_FILE = os.path.join(SAVED_MODELS_PATH, 'own', 'single.h5')
RESULT_CSV = os.path.join(RESULTS_PATH, 'own_single.csv')

In [4]:
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserialising, so this must only be pointed at cache files that were
    produced by this project.
    """
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


# Raw splits as exposed by the project's DataSet object.
raw_dataset = DataSet()
train, val, test = raw_dataset.train, raw_dataset.val, raw_dataset.test

# Cached per-split sequences and the embedding, one pickle each.
train_seqs_padded = _load_pickle(TRAIN_SEQS_PADDED_PKL)
val_seqs_padded = _load_pickle(VAL_SEQS_PADDED_PKL)
test_seqs_padded = _load_pickle(TEST_SEQS_PADDED_PKL)
embedding = _load_pickle(EMBEDDING_PKL)

In [5]:
# Attach the cached sequences to each split by joining on the shared 'id' column.
train_with_seq = pd.merge(train, train_seqs_padded, on='id')
val_with_seq = pd.merge(val, val_seqs_padded, on='id')
test_with_seq = pd.merge(test, test_seqs_padded, on='id')

# Stack the per-row sequences of each split into a single array.
seq = 'words_seq'
train_x = np.array(list(train_with_seq[seq]))
val_x = np.array(list(val_with_seq[seq]))
test_x = np.array(list(test_with_seq[seq]))

y_cols = LABELS
val_ys = []          # shifted integer validation labels, one Series per label column
train_outputs = []   # one-hot training targets, one array per label column
val_outputs = []     # one-hot validation targets, one array per label column
for col in y_cols:
    # Shift labels by +2 before one-hot encoding.
    # NOTE(review): presumably the raw labels start at -2, making these
    # non-negative class indices — confirm against the dataset.
    train_y = train_with_seq[col] + 2
    val_y = val_with_seq[col] + 2
    val_ys.append(val_y)

    # Use one class count for both splits. Without an explicit num_classes,
    # to_categorical infers the width from each array's own maximum, so a class
    # missing from one split would give train and val targets of different
    # widths for the same output.
    num_classes = int(max(train_y.max(), val_y.max())) + 1
    train_y_onehot = to_categorical(train_y, num_classes=num_classes)
    val_y_onehot = to_categorical(val_y, num_classes=num_classes)
    train_outputs.append(train_y_onehot)
    val_outputs.append(val_y_onehot)

In [6]:
# Build the project model: the input length is the second dimension of train_x,
# and the loaded embedding object is handed straight to the constructor.
model = OwnSingleModel(
    embedding=embedding,
    max_len=train_x.shape[1],
)

# Print the layer table, then render the architecture of the wrapped model.
model._model.summary()
plot_model(model._model)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1123)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1123, 300)    15000000    input_1[0][0]                    
__________________________________________________________________________________________________
spatial_dropout1d_1 (SpatialDro (None, 1123, 300)    0           embedding_1[0][0]                
__________________________________________________________________________________________________
spatial_dropout1d_2 (SpatialDro (None, 1123, 300)    0           embedding_1[0][0]                
__________________________________________________________________________________________________
spatial_dr

In [7]:
# Fit on the training arrays with the validation arrays as validation data;
# MODEL_FILE is passed to the wrapper as the model file path.
model.fit(
    train_x,
    train_outputs,
    model_file=MODEL_FILE,
    validation_data=(val_x, val_outputs),
)

Train on 105000 samples, validate on 15000 samples
Epoch 1/300


  'precision', 'predicted', average, warn_for)



Epoch 00001: _f1_monitor improved from -inf to 0.39928, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5
Epoch 2/300

Epoch 00002: _f1_monitor improved from 0.39928 to 0.54936, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5
Epoch 3/300

Epoch 00003: _f1_monitor improved from 0.54936 to 0.58868, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5
Epoch 4/300

Epoch 00004: _f1_monitor improved from 0.58868 to 0.60647, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5
Epoch 5/300

Epoch 00005: _f1_monitor improved from 0.60647 to 0.61391, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5
Epoch 6/300

Epoch 00006: _f1_monitor improved from 0.61391 to 0.62105, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5
Epoch 7/300




Epoch 00008: _f1_monitor improved from 0.62698 to 0.63029, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5
Epoch 9/300

Epoch 00009: _f1_monitor improved from 0.63029 to 0.63639, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 10/300

Epoch 00010: _f1_monitor improved from 0.63639 to 0.63684, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5
Epoch 11/300

Epoch 00011: _f1_monitor improved from 0.63684 to 0.63971, saving model to /root/mounted/projects/ai_challenger_sentiment/data/saved_models/own/single.h5

Epoch 00011: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.


In [8]:
# Reload the weights stored at MODEL_FILE before producing predictions.
model._model.load_weights(MODEL_FILE)
val_probs = model.predict(val_x)
test_probs = model.predict(test_x)

# Index of the highest score along the last axis, for each entry of val_probs.
val_preds = [np.argmax(label_probs, axis=-1) for label_probs in val_probs]
# NOTE(review): test_probs is not consumed in the cells visible here; test
# predictions can be derived from it the same way as val_preds.

In [9]:
# Macro-averaged F1 per label column, printed as each one is computed.
per_label_f1 = []
for label_idx, (y_true, y_pred) in enumerate(zip(val_ys, val_preds)):
    label_f1 = f1_score(y_true, y_pred, average='macro')
    print("The %sth f1: %s" % (label_idx, label_f1))
    per_label_f1.append(label_f1)

# f1s keeps the running-total meaning it had before: the sum over all labels.
f1s = sum(per_label_f1)
print("The average f1 of val is %s" % (f1s / len(y_cols)))

The 0th f1: 0.605466622247453
The 1th f1: 0.502159358188295
The 2th f1: 0.626253418681543
The 3th f1: 0.5627216442135571
The 4th f1: 0.76670829521768
The 5th f1: 0.5170212115127293
The 6th f1: 0.6424132286511315
The 7th f1: 0.7493463835096147
The 8th f1: 0.6883904199916078
The 9th f1: 0.5938356266491976
The 10th f1: 0.6806562906095373
The 11th f1: 0.7260506256647516
The 12th f1: 0.7250364915288556
The 13th f1: 0.6963565770584659
The 14th f1: 0.6591619739738395
The 15th f1: 0.687874474323913
The 16th f1: 0.46662034314991496
The 17th f1: 0.6724100280515015
The 18th f1: 0.5527347542662917
The 19th f1: 0.6730820552972366
The average f1 of val is 0.6397149911393558


  'precision', 'predicted', average, warn_for)
