In [1]:
import os
import pickle
import numpy as np
import pandas as pd

from collections import Counter

from sklearn.metrics import f1_score, make_scorer, confusion_matrix, \
    classification_report
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score, \
    StratifiedShuffleSplit, RandomizedSearchCV, GridSearchCV
from sklearn.preprocessing import LabelEncoder

from keras_preprocessing.sequence import pad_sequences
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.INFO)

%matplotlib inline

  from ._conv import register_converters as _register_converters


# Data

In [2]:
with open("../Data/Learn/labels.pkl", "rb") as f:
    learn_labels = pickle.load(f)

with open("../Data/generated/my_learn_sequences.pkl", "rb") as f:
    learn_sequences = pickle.load(f)

with open("../Data/generated/my_embeddings.pkl", "rb") as f:
    embeddings = pickle.load(f)

# Remove unknown words row
embeddings = embeddings[:-1, :]

X_train, X_test, y_train, y_test = train_test_split(
    learn_sequences, learn_labels, test_size=0.3,
    shuffle=True, stratify=learn_labels, random_state=42
)

In [3]:
embeddings.shape

(28935, 300)

# CNN

In [33]:
class DNNModel(BaseEstimator, ClassifierMixin):
    def __init__(self,
                 sentence_length,
                 embeddings,
                 filters_by_ksize=5,
                 kernel_sizes=(2,),
                 batch_size=128,
                 learning_rate=0.1,
                 dropout_keep_prob=1.0,
                 model_name=None,
                 checkpoints_dir="../checkpoints/",
                 ):
        self.sentence_length = sentence_length
        self.embeddings = embeddings
        self.embedding_dim = self.embeddings.shape[1]
        self.filters_by_ksize = filters_by_ksize
        self.kernel_sizes = kernel_sizes
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.dropout_keep_prob = dropout_keep_prob
        self.features_key = "x"
        self.weight_key = "weight"
        self.set_model_directory(checkpoints_dir, model_name)

    def set_model_directory(self, checkpoints_dir, model_name):
        if model_name is not None:
            self.model_dir = checkpoints_dir + model_name
            # Check model_dir doesn't already exist
            if os.path.exists(self.model_dir):
                raise ValueError("model_dir already exists")
        else:
            self.model_dir = None

    def check_warm_start(self, warm_start):
        if warm_start:
            # Check if model was already fitted
            try:
                self.classifier_
            except:
                warm_start = False
        return warm_start
    
    def create_dnn_classifier(self):
        # Columns of X
        self.feature_columns_ = [tf.feature_column.numeric_column(
            key=self.features_key, shape=self.sentence_length
        )]
        # Model parameters
        params = {
            "feature_columns": self.feature_columns_,
            "n_classes": self.n_classes_,
        }
        # Config
        run_config = tf.estimator.RunConfig(
            model_dir=self.model_dir,
            log_step_count_steps=10,
        )
        # Create model
        model = tf.estimator.Estimator(model_fn=self.model_fn,
                                       model_dir=self.model_dir,
                                       params=params,
                                       config=run_config)
        model = tf.contrib.estimator.add_metrics(model, self.f1_score)
        return model
        
    def f1_score(self, labels, predictions):
        return {"f1-score": self.f1_metric_fn(labels=labels, predictions=predictions)}
    
    def f1_metric_fn(self, labels, predictions):
        p, p_op = tf.metrics.precision(labels=labels, predictions=predictions)
        r, r_op = tf.metrics.recall(labels=labels, predictions=predictions)
        return 2 * p * r / (p + r), tf.group(p_op, r_op)        

    def input_fn(self, mode, X, y=None, num_epochs=1):
        if mode in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL]:
            shuffle = True
        else:
            shuffle, num_epochs, y = (False, 1, None)
        X = {self.features_key: X}
        return tf.estimator.inputs.numpy_input_fn(X, y, self.batch_size,
                                                  num_epochs, shuffle)

    def model_fn(self, features, labels, mode, params):
        # Network
        logits = self.network_fn(features, params)
        
        # Predict
        predicted_classes = tf.argmax(logits, 1)
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode, predictions=predicted_classes)        
        
        # Loss
        class_M = self.label_encoder_.transform(["M"])
        weights = tf.cast(tf.equal(labels, class_M), tf.float64)
        weights = tf.multiply(weights, (6.63 - 1)) + 1
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits,
                                                      weights=weights)
        
        # Eval
        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss, predictions=predicted_classes)
        
        # Train
        optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate)
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    
    def network_fn(self, features, params):
        # Create embedding matrix
        embeddings = tf.convert_to_tensor(self.embeddings)
        unknown_words_embedding = tf.Variable(tf.random_uniform(
            [1, self.embedding_dim], -1.0, 1.0, tf.float64), trainable=True)
        embeddings = tf.concat([embeddings, unknown_words_embedding], axis=0)
        
        # Extract sequences embeddings
        sequences = tf.feature_column.input_layer(features, params['feature_columns'])
        embeddings = tf.nn.embedding_lookup(embeddings, tf.cast(sequences, tf.int64))
        
        # Convolutions and max poolings
        feature_maps = []
        iterator = zip([self.filters_by_ksize] * len(self.kernel_sizes), self.kernel_sizes)
        for filters, kernel_size in iterator:
            tmp = tf.layers.conv1d(embeddings, filters, kernel_size, padding="same")
            tmp = tf.layers.max_pooling1d(tmp, [self.sentence_length], strides=1, 
                                          padding="valid")
            feature_maps.append(tmp)
        
        # Concat all feature maps and add softmax
        shape = [-1, self.filters_by_ksize * len(self.kernel_sizes)]
        feature_maps = tf.reshape(tf.concat(feature_maps, axis=2), shape)
        feature_maps = tf.nn.dropout(feature_maps, self.dropout_keep_prob)
        logits = tf.layers.dense(feature_maps, self.n_classes_, activation=None)
        return logits
    
    def fit_and_apply_transformers(self, X, y):
        X = pad_sequences(X, self.sentence_length)
        self.label_encoder_ = LabelEncoder()
        y = self.label_encoder_.fit_transform(y)
        self.n_classes_ = len(self.label_encoder_.classes_)
        return X, y

    def apply_transformers(self, X, y):
        X = pad_sequences(X, self.sentence_length)
        y = self.label_encoder_.transform(y)
        return X, y

    def fit(self, X, y, num_epochs=1, warm_start=True):
        warm_start = self.check_warm_start(warm_start)
        if not warm_start:
            X, y = self.fit_and_apply_transformers(X, y)
            self.classifier_ = self.create_dnn_classifier()
        else:
            X, y = self.apply_transformers(X, y)

        self.classifier_.train(self.input_fn(
            tf.estimator.ModeKeys.TRAIN, X, y, num_epochs))
        return self

    def predict(self, X):
        X = pad_sequences(X, self.sentence_length)
        classes = list(self.classifier_.predict(self.input_fn(
            tf.estimator.ModeKeys.PREDICT, X)))
        labels = self.label_encoder_.inverse_transform(classes)
        return labels
    
    def score(self, X, y):
        X, y = self.apply_transformers(X, y)
        results = self.classifier_.evaluate(self.input_fn(
            tf.estimator.ModeKeys.EVAL, X, y))
        return results["f1-score"]

# Hyperparameters

### kernel_sizes=(2,), filters_by_ksize=50

In [None]:
%%capture capt

for lr in np.logspace(-6, 0, 7):
    print("-" * 80 + "\nLearning rate :", lr)
    model = DNNModel(
        sentence_length=max(map(len, X_train)), 
        embeddings=embeddings,
        dropout_keep_prob=1.0,
        filters_by_ksize=50,
        kernel_sizes=(2,),
        learning_rate=lr,
    )
    for epoch in range(5):
        model.fit(X_train, y_train, num_epochs=1, warm_start=True)
        f1 = model.score(X_test, y_test)
        print("EPOCH %d: test f1-score = %.3f" % (epoch, f1))

In [39]:
capt.show()

--------------------------------------------------------------------------------
Learning rate : 1e-06
EPOCH 0: test f1-score = nan
EPOCH 1: test f1-score = nan
EPOCH 2: test f1-score = nan
EPOCH 3: test f1-score = nan
EPOCH 4: test f1-score = nan
--------------------------------------------------------------------------------
Learning rate : 1e-05
EPOCH 0: test f1-score = nan
EPOCH 1: test f1-score = nan
EPOCH 2: test f1-score = nan
EPOCH 3: test f1-score = nan
EPOCH 4: test f1-score = nan
--------------------------------------------------------------------------------
Learning rate : 0.0001
EPOCH 0: test f1-score = 0.225
EPOCH 1: test f1-score = 0.234
EPOCH 2: test f1-score = 0.235
EPOCH 3: test f1-score = 0.240
EPOCH 4: test f1-score = 0.243
--------------------------------------------------------------------------------
Learning rate : 0.001
EPOCH 0: test f1-score = 0.270
EPOCH 1: test f1-score = 0.285
EPOCH 2: test f1-score = 0.306
EPOCH 3: test f1-score = 0.321
EPOCH 4: test f1-s

Most stable: learning rate = 0.01

# Clean code

In [2]:
with open("../Data/Learn/labels.pkl", "rb") as f:
    learn_labels = pickle.load(f)

with open("../Data/generated/my_learn_sequences.pkl", "rb") as f:
    learn_sequences = pickle.load(f)

with open("../Data/generated/my_embeddings.pkl", "rb") as f:
    embeddings = pickle.load(f)

# Remove unknown words row
embeddings = embeddings[:-1, :]

# Different random state
X_train, X_test, y_train, y_test = train_test_split(
    learn_sequences, learn_labels, test_size=0.3,
    shuffle=True, stratify=learn_labels, random_state=42 + 1
)

In [3]:
cd ..

/home/omar/Notebooks/deeplearningtest


In [4]:
from cnn_model import CNNModel

In [7]:
model = CNNModel(sentence_length=max(map(len, X_train)), embeddings=embeddings)
model

CNNModel(batch_size=128, checkpoints_dir=None, dropout_keep_prob=1.0,
     embeddings=array([[ 0.01856,  0.06263, ..., -0.13433,  0.14924],
       [-0.07446, -0.01878, ..., -0.08351, -0.09113],
       ...,
       [-0.74394, -0.07888, ...,  0.59589, -0.16601],
       [ 0.01831,  0.3224 , ..., -0.15861, -0.00104]]),
     filters_by_ksize=50, kernel_sizes=(2,), learning_rate=0.01,
     model_name=None, sentence_length=379)

In [8]:
model.fit(X_train, y_train, num_epochs=5)
model.score(X_test, y_test)
print(classification_report(y_test, model.predict(X_test)))

INFO:tensorflow:Using config: {'_model_dir': '/home/omar/tmp/tmphtt2bsg4', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 10, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff7603fd898>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Using config: {'_model_dir': '/home/omar/tmp/tmphtt2bsg4', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_ch

INFO:tensorflow:loss = 0.9160079956054688, step = 571 (1.688 sec)
INFO:tensorflow:global_step/sec: 5.94085
INFO:tensorflow:loss = 1.03570556640625, step = 581 (1.683 sec)
INFO:tensorflow:global_step/sec: 5.95396
INFO:tensorflow:loss = 0.9967358112335205, step = 591 (1.680 sec)
INFO:tensorflow:global_step/sec: 5.96804
INFO:tensorflow:loss = 1.0164092779159546, step = 601 (1.676 sec)
INFO:tensorflow:global_step/sec: 5.95963
INFO:tensorflow:loss = 0.8200972080230713, step = 611 (1.678 sec)
INFO:tensorflow:global_step/sec: 5.93108
INFO:tensorflow:loss = 0.9402025938034058, step = 621 (1.686 sec)
INFO:tensorflow:global_step/sec: 5.95099
INFO:tensorflow:loss = 0.9440509676933289, step = 631 (1.680 sec)
INFO:tensorflow:global_step/sec: 5.98116
INFO:tensorflow:loss = 1.0836787223815918, step = 641 (1.672 sec)
INFO:tensorflow:global_step/sec: 5.92209
INFO:tensorflow:loss = 0.7479866147041321, step = 651 (1.689 sec)
INFO:tensorflow:global_step/sec: 5.90614
INFO:tensorflow:loss = 0.69965124130249

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /home/omar/tmp/tmphtt2bsg4/model.ckpt-1256
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
              precision    recall  f1-score   support

           C       0.94      0.81      0.87     11974
           M       0.35      0.68      0.47      1805

   micro avg       0.80      0.80      0.80     13779
   macro avg       0.65      0.75      0.67     13779
weighted avg       0.87      0.80      0.82     13779

