In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix 

import sys
sys.path.append('src/')
from get_network import get_network
from training_utils import train
from get_data import get_data

import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization, Dropout

import matplotlib.pyplot as plt
%matplotlib inline

# Load data

In [None]:
# get_data() hands back the splits in test / validation / train order.
(X_test, Y_test, X_val, Y_val, X_train, Y_train) = get_data()

# sample counts, printed as: train, validation, test
print(*(len(split) for split in (X_train, X_val, X_test)))

In [None]:
# width of the feature matrix, then how many distinct labels occur in the train set
print(X_train.shape[1], len(np.unique(Y_train)))

# Add label noise to the data

In [None]:
# Seed NumPy's global generator so the noise mask drawn here (and any later
# np.random draws in the notebook) are reproducible under Restart Kernel -> Run All.
np.random.seed(0)

# proportion of training samples whose label will be replaced by a random one
p = 0.3

# number of samples in the train set
n_samples = len(X_train)

# choose which samples will have noisy labels:
# each sample is flagged independently with probability p
is_noisy = np.random.choice([0, 1], size=(n_samples,), p=[1.0 - p, p]).astype('bool')

# number of samples with noisy labels (random, so only approximately p * n_samples)
n_noisy_samples = is_noisy.sum()

# sorted array of the distinct labels present in the train set
all_classes = np.unique(Y_train)

In [None]:
# Start from the clean labels; the unflagged entries are left as they are.
Y_train_noisy = Y_train.copy()

# Overwrite the flagged entries with labels drawn uniformly from all classes.
# The draw may coincide with the true label, so with K classes the expected
# fraction of labels that actually change is p * (K - 1) / K rather than p.
Y_train_noisy[is_noisy] = np.random.choice(all_classes, size=n_noisy_samples)

In [None]:
# Class distribution of the clean training labels:
# displays (sorted distinct labels, number of samples per label).
np.unique(Y_train, return_counts=True)

In [None]:
# Class distribution after label corruption, in the same
# (sorted distinct labels, counts) form, for comparison with the clean labels.
np.unique(Y_train_noisy, return_counts=True)

In [None]:
# This cell overwrites the integer label vectors with their one-hot versions.
# Running it a second time would fit the encoder on the already-encoded 0/1
# matrix and silently produce garbage, so fail loudly instead.
assert Y_train.ndim == 1 and Y_train_noisy.ndim == 1, (
    'labels are already one-hot encoded; re-run the notebook from the data-loading cell'
)

# Dense float32 one-hot encoder, fitted on the clean training labels so that
# every split shares the same class -> column mapping.
ohe = OneHotEncoder(sparse=False, dtype='float32')
ohe.fit(Y_train.reshape(-1, 1))

# The encoder expects a 2-D (n_samples, 1) input, hence the reshape.
Y_test = ohe.transform(Y_test.reshape(-1, 1))
Y_val = ohe.transform(Y_val.reshape(-1, 1))
Y_train = ohe.transform(Y_train.reshape(-1, 1))
Y_train_noisy = ohe.transform(Y_train_noisy.reshape(-1, 1))

# Train a standard neural network

In [None]:
def simple_net(n_features=54, n_classes=7, learning_rate=1e-3):
    """Build and compile a small fully connected softmax classifier.

    The network has two hidden blocks of Dense(100) -> BatchNormalization ->
    ReLU, followed by a Dense softmax output layer. It is compiled with the
    Adam optimizer, categorical cross-entropy loss and an accuracy metric.

    Parameters
    ----------
    n_features : int, default 54
        Length of each input vector.
    n_classes : int, default 7
        Number of output units, i.e. the width of the one-hot targets.
    learning_rate : float, default 1e-3
        Learning rate passed to Adam.

    Returns
    -------
    keras.models.Sequential
        A freshly initialised, compiled model.
    """
    hidden_units = 100

    model = Sequential([
        Dense(hidden_units, input_shape=(n_features,)),
        BatchNormalization(),
        Activation('relu'),

        Dense(hidden_units),
        BatchNormalization(),
        Activation('relu'),

        Dense(n_classes),
        Activation('softmax'),
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [None]:
from keras.callbacks import EarlyStopping

In [None]:
# Baseline: a fresh network fitted on the clean (uncorrupted) training labels.
model1 = simple_net()

# Stop early once the monitored validation accuracy has not improved for 10 epochs.
# NOTE(review): the log key is 'val_acc' only in Keras releases that abbreviate
# the accuracy metric; newer ones log 'val_accuracy' — confirm for the installed version.
model1.fit(
    x=X_train,
    y=Y_train,
    batch_size=128,
    epochs=100,
    verbose=0,
    validation_data=(X_val, Y_val),
    callbacks=[EarlyStopping(monitor='val_acc', patience=10, verbose=1)],
);

# [loss, accuracy] on the data the model was trained on
model1.evaluate(X_train, Y_train, verbose=0)

In [None]:
# Same architecture, but fitted on the training labels that contain label noise.
model2 = simple_net()

# Validation still uses the clean validation labels, so early stopping is
# driven by accuracy against uncorrupted targets.
model2.fit(
    x=X_train,
    y=Y_train_noisy,
    batch_size=128,
    epochs=100,
    verbose=0,
    validation_data=(X_val, Y_val),
    callbacks=[EarlyStopping(monitor='val_acc', patience=10, verbose=1)],
)

# [loss, accuracy] measured against the noisy training labels
model2.evaluate(X_train, Y_train_noisy, verbose=0)

In [None]:
# Validation [loss, accuracy] of the model trained on the clean labels.
model1.evaluate(X_val, Y_val, verbose=0)

In [None]:
# Validation [loss, accuracy] of the model trained on the noisy labels.
model2.evaluate(X_val, Y_val, verbose=0)

# Compute confusion matrix

In [None]:
# Compact array display for the matrices shown below.
np.set_printoptions(
    suppress=True,   # plain fixed-point output instead of scientific notation
    precision=3,     # three digits after the decimal point
    linewidth=120,   # allow wide rows before wrapping
)

In [None]:
# Class-probability predictions of the noisy-label model on the validation set.
# `predict` replaces the deprecated `predict_proba` alias, which was removed from
# later Keras releases; for this softmax-output model both return the same array.
Y_val_pred = model2.predict(X_val, verbose=0)

In [None]:
# Confusion matrix on the validation set: rows are true classes, columns are
# predicted classes. `labels` is passed explicitly so the matrix always has one
# row/column per class, even if a class never shows up among the validation
# labels or the predictions.
conf = confusion_matrix(
    Y_val.argmax(1),
    Y_val_pred.argmax(1),
    labels=np.arange(Y_val.shape[1]),
)
conf

In [None]:
# Add-one smoothing so no entry is zero (the log of this matrix is taken later).
# The smoothed counts get their own name instead of overwriting `conf`, so
# re-running this cell does not keep adding pseudo-counts.
conf_smoothed = conf + 1

# Normalise each row to sum to 1: conf_norm[i, j] = p(predicted j | true i)
conf_norm = conf_smoothed / conf_smoothed.sum(axis=1, keepdims=True)

In [None]:
# Row-normalised confusion matrix: row i holds p(predicted j | true i).
conf_norm

In [None]:
# ideas:
# 1. overfit with big nn, then use smaller
# 2.

# Train with noise adaptation layer

In [None]:
# number of batches in the train set
# training-set size measured in batches of 128 (fractional, not rounded)
print(X_train.shape[0] / 128)

In [None]:
# Build the network; `initial_bias` is the element-wise log of the
# row-normalised confusion matrix, cast to float32.
graph, ops = get_network(
    initial_bias=np.log(conf_norm).astype('float32'),
    architecture=[54, 100, 100, 7],
    dropout=[0.0, 0.0, 0.1],
    optimizer=tf.train.AdamOptimizer(1e-3),
    weight_decay=1e-4,
)

# Train on the noisy labels, validating on the clean validation split.
# NOTE(review): the meaning of the leading positional 0 and of the two epoch
# counts is defined in training_utils.train — confirm there.
losses1, losses2 = train(
    0, graph, ops,
    X_train, Y_train_noisy,
    X_val, Y_val,
    batch_size=128,
    num_epochs1=60,
    num_epochs2=50,
    steps_per_epoch=130,
    validation_steps=27,
    patience=10,
)

In [None]:
# Curves from the first history returned by train().
# NOTE(review): presumably entry[0] is the training value and entry[1] the
# held-out value (validation data was passed to train) — confirm.
fig, ax = plt.subplots()
ax.plot([entry[0] for entry in losses1], label='train')
ax.plot([entry[1] for entry in losses1], label='test')
ax.legend();

In [None]:
# Curves from the second history returned by train().
# NOTE(review): presumably entry[0] is the training value and entry[1] the
# held-out value (validation data was passed to train) — confirm.
fig, ax = plt.subplots()
ax.plot([entry[0] for entry in losses2], label='train')
ax.plot([entry[1] for entry in losses2], label='test')
ax.legend();