In [82]:
import tensorflow as tf
import os
import numpy as np
# from copy import deepcopy

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout
from keras.optimizers import Adam
from keras import backend as K
import functools
from collections import Counter

In [83]:
# load data from CIFAR10
def load_data(clean_data_size):
    """Load CIFAR-10 and set aside a small class-balanced "clean" subset.

    Images are divided by 255.0 and labels are one-hot encoded over 10
    classes. For each class, `clean_data_size` training samples are drawn at
    random without replacement; those samples are returned separately and
    removed from the training arrays.

    Args:
        clean_data_size: number of training samples to hold out per class.

    Returns:
        A tuple `(x_train, y_train, x_test, y_test, x_clean, y_clean)` where
        `x_train` / `y_train` no longer contain the held-out samples.
    """
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()

    # Scale pixels and make the (N, 32, 32, 3) layout explicit.
    x_train = (train_images / 255.0).reshape(train_images.shape[0], 32, 32, 3)
    x_test = (test_images / 255.0).reshape(test_images.shape[0], 32, 32, 3)

    # Integer class ids -> one-hot rows.
    to_one_hot = tf.contrib.keras.utils.to_categorical
    y_train = to_one_hot(train_labels, 10)
    y_test = to_one_hot(test_labels, 10)

    # Draw the same number of samples from every class.
    per_class_picks = []
    for label in range(10):
        members = np.where(y_train[:, label] == 1)[0]
        per_class_picks.append(np.random.choice(members, clean_data_size, replace=False))
    clean_index = np.concatenate(per_class_picks).astype(int)

    x_clean = x_train[clean_index]
    y_clean = y_train[clean_index]

    # The held-out rows must not remain in the training arrays.
    remaining_x = np.delete(x_train, clean_index, axis=0)
    remaining_y = np.delete(y_train, clean_index, axis=0)
    return remaining_x, remaining_y, x_test, y_test, x_clean, y_clean


def generate_noise_labels(y_train, noise_level):
    """Corrupt a fraction of one-hot labels by moving them to a different class.

    `int(noise_level * len(y_train))` rows are chosen at random without
    replacement. Each chosen row is replaced by the one-hot encoding of a
    class drawn uniformly from the classes other than its current one.

    The number of classes is taken from `y_train.shape[1]`.

    Args:
        y_train: 2-D one-hot label array of shape (n_samples, n_classes).
            It is modified in place.
        noise_level: fraction of rows to corrupt.

    Returns:
        The same `y_train` array object, after in-place modification.

    Raises:
        ValueError: if rows must be corrupted but fewer than two classes exist.
    """
    num_samples, num_classes = y_train.shape[0], y_train.shape[1]
    num_noise = int(noise_level * num_samples)
    if num_noise == 0:
        # Nothing to corrupt; also avoids sampling from an empty population.
        return y_train
    if num_classes < 2:
        raise ValueError('need at least 2 classes to flip labels, got {}'.format(num_classes))

    noise_index = np.random.choice(num_samples, num_noise, replace=False)
    label_slice = np.argmax(y_train[noise_index], axis=1)

    # Adding a non-zero offset modulo the class count always lands on a
    # different class and is uniform over the remaining classes, so no
    # rejection/resampling loop is needed.
    offset = np.random.randint(low=1, high=num_classes, size=num_noise)
    new_label = (label_slice + offset) % num_classes

    # Row-indexing an identity matrix yields the one-hot rows.
    y_train[noise_index] = np.eye(num_classes, dtype=y_train.dtype)[new_label]
    return y_train


In [84]:
# Experiment configuration.
noise_level = 0.8        # fraction of training labels that get corrupted
clean_data_size = 50     # clean samples held out per class
seed = 0

# Seed NumPy before any sampling so the clean/noisy split and the corrupted
# labels come out the same on every Restart & Run All. This only covers
# NumPy's global generator.
np.random.seed(seed)

x_train, y_train, x_test, y_test, x_clean, y_clean = load_data(clean_data_size)
y_train = generate_noise_labels(y_train, noise_level)


In [85]:
def create_model(architecture, num_classes, learning_rate=0.0002, dropout=0.5):
    """Build and compile a small CNN described by a list of layer specs.

    Each entry of `architecture` is either
      * `[filters, kernel_h, kernel_w]` - a ReLU `Conv2D` with 'same' padding, or
      * `[units]` - a ReLU `Dense` layer.
    The first conv layer declares an input shape of (32, 32, 3). Conv layers
    at positions 0-2 are followed by 2x2 max pooling. A `Flatten` is inserted
    before a dense layer that directly follows a conv layer. A `Dropout`
    layer is added after all specified layers.

    The output head depends on `num_classes`:
      * `> 2`: `Dense(num_classes)` + softmax, categorical cross-entropy loss.
      * `== 2`: `Dense(1)` + sigmoid, mean-squared-error loss.

    Args:
        architecture: sequence of layer specs as described above.
        num_classes: number of target classes (must be at least 2).
        learning_rate: learning rate passed to the Adam optimizer.
        dropout: rate of the dropout layer placed before the output head.

    Returns:
        The compiled Keras `Sequential` model.

    Raises:
        ValueError: if `num_classes` is below 2 or a layer spec does not have
            1 or 3 entries.
    """
    # Validate everything first so a bad spec fails loudly before any layer
    # is created, instead of producing a partial or uncompiled model.
    if num_classes < 2:
        raise ValueError('num_classes must be at least 2, got {}'.format(num_classes))
    for layer_index, layer in enumerate(architecture):
        if len(layer) not in (1, 3):
            raise ValueError('Invalid architecture: layer {} has {} entries, expected 1 or 3'.format(
                layer_index, len(layer)))

    model = Sequential()
    previous_was_conv = False
    for layer_index, layer in enumerate(architecture):
        if len(layer) == 3:
            conv_kwargs = dict(kernel_size=(layer[1], layer[2]), kernel_initializer='glorot_normal',
                               activation='relu', padding='same')
            if layer_index == 0:
                conv_kwargs['input_shape'] = (32, 32, 3)
            model.add(Conv2D(layer[0], **conv_kwargs))
            if layer_index < 3:
                model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
            previous_was_conv = True
        else:
            # Track the previous layer explicitly; indexing with
            # `layer_index - 1` would wrap to the last layer at index 0.
            if previous_was_conv:
                model.add(Flatten())
            model.add(Dense(layer[0], activation='relu', kernel_initializer='glorot_normal'))
            previous_was_conv = False
    model.add(Dropout(dropout))

    adam = Adam(lr=learning_rate)
    if num_classes > 2:
        model.add(Dense(num_classes))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=adam)
    else:
        # Binary case: a single sigmoid unit trained with squared error.
        model.add(Dense(1))
        model.add(Activation('sigmoid'))
        model.compile(loss='mean_squared_error', metrics=['accuracy'], optimizer=adam)
    return model

In [86]:
# Build ten independent one-vs-rest binary classifiers, one per class.
architecture = [[32, 5, 5], [32, 5, 5], [32, 5, 5], [256]]
binary_classifier_list = [create_model(architecture, num_classes=2) for _ in range(10)]

# Every classifier is built from the same spec, so summarising the last one
# describes all of them.
model = binary_classifier_list[-1]
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_58 (Conv2D)           (None, 32, 32, 32)        2432      
_________________________________________________________________
max_pooling2d_58 (MaxPooling (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_59 (Conv2D)           (None, 16, 16, 32)        25632     
_________________________________________________________________
max_pooling2d_59 (MaxPooling (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_60 (Conv2D)           (None, 8, 8, 32)          25632     
_________________________________________________________________
max_pooling2d_60 (MaxPooling (None, 4, 4, 32)          0         
_________________________________________________________________
flatten_20 (Flatten)         (None, 512)               0         
__________

In [None]:
# use the idea of PU learning to augment positive data
#
# For each class, a binary classifier is repeatedly trained on the clean
# positives plus the noisy-set samples it selected in the previous round, against
# randomly drawn clean negatives. After each fit, the noisy training images it
# scores highest become that class's additional positives for the next round.
epochs = 20
max_negatives = 400           # upper bound on negatives drawn per round
confidence_threshold = 0.98   # minimum score for a sample to be selected
max_additional = 1000         # upper bound on selected samples per class

# Always hold integer index arrays so the element type stays consistent across rounds.
additional_data_index = [np.array([], dtype=int) for _ in range(10)]
for epoch in range(epochs):
    for label in range(10):
        positive_index = np.where(y_clean[:, label] == 1)[0]
        negative_index = np.where(y_clean[:, label] != 1)[0]

        x = np.concatenate((x_clean[positive_index], x_train[additional_data_index[label]]), axis=0)
        n_p = len(x)
        # Sampling is without replacement, so never request more negatives
        # than actually exist in the clean set.
        n_n = min(max_negatives, n_p, len(negative_index))
        negative_index = np.random.choice(negative_index, n_n, replace=False)
        x = np.concatenate((x, x_clean[negative_index]), axis=0)
        # Targets as one array (positives first, then negatives), matching x.
        y = np.concatenate((np.ones(n_p), np.zeros(n_n)))

        classifier = binary_classifier_list[label]
        classifier.fit(x, y, batch_size=32, epochs=20, shuffle=True)

        # One score per training image.
        pred_train = classifier.predict(x_train).reshape(-1)
        candidate_index = np.where(pred_train > confidence_threshold)[0]
        if len(candidate_index) < max_additional:
            additional_data_index[label] = candidate_index
        else:
            # Too many confident samples: keep only the highest scored ones.
            additional_data_index[label] = np.argsort(-pred_train)[:max_additional]

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Extend the clean set with the samples selected for each class, then train the
# teacher on it.
# NOTE: x_clean / y_clean are rebound to the enlarged arrays, so running this
# cell a second time appends the same samples again.
image_parts = [x_clean]
label_parts = [y_clean]
for label in range(10):
    index = additional_data_index[label]
    image_parts.append(x_train[index])
    # Selected samples take the label of the classifier that picked them.
    label_parts.append(tf.contrib.keras.utils.to_categorical([label] * len(index), 10))
x_clean = np.concatenate(image_parts, axis=0)
y_clean = np.concatenate(label_parts)

architecture = [[32, 5, 5], [32, 5, 5], [32, 5, 5], [500]]
teacher_model = create_model(architecture, num_classes=10)
teacher_model.fit(x_clean, y_clean, batch_size=64, epochs=50, shuffle=True)

In [None]:
# calculate lambda

In [None]:
# Student network: a 10-way classifier with three 5x5 conv layers of 32 filters
# followed by a 500-unit dense layer.
architecture = [[32, 5, 5] for _ in range(3)] + [[500]]
student_model = create_model(architecture=architecture, num_classes=10)

In [None]:
# train