In [1]:
from __future__ import print_function
import keras
import tensorflow as tf
import itertools
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras import regularizers
import numpy as np
from sklearn.linear_model import LogisticRegression as logreg
from sklearn.model_selection import train_test_split
import sklearn as sk
from sklearn.metrics import accuracy_score
import time
import statistics

Using TensorFlow backend.


## Unbiased Estimator

### MNIST Dataset

In [0]:
def prepare_dataset_cnn(X, y, img_size = (28, 28, 1)):
    """Reshape flat samples into images, scale them, and one-hot encode labels.

    Args:
        X: array whose first axis indexes samples; each sample must hold
            exactly prod(img_size) values.
        y: class labels, passed to keras.utils.to_categorical with 2 classes.
        img_size: per-sample target shape. Defaults to (28, 28, 1).

    Returns:
        (X, y): X as float32 with shape (n_samples,) + img_size, divided by
        255; y as returned by keras.utils.to_categorical(y, 2).
    """
    # Honor img_size instead of a hard-coded shape so the parameter is not
    # silently ignored; the default keeps the previous (28, 28, 1) behavior.
    X = X.reshape((X.shape[0],) + tuple(img_size))
    # Divide by 255 after the float32 cast (maps 8-bit pixel values to [0, 1]).
    X = X.astype('float32') / 255.

    # Convert class vectors to binary class matrices.
    y = keras.utils.to_categorical(y, 2)

    return X, y

In [0]:
def clf_cnn_fit(Xtr, Str, Xval, Sval, epochs = 20, batch_size=32, shuffle=True, sample_weight = None):
    """Build, compile and train a small CNN with a 2-way softmax output.

    Training and validation data are preprocessed with whichever
    prepare_dataset_cnn is currently bound in the notebook; the input shape
    of the first convolution is taken from the preprocessed training array.

    Args:
        Xtr, Str: training inputs and labels.
        Xval, Sval: validation inputs and labels, reported after every epoch.
        epochs: number of training epochs.
        batch_size: mini-batch size.
        shuffle: forwarded to model.fit (whether to shuffle training data
            before each epoch).
        sample_weight: optional per-sample loss weights forwarded to
            model.fit; None trains without weighting.

    Returns:
        The trained keras Sequential model.
    """
    # Prepare the dataset for CNN model
    X, y = prepare_dataset_cnn(Xtr, Str)
    X_v, Y_v = prepare_dataset_cnn(Xval, Sval)

    # Create CNN topology: two conv/ReLU/max-pool/dropout stages followed by
    # a 512-unit dense layer and a 2-unit softmax classifier.
    model = Sequential()

    model.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=X.shape[1:]))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(2))
    model.add(Activation('softmax'))

    # Initiate RMSprop optimizer. The canonical class name is used because the
    # lowercase `rmsprop` alias is not available in every Keras release.
    opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

    # Train the model using RMSprop
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    # Train model. `shuffle` is forwarded from the caller; it used to be
    # hard-coded to True, which made the parameter a no-op.
    model.fit(X, y, epochs=epochs, batch_size=batch_size, shuffle=shuffle,
              validation_data=(X_v, Y_v), sample_weight=sample_weight)
    return model

In [0]:
def loss_func_modif(S_train, rh0 = 0.2, rh1= 0.4):
  """Compute per-sample loss weights from binary labels.

  With alpha = (1 - rh1 + rh0) / 2, samples labelled 0 receive weight alpha
  and every other sample receives 1 - alpha.

  Args:
    S_train: labels as an array or any array-like (e.g. a plain list).
    rh0, rh1: parameters of alpha; presumably the two class-conditional
      label-noise rates -- TODO confirm against the method description.

  Returns:
    1-D float array of weights with one entry per label.
  """
  # Coerce to an ndarray first: comparing a plain list with 0 yields a single
  # False, which would leave every weight at 1 - alpha.
  labels = np.asarray(S_train)
  alpha = float(1 - rh1 + rh0) / 2
  sample_weight = np.where(labels == 0, alpha, 1 - alpha)
  return np.ravel(sample_weight)

In [0]:
def kfold_Model_fit(X_train, S_train, X_test, Y_test, sample_weight_indic = False, kfold = 10):
  """Train the CNN `kfold` times on random splits and collect test accuracy.

  Each repetition draws a fresh random 80/20 train/validation split of the
  training data with train_test_split (repeated random hold-out; the splits
  are not disjoint folds), trains a new model with clf_cnn_fit, and
  evaluates it on the test set. The elapsed wall-clock time is printed.

  Args:
    X_train, S_train: training inputs and labels; labels are flattened.
    X_test, Y_test: test inputs and labels used for the reported accuracy.
    sample_weight_indic: when truthy, train with the per-sample weights
      returned by loss_func_modif; otherwise train unweighted.
    kfold: number of repetitions.

  Returns:
    1-D array of length `kfold` holding the second value returned by
    model.evaluate for each repetition (accuracy, given the single
    'accuracy' metric clf_cnn_fit compiles the model with).
  """
  start_time = time.time()
  accuracy_test = np.zeros(kfold)
  S_train = np.ravel(S_train)

  # The preprocessed test set does not depend on the split, so build it once
  # instead of once per repetition, under names distinct from the train split.
  X_eval, Y_eval = prepare_dataset_cnn(X_test, Y_test)

  for i in range(kfold):
    X_t, X_v, S_t, S_v = train_test_split(X_train, S_train, test_size=0.2)
    sample_weight = loss_func_modif(S_t) if sample_weight_indic else None
    clf = clf_cnn_fit(X_t, S_t, X_v, S_v, sample_weight=sample_weight)
    score = clf.evaluate(X_eval, Y_eval)
    accuracy_test[i] = score[1]

  print("Running time: "+ str(int(time.time()-start_time))+" seconds")
  return accuracy_test

In [0]:
def Prepare_datset(dataset):
    """Extract the train/test arrays from a loaded dataset mapping.

    Args:
        dataset: mapping with the entries 'Xtr', 'Str', 'Xts' and 'Yts'.

    Returns:
        (X_train, X_test, S_train, Y_test) as numpy arrays, with both label
        arrays flattened to 1-D.
    """
    # Entries are read in the same order as the keys listed here.
    features_train, labels_train, features_test, labels_test = (
        np.asarray(dataset[key]) for key in ('Xtr', 'Str', 'Xts', 'Yts')
    )
    return features_train, features_test, labels_train.ravel(), labels_test.ravel()

In [0]:
# Load the MNIST archive; Prepare_datset reads its 'Xtr', 'Str', 'Xts' and 'Yts' entries.
dataset = np.load('mnist_dataset.npz')

### Baseline

In [8]:
# Baseline: sample_weight_indic=False, so the models are trained without
# per-sample weights.
X_train, X_test, S_train, Y_test = Prepare_datset(dataset)
accuracy_test = kfold_Model_fit(
    X_train, S_train, X_test, Y_test,
    sample_weight_indic=False, kfold=10
)
# Mean and sample standard deviation of the test accuracy over the runs.
accuracy_mean = np.mean(accuracy_test)
accuracy_std = statistics.stdev(accuracy_test)
print(accuracy_mean, accuracy_std)

Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
None
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
None
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
None
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
E

### Label noise correction by weighted loss function

In [10]:
# Weighted loss: sample_weight_indic=True makes kfold_Model_fit train with the
# per-sample weights produced by loss_func_modif.
X_train, X_test, S_train, Y_test = Prepare_datset(dataset)
accuracy_test = kfold_Model_fit(
    X_train, S_train, X_test, Y_test,
    sample_weight_indic=True, kfold=10
)
# Mean and sample standard deviation of the test accuracy over the runs.
accuracy_mean = np.mean(accuracy_test)
accuracy_std = statistics.stdev(accuracy_test)
print(accuracy_mean, accuracy_std)

Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoc

### CIFAR Dataset

In [0]:
# Load the CIFAR archive. This rebinds the same global `dataset` name that held
# the MNIST archive, so cells below this point operate on CIFAR.
dataset = np.load('cifar_dataset.npz')

In [0]:
def prepare_dataset_cnn(X, y, img_size = (32, 32, 3)):
    """Reshape flat samples into images, scale them, and one-hot encode labels.

    NOTE: this redefinition replaces the earlier notebook function of the
    same name; every call made after this cell runs uses this version.

    Args:
        X: array whose first axis indexes samples; each sample must hold
            exactly prod(img_size) values.
        y: class labels, passed to keras.utils.to_categorical with 2 classes.
        img_size: per-sample target shape. Defaults to (32, 32, 3).

    Returns:
        (X, y): X as float32 with shape (n_samples,) + img_size, divided by
        255; y as returned by keras.utils.to_categorical(y, 2).
    """
    # Honor img_size instead of a hard-coded shape so the parameter is not
    # silently ignored; the default keeps the previous (32, 32, 3) behavior.
    X = X.reshape((X.shape[0],) + tuple(img_size))
    # Divide by 255 after the float32 cast (maps 8-bit pixel values to [0, 1]).
    X = X.astype('float32') / 255.

    # Convert class vectors to binary class matrices.
    y = keras.utils.to_categorical(y, 2)

    return X, y

### Baseline

In [16]:
# Baseline: sample_weight_indic=False, so the models are trained without
# per-sample weights.
X_train, X_test, S_train, Y_test = Prepare_datset(dataset)
accuracy_test = kfold_Model_fit(
    X_train, S_train, X_test, Y_test,
    sample_weight_indic=False, kfold=10
)
# Mean and sample standard deviation of the test accuracy over the runs.
accuracy_mean = np.mean(accuracy_test)
accuracy_std = statistics.stdev(accuracy_test)
print(accuracy_mean, accuracy_std)

Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
None
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
None
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
None
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
E

### Label noise correction by weighted loss function

In [17]:
# Weighted loss: sample_weight_indic=True makes kfold_Model_fit train with the
# per-sample weights produced by loss_func_modif.
X_train, X_test, S_train, Y_test = Prepare_datset(dataset)
accuracy_test = kfold_Model_fit(
    X_train, S_train, X_test, Y_test,
    sample_weight_indic=True, kfold=10
)
# Mean and sample standard deviation of the test accuracy over the runs.
accuracy_mean = np.mean(accuracy_test)
accuracy_std = statistics.stdev(accuracy_test)
print(accuracy_mean, accuracy_std)

Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 8000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoc