In [1]:
import argparse
import os
import time
import math
import json
import numpy as np
# from PIL import Image
import socket
import torchvision.transforms as transforms
from scipy.special import softmax
from tensorflow.keras import datasets, layers, models, activations
import tensorflow.keras.backend as K
import tensorflow as tf
from load_corrupted_data import CIFAR10, CIFAR100

# note: nosgdr, schedule, and epochs are highly related settings

parser = argparse.ArgumentParser(description='Trains WideResNet on CIFAR',
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# Optimization options
parser.add_argument('--gold_fraction', '-gf', type=float, default=0.1, help='What fraction of the data should be trusted?')
parser.add_argument('--corruption_prob', '-cprob', type=float, default=0.5, help='The label corruption probability.')
parser.add_argument('--corruption_type', '-ctype', type=str, default='flip', help='Type of corruption ("unif" or "flip").')

# random seed
parser.add_argument('--seed', type=int, default=1)
args = parser.parse_args("")
mean = [x / 255 for x in [125.3, 123.0, 113.9]]
std = [x / 255 for x in [63.0, 62.1, 66.7]]

train_transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
     transforms.Normalize(mean, std)])
test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)])

gold_fraction = args.gold_fraction
corruption_prob = args.corruption_prob
corruption_type = args.corruption_type
train_data_gold = CIFAR10(
        'data', True, True, gold_fraction, corruption_prob, corruption_type,
        transform=train_transform, download=True)
train_data_silver = CIFAR10(
        'data', True, False, gold_fraction, corruption_prob, corruption_type,
        transform=train_transform, download=True, shuffle_indices=train_data_gold.shuffle_indices)
train_data_gold_deterministic = CIFAR10(
        'data', True, True, gold_fraction, corruption_prob, corruption_type,
        transform=test_transform, download=True, shuffle_indices=train_data_gold.shuffle_indices)
train_all_images = np.vstack((train_data_gold.train_data, train_data_silver.train_data))
train_all_labels = np.array(train_data_gold.train_labels + train_data_silver.train_labels)
train_all_labels_ = np.array([x -10 for x in train_data_gold.train_labels] + train_data_silver.train_labels)

i = np.random.permutation(len(train_all_images))
train_all_images, train_all_labels, train_all_labels_ = train_all_images[i], train_all_labels[i], train_all_labels_[i]


test_data = CIFAR10('data', train=False, transform=test_transform, download=True)
num_classes = 10

def make_model():
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3), kernel_initializer='random_normal', bias_initializer='zeros'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='random_normal', bias_initializer='zeros'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='random_normal', bias_initializer='zeros'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu', kernel_initializer='random_normal', bias_initializer='zeros'))
    model.add(layers.Dense(10, kernel_initializer='random_normal', bias_initializer='zeros'))
    model.add(layers.Activation(activations.linear))
    return model

# Train Silver model first 

def sparse_cat_entropy_loss(y_true, y_pred):
    y_pred = K.softmax(y_pred)
    y_true = K.cast(K.one_hot(K.cast(y_true[:, 0], tf.int32), num_classes), tf.float32)
    
    # scale predictions so that the class probas of each sample sum to 1
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    loss = y_true * K.log(y_pred)# * weights
    loss = -K.sum(loss, -1)
    return loss

base_model = make_model()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [2]:
model_s = tf.keras.models.clone_model(base_model)
model_s.compile(optimizer='adam', loss=sparse_cat_entropy_loss,
              metrics=['sparse_categorical_accuracy'])

print('\n Silver model \n')
history_s = model_s.fit(train_data_silver.train_data, train_data_silver.train_labels, epochs=7,
                    validation_data=(test_data.test_data, test_data.test_labels))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor

 Silver model 

Train on 45000 samples, validate on 10000 samples
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [3]:
# Form corruption matrix on silver model
from scipy.special import softmax
from scipy.optimize import minimize_scalar

def confidence_calibration(logits, y):
       """
       Confidence calibration of the logits outside of tensorflow, based on the paper https://arxiv.org/pdf/1706.04599.pdf
       Done using the method : Temperature Scaling 
       :param logits : Logits from neural network
       :type logits : Pandas DataFrame / Numpy array
       :param y : Multi/Single label Y
       :type y : Pandas DataFrame
       :return : Temperature Value, that will be used to scale the logits by
       :rtype : Float 
       """
       logits = np.array(logits)
       y = np.array(y.tolist()).astype('float32')
       
       def categorical_crossentropy_multi(y_true, y_pred):
              # y_true, y_pred -> numberOfExamples, dimensionSize
              losses = -1*np.log(np.maximum(np.sum(np.array(y_true) * np.array(y_pred), axis=1),1e-7)) # ignore '0'
              loss = np.mean(losses)
              return loss
       def temperature_scaling(x):
              """
              Temperature scaling for BERT.
              logits : Logits : N, M : N = number of examples, M = number of classes
              y : Target variable, N, M 
              x : Temperature 
              """
              scaled_logits = logits*1./x
              return categorical_crossentropy_multi(y, softmax(scaled_logits, axis=-1))
       res = minimize_scalar(temperature_scaling)
       return res.x

logits = model_s.predict(train_data_gold_deterministic.train_data)
gold_labels = np.array([x -10 for x in train_data_gold_deterministic.train_labels])
gold_one_hot = np.zeros((gold_labels.size, 10))
gold_one_hot[np.arange(gold_labels.size), gold_labels] = 1
T = confidence_calibration(logits, gold_one_hot)
matrices = []

logits_scaled = logits/T
probs = softmax(logits_scaled, axis=-1)

corruption_matrix = np.zeros((num_classes, num_classes))
label_count = np.zeros(num_classes)
for i, g_label in enumerate(train_data_gold_deterministic.train_labels):
    corruption_matrix[g_label-10] += probs[i] # -10 because added +10 to differentiate from silver labels
    label_count[g_label-10] += 1

corruption_matrix = corruption_matrix/label_count[:, np.newaxis] 
matrices.append(corruption_matrix)

  return np.exp(x - logsumexp(x, axis=axis, keepdims=True))


## Two different implementations of GLC

### Version 1:

- implements it by taking the corruption matrix, transposing it & indexing the matrix according to the labels generated
- the resulting matrix is multiplied with the probabilties matrix to give the corrected probabilties

In [4]:
# Define GLC loss
for i, corruption_matrix in enumerate(matrices):
    def glc_loss(y_true, y_pred):
        y_pred = tf.nn.softmax(y_pred)
        silver_gold_ids = y_true[:, 0]>=10
        silver_gold_ids = tf.cast(silver_gold_ids, tf.int32)
        y_true_ = tf.where(tf.cast(silver_gold_ids, tf.bool), y_true[:, 0]-10, y_true[:, 0])
        corruption_matrix_ = tf.gather(tf.cast(corruption_matrix.T, tf.float32), tf.cast(y_true_, tf.int32))

        y_true = tf.cast(tf.one_hot(tf.cast(y_true_, tf.int32), num_classes), tf.float32)
        y_pred = tf.clip_by_value(y_pred, K.epsilon(), 1-K.epsilon())
        probabilities = y_pred
        one_hot_labels = y_true

        probabilities_corrected = probabilities * corruption_matrix_
#         probabilities_corrected = probabilities_corrected/tf.math.reduce_sum(probabilities_corrected, axis=1, keepdims=True) 
        probabilities_corrected = tf.clip_by_value(probabilities_corrected, K.epsilon(), 1-K.epsilon())

        silver_gold_ids_ = tf.expand_dims(silver_gold_ids, 1)
        mix_loss_s = -tf.reduce_sum(one_hot_labels * tf.cast(tf.math.logical_not(tf.cast(silver_gold_ids_, tf.bool)), tf.float32) * tf.log(probabilities_corrected), axis=-1)
        mix_loss_g = -tf.reduce_sum(one_hot_labels * tf.cast(silver_gold_ids_, tf.float32) * tf.log(probabilities), axis=-1)
        mix_loss = mix_loss_s + mix_loss_g
        per_example_loss =  mix_loss
        return per_example_loss

    model_glc = tf.keras.models.clone_model(base_model)
    model_glc.compile(optimizer='adam', loss=glc_loss,
                  metrics=['sparse_categorical_accuracy'])

    # Define GLC model

    print('\n GLC model \n')
    history_glc = model_glc.fit(train_all_images, train_all_labels, epochs=7,
                        validation_data=(test_data.test_data, test_data.test_labels))
    
    # 60.39, came with really concentrated matrices, not spread out


 GLC model 

Train on 50000 samples, validate on 10000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [5]:
np.round(matrices[0], 2)

array([[0.29, 0.03, 0.09, 0.02, 0.06, 0.01, 0.35, 0.02, 0.07, 0.05],
       [0.04, 0.3 , 0.03, 0.01, 0.12, 0.01, 0.04, 0.01, 0.03, 0.42],
       [0.15, 0.05, 0.18, 0.05, 0.08, 0.06, 0.28, 0.04, 0.07, 0.03],
       [0.17, 0.1 , 0.07, 0.14, 0.08, 0.12, 0.15, 0.05, 0.07, 0.04],
       [0.21, 0.06, 0.08, 0.05, 0.19, 0.06, 0.18, 0.07, 0.06, 0.03],
       [0.12, 0.17, 0.05, 0.1 , 0.05, 0.24, 0.14, 0.06, 0.04, 0.03],
       [0.09, 0.05, 0.08, 0.04, 0.05, 0.04, 0.32, 0.02, 0.28, 0.04],
       [0.1 , 0.05, 0.04, 0.04, 0.08, 0.06, 0.35, 0.24, 0.02, 0.03],
       [0.09, 0.04, 0.3 , 0.02, 0.04, 0.01, 0.11, 0.01, 0.32, 0.06],
       [0.06, 0.09, 0.04, 0.02, 0.31, 0.01, 0.07, 0.02, 0.03, 0.36]])

###  Version 2

Implements by 

- Doing a dot product of the corruption matrix and probabilities to give the corrected probabilties 

In [6]:
# Define GLC loss, scenario 1 
for i, corruption_matrix in enumerate(matrices):
    def glc_loss_(y_true, y_pred):
        y_pred = tf.nn.softmax(y_pred)
        silver_gold_ids = y_true[:, 0]>=10
        silver_gold_ids = tf.cast(silver_gold_ids, tf.int32)
        y_true_ = tf.where(tf.cast(silver_gold_ids, tf.bool), y_true[:, 0]-10, y_true[:, 0])

        y_true = tf.cast(tf.one_hot(tf.cast(y_true_, tf.int32), num_classes), tf.float32)
        y_pred = tf.clip_by_value(y_pred, K.epsilon(), 1-K.epsilon())
        probabilities = y_pred
        one_hot_labels = y_true
        
        probabilities_corrected = tf.linalg.matmul(probabilities, tf.cast(corruption_matrix, tf.float32))

        silver_gold_ids_ = tf.expand_dims(silver_gold_ids, 1)
        mix_loss_s = -tf.reduce_sum(one_hot_labels * tf.cast(tf.math.logical_not(tf.cast(silver_gold_ids_, tf.bool)), tf.float32) * tf.log(probabilities_corrected), axis=-1)
        mix_loss_g = -tf.reduce_sum(one_hot_labels * tf.cast(silver_gold_ids_, tf.float32) * tf.log(probabilities), axis=-1)
        mix_loss = mix_loss_s + mix_loss_g
        per_example_loss =  mix_loss
        return per_example_loss

    model_glc_ = tf.keras.models.clone_model(base_model)
    model_glc_.compile(optimizer='adam', loss=glc_loss_,
                  metrics=['sparse_categorical_accuracy'])

    # Define GLC model

    print('\n GLC model \n')
    history_glc_ = model_glc_.fit(train_all_images, train_all_labels, epochs=7,
                        validation_data=(test_data.test_data, test_data.test_labels))
    
    # 60.39, came with really concentrated matrices, not spread out


 GLC model 

Train on 50000 samples, validate on 10000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [7]:
# Compare against non-GLC model

print('\n Non-GLC model \n')

model_n = tf.keras.models.clone_model(base_model)
model_n.compile(optimizer='adam', loss=sparse_cat_entropy_loss,
              metrics=['sparse_categorical_accuracy'])
history_n = model_n.fit(train_all_images, train_all_labels_, epochs=7,
                    validation_data=(test_data.test_data, test_data.test_labels))


 Non-GLC model 

Train on 50000 samples, validate on 10000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [8]:
normal_model_acc = model_n.evaluate(test_data.test_data, test_data.test_labels)
normal_model_acc



[1.6771817258834838, 0.4114]

In [9]:
glc_model_acc = model_glc.evaluate(test_data.test_data, test_data.test_labels)
glc_model_acc



[3.004318510437012, 0.3939]

In [10]:
glc_model_acc_ = model_glc_.evaluate(test_data.test_data, test_data.test_labels)
glc_model_acc_



[1.8666670642852783, 0.5587]

### For values:

- Gold Fraction : 0.1
- Corruption Probability : 0.5
- Corruption Type : flip 

#### Accuracies:

- GLC model : 56%
- Normal model : 41%

In [12]:
# proof of working
y_true = tf.constant([[11], [0]])
corruption_matrix = tf.constant([[0.6, 0.4], [0.7, 0.3]])
y_pred = tf.constant([[0.9, 0.1], [0.8, 0.2]])
silver_gold_ids = y_true[:, 0]>=10
silver_gold_ids = tf.cast(silver_gold_ids, tf.int32)
y_true_ = tf.where(tf.cast(silver_gold_ids, tf.bool), y_true[:, 0]-10, y_true[:, 0])
                              
y_true = tf.cast(tf.one_hot(tf.cast(y_true_, tf.int32), 2), tf.float32)
y_pred = tf.clip_by_value(y_pred, K.epsilon(), 1-K.epsilon())
probabilities = y_pred
one_hot_labels = y_true

probabilities_corrected = tf.linalg.matmul(probabilities, tf.cast(corruption_matrix, tf.float32))
probabilities_corrected = tf.clip_by_value(probabilities_corrected, K.epsilon(), 1-K.epsilon())

silver_gold_ids_ = tf.expand_dims(silver_gold_ids, 1)
mix_loss_s = -tf.reduce_sum(one_hot_labels * tf.cast(tf.math.logical_not(tf.cast(silver_gold_ids_, tf.bool)), tf.float32) * tf.log(probabilities_corrected), axis=-1)
mix_loss_g = -tf.reduce_sum(one_hot_labels * tf.cast(silver_gold_ids_, tf.float32) * tf.log(probabilities), axis=-1)
mix_loss = mix_loss_s + mix_loss_g
per_example_loss =  mix_loss


In [13]:
sess = tf.Session()
sess.run(per_example_loss), sess.run(probabilities), sess.run(probabilities_corrected), sess.run(mix_loss_s), sess.run(mix_loss_g), sess.run(one_hot_labels), sess.run(tf.log(probabilities)), sess.run(silver_gold_ids)

(array([2.3025851, 0.4780358], dtype=float32),
 array([[0.9, 0.1],
        [0.8, 0.2]], dtype=float32),
 array([[0.61      , 0.39      ],
        [0.62      , 0.38000003]], dtype=float32),
 array([-0.       ,  0.4780358], dtype=float32),
 array([ 2.3025851, -0.       ], dtype=float32),
 array([[0., 1.],
        [1., 0.]], dtype=float32),
 array([[-0.10536055, -2.3025851 ],
        [-0.22314353, -1.609438  ]], dtype=float32),
 array([1, 0]))

In [14]:
sess.run(tf.log(tf.constant([0.1])))

array([-2.3025851], dtype=float32)

In [15]:
sess = tf.Session()
sess.run(per_example_loss), sess.run(probabilities), sess.run(probabilities_corrected)

(array([2.3025851, 0.4780358], dtype=float32),
 array([[0.9, 0.1],
        [0.8, 0.2]], dtype=float32),
 array([[0.61      , 0.39      ],
        [0.62      , 0.38000003]], dtype=float32))

In [3]:
import numpy as np

def uniform_mix_C(mixing_ratio, num_classes):
    '''
    returns a linear interpolation of a uniform matrix and an identity matrix
    '''
    return mixing_ratio * np.full((num_classes, num_classes), 1 / num_classes) + \
        (1 - mixing_ratio) * np.eye(num_classes)

def flip_labels_C(corruption_prob, num_classes, seed=1):
    '''
    returns a matrix with (1 - corruption_prob) on the diagonals, and corruption_prob
    concentrated in only one other entry for each row
    '''
    np.random.seed(seed)
    C = np.eye(num_classes) * (1 - corruption_prob)
    row_indices = np.arange(num_classes)
    for i in range(num_classes):
        C[i][np.random.choice(row_indices[row_indices != i])] = corruption_prob
    return C

In [8]:
print(uniform_mix_C(0.4, 10))

[[0.64 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04]
 [0.04 0.64 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04]
 [0.04 0.04 0.64 0.04 0.04 0.04 0.04 0.04 0.04 0.04]
 [0.04 0.04 0.04 0.64 0.04 0.04 0.04 0.04 0.04 0.04]
 [0.04 0.04 0.04 0.04 0.64 0.04 0.04 0.04 0.04 0.04]
 [0.04 0.04 0.04 0.04 0.04 0.64 0.04 0.04 0.04 0.04]
 [0.04 0.04 0.04 0.04 0.04 0.04 0.64 0.04 0.04 0.04]
 [0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.64 0.04 0.04]
 [0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.64 0.04]
 [0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.64]]


In [9]:
print(flip_labels_C(0.4, 10))

[[0.6 0.  0.  0.  0.  0.  0.4 0.  0.  0. ]
 [0.  0.6 0.  0.  0.  0.  0.  0.  0.  0.4]
 [0.  0.  0.6 0.  0.  0.  0.4 0.  0.  0. ]
 [0.4 0.  0.  0.6 0.  0.  0.  0.  0.  0. ]
 [0.4 0.  0.  0.  0.6 0.  0.  0.  0.  0. ]
 [0.  0.4 0.  0.  0.  0.6 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.6 0.  0.4 0. ]
 [0.  0.  0.  0.  0.  0.  0.4 0.6 0.  0. ]
 [0.  0.  0.4 0.  0.  0.  0.  0.  0.6 0. ]
 [0.  0.  0.  0.  0.4 0.  0.  0.  0.  0.6]]
