In [0]:
'''
load the cifar10 models, compute the projections, and save datasets
recommending to run locally
'''

'\nload the cifar10 models, compute the projections, and save datasets\nrecommending to run locally\n'

In [0]:
from __future__ import print_function

In [0]:
import getpass
user = getpass.getuser()

if user == 'root':
    from google.colab import drive
    drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
################################################################
# import blocks
################################################################
# I/O
import os

# agrs
import sys

# garabage collection
import gc


# load json
import json
import numpy as np

import keras
from keras.models import Sequential, Model
from keras import backend as K
from keras.layers import Dense, Conv2D, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.datasets import cifar10

# split folds
from sklearn.model_selection import KFold

# tsne setups
from sklearn.manifold import TSNE

t_steps = 600
t_perplexity = 30
t_learning_rate = 150

tsne2 = TSNE(n_components=2,
             n_iter=t_steps,
             perplexity=t_perplexity,
             learning_rate = t_learning_rate,
             n_iter_without_progress = t_steps,
             random_state = 31)

tsne3 = TSNE(n_components=3,
             n_iter=t_steps,
             perplexity=t_perplexity,
             learning_rate = t_learning_rate,
             n_iter_without_progress = t_steps,
             random_state = 31)

# plotting parameters
from mpl_toolkits.mplot3d import Axes3D
import matplotlib as mpl
import matplotlib.pyplot as plt
import statistics, math, numbers, numpy
import matplotlib.colors as colors
from operator import itemgetter

from random import shuffle

In [0]:
import umap

umap3 = umap.UMAP(n_components=3, n_neighbors = 15, min_dist=0.1, metric = 'minkowski', random_state = 31)
umap2 = umap.UMAP(n_components=2, n_neighbors = 15, min_dist=0.1, metric = 'minkowski', random_state = 31)

In [0]:
################################################################
# parameters setting
################################################################
# fix random seeds
from numpy.random import seed
seed(3)

from tensorflow import set_random_seed
set_random_seed(31)

# which dataset to use
DATA_SET = sys.argv[1] if len(sys.argv) > 1 and sys.argv[1] != '-f' else 'cifar10'

# how many data points to show
VIS_NUM_POINTS = 4000

BATCH_SIZE = 128
NUM_CLASSES = 10
EPOCHS = 150
VALIDATION_SPLIT = 0.2
TARGET_LAYER = -2 # the last hidden layer

# input image dimensions
img_rows, img_cols, num_channels = 32, 32, 3

# Subtracting pixel mean improves accuracy
subtract_pixel_mean = True 

# check points / saved to Google drive
path_header = '/content/gdrive/My Drive/Colab Notebooks/cifar10-resnet/' if user == 'root' else ''

# load trained models
checkpoint_path = path_header + 'saved_models/'+ DATA_SET + '-e{epoch:04d}.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)

# data path
data_dir = path_header + 'saved_projections'

# colors
tableau10 = ["#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F",
            "#EDC948", "#B07AA1", "#FF9DA7", "#9C755F", "#BAB0AC"]

tableau20 = ["#4E79A7", "#A0CBE8", "#F28E2B", "#FFBE7D", "#59A14F",
             "#8CF17D", "#B6992D", "#EDC948",  "#499894", "#76B7B2",
             "#E15759", "#FF9D9A", "#79706E", "#BAB0AC", "#D37295",
             "#FABFD2", "#B07AA1", "#D4A6C8", "#9D7660", "#D7B5A6"]

tableau10_colors = colors.ListedColormap(tableau10)
tableau20_colors = colors.ListedColormap(tableau20)

In [0]:

################################################################
# load datasets
################################################################

# the data, split between train and test sets
if DATA_SET == 'cifar10':
    (x_train, y_train), (x_test0, y_test0) = cifar10.load_data()
elif  DATA_SET == 'imagenet10':
    (x_train, y_train), (x_test0, y_test0) = imagenet10.load_data()


# Input image dimensions.
input_shape = x_train.shape[1:]

# Normalize data
x_train = x_train.astype('float32') / 255
x_test0 = x_test0.astype('float32') / 255
x_flat = x_test0.copy().reshape(10000,3072)

# If subtract pixel mean is enabled
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test0 -= x_train_mean

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)
y_test0 = keras.utils.to_categorical(y_test0, NUM_CLASSES)


TOTAL = 10000
all_indices = np.arange(TOTAL)
USED = VIS_NUM_POINTS * 2
shuffle(all_indices)
used_indices = all_indices[0: USED];

# reduce
x_test = x_test0[used_indices] 
y_test = y_test0[used_indices] 
#
print('dataset', DATA_SET)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)
print('y_test labels:',set(y_test[:,1]))

# load training history
with open(checkpoint_dir + '/' + DATA_SET + '-history.json') as f:
    hist_json = json.load(f)
    history = hist_json['history']


dataset cifar10
x_train shape: (50000, 32, 32, 3)
x_test shape: (8000, 32, 32, 3)
y_train shape: (50000, 10)
y_test shape: (8000, 10)
y_test labels: {0.0, 1.0}


In [0]:

##########################################################################
# split datasets
##########################################################################

VIS_NUM_SPLITS = int(len(y_test) / VIS_NUM_POINTS)

desc_num_points = str(int(VIS_NUM_POINTS / 1000)) + 'k'

x_splits = np.empty((int(VIS_NUM_SPLITS), int(VIS_NUM_POINTS), img_rows, img_cols, num_channels))
y_splits = np.empty((int(VIS_NUM_SPLITS), int(VIS_NUM_POINTS), NUM_CLASSES))


In [0]:
p2 = umap2
p3 = umap3
p_name = 'tsne'

In [0]:

def write_file(content, path):
    file = open(path,'w+')
    file.write(content)
    file.close()
   
    
def write_fig(p2d, p3d, labels, fig_path):
    save_fig = plt.gcf()
    fig = plt.figure(figsize=(15, 7))

    ax = fig.add_subplot(1, 2, 1, projection='3d')

    ax.scatter(xs = p3d[:,0],
           ys = p3d[:,1],
           zs = p3d[:,2],
           c = labels,
           cmap = tableau10_colors, alpha = 1)
    plt.title(p_name + ' 3D after training')

    fig.add_subplot(1, 2, 2)

    plt.scatter(x = p2d[:,0],
            y = p2d[:,1],
            c = labels,
            cmap = tableau10_colors, alpha= 1)
    plt.title(p_name + ' 2D after training')

    plt.draw()
    fig.savefig(fig_path, dpi=100)
    plt.close(fig)

In [0]:

########################################################################
#compute projections
########################################################################

file_names = ""
test_acc_records = ""
training_acc_records = ""

# for each model
for e in np.arange(EPOCHS):
    model_path = checkpoint_dir + '/' + DATA_SET + '-e%04d.ckpt' % (e + 1)

    # load model
    print('.........loading ' + model_path + ' for ' + p_name)
    
    # reduce
    shuffle(all_indices)
    used_indices = all_indices[0: USED];
    
    x_test = x_test0[used_indices] 
    y_test = y_test0[used_indices] 
    
    saved_model = keras.models.load_model(model_path)

    target_layer_model = Model(inputs=saved_model.input,
                                 outputs=saved_model.layers[TARGET_LAYER].output)
    
    # resplit dataset
    kf = KFold(n_splits=VIS_NUM_SPLITS, shuffle = True)

    k = 0 # !!!
    for x_indice, y_indice in kf.split(x_test):

        x_splits[k] = x_test[y_indice]
        y_splits[k] = y_test[y_indice]
        k += 1
    

    len_splits = len(x_splits)
    
    for j in np.arange(len_splits):
        file_name = DATA_SET + '-e%04d'% (e + 1) + '-f%d'%j + '-' + desc_num_points  + '-l%d'%TARGET_LAYER + '-' + p_name + '.txt'
        fig_path = path_header + 'saved_figs/' + DATA_SET + '-e%04d'% (e + 1) + '-f%d'%j + '-' + desc_num_points + '-l%d'%TARGET_LAYER + '-' + p_name + '.png'

        x_ = x_splits[j]
        y_ = y_splits[j]
        y_labels = [int(np.argmax(y)) for y in y_]

        score = saved_model.evaluate(x_, y_, verbose = 0)

        y_target = target_layer_model.predict(x_)
        y_pred = saved_model.predict(x_)
        y_pred_labels = [int(np.argmax(y)) for y in y_pred]
        y_correctness = list(map(lambda x, y: True if x == y else False, y_pred_labels, y_labels))

        #projecting
        y_target_p2d = p2.fit_transform(y_target)
        y_target_p3d = p3.fit_transform(y_target)

        # normalized
        y_target_p2d_normed = y_target_p2d.transpose()
        y_target_p2d_normed = numpy.array([[(d - numpy.mean(d))/ numpy.std(d)] for d in y_target_p2d_normed]).reshape(2, VIS_NUM_POINTS).transpose()

        y_target_p3d_normed = y_target_p3d.transpose()
        y_target_p3d_normed = numpy.array([[(d - numpy.mean(d))/ numpy.std(d)] for d in y_target_p3d_normed]).reshape(3, VIS_NUM_POINTS).transpose()
        
        
        output_path = data_dir + '/' + file_name
        OUTPUT = ""

        OUTPUT += 'data_set\n' + DATA_SET + '\n' + \
        'num_classes\n' + str(NUM_CLASSES) + '\n' + \
        'data_points\n' + str(VIS_NUM_POINTS) + '\n' +\
         \
        'model\n' + model_path + '\n' +\
         \
        'color_schema\n' + ','.join(tableau10) + '\n' +\
        \
        'test_accuracy\n' + str(score[1]) + '\n' +\
        'test_loss\n' + str(score[0]) + '\n' +\
        \
        'train_accuracy\n' + str(history['acc'][e]) + '\n' +\
        'train_loss\n' + str(history['loss'][e]) + '\n' +\
        \
        'val_accuracy\n' + str(history['val_acc'][e]) + '\n' +\
        'val_loss\n' + str(history['val_loss'][e]) + '\n' +\
        \
        'labels\n' + ','.join([str(d) for d in y_labels]) + '\n' +\
        'pred_labels\n' + ','.join([str(d) for d in y_pred_labels]) + '\n' +\
        'pred_correctness\n' + ','.join([str(d) for d in y_correctness]) + '\n' +\
        \
        'p2d\n' + ';'.join([str(d[0]) + ',' + str(d[1]) for d in y_target_p2d_normed])  + '\n' +\
        \
        'p3d\n' + ';'.join([str(d[0]) + ',' + str(d[1])  + ',' + str(d[2]) for d in y_target_p3d_normed])  + '\n'

        file_names += file_name + ','
        test_acc_records += str(score[1]) + ','
        training_acc_records +=  str(history['acc'][e]) + ','
        
        file = open(output_path,'w+')
        file.write(OUTPUT)
        file.close()

        write_file(OUTPUT, output_path)
        write_fig(y_target_p2d_normed, y_target_p3d_normed, y_labels, fig_path)
        
        del y_target, y_pred, y_pred_labels, y_correctness, y_target_p2d, y_target_p3d, y_target_p2d_normed, y_target_p3d_normed
        gc.collect()

# write accuracy files
accuracy_path = data_dir + '/'+ 'accuracy.txt'
accuracy_content = file_names[:-1] + '\n' + test_acc_records[:-1] +'\n' + training_acc_records[:-1]
write_file(accuracy_content, accuracy_path)

.........loading /content/gdrive/My Drive/Colab Notebooks/cifar10-resnet/saved_models/cifar10-e0001.ckpt for tsne
