In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive; the folder
# constants used by the experiment cell (_FOLDER_DRIVE, _FOLDER_DATASET) live under that path.
from google.colab import drive

drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [1]:
import os
import numpy as np
import math
import scipy.io
from skimage.util import random_noise

#architecture
import keras.backend as K
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Reshape, UpSampling2D, Conv2DTranspose
from keras.models import Model, load_model
from keras.utils import to_categorical
from keras.optimizers import SGD
from tensorflow.keras.optimizers import Adam

from keras.datasets import cifar10
from keras.datasets import cifar100

# graphics
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

#------------------------------------------------------------------------DEFINES-------------------------------------------------------------------------------

# Root folder on the mounted Drive; every output file is written under its '_Model/' subfolder.
_FOLDER_DRIVE = '/content/gdrive/MyDrive/IC/'
# Folder the '<dataset>_<split>_<noise>_100.npy' arrays are loaded from (see loadData).
_FOLDER_DATASET = '/content/gdrive/MyDrive/IC/Datasets/'
# NOTE(review): not referenced anywhere in the visible code.
_SAMPLE = ''

# curriculum learning setup
# Number of plateaus used by the "ladder" and "ladderlog" pacing functions.
_NUMBER_STEPS_LADDER = 20
# When True the difficulty-sorted training set is returned in reverse order (highest score first).
_ANTI_CL = False

# training setup
# Number of passes of the manual training loop; also the horizon of the pacing functions.
_NUMBER_EPOCHS = 200
_BATCH_SIZE = 32
# NOTE(review): _LR and _MOMENTUM are not referenced in the visible code;
# setModel compiles the network with Adam() and its default settings.
_LR = 0.01
_MOMENTUM = 0.0001

# dataset setup
# Dataset name; used as the prefix of the input .npy files and of every output file.
_DATASET = 'mnist'
# Difficulty metric used by scoringBootstrap: 'rmse', 'chisc' or 'bhisc'.
_METRIC = 'rmse'
# NOTE(review): not referenced anywhere in the visible code.
_LABELS = 10
# Percentage of the difficulty-sorted training set that scoringBootstrap keeps.
_PERCENTUAL = 100

# DAE
# Noise level: selects the dataset files (as a percentage) and enters the BHISC/CHISC scores.
_NOISE_LEVEL = .1

#----------------------------------------------------------------------LOAD DATA-------------------------------------------------------------------------------

def color2gray(a):
	"""Return `a` unchanged.

	Despite its name, the function performs no conversion: the argument is
	handed back as-is (same object, no copy).
	"""
	return a

def loadData():
	"""Load the train/test arrays for the configured dataset and noise level.

	Reads four .npy files named '<_DATASET>_<split>_<noise percent>_100.npy'
	from _FOLDER_DATASET, divides every array by 255 and passes the training
	pair through scoringBootstrap (which reorders it by difficulty and may
	train or load the bootstrap model as a side effect).

	Returns:
		Tuple (Xtrain, Ytrain, Xtest, Ytest). The test arrays are only scaled.
	"""
	print("\tLoading dataset " + _DATASET + " with noise " + str(_NOISE_LEVEL) + " ... ")

	# round() before int(): a product such as 0.29 * 100 evaluates to
	# 28.999999999999996, which a bare int() would truncate to 28 and thereby
	# select the wrong file name.
	noisePercent = int(round(_NOISE_LEVEL * 100))

	def loadSplit(split):
		# One place that knows the file naming convention of the dataset folder.
		return np.load(_FOLDER_DATASET + '%s_%s_%s_100.npy' % (_DATASET, split, noisePercent))

	Xtrain = loadSplit('Xtrain')
	Xtest = loadSplit('Xtest')
	Ytrain = loadSplit('Ytrain')
	Ytest = loadSplit('Ytest')

	print("\t\tData train shape: ", Xtrain.shape)
	print("\t\tLabel train shape: ", Ytrain.shape)
	print("\t\tData test shape: ", Xtest.shape)
	print("\t\tLabel test shape: ", Ytest.shape)

	# Inputs and targets are both divided by 255
	# (presumably 8-bit images brought into [0, 1] -- TODO confirm).
	Xtrain = Xtrain / 255
	Xtest = Xtest / 255

	Ytrain = Ytrain / 255
	Ytest = Ytest / 255

	# Only the training pair is reordered by difficulty.
	Xtrain, Ytrain = scoringBootstrap(Xtrain, Ytrain)

	return Xtrain, Ytrain, Xtest, Ytest

#-------------------------------------------------------------------------PACING-------------------------------------------------------------------------------

def pacingFunction(X, Y, pacing, currentEpoch, numberEpochs=None, numberStepsLadder=None):
	"""Return the leading part of (X, Y) made available at `currentEpoch`.

	Args:
		X, Y: arrays with the samples along axis 0; only `X.shape[0]` and
			slicing are used.
		pacing: one of "linear", "log", "ladder", "ladderlog".
		currentEpoch: epoch number (the training loop passes 1-based values).
		numberEpochs: total number of epochs; defaults to _NUMBER_EPOCHS.
		numberStepsLadder: number of ladder plateaus; defaults to
			_NUMBER_STEPS_LADDER.

	Returns:
		Tuple (X[:size], Y[:size]); at least one sample is always returned.
		A size larger than the data set is clamped by the slice itself.

	Raises:
		ValueError: if `pacing` is not one of the supported names (the
			previous code failed with an UnboundLocalError instead).
	"""
	# Resolve the defaults at call time so changes to the module constants are honoured.
	if numberEpochs is None:
		numberEpochs = _NUMBER_EPOCHS
	if numberStepsLadder is None:
		numberStepsLadder = _NUMBER_STEPS_LADDER

	total = X.shape[0]

	def logFraction(epochValue):
		# log base `total` of (epochValue / numberEpochs) * total.
		# The logarithm is undefined for a non-positive argument and for a
		# base <= 1; those cases fall back to 0 so the caller gets one sample
		# instead of a ValueError / ZeroDivisionError.
		argument = (epochValue / numberEpochs) * total
		if argument <= 0 or total <= 1:
			return 0.0
		return math.log(argument, total)

	if pacing == "linear":
		size = int((total / numberEpochs) * currentEpoch)
	elif pacing == "log":
		size = int(logFraction(currentEpoch) * total)
	elif pacing == "ladder":
		# max(1, ...) avoids a zero step width when there are fewer epochs than ladder steps.
		sizeStep = max(1, int(numberEpochs / numberStepsLadder))
		currentStep = int(currentEpoch / sizeStep) + 1
		size = int(total * (currentStep / numberStepsLadder))
	elif pacing == "ladderlog":
		sizeStep = max(1, int(numberEpochs / numberStepsLadder))
		# Epoch value at the end of the current plateau, fed to the log schedule.
		currentStep = (int(currentEpoch / sizeStep) + 1) * sizeStep
		size = int(logFraction(currentStep) * total)
	else:
		raise ValueError("Unknown pacing function: %r" % (pacing,))

	if size <= 0:
		return X[:1], Y[:1]
	return X[:size], Y[:size]

#------------------------------------------------------------------------METRICS-------------------------------------------------------------------------------

def RMSE(y, x):
	"""Root of the summed squared difference divided by y.shape[0] * y.shape[1].

	Both arrays are cast to float before subtracting. The divisor only uses
	the first two axes of `y`, so any further axes (e.g. channels) contribute
	to the sum but not to the normalisation.

	Returns:
		A Python float.
	"""
	residual = y.astype("float") - x.astype("float")
	normaliser = float(y.shape[0] * y.shape[1])
	meanSquared = np.sum(residual ** 2) / normaliser
	return math.sqrt(meanSquared)

def HOMO(img):
  """Per-channel weighted sum of an (rows, cols, channels) array.

  Every value img[i][j][k] is divided by 1 + |i - j|, so entries on the
  main diagonal count fully and entries far from it count less
  (presumably a homogeneity-style measure -- TODO confirm).

  Returns:
    A list with one accumulated value per channel (axis 2).
  """
  rowCount, colCount, channelCount = img.shape[0], img.shape[1], img.shape[2]
  perChannel = []

  for channel in range(channelCount):
    accumulated = 0
    # Row-major accumulation order is kept so floating-point sums are unchanged.
    for row in range(rowCount):
      for col in range(colCount):
        accumulated += img[row][col][channel] / (1 + abs(row - col))
    perChannel.append(accumulated)

  return perChannel

def BHISC(x):
  """Score `x` from its largest per-channel HOMO value and _NOISE_LEVEL.

  Computes (max(HOMO(x)) * a) / noise^2 with a = noise^2 / 100.
  NOTE(review): the noise terms cancel algebraically, so apart from
  floating-point rounding this equals max(HOMO(x)) / 100; with a noise
  level of 0 the final division is 0 / 0.
  """
  noiseSquared = _NOISE_LEVEL * _NOISE_LEVEL
  scale = float(noiseSquared / 100)
  strongestChannel = max(HOMO(x))
  return (strongestChannel * scale) / noiseSquared


def CHISC(x, i):
  """BHISC(x) plus a noise-dependent constant and a tiny index-dependent term.

  The added constant is (0.1 * _NOISE_LEVEL) / 0.0008; the index term is
  i / 1e10, which makes the score of otherwise equal samples increase with
  their position `i`.
  """
  delta = 0.1
  k = 0.0008
  noiseOffset = (delta * _NOISE_LEVEL) / (k)
  indexOffset = i / 10000000000
  return BHISC(x) + (noiseOffset + indexOffset)

#------------------------------------------------------------------------SCORING-------------------------------------------------------------------------------

def scoringBootstrap(X, Y):
	"""Sort the training pairs by difficulty score and keep _PERCENTUAL percent.

	A bootstrap autoencoder is trained (or loaded when its weights file
	already exists under _FOLDER_DRIVE + '_Model/'). Every sample is then
	scored with the metric named by _METRIC:
		'rmse'  -- RMSE between the bootstrap prediction and the target,
		'chisc' -- CHISC(X[i], i),
		'bhisc' -- BHISC(X[i]).

	Args:
		X, Y: numpy arrays with the samples along axis 0.

	Returns:
		Tuple (Xsorted, Ysorted) ordered by ascending score, or descending
		when _ANTI_CL is set, truncated to the first _PERCENTUAL percent.

	Raises:
		Exception: 'Inexistent Metric!' when _METRIC is not supported. This is
			now checked before the bootstrap model is trained or loaded.
	"""
	# Fail fast: previously an unknown metric was only detected after training.
	if _METRIC not in ('rmse', 'chisc', 'bhisc'):
		raise Exception('Inexistent Metric!')

	if not os.path.exists(_FOLDER_DRIVE + '_Model/%s_bootstrap_weights.h5' % _DATASET):
		print("\tTraining scoring model ...")
		bootstrapModel = setModel()
		# 1% of the full schedule, but never zero epochs (int() truncates to 0
		# when _NUMBER_EPOCHS is below 100, which would score with an untrained model).
		bootstrapEpochs = max(1, int(_NUMBER_EPOCHS*0.01))
		history = bootstrapModel.fit(x=X, y=Y, batch_size=_BATCH_SIZE, epochs=bootstrapEpochs, shuffle=True, verbose=1)
		bootstrapModel.save_weights(_FOLDER_DRIVE + '_Model/%s_bootstrap_weights.h5' % _DATASET)
		bootstrapModel.save(_FOLDER_DRIVE + '_Model/%s_model.h5' % _DATASET)
	else:
		print("\tLoading scoring model ...")
		bootstrapModel = load_model(_FOLDER_DRIVE + '_Model/%s_model.h5' % _DATASET)
		bootstrapModel.load_weights(_FOLDER_DRIVE + '_Model/%s_bootstrap_weights.h5' % _DATASET)

	print("\tDefining difficulty level with metric %s..." % _METRIC)

	sampleCount = Y.shape[0]
	if _METRIC == 'rmse':
		pred = bootstrapModel.predict(X)
		score = [RMSE(pred[i], Y[i]) for i in range(sampleCount)]
	elif _METRIC == 'chisc':
		score = [CHISC(X[i], i) for i in range(sampleCount)]
	else:
		score = [BHISC(X[i]) for i in range(sampleCount)]
	del bootstrapModel

	# One permutation applied to both arrays keeps every (X, Y) pair together.
	# The previous code sorted zip(score, X) and zip(score, Y) independently,
	# so tied scores were broken by X content in one list and by Y content in
	# the other, which could misalign the pairs. A stable argsort keeps tied
	# samples in their original order and avoids the nested-list conversion.
	score = np.asarray(score, dtype=float)
	if _ANTI_CL:
		order = np.argsort(-score, kind='stable')
	else:
		order = np.argsort(score, kind='stable')

	Xsorted = X[order]
	Ysorted = Y[order]

	if _PERCENTUAL == 100:
		return Xsorted, Ysorted

	size = int(Xsorted.shape[0]*(_PERCENTUAL/100))
	return Xsorted[:size], Ysorted[:size]

#-------------------------------------------------------------------ARCHITECTURE-------------------------------------------------------------------------------

def setModel(inputShape=(28, 28, 1)):
	"""Build and compile the convolutional autoencoder.

	Encoder: strided 3x3 convolution, 3x3 'valid' convolution, 2x2 max
	pooling, then a Flatten layer named 'code'. Decoder: reshape back to the
	pooled feature map, 2x upsampling and two transposed convolutions that
	mirror the encoder.

	Args:
		inputShape: (height, width, channels) of the images, with fixed
			spatial sizes; assumes channels-last data. For the output to
			match the input size, height and width should be multiples of 4
			(true for 28 and 32).

	Returns:
		The compiled Keras model (mean squared error loss, Adam optimizer).
	"""
	input_img = Input(shape=inputShape)

	encoder = Conv2D(8, kernel_size=(3, 3), strides=(2, 2), padding='same', activation='relu')(input_img)
	encoder = Conv2D(8, kernel_size=(3, 3), padding='valid', activation='relu')(encoder)
	encoder = MaxPooling2D(pool_size=(2, 2))(encoder)
	# Remember the pooled feature-map shape so the decoder starts from exactly
	# what the encoder produced. For a 28x28 input this is 6x6x8 (288 values);
	# the previously hard-coded Reshape((7, 7, 8)) needs 392 values and only
	# fits a 32x32 input, so the model could not be built for 28x28.
	codeShape = tuple(int(d) for d in encoder.shape[1:])
	encoder = Flatten(name='code')(encoder)
	print(encoder.shape)
	decoder = Reshape(codeShape)(encoder)
	decoder = UpSampling2D((2, 2))(decoder)
	decoder = Conv2DTranspose(8, kernel_size=(3, 3), padding='valid', activation='relu')(decoder)
	# Reconstruct as many channels as the input has (1 for the default shape).
	decoder = Conv2DTranspose(inputShape[-1], kernel_size=(3, 3), strides=(2, 2), padding='same', activation='relu')(decoder)

	autoencoder = Model(input_img, decoder)
	autoencoder.compile(loss='mean_squared_error', optimizer=Adam())

	return autoencoder

#-----------------------------------------------------------------------TRAINING-------------------------------------------------------------------------------

def trainingTestingModel(model, Xtrain, Ytrain, Xtest, Ytest, pacing):
	"""Train `model` for _NUMBER_EPOCHS epochs under the given pacing and save the results.

	Each epoch selects the available training subset (the whole set for
	"constant", otherwise whatever pacingFunction returns), visits it in
	shuffled mini-batches with train_on_batch and then evaluates on the test
	set. Afterwards the weights, the full model and three per-epoch text
	series (training loss, test error, subset size) are written under
	_FOLDER_DRIVE + '_Model/'.

	Note: the training loss recorded for an epoch is the value returned for
	its last mini-batch, not an average over the epoch.
	"""
	print("\tTraining curriculum model with " + pacing + "...")

	# Entry 0 of every series is a placeholder for the state before training.
	errTrain = [1]
	errTest = [1]
	sizes = [0]

	for epoch in range(1, _NUMBER_EPOCHS + 1):
		if pacing == "constant":
			Xcurriculum, Ycurriculum = Xtrain, Ytrain
		else:
			Xcurriculum, Ycurriculum = pacingFunction(Xtrain, Ytrain, pacing, epoch)

		available = Xcurriculum.shape[0]
		perm = np.random.permutation(available)
		for start in range(0, available, _BATCH_SIZE):
			# Slicing clamps at the end of `perm`, so the last (shorter) batch needs no special case.
			chosen = perm[start : start + _BATCH_SIZE]
			loss = model.train_on_batch(Xcurriculum[chosen], Ycurriculum[chosen])

		err = model.evaluate(Xtest, Ytest, batch_size=_BATCH_SIZE)
		errTrain.append(loss)
		errTest.append(err)
		sizes.append(available)
		print("\t\tEpoch %i/%i (%s), LR (%.10f): Loss(%.15f), Err Test(%.15f)." % (epoch, _NUMBER_EPOCHS, available, K.eval(model.optimizer.lr), loss, err))

	outputPrefix = _FOLDER_DRIVE
	model.save_weights(outputPrefix + '_Model/%s_%s_weights_%i.h5' % (_DATASET, pacing, _PERCENTUAL))
	model.save(outputPrefix + '_Model/%s_%s_model.h5' % (_DATASET, pacing))
	# The '..._accTest_...' file holds the per-epoch test error returned by evaluate().
	np.savetxt(outputPrefix + '_Model/%s_%s_loss_%i.txt' % (_DATASET, pacing, _PERCENTUAL), errTrain, fmt="%s")
	np.savetxt(outputPrefix + '_Model/%s_%s_accTest_%i.txt' % (_DATASET, pacing, _PERCENTUAL), errTest, fmt="%s")
	np.savetxt(outputPrefix + '_Model/%s_%s_mode_%i.txt' % (_DATASET, pacing, _PERCENTUAL), sizes, fmt="%s")
	del model

#-------------------------------------------------------------------------RESULT-------------------------------------------------------------------------------

def checkHardnessScoring(X, Y):
	"""Train one fresh model per quarter of (X, Y) and plot the four loss curves.

	The data is expected to be ordered by difficulty already. Each quarter is
	fitted for 11 epochs with a new setModel() network, and the training-loss
	histories are drawn into '<_DATASET>_scoring.png' under
	_FOLDER_DRIVE + '_Model/'. With _ANTI_CL set, the quarters are plotted in
	reverse so that the "Easier" label still goes to the last quarter.
	"""
	print("\tChecking the hardness score...")

	# Quarter boundaries; None stands for the open ends of the first and last slice.
	cutsX = [None, int(X.shape[0]*0.25), int(X.shape[0]*0.5), int(X.shape[0]*0.75), None]
	cutsY = [None, int(Y.shape[0]*0.25), int(Y.shape[0]*0.5), int(Y.shape[0]*0.75), None]

	histories = []
	for quarter in range(4):
		quarterModel = setModel()
		quarterHistory = quarterModel.fit(
			x=X[cutsX[quarter]:cutsX[quarter + 1]],
			y=Y[cutsY[quarter]:cutsY[quarter + 1]],
			batch_size=_BATCH_SIZE, epochs=11, shuffle=True, verbose=1)
		histories.append(quarterHistory)
		del quarterModel

	print("\tPlotting the training loss graphic ...")
	plotOrder = histories[::-1] if _ANTI_CL else histories
	for quarterHistory, label in zip(plotOrder, ["Easier", "Easy", "Hard", "Harder"]):
		plt.plot(quarterHistory.history['loss'], label=label)

	plt.legend(loc='upper right')
	plt.ylabel('Loss')
	plt.xlabel('Epochs')
	plt.xlim(left=1)
	plt.ylim(bottom=0)
	plt.margins(0.5, 0.5)
	plt.savefig(_FOLDER_DRIVE + "_Model/%s_scoring.png" % _DATASET)
	plt.close()

def _plotPacingCurves(message, kind, legendLoc, yLabel):
	"""Plot the saved '<kind>' series of all five pacing runs into one figure.

	Reads '<_DATASET>_<pacing>_<kind>_<_PERCENTUAL>.txt' for every pacing
	function and saves the comparison as '<_DATASET>_<kind>.png', both under
	_FOLDER_DRIVE + '_Model/'.
	"""
	print(message)

	pacingLabels = [
		("constant", "Constant"),
		("linear", "Linear"),
		("log", "Log"),
		("ladder", "Ladder"),
		("ladderlog", "Ladder Log"),
	]
	# Load everything first so a missing file fails before a figure is opened.
	curves = [
		(np.loadtxt(_FOLDER_DRIVE + '_Model/%s_%s_%s_%s.txt' % (_DATASET, pacing, kind, str(_PERCENTUAL))), label)
		for pacing, label in pacingLabels
	]

	plt.figure(figsize=(14.4, 4.8))
	for values, label in curves:
		plt.plot(values, label=label)
	plt.legend(loc=legendLoc)
	plt.ylabel(yLabel)
	plt.xlabel('Epochs')
	plt.xlim(left=1)
	plt.ylim(bottom=0)
	plt.margins(0.5, 0.5)
	plt.savefig(_FOLDER_DRIVE + "_Model/%s_%s.png" % (_DATASET, kind))
	plt.close()

def performanceGraphs():
	"""Create the three comparison figures (training loss, test error, subset size).

	Relies on the text files written by trainingTestingModel for the
	"constant", "linear", "log", "ladder" and "ladderlog" runs.
	"""
	_plotPacingCurves("\tPlotting the loss performance ...", "loss", 'upper right', 'Loss')
	# The 'accTest' files contain the value returned by model.evaluate(), i.e.
	# the mean-squared-error test loss of the autoencoder, so the axis is
	# labelled as an error rather than as 'Accuracy'.
	_plotPacingCurves("\tPlotting the testing accuracy performance ...", "accTest", 'lower right', 'Test error (MSE)')
	_plotPacingCurves("\tPlotting the example size performance ...", "mode", 'center right', 'Number of examples')

#--------------------------------------------------------------------------------------------------------------------------------------------------------------

if __name__ == '__main__':
	# Every artefact (weights, models, series, figures) is written into this folder.
	modelFolder = _FOLDER_DRIVE + "_Model/"
	if not os.path.exists(modelFolder):
		os.makedirs(modelFolder)

	Xtrain, Ytrain, Xtest, Ytest = loadData()

	# One freshly initialised model per pacing function, trained in this order.
	for pacingName in ("constant", "linear", "log", "ladder", "ladderlog"):
		model = setModel()
		trainingTestingModel(model, Xtrain, Ytrain, Xtest, Ytest, pacingName)

	checkHardnessScoring(Xtrain, Ytrain)
	performanceGraphs()

	print("\tDone!!")

In [2]:
# Archive the '_Model' results folder and pass the zip to Colab's files.download.
from google.colab import files

# NOTE(review): the archive name is hard-coded (CIFAR10 / ANTICL / CHISC / 30 / 200)
# and is not derived from the configuration constants of the experiment cell
# (_DATASET, _ANTI_CL, _METRIC, _NOISE_LEVEL, _NUMBER_EPOCHS); rename it per run.
!zip -r /content/CIFAR10_ANTICL_CHISC_30_200.zip /content/gdrive/MyDrive/IC/_Model

files.download('/content/CIFAR10_ANTICL_CHISC_30_200.zip')