### Dataset Credit: https://www.cs.toronto.edu/~kriz/cifar.html 
### Collected by: Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton. 

In [2]:
import sys
import os
import pickle
import time
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
import matplotlib.pyplot as plt
import progressbar as pb
import numpy as np

print("=========== Cifar10 Extractor ===========\n")

# Load the CIFAR-10 train/test splits through Keras.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Replace the integer class ids with one-hot rows.
y_train, y_test = np_utils.to_categorical(y_train), np_utils.to_categorical(y_test)

# Human-readable class name -> CIFAR-10 class id (the position in this tuple).
label_names = {
    name: class_id
    for class_id, name in enumerate((
        "Airplane",
        "Automobile",
        "Bird",
        "Cat",
        "Deer",
        "Dog",
        "Frog",
        "Horse",
        "Ship",
        "Truck",
    ))
}

# Client parameters: the two classes to extract from the dataset.
extract_label_1 = "Dog"
extract_label_2 = "Cat"
selected_label1, selected_label2 = (
    label_names[extract_label_1],
    label_names[extract_label_2],
)
print("Selected Labels: ",extract_label_1," and ",extract_label_2)

############################################################
####             Extracting Training Set                ####
############################################################

print("============= Extracting Training Set ==============")

# Per-label quotas. The progress-bar maximum, the loop exit test and the
# output file name are all derived from these two numbers, so changing a
# quota cannot leave a stale hard-coded value behind.
total_target_training_set_label1 = 300
total_target_training_set_label2 = 300
total_target_set = total_target_training_set_label1 + total_target_training_set_label2

# `widgets` is kept as a module-level name because the testing section of
# the original script reuses it.
widgets = ['Extracting: ', pb.Percentage(), ' ',
           pb.Bar(marker=pb.RotatingMarker()), ' ', pb.ETA()]
timer = pb.ProgressBar(widgets=widgets, maxval=total_target_set).start()

# Create destination folder (exist_ok avoids the check-then-create race).
training_dir = ("Extracted_Images_" + extract_label_1 + "_" + extract_label_2
                + "_Training_Set")
os.makedirs(training_dir, exist_ok=True)

# Scan the whole split rather than assuming it holds exactly 50000 samples.
total_training_set = len(X_train)
bin_output_X_train = []
bin_output_y_train = []

# The figure geometry is taken from the first image and reused for every
# sample, so compute it once instead of on every iteration.
sizes = np.shape(X_train[0])
height = float(sizes[0])
width = float(sizes[1])

label1_j = 0  # images of extract_label_1 collected so far
label2_j = 0  # images of extract_label_2 collected so far

for i in range(total_training_set):
    # y_train rows are one-hot, so argmax recovers the class id.
    class_id = int(np.argmax(y_train[i]))
    wanted_label1 = (class_id == selected_label1
                     and label1_j < total_target_training_set_label1)
    wanted_label2 = (class_id == selected_label2
                     and label2_j < total_target_training_set_label2)
    if not (wanted_label1 or wanted_label2):
        continue

    # Keep the raw sample for the pickled subset.
    bin_output_X_train.append(X_train[i])
    bin_output_y_train.append(y_train[i])

    # Render the sample as a borderless PNG, one image pixel per output
    # pixel: the figure is (width/height x 1) inches and dpi == height.
    fig = plt.figure()
    fig.set_size_inches(width / height, 1, forward=False)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    # No cmap: matplotlib ignores it for RGB input, so 'gray' had no effect.
    ax.imshow(X_train[i])
    # Files are numbered 0..N-1 in the order the samples were found.
    fig.savefig(os.path.join(training_dir,
                             "Dataset_" + str(label1_j + label2_j) + ".png"),
                dpi=height)
    # Close this specific figure so figures do not pile up in memory.
    plt.close(fig)

    # Increment the counter of the label that was just stored.
    if wanted_label1:
        label1_j += 1
    else:
        label2_j += 1

    # Report progress after counting, so the bar shows completed images.
    timer.update(label1_j + label2_j)

    # Stop scanning once BOTH quotas are met. Each counter is compared with
    # its own quota (the original compared label 2 against label 1's quota).
    if (label1_j >= total_target_training_set_label1
            and label2_j >= total_target_training_set_label2):
        break

output_filename = ("training_dataset_" + extract_label_1 + "_" + extract_label_2
                   + "_" + str(total_target_set) + ".dat")

# Writing binary output with X_train and y_train data
with open(output_filename, "wb") as f:
    pickle.dump([bin_output_X_train, bin_output_y_train], f)
timer.finish()
time.sleep(1)

#-------------------------------------------------------------------------------------------------------#


############################################################
####             Extracting Testing Set                 ####
############################################################

print("============= Extracting Testing Set ==============")

# Per-label quotas. The progress-bar maximum, the loop exit test and the
# output file name are all derived from these two numbers, so changing a
# quota cannot leave a stale hard-coded value behind.
total_target_testing_set_label1 = 100
total_target_testing_set_label2 = 100
total_target_set = total_target_testing_set_label1 + total_target_testing_set_label2

# Build a fresh widget list for this bar instead of depending on the one
# left behind by the training section.
widgets = ['Extracting: ', pb.Percentage(), ' ',
           pb.Bar(marker=pb.RotatingMarker()), ' ', pb.ETA()]
timer2 = pb.ProgressBar(widgets=widgets, maxval=total_target_set).start()

# Create destination folder (exist_ok avoids the check-then-create race).
testing_dir = ("Extracted_Images_" + extract_label_1 + "_" + extract_label_2
               + "_Testing_Set")
os.makedirs(testing_dir, exist_ok=True)

# Scan the whole split rather than assuming it holds exactly 10000 samples.
total_testing_set = len(X_test)
bin_output_X_test = []
bin_output_y_test = []

# The figure geometry is taken from the first image and reused for every
# sample, so compute it once instead of on every iteration.
sizes = np.shape(X_test[0])
height = float(sizes[0])
width = float(sizes[1])

label1_j = 0  # images of extract_label_1 collected so far
label2_j = 0  # images of extract_label_2 collected so far

for i in range(total_testing_set):
    # y_test rows are one-hot, so argmax recovers the class id.
    class_id = int(np.argmax(y_test[i]))
    wanted_label1 = (class_id == selected_label1
                     and label1_j < total_target_testing_set_label1)
    wanted_label2 = (class_id == selected_label2
                     and label2_j < total_target_testing_set_label2)
    if not (wanted_label1 or wanted_label2):
        continue

    # Keep the raw sample for the pickled subset.
    bin_output_X_test.append(X_test[i])
    bin_output_y_test.append(y_test[i])

    # Render the sample as a borderless PNG, one image pixel per output
    # pixel: the figure is (width/height x 1) inches and dpi == height.
    fig = plt.figure()
    fig.set_size_inches(width / height, 1, forward=False)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    # No cmap: matplotlib ignores it for RGB input, so 'gray' had no effect.
    ax.imshow(X_test[i])
    # Files are numbered 0..N-1 in the order the samples were found.
    fig.savefig(os.path.join(testing_dir,
                             "Dataset_" + str(label1_j + label2_j) + ".png"),
                dpi=height)
    # Close this specific figure so figures do not pile up in memory.
    plt.close(fig)

    # Increment the counter of the label that was just stored.
    if wanted_label1:
        label1_j += 1
    else:
        label2_j += 1

    # Report progress after counting, so the bar shows completed images.
    timer2.update(label1_j + label2_j)

    # Stop scanning once BOTH quotas are met. Each counter is compared with
    # its own quota (the original compared label 2 against label 1's quota).
    if (label1_j >= total_target_testing_set_label1
            and label2_j >= total_target_testing_set_label2):
        break

output_filename = ("testing_dataset_" + extract_label_1 + "_" + extract_label_2
                   + "_" + str(total_target_set) + ".dat")

# Writing binary output with X_test and y_test data
with open(output_filename, "wb") as f:
    pickle.dump([bin_output_X_test, bin_output_y_test], f)

timer2.finish()
time.sleep(1)

print("Process Completed")


Selected Labels:  Dog  and  Cat


Extracting: 100% |||||||||||||||||||||||||||||||||||||||||||||||| Time: 0:00:32




Extracting: 100% |||||||||||||||||||||||||||||||||||||||||||||||| Time: 0:00:15


Process Completed
