# This program loads the grayscale (0,0,0) images stored in the .npy files and shuffles the data into training, validation and test sets.

In [17]:
#echo 1 | sudo tee /proc/sys/vm/overcommit_memory
#export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64

import numpy as np

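#The two commented-out lines at the top of this cell are shell commands, not Python: the first sets vm.overcommit_memory to 1 so more memory can be allocated, the second adds the CUDA libraries to LD_LIBRARY_PATH so the GPU can be used for ML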


In [18]:
# Folder that holds the pre-processed .npy files; edit this to point at another data set.
Path = "Quarter32Bit"

# Build both file names first, then read the image array and its label array.
image_file = Path + "/ImageDataNorm.npy"
label_file = Path + "/ImageLab.npy"

AllData = np.load(image_file)    # image data
AllLabel = np.load(label_file)   # labels, indexed by crystal like AllData


In [19]:
# Decide how many crystals go into the training, validation and test sets and
# draw a random ordering of the crystal indices.
# NOTE(review): no random seed is set in the cells visible here, so the split
# differs on every run.

###########
# Tunable proportions of the data set.
TrainRatio = 0.8                               # share used for training
ValidationRatio = 0.1                          # share used for validation
TestRatio = 1 - TrainRatio - ValidationRatio   # remainder is the test share
###########

# Number of crystals = length of the first axis of AllData.
No_Crystals = len(AllData)

# Random permutation of the crystal indices; consecutive slices of it become
# the three data sets.
Shuffle_Index = np.arange(No_Crystals)
np.random.shuffle(Shuffle_Index)

# Crystal counts per set. The test set takes whatever is left after rounding,
# so the three counts always add up to No_Crystals.
TrainCrystalNo = round(No_Crystals * TrainRatio)
ValCrystalNo = round(No_Crystals * ValidationRatio)
TestCrystalNo = No_Crystals - (TrainCrystalNo + ValCrystalNo)

# Pixels per image (product of the last two image axes).
PixleNo = AllData.shape[2] * AllData.shape[3]

In [20]:
# Allocate the three data sets and copy every shuffled crystal into its set.
images_per_crystal = AllData.shape[1]   # length of axis 1 of AllData
crystal_shape = AllData.shape[1:4]      # shape of one crystal's block of images

# Image arrays: one float32 block per crystal.
train_images = np.zeros((TrainCrystalNo,) + crystal_shape, dtype=np.float32)
val_images = np.zeros((ValCrystalNo,) + crystal_shape, dtype=np.float32)
test_images = np.zeros((TestCrystalNo,) + crystal_shape, dtype=np.float32)

# Label arrays: one uint8 entry per image of each crystal.
train_lab = np.zeros((TrainCrystalNo, images_per_crystal), dtype=np.uint8)
val_lab = np.zeros((ValCrystalNo, images_per_crystal), dtype=np.uint8)
test_lab = np.zeros((TestCrystalNo, images_per_crystal), dtype=np.uint8)

# Positions in Shuffle_Index where the validation and test slices begin.
val_start = TrainCrystalNo
test_start = TrainCrystalNo + ValCrystalNo

# Crystals are copied one at a time, so no extra full-size temporary is created.
for row, crystal in enumerate(Shuffle_Index[:val_start]):
    train_images[row] = AllData[crystal]
    train_lab[row] = AllLabel[crystal]

for row, crystal in enumerate(Shuffle_Index[val_start:test_start]):
    val_images[row] = AllData[crystal]
    val_lab[row] = AllLabel[crystal]

for row, crystal in enumerate(Shuffle_Index[test_start:]):
    test_images[row] = AllData[crystal]
    test_lab[row] = AllLabel[crystal]
		



In [21]:
# Shuffle the individual images inside each data set so that the images that
# belong to one crystal are no longer stored next to each other, then one-hot
# encode the labels.

# One random permutation per data set (one index per image after flattening).
Shuffle_Index_Train = np.arange(TrainCrystalNo * AllData.shape[1])
Shuffle_Index_Val = np.arange(ValCrystalNo * AllData.shape[1])
Shuffle_Index_Test = np.arange(TestCrystalNo * AllData.shape[1])

np.random.shuffle(Shuffle_Index_Train)
np.random.shuffle(Shuffle_Index_Val)
np.random.shuffle(Shuffle_Index_Test)

# Flatten the data by merging the crystal axis and axis 1 into a single axis
# and append a trailing axis of length 1 to the images.
train_images = train_images.reshape(-1, AllData.shape[2], AllData.shape[3], 1)
val_images = val_images.reshape(-1, AllData.shape[2], AllData.shape[3], 1)
test_images = test_images.reshape(-1, AllData.shape[2], AllData.shape[3], 1)
train_lab = train_lab.reshape(-1)
val_lab = val_lab.reshape(-1)
test_lab = test_lab.reshape(-1)

# Apply each permutation with integer-array indexing. This returns a NEW array,
# so every image/label appears exactly once in the result and images stay
# paired with their labels.
# Do not replace this with "copy = array" followed by an element-wise loop:
# plain assignment only creates a second name for the same array, so the loop
# would overwrite rows before they are read and duplicate/lose samples.
train_images = train_images[Shuffle_Index_Train].astype("float32", copy=False)
train_lab = train_lab[Shuffle_Index_Train]

val_images = val_images[Shuffle_Index_Val].astype("float32", copy=False)
val_lab = val_lab[Shuffle_Index_Val]

test_images = test_images[Shuffle_Index_Test].astype("float32", copy=False)
test_lab = test_lab[Shuffle_Index_Test]

#Turn label arrays into categorical
from keras.utils import to_categorical

# Use one class count for all three sets. Without it to_categorical sizes each
# matrix from that set's own largest label, so a set that happens to miss the
# highest class would get fewer columns than the others.
ClassNo = int(max(train_lab.max(), val_lab.max(), test_lab.max())) + 1

train_lab = to_categorical(train_lab, num_classes=ClassNo)
val_lab = to_categorical(val_lab, num_classes=ClassNo)
test_lab = to_categorical(test_lab, num_classes=ClassNo)

####################################################################################Data is ready