In [1]:
#echo 1 | sudo tee /proc/sys/vm/overcommit_memory
#export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64

import numpy as np

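#The two commented-out shell commands at the top of this cell enable memory overcommit (more memory available to the process) and add the CUDA libraries to LD_LIBRARY_PATH so the GPU can be used for ML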


In [2]:
#Folder that holds the input .npy files
Path = "Quarter32Bit"

#Build the two file locations inside that folder
ImageFile = Path + "/ImageGradFourierNorm.npy"
LabelFile = Path + "/ImageLab.npy"

#Read the image data and the labels
AllData = np.load(ImageFile)
AllLabel = np.load(LabelFile)

In [4]:
#Determine size of data and choose what proportion of data goes into training, validation and testing
#NOTE(review): no random seed is set in the visible cells, so the split differs on every run - confirm this is intended
No_Crystals = AllData.shape[0] #Number of crystals (first axis of AllData)
ThicknessNo = AllData.shape[1] #Number of thicknesses per crystal (second axis of AllData)

Shuffle_Index = np.arange(No_Crystals) #Crystal indices, shuffled so crystals land in different data sets
np.random.shuffle(Shuffle_Index) #Shuffle indices in place

#Keep a memory of what goes where so we can back track the shuffling.
#UnShuffle_Index[i][j] holds a pair [crystal index, thickness index].
#The shape is taken from AllData instead of a hard-coded 10 thicknesses, so it always
#agrees with the loops that index it with AllData.shape[1].
UnShuffle_Index = np.arange(No_Crystals * ThicknessNo * 2).reshape(No_Crystals, ThicknessNo, 2)
UnShuffle_Index[:, :, 1] = np.arange(ThicknessNo) #Thickness index; column 0 still holds placeholder values here

###########
TrainRatio = 0.8 #Proportion of training data
ValidationRatio = 0.1 #Proportion of validation data
TestRatio = 1 - TrainRatio - ValidationRatio #Proportion of test data

###########

#Work out the number of crystals in each set
TrainCrystalNo = round(TrainRatio * No_Crystals)
ValCrystalNo = round(ValidationRatio * No_Crystals)
TestCrystalNo = No_Crystals - TrainCrystalNo - ValCrystalNo #Whatever is left after rounding goes to the test set
PixleNo = AllData.shape[2] * AllData.shape[3] #Size of image

In [5]:
#Allocate the image arrays: one float32 block per data set, shaped
#(crystals in the set, thicknesses, AllData.shape[2], AllData.shape[3], PixelDimension)
PixelDimension = 3
SampleShape = (AllData.shape[1], AllData.shape[2], AllData.shape[3], PixelDimension)
train_images = np.zeros((TrainCrystalNo,) + SampleShape, dtype = np.float32)
val_images = np.zeros((ValCrystalNo,) + SampleShape, dtype = np.float32)
test_images = np.zeros((TestCrystalNo,) + SampleShape, dtype = np.float32)

#Allocate the label arrays: one uint8 entry per (crystal, thickness)
train_lab = np.zeros((TrainCrystalNo, AllData.shape[1]), dtype = np.uint8)
val_lab = np.zeros((ValCrystalNo, AllData.shape[1]), dtype = np.uint8)
test_lab = np.zeros((TestCrystalNo, AllData.shape[1]), dtype = np.uint8)

#Record which original crystal ends up in each shuffled position (same value for every thickness)
UnShuffle_Index[:No_Crystals, :AllData.shape[1], 0] = Shuffle_Index[:, np.newaxis]

#Positions in Shuffle_Index where the validation and test sets begin
ValStart = TrainCrystalNo
TestStart = TrainCrystalNo + ValCrystalNo

#Copy the shuffled crystals into training, validation and test, one crystal at a time
for Slot, Crystal in enumerate(Shuffle_Index[:ValStart]):
    train_images[Slot] = AllData[Crystal]
    train_lab[Slot] = AllLabel[Crystal]

for Slot, Crystal in enumerate(Shuffle_Index[ValStart:TestStart]):
    val_images[Slot] = AllData[Crystal]
    val_lab[Slot] = AllLabel[Crystal]

for Slot, Crystal in enumerate(Shuffle_Index[TestStart:]):
    test_images[Slot] = AllData[Crystal]
    test_lab[Slot] = AllLabel[Crystal]



In [6]:
#We must shuffle the data again so the different thickness are not in order.
#NOTE: this cell overwrites train/val/test images and labels (the labels end up one-hot),
#so re-run the previous cell before running this one a second time.
ThicknessNo = AllData.shape[1]
TrainSampleNo = TrainCrystalNo * ThicknessNo
ValSampleNo = ValCrystalNo * ThicknessNo
TestSampleNo = TestCrystalNo * ThicknessNo

#One independent random ordering per data set
Shuffle_Index_Train = np.random.permutation(TrainSampleNo)
Shuffle_Index_Val = np.random.permutation(ValSampleNo)
Shuffle_Index_Test = np.random.permutation(TestSampleNo)

#Flatten the data by merging the different crystals and different thickness into 1 dimension
ImageShape = (AllData.shape[2], AllData.shape[3], PixelDimension)
train_images = train_images.reshape((-1,) + ImageShape)
val_images = val_images.reshape((-1,) + ImageShape)
test_images = test_images.reshape((-1,) + ImageShape)
train_lab = train_lab.reshape(-1)
val_lab = val_lab.reshape(-1)
test_lab = test_lab.reshape(-1)

#Flatten the lookup table the same way: one [crystal, thickness] row per sample
UnShuffle_Index = UnShuffle_Index.reshape(-1, 2)

#Shuffle data for training, validation and testing.
#Indexing with an index array builds a NEW array. The previous version assigned
#"x_copy = x" (an alias, not a copy) and then wrote x_copy[i] = x[Shuffle[i]] in a loop,
#which read rows that had already been overwritten: samples were duplicated/lost and
#no longer agreed with UnShuffle_Index_All.
train_images = train_images[Shuffle_Index_Train].astype("float32", copy = False)
val_images = val_images[Shuffle_Index_Val].astype("float32", copy = False)
test_images = test_images[Shuffle_Index_Test].astype("float32", copy = False)
train_lab = train_lab[Shuffle_Index_Train]
val_lab = val_lab[Shuffle_Index_Val]
test_lab = test_lab[Shuffle_Index_Test]

#Apply the same orderings to the lookup table. Rows of UnShuffle_Index are laid out as
#training, then validation, then test, so each set's indices are shifted by where it starts.
ValOffset = TrainSampleNo
TestOffset = TrainSampleNo + ValSampleNo
UnShuffle_Index_All = np.concatenate((
    UnShuffle_Index[Shuffle_Index_Train],
    UnShuffle_Index[ValOffset + Shuffle_Index_Val],
    UnShuffle_Index[TestOffset + Shuffle_Index_Test],
))

#Turn label arrays into categorical
from keras.utils import to_categorical
#Use one class count for all three sets so the one-hot width is identical even if a
#set happens not to contain the highest label
NumClasses = int(np.concatenate((train_lab, val_lab, test_lab)).max()) + 1
train_lab = to_categorical(train_lab, num_classes = NumClasses)
val_lab = to_categorical(val_lab, num_classes = NumClasses)
test_lab = to_categorical(test_lab, num_classes = NumClasses)

####################################################################################Data is ready

In [7]:
#Destination folder for the shuffled data set (run this cell to write it out)
NewPath = "RBG32Bit"

#Write the reshuffled images and labels, then the unshuffle lookup array, one file each
for FileName, Array in (
    ("/train_images.npy", train_images),
    ("/val_images.npy", val_images),
    ("/test_images.npy", test_images),
    ("/train_lab.npy", train_lab),
    ("/val_lab.npy", val_lab),
    ("/test_lab.npy", test_lab),
    ("/UnShuffle_Index_All.npy", UnShuffle_Index_All),
):
    np.save(NewPath + FileName, Array)


In [8]:
#This will give the original crystal number and thickness given a position in an array
def UnshuffleIndex(N, WhichDataSet): #WhichDataSet = 0 for training, 1 for validation, 2 for test data
    """Look up the original [crystal index, thickness index] of a shuffled sample.

    N is the sample's position inside the chosen data set and WhichDataSet selects
    the set (0 = training, 1 = validation, 2 = test). The row is read from
    UnShuffle_Index_All, where the three sets are stored back to back in that order.

    Raises ValueError if WhichDataSet is not 0, 1 or 2 (previously a message was
    printed and the training-set row was returned, which silently gave a wrong answer).
    """
    ThicknessNo = AllData.shape[1]
    if(WhichDataSet == 0):
        Offset = 0
    elif(WhichDataSet == 1):
        Offset = TrainCrystalNo * ThicknessNo #Skip the training samples
    elif(WhichDataSet == 2):
        Offset = TrainCrystalNo * ThicknessNo + ValCrystalNo * ThicknessNo #Skip training and validation samples
    else:
        raise ValueError("Error, wrong number: WhichDataSet must be 0, 1 or 2, got " + repr(WhichDataSet))
    return(UnShuffle_Index_All[N + Offset])
 


In [10]:
#For example: look up where the first training sample came from and print both arrays.
#The crystal and thickness are taken from the lookup instead of being typed in by hand,
#because the shuffle is random and the numbers change on every run.
Crystal, Thickness = UnshuffleIndex(0, 0)
print(UnshuffleIndex(0, 0))
print(train_images[0])
print("The next array is Identical")
print(AllData[Crystal][Thickness])
#They are the same array

[4805    1]
[[[ 4.18074518e-01  9.82364756e-04 -7.84053955e+01]
  [ 4.19053525e-01  4.14147368e-03  6.52712860e+01]
  [ 4.22316641e-01  2.84812762e-03  1.68663387e+01]
  ...
  [ 1.64817408e-01  1.53494030e-02  1.23089304e+01]
  [ 1.63956612e-01  1.50690852e-02  1.68663387e+01]
  [ 1.63146794e-01  1.83297079e-02  6.52712860e+01]]

 [[ 4.19060230e-01  4.76311985e-03  6.64506683e+01]
  [ 4.25366133e-01  4.51026997e-03 -7.84053955e+01]
  [ 4.25162643e-01  1.83472736e-03 -2.15596809e+01]
  ...
  [ 1.38249129e-01  1.42562119e-02  1.18322849e+01]
  [ 1.37882963e-01  1.41658392e-02  1.92205429e+01]
  [ 1.37237325e-01  1.61270015e-02  5.76493607e+01]]

 [[ 4.24287647e-01  3.46688367e-03  1.77630463e+01]
  [ 4.26675826e-01  1.62356277e-03 -2.18473339e+01]
  [ 4.25872594e-01  7.73473119e-04 -4.44190264e+00]
  ...
  [ 1.27928317e-01  7.20563019e-03  2.69944763e+00]
  [ 1.26824126e-01  7.43645849e-03  4.46828413e+00]
  [ 1.26819879e-01  8.32558237e-03  1.89758015e+01]]

 ...

 [[ 1.48601979e-01  6.