In [1]:
#import modules

import numpy as np
import os
import matplotlib.pyplot as plt
import pickle

from keras import layers
from keras import models
from keras import optimizers
from keras.preprocessing import image_dataset_from_directory
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.regularizers import l2
from keras.utils import to_categorical
from keras.utils import multi_gpu_model
from keras.utils import Sequence
from keras.models import load_model
from tensorflow.distribute import MirroredStrategy

#Initialise the shared random generator with a fixed seed so that the data
#shuffling performed with it is reproducible after a kernel restart.
#Set RANDOM_SEED to None to get a different shuffle on every run.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

In [2]:
#Define FelixDataflow classes and functions.

class FelixSequence(Sequence):
    """Batch provider that loads arrays from file paths on demand.

    Each item is a ``(inputs, one_hot_labels)`` pair built from one slice of
    the path and label lists, so only a single batch of files is read at a
    time.
    """

    def __init__(self, x_set, y_set, batch_size, file_type, num_classes=10):
        """Store the paths, labels and batching options.

        x_set:       list of paths to ``file_type`` files.
        y_set:       corresponding list of integer class labels.
        batch_size:  number of samples per batch.
        file_type:   file format string handed to ``arrs_from_paths``.
        num_classes: width of the one-hot label vectors (defaults to 10, the
                     value that used to be hard-coded in ``__getitem__``).
        """
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.file_type = file_type
        self.num_classes = num_classes

    def __len__(self):
        # Ceiling division: a final, partially filled batch still counts.
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        # Slicing past the end of the lists is safe, so the last batch is
        # simply shorter than batch_size.
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]
        inputs = arrs_from_paths(batch_x, self.file_type)
        targets = to_categorical(np.array(batch_y), self.num_classes)
        return inputs, targets

def gen_paths_labels(base_path):
    """Yield one shuffled ``[paths, labels]`` pair per data segment.

    Every entry of ``base_path`` (visited in sorted name order) is treated as
    a segment directory. Every entry of a segment is treated as a label
    directory whose name is converted with ``int()``, and every entry inside
    it contributes one file path carrying that label. For each segment a
    two-element list is yielded: an array of paths and an array of integer
    labels, both reordered by the same permutation drawn from the
    module-level ``rng``.
    """
    for segment_name in sorted(os.listdir(base_path)):
        segment_dir = os.path.join(base_path, segment_name)

        # The label directory name doubles as the label of every file in it.
        labelled_files = [
            (os.path.join(segment_dir, label_name, file_name), label_name)
            for label_name in os.listdir(segment_dir)
            for file_name in os.listdir(os.path.join(segment_dir, label_name))
        ]
        file_paths = [path for path, _ in labelled_files]
        raw_labels = [label for _, label in labelled_files]

        # A single index permutation keeps paths and labels aligned.
        order = np.arange(len(raw_labels))
        rng.shuffle(order)

        shuffled_paths = np.array(file_paths)[order]
        shuffled_labels = np.array([int(label) for label in raw_labels])[order]
        yield [shuffled_paths, shuffled_labels]

def arrs_from_paths(paths, file_type):
    """Load the files at ``paths`` and stack them into one NumPy array.

    paths:     iterable of file paths.
    file_type: "txt" reads each file with ``np.loadtxt``; "npy" reads each
               file with ``np.load`` and keeps only index 0 of the first
               axis, retaining that axis with length 1.

    Returns an array whose first axis runs over ``paths``.

    Raises ValueError for any other ``file_type``. Previously the function
    fell through and returned None, which only failed later, far away from
    the actual mistake.
    """
    if file_type == "txt":
        return np.array([np.loadtxt(file_name) for file_name in paths])
    if file_type == "npy":
        # Indexing with the list [0] (rather than the scalar 0) keeps the
        # leading axis, so every sample has shape (1, rows, cols).
        return np.array([np.load(file_name)[[0], :, :] for file_name in paths])
    raise ValueError(
        "Unsupported file_type {!r}; expected 'txt' or 'npy'".format(file_type)
    )

def felix_fit_new(model, batch_size, epochs, workers, AllData, file_type, patience):
    """Train ``model`` one epoch at a time with manual early stopping, then
    evaluate it on the test split.

    model:      compiled model exposing ``fit`` and ``evaluate``.
    batch_size: number of samples per gradient update, forwarded to
                ``model.fit`` (None keeps the model's default).
    epochs:     maximum number of epochs to train for.
    workers:    forwarded to ``model.fit`` and ``model.evaluate``.
    AllData:    ``[[train_x, val_x, test_x], [train_y, val_y, test_y]]``.
    file_type:  unused; kept so existing callers keep working.
    patience:   training stops once the validation loss has failed to improve
                for more than ``patience`` consecutive epochs.

    Returns the first element of ``model.evaluate`` on the test split, which
    is the loss when the model is compiled with metrics.
    """
    best_val_loss = np.inf
    # Must exist before the loop: if the very first epoch does not improve on
    # np.inf (e.g. the validation loss is NaN) the else-branch below runs
    # first and would otherwise hit an unbound local.
    patience_i = 0

    # Accept NumPy integers (e.g. taken from a hyper-parameter grid).
    if batch_size is not None:
        batch_size = int(batch_size)

    TrainImage = AllData[0][0]
    ValImage = AllData[0][1]
    TestImage = AllData[0][2]

    TrainClassification = AllData[1][0]
    ValClassification = AllData[1][1]
    TestClassification = AllData[1][2]

    for epoch in range(epochs):
        print("-------------------------------------------------------------------------")
        print("Epoch", epoch+1, "/", epochs, ": ")
        print("Training: ")
        # initial_epoch/epochs are set so that each call runs exactly one
        # epoch while the epoch counter keeps advancing.
        train_hist = model.fit(TrainImage, TrainClassification,
                               validation_data = (ValImage, ValClassification),
                               batch_size = batch_size,
                               epochs = epoch+1, workers = workers,
                               initial_epoch = epoch, shuffle=True)

        print("Validation: ")

        epoch_loss = train_hist.history["val_loss"][-1]
        if(epoch_loss < best_val_loss):
            print("The model improved from: ",best_val_loss, "to: ", epoch_loss)
            best_val_loss = epoch_loss
            patience_i = 0
        else:
            patience_i+=1
            print("The model did not improve, patience_i = ", patience_i)

        print("Epoch loss: ", epoch_loss)
        if(patience_i > patience):
            print("Early Stopping, the model did not improve from: ", best_val_loss)
            break

    print("-------------------------------------------------------------------------")
    print("Testing: ")
    # NOTE(review): the weights evaluated here are those of the last epoch
    # run, not of the best-validation epoch, because no checkpoint is saved.
    tst_hist = model.evaluate(TestImage, TestClassification, workers = workers)

    return tst_hist[0]

    

In [3]:
def MakeThicknessList(ListPaths):
    """Return an array holding the integer encoded in each path's file name.

    For every path, the text after the last "/" and before the first "."
    that follows it is converted with ``int()``.
    """
    return np.array([
        int(path.rpartition("/")[2].partition(".")[0])
        for path in ListPaths
    ])

def OpenTxt(Path):
    """Return the first whitespace-separated token of each line in ``Path``.

    Blank or whitespace-only lines are skipped. Previously such a line (for
    instance a trailing empty line at the end of the file) raised IndexError.

    Path: path of the text file to read.
    Returns a list of strings, one per non-blank line, in file order.
    """
    with open(Path) as textFile:
        tokens_per_line = (line.split() for line in textFile)
        # An empty token list means the line held nothing but whitespace.
        return [tokens[0] for tokens in tokens_per_line if tokens]

def LoadCentralBeam(AllPaths, image_shape=(128, 128)):
    """Load index 0 of every ``.npy`` file into one array per data split.

    AllPaths:    list of path lists, one per split (the notebook passes
                 [train, validation, test]).
    image_shape: (rows, cols) of a single image; defaults to the 128 x 128
                 size that used to be hard-coded.

    Returns a list with one float64 array of shape
    ``(len(paths), 1, rows, cols)`` per entry of ``AllPaths``, in the same
    order. Only ``np.load(path)[0]`` is kept from each file (presumably the
    central beam, going by the function name).
    """
    AllImages = []
    for split_paths in AllPaths:
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        images = np.zeros((len(split_paths), 1) + tuple(image_shape),
                          dtype=np.float64)
        for j, path in enumerate(split_paths):
            # The (rows, cols) image broadcasts into the (1, rows, cols) slot.
            images[j] = np.load(path)[0]
        AllImages.append(images)
    return AllImages

#Directory holding the text files that list the data paths of each split
DataPath = "//home/ug-ml/felix-ML/classification/Classification000/DataPaths/"

#Read the file paths of the training, validation and test splits (in that order)
TrainPath, ValPath, TestPath = (
    OpenTxt(DataPath + split_file)
    for split_file in ("Train_0p1.txt", "Validation_0p1.txt", "Test_0p1.txt")
)

#Load the images of all three splits into memory
AllLACBED = LoadCentralBeam([TrainPath, ValPath, TestPath])

#One-hot encode (10 classes) the integer taken from each file name
TrainThickness, ValThickness, TestThickness = (
    to_categorical(MakeThicknessList(split_paths), 10)
    for split_paths in (TrainPath, ValPath, TestPath)
)

#[[train, val, test] images, [train, val, test] one-hot thickness labels]
AllData = [AllLACBED, [TrainThickness, ValThickness, TestThickness]]


In [4]:
#Output locations
SaveDataPath = "/home/ug-ml/Documents/GitHub_BigFiles/SaveFolder" #Base directory where information about models is stored
SaveFolderName = "/Classifer_1" #Folder (under SaveDataPath) for information about the outcome / inputs
ModelName = "/Model.hdf5"

#Model variables
input_shape = (1, 128, 128)

#Hyper parameters
learning_rate = 0.0005
l2_regularizer = 0.0001
loss = 'categorical_crossentropy'
optimizer = "RMSprop" #Only recorded as a note; it is not used to choose the optimizer
batch_size = 32
epochs = 50
ShuffleTrainData = True

#Callback variables
TrainingPatience = 5
CheckPointMonitor = 'val_acc'
EarlyStopMonitor = 'val_acc'

#CPU variables
CPUworkers = 16

#Names and values of the variables to save in a file. Each name sits next to
#its value here, and the two parallel lists are derived from the pairs so
#that they cannot drift out of order.
VariableListName, VariableListValues = map(list, zip(
    ("input_shape", input_shape),
    ("learning_rate", learning_rate),
    ("l2_regularizer", l2_regularizer),
    ("loss", loss),
    ("optimizer", optimizer),
    ("batch_size", batch_size),
    ("epochs", epochs),
    ("ShuffleTrainData", ShuffleTrainData),
    ("TrainingPatience", TrainingPatience),
    ("CheckPointMonitor", CheckPointMonitor),
    ("EarlyStopMonitor", EarlyStopMonitor),
    ("CPUworkers", CPUworkers),
))

In [5]:
def build_model(learning_rate, l2_regularizer):
    """Build, compile and summarise the convolutional classifier.

    learning_rate:  learning rate given to the RMSprop optimizer.
    l2_regularizer: L2 penalty factor applied to the kernels of both dense
                    layers.

    The input shape and the loss are read from the module-level
    ``input_shape`` and ``loss`` variables. The model is created inside a
    ``MirroredStrategy`` scope, its summary is printed, and the compiled
    model is returned.
    """
    strategy = MirroredStrategy() #Allows multiple GPUs

    with strategy.scope():
        model = models.Sequential()

        #Three identical Conv2D + MaxPooling2D stages; only the first one
        #declares the input shape.
        for stage in range(3):
            conv_options = {'activation': 'relu',
                            'data_format': 'channels_first'}
            if stage == 0:
                conv_options['input_shape'] = input_shape
            model.add(layers.Conv2D(128, (4, 4), **conv_options))
            model.add(layers.MaxPooling2D((2, 2), data_format='channels_first'))

        #Classifier head
        model.add(layers.Flatten())
        model.add(layers.Dropout(0.25))
        for units, activation in ((128, 'relu'), (10, 'softmax')):
            model.add(layers.Dense(units, activation=activation,
                                   kernel_regularizer = l2(l2_regularizer)))

        rmsprop = optimizers.RMSprop(learning_rate = learning_rate)
        model.compile(loss = loss, optimizer = rmsprop, metrics=['acc'])

    model.summary()
    return model


In [None]:
#Grid points (index triples) that have already been visited; the start point
#counts as visited.
prev_searched = [[0,1,2]]

#Candidate values for each hyper parameter. A grid point is an index triple
#(learning-rate index, l2 index, batch-size index) into these arrays.
#NOTE(review): these names rebind the scalar settings of the same name from
#the configuration cell.
learning_rate = np.array([0.0005, 0.0001, 0.001, 0.002, 0.005])
l2_regularizer = np.array([0.00005, 0.0001, 0.0005, 0.001, 0.002])
batch_size = np.array([8, 16, 32, 64, 128])

def neighbours(point):
    """Return the unvisited grid points one step away from ``point``.

    point: index triple into (learning_rate, l2_regularizer, batch_size).

    A neighbour differs from ``point`` by +1 or -1 in exactly one index. Only
    neighbours that lie inside all three grids and do not appear in
    ``prev_searched`` are returned, as an integer array of shape (n, 3); the
    shape is (0, 3) when nothing is left, so column indexing stays valid.
    """
    dircs = np.array([[0,0,1],[0,1,0],[1,0,0],[0,0,-1],[0,-1,0],[-1,0,0]])
    grid_sizes = np.array([len(learning_rate), len(l2_regularizer), len(batch_size)])

    def unvisited(candidate):
        # A point counts as visited only if ALL three indices match one
        # stored point. The previous test compared per coordinate across all
        # stored points and therefore rejected points never visited.
        return not any(np.array_equal(candidate, seen) for seen in prev_searched)

    kept = [c for c in dircs + point
            if (0 <= c).all() and (c < grid_sizes).all() and unvisited(c)]
    return np.array(kept, dtype=int).reshape(-1, 3)

#Greedy neighbourhood search over the hyper-parameter grid.
#Starting from z, every unvisited neighbour is trained and scored. If any of
#them beats the best loss so far, the search moves to the best one and
#repeats; it stops when no neighbour improves or none is left.
#NOTE(review): the score is the test loss returned by felix_fit_new, so the
#test split is being used for model selection.
z = np.array([0,1,2])
converged = False
best_test_loss = np.inf
best_lr = np.nan
best_l2_r = np.nan
best_bs = np.nan

while not converged:
    neighs = neighbours(z)

    #Stays True unless one of the neighbours improves on the best loss
    converged = True

    #Iterating the points directly is also safe when there are no neighbours
    for point in neighs:
        prev_searched.append(point)

        lr = learning_rate[point[0]]
        l2_r = l2_regularizer[point[1]]
        bs = int(batch_size[point[2]])
        print(lr)

        model = build_model(lr, l2_r)

        test_loss = felix_fit_new(model, bs, epochs, CPUworkers, AllData, "npy", TrainingPatience)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            best_lr = lr
            best_l2_r = l2_r
            best_bs = bs
            #Move the search centre; previously z was never updated, so the
            #search could not leave the neighbourhood of the start point.
            z = point
            converged = False
    

0.0005
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 128, 125, 125)     2176      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 62, 62)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 59, 59)       262272    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 128, 29, 29)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 128, 26, 26)       262272    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (No