In [1]:
#import modules

import numpy as np
import os
import matplotlib.pyplot as plt
import pickle

from keras import layers
from keras import models
from keras import optimizers
from keras.preprocessing import image_dataset_from_directory
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.regularizers import l2
from keras.utils import to_categorical
from keras.utils import multi_gpu_model
from keras.utils import Sequence
from keras.models import load_model
from tensorflow.distribute import MirroredStrategy

In [2]:
#initialise random generator
rng = np.random.default_rng()

In [3]:
#Define FelixDataflow classes and functions.

class FelixSequence(Sequence):
    def __init__(self, x_set, y_set, batch_size, file_type):
        """Here self.x is a list of paths to file_type files. self.y is a
        corresponding list of labels."""
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.file_type = file_type

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        return arrs_from_paths(batch_x, self.file_type), to_categorical(np.array(batch_y),10)

def gen_paths_labels(base_path = "D:\\Uni Work\\Masters Project\\test_dir"):
    """A generator to yield (data-paths, corresponding labels) tuples for each
    segment of data (typically training, validation, and testing)."""
    for segment in sorted(os.listdir(base_path)):
        segment_path = os.path.join(base_path, segment)
        segment_paths = []
        segment_labels = []
        for label in os.listdir(segment_path):
            label_path = os.path.join(segment_path, label)
            for crystal in os.listdir(label_path):
                segment_paths.append(os.path.join(label_path, crystal))
                segment_labels.append(label)
        indexes = np.arange(len(segment_labels))
        rng.shuffle(indexes)
        yield [np.array(segment_paths)[indexes], np.array(list(map(int,segment_labels)))[indexes]]

def arrs_from_paths(paths, file_type):
    if file_type == "txt":
        return np.array([np.loadtxt(file_name) for file_name in paths])
    elif file_type == "npy":
        return np.array([np.load(file_name) for file_name in paths])


def felix_fit(model, batch_size, epochs, workers, callbacks, base_path, file_type):
    """A fit function to allow validation and test data to be supplied via a
    generator."""
    data = [i for i in gen_paths_labels(base_path)]
    val_seq = FelixSequence(data[2][0], data[2][1], batch_size, file_type)
    train_seq = FelixSequence(data[1][0], data[1][1], batch_size, file_type)
    test_seq = FelixSequence(data[0][0], data[0][1], batch_size, file_type)
    for epoch in range(epochs):
        print("-------------------------------------------------------------------------")
        print("Epoch", epoch+1, "/", epochs, ": ")
        print("Training: ")
        model.fit(x = train_seq, epochs = epoch+1, workers = workers, initial_epoch = epoch)
        print("Validation: ")
        model.evaluate(x = val_seq, workers = workers, callbacks = callbacks)
    print("-------------------------------------------------------------------------")
    print("Testing: ")
    model.evaluate(test_seq)

In [12]:
#All paths

Path = "/home/ug-ml/felix-ML/angle_3_data" #Path where training, validation, and test data is
SaveDataPath = "/home/ug-ml/Documents/GitHub_BigFiles/SaveFolder" #Base directory of place you store information of models
SaveFolderName = "/ConvnetAllData2" #Will create a folder and put in information about the outcome / inputs
ModelName = "/ConvnetAllData2.hdf5"


#Many variables

#Model Variables
input_shape = (36, 128, 128)

#Hyper parameters
learning_rate = 0.0005
l2_regularizer = 0.0001
loss = 'categorical_crossentropy'
optimizer = "RMSprop" #Not a variable ONLY used for a note
batch_size = 32
epochs = 100
ShuffleTrainData = True

#Call back variables
TrainingPatience = 30
CheckPointMonitor = 'val_acc'
EarlyStopMonitor = 'val_acc'

#CPU variables
CPUworkers = 16


#List the name of the variables you want to save in a file
VariableListName = ["input_shape", 
                   "learning_rate", "l2_regularizer", "loss", "optimizer", "batch_size", "epochs", "ShuffleTrainData",
                   "TrainingPatience", "CheckPointMonitor", "EarlyStopMonitor",
                   "CPUworkers"]

#List the variables in the same order as VariableListName
VariableListValues = [input_shape, 
                   learning_rate, l2_regularizer, loss, optimizer, batch_size, epochs, ShuffleTrainData,
                   TrainingPatience, CheckPointMonitor, EarlyStopMonitor,
                   CPUworkers]

In [9]:
#Early stopping and check points

EarlyStop = EarlyStopping(monitor = EarlyStopMonitor,
                          mode = 'min',
                          verbose = 1,
                          patience = TrainingPatience)

NewPath = SaveDataPath + SaveFolderName
Checkpoint = ModelCheckpoint(NewPath + ModelName, #Save path
                             monitor = CheckPointMonitor,
                             verbose = 1,
                             save_best_only = True,
                             mode = 'auto',
                             save_freq = 'epoch')


#Make folder to put model and history information
try:
    os.mkdir(NewPath)
except:
    print("Folder failed to be created, it may already exist")
    
File1  = open(NewPath +"/Parameters.txt", "w+")
if(len(VariableListName) == len(VariableListValues)):
    for i in range(0, len(VariableListName)):
        File1.write(VariableListName[i] + " " + str(VariableListValues[i]) + "\n")
    File1.close()
else:
    print("VariableListName and VariableListValues do not match up, so file can not be saved")


Folder failed to be created, it may already exist


In [10]:
#Build model
strategy = MirroredStrategy() #Allows multiple GPUs

with strategy.scope():
    model = models.Sequential()
    model.add(layers.SeparableConv2D(256, (4, 4),
                                     activation='relu',
                                     data_format='channels_first',
                                     input_shape= input_shape))
    model.add(layers.MaxPooling2D((2, 2), data_format='channels_first'))
    model.add(layers.SeparableConv2D(256, (4, 4),
                                     data_format='channels_first',
                                     activation='relu'))
    model.add(layers.MaxPooling2D((2, 2), data_format='channels_first'))
    model.add(layers.SeparableConv2D(256, (4, 4),
                                     data_format='channels_first',
                                     activation='relu'))
    model.add(layers.MaxPooling2D((2, 2), data_format='channels_first'))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.25))
    model.add(layers.Dense(128, activation='relu',
                           kernel_regularizer = l2(l2_regularizer)))
    
    model.add(layers.Dense(10, activation='softmax',
                           kernel_regularizer = l2(l2_regularizer)))

    model.compile(loss = loss,
                  optimizer = optimizers.RMSprop(learning_rate = learning_rate),
                  metrics=['acc'])

#Save summary of model
with open(NewPath + '/summary.txt','w') as fh:
    model.summary(print_fn=lambda x: fh.write(x + '\n'))
    
model.summary()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
separable_conv2d_3 (Separabl (None, 256, 125, 125)     10048     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 256, 62, 62)       0         
_________________________________________________________________
separable_conv2d_4 (Separabl (None, 256, 59, 59)       69888     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 256, 29, 29)       0         
_________________________________________________________________
separable_conv2d_5 (Separabl (None, 256, 26, 26)       69888     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 2

In [14]:
felix_fit(model, batch_size, epochs, CPUworkers, [EarlyStop, Checkpoint], Path, "npy")
#data = [i for i in gen_paths_labels(Path)]
#val_seq = FelixSequence(data[2][0], data[2][1], batch_size, "npy")
#print(data[1][0].shape)
#train_seq = FelixSequence(data[1][0], data[1][1], batch_size, "npy")
#model.fit(x = train_seq, epochs = epochs, workers = CPUworkers)

-------------------------------------------------------------------------
Epoch 1 / 100 : 
Training: 
Validation: 
-------------------------------------------------------------------------
Epoch 2 / 100 : 
Training: 
Epoch 2/2
Validation: 
-------------------------------------------------------------------------
Epoch 3 / 100 : 
Training: 
Epoch 3/3
Validation: 
-------------------------------------------------------------------------
Epoch 4 / 100 : 
Training: 
Epoch 4/4
Validation: 
-------------------------------------------------------------------------
Epoch 5 / 100 : 
Training: 
Epoch 5/5
Validation: 
-------------------------------------------------------------------------
Epoch 6 / 100 : 
Training: 
Epoch 6/6
Validation: 
-------------------------------------------------------------------------
Epoch 7 / 100 : 
Training: 
Epoch 7/7
Validation: 
-------------------------------------------------------------------------
Epoch 8 / 100 : 
Training: 
Epoch 8/8
Validation: 
----------

Epoch 54/54
Validation: 
-------------------------------------------------------------------------
Epoch 55 / 100 : 
Training: 
Epoch 55/55
Validation: 
-------------------------------------------------------------------------
Epoch 56 / 100 : 
Training: 
Epoch 56/56
Validation: 
-------------------------------------------------------------------------
Epoch 57 / 100 : 
Training: 
Epoch 57/57
Validation: 
-------------------------------------------------------------------------
Epoch 58 / 100 : 
Training: 
Epoch 58/58
Validation: 
-------------------------------------------------------------------------
Epoch 59 / 100 : 
Training: 
Epoch 59/59
Validation: 
-------------------------------------------------------------------------
Epoch 60 / 100 : 
Training: 
Epoch 60/60
Validation: 
-------------------------------------------------------------------------
Epoch 61 / 100 : 
Training: 
Epoch 61/61
Validation: 
-------------------------------------------------------------------------
Epoch

Epoch 81/81
Validation: 
-------------------------------------------------------------------------
Epoch 82 / 100 : 
Training: 
Epoch 82/82
Validation: 
-------------------------------------------------------------------------
Epoch 83 / 100 : 
Training: 
Epoch 83/83
Validation: 
-------------------------------------------------------------------------
Epoch 84 / 100 : 
Training: 
Epoch 84/84
Validation: 
-------------------------------------------------------------------------
Epoch 85 / 100 : 
Training: 
Epoch 85/85
Validation: 
-------------------------------------------------------------------------
Epoch 86 / 100 : 
Training: 
Epoch 86/86
Validation: 
-------------------------------------------------------------------------
Epoch 87 / 100 : 
Training: 
Epoch 87/87
Validation: 
-------------------------------------------------------------------------
Epoch 88 / 100 : 
Training: 
Epoch 88/88
Validation: 
-------------------------------------------------------------------------
Epoch

In [15]:
model.save("/home/ug-ml/Documents/GitHub_BigFiles/SaveFolder/ConvnetAllData2/ConvnetAllData2.hdf5")