# CNN-Training

In [1]:
# Import required packages
import tensorflow as tf
import numpy as np
import pandas as pd
from PIL import Image
import random
from sklearn.model_selection import train_test_split
import os
import shutil

In [2]:
# Input parameters for the CNN training run
parameters_training = dict(
    reg_id=1,               # Number of the register entry
    conv_range=None,        # [mrad] Convergence angle range for which models should be trained, None -> no filtering
    thickness_range=None,   # [nm]   Thickness range for which models should be trained, None -> no filtering
)

In [3]:
# GPU memory setup: cap this process at 80 % of the GPU memory and allocate it on demand
# (allow_growth). Without this snippet errors occurred on the development machine; it may
# not be necessary on a cluster or on other computers.
# (ConfigProto also accepts e.g. device_count={'GPU': 1} to restrict the number of GPUs.)
config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.8,
                                        allow_growth=True)
)

# Register the configured session as the session used by the Keras backend
session = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(session)




In [4]:
class CNN_PACBED_Trainer:
    """Train CNN classifiers on the image dataset of one register entry.

    Intended call order (as used by the cells below the class):
    construct the trainer, call ``filter_dataset``, then ``CNN_dim``, then any
    of ``train_thickness``, ``train_mistilt`` and ``train_scale``.

    Constructing the trainer has side effects on disk: a new numbered model
    folder is created below ``<system path>/models`` and a row is appended to
    ``Register_models.csv`` in that folder.

    Attributes:
        Reg_id: id of the entry in ``./data/Register.csv``.
        df: full dataframe loaded from ``<system path>/simulation/df.csv``.
        df_filtered / df_train / df_validation: set by ``filter_dataset``.
        dim / rescaling: set by ``CNN_dim``.
        path_models / path_model: folder of all models / of this run's models.
        df_register_model / reg_id_model: model register and this run's id.
        history: dict mapping the trained case name to the Keras History object.
    """

    # Layout of the model register; the 'id' of a model is stored as the index.
    _MODEL_REGISTER_COLUMNS = ['path', 'thickness', 'convergenc angle min', 'convergenc angle max']
    _MODEL_REGISTER_FILE = 'Register_models.csv'

    def __init__(self, parameters):
        """Look up the register entry ``parameters['reg_id']`` and load its dataframe."""
        # Declare variables
        self.Reg_id = parameters['reg_id']

        self.df = None
        self.df_filtered = None
        self.df_train = None
        self.df_validation = None
        self.dim = None
        self.rescaling = None
        self.history = {}

        # Get register entry (also creates the folder and register row for the new models)
        path = self.make_register()

        # Load dataframe
        self.df = pd.read_csv(os.path.join(path, 'simulation', 'df.csv'), sep=';')

    @staticmethod
    def _read_model_register(register_path):
        """Read the model register and return it indexed by model id."""
        register = pd.read_csv(register_path, sep=';')
        if 'id' in register.columns:
            return register.set_index('id')
        # Register files written without the index have no 'id' column.
        # NOTE(review): assumes the row order of such a file equals the model ids - confirm.
        register.index.name = 'id'
        return register

    def _save_model_register(self):
        """Write the model register to disk, always including the 'id' index column."""
        self.df_register_model.to_csv(os.path.join(self.path_models, self._MODEL_REGISTER_FILE),
                                      sep=';', index=True, index_label='id')

    def make_register(self):
        """Create the folder and register row for a new set of models.

        Returns:
            The 'path' value of the selected system in ``./data/Register.csv``.

        Raises:
            KeyError: if ``self.Reg_id`` is not an id of the register file.
            FileExistsError: if the folder for the new model id already exists.
        """
        # Load register file
        df_register = pd.read_csv(os.path.join('.', 'data', 'Register.csv'), sep=';', index_col='id')

        # Get parameters
        df_system = df_register.loc[self.Reg_id]
        print('Loaded system: \n')
        display(df_system)  # IPython built-in, only available inside a notebook

        # .loc returns a Series for a unique id but a DataFrame when the id occurs
        # several times; normalise to a single row so the fields can be read by name.
        if isinstance(df_system, pd.DataFrame):
            df_system = df_system.iloc[0]

        system_path = df_system['path']
        self.path_models = os.path.join(system_path, 'models')
        register_path = os.path.join(self.path_models, self._MODEL_REGISTER_FILE)

        # Reuse the existing model register if there is one, otherwise start an empty one
        os.makedirs(self.path_models, exist_ok=True)
        if os.path.isfile(register_path):
            self.df_register_model = self._read_model_register(register_path)
        else:
            self.df_register_model = pd.DataFrame(columns=self._MODEL_REGISTER_COLUMNS,
                                                  index=pd.Index([], name='id'))

        # Next free model id
        if len(self.df_register_model) == 0:
            self.reg_id_model = 0
        else:
            self.reg_id_model = int(np.amax(self.df_register_model.index)) + 1

        self.path_model = os.path.join(self.path_models, str(self.reg_id_model))
        os.mkdir(self.path_model)

        # Save entry
        data_entry = [self.path_model, df_system['thickness'],
                      df_system['convergenc angle min'], df_system['convergenc angle max']]
        df_model = pd.DataFrame(data=[data_entry], columns=self._MODEL_REGISTER_COLUMNS,
                                index=pd.Index([self.reg_id_model], name='id'))
        if len(self.df_register_model) == 0:
            self.df_register_model = df_model
        else:
            self.df_register_model = pd.concat([self.df_register_model, df_model], ignore_index=False)
        self._save_model_register()

        return system_path

    # Filter trainings dataset to conv_range and thickness_range
    def filter_dataset(self, conv_range=None, thickness_range=None, splitting_ratio=0.1, random_state=None):
        """Filter the dataset, split it into training/validation and update the model register.

        Args:
            conv_range: iterable of convergence angles; rows with 'Conv_Angle' between its
                minimum and maximum (inclusive) are kept. None -> no filtering.
            thickness_range: iterable of thicknesses; rows with 'Thickness' between its
                minimum and maximum (inclusive) are kept. None -> no filtering.
            splitting_ratio: fraction of the filtered data used for validation.
            random_state: forwarded to ``train_test_split`` for a reproducible split.

        Raises:
            ValueError: if no image is left after filtering.
        """
        df_filtered = self.df

        if conv_range is not None:
            df_filtered = df_filtered[df_filtered['Conv_Angle'].between(np.amin(conv_range), np.amax(conv_range))]

        if thickness_range is not None:
            # NOTE(review): the range is compared to the raw 'Thickness' column, which is
            # divided by 10 below to print nm. If callers pass the range in nm the units
            # do not match - confirm the intended unit.
            df_filtered = df_filtered[df_filtered['Thickness'].between(np.amin(thickness_range), np.amax(thickness_range))]

        if len(df_filtered) == 0:
            raise ValueError('No images left after filtering; check conv_range and thickness_range.')

        self.df_filtered = df_filtered.reset_index(drop=True)

        print('Model will be trained from ({:.1f} to {:.1f}) mrad convergence angle.'.format(np.amin(self.df_filtered['Conv_Angle']), np.amax(self.df_filtered['Conv_Angle'])))
        print('Model will be trained from ({:.1f} to {:.1f}) nm thickness.'.format(np.amin(self.df_filtered['Thickness'])/10, np.amax(self.df_filtered['Thickness']/10)))

        # Preparing Dataframe for training and validation
        self.df_train, self.df_validation = train_test_split(self.df_filtered, test_size=splitting_ratio,
                                                             random_state=random_state)

        print('Number of images: {}'.format(len(self.df_filtered)))
        print('{:.0f}% used for training, {:.0f}% used for validation'.format((1-splitting_ratio)*100, splitting_ratio*100))

        # Update model register file (the model id is the index of the register)
        self.df_register_model.loc[self.reg_id_model, 'convergenc angle min'] = np.amin(self.df_filtered['Conv_Angle'])
        self.df_register_model.loc[self.reg_id_model, 'convergenc angle max'] = np.amax(self.df_filtered['Conv_Angle'])
        self.df_register_model.loc[self.reg_id_model, 'thickness'] = np.amax(self.df_filtered['Thickness'])/10

        self._save_model_register()

    # Define dimension of CNN input
    def CNN_dim(self, dim=(0, 0, 0)):
        """Set the CNN input shape ``self.dim`` (always a 3-tuple with 3 channels).

        Args:
            dim: desired (height, width) or (height, width, channels). With the default
                (0, 0, 0) the shape of the first image of the dataset is used and no
                rescaling is applied during training; otherwise images are rescaled.
        """
        if tuple(dim) == (0, 0, 0):
            # Load an example image for the dimension determination --> no rescaling required
            with Image.open(self.df['Path'].iloc[0]) as img:
                shape = np.array(img).shape
            self.rescaling = False
        else:
            shape = tuple(dim)
            self.rescaling = True

        # Convert dimension to rgb (required for pretrained models), otherwise grayscale can be used also
        self.dim = tuple(int(s) for s in shape[:2]) + (3,)
        print('Shape of CNN input: {}'.format(self.dim))

    def build_model(self, n_classes, fc_layers=(1024, 1024), dropout=0.3):
        """Build the two-input classifier (image + convergence angle).

        Args:
            n_classes: number of output classes of the softmax layer.
            fc_layers: sizes of the fully connected layers added after the base model.
            dropout: dropout rate applied after every fully connected layer.

        Returns:
            The uncompiled ``tf.keras.Model``.
        """
        if self.dim is None:
            raise RuntimeError('Input dimension is not set; call CNN_dim() before building a model.')

        # Load pretrained xception model (can be changed with other models or non-pretrained model)
        base_model = tf.keras.applications.Xception(weights='imagenet',
                                                    include_top=False,
                                                    pooling='avg',  # max pooling may perform better or worse
                                                    input_shape=self.dim)

        # Inputs
        inputs_img = tf.keras.Input(shape=self.dim)  # Image
        input_conv = tf.keras.Input(shape=(1,))      # Convergence angle (one scalar per sample)

        # Pretrained model
        x = base_model(inputs_img)

        # Build up fully connected layers with dropout
        for fc in fc_layers:
            x = tf.keras.layers.Dense(fc, activation='relu')(x)
            x = tf.keras.layers.Dropout(dropout)(x)

        # Add second input, convergence angle
        x = tf.keras.layers.Concatenate(axis=1)([x, input_conv])

        # Number of outputs defined by the labels of the datagenerator
        outputs = tf.keras.layers.Dense(n_classes, activation='softmax')(x)

        # Full model
        model = tf.keras.Model(inputs=(inputs_img, input_conv), outputs=outputs)
        model.summary()  # summary() prints itself and returns None

        return model

    def CNN_training(self, model, datagenerator_train, datagenerator_validation, epochs, path):
        """Compile and fit ``model``; the model with the lowest validation loss is saved to ``path``.

        Returns:
            The History object returned by ``model.fit``.
        """
        # Callback function for saving the model with the lowest validation loss --> used for final model
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=path,
            save_weights_only=False,
            monitor='val_loss',
            mode='min',
            save_best_only=True)

        # Compile model
        model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(),
                      metrics=[tf.keras.metrics.CategoricalAccuracy()])

        # Train model
        history_train = model.fit(datagenerator_train,
                                  epochs=epochs,
                                  validation_data=datagenerator_validation,
                                  callbacks=[model_checkpoint_callback]
                                  )

        return history_train

    def _train_case(self, case, epochs, batch_size):
        """Shared training pipeline for one case ('Thickness', 'Mistilt' or 'Scale')."""
        if self.df_train is None or self.df_validation is None:
            raise RuntimeError('No training data available; call filter_dataset() first.')

        # Generate labels
        labels, scale_vec = self.label_gen(case, self.df_filtered)

        # Initialize datagenerators (only the training data is shuffled)
        datagenerator_train = self.DataGenerator(self.df_train, labels, case, self.rescaling, self.dim,
                                                 batch_size, shuffle=True, scale_vec=scale_vec)
        datagenerator_validation = self.DataGenerator(self.df_validation, labels, case, self.rescaling, self.dim,
                                                      batch_size, shuffle=False, scale_vec=scale_vec)

        # Generate model
        model = self.build_model(n_classes=len(labels))

        # Train model
        checkpoint_filepath = os.path.join(self.path_model, case)
        self.history[case] = self.CNN_training(model, datagenerator_train, datagenerator_validation,
                                               epochs, checkpoint_filepath)

        # Transform tensorflow SavedModel to tensorflow lite model
        self.transform_model(checkpoint_filepath)

        print('Training finished!')

    def train_thickness(self, epochs=80, batch_size=8):
        """Train and export the thickness classifier."""
        self._train_case('Thickness', epochs, batch_size)

    def train_mistilt(self, epochs=40, batch_size=8):
        """Train and export the mistilt classifier."""
        self._train_case('Mistilt', epochs, batch_size)

    def train_scale(self, epochs=20, batch_size=8):
        """Train and export the scale classifier."""
        self._train_case('Scale', epochs, batch_size)

    def transform_model(self, path):
        """Convert the SavedModel at ``path`` to ``path + '.tflite'`` and delete the SavedModel."""
        # Convert to tensorflow lite framework
        converter = tf.lite.TFLiteConverter.from_saved_model(path)
        tflite_model = converter.convert()

        # Save the model.
        with open(path + '.tflite', 'wb') as f:
            f.write(tflite_model)

        # Delete tensorflow saved model
        shutil.rmtree(path)

    def label_gen(self, case, dataframe):
        """Create the class labels for ``case`` and save them as ``<case>_labels.csv``.

        Args:
            case: 'Thickness', 'Mistilt' or 'Scale'.
            dataframe: dataframe whose unique 'Thickness' / 'Mistilt' values define the
                classes (not used for 'Scale').

        Returns:
            (df_labels, scale_vec): label dataframe (label value, 'Index') and the list of
            scaling factors, which is None unless case is 'Scale'.

        Raises:
            ValueError: for an unknown case.
        """
        # Only required for scaling
        scale_vec = None

        if case == 'Thickness':
            column_name = 'Thickness / A'
            label_unique = np.unique(dataframe['Thickness'])
        elif case == 'Mistilt':
            column_name = 'Mistilt / mrad'
            label_unique = np.unique(dataframe['Mistilt'])
        elif case == 'Scale':
            # Used scaling operations
            scale_vec = [0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8]
            column_name = 'Scale / []'
            label_unique = np.unique(scale_vec)
        else:
            raise ValueError("Unknown case '{}'; expected 'Thickness', 'Mistilt' or 'Scale'.".format(case))

        # One row per class: label value and its class index
        df_labels = pd.DataFrame({column_name: label_unique, 'Index': np.arange(0, len(label_unique))})

        # Save labels
        df_labels.to_csv(os.path.join(self.path_model, case + '_labels.csv'), sep=';', index=False)
        print('Number of labels: {}'.format(len(df_labels)))

        return df_labels, scale_vec

    # Make custom data generator
    class DataGenerator(tf.keras.utils.Sequence):
        """Keras Sequence yielding augmented batches ``((images, conv_angle), one_hot_labels)``.

        Args:
            df: dataframe with the columns 'Path', 'Conv_Angle' and the label column of the case.
            labels: label dataframe as returned by ``label_gen`` (label value in the first
                column, class index in 'Index').
            case: 'Thickness', 'Mistilt' or 'Scale'.
            rescaling: if True, images are resized to ``dim[0:2]``.
            dim: shape of one input image (height, width, channels).
            batch_size: number of samples per batch; a trailing partial batch is dropped.
            shuffle: reshuffle the sample order at the end of every epoch.
            scale_vec: scaling factors to draw from (only used for case 'Scale').
        """

        def __init__(self, df, labels, case, rescaling, dim=(299, 299, 1), batch_size=8, shuffle=True, scale_vec=None):
            super().__init__()

            # Declare variables
            self.batch_size = batch_size
            self.df = df.copy(deep=True)
            self.indices = self.df.index.tolist()
            self.labels = labels
            self.num_classes = len(self.labels)
            self.shuffle = shuffle
            self.case = case
            self.dim = dim
            self.scale_vec = scale_vec
            self.rescaling = rescaling

            # Normalize convergence angle input to [0, 1] using the min/max of this dataframe
            self.conv_borders = [np.amin(self.df['Conv_Angle']), np.amax(self.df['Conv_Angle'])]
            conv_span = self.conv_borders[1] - self.conv_borders[0]

            if conv_span == 0:
                # Only one convergence angle present
                self.df.loc[:, 'Conv_Angle_normed'] = 1
            else:
                self.conv_ratio = self.conv_borders[0] / conv_span
                self.df['Conv_Angle_normed'] = self.df['Conv_Angle'] / conv_span - self.conv_ratio

            # Create the (shuffled) sample order for the first epoch
            self.on_epoch_end()

        def __len__(self):
            """Number of full batches per epoch."""
            return len(self.indices) // self.batch_size

        def __getitem__(self, index):
            """Return batch number ``index`` as ``((X, X_conv), y)``."""
            positions = self.index[index * self.batch_size:(index + 1) * self.batch_size]
            batch = [self.indices[k] for k in positions]
            return self.__get_data(batch)

        def on_epoch_end(self):
            """Reset the sample order and shuffle it if requested."""
            self.index = np.arange(len(self.indices))
            if self.shuffle:
                np.random.shuffle(self.index)

        @staticmethod
        def _min_max(arr):
            """Scale ``arr`` linearly to [0, 1]; a constant array maps to zeros instead of NaN."""
            lowest = np.amin(arr)
            span = np.amax(arr) - lowest
            if span == 0:
                return np.zeros_like(arr)
            return (arr - lowest) / span

        def __get_label(self, label_id):
            # One Hot Encoding (maybe integer encoding better suitable)
            return tf.keras.utils.to_categorical(label_id, self.num_classes)

        def __get_data(self, batch):
            """Load, augment and label the samples with the dataframe index labels in ``batch``."""
            X = np.empty((len(batch), *self.dim))
            X_conv = np.empty((len(batch), 1))
            y = np.empty((len(batch), self.num_classes))

            for i, row_id in enumerate(batch):

                # Loading image (the file is closed again as soon as the pixels are read)
                with Image.open(self.df.at[row_id, 'Path']) as img:
                    img_arr = np.array(img)

                # If grayscale, add a channel dimension (required for tensorflow).
                # The array rank is checked because PIL's img.size is always (width, height).
                if img_arr.ndim == 2:
                    img_arr = img_arr[:, :, np.newaxis]

                # Normalize image to [0, 1] and change datatype
                img_arr = self._min_max(img_arr.astype(np.float32)).astype(np.float32)

                # Resize image to the required dimension (skipped if images already have it)
                if self.rescaling:
                    img_arr = tf.keras.preprocessing.image.smart_resize(img_arr, self.dim[0:2], interpolation='bilinear')

                # Loading label for different cases (for scaling the label depends on the applied scale_rnd value)
                if self.case == 'Thickness':
                    y_val = self.df.at[row_id, 'Thickness']
                elif self.case == 'Mistilt':
                    y_val = self.df.at[row_id, 'Mistilt']
                elif self.case == 'Scale':
                    # Make a random scaling operation from the given vector
                    scale_rnd = self.scale_vec[np.random.randint(len(self.scale_vec), size=1)[0]]
                    # Scale image
                    img_arr = tf.keras.preprocessing.image.apply_affine_transform(img_arr, zx=scale_rnd, zy=scale_rnd, row_axis=0, col_axis=1, channel_axis=2, fill_mode='constant', cval=0., order=1)
                    y_val = scale_rnd

                # Get categorical labels, one-hot encoded
                label_index = np.array(self.labels.loc[self.labels.iloc[:, 0] == y_val, 'Index'])
                y[i,] = self.__get_label(label_index)

                # Random scaling only if scale is not trained
                if self.case != 'Scale':
                    # Zoom image
                    zoom = np.random.uniform(0.8, 1.2)
                    # Allow small random stretching
                    zoom_x = np.random.normal(zoom, 0.07)
                    zoom_y = np.random.normal(zoom, 0.07)
                    img_arr = tf.keras.preprocessing.image.apply_affine_transform(img_arr, zx=zoom_x, zy=zoom_y, row_axis=0, col_axis=1, channel_axis=2, fill_mode='constant', cval=0., order=1)

                # Random shear
                img_arr = tf.keras.preprocessing.image.random_shear(img_arr, intensity=0.05, row_axis=0, col_axis=1,
                                                                    channel_axis=2, fill_mode='constant', cval=0.0,
                                                                    interpolation_order=1)

                # Random rotation (rg is the rotation range in degrees)
                img_arr = tf.keras.preprocessing.image.random_rotation(img_arr, rg=50, row_axis=0, col_axis=1,
                                                                       channel_axis=2, fill_mode='constant', cval=0.0,
                                                                       interpolation_order=1)

                # Random vertical and horizontal shift
                img_arr = tf.keras.preprocessing.image.random_shift(img_arr, wrg=0.1, hrg=0.1, row_axis=0, col_axis=1,
                                                                    channel_axis=2, fill_mode='constant', cval=0.0,
                                                                    interpolation_order=1)

                # Random flip left/right and up/down (numpy flips keep the data a numpy array)
                if random.choice([0, 1]):
                    img_arr = np.fliplr(img_arr)
                if random.choice([0, 1]):
                    img_arr = np.flipud(img_arr)

                # Add poisson noise: the larger the value the smaller the noise becomes.
                # Negative values are not valid for poisson noise --> clip at zero first.
                noise_strength = np.random.randint(1, high=201, dtype=int)
                img_arr = np.random.poisson(np.maximum(img_arr, 0) * noise_strength) / noise_strength

                # Normalize between -1 to 1 (due to keras input for xception model)
                img_arr = 2 * self._min_max(img_arr) - 1

                # Get normalized convergence angle
                conv_normed = self.df.at[row_id, 'Conv_Angle_normed']

                # Add gaussian noise, because the user will not give the exact convergence angle
                conv_normed = np.random.normal(conv_normed, 0.05)

                # Filling batch: the first channel is repeated to the required number of channels
                X[i,] = (np.stack((img_arr[:, :, 0],) * self.dim[2], axis=-1)).astype(np.float32)
                X_conv[i,] = np.float32(conv_normed)

            return (X, X_conv), y
        
        

In [5]:
# Create the trainer for the configured register entry. Note: every instantiation creates
# a new numbered model folder and adds a row to Register_models.csv.
CNN_trainer = CNN_PACBED_Trainer(parameters_training)

Loaded system: 



Unnamed: 0,id,path,material,composition,direction,thickness,thickness step,high tension,convergenc angle min,convergenc angle max,convergenc angle step,mistilt min,mistilt max,mistilt step,azimuth min,azimuth max,azimuth step,dim
1,1,.\data\1,Strontium titanate,SrTiO3,"(0, 0, 1)",100,1,80,15.0,25.0,0.5,0,10,1,0,0.5,0.1,"(170, 170)"


In [6]:
# Apply the optional convergence-angle / thickness filters (None -> no filtering) and
# split the remaining images into training and validation data.
CNN_trainer.filter_dataset(conv_range = parameters_training['conv_range'], thickness_range = parameters_training['thickness_range'])

Model will be trained from (15.0 to 25.0) mrad convergence angle.
Model will be trained from (0.0 to 99.8) nm thickness.
Number of images: 138600
90% used for training, 10% used for validation


In [7]:
# No dim given -> the input shape is taken from the first image of the dataset,
# so images are not rescaled during training.
CNN_trainer.CNN_dim()

Shape of CNN input: (170, 170, 3)


In [8]:
# Train the thickness classifier for 40 epochs (the method default is 80).
CNN_trainer.train_thickness(epochs = 40)

Number of labels: 100
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 170, 170, 3) 0                                            
__________________________________________________________________________________________________
xception (Functional)           (None, 2048)         20861480    input_2[0][0]                    
__________________________________________________________________________________________________
dense (Dense)                   (None, 1024)         2098176     xception[0][0]                   
__________________________________________________________________________________________________
dropout (Dropout)               (None, 1024)         0           dense[0][0]                      
________________________________________________________________________

Epoch 30/40
INFO:tensorflow:Assets written to: .\data\1\models\0\Thickness\assets
Epoch 31/40
Epoch 32/40
Epoch 33/40
INFO:tensorflow:Assets written to: .\data\1\models\0\Thickness\assets
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
INFO:tensorflow:Assets written to: .\data\1\models\0\Thickness\assets
Epoch 40/40
Training finished!


In [9]:
# Train the mistilt classifier for 20 epochs (the method default is 40).
CNN_trainer.train_mistilt(epochs = 20)

Number of labels: 11
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 170, 170, 3) 0                                            
__________________________________________________________________________________________________
xception (Functional)           (None, 2048)         20861480    input_5[0][0]                    
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 1024)         2098176     xception[0][0]                   
__________________________________________________________________________________________________
dropout_2 (Dropout)             (None, 1024)         0           dense_3[0][0]                    
_______________________________________________________________________

In [10]:
# Train the scale classifier for 10 epochs (the method default is 20).
CNN_trainer.train_scale(epochs = 10)

Number of labels: 7
Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            [(None, 170, 170, 3) 0                                            
__________________________________________________________________________________________________
xception (Functional)           (None, 2048)         20861480    input_8[0][0]                    
__________________________________________________________________________________________________
dense_6 (Dense)                 (None, 1024)         2098176     xception[0][0]                   
__________________________________________________________________________________________________
dropout_4 (Dropout)             (None, 1024)         0           dense_6[0][0]                    
________________________________________________________________________