# Imports

In [1]:
import os
# Restrict this process to the GPU with CUDA index 1.
# NOTE(review): this is set before TensorFlow is imported in the next cell,
# presumably because the variable is only read when CUDA is initialised --
# keep this cell first.
os.environ["CUDA_VISIBLE_DEVICES"]='1'

In [2]:
import talos as ta
from talos.model import lr_normalizer, early_stopper, hidden_layers

import tensorflow as tf
# Run both checks up front, then report the TensorFlow version only when a GPU
# is usable and the installed build was compiled with CUDA.
gpu_usable = tf.test.is_gpu_available()
cuda_build = tf.test.is_built_with_cuda()
if gpu_usable and cuda_build:
    print("The installed version of TensorFlow {} includes GPU support.\n".format(tf.__version__))

from keras import callbacks, backend as K
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Flatten, Input
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.utils import multi_gpu_model
from keras.initializers import glorot_uniform
from keras.optimizers import Adam, Nadam, RMSprop, SGD, Adagrad
from keras.layers.advanced_activations import ReLU, LeakyReLU

from datetime import datetime
import pandas as pd
import numpy as np

# Fix the NumPy and TensorFlow random seeds so repeated runs start from the
# same pseudo-random state.
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(1)

# Create the TensorFlow session that Keras will use.
# allow_growth asks TensorFlow to allocate GPU memory on demand, and the
# memory fraction caps this process at 99% of the device memory.
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
config.gpu_options.per_process_gpu_memory_fraction = 0.99
sess = tf.Session(config=config)
# Register the session with the Keras backend so models are built inside it.
K.set_session(sess)

Using TensorFlow backend.


The installed version of TensorFlow 1.14.0 includes GPU support.



# Hilfsfunktionen

### Benutzerdefinierte Kostenfunktion & Metrik

In [3]:
def circular_mse(y_true, y_pred):
    """Mean squared error on a 360-periodic scale.

    For every element the smaller of ``|y_pred - y_true|`` and
    ``360 - |y_pred - y_true|`` is squared, and the squares are averaged over
    the last axis.

    Args:
        y_true: Tensor of target values (presumably angles in degrees).
        y_pred: Tensor of predicted values, same shape as ``y_true``.

    Returns:
        Tensor holding the mean over the last axis.
    """
    full_circle = tf.constant(360, dtype='float32')
    abs_diff = K.abs(y_pred - y_true)
    # Going "the other way round" the circle may be shorter than the direct difference.
    wrapped_diff = K.minimum(abs_diff, full_circle - abs_diff)
    return K.mean(K.square(wrapped_diff), axis=-1)

def circular_mae(y_true, y_pred):
    """Mean absolute error on a 360-periodic scale.

    For every element the smaller of ``|y_pred - y_true|`` and
    ``|360 - |y_pred - y_true||`` is taken, and the values are averaged over
    the last axis.

    Args:
        y_true: Tensor of target values (presumably angles in degrees).
        y_pred: Tensor of predicted values, same shape as ``y_true``.

    Returns:
        Tensor holding the mean over the last axis.
    """
    full_circle = tf.constant(360, dtype='float32')
    abs_diff = K.abs(y_pred - y_true)
    # The complement is wrapped in abs() so it stays non-negative even when
    # the raw difference exceeds 360.
    complement = K.abs(full_circle - abs_diff)
    return K.mean(K.minimum(abs_diff, complement), axis=-1)

### Anpassung Generatoren (zur Verwendung ImageDataGenerator mit Functional API)

In [4]:
def prepare_train_data(e_gen, a_gen):
    """Merge two single-label iterators into one two-label batch stream.

    Both iterators are advanced once per step. The images come from
    ``e_gen`` only; the images produced by ``a_gen`` are discarded and just
    its labels are kept.

    Args:
        e_gen: Iterator whose ``next()`` returns ``(images, elevation_labels)``.
        a_gen: Iterator whose ``next()`` returns ``(images, azimuth_labels)``.
            Presumably yields the same samples in the same order as
            ``e_gen`` -- the labels are paired purely by position.

    Yields:
        ``(images, [elevation_labels, azimuth_labels])``, endlessly.
    """
    while True:
        images, elevation_labels = e_gen.next()
        _, azimuth_labels = a_gen.next()
        yield (images, [elevation_labels, azimuth_labels])
        
def prepare_test_data(e_gen, a_gen):
    """Yield image batches only, for use with ``predict_generator``.

    Both iterators are still advanced once per step so they stay aligned,
    but only the images from ``e_gen`` are passed on.

    Args:
        e_gen: Iterator whose ``next()`` returns ``(images, elevation_labels)``.
        a_gen: Iterator whose ``next()`` returns ``(images, azimuth_labels)``;
            its batch is read and dropped.

    Yields:
        The image batch of ``e_gen``, endlessly.
    """
    while True:
        images, _elevation_labels = e_gen.next()
        _azimuth_images, _azimuth_labels = a_gen.next()
        yield images

### Generierung Datenpipeline (Angepasst für Klassifizierung)

In [5]:
def create_dataframes():
    """Load the image index CSV and split it into train/validation/test frames.

    The CSV at ``_CSV_FILE`` is shuffled with a fixed ``random_state`` so the
    split is reproducible. The ``Elevation`` and ``Azimuth`` columns are
    converted to strings.

    Split sizes:
        * train: 80% of ``_TRAIN_SAMPLES``, rounded down to a multiple of
          ``_BATCH_SIZE``, taken from the start of the shuffled frame.
        * valid: 20% of ``_TRAIN_SAMPLES``, rounded down the same way, taken
          from the rows directly after the training rows.
        * test: the last ``_TEST_SAMPLES`` rows of the shuffled frame.

    NOTE(review): nothing here checks that the test rows do not overlap the
    train/valid rows when the CSV has fewer rows than
    ``_TRAIN_SAMPLES + _TEST_SAMPLES``.

    Returns:
        Tuple ``(df_train, df_valid, df_test)``.
    """
    shuffled = pd.read_csv(_CSV_FILE).sample(frac=1, random_state=1)
    shuffled = shuffled.assign(
        Elevation=shuffled['Elevation'].map(str),
        Azimuth=shuffled['Azimuth'].map(str),
    )

    total_rows = shuffled.shape[0]
    train_rows = int(_TRAIN_SAMPLES*0.8 // _BATCH_SIZE * _BATCH_SIZE)
    valid_rows = int(_TRAIN_SAMPLES*0.2 // _BATCH_SIZE * _BATCH_SIZE)

    df_train = shuffled.iloc[0:train_rows]
    # Remove the training rows by label, then take the validation rows from the front.
    after_train = shuffled.drop(shuffled.index[0:df_train.shape[0]])
    df_valid = after_train.iloc[0:valid_rows]
    df_test = shuffled.iloc[total_rows - _TEST_SAMPLES:total_rows]

    return df_train, df_valid, df_test

In [6]:
def create_train_data():
    """Create the training and validation image iterators.

    Reads ``_USE_DATA_AUGMENTATION`` to decide whether the training images
    are augmented (shift, zoom, brightness) or only rescaled. Validation
    images are always only rescaled.

    Two iterators are created per split, one labelled with ``Elevation`` and
    one with ``Azimuth``. Within a split both iterators get the same
    ``shuffle`` and ``seed`` arguments; ``prepare_train_data`` later pairs
    their labels by position, so the two are presumably expected to iterate
    in the same order.

    Returns:
        Tuple ``(e_train_gen, a_train_gen, e_val_gen, a_val_gen)``.
    """

    def _flow(image_generator, dataframe, label_column, shuffle, seed):
        """Build one 224x224 RGB iterator over ``dataframe`` with sparse labels."""
        return image_generator.flow_from_dataframe(
            dataframe=dataframe,
            directory=_IMAGE_DIR,
            x_col='Filename',
            y_col=label_column,
            class_mode='sparse',
            target_size=(224, 224),
            color_mode='rgb',
            shuffle=shuffle,
            seed=seed,
            batch_size=_BATCH_SIZE,
        )

    # NOTE(review): the first notebook cell sets CUDA_VISIBLE_DEVICES='1', so
    # presumably only one GPU is visible to TensorFlow here -- confirm that
    # '/gpu:1' is the intended device name.
    with tf.device('/gpu:1'):

        df_train, df_valid, _ = create_dataframes()

        if _USE_DATA_AUGMENTATION:
            train_data_generator = ImageDataGenerator(
                rescale=1./255,
                width_shift_range=0.1,
                height_shift_range=0.1,
                zoom_range=0.1,
                brightness_range=(0.25, 0.75),
                fill_mode='nearest',
            )
        else:
            train_data_generator = ImageDataGenerator(rescale=1./255)

        valid_data_generator = ImageDataGenerator(rescale=1./255)

        # Creation order is kept (elevation before azimuth, train before valid).
        e_train_gen = _flow(train_data_generator, df_train, 'Elevation', shuffle=True, seed=77)
        a_train_gen = _flow(train_data_generator, df_train, 'Azimuth', shuffle=True, seed=77)
        e_val_gen = _flow(valid_data_generator, df_valid, 'Elevation', shuffle=False, seed=777)
        a_val_gen = _flow(valid_data_generator, df_valid, 'Azimuth', shuffle=False, seed=777)

        return e_train_gen, a_train_gen, e_val_gen, a_val_gen

In [7]:
def create_test_data():
    """Create the unshuffled test iterators and return them with the test frame.

    Only the test part of ``create_dataframes()`` is used. Both iterators
    read the same frame without shuffling, one labelled with ``Elevation``
    and one with ``Azimuth``.

    Returns:
        Tuple ``(e_test_gen, a_test_gen, df_test)``.
    """
    _, _, df_test = create_dataframes()

    test_data_generator = ImageDataGenerator(rescale=1./255)

    # Everything except the label column is identical for both iterators.
    shared_options = dict(
        dataframe=df_test,
        directory=_IMAGE_DIR,
        x_col='Filename',
        class_mode='sparse',
        target_size=(224, 224),
        color_mode='rgb',
        shuffle=False,
        seed=777,
        batch_size=_BATCH_SIZE,
    )

    e_test_gen = test_data_generator.flow_from_dataframe(y_col='Elevation', **shared_options)
    a_test_gen = test_data_generator.flow_from_dataframe(y_col='Azimuth', **shared_options)

    return e_test_gen, a_test_gen, df_test

### Generierung Modell (Angepasst für Klassifizierung)

In [13]:
def create_model():
    """Build and compile the VGG16-based classifier with two softmax heads.

    Architecture:
        * VGG16 convolutional base with ImageNet weights, no top,
          224x224x3 input.
        * Dense head: Flatten -> Dense(``_FIRST_NEURON``) -> LeakyReLU ->
          optional Dropout, followed by ``_HIDDEN_LAYERS`` further
          Dense/LeakyReLU/Dropout stages, each with half the units of the
          previous one.
        * Two outputs: ``elevation_output`` (18 classes) and
          ``azimuth_output`` (72 classes).

    Module-level settings read: ``_LEARNING_RATE``, ``_FIRST_NEURON``,
    ``_HIDDEN_LAYERS``, ``_DROPOUT_RATE``, ``_LEAKY_ALPHA``, ``_OPTIMIZER``,
    ``_IS_FINETUNING`` and ``_MODEL_TO_LOAD``.

    When ``_IS_FINETUNING`` is true, the weights in ``_MODEL_TO_LOAD`` are
    loaded, the learning rate is scaled by 1e-2 and only the first 15 VGG16
    layers are frozen. Otherwise the whole VGG16 base is frozen.

    Returns:
        The compiled Keras model (sparse categorical cross-entropy on both
        outputs with equal weights, accuracy metric).
    """
    # Start from an empty graph so repeated calls do not pile up layers.
    K.clear_session()

    lr = _LEARNING_RATE

    cnn = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # The head is built as its own model. Its input shape is expected to
    # match the output of VGG16's 'block5_pool' layer for 224x224 inputs.
    head_input = Input(shape=(7, 7, 512))
    x = Flatten()(head_input)

    x = Dense(units=_FIRST_NEURON, activation=None, kernel_initializer=glorot_uniform(seed=1))(x)
    x = LeakyReLU(alpha=_LEAKY_ALPHA)(x)
    if _DROPOUT_RATE > 0.0:
        x = Dropout(rate=_DROPOUT_RATE)(x)

    units = _FIRST_NEURON
    for _ in range(_HIDDEN_LAYERS):
        units = units // 2
        x = Dense(units=units, activation=None, kernel_initializer=glorot_uniform(seed=1))(x)
        # Use the configured slope here as well; this was previously a
        # hard-coded 0.1 that ignored _LEAKY_ALPHA.
        x = LeakyReLU(alpha=_LEAKY_ALPHA)(x)
        if _DROPOUT_RATE > 0.0:
            x = Dropout(rate=_DROPOUT_RATE)(x)

    elevation_output = Dense(18, activation='softmax', name='elevation_output')(x)
    azimuth_output = Dense(72, activation='softmax', name='azimuth_output')(x)

    head = Model(inputs=head_input, outputs=[elevation_output, azimuth_output])

    cnn_output = cnn.get_layer('block5_pool').output
    full_output = head(cnn_output)
    full_model = Model(inputs=cnn.input, outputs=full_output)

    if _IS_FINETUNING:
        lr = lr * 1e-2
        full_model.load_weights(_MODEL_TO_LOAD)
        for layer in cnn.layers[:15]:
            layer.trainable = False
    else:
        cnn.trainable = False
        # The layers are frozen individually as well; the slice bound is
        # simply larger than the number of layers, so it covers all of them.
        for layer in cnn.layers[:25]:
            layer.trainable = False

    # Compile after the trainable flags are set.
    full_model.compile(
        optimizer=_OPTIMIZER(lr=lr),
        loss='sparse_categorical_crossentropy',
        loss_weights=[1., 1.],
        metrics=['accuracy'],
    )

    return full_model

# Parameter

### Dateisystem

In [9]:
# Identifiers that are combined into the dataset and log directory paths below.
# Run number; first component of the log directory name.
_RUN = '96'
# Label used in the log directory name.
# NOTE(review): create_model() compiles with sparse categorical cross-entropy,
# so 'MSE' is only a directory label here -- confirm it is still intended.
_LOSS = 'MSE'
# Name of the dataset folder that holds the images and images.csv.
_DATASET_NAME = 'Dataset_2019-08-13'

In [10]:
# Dataset location: the images and their index CSV live in the same folder.
_IMAGE_DIR = 'D:\\tkroiss\\Datasets\\{}\\'.format(_DATASET_NAME)
_CSV_FILE = _IMAGE_DIR + 'images.csv'

# One log directory per run; refuse to continue if it already exists so an
# earlier run is not overwritten.
_LOG_DIR = 'E:\\tkroiss\\{}_Klassifizierung_{}\\'.format(_RUN, _LOSS)
assert not os.path.exists(_LOG_DIR), 'Vorsicht, Verzeichnis existiert bereits!'
os.makedirs(_LOG_DIR)

AssertionError: Vorsicht, Verzeichnis existiert bereits!

### Hyperparameter

In [11]:
# Number of rows taken from the end of the shuffled CSV as the test set.
_TEST_SAMPLES = 10000
# Batch size of all image iterators; train/valid sizes are rounded down to a multiple of it.
_BATCH_SIZE = 64
# Base learning rate; create_model() scales it by 1e-2 when fine-tuning.
_LEARNING_RATE = 1e-3
# Dropout rate after each dense stage; dropout layers are skipped when this is 0.
_DROPOUT_RATE = 0.25
# Units of the first dense layer; each further hidden layer halves the width.
_FIRST_NEURON = 1024
# Number of additional dense stages after the first one.
_HIDDEN_LAYERS = 2
# Optimizer class; instantiated in create_model() with the learning rate.
_OPTIMIZER = Adam
# NOTE(review): not referenced anywhere in the visible notebook.
_ACTIVATION = 'leakyrelu'
# Negative slope passed to LeakyReLU in create_model().
_LEAKY_ALPHA = 0.1

# Operationen

### Training (Base)

In [None]:
# Stage 1: train the dense head while create_model() keeps the VGG16 base frozen.
_IS_FINETUNING = False
_USE_DATA_AUGMENTATION = False
_MODEL_TO_LOAD = None
_TRAIN_SAMPLES = 20000
_NUM_EPOCHS = 1

e_train_gen, a_train_gen, e_val_gen, a_val_gen = create_train_data()

model = create_model()

# Keep only the epoch with the lowest validation loss. The previous settings
# (monitor='val_accuracy', mode='min', save_best_only=False) wrote the file
# after every epoch, so "Best_Weights" was really "last weights", and a 'min'
# mode contradicts an accuracy metric.
checkpointer = callbacks.ModelCheckpoint(
    filepath=_LOG_DIR + 'Best_Weights_FC.hdf5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

csv_logger = callbacks.CSVLogger(
    filename=_LOG_DIR + 'Logger_FC.csv',
    separator=',',
    append=False,
)

# Whole batches only: the step counts drop any trailing partial batch.
startTime = datetime.now()
history = model.fit_generator(
    generator=prepare_train_data(e_train_gen, a_train_gen),
    steps_per_epoch=e_train_gen.n//e_train_gen.batch_size,
    validation_data=prepare_train_data(e_val_gen, a_val_gen),
    validation_steps=e_val_gen.n//e_val_gen.batch_size,
    callbacks=[csv_logger, checkpointer],
    epochs=_NUM_EPOCHS,
)
print("Time taken:", datetime.now() - startTime)

### Training (Fine)

In [None]:
# Stage 2: fine-tuning. create_model() loads _MODEL_TO_LOAD, lowers the
# learning rate and freezes only the first 15 VGG16 layers.
_IS_FINETUNING = True
_USE_DATA_AUGMENTATION = True
# Currently resumes from an earlier fine-tuning checkpoint. To start from the
# stage-1 result instead, use _LOG_DIR + 'Best_Weights_FC.hdf5'.
_MODEL_TO_LOAD = _LOG_DIR + 'Best_Weights_CNN_Epoche_0-12.hdf5'
_TRAIN_SAMPLES = 100000
_NUM_EPOCHS = 188

e_train_gen, a_train_gen, e_val_gen, a_val_gen = create_train_data()

model = create_model()

# Keep only the epoch with the lowest validation loss, consistent with the
# learning-rate reducer and early stopping below. The previous settings
# (monitor='val_accuracy', mode='min', save_best_only=False) overwrote the
# file every epoch, so after early stopping it held the weights from
# `patience` epochs past the best one.
checkpointer = callbacks.ModelCheckpoint(
    filepath=_LOG_DIR + 'Best_Weights_CNN.hdf5',
    monitor='val_loss',
    verbose=1,
    save_weights_only=False,
    save_best_only=True,
    mode='min',
)

csv_logger = callbacks.CSVLogger(
    filename=_LOG_DIR + 'Logger_CNN.csv',
    separator=',',
    append=False,
)

# Divide the learning rate by 10 after 10 epochs without val_loss improvement.
lr_reducer = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=10,
    verbose=1,
    mode='min',
    min_delta=0.0001,
)

# NOTE(review): this rebinds `early_stopper`, which the import cell also
# imports from talos.model.
early_stopper = callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=15,
    verbose=1,
    mode='min',
)

with tf.device('/gpu:0'):

    startTime = datetime.now()
    history = model.fit_generator(
        generator=prepare_train_data(e_train_gen, a_train_gen),
        steps_per_epoch=e_train_gen.n//e_train_gen.batch_size,
        validation_data=prepare_train_data(e_val_gen, a_val_gen),
        validation_steps=e_val_gen.n//e_val_gen.batch_size,
        callbacks=[
            checkpointer,
            csv_logger,
            lr_reducer,
            early_stopper,
        ],
        epochs=_NUM_EPOCHS,
    )
    print("Time taken:", datetime.now() - startTime)

### Prognose

In [15]:
# _IS_FINETUNING is switched on so that create_model() loads the weights
# named in _MODEL_TO_LOAD. With _TRAIN_SAMPLES = 0 the train/valid splits
# come out empty, and only the test split is used here.
_IS_FINETUNING = True
_TRAIN_SAMPLES = 0
_MODEL_TO_LOAD = _LOG_DIR + 'Best_Weights_CNN.hdf5'

model = create_model()

e_test_gen, a_test_gen, df_test = create_test_data()

# Whole batches only: a trailing partial batch is not predicted.
test_batches = prepare_test_data(e_test_gen, a_test_gen)
test_steps = e_test_gen.n//e_test_gen.batch_size
predictions = model.predict_generator(
    generator=test_batches,
    steps=test_steps,
    verbose=1,
)

Found 10000 validated image filenames belonging to 18 classes.
Found 10000 validated image filenames belonging to 72 classes.


In [None]:
# Only whole batches were predicted, so the result covers this many test rows.
num_predictions = e_test_gen.n//e_test_gen.batch_size*e_test_gen.batch_size


def _decode_angles(class_probabilities, class_indices):
    """Map each row of class probabilities to the angle label of its top class.

    The labels are strings, and the iterator assigns class indices in sorted
    label order (so '10' comes before '5'). Index ``i`` therefore does NOT
    mean ``(i + 1) * 5``, which is what the previous decoding assumed. The
    iterator's own ``class_indices`` mapping is inverted instead.

    Args:
        class_probabilities: Array of shape (samples, classes).
        class_indices: Dict mapping label string to class index.

    Returns:
        List of integer angles, one per sample.
    """
    angle_by_index = {index: int(label) for label, index in class_indices.items()}
    return [angle_by_index[int(np.argmax(sample))] for sample in class_probabilities]


# NOTE(review): the test iterators' mappings are used because they are what
# this cell has in scope. This assumes the training data contained the same
# set of labels, so both mappings agree -- confirm.
df_result = pd.DataFrame({'Filename': df_test['Filename'][0:num_predictions],
                          'Elevation_true': df_test['Elevation'][0:num_predictions],
                          'Elevation_pred': _decode_angles(predictions[0], e_test_gen.class_indices),
                          'Elevation_err': None,
                          'Azimuth_true': df_test['Azimuth'][0:num_predictions],
                          'Azimuth_pred': _decode_angles(predictions[1], a_test_gen.class_indices),
                          'Azimuth_err': None,}
                        )

In [None]:
# The ground-truth labels are stored as strings; convert them once per column.
elevation_true = df_result['Elevation_true'].astype(int)
azimuth_true = df_result['Azimuth_true'].astype(int)

# Elevation is not periodic: plain absolute difference.
df_result['Elevation_err'] = (df_result['Elevation_pred'] - elevation_true).abs()

# Azimuth wraps at 360, so take the shorter way round the circle.
azimuth_diff = (df_result['Azimuth_pred'] - azimuth_true).abs()
df_result['Azimuth_err'] = np.minimum(azimuth_diff, (360 - azimuth_diff).abs())

err_ele_sum = df_result['Elevation_err'].sum()
err_azi_sum = df_result['Azimuth_err'].sum()

err_ele_avg = err_ele_sum/num_predictions
err_azi_avg = err_azi_sum/num_predictions

print('Fehler Elevation: {}'.format('%.3f'%err_ele_avg))
print('Fehler Azimut: {}'.format('%.2f'%err_azi_avg))

# The mean errors are embedded in the file name of the exported predictions.
df_result.to_csv(_LOG_DIR + 'Prognosen_ErrE_{}_ErrA_{}.csv'.format('%.2f'%err_ele_avg, '%.2f'%err_azi_avg), index=False)
df_result.head()