In [1]:
import os
import shutil
from tqdm import tqdm
import cv2
import gc
import pandas as pd
gc.enable()
import numpy as np
from sklearn import metrics, preprocessing
import pandas as pd
import datetime
import logging
import absl.logging
# Detach absl's handler from the root logger and switch off its pre-init
# stderr warning flag.
# NOTE(review): both lines touch private absl attributes (leading underscore),
# so they may break on an absl upgrade — confirm against the pinned version.
logging.root.removeHandler(absl.logging._absl_handler)
absl.logging._warn_preinit_stderr = False

# Route INFO-and-above records to training_logs.log with a timestamped format.
# basicConfig() only configures the root logger when it has no handlers yet,
# which is presumably why the absl handler is removed first — keep this order.
logging.basicConfig(filename='training_logs.log',
                    level = logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    datefmt='%d-%b-%y %H:%M:%S')

In [2]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Limit this process's GPU memory fraction to 0.5 so the GPU can be shared.
# The session options live under their own name (`tf_config`): the name
# `config` is bound to the ConfigParser holding the training configuration in
# a later cell, and re-running this cell must not clobber it.
tf_config = tf.ConfigProto()
tf_config.gpu_options.per_process_gpu_memory_fraction = 0.5
set_session(tf.Session(config=tf_config))

Using TensorFlow backend.


In [None]:
import ast
import configparser

# Parse the training configuration. ConfigParser returns every value as a
# string; numeric values are converted where they are used (int(...)/float(...)).
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail here with a clear message instead of a
# confusing KeyError on the first section lookup below.
if not config.read('training_config.conf'):
    raise FileNotFoundError("could not read 'training_config.conf'")

# csv file path for actual target and images names
csv_file = config['paths']['csv_file']
# images folder path
images_dir = config['paths']['images_dir']
# name of the model to be saved for vgg
model_name = config['paths']['model_name']

# training images directory
train_images_dir = config['train_paths']['train_images_dir']
# training csv file
train_csv = config['train_paths']['train_csv']

# validation images directory
val_images_dir = config['val_paths']['val_images_dir']
# validation csv file
val_csv = config['val_paths']['val_csv']

# test images directory
test_images_dir = config['test_paths']['test_images_dir']
# test csv file
test_csv = config['test_paths']['test_csv']

# learning rate to train model on (string; converted with float() at use site)
lr = config['model_params']['lr']
# training batch size (string; converted with int() at use site)
train_batch = config['model_params']['train_batch']
# validation batch size (string; converted with int() at use site)
val_batch = config['model_params']['val_batch']
# test batch size (string; converted with int() at use site)
test_batch = config['model_params']['test_batch']

# target size of images, written in the config file as a Python literal such
# as "(1024, 128)". literal_eval only accepts literals, so arbitrary code in
# the config file is rejected instead of executed (unlike eval).
target_size = ast.literal_eval(config['model_params']['target_size'])
# image color format
image_color_mode = config['model_params']['image_color_mode']
# whether to use pretrained model, if yes then pretrained weights file else None
# NOTE(review): the value is read as a string, so a literal "None" in the file
# arrives as the string 'None', not the None object — confirm the model builder
# handles that.
pretrained_weights = config['model_params']['pretrained_weights']
# whether to load initial weights or random weights
initial_weights = config['model_params']['initial_weights']

# Echo the full configuration to stdout and to the training log so that each
# run records the exact settings it was started with.
print('model configuration-')
logging.info(datetime.datetime.now())
logging.info('training configuration:')
for section in ('paths', 'train_paths', 'val_paths', 'test_paths', 'model_params'):
    for option, value in config[section].items():
        # ConfigParser values are strings, so they can be formatted directly.
        logging.info(f'{option} {value}')
        print(option, value)

In [5]:
# Augmentation / preprocessing options handed to the data generators.
# NOTE(review): if these end up in Keras' ImageDataGenerator, rotation_range
# is expressed in degrees, so 0.2 is a very small rotation — confirm intent.
data_gen_args = {
    'rotation_range': 0.2,
    'width_shift_range': 0.05,
    'height_shift_range': 0.05,
    'shear_range': 0.05,
    'zoom_range': 0.05,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
    'rescale': 1.0 / 255,
}

# Record the augmentation settings alongside the rest of the run's log.
logging.info('data_gen_args:')
for option, setting in data_gen_args.items():
    logging.info(f'{option} {setting}')

In [6]:
from dataFlatten import trainGenerator, validationGenerator

# Training batches use the full augmentation settings from data_gen_args.
train_generator = trainGenerator(int(train_batch), train_images_dir, pd.read_csv(train_csv),
                                 data_gen_args, save_to_dir=None, image_color_mode=image_color_mode,
                                 target_size=target_size)

# Validation images get only the pixel rescaling, never the random
# augmentation: the validation metric drives checkpointing and early stopping,
# so it should be measured on unmodified images (the evaluation cells below
# also pass a rescale-only dict).
# NOTE(review): assumes validationGenerator accepts a rescale-only dict the
# same way testGenerator does — confirm in dataFlatten.
val_gen_args = dict(rescale=data_gen_args.get('rescale'))
validation_generator = validationGenerator(int(val_batch), val_images_dir, pd.read_csv(val_csv),
                                           val_gen_args, save_to_dir=None, image_color_mode=image_color_mode,
                                           target_size=target_size)

Found 2430 validated image filenames.
Found 300 validated image filenames.


In [10]:
from vgg_regressor import vgg_12_freezed

# Build the regressor. The input shape is (height, width, 3): the channel
# count is fixed at 3 here regardless of image_color_mode.
model = vgg_12_freezed(pretrained_weights=pretrained_weights,
                       input_size=(target_size[0], target_size[1], 3),
                       initial_weights=initial_weights,
                       lr=float(lr))

# Log which builder produced the model plus its layer summary. Logging the
# function object itself would only write an opaque repr with a memory address.
logging.info('model- ')
logging.info(getattr(vgg_12_freezed, '__name__', repr(vgg_12_freezed)))
model.summary(print_fn=logging.info)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1024, 128, 3)      0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 1024, 128, 64)     1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 1024, 128, 64)     36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 512, 64, 64)       0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 512, 64, 128)      73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 512, 64, 128)      147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 256, 32, 128)      0         
__________

In [11]:
from vgg_regressor import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Callbacks for the training run; both watch the validation MSE.
# (Presumably these are the Keras callbacks re-exported by vgg_regressor.)
early_stopping = EarlyStopping(
    monitor='val_mean_squared_error',
    patience=10,
    mode='min',
    verbose=1,
)
# Writes to the path in model_name, keeping only the best-scoring model.
model_checkpoint = ModelCheckpoint(
    model_name,
    monitor='val_mean_squared_error',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train for up to 60 epochs with checkpointing and early stopping attached.
# NOTE(review): steps_per_epoch=608 and validation_steps=150 are hard-coded;
# they look derived from the 2430 / 300 images reported by the generators and
# the configured batch sizes — confirm they still match if either changes.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=608,
    epochs=60,
    verbose=1,
    callbacks=[model_checkpoint, early_stopping],
    validation_data=validation_generator,
    validation_steps=150,
)

# predictions

In [9]:
from dataFlatten import testGenerator
import numpy as np
from sklearn import metrics
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are flattened before comparison, so a column vector of
    predictions with shape (n, 1) can be compared against a flat (n,) target
    without NumPy broadcasting the pair into an (n, n) matrix.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values; must hold the same number of elements as ``y_true``.

    Returns
    -------
    numpy.float64
        ``mean(|y_true - y_pred| / |y_true|) * 100``. Zero targets are
        clamped to machine epsilon in the denominator so the result stays
        finite instead of becoming inf/nan.

    Raises
    ------
    ValueError
        If the inputs are empty or contain a different number of elements.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.size != y_pred.size:
        raise ValueError(
            'y_true and y_pred must contain the same number of elements '
            '(got %d and %d)' % (y_true.size, y_pred.size)
        )
    if y_true.size == 0:
        raise ValueError('y_true and y_pred must not be empty')
    # Guard against division by zero for targets that are exactly 0.
    denominator = np.maximum(np.abs(y_true), np.finfo(np.float64).eps)
    return np.mean(np.abs(y_true - y_pred) / denominator) * 100

# train data

In [13]:
# Evaluate the trained model on the *training* set.
# Only the pixel rescaling is applied, no augmentation. The options use their
# own name so the augmentation dict `data_gen_args` used for training is not
# overwritten when this cell runs.
eval_gen_args = dict(rescale=1./255)

test_generator1 = testGenerator(int(train_batch), train_images_dir, pd.read_csv(train_csv),
                                eval_gen_args, x_col='field', y_col='target',
                                save_to_dir=None, image_color_mode=image_color_mode,
                                target_size=target_size)

# load the training csv file (ground-truth targets)
train_data = pd.read_csv(train_csv)

# make predictions for the training data
# NOTE(review): assumes testGenerator yields images in csv row order (no
# shuffling), otherwise predictions are misaligned with targets — confirm.
train_data['target_pred'] = model.predict_generator(test_generator1)

target = 'target'
y_true = train_data[[target]]
y_pred = train_data[[target + '_pred']]

# compute each metric once and reuse it for both printing and logging
mse = metrics.mean_squared_error(y_true, y_pred)
mae = metrics.mean_absolute_error(y_true, y_pred)

# print various performance metrics for the training data
print("RMSE: ", np.sqrt(mse))
print("MSE: ", mse)
print("MAE: ", mae)
print("MAPE: ", mean_absolute_percentage_error(y_true, y_pred))
logging.info('train mae- '+str(mae))

Found 2430 validated image filenames.
RMSE:  0.4860827328980316
MSE:  0.23627642322161913
MAE:  0.35924799914874456
MAPE:  13.97961940836135


# test data

In [14]:
# Evaluate the trained model on the held-out test set.
# Only the pixel rescaling is applied, no augmentation. The options use their
# own name so the augmentation dict `data_gen_args` used for training is not
# overwritten when this cell runs.
eval_gen_args = dict(rescale=1./255)

test_generator2 = testGenerator(int(test_batch), test_images_dir, pd.read_csv(test_csv),
                                eval_gen_args, x_col='field', y_col='target',
                                save_to_dir=None, image_color_mode=image_color_mode,
                                target_size=target_size)

# load test data csv file (ground-truth targets)
test_data = pd.read_csv(test_csv)

# make predictions for the test data
# NOTE(review): assumes testGenerator yields images in csv row order (no
# shuffling), otherwise predictions are misaligned with targets — confirm.
test_data['target_pred'] = model.predict_generator(test_generator2)

target = 'target'
y_true = test_data[[target]]
y_pred = test_data[[target + '_pred']]

# compute each metric once and reuse it for both printing and logging
mse = metrics.mean_squared_error(y_true, y_pred)
mae = metrics.mean_absolute_error(y_true, y_pred)

# print various performance metrics for the test data
print("RMSE: ", np.sqrt(mse))
print("MSE: ", mse)
print("MAE: ", mae)
print("MAPE: ", mean_absolute_percentage_error(y_true, y_pred))
logging.info('test mae- '+str(mae))

Found 270 validated image filenames.
RMSE:  0.757502316857466
MSE:  0.573809760044429
MAE:  0.5398256226664573
MAPE:  21.030569695167934
