In [None]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../')
from src.models import combined_cnn
from src.generator import generator_two_inputs
from src.loss import smoothed_binary_crossentropy
sys.path.append('/notebook')
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
% matplotlib inline

In [None]:
# Load the label table (filenames + labels) and derive the string-typed label columns.
sample = pd.read_csv('../labels/training_labels_updated_111219.csv')
sample = sample.assign(
    # temp_label: final_label rounded to the nearest integer, stored as a string ('0', '1', ...)
    temp_label=sample['final_label'].map(np.round).astype('int').astype('str'),
    # final_label: the original value, kept but converted to a string
    final_label=sample['final_label'].astype('str'),
)

# Hold out 20% of the rows for validation; the seed is fixed so the split is repeatable.
train_data, test_data = train_test_split(sample, test_size=0.2, random_state=10)

In [None]:
# # import dataframe with filenames and labels
# labels = pd.read_csv('../labels/training_labels.csv')
# labels['full_label'] = 'aerial_' + labels['AERIAL_Driveway'].astype(int).astype(str) + \
#                        '_gsv_' + labels['GSV_Driveway'].astype(int).astype(str)
# # use train test split
# train_data, test_data = train_test_split(labels, test_size = 0.25, random_state = 100)

In [None]:
# train_data = train_data[train_data.final_label != '0.5']

In [None]:
# Show (rows, columns) of the training and test splits.
train_data.shape, test_data.shape

In [None]:
# Number of training rows per rounded label (ADDR_NUM is only used as the column to count).
train_data.groupby('temp_label').agg({'ADDR_NUM': len})

The classes are imbalanced, so the minority class is upsampled in the next cell.

In [None]:
# Artificially oversample the minority class: every row whose temp_label is '0'
# is added two more times, so it ends up in the training frame three times.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0; row order
# and index values are the same as with the two chained appends.
# NOTE: this cell is not idempotent. Re-running it multiplies the '0' rows again,
# so run it exactly once after the train/test split.
zero_class = train_data[train_data.temp_label == '0']
train_data = pd.concat([train_data, zero_class, zero_class])

In [None]:
# Training hyper-parameters: rows per generator batch, and number of passes
# requested from the training call.
batch_size, epochs = 8, 10

In [None]:
# Build the combined network: a 128x128x3 street-view (gsv) input, a 128x128x4
# satellite input, and n_classes = 1.
model = combined_cnn(
    gsv_image_dim=(128, 128, 3),
    sat_image_dim=(128, 128, 4),
    n_classes=1,
)
model.summary()

In [None]:
# Class weights as an easy first attempt at the imbalanced-class problem:
# label 0.0 is weighted twice as heavily as label 1.0.
class_weight = {1.0: 1, 0.0: 2}

In [None]:
# Train the two-input model from generators and keep the per-epoch metrics in `history`.
#
# steps_per_epoch is sized from `train_data` (the upsampled frame actually fed to
# the generator), using true division before the ceil so a final partial batch
# still counts, and cast to int because a step count must be a whole number.
# The earlier expression floor-divided first (making the ceil a no-op) and used
# the full, unsplit `sample` frame.
# NOTE(review): validation only draws 5 batches per epoch, not the whole of
# test_data — confirm this is intended.
history = model.fit_generator(
                # training batches
                generator_two_inputs(train_data, aerial_dir = '../data/training/aerial_images/',
                gsv_dir ='../data/training/sv_images/', batch_size = batch_size,
                gsv_image_dim = (128,128, 3), aer_image_dim = (128,128, 4), y_column = 'temp_label'),

                # validation batches
                validation_data=generator_two_inputs(test_data, aerial_dir = '../data/training/aerial_images/',
                gsv_dir ='../data/training/sv_images/', batch_size = batch_size,
                gsv_image_dim = (128,128, 3), aer_image_dim = (128,128, 4), y_column = 'temp_label'),

                validation_steps = 5,
                steps_per_epoch=int(np.ceil(train_data.shape[0] / batch_size)),
                epochs=epochs,
#                 class_weight = class_weight,
                verbose=1)

In [None]:
# Side-by-side training / validation curves for accuracy, specificity and sensitivity.
# The dashed red line is the share of rows labelled 1 in the corresponding frame.
fig, axes = plt.subplots(1, 2, figsize=(18, 6))
panels = (
    # (panel title, history-key prefix, legend-label prefix, frame used for the baseline)
    ('Training', '', '', train_data),
    ('Validation', 'val_', 'val ', test_data),
)
for ax, (title, key_prefix, label_prefix, frame) in zip(axes, panels):
    ax.set_title(title, fontsize=20)
    for metric in ('acc', 'specificity', 'sensitivity'):
        ax.plot(history.history[key_prefix + metric], label=label_prefix + metric)
    baseline = frame.temp_label.astype('float32').sum() / frame.shape[0]
    ax.axhline(baseline, ls='--', c='r', label='baseline')
    ax.set_xticks(np.arange(0, epochs))
    ax.set_xlabel('epoch')
    ax.legend()

In [None]:
# Hand-picked street-view filenames used for a quick prediction check.
addresses_gsv_filename = [
    '1_ESSEX_ST.jpg',
    '8_GILMAN_ST.jpg',
    '9_MELVILLE_RD.jpg',
    '10_CENTRAL_ST.jpg',
    '14_MANSFIELD_ST.jpg',
]
# Keep only the label rows whose gsv_filename is in that list.
pred_sample = sample.loc[sample['gsv_filename'].isin(addresses_gsv_filename)]
pred_sample

In [None]:
# Run the model on a single generator batch drawn from the hand-picked rows.
# NOTE(review): batch_size is 8 but only five filenames were selected — confirm
# generator_two_inputs copes with a frame smaller than one batch.
model.predict_generator(
    generator_two_inputs(
        pred_sample,
        aerial_dir='../data/training/aerial_images/',
        gsv_dir='../data/training/sv_images/',
        batch_size=batch_size,
        gsv_image_dim=(128, 128, 3),
        aer_image_dim=(128, 128, 4),
        y_column='temp_label',
    ),
    steps=1,
)

### Save model

In [None]:
# Write the trained model to ../models/basicmodel.h5; the same file is read back
# with load_model further down.
model.save('../models/basicmodel.h5')

In [None]:
# Additionally store the weights on their own in a separate file.
model.save_weights('../models/basicmodel_weights.h5')

In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K

In [None]:
def sensitivity(y_true, y_pred):
    """Rounded true positives divided by rounded actual positives.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` itself are
    clipped to [0, 1] and rounded before being summed. ``K.epsilon()`` is added
    to the denominator so a batch without positives does not divide by zero.

    Args:
        y_true: Ground-truth label tensor (presumably 0/1 values — confirm
            against the generator).
        y_pred: Predicted score tensor, same shape as ``y_true``.

    Returns:
        Scalar backend tensor holding the ratio.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    positive_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(positive_mask) + K.epsilon())

def specificity(y_true, y_pred):
    """Rounded true negatives divided by rounded actual negatives.

    Works on the complements ``1 - y_true`` and ``1 - y_pred``: their
    element-wise product and ``1 - y_true`` are each clipped to [0, 1], rounded
    and summed. ``K.epsilon()`` is added to the denominator so a batch without
    negatives does not divide by zero.

    Args:
        y_true: Ground-truth label tensor (presumably 0/1 values — confirm
            against the generator).
        y_pred: Predicted score tensor, same shape as ``y_true``.

    Returns:
        Scalar backend tensor holding the ratio.
    """
    negatives = 1 - y_true
    rejected_mask = K.round(K.clip(negatives * (1 - y_pred), 0, 1))
    negative_mask = K.round(K.clip(negatives, 0, 1))
    return K.sum(rejected_mask) / (K.sum(negative_mask) + K.epsilon())

In [None]:
# Reload the saved model as a sanity check. The custom metric functions are passed
# by name so the saved references to them can be resolved.
# NOTE(review): if combined_cnn compiles with a custom loss (for example the imported
# smoothed_binary_crossentropy), it would need an entry here too — confirm.
chk = load_model(
    '../models/basicmodel.h5',
    custom_objects=dict(sensitivity=sensitivity, specificity=specificity),
)