In [None]:
import pandas as pd
import keras
import numpy as np
import sys
sys.path.append('../')
from src.models import three_input_model
from src.generator import generator_three_inputs
sys.path.append('/notebook')
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
% matplotlib inline

In [None]:
# Read the table of filenames and labels.
sample = pd.read_csv('../labels/training_labels_updated_111219.csv')

# temp_label: final_label rounded to the nearest integer, stored as a string.
# final_label is then itself converted to a string column.
sample['temp_label'] = sample['final_label'].round().astype('int').astype('str')
sample['final_label'] = sample['final_label'].astype('str')

# Hold out 25% of the rows for validation; the split is fixed by random_state.
train_data, test_data = train_test_split(sample, test_size=0.25, random_state=100)

In [None]:
# Artificially oversample the minority class: rows whose temp_label is '0' are
# added two more times, so each of them appears three times in train_data.
#
# pd.concat replaces the chained DataFrame.append calls; DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0. Index labels are kept
# (no ignore_index), exactly as the .append chain produced them.
#
# NOTE: re-running this cell on an already oversampled train_data duplicates
# the '0' rows again -- re-run the train/test split first.
zero_class = train_data[train_data.temp_label == '0']
train_data = pd.concat([train_data, zero_class, zero_class])

In [None]:
# Show the elements of chk2 that are absent from chk1, in chk2's order.
# NOTE(review): chk1 and chk2 are not defined in any cell visible here, so this
# raises NameError on a fresh kernel unless they are created elsewhere.
[item for item in chk2 if item not in chk1]

In [None]:
### test tabular
# Build a placeholder tabular frame: the MBL column of `sample` plus two
# synthetic, normally distributed feature columns.
#
# Changes from the earlier version:
#   * .assign() builds a new frame, so no columns are set on a selection of
#     `sample` (which triggered SettingWithCopyWarning) and no inplace drop
#     is needed;
#   * ADDR_NUM was selected only to be dropped again, so it is no longer
#     selected at all -- the resulting columns are the same;
#   * the random draws come from a seeded local generator, so the synthetic
#     columns are reproducible across kernel restarts.
_tabular_rng = np.random.RandomState(100)
_n_rows = sample.shape[0]
tabular_df = sample[['MBL']].assign(
    test_col=_tabular_rng.normal(size=_n_rows),          # mean 0, std 1
    test_col2=_tabular_rng.normal(2, 3, size=_n_rows),   # mean 2, std 3
)

In [None]:
# Input parameters: generator batch size and number of training epochs.
batch_size, epochs = 8, 5

In [None]:
# Instantiate the three-input model: 2 tabular columns, a 128x128x3 "gsv" image
# input and a 128x128x4 "sat" image input, one output class, binary
# cross-entropy loss. Then display its summary.
model = three_input_model(
    n_tabular_cols=2,
    gsv_image_dim=(128, 128, 3),
    sat_image_dim=(128, 128, 4),
    n_classes=1,
    loss_fn='binary_crossentropy',
)
model.summary()

In [None]:
# Implement class weights as an easy first attempt at handling the class-imbalance problem (currently disabled).
# class_weight = {1.0: 1,
#                 0.0: 2}

In [None]:
def _make_generator(frame):
    """Return a three-input batch generator over the rows of `frame`.

    The training and validation generators differ only in the dataframe they
    draw from; every other argument is shared and declared once here.
    """
    return generator_three_inputs(
        frame, tabular_df, ['test_col', 'test_col2'],
        aerial_dir='../data/training/aerial_images/',
        gsv_dir='../data/training/sv_images/',
        batch_size=batch_size,
        gsv_image_dim=(128, 128, 3),
        aer_image_dim=(128, 128, 4),
        y_column='temp_label',
    )


# One epoch should be one pass over the training rows. The earlier expression,
# np.ceil(sample.shape[0] // batch_size), sized the epoch from the full unsplit
# table and floored before taking the ceiling (so the ceil did nothing); it
# also produced a numpy float rather than an int.
train_steps = int(np.ceil(train_data.shape[0] / batch_size))

history = model.fit_generator(
    # train
    _make_generator(train_data),
    # validation
    validation_data=_make_generator(test_data),
    # NOTE(review): only 5 batches (5 * batch_size rows) are drawn for
    # validation each epoch, not the whole of test_data -- confirm this is
    # intended.
    validation_steps=5,
    steps_per_epoch=train_steps,
    epochs=epochs,
    # class_weight = class_weight,
    verbose=1,
)

In [None]:
# Plot accuracy, specificity and sensitivity per epoch, training on the left and
# validation on the right. The dashed red "baseline" is the sum of temp_label
# divided by the number of rows in the corresponding dataframe.
fig, axes = plt.subplots(1, 2, figsize=(18, 6))

# (panel title, dataframe for the baseline, [(history key, legend label), ...])
panels = [
    ('Training', train_data, [
        ('acc', 'acc'),
        ('specificity', 'specificity'),
        ('sensitivity', 'sensitivity'),
    ]),
    ('Validation', test_data, [
        ('val_acc', 'val acc'),
        ('val_specificity', 'val specificity'),
        ('val_sensitivity', 'val sensitivity'),
    ]),
]

for ax, (panel_title, frame, curves) in zip(axes, panels):
    ax.set_title(panel_title, fontsize=20)
    for history_key, legend_label in curves:
        ax.plot(history.history[history_key], label=legend_label)
    baseline = frame.temp_label.astype('float32').sum() / frame.shape[0]
    ax.axhline(baseline, ls='--', c='r', label='baseline')
    ax.set_xticks(np.arange(0, epochs))
    ax.set_xlabel('epoch')
    ax.legend()

In [None]:
# Select the rows of `sample` whose gsv_filename is one of a handful of
# hand-picked addresses, and display them.
addresses_gsv_filename = [
    '1_ESSEX_ST.jpg',
    '8_GILMAN_ST.jpg',
    '9_MELVILLE_RD.jpg',
    '10_CENTRAL_ST.jpg',
    '14_MANSFIELD_ST.jpg',
]
is_selected = sample['gsv_filename'].isin(addresses_gsv_filename)
pred_sample = sample[is_selected]
pred_sample

In [None]:
# Predict on the hand-picked rows: build a three-input generator over
# pred_sample and ask the model for a single step (steps = 1) of predictions.
pred_generator = generator_three_inputs(
    pred_sample, tabular_df, ['test_col', 'test_col2'],
    aerial_dir='../data/training/aerial_images/',
    gsv_dir='../data/training/sv_images/',
    batch_size=batch_size,
    gsv_image_dim=(128, 128, 3),
    aer_image_dim=(128, 128, 4),
    y_column='temp_label',
)
model.predict_generator(pred_generator, steps=1)

In [None]:
# Write the JSON string returned by model.to_json() (the model architecture)
# to disk.
with open('../models/model_architecture.json', 'w') as architecture_file:
    architecture_json = model.to_json()
    architecture_file.write(architecture_json)

In [None]:
# Pickle the training-history dict (history.history) -- the recorded metrics,
# not the model itself -- so it can be reloaded later.
import pickle

with open('../models/imageandtabular_modelhistory.pkl', 'wb') as history_file:
    pickle.dump(history.history, history_file)

In [None]:
# Save the whole model to an HDF5 file.
# The earlier call also passed save_format='tf', which asks for the SavedModel
# format while the '.h5' filename asks for HDF5. Depending on the Keras version
# that argument is either overridden by the file extension or rejected
# outright, so it is dropped and the format follows the '.h5' suffix.
model.save('../models/imageandtabular_model.h5')

In [None]:
# Save only the model's weights to a separate .h5 file.
model.save_weights('../models/imageandtabular_model_weights.h5')

In [None]:
# Rebind `model` to a new combined_cnn network taking a 128x128x3 "gsv" image
# and a 128x128x4 "sat" image, with a single output class.
# NOTE(review): combined_cnn is not imported in any cell visible here (only
# three_input_model is imported from src.models), so this raises NameError on a
# fresh kernel unless it is defined elsewhere. It also replaces the `model`
# object used by the cells above.
model = combined_cnn(
    gsv_image_dim=(128, 128, 3),
    sat_image_dim=(128, 128, 4),
    n_classes=1,
)