In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [17]:
from google.cloud import storage
import pandas as pd
import numpy as np
from io import StringIO
import tensorflow as tf
from keras.layers import AveragePooling2D, MaxPooling2D
from keras.layers import Dense, Activation, Dropout
from keras.models import Sequential, Model
from keras.layers import Conv2D, Flatten, BatchNormalization, GlobalAveragePooling2D
from keras.utils import to_categorical
from keras.optimizers import SGD
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import array_to_img
from keras.preprocessing.image import save_img
from keras.utils import Sequence
from keras import regularizers
from keras.models import load_model
import os
import psutil
from generatorCNN import GeneratorCNN
import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, confusion_matrix
import functions as func
import datasets
import matplotlib as plt
from matplotlib import pyplot
import seaborn as sns
from keras import applications
import models

#from tqdm import tqdm


project='clean-composite-253713'
credentials_path = 'UAH TFM-eac84e3c4295.json'
bucket_name = 'tfmuah2019'

from numpy.random import seed
seed(1)

batch_size = 4
path = 'drive/My Drive/Colab Notebooks/'
num_classes = 2
model_name = 'resnet'

In [2]:
def plot_accuracy(hist):
    pyplot.plot(hist['acc'])
    pyplot.plot(hist['val_acc'])
    pyplot.legend(['train', 'test'], loc='upper left')
    
def plot_error(hist):
    pyplot.plot(hist['loss'])
    pyplot.plot(hist['val_loss'])
    pyplot.legend(['train', 'test'], loc='upper left')
    
def plot_confusion_matrix(y_pred, y_test):
    con_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
    con_mat_norm = np.around(con_mat / con_mat.sum(axis=1)[:, np.newaxis], decimals=2)
    con_mat_df = pd.DataFrame(con_mat_norm, index = [i for i in range(0,len(con_mat))], columns = [i for i in range(0,len(con_mat))])
    figure = pyplot.figure(figsize=(4, 4))
    sns.heatmap(con_mat_df, annot=True, cmap=plt.cm.Blues, square=True, cbar=False)
    pyplot.tight_layout()
    pyplot.ylabel('True label')
    pyplot.xlabel('Predicted label')
    pyplot.show()
    
def plot_roc_auc(y_pred, y_test):
    pyplot.plot([0, 1], [0, 1], 'k--')
    #n_curvas = 1 if len(y_test) == 1 else y_test.shape[1]
    for i in range(0,1):
        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        auc_keras = auc(fpr, tpr)  
        pyplot.plot(fpr, tpr, label='Class ' + str(i) + ' (area = {:.3f})'.format(auc_keras))
    pyplot.xlabel('False positive rate')
    pyplot.ylabel('True positive rate')
    pyplot.title('ROC curve')
    pyplot.legend(loc='best')
    pyplot.show()

def plot_roc_auc_threshold(y_pred, y_test):
    pyplot.plot([0, 1], [0, 1], 'k--')
    valores = [0.1,0.3,0.5, 0.7, 0.9]
    for i in valores:
        y_array_pred = output(y_pred, threshold=i)
        y_array_test = output(y_test)
        fpr, tpr, thresholds = roc_curve(y_array_pred, y_array_test)
        auc_keras = auc(fpr, tpr)  
        acc = accuracy(y_array_pred, y_array_test)
        pyplot.plot(fpr, tpr, label='Thrs. ' + str(i) + ' (area= {:.3f})'.format(auc_keras)+' (acc= {:.3f})'.format(acc))
    pyplot.xlabel('False positive rate')
    pyplot.ylabel('True positive rate')
    pyplot.title('ROC curve')
    pyplot.legend(loc='best')
    pyplot.show()

def output(y_pred, threshold=0.5):
    if len(y_pred.shape) > 1:
        if y_pred.shape[1] > 1:
            return [np.argmax(x) for x in y_pred]
        else:
            return [ 1 if x >= threshold else 0 for x in y_pred ]
    else:
        return [ 1 if x >= threshold else 0 for x in y_pred ]

def accuracy(y_pred, y_test):
    acc = np.array([ y_pred[i] == y_test[i] for i in range(0,len(y_test))])
    return acc.sum()/len(acc)

### Convertir CSV a Feather

In [None]:
X_train, y_train, X_validation, y_validation = datasets.local(path, num_train=15, num_validation=15)

In [11]:
files = glob.glob('train_simple_up/*')
df = func.toDF_all(files, reb=False)
X_train, y_train = func.split(df, categories=2)
X_train = X_train / 255
print('X train shape', X_train.shape)
print('y train shape', y_train.shape)

files = glob.glob('validation_simple_up/*')
df = func.toDF_all(files, reb=False)
X_validation, y_validation = func.split(df, categories=2)
X_validation = X_validation / 255
print('X validation shape', X_validation.shape)
print('y validation shape', y_validation.shape)

shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])

X train shape (15912, 144, 144, 3)
y train shape (15912,)
X validation shape (1976, 144, 144, 3)
y validation shape (1976,)


In [5]:
X = np.concatenate((X_train, X_test), axis=0)
y = np.concatenate((y_train, y_test), axis=0)

#for i in range(0,4):
#    X = np.concatenate((X, X_train, X_test), axis=0)
#    y = np.concatenate((y, y_train, y_test), axis=0)
print('X shape', X.shape)
print('y shape', y.shape)

NameError: name 'X_test' is not defined

### Crear Generadores desde Local para entrenar en Batch

In [None]:
files_train = glob.glob('train_simple_up/*')
files_validation = glob.glob('validation_simple_up/*')
gen_train, gen_validation = datasets.generators(files_train=files_train, files_validation=files_validation, num_classes=num_classes, batch_size=batch_size)
shape = (gen_train.shape()[1], gen_train.shape()[2], gen_train.shape()[3])
shape

### Generar Modelo y Entrenar

In [None]:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

In [None]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [18]:
model = models.model(model_name, shape=shape, num_classes=num_classes)
#model = models.model('load', load_path=path + 'model_resnet_up_batchsize8_3.h5')



Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
ResNet50 model loaded


In [None]:
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=50, verbose=1, validation_data=(X_validation,y_validation))

## Entrenar con generadores desde local
#history = model.fit_generator(gen_train, validation_data=gen_validation, epochs=2, verbose=1, use_multiprocessing=True)

Train on 15912 samples, validate on 1976 samples
Epoch 1/50

### Resultados

In [None]:
df_history = pd.read_csv(path + 'history_' + model_name + '_up_batchsize8_3.csv')

In [None]:
df_history = pd.DataFrame(history.history)

In [None]:
df_history = pd.concat([df_history, pd.DataFrame(history.history)], ignore_index=True)

In [None]:
df_history.head()

In [None]:
plot_accuracy(df_history)

In [None]:
plot_error(df_history)

In [None]:
# Cargar dataset balanceado de test
df = func.toDF_all(glob.glob(path + 'test_simple_up/*'), reb=False)
X_test, y_test = func.split(df, categories=num_classes)
X_test = X_test / 255


In [None]:
y_pred = model.predict(X_test)


In [None]:
y_array_pred = output(y_pred, threshold=0.5)
y_array_test = output(y_test)
plot_confusion_matrix(y_array_pred, y_array_test)

In [None]:
plot_roc_auc_threshold(y_pred, y_test)

In [None]:
acc = np.array([ y_array_pred[i] == y_array_test[i] for i in range(0,len(y_array_test))])
print("Accuracy: ", acc.sum()/len(acc) )

In [None]:
model.save(path + 'model_' + model_name + '_up_batchsize8_3.h5')
model.save_weights(path + 'weight_' + model_name + '_up_batchsize8_3.h5')
df_history.to_csv(path + 'history_' + model_name + '_up_batchsize8_3.csv')