In [1]:
"""
GOAL: Define and train the models, and collect the results so they can be analysed later.
V2  : Adds pre-trained networks.
"""
Autor='Diego Paredes'

In [2]:
#Manejo de Datos
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import boxcox
from sklearn.preprocessing import MinMaxScaler

#Machine learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau
import tensorflow_addons as tfa
from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.layers import BatchNormalization


from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix

#Librerias estandar (Extras)
import os
import pickle
import random
import re
import time
import traceback
from datetime import datetime


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.8.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [3]:
"""
Define the project's base path.
"""
# The file holds a single path. Strip surrounding whitespace so that a trailing
# newline added by an editor does not leak into every path derived from it.
with open('../../path_base.txt') as f:
    path_base = f.read().strip()
path_base

'C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes'

In [4]:
"""
General variables
"""
# Root folder that holds the satellite imagery.
path_imagenes = 'F:/GOES/'

# Product identifiers stored in the run configuration.
products = [
    'C13',
    'C07',
    'C08',
]
# Two-digit minute labels; the full hour ('00') comes last.
times = [f'{minute:02d}' for minute in (10, 20, 30, 40, 50, 0)]


In [5]:
!python --version
print(tf. __version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

#Limitamos el GPU, en caso se necesite
gpus = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_virtual_device_configuration(gpus[0],
                                                        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])

Python 3.9.112.8.0

Num GPUs Available:  1


In [6]:
# Section marker: the cells below define the helpers used to train and evaluate the model.
"""
Metodos para realizar el entrenamient - evaluacion del modelo
"""

'\nMetodos para realizar el entrenamient - evaluacion del modelo\n'

In [7]:
def getConv3D_func(p,run,input_1, shape=()):
    """Build the 3D convolutional feature extractor on top of ``input_1``.

    Args:
        p: Hyper-parameter dict; reads ``normLayer``, ``batchNorm``, ``cnn_cant``,
            ``cnn_units``, ``maxPool`` and ``droupout``, each indexed by ``run``.
        run: Index of the hyper-parameter combination to use.
        input_1: Keras tensor the convolution stack is applied to.
        shape: Forwarded as ``input_shape`` to the first Conv3D layer.

    Returns:
        The output tensor of the final GlobalMaxPool3D layer.
    """
    L = tf.keras.layers

    def conv_stage(tensor, filters, **conv_kwargs):
        # Conv3D -> optional BatchNormalization -> LeakyReLU
        tensor = L.Conv3D(filters, (3,3,3), padding='same',
                          kernel_initializer=HeNormal(), **conv_kwargs)(tensor)
        if p['batchNorm'][run]:
            tensor = L.BatchNormalization()(tensor)
        return L.LeakyReLU(alpha=0.01)(tensor)

    # Optional input rescaling by 1/65536.
    net = L.Rescaling(1./65536)(input_1) if p['normLayer'][run] else input_1

    # Fixed 32-filter stem followed by the configurable stages.
    net = conv_stage(net, 32, input_shape=(shape))
    for idx in range(p['cnn_cant'][run]):
        filters = p['cnn_units'][run][idx]
        use_pool = p['maxPool'][run][idx]
        drop_rate = p['droupout'][run][idx]

        net = conv_stage(net, filters)
        if use_pool:
            net = L.MaxPooling3D()(net)
        if drop_rate:
            net = L.Dropout(drop_rate)(net)

    return L.GlobalMaxPool3D()(net)

In [8]:
def getConv2D_func(p,run,input_1, shape=()):
    """Build the 2D convolutional feature extractor (functional API) on ``input_1``.

    Args:
        p: Hyper-parameter dict; reads ``normLayer``, ``batchNorm``, ``cnn_cant``,
            ``cnn_units``, ``maxPool`` and ``droupout``, each indexed by ``run``.
        run: Index of the hyper-parameter combination to use.
        input_1: Keras tensor the convolution stack is applied to.
        shape: Forwarded as ``input_shape`` to the first Conv2D layer.

    Returns:
        The output tensor of the final GlobalMaxPool2D layer.
    """
    layers_2d = tf.keras.layers
    with_batch_norm = lambda: p['batchNorm'][run]

    features = input_1
    if p['normLayer'][run]:
        # Optional input rescaling by 1/65536.
        features = layers_2d.Rescaling(1./65536)(input_1)

    # Stem: fixed 32-filter convolution.
    features = layers_2d.Conv2D(32, 3, input_shape=(shape), padding='same',
                                kernel_initializer=HeNormal())(features)
    if with_batch_norm():
        features = layers_2d.BatchNormalization()(features)
    features = layers_2d.LeakyReLU(alpha=0.01)(features)

    # Configurable stages: conv -> [batch norm] -> LeakyReLU -> [max pool] -> [dropout].
    stage_count = p['cnn_cant'][run]
    for stage in range(stage_count):
        filters = p['cnn_units'][run][stage]
        pool_here = p['maxPool'][run][stage]
        drop_rate = p['droupout'][run][stage]

        features = layers_2d.Conv2D(filters, 3, padding='same',
                                    kernel_initializer=HeNormal())(features)
        if with_batch_norm():
            features = layers_2d.BatchNormalization()(features)
        features = layers_2d.LeakyReLU(alpha=0.01)(features)

        if pool_here:
            features = layers_2d.MaxPooling2D()(features)
        if drop_rate:
            features = layers_2d.Dropout(drop_rate)(features)

    return layers_2d.GlobalMaxPool2D()(features)

In [9]:
def getConv2D(p,run):
    """Build the 2D convolutional feature extractor as a ``keras.Sequential`` model.

    Args:
        p: Hyper-parameter dict; reads ``margen``, ``canales``, ``normLayer``,
            ``batchNorm``, ``cnn_cant``, ``cnn_units``, ``maxPool`` and
            ``droupout``, each indexed by ``run``.
        run: Index of the hyper-parameter combination to use.

    Returns:
        A Sequential model ending in a GlobalMaxPool2D layer.
    """
    L = tf.keras.layers
    model = keras.Sequential()
    side = p['margen'][run]
    shape = (side, side, p['canales'][run])

    def add_conv_stage(filters, **conv_kwargs):
        # Conv2D -> optional BatchNormalization -> LeakyReLU
        model.add(L.Conv2D(filters, (3,3), padding='same',
                           kernel_initializer=HeNormal(), **conv_kwargs))
        if p['batchNorm'][run]:
            model.add(L.BatchNormalization())
        model.add(L.LeakyReLU(alpha=0.01))

    if p['normLayer'][run]:
        # Optional input rescaling by 1/65536.
        model.add(L.Rescaling(1./65536))

    # Stem: fixed 32-filter convolution that also declares the input shape.
    add_conv_stage(32, input_shape=shape)

    for idx in range(p['cnn_cant'][run]):
        filters = p['cnn_units'][run][idx]
        use_pool = p['maxPool'][run][idx]
        drop_rate = p['droupout'][run][idx]

        add_conv_stage(filters)
        if use_pool:
            model.add(L.MaxPooling2D())
        if drop_rate:
            model.add(L.Dropout(drop_rate))

    model.add(L.GlobalMaxPool2D())
    return model

In [10]:
def getPreTrainedModel(p, run, shape=()):
    """Return a ResNet50 backbone (ImageNet weights, no top) with every layer frozen.

    Args:
        p: Hyper-parameter dict; ``p['pre_trained'][run]`` is only printed here.
        run: Index of the hyper-parameter combination to use.
        shape: Forwarded to ResNet50 as ``input_shape``.

    Returns:
        The ResNet50 model with ``trainable = False`` set on each of its layers.
    """
    print('Se utilizara red pre entreanda : ',p['pre_trained'][run])

    backbone = tf.keras.applications.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=shape,
    )
    # Freeze the backbone layer by layer.
    for layer in backbone.layers:
        layer.trainable = False

    return backbone

In [11]:
def crearModelo(p,run, redTipo):
    """Assemble the full Keras model for one hyper-parameter combination.

    The image branch is selected by ``p['rnn_tipo'][run]`` ('LSTM', 'CONV3D' or
    'CONV2D'). Every entry of ``p['inputs'][run]`` after the first adds a scalar
    input that is concatenated with the image features before the dense head.

    Args:
        p: Hyper-parameter dict (lists indexed by ``run``).
        run: Index of the hyper-parameter combination to use.
        redTipo: 'Regresion' (linear unit) or 'Clasificacion' (sigmoid unit).

    Returns:
        The assembled ``tf.keras.Model``, or -1 when ``redTipo`` is not recognised.

    Raises:
        ValueError: if ``p['rnn_tipo'][run]`` is not one of the supported types.
    """
    shape = (p['tiempos'][run],p['margen'][run],p['margen'][run],p['canales'][run])
    print(shape)

    rnn_tipo = p['rnn_tipo'][run]
    if rnn_tipo == 'LSTM':
        input_1 = tf.keras.layers.Input(shape=shape)
        if p['pre_trained'][run]:
            convnet= getPreTrainedModel(p, run)
        else:
            convnet = getConv2D(p,run)
        # Apply the same CNN to every time step, then feed the sequence to the LSTM.
        timeD = tf.keras.layers.TimeDistributed(convnet)(input_1)
        timeD = layers.TimeDistributed(tf.keras.layers.Flatten())(timeD)
        _rnn =  tf.keras.layers.LSTM(p['rnn_units'][run])(timeD)
        listConcat = [_rnn]
    elif rnn_tipo == 'CONV3D':
        input_1 = tf.keras.layers.Input(shape=shape)
        convnet = getConv3D_func(p,run,input_1)
        listConcat = [convnet]
    elif rnn_tipo == 'CONV2D':
        input_1 = tf.keras.layers.Input(shape=shape[1:])
        if p['pre_trained'][run]:
            convnet= getPreTrainedModel(p, run, shape=shape[1:])
            # Call the backbone on input_1: using convnet.output would reference the
            # backbone's own input layer and leave input_1 disconnected from the graph.
            convnet = tf.keras.layers.Flatten()(convnet(input_1))
        else:
            convnet = getConv2D_func(p,run,input_1,shape=shape[1:])
        listConcat = [convnet]
    else:
        # Without a valid branch there is no input tensor to build on, so fail
        # with an explicit error that names the offending value.
        raise ValueError(f"ERROR: No se especifico un tipo de red correcto... {rnn_tipo}")

    listInputs = [input_1]

    # One scalar Input per extra attribute (everything after the image entry).
    for attr in p['inputs'][run][1:]:
        input_x = tf.keras.layers.Input(shape=(1,))
        listConcat.append(input_x)
        listInputs.append(input_x)

    if len(listConcat) > 1:
        final = tf.keras.layers.Concatenate()(listConcat)
    else:
        # Image-only model: there is nothing to concatenate.
        final = listConcat[0]

    dense_capas = [final]
    for iDense in range(p['dense_cant'][run]):
        units = p['dense_units'][run][iDense]
        droupout =  p['dense_droupout'][run][iDense]

        params_dense = {
            'units' : units,
        }

        # Any other 'dense_tipo' value leaves the layer without an activation argument.
        if p['dense_tipo'][run] == 'RELU':
            params_dense['activation'] = tf.keras.activations.relu
        if p['dense_tipo'][run] == 'SELU':
            params_dense['activation'] = tf.keras.activations.selu
        if p['kernel_regularizer'][run]:
            params_dense['kernel_regularizer'] = tf.keras.regularizers.l2(p['kernel_regularizer'][run][iDense])
        dense_capas.append(tf.keras.layers.Dense(**params_dense)(dense_capas[-1]))

        if droupout:
            dense_capas.append(tf.keras.layers.Dropout(droupout)(dense_capas[-1]))

    # Output head
    if redTipo == 'Regresion':
        output = tf.keras.layers.Dense(units=1)(dense_capas[-1])
    elif redTipo == 'Clasificacion':
        output = tf.keras.layers.Dense(units=1,activation=tf.keras.activations.sigmoid)(dense_capas[-1])
    else:
        # Report the argument itself: `p` is the hyper-parameter dict, so indexing
        # it with 'redTipo' inside the message could fail with a KeyError.
        print(f"No se pudo crear el modelo outputs no esta bien definido {redTipo}")
        return -1

    full_model = tf.keras.Model(inputs=listInputs, outputs=[output])

    return full_model

In [12]:
def getMetrics(params, HP, run):
    """Create the optimizer, loss, metrics and base callbacks for one run.

    Args:
        params: General training parameters; reads ``redTipo`` and ``paciencia``.
        HP: Hyper-parameter dict; reads ``lr`` and, for classification,
            ``optimizer``, ``lr_decay`` and ``loss`` (all indexed by ``run``).
        run: Index of the hyper-parameter combination to use.

    Returns:
        A dict with keys ``optimizer``, ``loss_fn``, ``train_acc_metric``,
        ``val_acc_metric``, ``metrics`` and ``callbacks``; or -1 when
        ``redTipo`` is not recognised.

    Raises:
        ValueError: for classification, when the configured optimizer or loss
            is not supported.
    """
    redTipo = params['redTipo']
    paciencia = params['paciencia']

    lr = HP['lr'][run]
    early_stopping = None

    if redTipo == 'Clasificacion':
        optimizer_name = HP['optimizer'][run]
        if optimizer_name == 'Adam':
            if HP['lr_decay'][run]>0:
                lr_decay = keras.optimizers.schedules.ExponentialDecay(
                    lr,
                    decay_steps=2000,  # tune the number of steps to the dataset size
                    decay_rate=HP['lr_decay'][run],
                    staircase=True
                )
                optimizer = keras.optimizers.Adam(learning_rate=lr_decay)
            else:
                optimizer = keras.optimizers.Adam(learning_rate=lr)
        elif optimizer_name == 'RMSprop':
            optimizer = keras.optimizers.RMSprop(learning_rate=lr)
        else:
            # Previously this fell through and failed later with an unbound variable.
            raise ValueError(f'Optimizer no soportado: {optimizer_name}')

        if HP['loss'][run] == 'binary_crossentropy':
            loss_fn= keras.losses.BinaryCrossentropy()
        else:
            raise ValueError(f"Loss no soportada: {HP['loss'][run]}")

        train_acc_metric = keras.metrics.BinaryCrossentropy()
        val_acc_metric = keras.metrics.BinaryCrossentropy()

        if paciencia:
            early_stopping = keras.callbacks.EarlyStopping(monitor="val_auc", patience=paciencia, mode="max")

        # The AUC metric is named explicitly: Keras appends a numeric suffix
        # ('auc_1', 'auc_2', ...) to auto-named metrics created later in the same
        # session, and then the 'val_auc' monitor above would never match.
        metrics = ['acc', keras.metrics.TruePositives(),
                         keras.metrics.TrueNegatives(),
                         keras.metrics.FalsePositives(),
                         keras.metrics.FalseNegatives(),
                         keras.metrics.AUC(name='auc'),
                  ]

    elif redTipo == 'Regresion':
        optimizer = keras.optimizers.Adam(learning_rate=lr)
        loss_fn=keras.losses.MeanSquaredError()
        train_acc_metric = keras.metrics.MeanSquaredError()
        val_acc_metric = keras.metrics.MeanSquaredError()
        if paciencia:
            # MSE improves when it decreases, so the monitor must use mode="min".
            early_stopping = keras.callbacks.EarlyStopping(monitor="val_mse", patience=paciencia, mode="min")
        metrics = ['mse']

    else:
        print('No se pudo crear las metricas')
        return -1

    # A plain Callback instance is always the first entry of the list.
    logs = Callback()
    callbacks = [logs]
    if early_stopping is not None:
        callbacks.append(early_stopping)

    metrics = {'optimizer': optimizer, 'loss_fn':loss_fn,'train_acc_metric': train_acc_metric,
               'val_acc_metric': val_acc_metric, 'metrics': metrics,'callbacks': callbacks}

    return metrics
        

In [13]:
def applyDA(img, DA):
    """Apply the data-augmentation transform selected by ``DA`` to ``img``.

    Codes:
        0 (or any other value): image returned unchanged
        1: horizontal flip (FH)
        2: vertical flip (FV)
        3: FH followed by FV
        4: 90 degree rotation (R90)
        5: 270 degree rotation (R270)
        6: FH followed by R90
        7: FH followed by R270

    Args:
        img: Image passed to the ``tf.image`` flip / rot90 operations.
        DA: Augmentation code.

    Returns:
        The transformed image, or ``img`` itself when no code matches.
    """
    # NOTE(review): read_png_file calls this from inside Dataset.map with
    # item['DA']; the plain if/elif chain is kept as-is on purpose — confirm
    # before restructuring it.
    if DA == 1:
        img = tf.image.flip_left_right(img)   # FH
        return img
    elif DA == 2:
        img = tf.image.flip_up_down(img)      # FV
        return img
    elif DA == 3:
        img = tf.image.flip_left_right(img)   # FH
        img = tf.image.flip_up_down(img)      # FV
        return img
    elif DA == 4:
        img = tf.image.rot90(img, k=1)        # R90
        return img
    elif DA == 5:
        img = tf.image.rot90(img, k=3)        # R270
        return img
    elif DA == 6:
        img = tf.image.flip_left_right(img)   # FH
        img = tf.image.rot90(img, k=1)        # R90
        return img
    elif DA == 7:
        img = tf.image.flip_left_right(img)   # FH
        img = tf.image.rot90(img, k=3)        # R270
        return img
    else:
        return img

In [14]:
def get_sat_time_steps(n):
    """Return the list of time-step indices to load when ``n`` steps are requested.

    Supported counts are 2 to 6; any other value yields ``[5]``.
    A new list is returned on every call.
    """
    steps_by_count = (
        (2, (5, 0)),
        (3, (5, 3, 0)),
        (4, (5, 3, 2, 0)),
        (5, (5, 3, 2, 1, 0)),
        (6, (5, 4, 3, 2, 1, 0)),
    )
    for count, steps in steps_by_count:
        if n == count:
            return list(steps)

    # Fallback for every other value (including n == 1).
    return [5]

In [15]:
#Transformamos un filename tensor en una imagen
def read_png_file(item, value,p, run, path_base, DA=0, _3D=False, redTipo='Clasificacion'):
    """Turn one dataset record into the model inputs and its label.

    ``item['imagen']`` is a '--'-separated string; per the original notes its
    fields are [0] = XO, [1] = XA and [2] = date. For every time step a PNG is
    read from ``path_base + 'PNG/<date>/<date>_<step>.png'``, cropped to a
    ``margen`` x ``margen`` window centred on (XA, XO) and limited to the first
    ``canales`` channels.

    Args:
        item: Dict of input features; must contain 'imagen' (and 'DA' when
            ``DA`` is truthy).
        value: Target value for the record.
        p: Hyper-parameter dict; reads ``margen``, ``tiempos``, ``canales`` and
            ``inputs`` (indexed by ``run``).
        run: Index of the hyper-parameter combination to use.
        path_base: Folder prefix that contains the 'PNG/' directory.
        DA: When truthy, ``applyDA`` is applied with the code in ``item['DA']``.
        _3D: When true (and more than one time step), the time steps are
            averaged into a single image instead of being stacked.
        redTipo: 'Regresion' yields a float label, anything else an int label.

    Returns:
        ``(image, label)`` when only one input is configured, otherwise
        ``(tuple_of_inputs, label)`` following the order of ``p['inputs'][run]``.
    """
    # NOTE(review): splitDataset calls this from Dataset.map, so `item` and
    # `value` are tensors there; the int()/float()/str concatenations below
    # presumably rely on TensorFlow converting them — confirm before refactoring.
    imagenData = tf.strings.split(item['imagen'], sep='--')

    size = int(p['margen'][run] / 2)
    timeJoin = []
    for j in get_sat_time_steps(p['tiempos'][run]):
        filename = path_base + 'PNG/' + imagenData[2] + '/' + imagenData[2] + '_' + str(j) + '.png'
        image_string = tf.io.read_file(filename)
        img_decoded = tf.io.decode_png(image_string, dtype=tf.uint16, channels=3)

        # Crop the window around the point of interest and keep the first channels.
        img_decoded = img_decoded[int(imagenData[1]) - size:int(imagenData[1]) + size,
                                      int(imagenData[0]) - size:int(imagenData[0]) + size,
                                      0:p['canales'][run]]
        if DA:
            img_decoded = applyDA(img_decoded, item['DA'])

        # Insert at the front: the resulting list is in reverse iteration order.
        timeJoin.insert(0,img_decoded)

    if p['tiempos'][run]==1:
        imagenData = tf.reshape(timeJoin[0],(p['margen'][run],p['margen'][run],p['canales'][run]))
    else:
        if _3D:
            img = tf.reduce_mean( timeJoin , axis=0 )
            imagenData = tf.reshape(img,(p['margen'][run],p['margen'][run],p['canales'][run]))
        else:
            img = tf.stack(timeJoin, axis=0)
            imagenData = tf.reshape(img,(p['tiempos'][run],p['margen'][run],p['margen'][run],p['canales'][run]))

    # The label type depends only on the task. The single-input path used to
    # return int(value) unconditionally, truncating regression targets.
    if redTipo=='Regresion':
        label = float(value)
    else:
        label = int(value)

    if len(p['inputs'][run]) == 1:
        return imagenData, label

    item['imagen'] = imagenData
    itemL = []
    for inpL in p['inputs'][run]:
        itemL.append(item[inpL])

    return tuple(itemL), label


In [16]:
def splitDataset(p,HP,run, path_imagenes):
    """Load the train / validation CSVs and wrap them as batched ``tf.data`` datasets.

    Scaled helper columns ``_dato`` (clipped to ``[0, p['max_data']]`` first),
    ``_umb1`` and ``_altura`` are added with a MinMaxScaler fitted on the
    training frame and applied to the validation frame.

    Args:
        p: General parameters; reads ``dsVal``, ``dsTrain``, ``max_data``,
            ``dataset``, ``DA``, ``meanMatrizImagen``, ``redTipo`` and ``batch``.
        HP: Hyper-parameter dict; reads ``inputs`` and ``outputs`` for ``run``.
        run: Index of the hyper-parameter combination to use.
        path_imagenes: Forwarded to ``read_png_file`` as its ``path_base``.

    Returns:
        ``(train_dataset, val_dataset)``, both batched with ``p['batch']``.
    """
    val_df = pd.read_csv(p['dsVal'])
    train_df = pd.read_csv(p['dsTrain'])

    scaler = MinMaxScaler()

    def as_column(values):
        # sklearn scalers expect a 2-D (n_samples, 1) array
        return np.array(values).reshape(-1, 1)

    # 'dato' is clipped before scaling; the scaler is fitted on train only.
    train_clipped = np.clip(train_df['dato'], 0, p['max_data'])
    train_df['_dato'] = scaler.fit_transform(as_column(train_clipped))
    val_clipped = np.clip(val_df['dato'], 0, p['max_data'])
    val_df['_dato'] = scaler.transform(as_column(val_clipped))

    # The remaining attributes are scaled without clipping (scaler is re-fitted per column).
    for column in ('umb1', 'altura'):
        train_df['_' + column] = scaler.fit_transform(as_column(train_df[column]))
        val_df['_' + column] = scaler.transform(as_column(val_df[column]))

    if p['dataset']:
        print('Se escojera una parte del Dataset')
        train_df = train_df.sample(frac=p['dataset'])
        val_df = val_df.sample(frac=p['dataset'])

    train_inputs = {}
    # Extra attribute carrying the augmentation code of each training row.
    if p['DA']:
        train_inputs['DA'] = train_df['DA'].tolist()

    print(f'Tamaño del dataset: Train {len(train_df)}  - Val {len(val_df)}')

    input_names = HP['inputs'][run]
    train_inputs.update({name: train_df[name].tolist() for name in input_names})
    val_inputs = {name: val_df[name].tolist() for name in input_names}

    target = HP['outputs'][run]
    train_dataset = tf.data.Dataset.from_tensor_slices((train_inputs, train_df[target].tolist()))
    val_dataset = tf.data.Dataset.from_tensor_slices((val_inputs, val_df[target].tolist()))

    def load_train(x, y):
        return read_png_file(x, y, HP, run, path_imagenes, p['DA'], p['meanMatrizImagen'], p['redTipo'])

    def load_val(x, y):
        # Augmentation is never applied to the validation records.
        return read_png_file(x, y, HP, run, path_imagenes, False, p['meanMatrizImagen'], p['redTipo'])

    train_dataset = train_dataset.map(load_train).batch(p['batch'])
    val_dataset = val_dataset.map(load_val).batch(p['batch'])

    return train_dataset, val_dataset

In [17]:
def crearDir(path, newDir):
    """Create ``newDir`` inside ``path`` and return the resulting path.

    Args:
        path: Existing parent directory.
        newDir: Name of the directory to create.

    Returns:
        The joined path when the directory was created or already exists;
        ``None`` when it could not be created (a message is printed).
    """
    try:
        pathT = os.path.join(path, newDir)
        os.mkdir(pathT)
    except FileExistsError:
        # Already there: treat as success so repeated runs reuse the folder.
        return pathT
    except (OSError, TypeError) as err:
        # OSError: missing parent, permissions, invalid name...
        # TypeError: `path` is None because a previous crearDir call failed.
        print(f"No se pudo crear el directorio: {newDir}")
        print(f'Path base: {path}')
        print(f'Nuevo    : {newDir}')
        print(f'Motivo   : {err}')
        return None
    return pathT

In [18]:
def iniciarProyect(path_base, params, HP):
    """Create the report folders for a project and, if requested, its Excel file.

    The folder chain is ``<path_base>/Archivos/Resultados/<redTipo>/<directory>/<Proyect>``.
    When ``params['record']`` is true and the Excel file does not exist yet, an
    'Informacion' sheet is written with ``params`` (stringified) followed by
    ``HP`` starting at row 3.

    Args:
        path_base: Project root folder.
        params: General parameters; reads ``redTipo``, ``directory``,
            ``Proyect`` and ``record``.
        HP: Hyper-parameter dict written to the information sheet.

    Returns:
        ``(repDir, excelFile)``: the report folder and the statistics file path.
    """
    repDir = crearDir(f'{path_base}/Archivos/Resultados', params["redTipo"])
    repDir = crearDir(repDir, params["directory"])
    repDir = crearDir(repDir, params["Proyect"])

    excelFile = f'{repDir}/Stats-{params["Proyect"]}.xlsx'
    already_exists = os.path.exists(excelFile)
    if already_exists:
        print('Ya existe el archivo Excel !')

    if params['record'] and not already_exists:
        strParams = {str(key): str(value) for key, value in params.items()}
        print(strParams)
        # Context manager instead of writer.save(): the file is always closed,
        # and ExcelWriter.save() is no longer available in recent pandas.
        with pd.ExcelWriter(excelFile, engine = 'xlsxwriter') as writer:
            pd.DataFrame(strParams,index=[0]).to_excel(writer, sheet_name = 'Informacion')
            pd.DataFrame(HP).to_excel(writer, sheet_name = 'Informacion',startrow=3)

    return repDir , excelFile

In [19]:
def getCM(logs):
    """Rebuild confusion-matrix data from the validation counters in Keras ``logs``.

    Each counter is looked up by substring, so suffixed keys such as
    'val_true_negatives_1' are accepted.

    Args:
        logs: Mapping of metric names to values for one epoch.

    Returns:
        ``(TN, FP, FN, TP, y_true, y_pred)`` where the last two are numpy arrays
        of labels / predictions that reproduce those four counts.

    Raises:
        KeyError: if any of the four validation counters cannot be read.
    """
    lKeys = list(logs.keys())

    def _count(name):
        # First key containing `name`; IndexError when there is none.
        return int(logs[[x for x in lKeys if name in x][0]])

    try:
        TN = _count('val_true_negatives')
        TP = _count('val_true_positives')
        FN = _count('val_false_negatives')
        FP = _count('val_false_positives')
    except (IndexError, KeyError, TypeError, ValueError) as err:
        print(f'\nNo se pudo leer keys para la matriz de confucion en logs : {lKeys}')
        print(f'Se intento leer: val_true_negatives,val_true_positives, val_false_negatives y val_false_positives')
        # Stop here with a meaningful error; continuing would only fail below
        # with an unbound-variable error.
        raise KeyError('No se encontraron las metricas de validacion para la matriz de confusion') from err

    y_true =  [0]*TN + [1]*TP + [1]*FN + [0]*FP
    _y_pred = [0]*TN + [1]*TP + [0]*FN + [1]*FP

    return TN, FP, FN, TP, np.array(y_true), np.array(_y_pred)

In [20]:
class CustomCB(Callback):
    """Custom callback that records metrics at the end of each training epoch.

    The per-epoch values are accumulated in ``self.history`` (a dict of lists)
    and, when ``WANDB`` is true, also sent to Weights & Biases.
    """
    def __init__(self, val_ds=None, WANDB=True):
        # Initialise the Keras base class so its own attributes exist.
        super().__init__()
        self.val_ds = val_ds
        self.history = {}
        self.wandb = WANDB

    def on_epoch_end(self, epoch, logs=None):
        """Append this epoch's metrics to ``self.history`` and optionally log to wandb."""
        # Avoid a shared mutable default argument.
        logs = logs or {}
        TN, FP, FN, TP, y_true, _y_pred = getCM(logs)

        self.history.setdefault('epoch', []).append(epoch)

        # Metrics copied verbatim from the Keras logs.
        for key in ('loss', 'acc', 'val_loss', 'val_acc', 'auc', 'val_auc'):
            self.history.setdefault(key, []).append(logs[key])

        # Validation confusion-matrix counts.
        for key, count in (('val_TN', TN), ('val_FP', FP), ('val_FN', FN), ('val_TP', TP)):
            self.history.setdefault(key, []).append(count)

        if self.wandb:
            # NOTE(review): `wandb` is not imported in the visible import cell;
            # this branch needs it to be available in the notebook namespace.
            wandb.log({"conf_mat" : wandb.plot.confusion_matrix(probs=None,
                                    preds=_y_pred, y_true=y_true,
                                    class_names=[0,1]),
                       'val_TN' :TN,'val_FN' :FN,'val_TP' :TP,'val_FP' :FP,
                       'val_acc': logs['val_acc'],'loss' : logs['loss'],
                       'val_loss': logs['val_loss'],'acc' : logs['acc']
                      })


In [21]:
def crearCallbacks(statsDir, HP,run, metricas, p):
    """Extend the base callback list for one run and create its model id.

    Args:
        statsDir: Folder where the per-epoch checkpoints are written.
        HP: Hyper-parameter dict; reads ``rnn_tipo``, ``outputs``,
            ``lr_schedule``, ``lr``, ``canales``, ``tiempos`` and ``margen``.
        run: Index of the hyper-parameter combination to use.
        metricas: Dict holding the base list under 'callbacks'; that list is
            extended in place.
        p: General parameters; reads ``record``, ``WANDB``, ``epocas``,
            ``batch`` and ``Proyect``.

    Returns:
        ``(callbacks, idModel)`` where ``idModel`` is a timestamp string.
    """
    callbacks = metricas['callbacks']
    idModel = datetime.today().strftime("%Y%m%d_%H%M%S")

    # '{epoch:02d}' stays a literal placeholder in the checkpoint path.
    run_suffix = f'{HP["rnn_tipo"][run]}_{HP["outputs"][run]}_{idModel}.hdf5'
    checkpoint_path = statsDir + '/Model_{epoch:02d}_' + run_suffix
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,verbose=1)

    # Checkpoints are only registered when results are being recorded.
    if p['record']:
        callbacks.append(cp_callback)

    if HP['lr_schedule'][run]>0:
        callbacks.append(ReduceLROnPlateau(
            monitor='val_loss',             # metric to watch
            factor=HP['lr_schedule'][run],  # new LR = LR * factor
            patience=5,                     # epochs without improvement before reducing
            min_lr=1e-6,                    # lower bound for the LR
            verbose=1,                      # print a message on each reduction
        ))

    # Start the WANDB run
    if p['WANDB']:
        config = dict(learning_rate=HP['lr'][run], epochs = p['epocas'],
                      batch_size = p['batch'], architecture="CNN",
                      num_classes = 2,)
        wandb.init(project=f'{p["Proyect"]}',
                   config=config,
                   name= f'Ex_({HP["canales"][run]}-{HP["tiempos"][run]}-{HP["margen"][run]})_{idModel}')

    return callbacks, idModel

In [22]:
def trainModel(path_base,path_imagenes,params,HP, criterio):
    """Train one model per hyper-parameter combination and store the results.

    Args:
        path_base: Project root folder (reports are written below it).
        path_imagenes: Folder prefix handed to the dataset loader.
        params: General training parameters (``Proyect``, ``batch``,
            ``epocas``, ``redTipo``, ``record``, ``WANDB``, ...).
        HP: Hyper-parameter dict; ``HP['runs']`` is the number of combinations.
        criterio: Name of the hyper-parameter printed in the run header.

    Returns:
        A list with the Keras ``history.history`` dict of every run. If saving
        a run fails and the pickle fallback fails too, the ``History`` object
        of that run is returned immediately instead.
    """
    # Create the report directories
    repDir, statsFile  = iniciarProyect(path_base, params, HP)
    print(f'DIRECTORIO BASE : {repDir}')

    # One iteration per hyper-parameter combination
    resultados = []
    for run in range(HP['runs']):
        print(f'Inicio de la prueba N°: {(run+1)}/{HP["runs"]}')
        print(f'- Nombre del Proyecto : {params["Proyect"]}')
        print(f'- Batch size          : {params["batch"]}')
        print(f'- Criterio {criterio} : {HP[criterio][run]}')
        print('__________________________________________________')

        # Load the datasets
        train_dataset, val_dataset = splitDataset(params,HP,run, path_imagenes)

        # Define and compile the model
        model = crearModelo(HP,run,params['redTipo'])
        metricas = getMetrics(params, HP, run)
        model.compile(optimizer=metricas['optimizer'],loss=metricas['loss_fn'],metrics=metricas['metrics'],)

        # summary() prints by itself and returns None; wrapping it in display()
        # only added a stray "None" to the output.
        model.summary()

        # Callbacks
        CB, idModel = crearCallbacks(repDir, HP,run, metricas, params)
        if params['redTipo'] == 'Clasificacion':
            # NOTE(review): this callback is created but never added to CB.
            hist =  CustomCB(val_dataset, params['WANDB'])

        # Training. Both datasets are already batched by splitDataset, so
        # batch_size / validation_batch_size are not passed to fit().
        history = model.fit(train_dataset,
                            epochs=params['epocas'],callbacks=CB,
                            validation_data=val_dataset,
                            verbose=1)

        # Save the reports
        try:
            resultados.append(history.history)
            if params['record']:
                with pd.ExcelWriter(statsFile, mode="a", engine="openpyxl", if_sheet_exists='overlay') as writer:
                    tempDF = pd.DataFrame(history.history)
                    if params['redTipo'] == 'Clasificacion':
                        # Shorten e.g. 'val_true_positives_1' to 'val_TP'.
                        tempDF.columns = [x.replace('true_','T').replace('false_','F').replace('positives','P').replace('negatives','N') for x in tempDF.columns]
                        tempDF.columns = [re.sub('_[0-9]','',x) for x in tempDF.columns]
                    tempDF.to_excel(writer,startrow=0,
                                    sheet_name=f'{run}-{idModel}')
        except Exception as e:
            print(f'ERROR: No se pudo guardar los resulatdos test: {run}', str(e))
            # Fallback: pickle the raw history (the file keeps a .csv name,
            # which is what fix_result_file parses).
            try:
                with open(f'{repDir}/Test-{run}-{idModel}.csv', 'wb') as file_pi:
                    pickle.dump(history.history, file_pi)
            except Exception:
                print(traceback.format_exc())
                print(f'ERROR: No se pudo guardar el hist temporal')
                return history
        finally:
            # Close the WANDB run even when saving the report failed.
            if params['WANDB']:
                wandb.finish()

    return resultados

In [23]:
def fix_result_file(path_base,path_imagenes,params,HP, criterio, files):
    """Rebuild the Excel report sheets from pickled training histories.

    Each entry of ``files`` is a pickled history whose file name ends in
    ``-<idModel>.<ext>``. No model is built or trained here: the stored history
    is only re-exported.

    Args:
        path_base: Project root folder (reports are written below it).
        path_imagenes: Unused; kept for signature parity with ``trainModel``.
        params: General training parameters (``Proyect``, ``batch``,
            ``redTipo``, ``record``, ``WANDB``, ...).
        HP: Hyper-parameter dict, used for the run header that is printed.
        criterio: Name of the hyper-parameter printed in the run header.
        files: Paths of the pickled history files, one per run.

    Returns:
        A list with the history loaded from every file.
    """
    # Create the report directories
    repDir, statsFile  = iniciarProyect(path_base, params, HP)
    print(f'DIRECTORIO BASE : {repDir}')

    resultados = []
    for run , file in enumerate(files):
        print(f'Inicio de la prueba N°: {(run+1)}/{HP["runs"]}')
        print(f'- Nombre del Proyecto : {params["Proyect"]}')
        print(f'- Batch size          : {params["batch"]}')
        print(f'- Criterio {criterio} : {HP[criterio][run]}')
        print('__________________________________________________')

        # Model id = last '-'-separated token of the file name, without extension.
        # Both '\\' and '/' are accepted as path separators.
        file_name = re.split(r'[\\/]', file)[-1]
        idModel = file_name.split('-')[-1].split('.')[0]

        # SECURITY: pickle.load can execute arbitrary code; only use files
        # that were written by this project.
        with open(file, "rb") as input_file:
            history = pickle.load(input_file)

        # Save the reports
        try:
            resultados.append(history)
            if params['record']:
                with pd.ExcelWriter(statsFile, mode="a", engine="openpyxl", if_sheet_exists='overlay') as writer:
                    tempDF = pd.DataFrame(history)
                    if params['redTipo'] == 'Clasificacion':
                        # Shorten e.g. 'val_true_positives_1' to 'val_TP'.
                        tempDF.columns = [x.replace('true_','T').replace('false_','F').replace('positives','P').replace('negatives','N') for x in tempDF.columns]
                        tempDF.columns = [re.sub('_[0-9]','',x) for x in tempDF.columns]
                    tempDF.to_excel(writer,startrow=0,
                                    sheet_name=f'{run}-{idModel}')
            if params['WANDB']:
                wandb.finish()
        except Exception as e:
            print(f'ERROR: No se pudo guardar los resulatdos test: {run}', str(e))

    return resultados

In [24]:
#%%time
#resultado = trainModel(path_base,path_imagenes,p_train,hiperparams, 'lr')

In [1]:
# Number of distinct values in the 'codigo' column of the training split.
# pandas is already imported in the import cell; the path is built from
# path_base so it is not tied to one user's machine.
path = f'{path_base}/Archivos/Dataset/Clasificacion/Entrenamiento/V7_SplitCon_DA4/CLASE_TrainDS_1.csv'
df_train = pd.read_csv(path)
df_train['codigo'].nunique()

113

In [2]:
# Number of distinct values in the 'codigo' column of the validation split.
# pandas is already imported in the import cell; the path is built from
# path_base so it is not tied to one user's machine.
path = f'{path_base}/Archivos/Dataset/Clasificacion/Validacion/ClaseV7_ValidacionDS.csv'
df_val = pd.read_csv(path)
df_val['codigo'].nunique()

103

In [3]:
# Number of distinct values in the 'codigo' column of the test split.
# pandas is already imported in the import cell; the path is built from
# path_base so it is not tied to one user's machine.
path = f'{path_base}/Archivos/Dataset/Clasificacion/Pruebas/ClaseV7_PruebasDS.csv'
df_test = pd.read_csv(path)
df_test['codigo'].nunique()

100

In [8]:
# Row count per value of the 'clase' column in the test split.
df_test['clase'].value_counts()

1    1262
0      12
Name: clase, dtype: int64

In [28]:
#pd.concat([df_train,df_val,df_test], axis=0)#['codigo'].nunique()

In [29]:
"""
 Definimos las variables para las iteraciones
"""

modelTipo = 'Clasificacion'
rnnTipo = 'CONV3D'  # 'LSTM' or 'CONV3D'
idProject = datetime.today().strftime("%Y%m%d_%H")

# Fixed settings shared by every run of this experiment.
p_train = {
    # General variables
    'products': products,
    'times'   : times,

    # Reporting
    'directory': 'CONV3D-V20241203',
    'Proyect'  : f'{modelTipo}-{idProject}',  # TesisDiego
    'record'   : True,   # Write the results to Excel files
    'WANDB'    : False,  # Log the results to WANDB

    # Model data
    'redTipo'  : modelTipo,  # Clasificacion / Regresion
    'rnn'      : True,       # Recurrent networks
    'meanMatrizImagen' : False,  # !!! ALWAYS False for RNN models !!!
    'max_data' : 10,  # 'RobustScaler'

    # Training variables
    'batch'    : 64,
    'epocas'   : 200,  # 250
    'paciencia': 150,  # 0 = no patience (val_accuracy)

    # Dataset
    'dsTrain'  : f'{path_base}/Archivos/Dataset/{modelTipo}/Entrenamiento/ClaseVGTIME_DAUS/CLASE_TrainDS_6.csv',
    'dsVal'    : f'{path_base}/Archivos/Dataset/{modelTipo}/Validacion/ClaseVGTIME_ValidacionDS.csv',
    'dataset'  : None,  # 1 = 100% of the dataset
    'DA'       : True,  # Usually for classification
}

# 'margen' is the only hyperparameter with a distinct value per run, so the
# number of runs is derived from it instead of being a separate hard-coded
# constant that has to be kept in sync by hand.
margenes = [12, 10]
cantRuns = len(margenes)


def _repetir(valor):
    """Return a list with `cantRuns` entries of `valor`.

    List values are copied for each entry: `[[...]] * n` would repeat the SAME
    inner list object n times, so an in-place change made for one run would be
    visible in every other run.
    """
    return [list(valor) if isinstance(valor, list) else valor for _ in range(cantRuns)]


# Hyperparameters: every list-valued entry holds one value per run.
hiperparams = {
    # General
    # NOTE(review): 'dsTName' says CLASE_TrainDS_1 while p_train['dsTrain'] points
    # at CLASE_TrainDS_6.csv -- confirm which one trainModel actually uses.
    'dsTName'    : _repetir('CLASE_TrainDS_1'),
    'inputs'     : _repetir(['imagen', '_dato', '_umb1', '_altura']),
    'outputs'    : _repetir('clase'),

    # Model
    'optimizer'  : _repetir('Adam'),  # Adam, RMSprop
    'lr_schedule': _repetir(0),
    'lr_decay'   : _repetir(0.94),
    'lr'         : _repetir(0.001),
    'loss'       : _repetir('binary_crossentropy'),
    'batchNorm'  : _repetir(True),
    'normLayer'  : _repetir(True),
    'pre_trained': _repetir([]),  # e.g. 'ResNet50'

    # Convolutional layers
    'cnn_cant'   : _repetir(5),
    'cnn_units'  : _repetir([16, 32, 64, 16, 8]),
    'droupout'   : _repetir([0.1, 0.3, 0.2, 0.1, 0.1]),
    'maxPool'    : _repetir([False, False, False, False, False]),

    # Recurrent layers
    'rnn_tipo'   : _repetir(rnnTipo),
    'rnn_units'  : _repetir(8),

    # Dense layers
    'dense_tipo' : _repetir('RELU'),
    'dense_cant' : _repetir(4),
    'dense_units': _repetir([64, 32, 16, 8]),
    'kernel_regularizer' : _repetir([0, 0, 0, 0]),
    'dense_droupout': _repetir([0.3, 0.2, 0.1, 0]),

    # Satellite images
    'canales'    : _repetir(3),
    'tiempos'    : _repetir(6),
    'margen'     : margenes,

    # -
    'runs'       : cantRuns,
}

# Fail fast if a later edit replaces an entry with a list of the wrong length:
# a mismatch would otherwise only surface after earlier runs have finished.
assert all(
    len(valores) == cantRuns
    for valores in hiperparams.values()
    if isinstance(valores, list)
), 'Every list in hiperparams must have exactly one value per run (cantRuns)'

# Dataset sizes
if p_train['dsVal']:
    dfVal = pd.read_csv(p_train['dsVal'])
    print(f'VALDIACION : {len(dfVal)}')
    print('--------------------------------------------------')

tempDF = pd.read_csv(p_train['dsTrain'])
print(f'TRAIN: {len(tempDF)}')
tempDF.head(2)

VALDIACION : 3051
--------------------------------------------------
TRAIN: 5952


Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,index,Unnamed: 0,nombre,codigo,XO,XA,longitud,latitud,...,umb2,fecha,flag,flagV2,imagen,clase,index_st_group,flag_m2_group,DA,stratify
0,0,366,239,41386,VISCA VISCA,114119,642,894,-71.9787,-14.7228,...,3.9,2021-04-12-16,C0000002,D01,642--894--2021-04-12-21,1,114119-68,0,0,0
1,1,4,9412,33360,SAN MATEO DE OTAO,472EC742,388,734,-76.56359,-11.84696,...,7.0,2021-03-01-16,C0000002,D02,388--734--2021-03-01-21,1,472EC742-1,0,0,0


In [31]:
%%time
# Launch the training runs configured in p_train / hiperparams. Judging by the
# recorded output ("Criterio margen : 12", run 1/2), the last argument names the
# hiperparams entry that varies between runs -- confirm against trainModel.
resultado = trainModel(path_base,path_imagenes,p_train,hiperparams, 'margen')

Ya existe el archivo Excel !
DIRECTORIO BASE : C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes/Archivos/Resultados\Clasificacion\CONV3D-V20241203\Clasificacion-20241203_22
Inicio de la prueba N°: 1/2
- Nombre del Proyecto : Clasificacion-20241203_22
- Batch size          : 64
- Criterio margen : 12
__________________________________________________
Tamaño del dataset: Train 5952  - Val 3051
(6, 12, 12, 3)
Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_9 (InputLayer)           [(None, 6, 12, 12,   0           []                               
                                3)]                                                               
                                                                                                  
 rescaling_2 (Rescaling)        (None, 6, 12, 12, 3  0

None

Epoch 1/200

Epoch 1: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes/Archivos/Resultados\Clasificacion\CONV3D-V20241203\Clasificacion-20241203_22\Model_01_CONV3D_clase_20241206_070344.hdf5
Epoch 2/200

Epoch 2: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes/Archivos/Resultados\Clasificacion\CONV3D-V20241203\Clasificacion-20241203_22\Model_02_CONV3D_clase_20241206_070344.hdf5
Epoch 3/200

Epoch 3: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes/Archivos/Resultados\Clasificacion\CONV3D-V20241203\Clasificacion-20241203_22\Model_03_CONV3D_clase_20241206_070344.hdf5
Epoch 4/200

Epoch 4: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes/Archivos/Resultados\Clasificacion\CONV3D-V20241203\Clasificacion-20241203_22\Model_04_CONV3D_clase_20241206_070344.hdf5
Epoch 5/200

Epoch 5: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesi

None

Epoch 1/200

Epoch 1: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes/Archivos/Resultados\Clasificacion\CONV3D-V20241203\Clasificacion-20241203_22\Model_01_CONV3D_clase_20241207_055258.hdf5
Epoch 2/200

Epoch 2: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes/Archivos/Resultados\Clasificacion\CONV3D-V20241203\Clasificacion-20241203_22\Model_02_CONV3D_clase_20241207_055258.hdf5
Epoch 3/200

Epoch 3: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes/Archivos/Resultados\Clasificacion\CONV3D-V20241203\Clasificacion-20241203_22\Model_03_CONV3D_clase_20241207_055258.hdf5
Epoch 4/200

Epoch 4: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/FinalTesis/Tesis2-DiegoParedes/Archivos/Resultados\Clasificacion\CONV3D-V20241203\Clasificacion-20241203_22\Model_04_CONV3D_clase_20241207_055258.hdf5
Epoch 5/200

Epoch 5: saving model to C:/Users/Shounen/Desktop/Ciclo XI/Tesi

In [None]:
# Per-run test result CSVs of the LSTM experiment 'Clasificacion-20241014_23'.
# Presumably the input for the commented-out `fix_result_file` call further
# down -- confirm before use. The paths are built from `path_base` so the cell
# is not tied to one machine's absolute directory layout.
files = [
    f'{path_base}/Archivos/Resultados/Clasificacion/LSTM-V20241013/Clasificacion-20241014_23/{nombre}'
    for nombre in ('Test-0-20241014_235009.csv', 'Test-1-20241015_050036.csv')
]

In [None]:
# Sample metric/column names used to try out the column-renaming steps by hand.
# Some entries carry a numeric suffix ('_1', '_2') on purpose, so the suffix
# stripping can be checked as well.
a = [
    # training metrics
    'loss',
    'acc',
    'true_positives',
    'true_negatives',
    'false_positives',
    'false_negatives',
    'auc',
    # validation metrics
    'val_loss_2',
    'val_acc',
    'val_true_positives',
    'val_true_negatives_1',
    'val_false_positives_1',
    'val_false_negatives',
    'val_auc',
    # learning rate
    'lr',
]

# Disabled scratch expressions: abbreviate the confusion-matrix names
# (true_/false_ -> T/F, positives/negatives -> P/N) and strip the numeric suffix.
#[x.replace('true_','T').replace('false_','F').replace('positives','P').replace('negatives','N') for x in a]
#[re.sub('_[0-9]','',x) for x in a]

In [None]:
#a  = fix_result_file(path_base,path_imagenes,p_train,hiperparams, 'tiempos', files)

In [None]:
#e-4