In [1]:
from netCDF4 import Dataset, num2date
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cf
import tensorflow as tf
import pandas as pd
import re
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import os
import time

In [2]:
#El modelo solo considera en input_shape(x,x,1), el 1 se puede cambiar para abarcar mas canales de imagenes satelitales
def crearModelo(inputLen):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(110, 110, inputLen)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(40))
    print(model.summary())
    return model

#testModel = crearModelo(1)
#testModel.summary()


In [56]:
#Se le da un tensor de 4 dimensiones
#[0] =  dato de precipitacion
#[1] = Punto de la estacion (Longitud)
#[2] = Punto de la estacion (Latitud)
#[3] = Fecha (año-mes-dia-hora)

#Devuelve x,y
#X = Dato de precipitacion
#Y = np.Array de las matrices de colores de cada producto en products (C08,C07 o C13)
def leerImagenArea(tensor, path_base,margen,products): 
    
    """
    -----------------------------------------------------------------------------------    
    !!!!!VERIFICAR QUE LA HORA DE LA IMAGEN SATELITAL SEA IGUAL A LA HORA PERU!!!!!!!!
    -----------------------------------------------------------------------------------
    
    Los archivos se deben encontrar en carpetas ordenadas : ../GOES/{producto}/{año}/{mes}/{ARCHIVO}.nc
    ARCHIVO = G16_{producto}_Cyl_{año}{mes}{dia}-{hora}00.nc'
    
    EJEMPLO : path_base + GOES/C8/2019/02/G16_C08_Cyl_20190210-1600.nc
    """
    
    #Se define por defecto el path base - (Temporal)
    #path_base  =  'C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/'
    #path_base  =  '../GOES'
    
    try:
        #Fecha = 2019 01 05 22
        fecha = str(tensor.numpy()[3].decode('UTF-8'))
        year,month,day,hour = fecha.split('-')
        
   
    except:
        print("No se pudo leer la fecha")
        print(tensor.numpy()[3].decode('UTF-8'))
        return -1
    
    
    #El ancho y alto sera el margen que se dara desde el punto de origen (estacion)
    #Esta en grados (1 grado == 100Km) - (Temporal)
    alto= margen[0]
    ancho= margen[1]
    
    
    #Se define el producto 
    mapaArrays = []
    for product in products:    
        origen = [float(tensor.numpy()[1].decode('UTF-8')),float(tensor.numpy()[2].decode('UTF-8'))]    
        filename = f'{path_base}/{product}/{year}/{month}/G16_{product}_Cyl_{year}{month}{day}-{hour}00.nc'    
        try:
            ds = Dataset(filename)
        except:
            print("No se pudo leer el archivo")
            print(filename)
            return -1

        # convierte el tiempo de formato numerico a formato fecha y hora de python
        #date = num2date(ds.variables['time'][:], ds.variables['time'].units, only_use_cftime_datetimes=False, only_use_python_datetimes=True)

        # convierte el formato de la variable de Int16 a Float32 y guarda el resultado
        field = ds.variables['CMI'][:].data.astype(np.float32)/100.0

        # obtiene las coordenadas de los pixeles
        lon = ds.variables['longitude'][:].data
        lat = ds.variables['latitude'][:].data    

        #Se define el margen para recortar la imagen satelital
        maxLon=origen[0]+ancho
        minLon=origen[0]-ancho
        maxLat=origen[1]+alto
        minLat=origen[1]-alto

        #Booleanos que ayudarán a buscar el margen
        altoMin = False
        altoMax = False


        #Inicializamos los "indices"
        lom = 0
        loM = 0
        lam = 0
        laM = 0

        """
        Tener en cuenta que el arreglo de longitudes (lon) esta ordenado de manera creciente,
        mientras que el de latitudes (lat) esta de manera decreciente
        """    
        for i in range(0,len(lon)):
            if lon[i]>=minLon and not altoMin:
                altoMin = True
                lom = i
            if lon[i]<=maxLon:
                loM = i

        for j in range(0,len(lat)):
            if lat[j]>=minLat:    
                laM = j
            if lat[j]<=maxLat and not altoMax:
                altoMax = True
                lam = j   
                
        mapaArrays.append(field[lam:laM,lom:loM])
    
    if len(products) == 1:
        return mapaArrays[0], float(tensor.numpy()[0].decode('UTF-8'))
    
    return np.dstack(mapaArrays), float(tensor.numpy()[0].decode('UTF-8'))
       

In [13]:
#Devuelve una lista con lo indices que no se encontraron lso archivos y el producto
#Servira para ver si se teinen todas las imagenes necesarias para el entrenamiento
def comprobarFile(df,products,path_base):        
    no_index = []
    no_product = []
    for i in df.index:       
        year,month,day,hour = df['fecha'][i].split('-')
        tmpProduct = []        
        for p in products:
            filename = f'{path_base}/{p}/{year}/{month}/G16_{p}_Cyl_{year}{month}{day}-{hour}00.nc'       
            existe = os.path.exists(filename)
            if not existe:
                tmpProduct.append(p)
        if len(tmpProduct)>0:
            no_index.append(i)                
            no_product.append(tmpProduct)
    
    df = df.drop(index=no_index)
    print(f'{len(no_index)} datos eliminados: No se encontraron los archivos de imagenes satelitales')
    return df , (no_index,no_product)

In [5]:
def obtenerDatos(filename):
    pdata = pd.read_csv(filename) 
    
    #Seleccionamos solo las columnas necesarias : precipitacion, Estacion (Longitud), Estacion (Latitud), Fecha (año-mes-dia-hora)
    pdataX = pdata.loc[:, ['dato','longitud', 'latitud', 'fecha']]

    #Quitamos los valores NA
    pdataX = pdataX[pdataX['dato'].notna()]

    #Definimos un solo tipo (str) pora asi poder convertirlo a tensor
    pdataX = pdataX.astype({"dato":str,"longitud":str, "latitud":  str, "fecha": str})
                
    #Barajeamos los datos
    pdataX = shuffle(pdataX)
    
    print(f'{len(pdataX)} datos leidos')
    return pdataX

In [59]:
def xyDataset(dataset, path_base,margen,products):
    x = []
    y = []
    i,j = 0.0 , []    
    for dato in dataset:       
        i,j =  leerImagenArea(dato, path_base,margen,products)
        x.append(i)
        y.append(j)
    x = np.asarray(x)
    y = np.asarray(y)
    return x,y
    

In [57]:
#@tf.function

def train_step(x,y,model,optimizer,loss_fn,train_acc_metric):    
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    train_acc_metric.update_state(y, logits)
    return loss_value

In [58]:
#@tf.function

def test_step(x,y,model,val_acc_metric):      
    val_logits = model(x, training=False)
    val_acc_metric.update_state(y, val_logits)

In [69]:
def entrenamiento(model,datasetList,path_base,margen,products, batch_size,epocas=2):
    #Definimos el optimizador
    optimizer = keras.optimizers.Adam(learning_rate=1e-3)
    
    #Definimos la funcion de peridida
    loss_fn = keras.losses.MeanAbsoluteError()
    
    #Definimos las metricas a evaluar
    train_acc_metric = keras.metrics.MeanSquaredError()
    val_acc_metric = keras.metrics.MeanSquaredError()
       
    
    for epoch in range(epocas):
        print("\nComienzo de la epoca %d" % (epoch,))
        start_time = time.time()
        
        #Procesamos y separamos el dataset en grupos para el entrenamiento
        dataset = tf.data.Dataset.from_tensor_slices(datasetList)
        train_size = int(len(datasetList)*0.8)
        #train_dataset = dataset.take(train_size)
        #val_dataset = dataset.skip(train_size)

        #Dataset de entrenamiento
        train_dataset = tf.data.Dataset.from_tensor_slices(datasetList[:train_size])       
        

        #Dataset de validacion
        val_dataset = tf.data.Dataset.from_tensor_slices(datasetList[train_size:])
        
        
        ##################################
        #ENTREMAINETO CON BATCH
        ##################################
        if batch_size != -1:
            #print("No implementado aun")
            #return
            train_dataset = train_dataset.batch(batch_size)
            val_dataset = val_dataset.batch(batch_size)
            
            #Iterate over the batches of the dataset.
            for step, (datos) in enumerate(train_dataset):
                #Obtenmos X,Y
                x_train_batch, y_train_batch =  xyDataset(datos, path_base,margen,products)
                
                #Entrenamos
                loss_value = train_step(x_train_batch, y_train_batch, model,optimizer,loss_fn,train_acc_metric)
                
                #Imprimir log
                if step % 200 == 0:
                    print(
                        "Training loss (para un batch) en el paso %d: %.4f"
                        % (step, float(loss_value))
                    )
                    print("Datos procesados: %d samples" % ((step + 1) * batch_size))

            #Imprimir metricas al final de cada epoca
            train_acc = train_acc_metric.result()
            print("Training acc en la epoca: %.4f" % (float(train_acc),))

            #Reseteamos al metrica
            train_acc_metric.reset_states()

            # Run a validation loop at the end of each epoch.
            for (datos) in val_dataset:
                #Obtenmos X,Y
                x_val_batch, y_val_batch =  xyDataset(datos, path_base,margen,products)
                
                #Evaluamos
                test_step(x_val_batch, y_val_batch, model,val_acc_metric)

            #Imrpimimos resultados de valdiacion    
            val_acc = val_acc_metric.result()
            val_acc_metric.reset_states()
            print("Validation acc: %.4f" % (float(val_acc),))    
            print("Time taken: %.2fs" % (time.time() - start_time))    
            
        ##################################
        #ENTREMAINETO CON TODO EL DATASET
        ##################################
        else:
            #Obtenemos dataset de etrenamiento
            x_train, y_train = xyDataset(train_dataset, path_base,margen,products)
            
                        
            #Obtenemos dataset de validacion      
            x_val, y_val = xyDataset(val_dataset, path_base,margen,products)    
            
            
            #Entrenamos el modelo
            model.compile(optimizer='adam',
              loss=tf.keras.losses.MeanAbsoluteError(),
              metrics=[tf.keras.metrics.MeanSquaredError()])

            history = model.fit(x=x_train, y= y_train, validation_data=(x_val,y_val))
            
                 
        
    

In [9]:
#Path_base debe ser completo, se usará para comprobar si existen las imagenes satelitales descargadas
path_base = 'C:/Users/Shounen/Desktop/Ciclo XI/Tesis 2/GOES/'

#Productos de las imagenes satelitales (C08, C07 o C13)
products = ['C08','C07']

#El margen servira para recortar la imagen [alto, ancho] desde el punto de origen (estacion)
margen = [1,1]

#Batach -1 = entrenara con todo el dataset al mismo tiempo
batch = -1

In [10]:
#Creamos el modelo
modelo = crearModelo(len(products))

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 108, 108, 32)      608       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 54, 54, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 52, 52, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 26, 26, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 24, 24, 64)        36928     
                                                                 
 flatten (Flatten)           (None, 36864)             0

In [15]:
#Leemos los datos del archivo
#Archivo de prueba contiene datos del 2019 del mes 01 y 02
df = obtenerDatos('pruebasV2.csv')

336720 datos leidos


In [16]:
#Comprobamos si existen las imagenes/produtos por cada dato,
#caso contrario los borra de la lista
df, (no_i,no_p) = comprobarFile(df,products,path_base)

273800 datos eliminados: No se encontraron los archivos de imagenes satelitales


In [18]:
#Seleccionamos algunos para las pruebas
df = df[0:200]
datasetList = df.values.tolist()

#-Visualizacion
print(len(datasetList))
print(datasetList[0])

200
['0.0', '-70.6775', '-15.74562', '2019-02-27-00']


In [41]:
datasetList

[['0.0', '-70.6775', '-15.74562', '2019-02-27-00'],
 ['0.1', '-74.2642', '-14.5423', '2019-02-27-16'],
 ['0.0', '-76.5909', '-9.87895', '2019-01-31-06'],
 ['0.0', '-71.01823', '-13.92214', '2019-01-29-12'],
 ['0.0', '-71.38955', '-15.21331', '2019-02-25-02'],
 ['0.0', '-73.27164', '-14.52014', '2019-02-26-08'],
 ['0.0', '-74.96083', '-9.38139', '2019-02-28-20'],
 ['0.1', '-76.37972', '-11.11333', '2019-01-28-01'],
 ['0.0', '-70.17122', '-15.47106', '2019-02-28-09'],
 ['0.0', '-78.05111', '-7.62167', '2019-02-28-07'],
 ['0.0', '-77.0432', '-12.07057', '2019-01-29-08'],
 ['0.0', '-78.82367', '-7.47987', '2019-01-28-17'],
 ['0.0', '-78.70694', '-7.57417', '2019-01-26-07'],
 ['0.0', '-78.72682', '-7.38407', '2019-01-29-18'],
 ['0.0', '-78.52363', '-6.67996', '2019-01-29-07'],
 ['0.0', '-77.44643', '-9.47683', '2019-01-31-13'],
 ['0.0', '-77.40607', '-9.85243', '2019-01-08-22'],
 ['0.0', '-79.69924', '-5.28985', '2019-02-26-21'],
 ['0.0', '-76.76952', '-11.38887', '2019-02-24-07'],
 ['0.1',

In [61]:
#Entrenamos con TODO el dataset
entrenamiento(modelo,datasetList,path_base,[1,1],products, batch,epocas=2)


Comienzo de la epoca 0


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  field = ds.variables['CMI'][:].data.astype(np.float32)/100.0
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  lon = ds.variables['longitude'][:].data
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  lat = ds.variables['latitude'][:].data



Comienzo de la epoca 1


In [68]:
#Entrenamos con BATCH
entrenamiento(modelo,datasetList,path_base,[1,1],products, 64 ,epocas=5)


Comienzo de la epoca 0


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  field = ds.variables['CMI'][:].data.astype(np.float32)/100.0
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  lon = ds.variables['longitude'][:].data
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  lat = ds.variables['latitude'][:].data


ValueError: Exception encountered when calling layer "sequential" (type Sequential).

Input 0 of layer "conv2d" is incompatible with the layer: expected min_ndim=4, found ndim=1. Full shape received: (64,)

Call arguments received:
  • inputs=tf.Tensor(shape=(64,), dtype=float64)
  • training=True
  • mask=None

In [38]:
arr1 = np.array([[1, 2, 3],[4,5,6],[7,8,9]])

arr2 = np.array([[-1, -2, -3],[-4,-5,-6],[-7,-8,-9]])

arr3 = np.array([[-11, -22, -33],[-44,-55,-66],[-77,-88,-99]])

L = [arr1,arr2,arr3]

arr = np.dstack(L)

#arr = np.array([arr1,arr2])
#arr = np.reshape(arr,(3,3,2))
print(arr.shape)
print(arr)

(3, 3, 3)
[[[  1  -1 -11]
  [  2  -2 -22]
  [  3  -3 -33]]

 [[  4  -4 -44]
  [  5  -5 -55]
  [  6  -6 -66]]

 [[  7  -7 -77]
  [  8  -8 -88]
  [  9  -9 -99]]]
