In [1]:
#Load the imports
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from matplotlib import pyplot as plt
from datetime import datetime
from sklearn.model_selection import train_test_split
# The following lines adjust the granularity of reporting.
pd.options.display.max_rows = 10
pd.options.display.float_format = "{:.1f}".format
print("Imported modules.")

Imported modules.


In [2]:
#Define program constants

DATABASE_PATH = "database.csv"
SEQUENCE_LEN = 26 #26 points equals 26*15 days equals a year aprox.

In [3]:
# Load the dataset
aux_df = pd.read_csv(DATABASE_PATH)
scale_factor = 1000.0
# Shuffle the examples
#train_df = train_df.reindex(np.random.permutation(train_df.index))
#print(aux_df[['date']])

In [4]:

def convert_to_timestamp(date_str):

    if(isinstance(aux_df['date'][0], np.int64)):
      print("Already converted")
      return False #NOT ROBUST, FAILS IF RUN TWICE ON THE SAME DF

    date_obj = datetime.strptime(date_str, "%m/%d/%Y")
    return int(date_obj.timestamp())# * 1000) Optional multiplication by 1000 to turn into miliseconds.


aux_df['date'] = aux_df['date'].apply(convert_to_timestamp)

In [5]:
#Check if dates are correctly turned into a timsetamp format
print(aux_df['date'][0])
date = datetime.fromtimestamp(aux_df['date'][0])
year = date.year
month = date.month
day = date.day

print(year, month, day)

978318000
2001 1 1


In [6]:
#divide the dataframe into three sections (train, test, validation) with roughly a 70-20-10 distribution
#The samples are divided secuentially to improve the RNN performance

train_df, test_df = train_test_split(aux_df, test_size=0.2, shuffle=False)  # 80% to training df and 20% to testing df
train_df, val_df = train_test_split(train_df, test_size=0.1, shuffle=False)  # Of the 80%, 90% goes to training and 10% to validation

# To make each df legth divisible by the batches in the model building section
train_df = train_df[:-(len(train_df) % SEQUENCE_LEN)]
val_df = val_df[:-(len(val_df) % SEQUENCE_LEN)]
test_df = test_df[:-(len(test_df) % SEQUENCE_LEN)]


DATA NORMALIZATION INFO

https://medium.com/@spinjosovsky/normalize-data-before-or-after-split-of-training-and-testing-data-7b8005f81e26
https://datascience.stackexchange.com/questions/27615/should-we-apply-normalization-to-test-data-as-well

In [7]:
# Normalization of every dataframe. The chosen normalization uses mean and standard deviation
# Every Dataframe must be normalized by the same values, hence the train df mean and deviation are saved
train_mean = train_df.mean()
train_std = train_df.std()


def normalize_data(data, mean, std):
    return (data - mean) / std

#The id field shouldn't be normalized yet is not used in the model
train_normalized = normalize_data(train_df, train_mean, train_std)
val_normalized = normalize_data(val_df, train_mean, train_std)
test_normalized = normalize_data(test_df, train_mean, train_std)

In [26]:
# Keras Input tensors of float values.
#The prediction is going to use one value for lat, long, precipitation, temperature (the ones on the date desired to estimate)
#and the SEQUENCE_LEN previous values of ppna
inputs = {
    'date':
        tf.keras.layers.Input(shape=(1,), dtype=tf.float32,
                              name='date'),
    'latitude':
        tf.keras.layers.Input(shape=(1,), dtype=tf.float32,
                              name='latitude'),
    'longitude':
        tf.keras.layers.Input(shape=(1,), dtype=tf.float32,
                              name='longitude'),
    'ppna':
        tf.keras.layers.Input(shape=(SEQUENCE_LEN, 1), dtype=tf.float32,
                             name='ppna'),
    'precipitation':
        tf.keras.layers.Input(shape=(1,), dtype=tf.float32,
                              name='precipitation'),
    'temperature':
        tf.keras.layers.Input(shape=(1,), dtype=tf.float32,
                              name='temperature')
}

In [27]:
# Define the model for the NN
# LSTM layer with 64 nodes processing the ppna -- the only parameter that takes info from the past
lstm_layer = tf.keras.layers.LSTM(64)(inputs['ppna'])

# Concatenate the inputs and LSTM output to provide the model tools to learn the patterns
concatenated_features = tf.keras.layers.concatenate([inputs['date'], inputs['latitude'], inputs['longitude'], inputs['precipitation'], inputs['temperature'], lstm_layer])

# Output Layer, with just one output (Estimated PPNA) that receives the concatenated features
output = tf.keras.layers.Dense(1, name='ppna_output')(concatenated_features)

# Create the model
model = tf.keras.Model(inputs=inputs, outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'accuracy'])
model.summary()

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 ppna (InputLayer)           [(None, 26, 1)]              0         []                            
                                                                                                  
 date (InputLayer)           [(None, 1)]                  0         []                            
                                                                                                  
 latitude (InputLayer)       [(None, 1)]                  0         []                            
                                                                                                  
 longitude (InputLayer)      [(None, 1)]                  0         []                            
                                                                                            

In [28]:
#Model training

#Define the training parameters
epochs = 5  
batch_size = 64 

#Define the features and labels for each section of the process.
#The label is the ppna (what we want to predict)
#The features are the values used to predict said label (previous ppna, precipitations, temperature, lat, long, date)
train_labels = train_normalized['ppna'] 
train_features = {
        'date': train_normalized['date'],
        'latitude': train_normalized['latitude'],
        'longitude': train_normalized['longitude'],
        'ppna': train_normalized['ppna'],
        'precipitation': train_normalized['ppt'],
        'temperature': train_normalized['temp']
}

val_labels = val_normalized['ppna'] 
val_features = {
         'date': val_normalized['date'],
         'latitude': val_normalized['latitude'],
         'longitude': val_normalized['longitude'],
         'ppna': val_normalized['ppna'],
         'precipitation': val_normalized['ppt'],
         'temperature': val_normalized['temp']
}

test_labels = test_normalized['ppna']
test_features = {
    'date': test_normalized['date'],
    'latitude': test_normalized['latitude'],
    'longitude': test_normalized['longitude'],
    'ppna': test_normalized['ppna'],
    'precipitation': test_normalized['ppt'],
    'temperature': test_normalized['temp']
}

#Train the model with the previously defined parameters and data
history = model.fit(
    train_features, 
    train_labels, 
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(val_features, val_labels)  
)

# Evaluate the model
test_loss = model.evaluate(test_features, test_labels)  

print("Pérdida en el conjunto de prueba:", test_loss)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Pérdida en el conjunto de prueba: [8.460481240035733e-07, 0.0006542647606693208, 0.0]


DE ACA PARA ABAJO SOLO HAY CODIGO SIN USAR, PARTES SIN PROBAR QUE GUARDE POR SI FALLA LO QUE HICE. NO EJECUTAR

In [None]:

# Define the model for the NN
# CODIGO ALTERNATIVO PARA CREAR MODELO. NO EJECUTAR
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
num_caracteristicas = 6  # Número de características: PPNA, temperatura, precipitaciones, latitud, longitud, fecha

# Modelo RNN con capa LSTM
model = models.Sequential()

# Agrega una capa LSTM con return_sequences=True para mantener las secuencias
model.add(layers.LSTM(units=64, return_sequences=True, input_shape=(SEQUENCE_LEN, num_caracteristicas)))
# Agrega otra capa LSTM si es necesario
model.add(layers.LSTM(units=64, return_sequences=False))  # False para la última capa LSTM

# Capa de salida para predecir la PPNA futura
model.add(layers.Dense(1))

# Compila el modelo
model.compile(loss='mean_squared_error', optimizer='adam')

# Resumen del modelo
model.summary()


In [17]:
#Model training NO EJECUTAR, CODIGO BORRADOR

epochs = 1  
batch_size = 128  # Tamaño del lote de entrenamiento (ajusta según sea necesario)
train_labels = train_normalized['ppna']
val_labels = val_normalized['ppna']

test_labels = test_normalized['ppna']
test_features = {
    'latitude': test_normalized['latitude'],
    'longitude': test_normalized['longitude'],
    'ppna': test_normalized['ppna'],
    'precipitation': test_normalized['ppt'],
    'temperature': test_normalized['temp']
}

history = model.fit(
    x={'latitude': train_normalized['latitude'],
       'longitude': train_normalized['longitude'],
       'ppna': train_normalized['ppna'],
       'precipitation': train_normalized['ppt'],
       'temperature': train_normalized['temp']},
    y=train_labels,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(
        {'latitude': val_normalized['latitude'],
         'longitude': val_normalized['longitude'],
         'ppna': val_normalized['ppna'],
         'precipitation': val_normalized['ppt'],
         'temperature': val_normalized['temp']},
        val_labels
    )
)

# Evalúa el modelo en el conjunto de prueba
test_loss = model.evaluate(test_features, test_labels)  # Reemplaza test_labels por las etiquetas reales

# Imprime el resultado de la evaluación
print("Pérdida en el conjunto de prueba:", test_loss)



ValueError: Layer "model_2" expects 5 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(32, 7), dtype=float64, numpy=
array([[ 2.1170918 , -1.72874062, -0.26278911, -1.0814139 ,  0.82817595,
        -0.20319931,  1.27145415],
       [ 2.11709878, -1.72115296, -0.26278911, -1.0814139 ,  0.55131022,
         0.05242548,  1.11048984],
       [ 2.11710575, -1.7135653 , -0.26278911, -1.0814139 ,  0.17709505,
         0.34331331,  1.1605987 ],
       [ 2.11711272, -1.70597764, -0.26278911, -1.0814139 ,  0.03009318,
         0.04313373,  1.61463614],
       [ 2.1171197 , -1.69838998, -0.26278911, -1.0814139 , -0.18994747,
         0.21244649,  1.18319312],
       [ 2.11712667, -1.69080232, -0.26278911, -1.0814139 , -0.63195058,
         0.17178716,  0.20023021],
       [ 2.11713364, -1.68321466, -0.26278911, -1.0814139 , -0.57984389,
         0.49953076,  0.14986643],
       [ 2.11714062, -1.675627  , -0.26278911, -1.0814139 , -0.9956881 ,
         0.11779576, -0.93057008],
       [ 2.11714759, -1.66803934, -0.26278911, -1.0814139 , -1.19978138,
        -0.10995097, -1.17504765],
       [ 2.11715456, -1.66045168, -0.26278911, -1.0814139 , -1.39002985,
         0.72040991, -0.26615302],
       [ 2.11716154, -1.65286402, -0.26278911, -1.0814139 , -1.34903534,
        -0.45188887, -1.25513637],
       [ 2.11716851, -1.64527636, -0.26278911, -1.0814139 , -1.26089933,
        -0.44737682, -1.11498983],
       [ 2.11717548, -1.6376887 , -0.26278911, -1.0814139 , -1.08385888,
        -0.83966125, -1.57708464],
       [ 2.11718245, -1.63010104, -0.26278911, -1.0814139 , -0.88798217,
        -0.88288877, -0.61576694],
       [ 2.11718943, -1.62251338, -0.26278911, -1.0814139 , -0.84386072,
         0.45315453, -0.60497468],
       [ 2.1171964 , -1.61492572, -0.26278911, -1.0814139 , -0.6045983 ,
        -0.02354208, -0.89508757],
       [ 2.11720337, -1.60733806, -0.26278911, -1.0814139 , -0.35601736,
        -0.73094639, -0.67810823],
       [ 2.11721035, -1.5997504 , -0.26278911, -1.0814139 , -0.25718676,
         1.10457807, -0.40504544],
       [ 2.11721732, -1.59216274, -0.26278911, -1.0814139 ,  0.22059042,
         2.4038059 ,  0.10347988],
       [ 2.11722429, -1.58457508, -0.26278911, -1.0814139 ,  0.95765561,
        -0.11696433,  0.03536864],
       [ 2.11723127, -1.57698742, -0.26278911, -1.0814139 ,  1.43864063,
         0.51632468,  0.29431591],
       [ 2.11723824, -1.56939976, -0.26278911, -1.0814139 ,  1.68589306,
        -0.82409914,  0.3492372 ],
       [ 2.11724521, -1.5618121 , -0.26278911, -1.0814139 ,  1.85208203,
         0.02055322,  1.05888231],
       [ 2.11725219, -1.55564712, -0.26278911, -1.0814139 ,  1.65748913,
        -0.50233552,  0.88270383],
       [ 2.11725916, -1.54805946, -0.26278911, -1.0814139 ,  1.83980155,
         1.50317396,  1.19714052],
       [ 2.11726613, -1.5404718 , -0.26278911, -1.0814139 ,  1.35560892,
        -0.48439045,  0.68929272],
       [ 2.1172731 , -1.53288414, -0.26278911, -1.0814139 ,  0.61915079,
         0.42112023,  0.81921141],
       [ 2.11728008, -1.52529648, -0.26278911, -1.0814139 , -0.19576803,
         1.74270117,  1.17868132],
       [ 2.11728705, -1.51770882, -0.26278911, -1.0814139 , -0.4425675 ,
         0.57451325,  0.14009126],
       [ 2.11729402, -1.51012116, -0.26278911, -1.0814139 , -0.64090496,
         2.43839923, -0.04058567],
       [ 2.117301  , -1.5025335 , -0.26278911, -1.0814139 , -0.95470186,
         1.26884827, -0.71197221],
       [ 2.11730797, -1.49494584, -0.26278911, -1.0814139 , -1.12951111,
         0.40741638, -0.35388862]])>]