In [1]:
# Solicitud de datos a la API REST
from api_somo import APIClient, fetch_qf 
import logging
import pandas as pd

# Setting up logging for the main script
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

api_url = "http://127.0.0.1:8000/database/functions/time-series/filter_date_serie/"
params = {
    "station_name": "buenaventura",
    "variable_name": "nivel del mar",
    "processing_level_name": "Control de calidad",
    "start_date":"2009-01-01",
    "end_date":"2012-12-31"
    }

try:
    # 1. Fetch and process the data
    red_mpomm_serie, red_mpomm_metadata = APIClient.fetch_and_process_data(api_url, params)
    red_mpomm_serie.index = pd.to_datetime(red_mpomm_serie.index)
    red_mpomm_serie.set_index('Fecha', inplace=True)


    if red_mpomm_serie.empty:
        raise ValueError("The historical series is empty.")

    # Additional processing with the historical series
    logger.info(f"Fetched historical series: {red_mpomm_serie.head(0)}")

except Exception as e:
    logger.error(f"Error fetching historical series: {e}")

display(red_mpomm_serie)
display(red_mpomm_serie.columns, type(red_mpomm_serie))
display(round(red_mpomm_serie.describe(),3))

INFO:api_somo:Sending request to http://127.0.0.1:8000/database/functions/time-series/filter_date_serie/ with parameters: {'station_name': 'buenaventura', 'variable_name': 'nivel del mar', 'processing_level_name': 'Control de calidad', 'start_date': '2009-01-01', 'end_date': '2012-12-31'}
INFO:api_somo:Original columns: ['date_time', 'sensor_data', 'quality_flag']
INFO:api_somo:Timestamp conversion and indexing complete.
ERROR:__main__:Error fetching historical series: "None of ['Fecha'] are in the columns"


Unnamed: 0_level_0,value,qf
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-03-20 05:01:00,-99999.00,9
2009-03-20 05:02:00,-99999.00,9
2009-03-20 05:03:00,-99999.00,9
2009-03-20 05:04:00,-99999.00,9
2009-03-20 05:05:00,-99999.00,9
...,...,...
2012-12-31 04:56:00,1.03,1
2012-12-31 04:57:00,1.03,1
2012-12-31 04:58:00,1.03,1
2012-12-31 04:59:00,1.03,1


Index(['value', 'qf'], dtype='object')

pandas.core.frame.DataFrame

Unnamed: 0,value,qf
count,1990080.0,1990080.0
mean,-49344.646,5.043
std,49997.499,3.938
min,-99999.0,1.0
25%,-99999.0,1.0
50%,0.27,4.0
75%,2.46,9.0
max,307.35,9.0


In [2]:
type(red_mpomm_serie.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [3]:
from graphics_utils.express import fig_pie_month
red_mpomm_serie
pie_fig = fig_pie_month(red_mpomm_serie, params)
pie_fig.show()

                        value  qf
timestamp                        
2009-03-20 05:01:00 -99999.00   9
2009-03-20 05:02:00 -99999.00   9
2009-03-20 05:03:00 -99999.00   9
2009-03-20 05:04:00 -99999.00   9
2009-03-20 05:05:00 -99999.00   9
...                       ...  ..
2012-12-31 04:56:00      1.03   1
2012-12-31 04:57:00      1.03   1
2012-12-31 04:58:00      1.03   1
2012-12-31 04:59:00      1.03   1
2012-12-31 05:00:00      1.04   1

[1990080 rows x 2 columns]


In [4]:
def train_val_test_split(serie, tr_size=0.8, vl_size=0.1, ts_size=0.1 ):
    # Definir número de datos en cada subserie
    N = serie.shape[0]
    Ntrain = int(tr_size*N)  # Número de datos de entrenamiento
    Nval = int(vl_size*N)    # Número de datos de validación
    Ntst = N - Ntrain - Nval # Número de datos de prueba

    # Realizar partición
    train = serie[0:Ntrain]
    val = serie[Ntrain:Ntrain+Nval]
    test = serie[Ntrain+Nval:]

    return train, val, test

tr, vl, ts = train_val_test_split(red_mpomm_serie['value'])

# Imprimir en pantalla el tamaño de cada subset
print(f'Tamaño set de entrenamiento: {tr.shape}')
print(f'Tamaño set de validación: {vl.shape}')
print(f'Tamaño set de prueba: {ts.shape}')

Tamaño set de entrenamiento: (1592064,)
Tamaño set de validación: (199008,)
Tamaño set de prueba: (199008,)


In [None]:
from graphics_utils.express import fig_pie_month, fig_boxplot, fig_lineplot

display(red_mpomm_serie[red_mpomm_serie['qf'] == 9])


'''line_fig = fig_lineplot(red_mpomm_serie, params, color='qf')
line_fig.show()
pie_fig = fig_pie_month(red_mpomm_serie, params)
pie_fig.show()
boxplot_fig = fig_boxplot(red_mpomm_serie, params, color='month')
boxplot_fig.show()'''

Unnamed: 0_level_0,value,qf
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-03-20 05:01:00,-99999.0,9
2009-03-20 05:02:00,-99999.0,9
2009-03-20 05:03:00,-99999.0,9
2009-03-20 05:04:00,-99999.0,9
2009-03-20 05:05:00,-99999.0,9
...,...,...
2012-11-29 21:21:00,-99999.0,9
2012-11-29 21:22:00,-99999.0,9
2012-11-29 21:23:00,-99999.0,9
2012-12-10 07:09:00,-99999.0,9


"line_fig = fig_lineplot(red_mpomm_serie, params, color='qf')\nline_fig.show()\npie_fig = fig_pie_month(red_mpomm_serie, params)\npie_fig.show()\nboxplot_fig = fig_boxplot(red_mpomm_serie, params, color='month')\nboxplot_fig.show()"