In [5]:

import pandas as pd
import numpy as np
from datetime import timedelta, datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
import pickle


In [6]:
def read_station(folder_path, file_name):
    
    full_path = os.path.join(folder_path, f'{file_name}.txt')
    try:
        # Load observation data from a text file
        # Specify -99999 and -1 as NaN values
        df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1]) 

        # sep='\s+'

        df['date'] = pd.to_datetime(df['date'])

    except FileNotFoundError:
        print(f"No data: {full_path}")
        return pd.DataFrame()  # Return empty DataFrame if file is not found
    
    return df


In [7]:
def return_daily(directory_path, filename):        

    full_path = os.path.join(directory_path, f'{filename}.csv')

    # Load observation data from a CSV file
    df = pd.read_csv(full_path)

    # Convert timestamps from string to datetime and standardize to UTC
    # df['issued_timestamp'] = pd.to_datetime(df['issue_date'], utc=True).dt.normalize() 
    df['issued_timestamp'] = pd.to_datetime(df['issue_date'], utc=True)

    df['forecast_timestamp'] = df['issued_timestamp'] + pd.to_timedelta(df['lead_time'], unit='D')
    
    df['forecast_timestamp'] += timedelta(hours=12)

    # print(df.head())
    # print(df['issued_timestamp'])

    return df 


In [8]:
def process_google(hybas_gauge, id_):

    
    hybas_index = np.where(hybas_gauge['hybas_gauge_id'].astype(str) == f'hybas_{id_}')
    hybas_index = hybas_index[0][0]
    print(hybas_index)
    
    filename = hybas_gauge['hybas_gauge_id'][hybas_index]

    df = return_daily('data/processed_google_forecasts', filename)
    
    # Initialize the dictionary to store the grouped forecast data
    forecast_dict = {}

    # Group the DataFrame by 'issued_timestamp'
    grouped = df.groupby('issued_timestamp')

    print(grouped.head())

    # hybas_6120848710	issue_date	lead_time

    # Iterate over each group
    for issued_timestamp, group in grouped:

        # Sort the group by 'forecast_timestamp' to ensure the order
        group_sorted = group.sort_values('forecast_timestamp')
        
        # Extract the forecast timestamps and values, keeping only the last value of each day
        group_sorted = group_sorted.drop_duplicates('forecast_timestamp', keep='last')
        
        time = group_sorted['forecast_timestamp'].tolist()
        dis24_station = group_sorted[filename].tolist()
        start_date = time[0] if time else None  # Safeguard in case the list is empty

        # Store the structured data in the dictionary with the issue date as the key
        forecast_dict[issued_timestamp] = {
            'time': time,
            'dis24_station': dis24_station,
            'start_date': start_date
        }

    return forecast_dict


In [9]:

name_data = 'discharge'

# -----------------------------------------------------------------------------
# Load station data
info_stations = pd.read_csv('../../Documents/porto_alegre_stations_wwhoutid.csv')

hybas_gauge =  pd.read_csv('station_id_to_hybas_gauge_mapping.csv', encoding = 'unicode_escape')


# Specify the directory paths
hist_folder_path = f'Historic_{name_data}'
telem_folder_path = 'Telemetricas'

In [10]:

# Loop through each station using the 'Code' column
for index, row in info_stations.iterrows():

    if index == 0: 
        id_ = '6120015560'
    elif index == 3: 
        id_ = '6121309270'
    elif index == 6: 
        id_ = '6120848710' 
        # id_ = '6120849520'
    elif index == 8: 
        id_ = '6121309110'
    elif index == 7: 
        id_ = '6120840790'
    elif index == 9: 
        id_ = '6120849080'
    elif index == 10: 
        id_ = '6120845950'

        
    else:
        continue

    station_name = row['Name']
    station_code = row['Code']
    print('----------------------------------------------------------------------------')
    print(station_code)
    print(station_name)

    # Read telemetric data
    df_telem = read_station('../../Telemetry', station_code)


    processed_data = process_google(hybas_gauge,id_)

    # Save the processed data to a file
    with open(f'../data_google_{station_code}.pkl', 'wb') as file:
        pickle.dump(processed_data, file)


----------------------------------------------------------------------------
87450004
CAIS MAUÁ C6
0
      Unnamed: 0  hybas_6120015560  issue_date  lead_time  \
0              0       1627.853027  2023-12-31          0   
1              1       1555.758545  2023-12-31          1   
2              2       1390.994019  2023-12-31          2   
3              3       1312.437012  2023-12-31          3   
4              4       1284.348511  2023-12-31          4   
...          ...               ...         ...        ...   
1344        2688       2183.323486  2024-06-16          0   
1345        2689       2630.575928  2024-06-16          1   
1346        2690       3530.483154  2024-06-16          2   
1347        2691       3992.463379  2024-06-16          3   
1348        2692       4649.478516  2024-06-16          4   

              issued_timestamp        forecast_timestamp  
0    2023-12-31 00:00:00+00:00 2023-12-31 12:00:00+00:00  
1    2023-12-31 00:00:00+00:00 2024-01-01 12:00:

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])
  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


----------------------------------------------------------------------------
85900000
RIO PARDO
17
      Unnamed: 0  hybas_6120848710  issue_date  lead_time  \
0              0       1003.833374  2023-12-31          0   
1              1        978.362549  2023-12-31          1   
2              2        880.911194  2023-12-31          2   
3              3        854.944275  2023-12-31          3   
4              4        827.986938  2023-12-31          4   
...          ...               ...         ...        ...   
1344        2688       1099.762207  2024-06-16          0   
1345        2689       1436.247192  2024-06-16          1   
1346        2690       1747.274780  2024-06-16          2   
1347        2691       1748.497559  2024-06-16          3   
1348        2692       1840.961548  2024-06-16          4   

              issued_timestamp        forecast_timestamp  
0    2023-12-31 00:00:00+00:00 2023-12-31 12:00:00+00:00  
1    2023-12-31 00:00:00+00:00 2024-01-01 12:00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])
  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])
  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


----------------------------------------------------------------------------
87399000
PASSO DAS CANOAS - AUXILIAR
15
      Unnamed: 0  hybas_6120849080  issue_date  lead_time  \
0              0         16.626701  2023-12-31          0   
1              1         14.556030  2023-12-31          1   
2              2         13.627881  2023-12-31          2   
3              3         13.759616  2023-12-31          3   
4              4         13.521938  2023-12-31          4   
...          ...               ...         ...        ...   
1344        2688         46.200291  2024-06-16          0   
1345        2689         54.012100  2024-06-16          1   
1346        2690         72.570274  2024-06-16          2   
1347        2691         71.153137  2024-06-16          3   
1348        2692         71.589508  2024-06-16          4   

              issued_timestamp        forecast_timestamp  
0    2023-12-31 00:00:00+00:00 2023-12-31 12:00:00+00:00  
1    2023-12-31 00:00:00+00:00 2

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])
  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])
