In [1]:

import netCDF4 as nc
import pandas as pd
import numpy as np
from datetime import timedelta, datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
import pickle


In [2]:

def read_station(folder_path, file_name):
    
    full_path = os.path.join(folder_path, f'{file_name}.txt')
    try:
        # Load observation data from a text file
        # Specify -99999 and -1 as NaN values
        df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])

        df['date'] = pd.to_datetime(df['date'])

    except FileNotFoundError:
        print(f"No data: {full_path}")
        return pd.DataFrame()  # Return empty DataFrame if file is not found
    
    return df


In [3]:

def process_geo_data(file_data, index,info_stations):
    
    forecast_dict = {}

    # Iterate over the forecast files
    for forecast in range(0, len(file_data)):

        filename = file_data['File Name'].loc[forecast]
        filename2 = filename.split('_')[0]
        print(filename2)

        issue_date =  file_data['Date'].loc[forecast]
        issue_date = pd.to_datetime(issue_date, utc=True)

        id_reach = info_stations['reach_ids_geoglows'][index]
        df = pd.read_csv(f'{id_reach}/row_data/{filename2}_HR.csv')

        start_date = issue_date + timedelta(days=0.5) # make it center labeled

        # Adjust the 'Dates' column to datetime and ensure they are UTC-aware
        df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

        # Group by the date part of the datetime, set the time to noon, calculate the mean, and reset index
        daily_avg = df.groupby(df['datetime'].dt.floor('D') + pd.Timedelta(hours=12)).mean(numeric_only=True).reset_index()
        
        # Ensure 'Dates' column in daily_avg is also tz-aware UTC
        daily_avg['datetime'] = pd.to_datetime(daily_avg['datetime'])
        print(daily_avg.head())

        # Create a daily time series starting from the start date
        time = [start_date + timedelta(days=i) for i in range(16)]  # Assumes 30 days of data
        
        # Store data in the dictionary using the issue_date as the key
        forecast_dict[issue_date] = {
            'time': time,
            'dis24_station': daily_avg,
            'start_date': start_date
        }

            # 'dis24_station': daily_avg.tolist(),
    return forecast_dict


In [4]:
def plot_geo_forecasts(forecast_dict, df_obs, station_name, name_data):
    f = 0

    # Iterate over the forecasts stored in the dictionary
    for issue_date, data in forecast_dict.items():
        f += 1
        time = pd.to_datetime(data['time'])  # Convert to datetime
        dis24_station = data['dis24_station']
        issue_date = pd.to_datetime(issue_date)  # Ensure issue_date is a datetime object

        plt.figure(figsize=(8, 5))
        
        plt.plot(time, dis24_station, color='black', alpha=0.8, label='Forecast')

        # Normalize the observed data 'date' column to midnight
        df_obs['date'] = pd.to_datetime(df_obs['date']).dt.normalize()
        plt.plot(df_obs['date'], df_obs[name_data], color='blue', label='Observed Data')
        
        plt.xlabel('Date')
        plt.ylabel('Discharge (m³/s)')
        plt.title(f'Google 24-hour Discharge Forecast for {station_name} Station - Issued on {issue_date.date()}')
        plt.xlim(time[0] - timedelta(days=3), time[-1])
        plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=2))
        plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
        plt.gcf().autofmt_xdate()
        plt.grid(True, color='grey', alpha=0.2)
        plt.tight_layout()
        
        plt.axvline(x=issue_date, color='red', linestyle='--', label='Date of issue')
        
        plt.legend()
        plt.savefig(f'Figures/{station_name}_google_{f}.png', dpi=300)
        plt.show()


In [5]:

name_data = 'discharge'

# -----------------------------------------------------------------------------
# Load station data
info_stations = pd.read_csv('../Documents/porto_alegre_stations_ids.csv')


id_reach = info_stations['reach_ids_geoglows'][0]
files = [file for file in os.listdir(f'{id_reach}/row_data/') if file.endswith('_HR.csv')]


file_dates = [file.split('_')[0] for file in files]

file_dates = [datetime.strptime(date, '%Y-%m-%d') for date in file_dates]

file_data = pd.DataFrame({
    'File Name': files,
    'Date': file_dates
}).sort_values(by='Date').reset_index(drop=True)

print(file_data)



            File Name       Date
0   2024-03-31_HR.csv 2024-03-31
1   2024-04-01_HR.csv 2024-04-01
2   2024-04-02_HR.csv 2024-04-02
3   2024-04-03_HR.csv 2024-04-03
4   2024-04-04_HR.csv 2024-04-04
..                ...        ...
86  2024-06-25_HR.csv 2024-06-25
87  2024-06-26_HR.csv 2024-06-26
88  2024-06-27_HR.csv 2024-06-27
89  2024-06-28_HR.csv 2024-06-28
90  2024-06-29_HR.csv 2024-06-29

[91 rows x 2 columns]


In [6]:

# Specify the directory paths
hist_folder_path = f'Historic_{name_data}'
telem_folder_path = 'Telemetricas'

# Loop through each station using the 'Code' column
for index, row in info_stations.iterrows():
    station_name = row['Name']
    station_code = row['Code']

    print('----------------------------------------------------------------------------')
    print(station_code)
    print(station_name)

    # Read telemetric data
    df_telem = read_station('../Telemetricas', station_code)

    # Process data
    processed_data = process_geo_data(file_data,index, info_stations)

    # Save the processed data to a file
    with open(f'data_geo_{station_code}_HR.pkl', 'wb') as file:
        pickle.dump(processed_data, file)


----------------------------------------------------------------------------
87450004
CAIS MAUÁ C6
No data: ../Telemetricas\87450004.txt
2024-03-31
                   datetime  ensemble_52_m^3/s
0 2024-03-31 12:00:00+00:00           3.971084
1 2024-04-01 12:00:00+00:00           2.480375
2 2024-04-02 12:00:00+00:00           2.953449
3 2024-04-03 12:00:00+00:00           2.854551
4 2024-04-04 12:00:00+00:00           2.358355
2024-04-01
                   datetime  ensemble_52_m^3/s
0 2024-04-01 12:00:00+00:00           2.530714
1 2024-04-02 12:00:00+00:00           2.507179
2 2024-04-03 12:00:00+00:00           2.560827
3 2024-04-04 12:00:00+00:00           2.351635
4 2024-04-05 12:00:00+00:00           9.395020
2024-04-02
                   datetime  ensemble_52_m^3/s
0 2024-04-02 12:00:00+00:00           2.603069
1 2024-04-03 12:00:00+00:00           2.425387
2 2024-04-04 12:00:00+00:00           2.321760
3 2024-04-05 12:00:00+00:00           7.606192
4 2024-04-06 12:00:00+00:00    

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-05-30 12:00:00+00:00         114.832826
1 2024-05-31 12:00:00+00:00         104.356030
2 2024-06-01 12:00:00+00:00         101.521612
3 2024-06-02 12:00:00+00:00          99.060829
4 2024-06-03 12:00:00+00:00          96.472994
2024-05-31
                   datetime  ensemble_52_m^3/s
0 2024-05-31 12:00:00+00:00         108.689372
1 2024-06-01 12:00:00+00:00         101.919135
2 2024-06-02 12:00:00+00:00          98.749593
3 2024-06-03 12:00:00+00:00          96.314903
4 2024-06-04 12:00:00+00:00          93.522275
2024-06-01
                   datetime  ensemble_52_m^3/s
0 2024-06-01 12:00:00+00:00         105.315065
1 2024-06-02 12:00:00+00:00          99.090962
2 2024-06-03 12:00:00+00:00          95.801954
3 2024-06-04 12:00:00+00:00          93.988614
4 2024-06-05 12:00:00+00:00          89.492225
2024-06-02
                   datetime  ensemble_52_m^3/s
0 2024-06-02 12:00:00+00:00         101.763589
1 2024-06-03 12:00:00+00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-05-06 12:00:00+00:00       15436.437292
1 2024-05-07 12:00:00+00:00       16481.887583
2 2024-05-08 12:00:00+00:00       16038.319854
3 2024-05-09 12:00:00+00:00       14982.376800
4 2024-05-10 12:00:00+00:00       13803.749625
2024-05-07
                   datetime  ensemble_52_m^3/s
0 2024-05-07 12:00:00+00:00       16797.535958
1 2024-05-08 12:00:00+00:00       16455.225333
2 2024-05-09 12:00:00+00:00       15448.780958
3 2024-05-10 12:00:00+00:00       14395.778900
4 2024-05-11 12:00:00+00:00       13260.299625
2024-05-08
                   datetime  ensemble_52_m^3/s
0 2024-05-08 12:00:00+00:00       16670.257000
1 2024-05-09 12:00:00+00:00       15682.000833
2 2024-05-10 12:00:00+00:00       14687.347042
3 2024-05-11 12:00:00+00:00       13687.792700
4 2024-05-12 12:00:00+00:00       11947.228625
2024-05-09
                   datetime  ensemble_52_m^3/s
0 2024-05-09 12:00:00+00:00       15861.114667
1 2024-05-10 12:00:00+00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


No data: ../Telemetricas\87270000.txt
2024-03-31
                   datetime  ensemble_52_m^3/s
0 2024-03-31 12:00:00+00:00          19.498960
1 2024-04-01 12:00:00+00:00          12.546987
2 2024-04-02 12:00:00+00:00           9.346338
3 2024-04-03 12:00:00+00:00           9.912805
4 2024-04-04 12:00:00+00:00           8.826742
2024-04-01
                   datetime  ensemble_52_m^3/s
0 2024-04-01 12:00:00+00:00          13.012882
1 2024-04-02 12:00:00+00:00          10.244803
2 2024-04-03 12:00:00+00:00          10.076773
3 2024-04-04 12:00:00+00:00           8.880807
4 2024-04-05 12:00:00+00:00          74.218190
2024-04-02
                   datetime  ensemble_52_m^3/s
0 2024-04-02 12:00:00+00:00          10.836512
1 2024-04-03 12:00:00+00:00           9.649399
2 2024-04-04 12:00:00+00:00           9.892292
3 2024-04-05 12:00:00+00:00          43.105991
4 2024-04-06 12:00:00+00:00          96.005034
2024-04-03
                   datetime  ensemble_52_m^3/s
0 2024-04-03 12:00:00+00:

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-05-04 12:00:00+00:00         682.603721
1 2024-05-05 12:00:00+00:00         759.893280
2 2024-05-06 12:00:00+00:00         652.618168
3 2024-05-07 12:00:00+00:00         383.097098
4 2024-05-08 12:00:00+00:00         212.856611
2024-05-05
                   datetime  ensemble_52_m^3/s
0 2024-05-05 12:00:00+00:00         878.011088
1 2024-05-06 12:00:00+00:00         560.807004
2 2024-05-07 12:00:00+00:00         339.717919
3 2024-05-08 12:00:00+00:00         208.586913
4 2024-05-09 12:00:00+00:00         167.539770
2024-05-06
                   datetime  ensemble_52_m^3/s
0 2024-05-06 12:00:00+00:00         678.331056
1 2024-05-07 12:00:00+00:00         310.129797
2 2024-05-08 12:00:00+00:00         197.348273
3 2024-05-09 12:00:00+00:00         165.620003
4 2024-05-10 12:00:00+00:00         151.614242
2024-05-07
                   datetime  ensemble_52_m^3/s
0 2024-05-07 12:00:00+00:00         440.418141
1 2024-05-08 12:00:00+00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-04-10 12:00:00+00:00         186.908403
1 2024-04-11 12:00:00+00:00         168.919801
2 2024-04-12 12:00:00+00:00         226.439280
3 2024-04-13 12:00:00+00:00         296.576524
4 2024-04-14 12:00:00+00:00         419.922453
2024-04-11
                   datetime  ensemble_52_m^3/s
0 2024-04-11 12:00:00+00:00         166.234731
1 2024-04-12 12:00:00+00:00         187.967884
2 2024-04-13 12:00:00+00:00         199.609123
3 2024-04-14 12:00:00+00:00         223.962557
4 2024-04-15 12:00:00+00:00         518.467075
2024-04-12
                   datetime  ensemble_52_m^3/s
0 2024-04-12 12:00:00+00:00         184.832467
1 2024-04-13 12:00:00+00:00         175.735046
2 2024-04-14 12:00:00+00:00         185.175516
3 2024-04-15 12:00:00+00:00         217.565256
4 2024-04-16 12:00:00+00:00         260.061331
2024-04-13
                   datetime  ensemble_52_m^3/s
0 2024-04-13 12:00:00+00:00         179.920980
1 2024-04-14 12:00:00+00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-05-09 12:00:00+00:00          46.877318
1 2024-05-10 12:00:00+00:00          40.798246
2 2024-05-11 12:00:00+00:00         105.916513
3 2024-05-12 12:00:00+00:00         370.254154
4 2024-05-13 12:00:00+00:00         920.601494
2024-05-10
                   datetime  ensemble_52_m^3/s
0 2024-05-10 12:00:00+00:00          49.993692
1 2024-05-11 12:00:00+00:00         152.471015
2 2024-05-12 12:00:00+00:00         348.433305
3 2024-05-13 12:00:00+00:00         480.556074
4 2024-05-14 12:00:00+00:00         173.903551
2024-05-11
                   datetime  ensemble_52_m^3/s
0 2024-05-11 12:00:00+00:00         143.935706
1 2024-05-12 12:00:00+00:00         372.702528
2 2024-05-13 12:00:00+00:00         559.786258
3 2024-05-14 12:00:00+00:00         212.954862
4 2024-05-15 12:00:00+00:00         144.923561
2024-05-12
                   datetime  ensemble_52_m^3/s
0 2024-05-12 12:00:00+00:00         267.671314
1 2024-05-13 12:00:00+00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-04-09 12:00:00+00:00         527.963358
1 2024-04-10 12:00:00+00:00         680.514983
2 2024-04-11 12:00:00+00:00         588.974107
3 2024-04-12 12:00:00+00:00         501.963591
4 2024-04-13 12:00:00+00:00         962.706220
2024-04-10
                   datetime  ensemble_52_m^3/s
0 2024-04-10 12:00:00+00:00         676.333663
1 2024-04-11 12:00:00+00:00         604.058765
2 2024-04-12 12:00:00+00:00         583.319812
3 2024-04-13 12:00:00+00:00         653.543819
4 2024-04-14 12:00:00+00:00        1191.470563
2024-04-11
                   datetime  ensemble_52_m^3/s
0 2024-04-11 12:00:00+00:00         606.755321
1 2024-04-12 12:00:00+00:00         456.257121
2 2024-04-13 12:00:00+00:00         337.274525
3 2024-04-14 12:00:00+00:00         386.377927
4 2024-04-15 12:00:00+00:00        1271.870525
2024-04-12
                   datetime  ensemble_52_m^3/s
0 2024-04-12 12:00:00+00:00         469.674285
1 2024-04-13 12:00:00+00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-05-13 12:00:00+00:00         916.717895
1 2024-05-14 12:00:00+00:00         738.989888
2 2024-05-15 12:00:00+00:00         334.478916
3 2024-05-16 12:00:00+00:00         231.863404
4 2024-05-17 12:00:00+00:00         247.422757
2024-05-14
                   datetime  ensemble_52_m^3/s
0 2024-05-14 12:00:00+00:00         715.411949
1 2024-05-15 12:00:00+00:00         333.189603
2 2024-05-16 12:00:00+00:00         230.400685
3 2024-05-17 12:00:00+00:00         277.638409
4 2024-05-18 12:00:00+00:00         342.171898
2024-05-15
                   datetime  ensemble_52_m^3/s
0 2024-05-15 12:00:00+00:00         415.086482
1 2024-05-16 12:00:00+00:00         257.055527
2 2024-05-17 12:00:00+00:00         252.825187
3 2024-05-18 12:00:00+00:00         236.060813
4 2024-05-19 12:00:00+00:00         181.584747
2024-05-16
                   datetime  ensemble_52_m^3/s
0 2024-05-16 12:00:00+00:00         283.164681
1 2024-05-17 12:00:00+00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-04-15 12:00:00+00:00          63.994372
1 2024-04-16 12:00:00+00:00          31.326731
2 2024-04-17 12:00:00+00:00          81.512289
3 2024-04-18 12:00:00+00:00         111.833037
4 2024-04-19 12:00:00+00:00          25.211047
2024-04-16
                   datetime  ensemble_52_m^3/s
0 2024-04-16 12:00:00+00:00          31.048411
1 2024-04-17 12:00:00+00:00          45.116250
2 2024-04-18 12:00:00+00:00          54.396937
3 2024-04-19 12:00:00+00:00          13.794690
4 2024-04-20 12:00:00+00:00           7.961504
2024-04-17
                   datetime  ensemble_52_m^3/s
0 2024-04-17 12:00:00+00:00          64.275692
1 2024-04-18 12:00:00+00:00          58.903194
2 2024-04-19 12:00:00+00:00          13.673301
3 2024-04-20 12:00:00+00:00           8.157704
4 2024-04-21 12:00:00+00:00           8.063707
2024-04-18
                   datetime  ensemble_52_m^3/s
0 2024-04-18 12:00:00+00:00          86.699509
1 2024-04-19 12:00:00+00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-05-07 12:00:00+00:00          41.041321
1 2024-05-08 12:00:00+00:00          29.722613
2 2024-05-09 12:00:00+00:00          27.917603
3 2024-05-10 12:00:00+00:00          76.119663
4 2024-05-11 12:00:00+00:00         103.770280
2024-05-08
                   datetime  ensemble_52_m^3/s
0 2024-05-08 12:00:00+00:00          37.234538
1 2024-05-09 12:00:00+00:00          29.205402
2 2024-05-10 12:00:00+00:00          46.036607
3 2024-05-11 12:00:00+00:00          93.289014
4 2024-05-12 12:00:00+00:00          92.222388
2024-05-09
                   datetime  ensemble_52_m^3/s
0 2024-05-09 12:00:00+00:00          38.064197
1 2024-05-10 12:00:00+00:00          37.589266
2 2024-05-11 12:00:00+00:00          83.549148
3 2024-05-12 12:00:00+00:00         304.384522
4 2024-05-13 12:00:00+00:00         750.566799
2024-05-10
                   datetime  ensemble_52_m^3/s
0 2024-05-10 12:00:00+00:00          44.951122
1 2024-05-11 12:00:00+00:00

  df = pd.read_table(full_path, delim_whitespace=True, na_values=[-99999, -1])


                   datetime  ensemble_52_m^3/s
0 2024-04-13 12:00:00+00:00         161.133032
1 2024-04-14 12:00:00+00:00         131.801934
2 2024-04-15 12:00:00+00:00         116.851081
3 2024-04-16 12:00:00+00:00         163.057754
4 2024-04-17 12:00:00+00:00         246.133259
2024-04-14
                   datetime  ensemble_52_m^3/s
0 2024-04-14 12:00:00+00:00         194.125456
1 2024-04-15 12:00:00+00:00         358.343621
2 2024-04-16 12:00:00+00:00         441.391367
3 2024-04-17 12:00:00+00:00         508.488631
4 2024-04-18 12:00:00+00:00         526.602330
2024-04-15
                   datetime  ensemble_52_m^3/s
0 2024-04-15 12:00:00+00:00         377.743648
1 2024-04-16 12:00:00+00:00         500.143555
2 2024-04-17 12:00:00+00:00         578.531783
3 2024-04-18 12:00:00+00:00         549.325415
4 2024-04-19 12:00:00+00:00         490.924075
2024-04-16
                   datetime  ensemble_52_m^3/s
0 2024-04-16 12:00:00+00:00         500.347183
1 2024-04-17 12:00:00+00:00