In [1]:
import netCDF4 as nc
import pandas as pd
import numpy as np
from datetime import timedelta, datetime, timezone
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
import pickle



In [2]:

def print_netcdf_summary(file_path, return_variables=None):
    """Print a summary of a NetCDF file and optionally return variable data.

    The summary lists the global attributes, the dimensions, and every
    variable (dtype, dimensions, shape and attributes). If the file contains
    a variable named ``time``, its units and values are printed as well.

    Parameters
    ----------
    file_path : str
        Path of the NetCDF file to inspect.
    return_variables : collection of str, optional
        Names of the variables whose full contents should be read and
        returned. Names that are not present in the file are ignored.

    Returns
    -------
    dict or None
        ``{variable_name: data}`` for every requested variable found in the
        file when ``return_variables`` is non-empty, otherwise ``None``.
        ``None`` is also returned, after printing an error message, when the
        file does not exist or cannot be opened.
    """
    if not os.path.exists(file_path):
        print(f"Error: The file '{file_path}' does not exist.")
        return None

    try:
        dataset = nc.Dataset(file_path, mode='r')
    except Exception as e:
        print(f"Error opening file: {e}")
        return None

    variable_data = {}

    # The context manager guarantees the file is closed even if one of the
    # reads below raises.
    with dataset:
        print("Global Attributes:")
        for attr in dataset.ncattrs():
            print(f"  {attr}: {dataset.getncattr(attr)}")

        print("\nDimensions:")
        for dim, dim_obj in dataset.dimensions.items():
            print(f"  {dim}: length {dim_obj.size} (unlimited: {dim_obj.isunlimited()})")

        print("\nVariables:")
        for var, variable in dataset.variables.items():
            print(f"Variable: {var}")
            print(f"  Type: {variable.dtype}")
            print(f"  Dimensions: {variable.dimensions}")
            print(f"  Shape: {variable.shape}")
            for attr in variable.ncattrs():
                print(f"    {attr}: {variable.getncattr(attr)}")
            if return_variables and var in return_variables:
                # Slice while the file is still open so the data is
                # materialised in memory before the dataset is closed.
                variable_data[var] = variable[:]

        if 'time' in dataset.variables:
            T_var = dataset.variables['time']
            T_units = T_var.units if 'units' in T_var.ncattrs() else 'No units available'
            T_data = T_var[:]
            print(f"\nT units: {T_units} Data: {T_data}")

    if return_variables:
        return variable_data
    return None


In [3]:

def read_station(folder_path, file_name):
    """Load one station's observations from a whitespace-delimited text file.

    The file ``<folder_path>/<file_name>.txt`` is read with the first row as
    header. The values -99999 and -1 are treated as missing (NaN) in every
    column, and the ``date`` column is converted to pandas datetimes.

    Parameters
    ----------
    folder_path : str
        Directory containing the station files.
    file_name : str or int
        File name without the ``.txt`` extension (e.g. a station code).

    Returns
    -------
    pandas.DataFrame
        The parsed observations, or an empty DataFrame (after printing a
        message) when the file does not exist.
    """
    full_path = os.path.join(folder_path, f'{file_name}.txt')

    try:
        # sep=r'\s+' is the supported spelling of the deprecated
        # delim_whitespace=True option.
        df = pd.read_table(full_path, sep=r'\s+', na_values=[-99999, -1])
    except FileNotFoundError:
        print(f"No data: {full_path}")
        return pd.DataFrame()  # Return empty DataFrame if file is not found

    df['date'] = pd.to_datetime(df['date'])
    return df


In [4]:

def list_files_and_dates(directory_path, file_prefix):
    """List the files with a given prefix together with the date in their name.

    The date is taken from the first eight characters (``YYYYMMDD``) of the
    last underscore-separated part of the file name, e.g.
    ``ww-hype_forecast_odf_20240430R1.0.nc`` -> 2024-04-30. Whatever follows
    those eight digits (revision tag, extension) is ignored, so the length of
    the revision tag does not matter.

    Parameters
    ----------
    directory_path : str
        Directory to scan.
    file_prefix : str
        Only files whose name starts with this prefix are considered.

    Returns
    -------
    pandas.DataFrame or None
        A frame with the columns ``'File Name'`` and ``'Date'``, sorted by
        date with a fresh 0..n-1 index. Files whose name carries no valid
        date are reported and skipped. ``None`` is returned (after printing
        a message) when ``directory_path`` is not an existing directory.
    """
    if not os.path.isdir(directory_path):
        print(f"The directory '{directory_path}' does not exist.")
        return None

    file_list = []
    file_dates = []
    for file in os.listdir(directory_path):
        if not file.startswith(file_prefix):
            continue

        # Leading 8 characters of the last '_' token are expected to be YYYYMMDD
        stamp = file.split('_')[-1][:8]
        parsed = None
        if len(stamp) == 8 and stamp.isdigit():
            try:
                parsed = datetime.strptime(stamp, '%Y%m%d')
            except ValueError:
                # Eight digits that do not form a real calendar date
                parsed = None

        if parsed is None:
            print(f"Skipping '{file}': no YYYYMMDD date found in file name.")
            continue

        file_list.append(file)
        file_dates.append(parsed)

    # Create DataFrame with filenames and dates, ordered chronologically
    file_data = pd.DataFrame({
        'File Name': file_list,
        'Date': file_dates
    }).sort_values(by='Date').reset_index(drop=True)

    return file_data

In [5]:

def process_hype_data(directory_path, file_data, index_, info_stations):
    """Extract one station's discharge forecast from every forecast file.

    For each row of ``file_data`` the NetCDF file is opened, the column of
    the ``cout`` variable that belongs to the station is extracted, and a
    daily time axis is built for it.

    Parameters
    ----------
    directory_path : str
        Directory containing the forecast NetCDF files.
    file_data : pandas.DataFrame
        Table with the columns ``'File Name'`` and ``'Date'`` (issue date),
        as produced by ``list_files_and_dates``.
    index_ : hashable
        Index label of the station's row in ``info_stations``.
    info_stations : pandas.DataFrame
        Station table; its ``'WWHOUTID'`` column holds the identifier that is
        matched (as a string) against the file's ``wwhoutid`` variable.

    Returns
    -------
    dict
        Maps each issue date (timezone-aware; UTC is assumed when the date is
        naive) to a dict with the keys ``'time'`` (list of datetimes),
        ``'dis24_station'`` (the station's ``cout`` values) and
        ``'start_date'`` (issue date + 12 hours).

    Raises
    ------
    ValueError
        If the station identifier is not found exactly once in a file.

    Notes
    -----
    The time axis is synthesised from the date in the file name plus 12 h and
    one step per day; the file's own ``time`` variable is not read.
    NOTE(review): confirm that the 12:00 offset is the intended convention.
    """
    # The station identifier does not depend on the file, so look it up once
    station_id = str(info_stations['WWHOUTID'][index_])

    forecast_dict = {}  # Use a dictionary to store the data by issue_date

    # Iterate over the forecast files (file name and issue date side by side,
    # which does not rely on file_data having a 0..n-1 index)
    for filename, issued_date in zip(file_data['File Name'], file_data['Date']):
        file_path = os.path.join(directory_path, filename)

        # Read only the two variables that are needed; the context manager
        # closes the file even if a read fails.
        with nc.Dataset(file_path, mode='r') as dataset:
            dis24 = dataset.variables['cout'][:]
            basin_ids = dataset.variables['wwhoutid'][:]

        matches = np.flatnonzero(np.asarray(basin_ids) == station_id)
        if matches.size != 1:
            raise ValueError(
                f"Expected exactly one basin with wwhoutid '{station_id}' in "
                f"'{file_path}', found {matches.size}."
            )
        station_index = int(matches[0])

        # Ensure issued_date is aware of timezone
        if issued_date.tzinfo is None or issued_date.tzinfo.utcoffset(issued_date) is None:
            issued_date = issued_date.replace(tzinfo=timezone.utc)

        start_date = issued_date + timedelta(hours=12)  # Adjust to 12:00

        # Extract data for the station: one value per step of the file's
        # time dimension ('cout' is indexed as (time, basin))
        dis24_station = dis24[:, station_index]

        # Create a daily time series starting from the date specified in start_date
        time = [start_date + timedelta(days=i) for i in range(len(dis24_station))]

        # Store data in the dictionary using the issue_date as the key
        forecast_dict[issued_date] = {
            'time': time,
            'dis24_station': dis24_station,
            'start_date': start_date
        }

    return forecast_dict


In [6]:

def plot_hype_forecasts(forecast_dict, df_obs, station_name, name_data, index_):
    """Plot every forecast of a station against the observed series.

    One figure is produced per issue date. Each figure is saved as
    ``Figures/<station_name>_hype_<n>.png`` (``n`` counts from 1 in
    dictionary order), shown, and then closed.

    Parameters
    ----------
    forecast_dict : dict
        Output of ``process_hype_data``: issue date -> dict with the keys
        ``'time'``, ``'dis24_station'`` and ``'start_date'``.
    df_obs : pandas.DataFrame
        Observations with a ``'date'`` column and a ``name_data`` column.
        The frame is not modified. If either column is missing (e.g. the
        empty frame returned for a station without data) only the forecast
        is drawn.
    station_name : str
        Used in the figure title and in the output file name.
    name_data : str
        Name of the observed-value column in ``df_obs``.
    index_ : int
        Currently unused; kept so existing calls keep working (it was only
        referenced by threshold plotting that is disabled).

    Returns
    -------
    None
    """
    if not forecast_dict:
        return

    # Prepare the observations once, on copies, so the caller's DataFrame is
    # left untouched. Dates are normalised to midnight.
    has_obs = 'date' in df_obs.columns and name_data in df_obs.columns
    if has_obs:
        obs_dates = pd.to_datetime(df_obs['date']).dt.normalize()
        obs_values = df_obs[name_data]
    else:
        print(f"No observed '{name_data}' data for {station_name}; plotting forecast only.")

    # savefig does not create missing directories
    os.makedirs('Figures', exist_ok=True)

    # Iterate over each entry in the dictionary
    for f, (issued_date, data) in enumerate(forecast_dict.items(), start=1):
        time = pd.to_datetime(data['time'])  # Convert to datetime
        dis24_station = data['dis24_station']
        start_date = pd.to_datetime(data['start_date'])

        print(f'Issued date: {issued_date}, Start date: {start_date}')

        fig, ax = plt.subplots(figsize=(8, 5))

        ax.plot(time, dis24_station, color='black', alpha=0.8, marker='o', linestyle='-', label='Forecast')
        if has_obs:
            ax.plot(obs_dates, obs_values, color='blue', label='Observed Data')
        ax.axvline(x=issued_date, color='red', linestyle='--', label='Issued Date')

        ax.set_xlabel('Date')
        ax.set_ylabel('Discharge (m³/s)')
        ax.set_title(f'HYPE 24-hour Discharge Forecast for {station_name} Station - Issued on {issued_date}')
        ax.set_xlim(time[0] - timedelta(days=3), time[-1] + timedelta(days=1))
        ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
        fig.autofmt_xdate()
        ax.grid(True, color='grey', alpha=0.2)
        ax.legend()

        # Lay out after every artist (including the legend) has been added
        fig.tight_layout()

        fig.savefig(f'Figures/{station_name}_hype_{f}.png', dpi=400)
        plt.show()
        # Release the figure; otherwise one figure per forecast stays in memory
        plt.close(fig)



In [7]:
# See files from system
# Folder holding the forecast NetCDF files and the common prefix of their
# names; both are reused by later cells.
directory_path = 'brazil_delivery/'
prefix = "ww-hype_forecast_odf_"
# Table of matching files and the date parsed from each file name
# (None when the directory is missing).
file_data = list_files_and_dates(directory_path, prefix)

if file_data is not None:
    print(file_data)

                               File Name       Date
0   ww-hype_forecast_odf_20240430R1.0.nc 2024-04-30
1   ww-hype_forecast_odf_20240501R1.0.nc 2024-05-01
2   ww-hype_forecast_odf_20240502R1.0.nc 2024-05-02
3   ww-hype_forecast_odf_20240503R1.0.nc 2024-05-03
4   ww-hype_forecast_odf_20240504R1.0.nc 2024-05-04
5   ww-hype_forecast_odf_20240505R1.0.nc 2024-05-05
6   ww-hype_forecast_odf_20240506R1.0.nc 2024-05-06
7   ww-hype_forecast_odf_20240507R1.0.nc 2024-05-07
8   ww-hype_forecast_odf_20240508R1.0.nc 2024-05-08
9   ww-hype_forecast_odf_20240509R1.0.nc 2024-05-09
10  ww-hype_forecast_odf_20240510R1.0.nc 2024-05-10
11  ww-hype_forecast_odf_20240511R1.0.nc 2024-05-11
12  ww-hype_forecast_odf_20240512R1.0.nc 2024-05-12
13  ww-hype_forecast_odf_20240513R1.0.nc 2024-05-13
14  ww-hype_forecast_odf_20240514R1.0.nc 2024-05-14
15  ww-hype_forecast_odf_20240515R1.0.nc 2024-05-15
16  ww-hype_forecast_odf_20240516R1.0.nc 2024-05-16
17  ww-hype_forecast_odf_20240517R1.0.nc 2024-05-17
18  ww-hype_

In [8]:

# Inspect the structure of the first forecast file in the table (row label 0).
# No return_variables are requested, so the call only prints the summary.
filename = file_data['File Name'].loc[0]
file_path = os.path.join(directory_path, filename)
print_netcdf_summary(file_path)


None
Global Attributes:
  title: ww-hype 1.3.9 10d forecast
  institution: SMHI
  created: 2024-04-30 12:05
  format_version: 1
  coordinates: wwhoutid

Dimensions:
  time: length 10 (unlimited: False)
  basin: length 14 (unlimited: False)

Variables:
Variable: cout
  Type: float32
  Dimensions: ('time', 'basin')
  Shape: (10, 14)
    _FillValue: nan
    title: simulated outflow from olake/subbasin, only positive flow (outflow)
    units: m³/s
    coordinates: subid
Variable: time
  Type: int64
  Dimensions: ('time',)
  Shape: (10,)
    units: seconds since 1970-01-01
    calendar: proleptic_gregorian
Variable: wwhoutid
  Type: <class 'str'>
  Dimensions: ('basin',)
  Shape: (14,)

T units: seconds since 1970-01-01 Data: [1714435200 1714521600 1714608000 1714694400 1714780800 1714867200
 1714953600 1715040000 1715126400 1715212800]


In [9]:

# Observed-variable name: used to build the historic folder name below and
# passed to the (currently disabled) plotting call as the observed column.
name_data = 'discharge'

# -----------------------------------------------------------------------------
# Read data from station
# Station table; this cell reads its 'Name' and 'Code' columns and
# process_hype_data reads its 'WWHOUTID' column.
info_stations = pd.read_csv('porto_alegre_stations_wwhoutid.csv')

# Specify the directory paths
# NOTE(review): neither variable is used in the visible part of this notebook;
# read_station below receives the literal '../Telemetricas' instead of
# telem_folder_path — confirm which path is intended.
hist_folder_path = f'Historic_{name_data}'
telem_folder_path = 'Telemetricas'

# Loop through each station using the 'Code' column
for index, row in info_stations.iterrows():
    station_name = row['Name']
    station_code = row['Code']
    print('----------------------------------------------------------------------------')
    print(station_code)
    print(station_name)

    # Read telemetric data
    # (only consumed by the plotting call, which is commented out below)
    df_telem = read_station('../Telemetricas', station_code)

    # Process data
    # Every forecast file is re-opened for each station by this call.
    processed_data = process_hype_data(directory_path, file_data, index, info_stations)


    # Save the processed data to a file
    # One pickle per station, written to the current working directory.
    with open(f'data_hype_{station_code}.pkl', 'wb') as file:
        pickle.dump(processed_data, file)

    # Plot the forecasts
    # plot_hype_forecasts(processed_data, df_telem, station_name, name_data, index)


----------------------------------------------------------------------------
87450004
CAIS MAUÁ C6
----------------------------------------------------------------------------
87010000
TRIUNFO
----------------------------------------------------------------------------
87270000
PASSO MONTENEGRO
----------------------------------------------------------------------------
87382000
SÃO LEOPOLDO
----------------------------------------------------------------------------
86950000
TAQUARI
----------------------------------------------------------------------------
87401750
CORSAN ALVORADA
----------------------------------------------------------------------------
85900000
RIO PARDO
----------------------------------------------------------------------------
87160000
NOVA PALMIRA
----------------------------------------------------------------------------
87380000
CAMPO BOM
----------------------------------------------------------------------------
87399000
PASSO DAS CANOAS - AUXILIAR
----