In [2]:
import h5py
import numpy as np
import pandas as pd
from datetime import datetime, timedelta, timezone
import os

# Weather stations to sample, one record per station: the numeric station
# identifier, a city label, and the latitude/longitude used for the
# nearest-grid-cell lookup.
stations = [
    {'station': station_id, 'city': city_name, 'lat': latitude, 'lon': longitude}
    for station_id, city_name, latitude, longitude in (
        (33345, 'Kyiv', 50.4, 30.45),
        (33347, 'Boryspil', 50.34, 30.97),
        (33231, 'Chornobyl', 51.28, 30.23),
        (33339, 'Fastiv', 50.07, 29.91),
        (33464, 'Byla_Tzerkva', 49.78, 30.18),
        (33354, 'Baryshivka', 50.35, 31.34),
        (33356, 'Yagotyn', 50.23, 31.8),
        (33466, 'Myronivka', 49.66, 31.0),
    )
]

# Convert an offset in seconds from 1980-01-06 00:00:00 UTC into a UTC+03:00 datetime
def convert_time(seconds_since_1980):
    """Return the timezone-aware UTC+03:00 datetime for the given offset.

    Args:
        seconds_since_1980: Seconds elapsed since 1980-01-06 00:00:00 UTC.
            The value is passed through ``int`` first, so any fractional
            part is truncated.

    Returns:
        An aware ``datetime`` whose UTC offset is fixed at +03:00.
    """
    target_zone = timezone(timedelta(hours=3))  # UTC+03:00
    epoch = datetime(1980, 1, 6, tzinfo=timezone.utc)
    elapsed = timedelta(seconds=int(seconds_since_1980))
    return (epoch + elapsed).astimezone(target_zone)

# Get list of HDF5 files from the folder
folder_path = '/home/lol/jupyter/data/04.2021/jsimpsonhttps.pps.eosdis.nasa.gov/imerg/early/202104'
file_paths = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.RT-H5')]

# Initialize an empty list to hold the results
results = []

# Index along the first axis of 'Grid/precipitation' (replace with the actual
# time index if needed). It does not change between files, so set it once.
time_index = 0

# One output column per station, named "<city>_<station id>". Computed once so
# the DataFrame keeps a stable column layout even when no files are found.
station_columns = [f'{station["city"]}_{station["station"]}' for station in stations]

# Process each file
for file_path in file_paths:
    # The context manager guarantees the file is closed even if a dataset is
    # missing or a read raises part-way through.
    with h5py.File(file_path, 'r') as hdf5_file:
        # Retrieve the time value and shift it by 30 minutes after conversion
        time_seconds = hdf5_file['Grid/time'][0]
        new_time = convert_time(time_seconds) + timedelta(minutes=30)

        # Coordinate axes are small, so read them fully into memory
        lats = hdf5_file['Grid/lat'][:]
        lons = hdf5_file['Grid/lon'][:]

        # Keep a handle to the dataset instead of loading the whole grid:
        # only one cell per station is needed from each file.
        precipitation = hdf5_file['Grid/precipitation']

        # The timestamp is stored naive (offset dropped, wall-clock kept)
        row = {'time': new_time.replace(tzinfo=None)}
        for station, column in zip(stations, station_columns):
            # Nearest grid cell along each axis
            lat_idx = int(np.abs(lats - station['lat']).argmin())
            lon_idx = int(np.abs(lons - station['lon']).argmin())

            # The dataset is indexed as (time, lon, lat).
            # NOTE(review): any missing-data fill value stored in the file is
            # passed through unchanged - confirm whether it should be masked.
            row[column] = precipitation[time_index, lon_idx, lat_idx]

    results.append(row)

# Convert results to a DataFrame; explicit columns keep 'time' present even
# when the folder contained no matching files, so the sort below cannot fail.
df_results = pd.DataFrame(results, columns=['time'] + station_columns)

# Sort the DataFrame by 'time' (os.listdir returns files in arbitrary order)
df_results = df_results.sort_values(by='time').reset_index(drop=True)

print(df_results)
df_results.to_csv('df_results.csv', index=False)


                    time  Kyiv_33345  Boryspil_33347  Chornobyl_33231  \
0    2021-04-01 03:30:00         0.0             0.0              0.0   
1    2021-04-01 04:00:00         0.0             0.0              0.0   
2    2021-04-01 04:30:00         0.0             0.0              0.0   
3    2021-04-01 05:00:00         0.0             0.0              0.0   
4    2021-04-01 05:30:00         0.0             0.0              0.0   
...                  ...         ...             ...              ...   
1435 2021-05-01 01:00:00         0.0             0.0              0.0   
1436 2021-05-01 01:30:00         0.0             0.0              0.0   
1437 2021-05-01 02:00:00         0.0             0.0              0.0   
1438 2021-05-01 02:30:00         0.0             0.0              0.0   
1439 2021-05-01 03:00:00         0.0             0.0              0.0   

      Fastiv_33339  Byla_Tzerkva_33464  Baryshivka_33354  Yagotyn_33356  \
0              0.0                 0.0          