In [None]:
"""
    This notebook extracts IMERG precipitation time series (2000 - 2023) for multiple stations.
    
    contact
    ----------
    Dr. KENNETH EKPETERE |kenneth.ekpetere@gmail.com

    """

### **IMERG Multi-Stations Timeseries Extraction**

In [None]:
import geemap
import ee
import pandas as pd
import time
import os
# import time as tm
# from datetime import timedelta, datetime

In [None]:
# # Authenticate Earth Engine.
# ee.Authenticate()

In [None]:
# Initialize the Earth Engine client library. This must run before any other
# ee.* call in the notebook. NOTE(review): presumably relies on credentials
# stored by a previous ee.Authenticate() run (that call is commented out in
# the cell above) - confirm on a fresh machine.
ee.Initialize()

In [None]:

# Function to extract IMERG time series for a given station/pixel
def extract_imerg_time_series(lat, lon, start_year=2000, end_year=2023):
    """Return the IMERG V06 ``precipitationCal`` time series at one point.

    The collection is queried one calendar year at a time (each request
    covers 1 January of the year up to, but not including, 1 January of the
    following year) and the yearly results are concatenated in chronological
    request order.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the station/pixel.
    start_year, end_year : int, optional
        First and last calendar years to extract, both inclusive. The
        defaults (2000 and 2023) reproduce the range this notebook was
        written for.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``time`` (datetime, converted from the epoch
        milliseconds returned by ``getRegion``) and ``precipitationCal``.

    Raises
    ------
    ValueError
        If ``start_year`` is greater than ``end_year``.
    """
    if start_year > end_year:
        raise ValueError(
            f"start_year ({start_year}) must not exceed end_year ({end_year})"
        )

    # IMERG dataset, restricted to the calibrated precipitation band
    dataset = ee.ImageCollection('NASA/GPM_L3/IMERG_V06').select('precipitationCal')

    # Point of interest (Earth Engine expects longitude first)
    point = ee.Geometry.Point(lon, lat)

    yearly_dfs = []

    # Iterate with plain Python integers: keeping the counter client-side
    # avoids a server round trip (getInfo) just to evaluate the loop condition.
    for year in range(start_year, end_year + 1):
        # filterDate() treats the end date as exclusive, so the window has to
        # close on 1 January of the next year; closing it on 31 December
        # would silently drop that whole day from every year.
        window_start = ee.Date.fromYMD(year, 1, 1)
        window_end = ee.Date.fromYMD(year + 1, 1, 1)
        filtered = dataset.filterDate(window_start, window_end)

        # Extract the time series at the point. The first row of the result
        # is the header, the remaining rows are the samples.
        # NOTE(review): scale=11132 is roughly 0.1 degree at the equator,
        # presumably chosen to match the IMERG grid - confirm.
        ts = filtered.getRegion(point, scale=11132).getInfo()
        header, rows = ts[0], ts[1:]

        # .copy() makes the column subset an independent frame so the
        # assignment below cannot trigger SettingWithCopyWarning.
        df = pd.DataFrame(rows, columns=header)[['time', 'precipitationCal']].copy()
        df['time'] = pd.to_datetime(df['time'], unit='ms')

        yearly_dfs.append(df)

    # Concatenate all yearly dataframes into one
    return pd.concat(yearly_dfs, ignore_index=True)

# Read input CSV file
input_file = 'test_stn.csv'  # test stations (5 - stations)
# input_file = 'stn.csv'     # full stations (2360 - stations)
output_folder = 'output_files/'

# DataFrame.to_csv() does not create missing directories; without this every
# station would fail at the save step on a fresh checkout.
os.makedirs(output_folder, exist_ok=True)

# The CSV is expected to provide the columns 'ID', 'Lat' and 'Lon'.
data = pd.read_csv(input_file)

# Process each row in the CSV
for index, row in data.iterrows():
    # Fallback label so a row whose ID cannot be parsed is still reported
    # (and skipped) instead of aborting the whole batch.
    unique_id = f"row {index}"

    try:
        unique_id = str(int(row['ID']))  # Convert ID to string
        lat = row['Lat']
        lon = row['Lon']

        # Extract IMERG time series
        ts_df = extract_imerg_time_series(lat, lon)

        # Save output to CSV
        filename = f"ts_{unique_id}_{lat}_{lon}.csv"
        output_path = os.path.join(output_folder, filename)
        ts_df.to_csv(output_path, index=False)

        print(f"Processed ID: {unique_id}. Saved to {output_path}")

    except Exception as e:
        # Best-effort batch: report the failure and continue with the next
        # station rather than losing the stations already processed.
        print(f"Error processing ID {unique_id}: {str(e)}")

    # Pause for 5 seconds between stations to prevent memory issues and
    # respect GEE limitations
    time.sleep(5)

print("All IDs processed.")
