# Dataset	Resolution	Time Range	Best For precipitation
  # CHIRPS	      ~5.5 km	  1981 - Present	Drought, long-term climate studies
  # IMERG (GPM)	  ~10 km	  2000 - Present	Near real-time precipitation
  # PERSIANN-CDR  ~25 km	  1983 - Present	Long-term precipitation trends
  # ERA5	        ~9 km	    1950 - Present	Climate modeling, hourly data
  # GLDAS	        ~25 km	  2000 - Present	Land surface and hydrological modeling

In [None]:
import ee
import pandas as pd
import os

# Earth Engine session: interactive authentication, then bind to the project
ee.Authenticate()
ee.Initialize(project="gee-ndvi-berhe")

# Point of interest: Lankien, South Sudan (passed as [longitude, latitude])
latitude = 8.4281
longitude = 33.7833
roi = ee.Geometry.Point([longitude, latitude])

# Study period applied to every collection below
start_date = '2015-01-01'
end_date = '2024-12-31'

# --- 1. IMERG ---
imerg = (
    ee.ImageCollection("NASA/GPM_L3/IMERG_V06")
    .filterDate(start_date, end_date)
    .filterBounds(roi)
)

# --- 2. ERA5-Land (hourly asset) ---
era5 = (
    ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")
    .filterDate(start_date, end_date)
    .filterBounds(roi)
)

# --- 3. CHIRPS (daily asset) ---
chirps = (
    ee.ImageCollection("UCSB-CHG/CHIRPS/DAILY")
    .filterDate(start_date, end_date)
    .filterBounds(roi)
)

# --- 4. GLDAS (3-hourly asset) ---
gldas = (
    ee.ImageCollection("NASA/GLDAS/V021/NOAH/G025/T3H")
    .filterDate(start_date, end_date)
    .filterBounds(roi)
)

# --- 5. PERSIANN-CDR ---
persiann = (
    ee.ImageCollection("NOAA/PERSIANN-CDR")
    .filterDate(start_date, end_date)
    .filterBounds(roi)
)


# Function to extract values from image collections
def extract_values(image, band_name):
    """Build a geometry-less feature holding one band's mean value at ``roi``.

    Args:
        image: Earth Engine image to sample.
        band_name: Name of the band whose reduced value is stored.

    Returns:
        An ``ee.Feature`` with two properties: ``'date'`` (the image date
        formatted as ``YYYY-MM-dd HH:mm``) and ``band_name`` (the reduced
        value read from the ``reduceRegion`` result).
    """
    reduced = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=roi,
        scale=1000,
        bestEffort=True,
    )
    # Minute resolution keeps sub-daily images distinguishable
    timestamp = image.date().format("YYYY-MM-dd HH:mm")
    properties = {'date': timestamp, band_name: reduced.get(band_name)}
    return ee.Feature(None, properties)


# Extract Daily & Hourly Data
def extract_collection(collection, band_name):
    """Map ``extract_values`` over ``collection`` for a single band.

    Args:
        collection: Object exposing ``map`` (an Earth Engine collection).
        band_name: Band forwarded to ``extract_values`` for every image.

    Returns:
        Whatever ``collection.map`` returns for the per-image extractor.
    """
    def sample_band(img):
        return extract_values(img, band_name)

    return collection.map(sample_band)


# Build one per-image feature collection per source; each pair is
# (collection, band read from that collection)
imerg_daily, era5_hourly, chirps_daily, gldas_3hourly, persiann_daily = (
    extract_collection(source, band)
    for source, band in (
        (imerg, 'precipitationCal'),
        (era5, 'total_precipitation'),
        (chirps, 'precipitation'),
        (gldas, 'Rainf_tavg'),
        (persiann, 'precipitation'),
    )
)


# Convert results to Pandas DataFrame
def collection_to_dataframe(collection, band_name, page_size=5000):
    """Download a collection of extracted features into a DataFrame.

    The features are fetched in pages through ``toList(count, offset)``
    instead of one ``collection.getInfo()`` call, because Earth Engine aborts
    a collection ``getInfo`` once it accumulates more than 5000 elements and
    the multi-year sub-daily collections built in this notebook exceed that.

    Args:
        collection: Earth Engine collection whose elements carry a ``'date'``
            property and, when a value was available, a ``band_name``
            property.
        band_name: Property to read from each feature; also the name of the
            value column.
        page_size: Maximum number of features requested per round trip.

    Returns:
        pandas.DataFrame with columns ``['Date', band_name]``; rows whose
        feature lacks ``band_name`` hold a missing value.
    """
    total = collection.size().getInfo()
    rows = []
    for offset in range(0, total, page_size):
        page = collection.toList(page_size, offset).getInfo()
        for feature in page:
            properties = feature['properties']
            # .get: a feature with no value for the band has no such property
            rows.append([properties['date'], properties.get(band_name)])
    return pd.DataFrame(rows, columns=['Date', band_name])


# Convert each dataset to DataFrame (columns: 'Date' plus the band name)
df_imerg = collection_to_dataframe(imerg_daily, 'precipitationCal')
df_era5 = collection_to_dataframe(era5_hourly, 'total_precipitation')
df_chirps = collection_to_dataframe(chirps_daily, 'precipitation')
df_gldas = collection_to_dataframe(gldas_3hourly, 'Rainf_tavg')
df_persiann = collection_to_dataframe(persiann_daily, 'precipitation')

# Merge all data on the formatted timestamp string. The outer joins keep every
# timestamp that appears in any source; a source with no image at a given
# timestamp contributes a missing value for that row.
# The CHIRPS and PERSIANN frames both carry a 'precipitation' column, so the
# last merge suffixes them; the positional rename below depends on this exact
# merge order.
df = df_imerg.merge(df_era5, on="Date", how="outer") \
    .merge(df_chirps, on="Date", how="outer") \
    .merge(df_gldas, on="Date", how="outer") \
    .merge(df_persiann, on="Date", how="outer")

# Rename columns (positional: Date, IMERG, ERA5, CHIRPS, GLDAS, PERSIANN)
# NOTE(review): the values are written as downloaded, with no unit conversion.
# Per the Earth Engine catalog these bands are presumably not all millimetres
# (e.g. IMERG mm/hr, ERA5-Land m, GLDAS kg/m^2/s) - confirm before relying on
# the "_mm" labels.
df.columns = ['Date', 'IMERG_mm', 'ERA5_mm', 'CHIRPS_mm', 'GLDAS_mm', 'PERSIANN_mm']

# Save as CSV (the folder is created if it does not exist yet)
output_folder = "Precipitation_Data"
os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, "Precipitation_Lankien.csv")

df.to_csv(output_path, index=False)
print(f"Precipitation data saved to {output_path}")


# ERA5 

In [13]:
import ee
import pandas as pd
import os

# Bind the Earth Engine client to the project
ee.Initialize(project="gee-ndvi-berhe")

# Point of interest: Lankien, South Sudan (passed as [longitude, latitude])
latitude = 8.4281
longitude = 33.7833
roi = ee.Geometry.Point([longitude, latitude])

# Study period
start_date = '2015-01-01'
end_date = '2024-12-31'

# ERA5-Land daily aggregates, restricted to the precipitation-sum band
era5 = (
    ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR')
    .filterDate(start_date, end_date)
    .filterBounds(roi)
    .select('total_precipitation_sum')
)

# Function to extract daily precipitation values
def extract_precipitation(img):
    """Return one feature with the image date and precipitation in millimetres.

    The Earth Engine catalog lists ``total_precipitation_sum`` of ERA5-Land in
    metres, while the value is stored under ``'precipitation_mm'``. The band is
    therefore multiplied by 1000 before sampling so the stored number matches
    its label.

    Args:
        img: Image from the ERA5-Land daily collection carrying the
            ``total_precipitation_sum`` band.

    Returns:
        An ``ee.Feature`` without geometry with properties ``'date'``
        (``YYYY-MM-dd``) and ``'precipitation_mm'``.
    """
    date = img.date().format("YYYY-MM-dd")

    # Convert on the image (not on the reduced value) so a masked pixel still
    # yields a missing property instead of a server-side arithmetic error.
    precipitation_mm = img.select('total_precipitation_sum').multiply(1000)
    value = precipitation_mm.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=roi,
        scale=10000,
        bestEffort=True
    ).get('total_precipitation_sum')

    return ee.Feature(None, {'date': date, 'precipitation_mm': value})

# Describe the per-image extraction as a feature collection
precip_fc = ee.FeatureCollection(era5.map(extract_precipitation))

# Bring every feature to the client as a plain list
feature_count = precip_fc.size()
precip_list = precip_fc.toList(feature_count).getInfo()

# One row per feature, one column per property
records = [feature['properties'] for feature in precip_list]
df = pd.DataFrame(records)

# Discard rows with a missing value
df = df.dropna()

# Write the table to Precipitation_Data/
output_folder = "Precipitation_Data"
output_path = os.path.join(output_folder, "ERA5_Daily_Precipitation_Lankien.csv")
os.makedirs(output_folder, exist_ok=True)

df.to_csv(output_path, index=False)
print(f"ERA5 Precipitation data saved to {output_path}")


ERA5 Precipitation data saved to Precipitation_Data/ERA5_Daily_Precipitation_Lankien.csv


# Code for CHIRPS Daily Precipitation Data Extraction:

In [None]:
import ee
import pandas as pd
import os

# Earth Engine session: interactive authentication, then bind to the project
ee.Authenticate()
ee.Initialize(project="gee-ndvi-berhe")

# Point of interest: Lankien, South Sudan (passed as [longitude, latitude])
latitude = 8.4281
longitude = 33.7833
roi = ee.Geometry.Point([longitude, latitude])

# Study period
start_date = '2015-01-01'
end_date = '2024-12-31'

# CHIRPS daily images for the period that intersect the point
precip_collection = (
    ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY')
    .filterDate(start_date, end_date)
    .filterBounds(roi)
)

# Function to extract precipitation values
def extract_precip(image):
    """Return a geometry-less feature with the image date and its value at ``roi``.

    Args:
        image: Image carrying a ``'precipitation'`` band.

    Returns:
        An ``ee.Feature`` with properties ``'date'`` (``YYYY-MM-dd``) and
        ``'precipitation'`` (the ``'precipitation'`` entry of the mean
        ``reduceRegion`` result).
    """
    reduced = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=roi,
        scale=500,
        bestEffort=True,
    )
    day = image.date().format("YYYY-MM-dd")
    properties = {'date': day, 'precipitation': reduced.get('precipitation')}
    return ee.Feature(None, properties)

# Run the extraction and download the resulting collection description
extracted = precip_collection.map(extract_precip)
precip_features = extracted.getInfo()

# Flatten each feature's properties into a [date, value] row
data = [
    [properties['date'], properties['precipitation']]
    for properties in (
        feature['properties'] for feature in precip_features['features']
    )
]

# Tabulate
df = pd.DataFrame(data, columns=['Date', 'Precipitation_mm'])

# Write the table to Precipitation_Data/
output_folder = "Precipitation_Data"
output_path = os.path.join(output_folder, "CHIRPS_Precipitation_Lankien.csv")
os.makedirs(output_folder, exist_ok=True)

df.to_csv(output_path, index=False)
print(f"Precipitation data saved to {output_path}")


# Code for IMERG Daily Precipitation Data Extraction:

In [18]:
import ee
import pandas as pd
import os

# Earth Engine session: interactive authentication, then bind to the project
ee.Authenticate()
ee.Initialize(project="gee-ndvi-berhe")

# Point of interest: Lankien, South Sudan (passed as [longitude, latitude])
latitude = 8.4281
longitude = 33.7833
roi = ee.Geometry.Point([longitude, latitude])

# Study period
start_date = '2015-01-01'
end_date = '2024-12-31'

# IMERG images (NASA/GPM_L3/IMERG_V06) for the period that intersect the point.
# NOTE: Earth Engine reports this asset as deprecated (see the warning captured
# further down in this notebook).
precip_collection = (
    ee.ImageCollection('NASA/GPM_L3/IMERG_V06')
    .filterDate(start_date, end_date)
    .filterBounds(roi)
)

# Function to extract precipitation values
def extract_precip(image):
    """Return a geometry-less feature with the image date and its value at ``roi``.

    Args:
        image: Image carrying a ``'precipitationCal'`` band.

    Returns:
        An ``ee.Feature`` with properties ``'date'`` (``YYYY-MM-dd``) and
        ``'precipitation'`` (the ``'precipitationCal'`` entry of the mean
        ``reduceRegion`` result).
    """
    reduced = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=roi,
        scale=500,
        bestEffort=True,
    )
    # NOTE(review): the label has day resolution only; if the collection holds
    # several images per day they all share the same 'date' - confirm intended.
    day = image.date().format("YYYY-MM-dd")
    properties = {'date': day, 'precipitation': reduced.get('precipitationCal')}
    return ee.Feature(None, properties)

# Function to extract data in chunks (monthly or yearly)
def extract_data_in_chunks(collection, chunk_size='year', extractor=None,
                           start_year=2015, end_year=2024, page_size=5000):
    """Download ``[date, precipitation]`` rows one time window at a time.

    Previously only ``chunk_size='month'`` did any work (and it looped over
    years), while the default ``'year'`` returned an empty list. Both
    strategies are now implemented. Window ends are the first day of the next
    window because ``filterDate`` treats its end as exclusive; ending a year on
    ``-12-31`` dropped 31 December.

    Args:
        collection: Earth Engine image collection to split by date.
        chunk_size: ``'year'`` or ``'month'`` - the length of each window.
        extractor: Function mapped over each window's images; it must return a
            feature with ``'date'`` and ``'precipitation'`` properties.
            Defaults to the module-level ``extract_precip``.
        start_year: First calendar year to download (inclusive).
        end_year: Last calendar year to download (inclusive).
        page_size: Maximum number of features fetched per request. Paging keeps
            each download below Earth Engine's 5000-element ``getInfo`` limit
            even when a window holds more images than that.

    Returns:
        List of ``[date, precipitation]`` rows in window order; the value is
        ``None`` when a feature carries no ``'precipitation'`` property.

    Raises:
        ValueError: If ``chunk_size`` is neither ``'year'`` nor ``'month'``.
    """
    years = range(start_year, end_year + 1)
    if chunk_size == 'year':
        windows = [(f'{year}-01-01', f'{year + 1}-01-01') for year in years]
    elif chunk_size == 'month':
        windows = []
        for year in years:
            for month in range(1, 13):
                # December rolls over into January of the following year
                next_year, next_month = (year + 1, 1) if month == 12 else (year, month + 1)
                windows.append(
                    (f'{year}-{month:02d}-01', f'{next_year}-{next_month:02d}-01')
                )
    else:
        raise ValueError(
            f"Unsupported chunk_size {chunk_size!r}; expected 'year' or 'month'"
        )

    if extractor is None:
        extractor = extract_precip

    data = []
    for start, end in windows:
        chunk = collection.filterDate(start, end).map(extractor)
        total = chunk.size().getInfo()
        for offset in range(0, total, page_size):
            for feature in chunk.toList(page_size, offset).getInfo():
                properties = feature['properties']
                data.append([properties['date'], properties.get('precipitation')])
    return data

# Request the rows with the 'year' chunking strategy
data = extract_data_in_chunks(precip_collection, chunk_size='year')

# Tabulate the [date, value] rows
df = pd.DataFrame(data, columns=['Date', 'Precipitation_mm'])

# Write the table to IMERG_Precipitation/
output_folder = "IMERG_Precipitation"
output_path = os.path.join(output_folder, "IMERG_Precipitation_Lankien.csv")
os.makedirs(output_folder, exist_ok=True)

df.to_csv(output_path, index=False)
print(f"Precipitation data saved to {output_path}")


Precipitation data saved to IMERG_Precipitation/IMERG_Precipitation_Lankien.csv


In [1]:
import ee
import pandas as pd
import os

# Earth Engine session: interactive authentication, then bind to the project
ee.Authenticate()
ee.Initialize(project="gee-ndvi-berhe")

# Point of interest: Lankien, South Sudan (passed as [longitude, latitude])
latitude = 8.4281
longitude = 33.7833
roi = ee.Geometry.Point([longitude, latitude])

# Study period
start_date = '2015-01-01'
end_date = '2024-12-31'

# IMERG images (NASA/GPM_L3/IMERG_V06) for the period that intersect the point.
# NOTE: Earth Engine reports this asset as deprecated (see the warning printed
# below this cell).
precip_collection = (
    ee.ImageCollection('NASA/GPM_L3/IMERG_V06')
    .filterDate(start_date, end_date)
    .filterBounds(roi)
)

# Function to extract precipitation values
def extract_precip(image):
    """Return a geometry-less feature with the image date and its value at ``roi``.

    Args:
        image: Image carrying a ``'precipitationCal'`` band.

    Returns:
        An ``ee.Feature`` with properties ``'date'`` (``YYYY-MM-dd``) and
        ``'precipitation'`` (the ``'precipitationCal'`` entry of the mean
        ``reduceRegion`` result).
    """
    reduced = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=roi,
        scale=500,
        bestEffort=True,
    )
    # NOTE(review): the label has day resolution only; if the collection holds
    # several images per day they all share the same 'date' - confirm intended.
    day = image.date().format("YYYY-MM-dd")
    properties = {'date': day, 'precipitation': reduced.get('precipitationCal')}
    return ee.Feature(None, properties)

# Function to extract data by month (monthly)
def extract_data_by_month(collection, extractor=None, start_year=2015, end_year=2024):
    """Download ``[date, precipitation]`` rows one calendar month at a time.

    Each window ends on the first day of the following month. ``filterDate``
    treats its end as exclusive, so the previous ``-28`` end date silently
    dropped days 28-31 of every month.

    Args:
        collection: Earth Engine image collection to split by month.
        extractor: Function mapped over each month's images; it must return a
            feature with ``'date'`` and ``'precipitation'`` properties.
            Defaults to the module-level ``extract_precip``.
        start_year: First calendar year to download (inclusive).
        end_year: Last calendar year to download (inclusive).

    Returns:
        List of ``[date, precipitation]`` rows in chronological window order;
        the value is ``None`` when a feature carries no ``'precipitation'``
        property.
    """
    if extractor is None:
        extractor = extract_precip

    data = []
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            # December rolls over into January of the following year
            next_year, next_month = (year + 1, 1) if month == 12 else (year, month + 1)
            start = f'{year}-{month:02d}-01'
            end = f'{next_year}-{next_month:02d}-01'
            features = collection.filterDate(start, end).map(extractor).getInfo()
            for feature in features['features']:
                properties = feature['properties']
                data.append([properties['date'], properties.get('precipitation')])
    return data

# Download the rows month by month
data = extract_data_by_month(precip_collection)

# Tabulate the [date, value] rows
df = pd.DataFrame(data, columns=['Date', 'Precipitation_mm'])

# Write the table to IMERG_Precipitation/
output_folder = "IMERG_Precipitation"
output_path = os.path.join(output_folder, "IMERG_Precipitation_Lankien.csv")
os.makedirs(output_folder, exist_ok=True)

df.to_csv(output_path, index=False)
print(f"Precipitation data saved to {output_path}")



Attention required for NASA/GPM_L3/IMERG_V06! You are using a deprecated asset.
To make sure your code keeps working, please update it.
Learn more: https://developers.google.com/earth-engine/datasets/catalog/NASA_GPM_L3_IMERG_V06


KeyboardInterrupt



# Calculate SPI

In [None]:
import ee
import pandas as pd
import os
import datetime

# Earth Engine session: interactive authentication, then bind to the project
ee.Authenticate()
ee.Initialize(project="gee-ndvi-berhe")

# Point of interest: Lankien, South Sudan (passed as [longitude, latitude])
latitude = 8.4281
longitude = 33.7833
roi = ee.Geometry.Point([longitude, latitude])

# Study period: fixed start, open-ended up to the day the cell runs
start_date = '2015-01-01'
end_date = datetime.date.today().isoformat()  # 'YYYY-MM-DD'

# CHIRPS precipitation from the PENTAD asset (not the DAILY one)
precipitation_collection = (
    ee.ImageCollection('UCSB-CHG/CHIRPS/PENTAD')
    .filterDate(start_date, end_date)
    .filterBounds(roi)
)

# Function to extract daily precipitation values
def extract_precipitation(image):
    """Return a geometry-less feature with the image date and its value at ``roi``.

    Args:
        image: Image carrying a ``'precipitation'`` band.

    Returns:
        An ``ee.Feature`` with properties ``'date'`` (``YYYY-MM-dd``) and
        ``'precipitation'`` (the ``'precipitation'`` entry of the mean
        ``reduceRegion`` result, sampled at a 5000 m scale).
    """
    reduced = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=roi,
        scale=5000,
        bestEffort=True,
    )
    day = image.date().format("YYYY-MM-dd")
    properties = {'date': day, 'precipitation': reduced.get('precipitation')}
    return ee.Feature(None, properties)

# Run the extraction and download the resulting collection description
extracted = precipitation_collection.map(extract_precipitation)
precipitation_features = extracted.getInfo()

# Flatten each feature's properties into a [date, value] row
data = [
    [properties['date'], properties['precipitation']]
    for properties in (
        feature['properties'] for feature in precipitation_features['features']
    )
]

# Tabulate
df = pd.DataFrame(data, columns=['Date', 'Precipitation'])

# Write the table to Precipitation_Data/
output_folder = "Precipitation_Data"
output_path = os.path.join(output_folder, "CHIRPS_Daily_Precipitation_Lankien.csv")
os.makedirs(output_folder, exist_ok=True)

df.to_csv(output_path, index=False)
print(f"Daily precipitation data saved to {output_path}")


In [None]:
import numpy as np
import scipy.stats as stats

# Function to calculate SPI for a given precipitation data series
def calculate_spi(precipitation_data, scale=1):
    """Compute the Standardized Precipitation Index (SPI) of a series.

    Steps:
      1. Accumulate precipitation over a trailing window of ``scale`` periods.
      2. Fit a two-parameter gamma distribution (location fixed at 0) to the
         strictly positive accumulated totals.
      3. Combine it with the observed probability of a zero total, because the
         gamma distribution is undefined at 0:
         ``H(x) = q + (1 - q) * G(x)``.
      4. Map ``H(x)`` through the inverse standard-normal CDF.

    Previously ``scale`` was overwritten by the fitted gamma scale and had no
    effect, zeros were passed straight into the fit, and probabilities of
    exactly 0 or 1 produced infinite SPI values.

    Args:
        precipitation_data: 1-D array-like of precipitation totals per period.
        scale: Number of consecutive periods to accumulate (1 = no
            accumulation).

    Returns:
        numpy.ndarray of the same length as the input. Entries are ``NaN``
        where no full window is available (the first ``scale - 1`` positions),
        where the accumulated total is not finite, or everywhere if fewer than
        two distinct positive totals exist to fit the distribution.

    Raises:
        ValueError: If ``scale`` is smaller than 1.
    """
    window = int(scale)
    if window < 1:
        raise ValueError("scale must be a positive number of periods")

    totals = np.asarray(precipitation_data, dtype=float)
    spi = np.full(totals.shape, np.nan)

    if window > 1:
        accumulated = np.full(totals.shape, np.nan)
        if totals.size >= window:
            # Trailing sum: position i holds the total of periods i-window+1..i
            accumulated[window - 1:] = np.convolve(
                totals, np.ones(window), mode='valid'
            )
    else:
        accumulated = totals

    usable = np.isfinite(accumulated)
    sample = accumulated[usable]
    wet = sample[sample > 0]
    if wet.size < 2 or wet.max() == wet.min():
        # Not enough information to fit a gamma distribution
        return spi

    zero_probability = 1.0 - wet.size / sample.size
    shape_param, _, scale_param = stats.gamma.fit(wet, floc=0)
    wet_cdf = stats.gamma.cdf(sample, shape_param, loc=0, scale=scale_param)
    cdf = zero_probability + (1.0 - zero_probability) * wet_cdf

    # Keep probabilities strictly inside (0, 1) so the result stays finite
    tiny = np.finfo(float).eps
    spi[usable] = stats.norm.ppf(np.clip(cdf, tiny, 1.0 - tiny))
    return spi

# Compute SPI for the rows that actually have a precipitation value
valid_precipitation = df['Precipitation'].dropna()
precipitation_data = valid_precipitation.values
spi_values = calculate_spi(precipitation_data)

# Add SPI values to the DataFrame. Assign through the original row index:
# dropna() may shorten the series, and a plain array assignment would then
# fail with a length mismatch. Rows without precipitation get a missing SPI.
df['SPI'] = pd.Series(spi_values, index=valid_precipitation.index)

# Save SPI results as CSV (same folder as the precipitation export)
spi_output_path = os.path.join(output_folder, "SPI_Lankien.csv")
df.to_csv(spi_output_path, index=False)
print(f"SPI data saved to {spi_output_path}")
