In [None]:
################################## KENNETH EKPETERE ####################################
############################## HYBRID DDF IMPLEMENTATION #########################
##################################### (C) 2024  ########################################

In [1]:
import math
import os
import statistics
import sys
import time
from datetime import date
from datetime import datetime, timedelta
from math import exp

import numpy as np
import pandas as pd
from scipy.stats import tmean, tstd
from scipy.stats import gumbel_r, genextreme, expon, genpareto

In [7]:

# Define the return periods and corresponding multipliers (for Gumbel as reference)
return_periods = {
    1: -1.133,
    2: -0.164,
    5: 0.720,
    10: 1.305,
    22: 1.943,
    25: 2.045,
    50: 2.594,
    100: 3.138,
    200: 3.681,
    500: 4.397,
    1000: 4.938
}

# Read the input CSV file
input_file = './DataFolder/AMS_imerg.csv'
data = pd.read_csv(input_file)

# List of precipitation columns
precipitation_columns = ["pre30m", "pre1h", "pre2h", "pre3h", "pre6h", "pre12h", "pre24h", "pre48h", "pre72h"]

# Initialize an empty list to store the result
results = []

# Define weights for the hybrid PDF components
weights = {
    'gumbel': 0.5,
    'gev': 0.2,
    'exponential': 0.15,
    'gpd': 0.15
}

# Function to compute the hybrid CDF
def hybrid_cdf(x, params):
    gumbel_cdf = gumbel_r.cdf(x, loc=params['gumbel']['loc'], scale=params['gumbel']['scale'])
    gev_cdf = genextreme.cdf(x, c=params['gev']['c'], loc=params['gev']['loc'], scale=params['gev']['scale'])
    exp_cdf = expon.cdf(x, scale=1/params['exponential']['rate'])
    gpd_cdf = genpareto.cdf(x, c=params['gpd']['c'], loc=params['gpd']['loc'], scale=params['gpd']['scale'])
    
    # Weighted sum of the CDFs
    hybrid_cdf_value = (
        weights['gumbel'] * gumbel_cdf +
        weights['gev'] * gev_cdf +
        weights['exponential'] * exp_cdf +
        weights['gpd'] * gpd_cdf
    )
    
    return hybrid_cdf_value

# Process each unique ID
for unique_id in data["ID"].unique():
    subset = data[data["ID"] == unique_id]
    
    # Calculate mean and standard deviation for the precipitation columns
    mean_pre = subset[precipitation_columns].mean()
    std_pre = subset[precipitation_columns].std()
    
    # Parameters for the distributions
    params = {
        'gumbel': {'loc': mean_pre, 'scale': std_pre},
        'gev': {'c': 0.1, 'loc': mean_pre, 'scale': std_pre},  # Adjust 'c' as needed
        'exponential': {'rate': 1/mean_pre},
        'gpd': {'c': 0.1, 'loc': mean_pre, 'scale': std_pre}   # Adjust 'c' as needed
    }
    
    # Calculate DDF values for each return period
    for rp, multiplier in return_periods.items():
        gumbel_ddf = mean_pre + (std_pre * multiplier)
        
        # Apply the hybrid CDF function
        hybrid_cdf_values = hybrid_cdf(gumbel_ddf, params)
        
        # Scale hybrid DDF values to match Gumbel range
        hybrid_ddf = gumbel_ddf * hybrid_cdf_values
        
        results.append([unique_id, rp] + hybrid_ddf.tolist())

# Create a DataFrame from the results
columns = ["ID", "RT"] + ["DDF" + col[3:] for col in precipitation_columns]
results_df = pd.DataFrame(results, columns=columns)

# Save the result to an output CSV file
output_file = 'Result/Hybrid_DDF_Output.csv'
results_df.to_csv(output_file, index=False)

print(f"Hybrid DDF calculations saved to {output_file}")


Hybrid DDF calculations saved to Result/Hybrid_DDF_Output.csv
