In [None]:
################################## KENNETH EKPETERE #################################
################################# PMP IMPLEMENTATION ###############################
##################################### (C) 2024  ########################################

In [1]:
import time
import pandas as pd
import numpy as np
import sys
from datetime import date
from datetime import datetime, timedelta
import math
import statistics
from math import exp
from scipy.stats import tmean, tstd

##### **HERSHFIELD PMP (Station and IMERG Annual Maximum Series, AMS)**

In [2]:
# Annual Maximums to PMP Computation (Hershfield statistical method)
#
# For every ID and every duration column this cell computes
#   K    = (X_max - mean_trim) / std_trim   (frequency factor; the "trim"
#                                            statistics exclude X_max)
#   HPMP = mean + K * std                   (statistics of the full sample)
# and writes one row per ID to `output_file`.
input_file = "annual_max.csv"
output_file = "annual_max_pmp.csv"

# Analysis period applied to the 'year' column (both bounds inclusive)
start_year = 2000
end_year = 2024

# Read input data
data = pd.read_csv(input_file, float_precision='round_trip')

# Subset columns to process
subset_columns = ['30-min', '1-hour', '2-hour', '3-hour', '6-hour', '12-hour', '24-hour', '48-hour', '72-hour']

# Filter rows for years inside the analysis period
data_filtered = data[(data['year'] >= start_year) & (data['year'] <= end_year)]

# Group data by unique ID
grouped = data_filtered.groupby('ID')

# One plain record per ID; the output frame is built once after the loop
output_records = []

# Loop through each group
for name, group in grouped:
    values = group[subset_columns]

    # Full-sample statistics (pandas skips missing values by default)
    max_pre = values.max()
    mean_pre = values.mean()
    std_pre = values.std()

    # Valid observations of each column, sorted, with the largest one removed.
    # Missing values are dropped BEFORE trimming: np.sort places NaN last, so
    # slicing off the final element without dropping them would discard a NaN
    # and leave the maximum inside the "trimmed" sample.
    trimmed = {
        column: np.sort(values[column].dropna().to_numpy(dtype=float))[:-1]
        for column in subset_columns
    }

    # Trimmed mean / sample standard deviation; NaN when too few values remain
    trimmed_mean = pd.Series({
        column: sample.mean() if sample.size >= 1 else np.nan
        for column, sample in trimmed.items()
    })
    trimmed_std = pd.Series({
        column: sample.std(ddof=1) if sample.size >= 2 else np.nan
        for column, sample in trimmed.items()
    })

    # Compute frequency factor for each column
    freqfact = (max_pre - trimmed_mean) / trimmed_std

    # Compute HPMP for each column
    HPMP = mean_pre + (freqfact * std_pre)

    output_records.append({'ID': name, **HPMP.to_dict()})

# Fail loudly instead of writing a header-only file when nothing was processed
if not output_records:
    raise ValueError(
        f"No rows to process in {input_file} for years {start_year}-{end_year}"
    )

# Assemble the final DataFrame: ID first, then the duration columns in order
output_data = pd.DataFrame(output_records, columns=['ID'] + subset_columns)

# Write output DataFrame to CSV
output_data.to_csv(output_file, index=False)
print("Hershfield PMP computation complete")


Hershfield PMP computation complete


##### **HERSHFIELD PMP (IMERG All-time Maximums)**

In [3]:
# Alltime Maximums to PMP Computation (Hershfield statistical method)
#
# For every ID and every duration column this cell computes
#   K    = (X_max - mean_trim) / std_trim   (frequency factor; the "trim"
#                                            statistics exclude X_max)
#   HPMP = mean + K * std                   (statistics of the full sample)
# and writes one row per ID to `output_file`.
input_file = "alltime_max.csv"
output_file = "alltime_max_pmp.csv"

# Read input data
data = pd.read_csv(input_file, float_precision='round_trip')

# Subset columns to process
subset_columns = ['30-min', '1-hour', '2-hour', '3-hour', '6-hour', '12-hour', '24-hour', '48-hour', '72-hour']

# Group data by unique ID
grouped = data.groupby('ID')

# One plain record per ID; the output frame is built once after the loop
output_records = []

# Loop through each group
for name, group in grouped:
    values = group[subset_columns]

    # Full-sample statistics (pandas skips missing values by default)
    max_pre = values.max()
    mean_pre = values.mean()
    std_pre = values.std()

    # Valid observations of each column, sorted, with the largest one removed.
    # Missing values are dropped BEFORE trimming: np.sort places NaN last, so
    # slicing off the final element without dropping them would discard a NaN
    # and leave the maximum inside the "trimmed" sample.
    trimmed = {
        column: np.sort(values[column].dropna().to_numpy(dtype=float))[:-1]
        for column in subset_columns
    }

    # Trimmed mean / sample standard deviation; NaN when too few values remain
    trimmed_mean = pd.Series({
        column: sample.mean() if sample.size >= 1 else np.nan
        for column, sample in trimmed.items()
    })
    trimmed_std = pd.Series({
        column: sample.std(ddof=1) if sample.size >= 2 else np.nan
        for column, sample in trimmed.items()
    })

    # Compute frequency factor for each column
    freqfact = (max_pre - trimmed_mean) / trimmed_std

    # Compute HPMP for each column
    HPMP = mean_pre + (freqfact * std_pre)

    output_records.append({'ID': name, **HPMP.to_dict()})

# Fail loudly instead of writing a header-only file when nothing was processed
if not output_records:
    raise ValueError(f"No rows with a usable 'ID' to process in {input_file}")

# Assemble the final DataFrame: ID first, then the duration columns in order
output_data = pd.DataFrame(output_records, columns=['ID'] + subset_columns)

# Write output DataFrame to CSV
output_data.to_csv(output_file, index=False)
print("Hershfield Alltime PMP computation complete")


Hershfield Alltime PMP computation complete


##### **HERSHFIELD PMP (IMERG's Partial Duration Maximums)**

In [4]:
# Partial Duration Maximums to PMP Computation (Hershfield statistical method)
#
# For every ID and every duration column this cell computes
#   K    = (X_max - mean_trim) / std_trim   (frequency factor; the "trim"
#                                            statistics exclude X_max)
#   HPMP = mean + K * std                   (statistics of the full sample)
# and writes one row per ID to `output_file`.
input_file = "alltime_partial_duration_max.csv"
output_file = "alltime_partial_duration_max_pmp.csv"

# Read input data
data = pd.read_csv(input_file, float_precision='round_trip')

# Subset columns to process
subset_columns = ['30-min', '1-hour', '2-hour', '3-hour', '6-hour', '12-hour', '24-hour', '48-hour', '72-hour']

# Group data by unique ID
grouped = data.groupby('ID')

# One plain record per ID; the output frame is built once after the loop
output_records = []

# Loop through each group
for name, group in grouped:
    values = group[subset_columns]

    # Full-sample statistics (pandas skips missing values by default)
    max_pre = values.max()
    mean_pre = values.mean()
    std_pre = values.std()

    # Valid observations of each column, sorted, with the largest one removed.
    # Missing values are dropped BEFORE trimming: np.sort places NaN last, so
    # slicing off the final element without dropping them would discard a NaN
    # and leave the maximum inside the "trimmed" sample.
    trimmed = {
        column: np.sort(values[column].dropna().to_numpy(dtype=float))[:-1]
        for column in subset_columns
    }

    # Trimmed mean / sample standard deviation; NaN when too few values remain
    trimmed_mean = pd.Series({
        column: sample.mean() if sample.size >= 1 else np.nan
        for column, sample in trimmed.items()
    })
    trimmed_std = pd.Series({
        column: sample.std(ddof=1) if sample.size >= 2 else np.nan
        for column, sample in trimmed.items()
    })

    # Compute frequency factor for each column
    freqfact = (max_pre - trimmed_mean) / trimmed_std

    # Compute HPMP for each column
    HPMP = mean_pre + (freqfact * std_pre)

    output_records.append({'ID': name, **HPMP.to_dict()})

# Fail loudly instead of writing a header-only file when nothing was processed
if not output_records:
    raise ValueError(f"No rows with a usable 'ID' to process in {input_file}")

# Assemble the final DataFrame: ID first, then the duration columns in order
output_data = pd.DataFrame(output_records, columns=['ID'] + subset_columns)

# Write output DataFrame to CSV
output_data.to_csv(output_file, index=False)
print("Hershfield Partial Duration PMP computation complete")


Hershfield Partial Duration PMP computation complete
