In [1]:
import pandas as pd
import numpy as np
import requests
import time
import os
import csv
from io import StringIO
import urllib3
from datetime import datetime, timedelta
import sys
from datetime import date
import math
import statistics
from math import exp
from scipy.stats import tmean, tstd

##### **PMP to PFE comparison**

In [3]:
# Helpers and entry point for the PMP-to-PFE comparison
def _load_pfe_tables(pfe_folder):
    """Read every usable PFE CSV in ``pfe_folder``.

    A file is used when its name ends in ``.csv`` and splits into at least
    four ``_``-separated parts; the third part is taken as the station ID.

    Returns:
        dict mapping station ID (str) to a DataFrame indexed by the
        normalised duration label ("30-min", "1-hour", ...) with one column
        per return period ("Y1" ... "Y1000").
    """
    columns = ["duration", "Y1", "Y2", "Y5", "Y10", "Y25", "Y50", "Y100", "Y200", "Y500", "Y1000"]
    duration_mapping = {
        "30-min:": "30-min", "60-min:": "1-hour", "2-hr:": "2-hour", "3-hr:": "3-hour",
        "6-hr:": "6-hour", "12-hr:": "12-hour", "24-hr:": "24-hour", "2-day:": "48-hour", "3-day:": "72-hour"
    }

    pfe_data = {}
    # Sorted so the result does not depend on the arbitrary order of
    # os.listdir when two files carry the same station ID (last one wins).
    for filename in sorted(os.listdir(pfe_folder)):
        if not filename.endswith(".csv"):
            continue
        parts = filename.split("_")
        if len(parts) < 4:
            continue

        # The first line of each PFE file is skipped; the next line is
        # consumed as a header and then replaced by the fixed column names.
        df = pd.read_csv(os.path.join(pfe_folder, filename), skiprows=1)
        df.columns = columns

        # .copy() so the assignment below works on an independent frame
        # instead of a slice of the original (avoids chained-assignment
        # warnings / undefined behaviour).
        df = df[df["duration"].isin(list(duration_mapping))].copy()
        df["duration"] = df["duration"].map(duration_mapping)
        pfe_data[parts[2]] = df.set_index("duration")

    return pfe_data


def _return_period_bin(value, pfe_row, return_periods, rp_ranges):
    """Return the label of the first return-period bin whose threshold exceeds ``value``."""
    for i, col in enumerate(return_periods):
        if value < pfe_row[col]:
            return rp_ranges[i]
    # Not below any threshold: beyond the largest return period.
    return rp_ranges[-1]


def pmp2pfe_stats(pfe_folder, pmp_file_path, output_csv):
    """Bin PMP values by the PFE return-period range they fall into.

    For every duration and every station in the PMP file, the PMP value
    (divided by 25.4, i.e. converted from mm to inches) is compared with the
    station's PFE thresholds Y1 ... Y1000, and the station is counted in the
    first range whose upper threshold is larger than the value.
    NOTE(review): this presumes the PFE tables are already in inches - confirm.

    A station/duration pair is left uncounted when
      * no PFE file exists for the station,
      * the station's PFE file has no row for that duration, or
      * the PMP value is missing (NaN).
    Percentages are always relative to the total number of rows in the PMP
    file, so they sum to less than 100 when pairs are left uncounted.

    Args:
        pfe_folder: directory containing the per-station PFE CSV files.
        pmp_file_path: CSV with an "ID" column and one column per duration.
        output_csv: path of the summary CSV to write (columns: SN, RP_range,
            then freq_<duration> / percent_<duration> for each duration).

    Returns:
        None. The result is written to ``output_csv``.
    """
    # Step 1: Process pfe_files
    pfe_data = _load_pfe_tables(pfe_folder)

    # Step 2: Process pmp_file
    pmp_df = pd.read_csv(pmp_file_path)
    pmp_df["ID"] = pmp_df["ID"].astype(str)
    durations = ["30-min", "1-hour", "2-hour", "3-hour", "6-hour", "12-hour", "24-hour", "48-hour", "72-hour"]
    pmp_df[durations] = pmp_df[durations].div(25.4)  # Convert mm to inches

    # Step 3: Calculate RP_ranges
    return_periods = ["Y1", "Y2", "Y5", "Y10", "Y25", "Y50", "Y100", "Y200", "Y500", "Y1000"]
    rp_ranges = ["<Y1", "Y1 - Y2", "Y2 - Y5", "Y5 - Y10", "Y10 - Y25", "Y25 - Y50", "Y50 - Y100", "Y100 - Y200", "Y200 - Y500", "Y500 - Y1000", ">Y1000"]
    output_data = {"RP_range": rp_ranges}
    total_ids = len(pmp_df)

    for duration in durations:
        freq = dict.fromkeys(rp_ranges, 0)

        for station_id, value in zip(pmp_df["ID"], pmp_df[duration]):
            pfe_df = pfe_data.get(station_id)
            if pfe_df is None or duration not in pfe_df.index:
                continue
            # A missing PMP value compares False against every threshold and
            # would otherwise be miscounted as ">Y1000".
            if pd.isna(value):
                continue
            freq[_return_period_bin(value, pfe_df.loc[duration], return_periods, rp_ranges)] += 1

        freq_counts = [freq[key] for key in rp_ranges]
        # Guard against an empty PMP file (no rows -> no meaningful share).
        freq_percentages = [count / total_ids * 100 if total_ids else 0.0 for count in freq_counts]

        output_data[f"freq_{duration.replace('-', '_')}"] = freq_counts
        output_data[f"percent_{duration.replace('-', '_')}"] = freq_percentages

    # Step 4: Write the output CSV
    output_df = pd.DataFrame(output_data)
    output_df.insert(0, "SN", range(1, len(output_df) + 1))  # Add the "SN" column at the beginning
    output_df.to_csv(output_csv, index=False)


# Folder containing the PFE CSV files
pfe_folder = "out_trim_pfe_files"

# PMP inputs that can be compared against the PFE tables, each paired with
# the summary file the comparison writes: name -> (pmp_file, output_file).
PMP_DATASETS = {
    # Station Annual Maximums PMP
    "station_annual_max": (
        "annual_max_pmp_stn.csv",
        "return_period_annual_max_pmp_stn.csv",
    ),
    # IMERG Annual Maximums PMP
    "imerg_annual_max": (
        "annual_max_pmp.csv",
        "return_period_annual_max_pmp.csv",
    ),
    # IMERG Alltime Maximums PMP
    "imerg_alltime_max": (
        "alltime_max_pmp.csv",
        "return_period_alltime_max_pmp.csv",
    ),
    # IMERG Alltime Partial-Duration Maximums PMP
    "imerg_alltime_partial_duration_max": (
        "alltime_partial_duration_max_pmp.csv",
        "return_period_alltime_partial_duration_max_pmp.csv",
    ),
    # IMERG Enhanced PMP from Curve Fit
    "imerg_curve_fitted": (
        "zzz5_curve_fitted_pmp.csv",
        "zzz6_return_period_curve_fitted_pmp.csv",
    ),
}

# Pick the dataset processed by this run (change the key to switch inputs).
pmp_file, output_file = PMP_DATASETS["imerg_curve_fitted"]

# Run the comparison and report where the summary was written
pmp2pfe_stats(pfe_folder, pmp_file, output_file)

print("PMP to PFE Range Comparison complete. Output saved to", output_file)


PMP to PFE Range Comparison complete. Output saved to zzz6_return_period_curve_fitted_pmp.csv
