In [49]:
import pandas as pd
import numpy as np
import pyreadr
from pathlib import Path
import plotly.express as px

In [50]:
# --- Locations ---------------------------------------------------------------
# Output folders (relative to the notebook) and the root folder holding the
# per-point .rds inputs, organised in one sub-folder per ensemble member.
fig_path = Path("../figures")
df_path = Path("../dataframes")
base_path = Path(
    "/home/users/laura.owen/old-home/data/users/lowen/extremes/heatwaves/HadUKGrid/dur-clim/probs"
)

# --- UK grid points ----------------------------------------------------------
# Read the two index tables from the .RData file and flatten each of them into
# a single column, giving one row per (lon_index, lat_index) pair.
result = pyreadr.read_r(
    "/data/users/laura.owen/extremes/heatwaves/HadUKGrid/dur-clim/coords/UK_even_coords.RData"
)
lons = result["even_xorder_land_lon_indices"]
lats = result["even_xorder_land_lat_indices"]
UK_df = pd.DataFrame(
    {
        "lon_index": lons.to_numpy().ravel(),
        "lat_index": lats.to_numpy().ravel(),
    }
)

# For a quick test run, restrict the grid to its first 100 points:
# UK_df = UK_df.iloc[0:100, :]

# --- Analysis dimensions -----------------------------------------------------
yrs = [1980, 2080]
RPy = [0.5, 2, 5, 10, 20, 50, 100, 200, 500, 1000]  # 10 return-period levels
do_dur = [*range(1, 10)]  # 9 durations: 1..9
stens = ["01"]  # only ensemble member 01 is available UK-wide at the moment
# Full set of 12 ensemble members, for when they are available:
# stens = ["01", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "15"]

# --- Accumulators ------------------------------------------------------------
# Plain lists for now; each collects one row dictionary per grid point and is
# converted to a DataFrame under the same name further down.
severity_df, mseverity_df, peakvalue_df = [], [], []
duration_df, tha_df, mhwt_df = [], [], []




In [51]:
# Gather per-grid-point statistics from the per-point .rds files into lists of
# row dictionaries (one list per output table), counting for every file type
# how many files were used and how many were missing or unusable.

# Start from empty accumulators and zeroed counters every time this cell runs.
# Without this, re-running the cell appends every point a second time, and
# re-running it after these names have been rebound to DataFrames fails on
# `.append`.
severity_df = []
mseverity_df = []
peakvalue_df = []
duration_df = []
tha_df = []
mhwt_df = []

skipped_levxally = 0
skipped_nDeqdally = 0
skipped_tha = 0
skipped_mhwt = 0
processed_levxally = 0
processed_nDeqdally = 0
processed_tha = 0
processed_mhwt = 0

# Years used to label a three-element tha / mhwt vector: element i is stored
# under point_years[i].
point_years = [1980, 2020, 2080]

# Output column names, in the order they appear in the saved tables.
rp_columns = [f"rp{rp_idx + 1}_{year}" for rp_idx in range(len(RPy)) for year in yrs]
duration_columns = [f"duration{dur}_{year}" for dur in do_dur for year in yrs]
tha_columns = [f"tha{year}" for year in point_years]
mhwt_columns = [f"mhwt{year}" for year in point_years]


def _blank_record(xco, yco, st, columns):
    """Return the identifying keys plus a NaN placeholder for each name in `columns`."""
    record = {"xco": xco, "yco": yco, "ensemble": st}
    record.update(dict.fromkeys(columns, np.nan))
    return record


def _read_vector(path):
    """Read a single-object .rds file and return its squeezed values as an array.

    `squeeze()` collapses a one-element object to a bare scalar, on which
    `len()` would raise TypeError; `np.atleast_1d` keeps that case indexable.
    """
    return np.atleast_1d(pyreadr.read_r(path)[None].squeeze())


for _, row in UK_df.iterrows():
    xco = str(row["lon_index"])
    yco = str(row["lat_index"])

    for st in stens:
        levxally_path = base_path / f"{st}/levxally_point_{xco}_{yco}_{st}.rds"
        nDeqdally_path = base_path / f"{st}/nDeqdally_point_{xco}_{yco}_{st}.rds"
        tha_path = base_path / f"{st}/tha_point_{xco}_{yco}_{st}.rds"
        mhwt_path = base_path / f"{st}/mhwt_point_{xco}_{yco}_{st}.rds"

        # --- levxally: indexed as [statistic, return period, year] -----------
        # Statistic 0 -> severity, 1 -> mseverity, 2 -> peakvalue.
        # Original author's note: shape (3, 10, n_years).
        if levxally_path.exists():
            levxally = pyreadr.read_r(levxally_path)[None]
            severity_dict = _blank_record(xco, yco, st, rp_columns)
            mseverity_dict = _blank_record(xco, yco, st, rp_columns)
            peakvalue_dict = _blank_record(xco, yco, st, rp_columns)
            # Only read what the file actually contains; anything missing keeps
            # its NaN placeholder.
            n_rp = min(len(RPy), levxally.shape[1])
            n_yr = min(len(yrs), levxally.shape[2])
            for rp_idx in range(n_rp):
                for year_idx in range(n_yr):
                    col_name = f"rp{rp_idx + 1}_{yrs[year_idx]}"
                    severity_dict[col_name] = levxally[0, rp_idx, year_idx].item()
                    mseverity_dict[col_name] = levxally[1, rp_idx, year_idx].item()
                    peakvalue_dict[col_name] = levxally[2, rp_idx, year_idx].item()
            severity_df.append(severity_dict)
            mseverity_df.append(mseverity_dict)
            peakvalue_df.append(peakvalue_dict)
            processed_levxally += 1
        else:
            skipped_levxally += 1

        # --- nDeqdally: indexed as [duration, year] --------------------------
        if nDeqdally_path.exists():
            nDeqdally = pyreadr.read_r(nDeqdally_path)[None]
            duration_dict = _blank_record(xco, yco, st, duration_columns)
            # Both indices are bounded by the table's own shape, so the
            # positional reads below cannot go out of range.
            n_dur = min(len(do_dur), nDeqdally.shape[0])
            n_years = min(len(yrs), nDeqdally.shape[1])
            for dur_idx in range(n_dur):
                for year_idx in range(n_years):
                    col_name = f"duration{do_dur[dur_idx]}_{yrs[year_idx]}"
                    duration_dict[col_name] = nDeqdally.iloc[dur_idx, year_idx]
            duration_df.append(duration_dict)
            processed_nDeqdally += 1
        else:
            skipped_nDeqdally += 1

        # --- tha: vector of 3 values (1980, 2020, 2080) or 2 (1980, 2080) ----
        if tha_path.exists():
            tha = _read_vector(tha_path)
            tha_dict = _blank_record(xco, yco, st, tha_columns)
            if len(tha) == 3:
                for idx, col_name in enumerate(tha_columns):
                    tha_dict[col_name] = tha[idx]
                processed_tha += 1
            elif len(tha) == 2:
                tha_dict["tha1980"] = tha[0]
                tha_dict["tha2080"] = tha[1]
                processed_tha += 1
            else:
                print(f"⚠️ Length mismatch! tha has length {len(tha)} at x={xco}, y={yco}, st={st}")
                skipped_tha += 1

            # The row is kept even on a length mismatch (all-NaN values), so
            # the point still appears in the table although it counts as skipped.
            tha_df.append(tha_dict)
        else:
            skipped_tha += 1

        # --- mhwt: vector filled positionally into 1980, 2020, 2080 ----------
        if mhwt_path.exists():
            mhwt = _read_vector(mhwt_path)
            mhwt_dict = _blank_record(xco, yco, st, mhwt_columns)
            # NOTE(review): a two-element mhwt lands in mhwt1980 / mhwt2020,
            # whereas a two-element tha above is stored as 1980 / 2080.
            # Confirm which mapping is right for mhwt files with two values.
            for col_name, value in zip(mhwt_columns, mhwt):
                mhwt_dict[col_name] = value
            # Warn once per file instead of once per surplus element.
            if len(mhwt) > len(mhwt_columns):
                print(f"⚠️ Extra data in mhwt beyond expected years at x={xco}, y={yco}, st={st}")
            mhwt_df.append(mhwt_dict)
            processed_mhwt += 1
        else:
            skipped_mhwt += 1

print(f"Levxally files: processed={processed_levxally}, skipped={skipped_levxally}")
print(f"nDeqdally files: processed={processed_nDeqdally}, skipped={skipped_nDeqdally}")
print(f"Tha files: processed={processed_tha}, skipped={skipped_tha}")
print(f"Mhwt files: processed={processed_mhwt}, skipped={skipped_mhwt}")

⚠️ Length mismatch! tha has length 51 at x=146, y=44, st=01
⚠️ Length mismatch! tha has length 51 at x=147, y=46, st=01
⚠️ Length mismatch! tha has length 51 at x=148, y=44, st=01
Levxally files: processed=5106, skipped=92
nDeqdally files: processed=5106, skipped=92
Tha files: processed=5103, skipped=95
Mhwt files: processed=5112, skipped=86


In [53]:
# Turn each list of row dictionaries into a DataFrame. The names are rebound
# in place, so from here on `severity_df`, `duration_df`, ... are DataFrames.
severity_df = pd.DataFrame(severity_df)
mseverity_df = pd.DataFrame(mseverity_df)
peakvalue_df = pd.DataFrame(peakvalue_df)
duration_df = pd.DataFrame(duration_df)
tha_df = pd.DataFrame(tha_df)
mhwt_df = pd.DataFrame(mhwt_df)

# Write every table to `df_path` as <name>.csv, without the index column.
# Create the folder first: to_csv raises OSError when the directory is missing.
df_path.mkdir(parents=True, exist_ok=True)
tables = {
    "severity_df": severity_df,
    "mseverity_df": mseverity_df,
    "peakvalue_df": peakvalue_df,
    "duration_df": duration_df,
    "tha_df": tha_df,
    "mhwt_df": mhwt_df,
}
for table_name, table in tables.items():
    table.to_csv(df_path / f"{table_name}.csv", index=False)