In [1]:
import rioxarray as rxr
import pandas as pd
import os
import numpy as np
from scipy.stats import gaussian_kde

In [2]:
# Directory that receives the summary CSV written at the end of the notebook.
ODir = r'/Volumes/PhD/imagery/masters/output/ROC/'
# Directory scanned for the per-basin MSM1 rasters (one file per basin).
IDir = r'/Volumes/PhD/imagery/masters/output/ROC/MSM1_ROC/'

In [3]:
def MSM1_stats(basin, image):
    """Summarise one basin raster and append the results to the module-level lists.

    Parameters
    ----------
    basin : hashable
        Identifier stored in ``basin_list`` for this raster.
    image : object exposing ``.values``
        Raster whose ``.values`` is an array-like of numbers; NaN entries are
        treated as missing and excluded from every statistic.

    Returns
    -------
    dict
        The statistics that were appended, keyed by ``basin``, ``mean``,
        ``median``, ``peak_frequency``, ``frac_above_0_5`` and
        ``frac_above_0_9``.

    Side effects
    ------------
    Appends one entry to each of the module-level lists ``basin_list``,
    ``mean_list``, ``median_list``, ``peak_frequency_list``, ``above_50`` and
    ``above_90``; these must exist before the function is called.

    Notes
    -----
    * ``peak_frequency`` is the location of the maximum of a Gaussian KDE
      evaluated on 1000 evenly spaced points between the data min and max.
    * ``above_50`` / ``above_90`` receive *fractions* in [0, 1] (share of valid
      pixels strictly greater than 0.5 / 0.9), not percentages.
    * A raster with no valid pixels yields NaN for every statistic instead of
      raising, so one empty basin does not abort a batch run.
    """
    flat = np.asarray(image.values).ravel()
    # Single NaN mask shared by all statistics.
    valid = flat[~np.isnan(flat)]

    if valid.size == 0:
        # Nothing to summarise: record NaNs rather than crash in gaussian_kde
        # or divide by zero below.
        mean_value = median_value = peak_frequency = float('nan')
        frac_above_0_5 = frac_above_0_9 = float('nan')
    else:
        # NaNs are already removed, so the plain reducers are sufficient.
        mean_value = np.mean(valid)
        median_value = np.median(valid)

        lowest, highest = valid.min(), valid.max()
        if lowest == highest:
            # Constant (or single-pixel) data has a singular covariance, which
            # gaussian_kde cannot handle; the density peak is that value.
            peak_frequency = lowest
        else:
            kde = gaussian_kde(valid)
            x_grid = np.linspace(lowest, highest, 1000)
            peak_frequency = x_grid[kde(x_grid).argmax()]

        frac_above_0_5 = np.sum(valid > 0.5) / valid.size
        frac_above_0_9 = np.sum(valid > 0.9) / valid.size

    basin_list.append(basin)
    mean_list.append(mean_value)
    median_list.append(median_value)
    peak_frequency_list.append(peak_frequency)
    above_50.append(frac_above_0_5)
    above_90.append(frac_above_0_9)

    return {
        'basin': basin,
        'mean': mean_value,
        'median': median_value,
        'peak_frequency': peak_frequency,
        'frac_above_0_5': frac_above_0_5,
        'frac_above_0_9': frac_above_0_9,
    }
   




In [5]:
# Per-basin accumulators that MSM1_stats() appends to. They are re-created in
# this cell so that re-running it starts from empty lists instead of
# duplicating rows.
basin_list = []
mean_list = []
median_list = []
peak_frequency_list = []
above_50 = []
above_90 = []

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the resulting table reproducible between runs and machines.
for filename in sorted(os.listdir(IDir)):
    # Hidden entries are not rasters: this covers the "._*" sidecar files
    # (presumably macOS resource forks) as well as other dotfiles such as
    # .DS_Store, which would make open_rasterio fail.
    if filename.startswith("."):
        print(f"Skipping file: {filename}")
        continue
    file = os.path.join(IDir, filename)
    # The basin id is the part of the file name before the first underscore,
    # e.g. "17499" for "17499_MSM1_20mmhr.tif".
    basin = filename.split('_')[0]
    # NOTE(review): the raster is read unmasked, so only pixels stored as NaN
    # are excluded by MSM1_stats. If these files declare a numeric nodata
    # value, consider open_rasterio(file, masked=True) - confirm against data.
    # The context manager releases the underlying file handle after each file.
    with rxr.open_rasterio(file) as image:
        MSM1_stats(basin, image)
    


Skipping file: ._17499_MSM1_20mmhr.tif
Skipping file: ._3115_MSM1_28mmhr.tif
Skipping file: ._3589_MSM1_28mmhr.tif
Skipping file: ._5683_MSM1_51mmhr.tif
Skipping file: ._5920_MSM1_24mmhr.tif
Skipping file: ._7185_MSM1_24mmhr.tif
Skipping file: ._7262_MSM1_39mmhr.tif
Skipping file: ._10810_MSM1_46mmhr.tif
Skipping file: ._10814_MSM1_46mmhr.tif
Skipping file: ._10865_MSM1_46mmhr.tif
Skipping file: ._10890_MSM1_46mmhr.tif
Skipping file: ._11112_MSM1_100mmhr.tif
Skipping file: ._11209_MSM1_36mmhr.tif
Skipping file: ._11514_MSM1_100mmhr.tif
Skipping file: ._11730_MSM1_36mmhr.tif
Skipping file: ._11857_MSM1_36mmhr.tif
Skipping file: ._11896_MSM1_100mmhr.tif
Skipping file: ._12057_MSM1_78mmhr.tif
Skipping file: ._12080_MSM1_100mmhr.tif
Skipping file: ._12288_MSM1_36mmhr.tif
Skipping file: ._12571_MSM1_36mmhr.tif
Skipping file: ._12729_MSM1_36mmhr.tif
Skipping file: ._12918_MSM1_24mmhr.tif
Skipping file: ._13132_MSM1_100mmhr.tif
Skipping file: ._13281_MSM1_78mmhr.tif
Skipping file: ._13378_MSM

In [6]:
# Assemble the summary table in one step: each list contributes one column,
# with one row per raster processed by the loop above.
MSM1_df = pd.DataFrame(
    {
        'BASIN_ID': basin_list,
        'MSM1P_Mean': mean_list,
        'MSM1P_Med': median_list,
        'MSM1P_PF': peak_frequency_list,
        'MSM1_>50': above_50,
        'MSM1_>90': above_90,
    }
)

In [8]:
# Persist the summary table as MSM1_ROC.csv inside ODir, using to_csv's
# default settings.
MSM1_df.to_csv(path_or_buf=os.path.join(ODir, 'MSM1_ROC.csv'))