In [1]:
import rioxarray as rxr
import pandas as pd
import os
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Input folder holding the MSM1 rasters, and output folder for the figures/CSV.
# The defaults are the original locations; set the MSM1_INPUT_DIR /
# MSM1_OUTPUT_DIR environment variables to run the notebook on another machine
# without editing this cell.
IDir = os.environ.get('MSM1_INPUT_DIR', r'/Volumes/PhD/imagery/masters/output/MSM1/')
ODir = os.environ.get('MSM1_OUTPUT_DIR', r'/Volumes/PhD/imagery/masters/output/ROC/design_storm/')

In [3]:
def _kde_peak(values, n_grid=1000):
    """Return the location of the highest point of a Gaussian KDE of `values`.

    `values` must be a non-empty 1-D array without NaNs. The density is
    evaluated on `n_grid` evenly spaced points between the sample minimum and
    maximum, and the grid point with the largest density is returned.

    `gaussian_kde` cannot be fitted to fewer than two samples or to samples
    with zero spread; in those cases the single/constant value is returned,
    since that is where all of the mass lies.
    """
    lo, hi = values.min(), values.max()
    if values.size < 2 or lo == hi:
        return float(values[0])
    try:
        grid = np.linspace(lo, hi, n_grid)
        density = gaussian_kde(values)(grid)
    except np.linalg.LinAlgError:
        # Nearly constant data can still yield a singular covariance matrix.
        return float(np.median(values))
    return grid[density.argmax()]


def MSM1_stats(basin, RI, image, plot = False):
    """Summarise the distribution of valid (non-NaN) pixel values of one raster.

    The statistics are appended to the module-level accumulator lists
    `basin_list`, `RI_list`, `mean_list`, `median_list`,
    `peak_frequency_list`, `above_50` and `above_90`, which must exist before
    this function is called. The same values are also returned as a dict.

    Parameters
    ----------
    basin : identifier stored in `basin_list` and used in the plot title/file name.
    RI : value stored in `RI_list`.
    image : object whose `.values` attribute is a numeric array; NaN entries
        are treated as missing and ignored.
        NOTE(review): values are presumably probabilities in [0, 1] (the
        histogram bins span 0-1) - confirm. Nodata pixels are only excluded if
        they are stored as NaN.
    plot : when truthy, save a histogram with mean/median/peak markers to
        `<ODir>/<basin>_MSM1_Dist.png`.

    Returns
    -------
    dict with keys 'basin', 'RI', 'mean', 'median', 'peak_frequency',
    'above_50' and 'above_90'. 'above_50' / 'above_90' are the fractions
    (0-1, not percentages) of valid pixels strictly greater than 0.5 / 0.9.
    If the raster has no valid pixels every statistic is NaN, so one bad
    raster does not abort a batch run.
    """
    flat = image.values.flatten()
    valid = flat[~np.isnan(flat)]

    if valid.size == 0:
        # Nothing to summarise: record NaNs instead of failing inside the KDE.
        mean_value = median_value = peak_frequency = np.nan
        frac_above_0_5 = frac_above_0_9 = np.nan
    else:
        mean_value = np.mean(valid)
        median_value = np.median(valid)
        peak_frequency = _kde_peak(valid)
        frac_above_0_5 = np.count_nonzero(valid > 0.5) / valid.size
        frac_above_0_9 = np.count_nonzero(valid > 0.9) / valid.size

    basin_list.append(basin)
    RI_list.append(RI)
    mean_list.append(mean_value)
    median_list.append(median_value)
    peak_frequency_list.append(peak_frequency)
    above_50.append(frac_above_0_5)
    above_90.append(frac_above_0_9)

    if plot and valid.size:
        bin_edges = np.arange(0, 1.05, 0.05)
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(valid, bins=bin_edges, kde=True, alpha=.1, ax=ax)
        ax.axvline(x=mean_value, color='m', linestyle='--', label = f'Mean: {mean_value: .3f}')
        ax.axvline(x=median_value, color='b', linestyle='--', label = f'Median: {median_value: .3f}')
        ax.axvline(x=peak_frequency, color='y', linestyle='--', label = f'Peak Freq: {peak_frequency: .3f}')

        ax.set_title(f"Basin {basin} MSM1 Distribution")
        ax.set_xlabel("Probability")
        ax.set_ylabel("Frequency")
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        ax.legend(loc='best')
        fig.savefig(os.path.join(ODir, f'{basin}_MSM1_Dist.png'), dpi = 300)
        # Close explicitly so figures do not pile up when called in a loop.
        plt.close(fig)

    return {
        'basin': basin,
        'RI': RI,
        'mean': mean_value,
        'median': median_value,
        'peak_frequency': peak_frequency,
        'above_50': frac_above_0_5,
        'above_90': frac_above_0_9,
    }
        
   




In [4]:
# Accumulators filled by MSM1_stats (one entry per raster). They are reset
# here so re-running this cell does not duplicate rows.
basin_list = []
RI_list = []
mean_list = []
median_list = []
peak_frequency_list = []
above_50 = []
above_90 = []

# os.listdir returns entries in arbitrary order; sort for a reproducible row order.
for filename in sorted(os.listdir(IDir)):
    # Hidden entries (macOS "._*" resource forks, ".DS_Store", ...) are not rasters.
    if filename.startswith("."):
        print(f"Skipping file: {filename}")
        continue

    # The basin id is the first "_"-separated part of the file name; RI is the
    # first two characters of the third part (anything after a "." is dropped).
    name_parts = filename.split('_')
    if len(name_parts) < 3:
        print(f"Skipping file: {filename}")
        continue
    basin = name_parts[0]
    RI = name_parts[2].split('.')[0][0:2]

    image = rxr.open_rasterio(os.path.join(IDir, filename))
    try:
        MSM1_stats(basin, RI, image, False)
    finally:
        # Release the file handle; thousands of rasters are opened in this loop.
        image.close()
    
    
    


In [5]:
# One row per processed raster, one column per accumulator list.
MSM1_df = pd.DataFrame(
    {
        'BASIN_ID': basin_list,
        'RI': RI_list,
        'MSM1P_Mean': mean_list,
        'MSM1P_Med': median_list,
        'MSM1P_PF': peak_frequency_list,
        'MSM1_>50': above_50,
        'MSM1_>90': above_90,
    }
)
MSM1_df

Unnamed: 0,BASIN_ID,RI,MSM1P_Mean,MSM1P_Med,MSM1P_PF,MSM1_>50,MSM1_>90
0,10009,12,0.240171,0.221695,0.200251,0.000000,0.000000
1,10009,16,0.414237,0.385938,0.346617,0.293333,0.000000
2,10009,20,0.604781,0.581024,0.531380,0.786667,0.000000
3,10009,24,0.763173,0.753687,0.710027,1.000000,0.066667
4,10009,28,0.870135,0.870993,0.944994,1.000000,0.400000
...,...,...,...,...,...,...,...
13883,9997,24,0.892331,0.912209,0.934131,1.000000,0.577485
13884,9997,28,0.953307,0.965646,0.975238,1.000000,0.916179
13885,9997,32,0.980401,0.987019,0.990935,1.000000,0.996589
13886,9997,36,0.991863,0.995162,0.996640,1.000000,1.000000


In [6]:
# Save the summary table to the output folder. Default to_csv settings are
# used, so the row index is written along with the data columns.
csv_path = os.path.join(ODir, 'MSM1_design-storm.csv')
MSM1_df.to_csv(csv_path)