In [1]:
import pandas as pd
import numpy as np

In [12]:
# Load the CSV file
file_path = 'y_obs.csv'
data = pd.read_csv(file_path, parse_dates=[0])

# Set the unnamed first column (dates) as the index
data.set_index(data.columns[0],inplace=True)

In [13]:
data

Unnamed: 0_level_0,GRDC_6321100,GRDC_6321200,GRDC_6334270,GRDC_6334280,GRDC_6334800,GRDC_6335020,GRDC_6335030,GRDC_6335031,GRDC_6335032,GRDC_6335035,...,GRDC_6357010,GRDC_6357020,GRDC_6357500,GRDC_6357501,GRDC_6357502,GRDC_6357503,GRDC_6357505,GRDC_6357510,GRDC_6357520,GRDC_6357521
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-01,1.17,2.61,0.30,0.32,0.28,0.90,4.69,4.59,4.46,4.27,...,1.02,0.74,0.72,1.18,1.48,2.00,2.01,1.10,0.99,0.99
2011-01-02,0.43,0.85,0.00,0.00,0.00,0.81,1.64,1.72,1.42,0.99,...,0.46,0.14,0.59,1.27,2.09,2.81,2.82,1.58,1.80,1.80
2011-01-03,0.27,0.29,0.02,0.02,0.02,0.27,0.80,0.94,0.69,0.50,...,0.42,0.13,0.60,1.16,1.78,2.09,2.10,1.55,1.53,1.53
2011-01-04,0.25,0.06,3.65,3.96,2.86,0.12,0.14,0.09,0.10,0.17,...,0.20,0.09,0.11,0.10,0.09,0.14,0.14,0.06,0.06,0.06
2011-01-05,0.15,0.03,1.34,1.51,0.96,0.05,0.04,0.03,0.04,0.07,...,0.07,0.01,0.10,0.07,0.12,0.20,0.21,0.05,0.15,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-27,9.58,8.67,14.45,15.22,12.16,4.12,10.14,9.84,9.44,10.52,...,0.02,0.00,0.05,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2020-12-28,0.91,1.85,4.44,4.74,3.57,5.72,3.81,3.58,4.18,5.58,...,0.48,0.32,0.73,0.74,1.15,1.59,1.60,0.81,0.96,0.96
2020-12-29,0.89,0.53,7.46,7.26,7.63,3.63,1.18,1.05,0.71,0.80,...,4.15,1.92,3.51,0.78,0.64,0.78,0.79,0.60,0.79,0.79
2020-12-30,1.08,0.83,5.89,6.17,5.47,1.04,1.44,1.02,0.86,1.22,...,1.29,0.00,1.89,0.00,0.00,0.00,0.00,0.00,0.00,0.00


In [19]:
# Function to calculate precipitation indices for a single catchment
def calculate_precipitation_indices(series):
    indices = {}

    # Total precipitation amount
    indices['prcptot'] = series.sum()

    # Number of days with daily precipitation amount above 1mm
    indices['RR1'] = (series > 1).sum()

    # Maximum 1-day precipitation amount
    indices['rx1day'] = series.max()

    # Maximum 5-days precipitation amount
    indices['rx5day'] = series.rolling(window=5).sum().max()

    # Number of consecutive wet days (daily precipitation ≥ 1 mm)
    def max_consecutive_wet_days(s):
        wet_days = (s >= 1).astype(int)
        return wet_days.groupby((wet_days != wet_days.shift()).cumsum()).cumsum().max()

    indices['cwd'] = max_consecutive_wet_days(series)

    # Number of days with daily precipitation amount above 20 mm
    indices['rr20mm'] = (series >= 20).sum()

    # Daily precipitation amount corresponding to the 90th percentile
    indices['r90p'] = series[series >= 1].quantile(0.9)

    # Daily precipitation amount corresponding to the 95th percentile
    indices['r95p'] = series[series >= 1].quantile(0.95)

    # Daily precipitation amount corresponding to the 99th percentile
    indices['r99p'] = series[series >= 1].quantile(0.99)

    # Frequency of rainy days exceeding the 90th percentile
    r90p_value = series[series >= 1].quantile(0.9)
    indices['r90pday'] = (series >= r90p_value).sum()

    # Frequency of rainy days exceeding the 95th percentile
    r95p_value = series[series >= 1].quantile(0.95)
    indices['r95pday'] = (series >= r95p_value).sum()

    # Frequency of rainy days exceeding the 99th percentile
    r99p_value = series[series >= 1].quantile(0.99)
    indices['r99pday'] = (series >= r99p_value).sum()

    # Magnitude of precipitation amount standardised over 95th percentile
    indices['nrr95p'] = (series / series.quantile(0.95)).mean()

    # Magnitude of precipitation amount standardised over 99th percentile
    indices['nrr99p'] = (series / series.quantile(0.99)).mean()

    # Dry Day Indices

    # Number of dry days (precipitation < 1 mm)
    indices['dry_days'] = (series < 1).sum()

    # Number of consecutive dry days (precipitation < 1 mm)
    def max_consecutive_dry_days(s):
        dry_days = (s < 1).astype(int)
        return dry_days.groupby((dry_days != dry_days.shift()).cumsum()).cumsum().max()

    indices['cdd'] = max_consecutive_dry_days(series)

    return pd.Series(indices)

# Function to calculate indices for each year and catchment
def calculate_yearly_indices(data):
    yearly_indices_list = []

    for year, group in data.groupby(data.index.year):
        for column in group.columns:
            catchment_data = group[column]
            indices = calculate_precipitation_indices(catchment_data)
            indices['Year'] = year
            indices['Catchment'] = column
            yearly_indices_list.append(indices)

    return pd.DataFrame(yearly_indices_list)



              prcptot    RR1  rx1day  rx5day   cwd  rr20mm     r90p      r95p  \
Catchment                                                                       
GRDC_6321100  839.773  157.2  25.740  57.325  11.3     2.3  10.5912  13.18615   
GRDC_6321200  896.479  167.9  27.354  58.117  12.2     1.6  10.3826  13.05340   
GRDC_6334270  875.065  160.2  27.347  54.955  10.8     2.0  10.5802  13.32980   
GRDC_6334280  884.859  162.1  27.463  55.642  11.1     2.2  10.5736  13.29750   
GRDC_6334800  838.328  157.3  27.285  52.950  10.3     2.2  10.4351  13.22830   
...               ...    ...     ...     ...   ...     ...      ...       ...   
GRDC_6357503  949.504  160.7  38.763  72.331  11.9     3.7  11.7943  15.40290   
GRDC_6357505  950.380  160.7  38.760  72.321  11.9     3.7  11.7946  15.42080   
GRDC_6357510  849.380  154.3  39.032  71.992  11.6     3.7  10.8879  14.38135   
GRDC_6357520  840.256  155.7  37.241  69.440  10.8     3.3  10.5361  13.92885   
GRDC_6357521  840.256  155.7

In [20]:
# Calculate indices for each year and each catchment
yearly_indices = calculate_yearly_indices(data)

# Calculate the average indices over all years for each catchment
average_indices = yearly_indices.groupby('Catchment').mean()

print(average_indices)


              prcptot    RR1  rx1day  rx5day   cwd  rr20mm     r90p      r95p  \
Catchment                                                                       
GRDC_6321100  839.773  157.2  25.740  57.325  11.3     2.3  10.5912  13.18615   
GRDC_6321200  896.479  167.9  27.354  58.117  12.2     1.6  10.3826  13.05340   
GRDC_6334270  875.065  160.2  27.347  54.955  10.8     2.0  10.5802  13.32980   
GRDC_6334280  884.859  162.1  27.463  55.642  11.1     2.2  10.5736  13.29750   
GRDC_6334800  838.328  157.3  27.285  52.950  10.3     2.2  10.4351  13.22830   
...               ...    ...     ...     ...   ...     ...      ...       ...   
GRDC_6357503  949.504  160.7  38.763  72.331  11.9     3.7  11.7943  15.40290   
GRDC_6357505  950.380  160.7  38.760  72.321  11.9     3.7  11.7946  15.42080   
GRDC_6357510  849.380  154.3  39.032  71.992  11.6     3.7  10.8879  14.38135   
GRDC_6357520  840.256  155.7  37.241  69.440  10.8     3.3  10.5361  13.92885   
GRDC_6357521  840.256  155.7

In [35]:
average_indices.to_csv('indices_obs.csv')

In [33]:
# Load the CSV file
file_path = 'y_sim.csv'
data_sim = pd.read_csv(file_path, parse_dates=[0])

# Set the unnamed first column (dates) as the index
data_sim.set_index(data_sim.columns[0],inplace=True)

In [34]:
# Calculate indices for each year and each catchment
yearly_indices2 = calculate_yearly_indices(data_sim)

# Calculate the average indices over all years for each catchment
average_indices2 = yearly_indices2.groupby('Catchment').mean()

print(average_indices2)

                 prcptot    RR1     rx1day     rx5day   cwd  rr20mm      r90p  \
Catchment                                                                       
GRDC_6321100  797.630926  224.8  21.084771  49.192894  24.6     0.9  6.430197   
GRDC_6321200  906.388821  232.8  20.097659  49.482126  25.1     0.4  7.406981   
GRDC_6334270  859.870864  226.3  22.884280  46.417280  25.3     0.6  7.148868   
GRDC_6334280    0.000000    0.0        NaN        NaN   0.0     0.0       NaN   
GRDC_6334800  945.043580  242.1  19.388961  54.903136  33.2     0.8  7.065829   
...                  ...    ...        ...        ...   ...     ...       ...   
GRDC_6357503  513.947294  137.0  30.101825  60.510466  20.2     1.1  7.026010   
GRDC_6357505  523.186156  141.3  25.800923  53.485628  19.8     1.2  6.810853   
GRDC_6357510  845.050719  232.3  32.983490  57.801370  18.9     2.2  6.384015   
GRDC_6357520  455.991130  119.7  38.012686  73.109239  12.1     0.9  6.955286   
GRDC_6357521  353.305506  10

In [36]:
average_indices2.to_csv('indices_sim.csv')