# c) Ngonye Falls Flow Analysis

Load the synthetic historic daily flow series for Ngonye and produce various summary statistics for later presentation.

## Inputs

| Data                       | Source                                        | Description                                 |
|----------------------------|-----------------------------------------------|---------------------------------------------|
| ngonye_synthetic.csv  | Notebook: b_synthetic_flow_ngonye |Synthetic daily flow series for Ngonye  Falls 1924/25 - 2016/17  |
| selected_years.csv | Mott MacDonald - Ngonye Falls Hydropower Project - 2018 Feasibility Study Update - Final Report Version D | List of representative selected years |


## Outputs
| File                           | Description                                 |
|--------------------------------|---------------------------------------------|
| ngonye_flow_daily.csv          | Daily flow data  |
| ngonye_flow_monthly.csv        | Flow summaries by month  |
| ngonye_flow_yearly.csv         | Flow summaries by year  |
| ngonye_flow_calmonthly.csv     | Flow summaries by calendar month |
| ngonye_flow_selected_years.csv | Flow summaries for selected representative years  |



## Parameters

In [2]:
year = "2022"

In [3]:
input_data='./input_data/'
output_data='./output_data/'
if year != '':
    input_data+=year + '/'
    output_data+=year + '/'

## Libraries

In [4]:
import numpy as np
import pandas as pd
import datetime

## Load the Daily Data

In [5]:

#daily = pd.read_csv(output_data + "ngonye_synthetic.csv")
daily_file = "ngonye_synthetic"
if year != '':
    daily_file += '_' + year
daily_file += '.csv'

daily = pd.read_csv(output_data + daily_file)
daily.tail(4)

FileNotFoundError: [Errno 2] No such file or directory: './output_data/2022/ngonye_synthetic_2022.csv'

Index by date and add some other columns for later use. 

Add a column for *WaterYear* which starts on 1st October and runs to 31st September the following year.

In [6]:
daily['Date']=pd.to_datetime(daily['Date'],format="%Y-%m-%d")#"%d/%m/%Y")
daily=daily.set_index(pd.DatetimeIndex(daily['Date']))


In [7]:
daily['Year']=daily.index.year
daily['Month']=daily.index.month
daily['Day']=daily.index.day
daily['MonthId']=daily['Year']+daily['Month']/100
daily['WaterYear']=daily.apply((lambda x: (x['Year'] if x['Month']>=10 else x['Year']-1)),axis=1)
daily['WaterMonth']=daily.apply((lambda x: (x['Month']-9 if x['Month']>=10 else x['Month']+3)),axis=1)
daily['WaterDay']=daily.apply(lambda x: (x['Date']-pd.Timestamp(x['WaterYear'], 10, 1)).days+1,axis=1)
daily['WaterWeek']=np.floor((daily['WaterDay']-1)/7)+1
daily['Volume']=daily['Flow']*60*60*24/(1000*1000*1000)
daily=daily.astype({'WaterWeek': 'int32'})
daily=daily.drop('Date',axis=1)
daily

Unnamed: 0_level_0,LaggedDate,VicFalls,Conversion,Flow,Exceedance,Year,Month,Day,MonthId,WaterYear,WaterMonth,WaterDay,WaterWeek,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1924-10-01,1924-10-12,100.0,1.384095,138.409487,0.998,1924,10,1,1924.10,1924,1,1,1,0.011959
1924-10-02,1924-10-13,100.0,1.384095,138.409487,0.998,1924,10,2,1924.10,1924,1,2,1,0.011959
1924-10-03,1924-10-14,100.0,1.384095,138.409487,0.998,1924,10,3,1924.10,1924,1,3,1,0.011959
1924-10-04,1924-10-15,100.0,1.384095,138.409487,0.998,1924,10,4,1924.10,1924,1,4,1,0.011959
1924-10-05,1924-10-16,100.0,1.384095,138.409487,0.998,1924,10,5,1924.10,1924,1,5,1,0.011959
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-26,2022-10-07,293.0,0.932126,273.112795,0.825,2022,9,26,2022.09,2021,12,361,52,0.023597
2022-09-27,2022-10-08,289.0,0.928947,268.465689,0.837,2022,9,27,2022.09,2021,12,362,52,0.023195
2022-09-28,2022-10-09,286.0,0.929724,265.901164,0.847,2022,9,28,2022.09,2021,12,363,52,0.022974
2022-09-29,2022-10-10,283.0,0.942741,266.795709,0.844,2022,9,29,2022.09,2021,12,364,52,0.023051


In [8]:
daily['Flow_difference']=np.abs(daily['Flow']-daily['Flow'].shift(1))
daily['Flow_difference_pct']=daily['Flow_difference']/daily['Flow']

## Setup the Monthly Data

Load the monthly data.

In [9]:

monthly=daily.groupby(['MonthId','Year','Month']).size().to_frame(name="Days").reset_index(['Month','Year'])
monthly

Unnamed: 0_level_0,Year,Month,Days
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1924.10,1924,10,31
1924.11,1924,11,30
1924.12,1924,12,31
1925.01,1925,1,31
1925.02,1925,2,28
...,...,...,...
2022.05,2022,5,31
2022.06,2022,6,30
2022.07,2022,7,31
2022.08,2022,8,31


Set the index and add additional columns for later use.

In [10]:

monthly['Day']=1
monthly['DateStart']=pd.to_datetime(monthly[['Year','Month','Day']])
monthly=monthly.drop('Day',1)

monthly['WaterYear']=monthly.apply((lambda x: (x['Year'] if x['Month']>=10 else x['Year']-1)),axis=1)
monthly['WaterMonth']=monthly.apply((lambda x: (x['Month']-9 if x['Month']>=10 else x['Month']+3)),axis=1)

monthly

  monthly=monthly.drop('Day',1)


Unnamed: 0_level_0,Year,Month,Days,DateStart,WaterYear,WaterMonth
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1924.10,1924,10,31,1924-10-01,1924,1
1924.11,1924,11,30,1924-11-01,1924,2
1924.12,1924,12,31,1924-12-01,1924,3
1925.01,1925,1,31,1925-01-01,1924,4
1925.02,1925,2,28,1925-02-01,1924,5
...,...,...,...,...,...,...
2022.05,2022,5,31,2022-05-01,2021,8
2022.06,2022,6,30,2022-06-01,2021,9
2022.07,2022,7,31,2022-07-01,2021,10
2022.08,2022,8,31,2022-08-01,2021,11


## Monthly flow summaries

Add flow summaries to the monthly data

In [11]:
monthly['Flow_min']=daily[['MonthId','Flow']].groupby('MonthId').min()
monthly['Flow_mean']=daily[['MonthId','Flow']].groupby('MonthId').mean()
monthly['Flow_median']=daily[['MonthId','Flow']].groupby('MonthId').median()
monthly['Flow_max']=daily[['MonthId','Flow']].groupby('MonthId').max()
monthly['Volume']=daily[['MonthId','Volume']].groupby('MonthId').sum()
monthly['Flow_range']=monthly['Flow_max']-monthly['Flow_min']
monthly[['Flow_min','Flow_mean','Flow_median','Flow_max','Flow_range']]
monthly

Unnamed: 0_level_0,Year,Month,Days,DateStart,WaterYear,WaterMonth,Flow_min,Flow_mean,Flow_median,Flow_max,Volume,Flow_range
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1924.10,1924,10,31,1924-10-01,1924,1,130.226808,136.771526,138.409487,142.600184,0.366329,12.373376
1924.11,1924,11,30,1924-11-01,1924,2,132.587158,153.655969,144.874859,192.034052,0.398276,59.446894
1924.12,1924,12,31,1924-12-01,1924,3,193.122845,276.157068,269.414462,441.966484,0.739659,248.843639
1925.01,1925,1,31,1925-01-01,1924,4,418.554562,704.059970,602.832326,1199.769467,1.885754,781.214905
1925.02,1925,2,28,1925-02-01,1924,5,1126.382002,1210.241015,1179.833397,1434.091840,2.927815,307.709839
...,...,...,...,...,...,...,...,...,...,...,...,...
2022.05,2022,5,31,2022-05-01,2021,8,2219.594927,2785.549155,2866.726609,3116.982122,7.460815,897.387195
2022.06,2022,6,30,2022-06-01,2021,9,996.060566,1534.964803,1481.816787,2204.402985,3.978629,1208.342420
2022.07,2022,7,31,2022-07-01,2021,10,486.618631,668.630149,627.771184,970.644398,1.790859,484.025768
2022.08,2022,8,31,2022-08-01,2021,11,344.141912,408.199554,407.746186,479.243580,1.093322,135.101669


## Annual Flow

Create a data table for annual (water year) summaries and populate.

In [12]:
yearly=monthly[['WaterYear']].groupby('WaterYear').count()

yearly['Flow_min']=daily[['WaterYear','Flow']].groupby('WaterYear').min()
yearly['Flow_median']=daily[['WaterYear','Flow']].groupby('WaterYear').median()
yearly['Flow_mean']=daily[['WaterYear','Flow']].groupby('WaterYear').mean()
yearly['Flow_max']=daily[['WaterYear','Flow']].groupby('WaterYear').max()
yearly['Flow_range']=yearly['Flow_max']-yearly['Flow_min']
yearly['Volume']=monthly[['WaterYear','Volume']].groupby('WaterYear').sum()

yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1924,130.226808,571.451055,996.952493,3445.656272,3315.429465,31.439894
1925,159.364330,504.517440,1121.920521,4607.486825,4448.122495,35.380886
1926,193.122845,571.451055,994.109604,3303.454729,3110.331884,31.350240
1927,178.542186,518.534927,849.631440,2252.949427,2074.407241,26.867385
1928,200.710393,390.897659,569.273730,1569.566645,1368.856253,17.952616
...,...,...,...,...,...,...
2017,171.188037,814.797485,1611.713441,4855.789688,4684.601650,50.826995
2018,158.591340,462.702761,512.496300,1103.622889,945.031549,16.162083
2019,130.594892,605.738786,1376.892187,4709.551434,4578.956542,43.540635
2020,261.920014,786.688087,1353.436401,3732.969062,3471.049047,42.681970


In [13]:
Flow_mean_mean=yearly['Flow_mean'].describe()['mean']
Flow_max_mean=yearly['Flow_max'].describe()['mean']
Flow_min_mean=yearly['Flow_min'].describe()['mean']
Volume_mean=yearly['Volume'].describe()['mean']


yearly['Flow_mean_pct_var']=(yearly['Flow_mean']-Flow_mean_mean)/Flow_mean_mean*100
yearly['Flow_max_pct_var']=(yearly['Flow_max']-Flow_max_mean)/Flow_max_mean*100
yearly['Flow_min_pct_var']=(yearly['Flow_min']-Flow_min_mean)/Flow_min_mean*100
yearly['Volume_pct_var']=(yearly['Volume']-Volume_mean)/Volume_mean*100


Flow_mean_mean

1108.2698874215282

In [14]:
yearly['Flow_mean_5yr_mvCoefVar']=yearly['Flow_mean'].rolling(5,center=True).std()/Flow_mean_mean*100
yearly.loc[:,['Flow_mean_pct_var','Volume_pct_var']]

Unnamed: 0_level_0,Flow_mean_pct_var,Volume_pct_var
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1
1924,-10.044250,-10.105979
1925,1.231707,1.162239
1926,-10.300766,-10.362319
1927,-23.337136,-23.179852
1928,-48.634016,-48.669265
...,...,...
2017,45.426079,45.326285
2018,-53.757085,-53.788818
2019,24.237986,24.492876
2020,22.121553,22.037751


In [16]:
mins=daily[['Year','Flow']].groupby('Year').idxmin()
mins=mins.reset_index()
mins['DaysToStart']=mins.apply(lambda x: x['Flow']-pd.Timestamp(datetime.date(x['Year'], 10, 1)),axis=1)
mins=mins.set_index('Year')
yearly['DaysToStart']=mins['DaysToStart']
yearly['SeasonStart']=mins['Flow']
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1924,130.226808,571.451055,996.952493,3445.656272,3315.429465,31.439894,-10.044250,-5.145064,-39.692184,-10.105979,,24 days,1924-10-25
1925,159.364330,504.517440,1121.920521,4607.486825,4448.122495,35.380886,1.231707,26.838788,-26.198646,1.162239,,29 days,1925-10-30
1926,193.122845,571.451055,994.109604,3303.454729,3110.331884,31.350240,-10.300766,-9.059709,-10.565134,-10.362319,19.099237,23 days,1926-10-24
1927,178.542186,518.534927,849.631440,2252.949427,2074.407241,26.867385,-23.337136,-37.978906,-17.317413,-23.179852,19.614706,8 days,1927-10-09
1928,200.710393,390.897659,569.273730,1569.566645,1368.856253,17.952616,-48.634016,-56.791644,-7.051354,-48.669265,15.583496,28 days,1928-10-29
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017,171.188037,814.797485,1611.713441,4855.789688,4684.601650,50.826995,45.426079,33.674279,-20.723107,45.326285,37.437157,18 days,2017-10-19
2018,158.591340,462.702761,512.496300,1103.622889,945.031549,16.162083,-53.757085,-69.618537,-26.556616,-53.788818,38.001919,41 days,2018-11-11
2019,130.594892,605.738786,1376.892187,4709.551434,4578.956542,43.540635,24.237986,29.648509,-39.521724,24.492876,38.008390,12 days,2019-10-13
2020,261.920014,786.688087,1353.436401,3732.969062,3471.049047,42.681970,22.121553,2.764325,21.294720,22.037751,,32 days,2020-11-02


In [17]:
annual_fdcs=pd.DataFrame(index=np.arange(0,1.01,0.01),columns=np.arange(yearly.index.min(),yearly.index.max()+1,1))
for col in annual_fdcs.columns:
    annual_fdcs[col]=np.percentile(daily.loc[daily['WaterYear']==col]['Flow'],((1-annual_fdcs.index)*100))

annual_fdcs

Unnamed: 0,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0.00,3445.656272,4607.486825,3303.454729,2252.949427,1569.566645,2175.853785,3345.707467,3797.340509,1593.032438,5617.413980,...,3651.155160,3618.067429,1455.639480,2983.091137,3912.450050,4855.789688,1103.622889,4709.551434,3732.969062,3116.982122
0.01,3419.900528,4513.614159,3224.521897,2252.949427,1549.423724,2143.358328,3303.454729,3764.249435,1569.566645,5520.978610,...,3623.714312,3572.641729,1455.639480,2983.091137,3889.608977,4744.596997,1078.769730,4619.494960,3711.426748,3108.878071
0.02,3333.876700,4458.846568,3108.752897,2241.268990,1531.959585,2091.997255,3259.666591,3707.228803,1515.215752,5351.536527,...,3572.517397,3533.098018,1443.913408,2977.673126,3839.634257,4709.551434,1054.628948,4519.693844,3646.204038,3067.627514
0.03,3303.454729,4443.191905,3012.999281,2200.586709,1500.405813,2039.091024,3207.239734,3675.441241,1500.405813,5156.372820,...,3561.342822,3489.129959,1435.719992,2965.031100,3764.471412,4658.921927,1032.142403,4463.665931,3587.955747,3034.324868
0.04,3241.885631,4404.034539,2884.017609,2200.586709,1472.965182,1948.128617,3121.433655,3636.513616,1475.987724,4954.626018,...,3550.509446,3465.636344,1426.954398,2965.031100,3610.779473,4531.490628,1019.805824,4378.066808,3541.251755,2997.205258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.96,137.812442,165.407477,202.369186,184.135576,219.118653,195.880398,219.118653,239.190660,200.710393,169.357494,...,237.561211,206.940127,236.162114,185.357045,173.175160,182.587401,174.680821,144.713480,281.090688,271.243137
0.97,137.476827,165.407477,200.710393,184.135576,207.086012,188.718095,218.695651,239.190660,200.710393,169.357494,...,234.469668,205.179474,231.404869,182.471627,166.013277,180.271912,173.016190,140.783869,280.647879,269.327566
0.98,133.255586,165.308180,200.710393,184.135576,207.086012,187.201563,208.843771,239.190660,198.907090,165.269565,...,231.404869,203.048407,231.404869,175.492510,166.013277,180.271912,171.188037,139.203689,274.375805,269.327566
0.99,130.226808,160.886096,198.907090,181.255631,202.149792,186.611800,207.086012,239.190660,196.274334,160.886096,...,229.807615,201.092877,226.122222,174.153840,161.142389,174.680821,165.902120,136.224430,266.945577,267.970343


In [18]:
monthly_fdcs=pd.DataFrame(index=np.arange(0,1.01,0.01),columns=[1,2,3,4,5,6,7,8,9,10,11,12])
for col in monthly_fdcs.columns:
    monthly_fdcs[col]=np.percentile(daily.loc[daily['WaterMonth']==col]['Flow'],((1-monthly_fdcs.index)*100))

monthly_fdcs

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
0.00,478.505711,667.761867,1147.327228,3908.489301,9912.101075,9500.241315,8571.638656,5693.519050,3969.587328,1861.266120,916.553878,559.898115
0.01,423.953411,552.272153,898.408055,2185.294844,5316.706998,8001.138179,6293.646314,5287.236076,2842.753269,1384.193879,722.611226,486.033980
0.02,392.323910,501.068035,826.859816,1783.095744,4687.581029,6915.664409,5977.788550,4673.033463,2559.356618,1228.050761,657.904744,462.136659
0.03,375.998536,470.313777,787.137043,1547.662033,4441.830630,6445.957376,5655.412805,4388.550843,2378.906443,1179.833397,615.708089,441.966484
0.04,363.142362,441.966484,742.234113,1410.793339,3965.444717,6002.962571,5537.063002,4162.299828,2294.793264,1124.760697,592.232702,432.392338
...,...,...,...,...,...,...,...,...,...,...,...,...
0.96,153.058273,167.422296,246.325998,402.260470,565.084809,776.634095,930.541527,836.876150,407.227222,301.875363,238.928599,188.718095
0.97,144.018380,161.142389,240.607240,392.323910,537.686382,755.778127,900.772271,786.564725,386.273841,285.951774,228.096602,178.836240
0.98,140.027611,155.942773,234.052042,375.998536,487.283775,733.545620,867.941781,692.398031,357.987631,267.064834,215.343927,166.914347
0.99,138.287793,142.424709,209.421297,330.304729,462.136659,619.803167,821.604217,565.084809,320.511784,249.427144,203.895358,158.196416


In [19]:
yearly['MeanQ3070']=annual_fdcs.loc[(annual_fdcs.index>=0.3) & (annual_fdcs.index<=0.7)].mean()
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart,MeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1924,130.226808,571.451055,996.952493,3445.656272,3315.429465,31.439894,-10.044250,-5.145064,-39.692184,-10.105979,,24 days,1924-10-25,672.722688
1925,159.364330,504.517440,1121.920521,4607.486825,4448.122495,35.380886,1.231707,26.838788,-26.198646,1.162239,,29 days,1925-10-30,568.045210
1926,193.122845,571.451055,994.109604,3303.454729,3110.331884,31.350240,-10.300766,-9.059709,-10.565134,-10.362319,19.099237,23 days,1926-10-24,691.314607
1927,178.542186,518.534927,849.631440,2252.949427,2074.407241,26.867385,-23.337136,-37.978906,-17.317413,-23.179852,19.614706,8 days,1927-10-09,562.578023
1928,200.710393,390.897659,569.273730,1569.566645,1368.856253,17.952616,-48.634016,-56.791644,-7.051354,-48.669265,15.583496,28 days,1928-10-29,430.853205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017,171.188037,814.797485,1611.713441,4855.789688,4684.601650,50.826995,45.426079,33.674279,-20.723107,45.326285,37.437157,18 days,2017-10-19,1064.416949
2018,158.591340,462.702761,512.496300,1103.622889,945.031549,16.162083,-53.757085,-69.618537,-26.556616,-53.788818,38.001919,41 days,2018-11-11,459.408040
2019,130.594892,605.738786,1376.892187,4709.551434,4578.956542,43.540635,24.237986,29.648509,-39.521724,24.492876,38.008390,12 days,2019-10-13,708.340556
2020,261.920014,786.688087,1353.436401,3732.969062,3471.049047,42.681970,22.121553,2.764325,21.294720,22.037751,,32 days,2020-11-02,920.225564


In [20]:
fdc=pd.DataFrame({'Exceedance': np.arange(0,1.001,0.001)}).set_index('Exceedance')
fdc['Mean']=np.percentile(yearly['Flow_mean'],((1-fdc.index)*100))
fdc['Max']=np.percentile(yearly['Flow_max'],((1-fdc.index)*100))
fdc['Min']=np.percentile(yearly['Flow_min'],((1-fdc.index)*100))
fdc['Median']=np.percentile(yearly['Flow_median'],((1-fdc.index)*100))
fdc['MeanQ3070']=np.percentile(yearly['MeanQ3070'],((1-fdc.index)*100))

fdc

Unnamed: 0_level_0,Mean,Max,Min,Median,MeanQ3070
Exceedance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.000,2338.954021,9912.101075,339.734146,1111.584040,1265.831036
0.001,2321.176525,9835.756758,339.355284,1104.159145,1258.314862
0.002,2303.399029,9759.412442,338.976422,1096.734249,1250.798689
0.003,2285.621533,9683.068125,338.597560,1089.309354,1243.282516
0.004,2267.844038,9606.723808,338.218698,1081.884459,1235.766343
...,...,...,...,...,...
0.996,385.408882,871.495454,130.226808,269.656018,296.452582
0.997,380.774605,864.353840,130.226808,269.066599,294.822210
0.998,376.140327,857.212227,130.226808,268.477181,293.191839
0.999,371.506050,850.070614,130.226808,267.887762,291.561468


In [21]:
yearly['ExceedanceMean']=pd.merge_asof(yearly.reset_index().sort_values('Flow_mean'),fdc.reset_index().sort_values('Mean'),left_on='Flow_mean',right_on='Mean').set_index('WaterYear')['Exceedance']
yearly['ExceedanceMedian']=pd.merge_asof(yearly.reset_index().sort_values('Flow_median'),fdc.reset_index().sort_values('Median'),left_on='Flow_median',right_on='Median').set_index('WaterYear')['Exceedance']
yearly['ExceedanceMeanQ3070']=pd.merge_asof(yearly.reset_index().sort_values('MeanQ3070'),fdc.reset_index().sort_values('MeanQ3070'),left_on='MeanQ3070',right_on='MeanQ3070').set_index('WaterYear')['Exceedance']
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart,MeanQ3070,ExceedanceMean,ExceedanceMedian,ExceedanceMeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1924,130.226808,571.451055,996.952493,3445.656272,3315.429465,31.439894,-10.044250,-5.145064,-39.692184,-10.105979,,24 days,1924-10-25,672.722688,0.578,0.593,0.516
1925,159.364330,504.517440,1121.920521,4607.486825,4448.122495,35.380886,1.231707,26.838788,-26.198646,1.162239,,29 days,1925-10-30,568.045210,0.433,0.732,0.743
1926,193.122845,571.451055,994.109604,3303.454729,3110.331884,31.350240,-10.300766,-9.059709,-10.565134,-10.362319,19.099237,23 days,1926-10-24,691.314607,0.588,0.593,0.475
1927,178.542186,518.534927,849.631440,2252.949427,2074.407241,26.867385,-23.337136,-37.978906,-17.317413,-23.179852,19.614706,8 days,1927-10-09,562.578023,0.702,0.720,0.774
1928,200.710393,390.897659,569.273730,1569.566645,1368.856253,17.952616,-48.634016,-56.791644,-7.051354,-48.669265,15.583496,28 days,1928-10-29,430.853205,0.928,0.959,0.949
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017,171.188037,814.797485,1611.713441,4855.789688,4684.601650,50.826995,45.426079,33.674279,-20.723107,45.326285,37.437157,18 days,2017-10-19,1064.416949,0.104,0.176,0.052
2018,158.591340,462.702761,512.496300,1103.622889,945.031549,16.162083,-53.757085,-69.618537,-26.556616,-53.788818,38.001919,41 days,2018-11-11,459.408040,0.959,0.824,0.908
2019,130.594892,605.738786,1376.892187,4709.551434,4578.956542,43.540635,24.237986,29.648509,-39.521724,24.492876,38.008390,12 days,2019-10-13,708.340556,0.227,0.454,0.433
2020,261.920014,786.688087,1353.436401,3732.969062,3471.049047,42.681970,22.121553,2.764325,21.294720,22.037751,,32 days,2020-11-02,920.225564,0.248,0.186,0.176


## Calendar months

Produce summaries of flow by calendar month

In [22]:
calmonthly=pd.DataFrame({'WaterMonth': [1,2,3,4,5,6,7,8,9,10,11,12],'MonthName': ['Oct','Nov','Dec','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep'], 'Month':[10,11,12,1,2,3,4,5,6,7,8,9]})
calmonthly=calmonthly.set_index('WaterMonth')
calmonthly['Flow_min']=daily[['WaterMonth','Flow']].groupby('WaterMonth').min()
calmonthly['Flow_mean']=daily[['WaterMonth','Flow']].groupby('WaterMonth').mean()
calmonthly['Flow_median']=daily[['WaterMonth','Flow']].groupby('WaterMonth').median()
calmonthly['Flow_max']=daily[['WaterMonth','Flow']].groupby('WaterMonth').max()
calmonthly['Flow_std']=daily[['WaterMonth','Flow']].groupby('WaterMonth').std()
calmonthly['Flow_coefvar']=(calmonthly['Flow_std']/calmonthly['Flow_mean']*100).round(1)
calmonthly['Flow_difference_median']=daily[['WaterMonth','Flow_difference']].groupby('WaterMonth').median()
calmonthly['Flow_difference_mean']=daily[['WaterMonth','Flow_difference']].groupby('WaterMonth').mean()
calmonthly['Flow_difference_pct_mean']=daily[['WaterMonth','Flow_difference_pct']].groupby('WaterMonth').mean()
calmonthly

Unnamed: 0_level_0,MonthName,Month,Flow_min,Flow_mean,Flow_median,Flow_max,Flow_std,Flow_coefvar,Flow_difference_median,Flow_difference_mean,Flow_difference_pct_mean
WaterMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Oct,10,130.226808,239.586164,231.404869,478.505711,58.52321,24.4,0.590268,2.04278,0.008757
2,Nov,11,130.226808,275.513179,261.075465,667.761867,80.204126,29.1,2.543492,4.246509,0.015041
3,Dec,12,188.718095,433.164943,407.227222,1147.327228,142.490203,32.9,5.565043,8.824996,0.019849
4,Jan,1,269.414462,727.422012,648.306537,3908.489301,339.260205,46.6,8.657045,16.212995,0.0195
5,Feb,2,390.897659,1361.392661,977.260017,9912.101075,1154.079257,84.8,14.143433,34.680914,0.021174
6,Mar,3,518.534927,2546.228156,2185.294844,9500.241315,1643.750118,64.6,27.135893,55.39299,0.021012
7,Apr,4,705.134327,2987.302029,2961.886698,8571.638656,1366.999358,45.8,22.848941,35.885942,0.011665
8,May,5,421.275798,2253.617141,2161.127291,5693.51905,956.601897,42.4,31.351153,35.849844,0.016172
9,Jun,6,269.414462,1223.989135,1160.715298,3969.587328,554.046067,45.3,26.589497,29.796264,0.024561
10,Jul,7,223.296205,605.494238,547.555407,1861.26612,242.262373,40.0,8.696563,11.895163,0.017784


## Calendar Month Flow exceedance

Flow exceedance values by calendar month.

P90 is flow which is exceeded for 90% of the time.

In [23]:

calmonthly['Flow_P95']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.05)
calmonthly['Flow_P90']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.1)
#calmonthly['Flow_P80']=monthly[['Month','Flow_mean']].groupby('Month').quantile(0.2)
calmonthly['Flow_P75']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.25)
calmonthly['Flow_P50']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.5)
calmonthly['Flow_P25']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.75)
#calmonthly['Flow_P20']=monthly[['Month','Flow_mean']].groupby('Month').quantile(0.8)
calmonthly['Flow_P10']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.9)
calmonthly['Flow_P05']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.95)

calmonthly

Unnamed: 0_level_0,MonthName,Month,Flow_min,Flow_mean,Flow_median,Flow_max,Flow_std,Flow_coefvar,Flow_difference_median,Flow_difference_mean,Flow_difference_pct_mean,Flow_P95,Flow_P90,Flow_P75,Flow_P50,Flow_P25,Flow_P10,Flow_P05
WaterMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,Oct,10,130.226808,239.586164,231.404869,478.505711,58.52321,24.4,0.590268,2.04278,0.008757,161.326377,171.966577,201.443799,228.999983,273.836296,306.151504,341.97568
2,Nov,11,130.226808,275.513179,261.075465,667.761867,80.204126,29.1,2.543492,4.246509,0.015041,179.34665,202.188514,222.425157,265.931076,306.23644,376.544035,410.861618
3,Dec,12,188.718095,433.164943,407.227222,1147.327228,142.490203,32.9,5.565043,8.824996,0.019849,287.018424,306.820067,344.552266,403.167772,508.677308,568.96758,698.296859
4,Jan,1,269.414462,727.422012,648.306537,3908.489301,339.260205,46.6,8.657045,16.212995,0.0195,446.330057,482.06905,551.018859,638.986227,825.681984,997.276847,1293.270532
5,Feb,2,390.897659,1361.392661,977.260017,9912.101075,1154.079257,84.8,14.143433,34.680914,0.021174,642.963402,700.319298,828.817969,1003.461817,1453.771485,2486.969025,3169.449119
6,Mar,3,518.534927,2546.228156,2185.294844,9500.241315,1643.750118,64.6,27.135893,55.39299,0.021012,861.165038,919.100593,1203.592038,2350.764222,3269.857305,4425.486829,5634.96403
7,Apr,4,705.134327,2987.302029,2961.886698,8571.638656,1366.999358,45.8,22.848941,35.885942,0.011665,1076.70081,1255.240465,1951.131811,2959.508861,3942.900009,4733.503307,5135.20209
8,May,5,421.275798,2253.617141,2161.127291,5693.51905,956.601897,42.4,31.351153,35.849844,0.016172,941.422216,1133.801205,1671.84028,2273.748245,2717.308194,3345.483825,3796.95218
9,Jun,6,269.414462,1223.989135,1160.715298,3969.587328,554.046067,45.3,26.589497,29.796264,0.024561,456.034496,647.639433,876.128029,1198.420448,1491.945279,1817.112011,2079.076178
10,Jul,7,223.296205,605.494238,547.555407,1861.26612,242.262373,40.0,8.696563,11.895163,0.017784,329.345865,373.518404,454.053132,561.453868,739.894595,874.033285,1025.253416


## Prepare the Representative Years Summaries

In [24]:

selected = pd.read_csv("./input_data/" + "selected_years.csv").rename(columns={"Year": "WaterYear"}).set_index('WaterYear')
selected['Flow_min']=yearly['Flow_min']
selected['Flow_mean']=yearly['Flow_mean']
selected['Flow_max']=yearly['Flow_max']
selected['Volume']=yearly['Volume']
selected['ExceedanceMean']=yearly['ExceedanceMean']
selected['ExceedanceMedian']=yearly['ExceedanceMedian']
selected['ExceedanceMeanQ3070']=yearly['ExceedanceMeanQ3070']

selected

Unnamed: 0_level_0,Class,Flow_Exceedance,Flow_min,Flow_mean,Flow_max,Volume,ExceedanceMean,ExceedanceMedian,ExceedanceMeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1967,Very Wet,Q3,256.665904,1868.202233,5594.069795,59.077038,0.042,0.062,0.021
2013,Wet,Q12,200.087123,1345.002587,3618.067429,42.416002,0.258,0.139,0.114
2002,Median,Q50,197.88398,1078.126668,3880.402536,33.999803,0.516,0.464,0.495
1990,Dry,Q90,202.369186,772.54655,2294.569526,24.363028,0.774,0.918,0.887
1996,Very Dry,Q97,130.226808,554.682896,1400.024423,17.49248,0.939,0.97,0.959


In [25]:
flow_fdc=pd.DataFrame({'Exceedance': np.arange(0,1.001,0.001)}).set_index('Exceedance')
flow_fdc['Flow']=np.percentile(daily['Flow'],((1-flow_fdc.index)*100))
flow_fdc

Unnamed: 0_level_0,Flow
Exceedance,Unnamed: 1_level_1
0.000,9912.101075
0.001,8759.321492
0.002,7711.304088
0.003,6867.963397
0.004,6348.595621
...,...
0.996,144.018380
0.997,140.907676
0.998,138.409487
0.999,137.388414


In [26]:
floods = pd.read_csv("./input_data/" + "flood_return.csv").set_index('ReturnYears')
floods[['LastDate','WaterYear']]=pd.merge_asof(daily.reset_index().sort_values('Flow'),floods.reset_index(),left_on='Flow',right_on='Flow')[['Date','WaterYear','ReturnYears']].groupby('ReturnYears').max()
floods['YearsSince']=2019-floods['WaterYear']
floods


Unnamed: 0_level_0,Flow,LastDate,WaterYear,YearsSince
ReturnYears,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,3418.0,2021-03-19,2020.0,-1.0
5,5124.0,2010-04-28,2009.0,10.0
10,6218.0,1978-04-22,1977.0,42.0
15,6724.0,1969-04-21,1968.0,51.0
20,7231.0,1969-04-18,1968.0,51.0
50,8489.0,1969-04-05,1968.0,51.0
100,9395.0,1958-03-01,1957.0,62.0
200,10272.0,NaT,,
500,11391.0,NaT,,
1000,12212.0,NaT,,


In [27]:
for flood in floods.reset_index().itertuples():
    if flood.ReturnYears<=100:     
        floods.at[flood.ReturnYears,'MeanDays']=daily.loc[daily['Flow']>=flood.Flow].groupby('WaterYear').count().mean()['Flow']
floods

Unnamed: 0_level_0,Flow,LastDate,WaterYear,YearsSince,MeanDays
ReturnYears,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,3418.0,2021-03-19,2020.0,-1.0,45.72
5,5124.0,2010-04-28,2009.0,10.0,27.0
10,6218.0,1978-04-22,1977.0,42.0,33.2
15,6724.0,1969-04-21,1968.0,51.0,29.0
20,7231.0,1969-04-18,1968.0,51.0,30.0
50,8489.0,1969-04-05,1968.0,51.0,15.666667
100,9395.0,1958-03-01,1957.0,62.0,12.0
200,10272.0,NaT,,,
500,11391.0,NaT,,,
1000,12212.0,NaT,,,


## Weekly

In [28]:
daily.head(2)

Unnamed: 0_level_0,LaggedDate,VicFalls,Conversion,Flow,Exceedance,Year,Month,Day,MonthId,WaterYear,WaterMonth,WaterDay,WaterWeek,Volume,Flow_difference,Flow_difference_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1924-10-01,1924-10-12,100.0,1.384095,138.409487,0.998,1924,10,1,1924.1,1924,1,1,1,0.011959,,
1924-10-02,1924-10-13,100.0,1.384095,138.409487,0.998,1924,10,2,1924.1,1924,1,2,1,0.011959,0.0,0.0


In [29]:
weekly=daily.groupby(["WaterYear","WaterWeek"]).mean().drop(['VicFalls','Conversion','Volume','Flow_difference','Flow_difference_pct','Month','WaterMonth','Year','MonthId','Day','WaterDay'],axis=1)
weekly=weekly.join(daily.reset_index().groupby(["WaterYear","WaterWeek"]).agg(    
   Flow_max=('Flow', 'max'),
   Flow_min=('Flow', 'min'), 
   Year=('Year','min'),
   Volume=('Volume', 'sum'),
   Date=('Date','min')
))
weekly['Flow_difference']=weekly['Flow']-weekly['Flow'].shift(1)
weekly['Flow_difference_abs']=np.abs(weekly['Flow']-weekly['Flow'].shift(1))
weekly['Flow_difference_pct']=weekly['Flow_difference']/weekly['Flow']
weekly['Flow_difference_abs_pct']=weekly['Flow_difference_abs']/weekly['Flow']
weekly

Unnamed: 0_level_0,Unnamed: 1_level_0,Flow,Exceedance,Flow_max,Flow_min,Year,Volume,Date,Flow_difference,Flow_difference_abs,Flow_difference_pct,Flow_difference_abs_pct
WaterYear,WaterWeek,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1924,1,138.409487,0.998000,138.409487,138.409487,1924,0.083710,1924-10-01,,,,
1924,2,138.409487,0.998000,138.409487,138.409487,1924,0.083710,1924-10-08,0.000000,0.000000,0.000000,0.000000
1924,3,138.277348,0.998143,138.409487,137.484515,1924,0.083630,1924-10-15,-0.132139,0.132139,-0.000956,0.000956
1924,4,134.794662,0.999000,142.600184,130.226808,1924,0.081524,1924-10-22,-3.482685,3.482685,-0.025837,0.025837
1924,5,133.009668,0.999857,137.388414,130.226808,1924,0.080444,1924-10-29,-1.784995,1.784995,-0.013420,0.013420
...,...,...,...,...,...,...,...,...,...,...,...,...
2021,49,329.501263,0.742429,339.277063,321.199104,2022,0.199282,2022-09-02,-22.530546,22.530546,-0.068378,0.068378
2021,50,309.458675,0.770286,317.676254,301.974962,2022,0.187161,2022-09-09,-20.042588,20.042588,-0.064767,0.064767
2021,51,288.344242,0.803714,299.800463,281.783444,2022,0.174391,2022-09-16,-21.114433,21.114433,-0.073226,0.073226
2021,52,271.680619,0.832000,279.634645,265.901164,2022,0.164312,2022-09-23,-16.663623,16.663623,-0.061335,0.061335


In [30]:
weekly['IsPeak']=(weekly['Flow']>weekly['Flow'].shift(1)) & (weekly['Flow']>weekly['Flow'].shift(-1))
weekly.loc[weekly['IsPeak']==True]
weekly=weekly.reset_index()
weekly['YearWeek']=weekly['WaterYear']*1.0+weekly['WaterWeek']/100
weekly=weekly.set_index('YearWeek')
weekly['Yearly_max']=weekly.reset_index().merge(yearly,left_on='WaterYear',right_on='WaterYear')[['YearWeek','Flow_max_y']].set_index('YearWeek')
weekly['IsPeak'].where(weekly['Flow']>weekly['Yearly_max']/2,other=False,inplace=True)
weekly

Unnamed: 0_level_0,WaterYear,WaterWeek,Flow,Exceedance,Flow_max,Flow_min,Year,Volume,Date,Flow_difference,Flow_difference_abs,Flow_difference_pct,Flow_difference_abs_pct,IsPeak,Yearly_max
YearWeek,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1924.01,1924,1,138.409487,0.998000,138.409487,138.409487,1924,0.083710,1924-10-01,,,,,False,3445.656272
1924.02,1924,2,138.409487,0.998000,138.409487,138.409487,1924,0.083710,1924-10-08,0.000000,0.000000,0.000000,0.000000,False,3445.656272
1924.03,1924,3,138.277348,0.998143,138.409487,137.484515,1924,0.083630,1924-10-15,-0.132139,0.132139,-0.000956,0.000956,False,3445.656272
1924.04,1924,4,134.794662,0.999000,142.600184,130.226808,1924,0.081524,1924-10-22,-3.482685,3.482685,-0.025837,0.025837,False,3445.656272
1924.05,1924,5,133.009668,0.999857,137.388414,130.226808,1924,0.080444,1924-10-29,-1.784995,1.784995,-0.013420,0.013420,False,3445.656272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021.49,2021,49,329.501263,0.742429,339.277063,321.199104,2022,0.199282,2022-09-02,-22.530546,22.530546,-0.068378,0.068378,False,3116.982122
2021.50,2021,50,309.458675,0.770286,317.676254,301.974962,2022,0.187161,2022-09-09,-20.042588,20.042588,-0.064767,0.064767,False,3116.982122
2021.51,2021,51,288.344242,0.803714,299.800463,281.783444,2022,0.174391,2022-09-16,-21.114433,21.114433,-0.073226,0.073226,False,3116.982122
2021.52,2021,52,271.680619,0.832000,279.634645,265.901164,2022,0.164312,2022-09-23,-16.663623,16.663623,-0.061335,0.061335,False,3116.982122


In [35]:
weekly.loc[weekly[['WaterYear','Flow']].groupby('WaterYear').idxmax().set_index('Flow').index,'IsMax']=True
weekly.loc[weekly[['Year','Flow']].groupby('Year').idxmin().set_index('Flow').index,'IsMin']=True
weekly

Unnamed: 0_level_0,WaterYear,WaterWeek,Flow,Exceedance,Flow_max,Flow_min,Year,Volume,Date,Flow_difference,Flow_difference_abs,Flow_difference_pct,Flow_difference_abs_pct,IsPeak,Yearly_max,IsMax,IsMin
YearWeek,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1924.01,1924,1,138.409487,0.998000,138.409487,138.409487,1924,0.083710,1924-10-01,,,,,False,3445.656272,,
1924.02,1924,2,138.409487,0.998000,138.409487,138.409487,1924,0.083710,1924-10-08,0.000000,0.000000,0.000000,0.000000,False,3445.656272,,
1924.03,1924,3,138.277348,0.998143,138.409487,137.484515,1924,0.083630,1924-10-15,-0.132139,0.132139,-0.000956,0.000956,False,3445.656272,,
1924.04,1924,4,134.794662,0.999000,142.600184,130.226808,1924,0.081524,1924-10-22,-3.482685,3.482685,-0.025837,0.025837,False,3445.656272,,
1924.05,1924,5,133.009668,0.999857,137.388414,130.226808,1924,0.080444,1924-10-29,-1.784995,1.784995,-0.013420,0.013420,False,3445.656272,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021.49,2021,49,329.501263,0.742429,339.277063,321.199104,2022,0.199282,2022-09-02,-22.530546,22.530546,-0.068378,0.068378,False,3116.982122,,
2021.50,2021,50,309.458675,0.770286,317.676254,301.974962,2022,0.187161,2022-09-09,-20.042588,20.042588,-0.064767,0.064767,False,3116.982122,,
2021.51,2021,51,288.344242,0.803714,299.800463,281.783444,2022,0.174391,2022-09-16,-21.114433,21.114433,-0.073226,0.073226,False,3116.982122,,
2021.52,2021,52,271.680619,0.832000,279.634645,265.901164,2022,0.164312,2022-09-23,-16.663623,16.663623,-0.061335,0.061335,False,3116.982122,,


In [33]:
weekly_slim=weekly[['WaterYear','WaterWeek','Flow']]
weekly_slim

Unnamed: 0_level_0,WaterYear,WaterWeek,Flow
YearWeek,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1924.01,1924,1,138.409487
1924.02,1924,2,138.409487
1924.03,1924,3,138.277348
1924.04,1924,4,134.794662
1924.05,1924,5,133.009668
...,...,...,...
2021.49,2021,49,329.501263
2021.50,2021,50,309.458675
2021.51,2021,51,288.344242
2021.52,2021,52,271.680619


In [34]:
yearly['Max_week']=weekly.loc[weekly[['WaterYear','Flow']].groupby('WaterYear').idxmax()['Flow']].reset_index()[['WaterYear','WaterWeek']].set_index('WaterYear')['WaterWeek']
yearly.head(2)

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart,MeanQ3070,ExceedanceMean,ExceedanceMedian,ExceedanceMeanQ3070,Max_week
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1924,130.226808,571.451055,996.952493,3445.656272,3315.429465,31.439894,-10.04425,-5.145064,-39.692184,-10.105979,,24 days,1924-10-25,672.722688,0.578,0.593,0.516,27
1925,159.36433,504.51744,1121.920521,4607.486825,4448.122495,35.380886,1.231707,26.838788,-26.198646,1.162239,,29 days,1925-10-30,568.04521,0.433,0.732,0.743,26


In [36]:
yearly['Peak_count']=weekly.loc[weekly['IsPeak']==True].groupby(['WaterYear'])['IsPeak'].count()
yearly['Peak_weeks']=weekly.loc[weekly['IsPeak']==True].groupby(['WaterYear'])['WaterWeek'].apply(list)
yearly['Max_week']=weekly.loc[weekly['IsMax']==True].groupby(['WaterYear'])['WaterWeek'].max()
yearly['Min_weeks']=weekly.loc[weekly['IsMin']==True].groupby(['WaterYear'])['WaterWeek'].apply(list)

In [37]:
waterweeks=weekly[['WaterWeek','Flow','Flow_difference','Flow_difference_abs','Flow_difference_pct','Flow_difference_abs_pct']].groupby(["WaterWeek"]).mean()
waterweeks['Flow_difference_pct']=waterweeks['Flow_difference']/waterweeks['Flow']
waterweeks['Flow_difference_abs_pct']=waterweeks['Flow_difference_abs']/waterweeks['Flow']
waterweeks['Flow_P50']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.5)
waterweeks['Flow_P25']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.75)
waterweeks['Flow_P75']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.25)
waterweeks['Flow_P90']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.10)
waterweeks['Flow_P10']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.90)
waterweeks['YearlyMax_count']=weekly.loc[weekly['IsMax']==True].reset_index()[['WaterWeek','IsMax']].groupby(["WaterWeek"]).count()
waterweeks['YearlyPeak_count']=weekly.loc[weekly['IsPeak']==True].reset_index()[['WaterWeek','IsPeak']].groupby(["WaterWeek"]).count()
waterweeks['YearlyMin_count']=weekly.loc[weekly['IsMin']==True].reset_index()[['WaterWeek','IsMin']].groupby(["WaterWeek"]).count()

waterweeks

Unnamed: 0_level_0,Flow,Flow_difference,Flow_difference_abs,Flow_difference_pct,Flow_difference_abs_pct,Flow_P50,Flow_P25,Flow_P75,Flow_P90,Flow_P10,YearlyMax_count,YearlyPeak_count,YearlyMin_count
WaterWeek,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,248.22145,-7.056124,7.592539,-0.028427,0.030588,242.606619,281.509262,209.525412,184.113712,331.603621,,,7.0
2,239.969,-8.25245,10.106064,-0.03439,0.042114,233.472524,269.818159,203.703088,172.545622,315.738523,,,10.0
3,234.433927,-5.535073,8.365176,-0.02361,0.035682,223.016212,267.470496,199.337132,166.431846,297.189683,,,29.0
4,235.886378,1.452451,8.5205,0.006157,0.036121,226.681596,266.40023,198.292773,168.281889,309.061524,,,21.0
5,241.624513,5.738134,9.411457,0.023748,0.038951,230.239476,272.057488,201.172841,171.14474,320.267138,,,17.0
6,251.831278,10.206765,12.185538,0.04053,0.048388,241.678722,280.821186,206.922446,175.311272,337.233499,,,6.0
7,269.063086,17.231808,17.706009,0.064044,0.065806,258.371969,301.715325,213.481007,197.745312,373.065678,,,2.0
8,291.97587,22.912784,23.06946,0.078475,0.079012,282.849021,323.06687,233.265135,208.647116,395.820956,,,1.0
9,325.370482,33.394611,33.394611,0.102636,0.102636,308.634801,368.515724,260.706165,234.935124,435.392153,,,
10,367.105341,41.734859,42.494244,0.113686,0.115755,352.095645,420.254621,291.756188,257.931441,492.272616,,,


## Save the Data

In [38]:
daily.to_csv(output_data + 'ngonye_daily.csv')
monthly.to_csv(output_data + 'ngonye_flow_monthly.csv')
yearly.to_csv(output_data + 'ngonye_flow_yearly.csv')
calmonthly.to_csv(output_data + 'ngonye_flow_calmonthly.csv')
selected.to_csv(output_data + 'ngonye_flow_selected_years.csv')
fdc.to_csv(output_data + 'ngonye_flow_annual_exceedance.csv')
annual_fdcs.to_csv(output_data + 'ngonye_flow_annual_fdcs.csv')
flow_fdc.to_csv(output_data + 'ngonye_flow_fdc.csv')
monthly_fdcs.to_csv(output_data + 'ngonye_monthly_fdc.csv')
floods.to_csv(output_data + 'ngonye_floods.csv')
weekly.to_csv(output_data + 'ngonye_weekly.csv')
weekly_slim.to_csv(output_data + 'ngonye_weekly_slim.csv')
waterweeks.to_csv(output_data + 'ngonye_waterweekly.csv')


In [27]:
years=daily['WaterYear'].unique().tolist()

for year in years:
    days=daily.loc[daily.WaterYear==year]
    days.to_csv(output_data + '/years/daily_' + str(year) + '.csv')