# c) Ngonye Falls Flow Analysis

Load the synthetic historic daily flow series for Ngonye and produce various summary statistics for later presentation.

## Inputs

| Data                       | Source                                        | Description                                 |
|----------------------------|-----------------------------------------------|---------------------------------------------|
| ngonye_synthetic.csv  | Notebook: b_synthetic_flow_ngonye |Synthetic daily flow series for Ngonye  Falls 1924/25 - 2016/17  |
| selected_years.csv | Mott MacDonald - Ngonye Falls Hydropower Project - 2018 Feasibility Study Update - Final Report Version D | List of representative selected years |


## Outputs
| File                           | Description                                 |
|--------------------------------|---------------------------------------------|
| ngonye_flow_daily.csv          | Daily flow data  |
| ngonye_flow_monthly.csv        | Flow summaries by month  |
| ngonye_flow_yearly.csv         | Flow summaries by year  |
| ngonye_flow_calmonthly.csv     | Flow summaries by calendar month |
| ngonye_flow_selected_years.csv | Flow summaries for selected representative years  |



## Parameters

In [2]:
input_data='./input_data/'
#output_data='./output_data/2020/'
output_data='./output_data/'

## Libraries

In [3]:
import numpy as np
import pandas as pd
import datetime

## Load the Daily Data

In [4]:
#daily = pd.read_csv(output_data + "ngonye_synthetic_2020.csv")
daily = pd.read_csv(output_data + "ngonye_synthetic.csv")
daily.tail(4)

Unnamed: 0,Date,LaggedDate,VicFalls,Conversion,Flow,Exceedance
33964,2017-09-27,2017-10-08,204.0855,0.982689,200.552607,0.955
33965,2017-09-28,2017-10-09,204.0855,0.982689,200.552607,0.955
33966,2017-09-29,2017-10-10,204.0855,0.982689,200.552607,0.955
33967,2017-09-30,2017-10-11,200.9197,0.995488,200.01313,0.958


Index by date and add some other columns for later use. 

Add a column for *WaterYear* which starts on 1st October and runs to 31st September the following year.

In [5]:
daily['Date']=pd.to_datetime(daily['Date'],format="%Y-%m-%d")#"%d/%m/%Y")
daily=daily.set_index(pd.DatetimeIndex(daily['Date']))


In [6]:
daily['Year']=daily.index.year
daily['Month']=daily.index.month
daily['Day']=daily.index.day
daily['MonthId']=daily['Year']+daily['Month']/100
daily['WaterYear']=daily.apply((lambda x: (x['Year'] if x['Month']>=10 else x['Year']-1)),axis=1)
daily['WaterMonth']=daily.apply((lambda x: (x['Month']-9 if x['Month']>=10 else x['Month']+3)),axis=1)
daily['WaterDay']=daily.apply(lambda x: (x['Date']-pd.Timestamp(x['WaterYear'], 10, 1)).days+1,axis=1)
daily['WaterWeek']=np.floor((daily['WaterDay']-1)/7)+1
daily['Volume']=daily['Flow']*60*60*24/(1000*1000*1000)
daily=daily.astype({'WaterWeek': 'int32'})
daily=daily.drop('Date',axis=1)
daily.head(8)

Unnamed: 0_level_0,LaggedDate,VicFalls,Conversion,Flow,Exceedance,Year,Month,Day,MonthId,WaterYear,WaterMonth,WaterDay,WaterWeek,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1924-10-01,1924-10-12,100.0,1.114075,111.40748,0.999,1924,10,1,1924.1,1924,1,1,1,0.009626
1924-10-02,1924-10-13,100.0,1.114075,111.40748,0.999,1924,10,2,1924.1,1924,1,2,1,0.009626
1924-10-03,1924-10-14,100.0,1.114075,111.40748,0.999,1924,10,3,1924.1,1924,1,3,1,0.009626
1924-10-04,1924-10-15,100.0,1.114075,111.40748,0.999,1924,10,4,1924.1,1924,1,4,1,0.009626
1924-10-05,1924-10-16,100.0,1.114075,111.40748,0.999,1924,10,5,1924.1,1924,1,5,1,0.009626
1924-10-06,1924-10-17,100.0,1.114075,111.40748,0.999,1924,10,6,1924.1,1924,1,6,1,0.009626
1924-10-07,1924-10-18,100.0,1.114075,111.40748,0.999,1924,10,7,1924.1,1924,1,7,1,0.009626
1924-10-08,1924-10-19,100.0,1.114075,111.40748,0.999,1924,10,8,1924.1,1924,1,8,2,0.009626


In [7]:
daily['Flow_difference']=np.abs(daily['Flow']-daily['Flow'].shift(1))

## Setup the Monthly Data

Load the monthly data.

In [8]:

monthly=daily.groupby(['MonthId','Year','Month']).size().to_frame(name="Days").reset_index(['Month','Year'])
monthly

Unnamed: 0_level_0,Year,Month,Days
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1924.10,1924,10,31
1924.11,1924,11,30
1924.12,1924,12,31
1925.01,1925,1,31
1925.02,1925,2,28
...,...,...,...
2017.05,2017,5,31
2017.06,2017,6,30
2017.07,2017,7,31
2017.08,2017,8,31


Set the index and add additional columns for later use.

In [9]:

monthly['Day']=1
monthly['DateStart']=pd.to_datetime(monthly[['Year','Month','Day']])
monthly=monthly.drop('Day',1)


monthly['WaterYear']=monthly.apply((lambda x: (x['Year'] if x['Month']>=10 else x['Year']-1)),axis=1)
monthly['WaterMonth']=monthly.apply((lambda x: (x['Month']-9 if x['Month']>=10 else x['Month']+3)),axis=1)


monthly

Unnamed: 0_level_0,Year,Month,Days,DateStart,WaterYear,WaterMonth
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1924.10,1924,10,31,1924-10-01,1924,1
1924.11,1924,11,30,1924-11-01,1924,2
1924.12,1924,12,31,1924-12-01,1924,3
1925.01,1925,1,31,1925-01-01,1924,4
1925.02,1925,2,28,1925-02-01,1924,5
...,...,...,...,...,...,...
2017.05,2017,5,31,2017-05-01,2016,8
2017.06,2017,6,30,2017-06-01,2016,9
2017.07,2017,7,31,2017-07-01,2016,10
2017.08,2017,8,31,2017-08-01,2016,11


## Monthly flow summaries

Add flow summaries to the monthly data

In [10]:
monthly['Flow_min']=daily[['MonthId','Flow']].groupby('MonthId').min()
monthly['Flow_mean']=daily[['MonthId','Flow']].groupby('MonthId').mean()
monthly['Flow_median']=daily[['MonthId','Flow']].groupby('MonthId').median()
monthly['Flow_max']=daily[['MonthId','Flow']].groupby('MonthId').max()
monthly['Volume']=daily[['MonthId','Volume']].groupby('MonthId').sum()
monthly['Flow_range']=monthly['Flow_max']-monthly['Flow_min']
monthly[['Flow_min','Flow_mean','Flow_median','Flow_max','Flow_range']]
monthly

Unnamed: 0_level_0,Year,Month,Days,DateStart,WaterYear,WaterMonth,Flow_min,Flow_mean,Flow_median,Flow_max,Volume,Flow_range
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1924.10,1924,10,31,1924-10-01,1924,1,111.407480,116.382415,111.407480,126.773143,0.311719,15.365663
1924.11,1924,11,30,1924-11-01,1924,2,128.245150,153.980092,145.350957,191.219380,0.399116,62.974230
1924.12,1924,12,31,1924-12-01,1924,3,192.895237,275.677198,271.272511,440.033111,0.738374,247.137874
1925.01,1925,1,31,1925-01-01,1924,4,417.857772,692.738673,595.709061,1182.748622,1.855431,764.890850
1925.02,1925,2,28,1925-02-01,1924,5,1107.711819,1193.678452,1151.412417,1436.997257,2.887747,329.285437
...,...,...,...,...,...,...,...,...,...,...,...,...
2017.05,2017,5,31,2017-05-01,2016,8,1638.344662,2195.626922,2162.373750,2772.806184,5.880767,1134.461522
2017.06,2017,6,30,2017-06-01,2016,9,677.742437,1118.647129,1120.950733,1612.280052,2.899533,934.537615
2017.07,2017,7,31,2017-07-01,2016,10,381.711656,482.692384,466.039270,648.880158,1.292843,267.168501
2017.08,2017,8,31,2017-08-01,2016,11,264.066213,318.145603,320.059362,378.109947,0.852121,114.043734


## Annual Flow

Create a data table for annual (water year) summaries and populate.

In [11]:
yearly=monthly[['WaterYear']].groupby('WaterYear').count()

yearly['Flow_min']=daily[['WaterYear','Flow']].groupby('WaterYear').min()
yearly['Flow_median']=daily[['WaterYear','Flow']].groupby('WaterYear').median()
yearly['Flow_mean']=daily[['WaterYear','Flow']].groupby('WaterYear').mean()
yearly['Flow_max']=daily[['WaterYear','Flow']].groupby('WaterYear').max()
yearly['Flow_range']=yearly['Flow_max']-yearly['Flow_min']
yearly['Volume']=monthly[['WaterYear','Volume']].groupby('WaterYear').sum()

yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1924,111.407480,562.828075,992.089057,3452.248382,3340.840902,31.286520
1925,158.541897,494.681083,1111.873684,4501.386001,4342.844104,35.064049
1926,192.895237,562.828075,990.425137,3313.040641,3120.145404,31.234047
1927,179.826510,512.117820,848.055028,2261.108088,2081.281577,26.817535
1928,195.207704,389.133936,564.473244,1589.152359,1393.944655,17.801228
...,...,...,...,...,...,...
2012,223.273423,689.582006,1336.562700,3614.802471,3391.529049,42.149841
2013,195.061590,808.999209,1340.578429,3574.946119,3379.884528,42.276481
2014,208.780681,542.376282,697.100110,1460.253290,1251.472609,21.983749
2015,176.218173,481.566763,1013.409963,2975.524395,2799.306221,32.046455


In [12]:
Flow_mean_mean=yearly['Flow_mean'].describe()['mean']
Flow_max_mean=yearly['Flow_max'].describe()['mean']
Flow_min_mean=yearly['Flow_min'].describe()['mean']
Volume_mean=yearly['Volume'].describe()['mean']


yearly['Flow_mean_pct_var']=(yearly['Flow_mean']-Flow_mean_mean)/Flow_mean_mean*100
yearly['Flow_max_pct_var']=(yearly['Flow_max']-Flow_max_mean)/Flow_max_mean*100
yearly['Flow_min_pct_var']=(yearly['Flow_min']-Flow_min_mean)/Flow_min_mean*100
yearly['Volume_pct_var']=(yearly['Volume']-Volume_mean)/Volume_mean*100


Flow_mean_mean

1096.6000969319757

In [13]:
yearly['Flow_mean_5yr_mvCoefVar']=yearly['Flow_mean'].rolling(5,center=True).std()/Flow_mean_mean*100
yearly.loc[:,['Flow_mean_pct_var','Volume_pct_var']]

Unnamed: 0_level_0,Flow_mean_pct_var,Volume_pct_var
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1
1924,-9.530461,-9.592773
1925,1.392813,1.322977
1926,-9.682195,-9.744403
1927,-22.665060,-22.506595
1928,-48.525151,-48.560605
...,...,...
2012,21.882417,21.798469
2013,22.248615,22.164415
2014,-36.430782,-36.474566
2015,-7.586187,-7.396824


In [14]:
daily.head(2)

Unnamed: 0_level_0,LaggedDate,VicFalls,Conversion,Flow,Exceedance,Year,Month,Day,MonthId,WaterYear,WaterMonth,WaterDay,WaterWeek,Volume,Flow_difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1924-10-01,1924-10-12,100.0,1.114075,111.40748,0.999,1924,10,1,1924.1,1924,1,1,1,0.009626,
1924-10-02,1924-10-13,100.0,1.114075,111.40748,0.999,1924,10,2,1924.1,1924,1,2,1,0.009626,0.0


In [15]:
mins=daily[['Year','Flow']].groupby('Year').idxmin()
mins=mins.reset_index()
mins['DaysToStart']=mins.apply(lambda x: x['Flow']-pd.Timestamp(datetime.date(x['Year'], 10, 1)),axis=1)
mins=mins.set_index('Year')
yearly['DaysToStart']=mins['DaysToStart']
yearly['SeasonStart']=mins['Flow']
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1924,111.407480,562.828075,992.089057,3452.248382,3340.840902,31.286520,-9.530461,-4.579856,-48.229072,-9.592773,,0 days,1924-10-01
1925,158.541897,494.681083,1111.873684,4501.386001,4342.844104,35.064049,1.392813,24.418307,-26.325763,1.322977,,29 days,1925-10-30
1926,192.895237,562.828075,990.425137,3313.040641,3120.145404,31.234047,-9.682195,-8.427558,-10.361805,-9.744403,19.172583,23 days,1926-10-24
1927,179.826510,512.117820,848.055028,2261.108088,2081.281577,26.817535,-22.665060,-37.502974,-16.434827,-22.506595,19.697328,8 days,1927-10-09
1928,195.207704,389.133936,564.473244,1589.152359,1393.944655,17.801228,-48.525151,-56.075830,-9.287204,-48.560605,15.786859,28 days,1928-10-29
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012,223.273423,689.582006,1336.562700,3614.802471,3391.529049,42.149841,21.882417,-0.086861,3.754903,21.798469,32.543527,22 days,2012-10-23
2013,195.061590,808.999209,1340.578429,3574.946119,3379.884528,42.276481,22.248615,-1.188491,-9.355103,22.164415,24.717243,25 days,2013-10-26
2014,208.780681,542.376282,697.100110,1460.253290,1251.472609,21.983749,-36.430782,-59.638600,-2.979857,-36.474566,24.290943,25 days,2014-10-26
2015,176.218173,481.566763,1013.409963,2975.524395,2799.306221,32.046455,-7.586187,-17.756507,-18.111617,-7.396824,,30 days,2015-10-31


In [16]:
annual_fdcs=pd.DataFrame(index=np.arange(0,1.01,0.01),columns=np.arange(yearly.index.min(),yearly.index.max()+1,1))
for col in annual_fdcs.columns:
    annual_fdcs[col]=np.percentile(daily.loc[daily['WaterYear']==col]['Flow'],((1-annual_fdcs.index)*100))

annual_fdcs

Unnamed: 0,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0.00,3452.248382,4501.386001,3313.040641,2261.108088,1589.152359,2187.823513,3355.917451,3803.114618,1617.352042,5588.707650,...,3367.275472,5796.211608,5149.143298,4355.928308,3544.189347,3614.802471,3574.946119,1460.253290,2975.524395,3889.991232
0.01,3441.005216,4444.488798,3210.822806,2261.108088,1563.480637,2155.208957,3313.040641,3773.148260,1589.152359,5438.818601,...,3275.623165,5741.625810,5110.454412,4355.928308,3526.092281,3580.525700,3539.407167,1460.253290,2975.524395,3857.302531
0.02,3343.911944,4385.141126,3103.529575,2249.359418,1544.907100,2094.979557,3244.545456,3670.338944,1523.864286,5299.444589,...,3184.610484,5687.283314,5038.131582,4305.723319,3496.298115,3539.883375,3508.024769,1448.576499,2971.684495,3825.614209
0.03,3313.040641,4355.316528,3006.520556,2217.372658,1511.071200,2043.723039,3187.836794,3639.447142,1511.071200,5068.368068,...,3156.889385,5586.851353,5019.068676,4269.187465,3466.295110,3528.810877,3484.505752,1438.767729,2962.724728,3750.866579
0.04,3228.508182,4305.170853,2867.157776,2217.372658,1466.370536,1958.747262,3116.989107,3600.306705,1474.502881,4838.446655,...,3138.658467,5473.424311,4984.095161,4235.066569,3384.867235,3526.468702,3462.403074,1430.124168,2962.724728,3572.412990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.96,111.407480,166.264580,199.092559,185.778706,220.530496,192.895237,220.530496,236.194334,195.207704,171.137982,...,249.791314,194.993676,220.039338,232.182724,256.647065,233.614046,201.936335,232.182724,187.940800,176.218173
0.97,111.407480,166.264580,195.207704,185.778706,201.170113,189.239605,219.852172,236.194334,195.207704,171.137982,...,246.848965,190.215195,216.508407,232.182724,251.752880,230.275563,200.552607,226.907998,184.495221,166.873518
0.98,111.407480,164.544380,195.207704,185.778706,201.170113,188.865366,202.164813,236.194334,194.795674,163.875413,...,244.406165,185.228146,216.508407,230.568395,251.752880,226.907998,200.013130,226.907998,178.913516,166.873518
0.99,111.407480,162.781303,194.795674,182.700509,198.433482,188.719829,201.170113,236.194334,192.895237,162.781303,...,242.318356,181.568679,216.508407,226.907998,243.549898,224.727292,196.059633,223.039628,177.714619,163.040615


In [17]:
monthly_fdcs=pd.DataFrame(index=np.arange(0,1.01,0.01),columns=[1,2,3,4,5,6,7,8,9,10,11,12])
for col in monthly_fdcs.columns:
    monthly_fdcs[col]=np.percentile(daily.loc[daily['WaterMonth']==col]['Flow'],((1-monthly_fdcs.index)*100))

monthly_fdcs

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
0.00,470.178949,647.648985,1127.726717,3886.053218,9912.101075,9530.170695,8539.833006,5672.879600,3920.473588,1871.701607,905.421193,550.998323
0.01,422.469819,542.687385,885.109448,2200.991559,5278.010081,8074.134868,6310.128475,5259.545196,2756.637637,1383.578136,703.233714,477.592094
0.02,399.858778,494.681083,817.724683,1855.386768,4532.094175,7030.940877,5965.180906,4498.050921,2489.877515,1199.561484,637.606621,459.260443
0.03,377.116761,468.154343,768.446072,1570.658646,4433.498867,6564.701613,5668.636825,4151.471898,2340.978515,1143.134165,606.736851,442.799181
0.04,360.348801,442.799181,733.851289,1449.727705,3987.361667,6056.465216,5452.641165,3970.811433,2266.016442,1096.498822,583.003491,435.496885
...,...,...,...,...,...,...,...,...,...,...,...,...
0.96,155.869765,166.873518,247.033791,402.902377,556.219534,768.446072,924.683994,805.845992,408.004546,299.180211,231.489459,191.219380
0.97,128.198030,162.781303,240.031205,391.231916,527.337598,739.647543,885.109448,746.380917,380.836942,284.650183,224.671047,183.079094
0.98,124.486097,150.346644,234.767880,373.010125,477.348487,709.845130,841.583297,666.806324,350.219205,265.426549,212.051449,169.193340
0.99,111.407480,142.437600,205.798914,325.660796,455.655156,608.795064,801.025144,550.490693,319.808854,246.195424,199.092559,155.857619


In [18]:
yearly['MeanQ3070']=annual_fdcs.loc[(annual_fdcs.index>=0.3) & (annual_fdcs.index<=0.7)].mean()
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart,MeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1924,111.407480,562.828075,992.089057,3452.248382,3340.840902,31.286520,-9.530461,-4.579856,-48.229072,-9.592773,,0 days,1924-10-01,663.910873
1925,158.541897,494.681083,1111.873684,4501.386001,4342.844104,35.064049,1.392813,24.418307,-26.325763,1.322977,,29 days,1925-10-30,560.136645
1926,192.895237,562.828075,990.425137,3313.040641,3120.145404,31.234047,-9.682195,-8.427558,-10.361805,-9.744403,19.172583,23 days,1926-10-24,683.462554
1927,179.826510,512.117820,848.055028,2261.108088,2081.281577,26.817535,-22.665060,-37.502974,-16.434827,-22.506595,19.697328,8 days,1927-10-09,553.193737
1928,195.207704,389.133936,564.473244,1589.152359,1393.944655,17.801228,-48.525151,-56.075830,-9.287204,-48.560605,15.786859,28 days,1928-10-29,425.954365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012,223.273423,689.582006,1336.562700,3614.802471,3391.529049,42.149841,21.882417,-0.086861,3.754903,21.798469,32.543527,22 days,2012-10-23,861.912642
2013,195.061590,808.999209,1340.578429,3574.946119,3379.884528,42.276481,22.248615,-1.188491,-9.355103,22.164415,24.717243,25 days,2013-10-26,979.811925
2014,208.780681,542.376282,697.100110,1460.253290,1251.472609,21.983749,-36.430782,-59.638600,-2.979857,-36.474566,24.290943,25 days,2014-10-26,592.878045
2015,176.218173,481.566763,1013.409963,2975.524395,2799.306221,32.046455,-7.586187,-17.756507,-18.111617,-7.396824,,30 days,2015-10-31,561.336942


In [19]:
fdc=pd.DataFrame({'Exceedance': np.arange(0,1.001,0.001)}).set_index('Exceedance')
fdc['Mean']=np.percentile(yearly['Flow_mean'],((1-fdc.index)*100))
fdc['Max']=np.percentile(yearly['Flow_max'],((1-fdc.index)*100))
fdc['Min']=np.percentile(yearly['Flow_min'],((1-fdc.index)*100))
fdc['Median']=np.percentile(yearly['Flow_median'],((1-fdc.index)*100))
fdc['MeanQ3070']=np.percentile(yearly['MeanQ3070'],((1-fdc.index)*100))

fdc

Unnamed: 0_level_0,Mean,Max,Min,Median,MeanQ3070
Exceedance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.000,2333.913589,9912.101075,338.321042,1089.456049,1258.612518
0.001,2316.919094,9842.336791,338.059005,1085.876604,1251.665732
0.002,2299.924599,9772.572506,337.796969,1082.297159,1244.718947
0.003,2282.930104,9702.808222,337.534932,1078.717713,1237.772162
0.004,2265.935608,9633.043938,337.272895,1075.138268,1230.825377
...,...,...,...,...,...
0.996,378.412037,850.976129,105.899226,271.948132,295.240979
0.997,373.785825,843.050582,105.097392,271.526661,293.716393
0.998,369.159613,835.125035,104.295558,271.105189,292.191808
0.999,364.533402,827.199488,103.493723,270.683718,290.667222


In [20]:
yearly['ExceedanceMean']=pd.merge_asof(yearly.reset_index().sort_values('Flow_mean'),fdc.reset_index().sort_values('Mean'),left_on='Flow_mean',right_on='Mean').set_index('WaterYear')['Exceedance']
yearly['ExceedanceMedian']=pd.merge_asof(yearly.reset_index().sort_values('Flow_median'),fdc.reset_index().sort_values('Median'),left_on='Flow_median',right_on='Median').set_index('WaterYear')['Exceedance']
yearly['ExceedanceMeanQ3070']=pd.merge_asof(yearly.reset_index().sort_values('MeanQ3070'),fdc.reset_index().sort_values('MeanQ3070'),left_on='MeanQ3070',right_on='MeanQ3070').set_index('WaterYear')['Exceedance']
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart,MeanQ3070,ExceedanceMean,ExceedanceMedian,ExceedanceMeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1924,111.407480,562.828075,992.089057,3452.248382,3340.840902,31.286520,-9.530461,-4.579856,-48.229072,-9.592773,,0 days,1924-10-01,663.910873,0.566,0.577,0.490
1925,158.541897,494.681083,1111.873684,4501.386001,4342.844104,35.064049,1.392813,24.418307,-26.325763,1.322977,,29 days,1925-10-30,560.136645,0.435,0.729,0.740
1926,192.895237,562.828075,990.425137,3313.040641,3120.145404,31.234047,-9.682195,-8.427558,-10.361805,-9.744403,19.172583,23 days,1926-10-24,683.462554,0.577,0.577,0.457
1927,179.826510,512.117820,848.055028,2261.108088,2081.281577,26.817535,-22.665060,-37.502974,-16.434827,-22.506595,19.697328,8 days,1927-10-09,553.193737,0.696,0.716,0.772
1928,195.207704,389.133936,564.473244,1589.152359,1393.944655,17.801228,-48.525151,-56.075830,-9.287204,-48.560605,15.786859,28 days,1928-10-29,425.954365,0.935,0.957,0.946
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012,223.273423,689.582006,1336.562700,3614.802471,3391.529049,42.149841,21.882417,-0.086861,3.754903,21.798469,32.543527,22 days,2012-10-23,861.912642,0.250,0.291,0.196
2013,195.061590,808.999209,1340.578429,3574.946119,3379.884528,42.276481,22.248615,-1.188491,-9.355103,22.164415,24.717243,25 days,2013-10-26,979.811925,0.240,0.149,0.109
2014,208.780681,542.376282,697.100110,1460.253290,1251.472609,21.983749,-36.430782,-59.638600,-2.979857,-36.474566,24.290943,25 days,2014-10-26,592.878045,0.816,0.613,0.653
2015,176.218173,481.566763,1013.409963,2975.524395,2799.306221,32.046455,-7.586187,-17.756507,-18.111617,-7.396824,,30 days,2015-10-31,561.336942,0.544,0.772,0.729


## Calendar months

Produce summaries of flow by calendar month

In [21]:
calmonthly=pd.DataFrame({'WaterMonth': [1,2,3,4,5,6,7,8,9,10,11,12],'MonthName': ['Oct','Nov','Dec','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep'], 'Month':[10,11,12,1,2,3,4,5,6,7,8,9]})
calmonthly=calmonthly.set_index('WaterMonth')
calmonthly['Flow_min']=daily[['WaterMonth','Flow']].groupby('WaterMonth').min()
calmonthly['Flow_mean']=daily[['WaterMonth','Flow']].groupby('WaterMonth').mean()
calmonthly['Flow_median']=daily[['WaterMonth','Flow']].groupby('WaterMonth').median()
calmonthly['Flow_max']=daily[['WaterMonth','Flow']].groupby('WaterMonth').max()
calmonthly['Flow_std']=daily[['WaterMonth','Flow']].groupby('WaterMonth').std()
calmonthly['Flow_coefvar']=(calmonthly['Flow_std']/calmonthly['Flow_mean']*100).round(1)
calmonthly['Flow_difference_median']=daily[['WaterMonth','Flow_difference']].groupby('WaterMonth').median()
calmonthly['Flow_difference_mean']=daily[['WaterMonth','Flow_difference']].groupby('WaterMonth').mean()

calmonthly

Unnamed: 0_level_0,MonthName,Month,Flow_min,Flow_mean,Flow_median,Flow_max,Flow_std,Flow_coefvar,Flow_difference_median,Flow_difference_mean
WaterMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Oct,10,104.715941,238.660886,226.907998,470.178949,60.062263,25.2,0.379423,1.978081
2,Nov,11,102.691889,275.866045,261.696065,647.648985,80.762929,29.3,2.677874,4.306122
3,Dec,12,189.239605,432.274175,409.106916,1127.726717,138.405394,32.0,5.474372,8.653071
4,Jan,1,271.272511,719.155106,630.112599,3886.053218,346.206105,48.1,8.250021,16.105198
5,Feb,2,389.133936,1347.302149,971.432166,9912.101075,1164.334969,86.4,14.228588,34.257749
6,Mar,3,512.11782,2530.627517,2169.222344,9530.170695,1655.902944,65.4,27.478407,55.777539
7,Apr,4,684.68935,2965.046253,2959.582773,8539.833006,1354.222393,45.7,23.348842,35.992421
8,May,5,420.574478,2219.353143,2155.208957,5672.8796,924.20238,41.6,30.883235,35.244076
9,Jun,6,271.272511,1202.711506,1127.726717,3920.473588,551.403835,45.8,27.790675,29.777271
10,Jul,7,222.282836,589.266363,532.519223,1871.701607,236.555563,40.1,8.060916,11.462868


## Calendar Month Flow exceedance

Flow exceedance values by calendar month.

P90 is flow which is exceeded for 90% of the time.

In [22]:

calmonthly['Flow_P95']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.05)
calmonthly['Flow_P90']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.1)
#calmonthly['Flow_P80']=monthly[['Month','Flow_mean']].groupby('Month').quantile(0.2)
calmonthly['Flow_P75']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.25)
calmonthly['Flow_P50']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.5)
calmonthly['Flow_P25']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.75)
#calmonthly['Flow_P20']=monthly[['Month','Flow_mean']].groupby('Month').quantile(0.8)
calmonthly['Flow_P10']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.9)
calmonthly['Flow_P05']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.95)

calmonthly

Unnamed: 0_level_0,MonthName,Month,Flow_min,Flow_mean,Flow_median,Flow_max,Flow_std,Flow_coefvar,Flow_difference_median,Flow_difference_mean,Flow_P95,Flow_P90,Flow_P75,Flow_P50,Flow_P25,Flow_P10,Flow_P05
WaterMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,Oct,10,104.715941,238.660886,226.907998,470.178949,60.062263,25.2,0.379423,1.978081,165.825897,174.06847,198.854712,228.553944,274.379442,308.303716,342.900956
2,Nov,11,102.691889,275.866045,261.696065,647.648985,80.762929,29.3,2.677874,4.306122,177.246956,200.382409,220.49841,265.415353,310.071792,375.766476,409.987002
3,Dec,12,189.239605,432.274175,409.106916,1127.726717,138.405394,32.0,5.474372,8.653071,289.694551,307.73154,346.646602,402.232751,504.355164,563.635871,686.821342
4,Jan,1,271.272511,719.155106,630.112599,3886.053218,346.206105,48.1,8.250021,16.105198,436.077612,478.735213,544.047768,620.581304,811.898477,998.431259,1316.035451
5,Feb,2,389.133936,1347.302149,971.432166,9912.101075,1164.334969,86.4,14.228588,34.257749,627.376418,691.6486,809.232195,987.329902,1419.926149,2480.077173,3200.961332
6,Mar,3,512.11782,2530.627517,2169.222344,9530.170695,1655.902944,65.4,27.478407,55.777539,850.552377,912.982777,1185.55643,2339.768363,3122.687389,4347.734066,5638.633267
7,Apr,4,684.68935,2965.046253,2959.582773,8539.833006,1354.222393,45.7,23.348842,35.992421,1084.109875,1258.243124,1925.274679,2959.610071,3698.135039,4655.112868,5104.73868
8,May,5,420.574478,2219.353143,2155.208957,5672.8796,924.20238,41.6,30.883235,35.244076,877.28009,1129.552309,1684.940273,2236.295172,2674.281341,3218.235479,3622.938224
9,Jun,6,271.272511,1202.711506,1127.726717,3920.473588,551.403835,45.8,27.790675,29.777271,450.378826,631.860811,855.496133,1178.297558,1467.250107,1814.091609,1973.805875
10,Jul,7,222.282836,589.266363,532.519223,1871.701607,236.555563,40.1,8.060916,11.462868,327.23814,370.195596,449.489093,543.299981,690.345489,848.615727,981.384016


## Prepare the Representative Years Summaries

In [23]:
selected = pd.read_csv(input_data + "selected_years.csv").rename(columns={"Year": "WaterYear"}).set_index('WaterYear')
selected['Flow_min']=yearly['Flow_min']
selected['Flow_mean']=yearly['Flow_mean']
selected['Flow_max']=yearly['Flow_max']
selected['Volume']=yearly['Volume']
selected['ExceedanceMean']=yearly['ExceedanceMean']
selected['ExceedanceMedian']=yearly['ExceedanceMedian']
selected['ExceedanceMeanQ3070']=yearly['ExceedanceMeanQ3070']


selected

Unnamed: 0_level_0,Class,Flow_Exceedance,Flow_min,Flow_mean,Flow_max,Volume,ExceedanceMean,ExceedanceMedian,ExceedanceMeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1967,Very Wet,Q3,256.682789,1845.080784,5513.419394,58.345883,0.044,0.063,0.022
2013,Wet,Q12,195.06159,1340.578429,3574.946119,42.276481,0.24,0.149,0.109
2002,Median,Q50,193.637004,1072.800093,3846.356144,33.831824,0.5,0.446,0.479
1990,Dry,Q90,199.092559,770.97938,2293.963201,24.313606,0.772,0.914,0.881
1996,Very Dry,Q97,102.691889,547.165449,1399.622528,17.25541,0.946,0.968,0.957


In [24]:
flow_fdc=pd.DataFrame({'Exceedance': np.arange(0,1.001,0.001)}).set_index('Exceedance')
flow_fdc['Flow']=np.percentile(daily['Flow'],((1-flow_fdc.index)*100))
flow_fdc

Unnamed: 0_level_0,Flow
Exceedance,Unnamed: 1_level_1
0.000,9912.101075
0.001,8829.697418
0.002,7890.806381
0.003,7005.060692
0.004,6479.994666
...,...
0.996,142.406007
0.997,130.554212
0.998,124.486097
0.999,111.407480


In [25]:
floods = pd.read_csv(input_data + "flood_return.csv").set_index('ReturnYears')
floods[['LastDate','WaterYear']]=pd.merge_asof(daily.reset_index().sort_values('Flow'),floods.reset_index(),left_on='Flow',right_on='Flow')[['Date','WaterYear','ReturnYears']].groupby('ReturnYears').max()
floods['YearsSince']=2019-floods['WaterYear']
floods


Unnamed: 0_level_0,Flow,LastDate,WaterYear,YearsSince
ReturnYears,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,3418.0,2017-04-17,2016.0,3.0
5,5124.0,2010-04-26,2009.0,10.0
10,6218.0,1978-04-23,1977.0,42.0
15,6724.0,1969-04-22,1968.0,51.0
20,7231.0,1969-04-18,1968.0,51.0
50,8489.0,1969-04-05,1968.0,51.0
100,9395.0,1958-03-01,1957.0,62.0
200,10272.0,NaT,,
500,11391.0,NaT,,
1000,12212.0,NaT,,


In [26]:
for flood in floods.reset_index().itertuples():
    if flood.ReturnYears<=100:     
        floods.at[flood.ReturnYears,'MeanDays']=daily.loc[daily['Flow']>=flood.Flow].groupby('WaterYear').count().mean()['Flow']
floods

Unnamed: 0_level_0,Flow,LastDate,WaterYear,YearsSince,MeanDays
ReturnYears,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,3418.0,2017-04-17,2016.0,3.0,44.5
5,5124.0,2010-04-26,2009.0,10.0,25.190476
10,6218.0,1978-04-23,1977.0,42.0,33.6
15,6724.0,1969-04-22,1968.0,51.0,30.25
20,7231.0,1969-04-18,1968.0,51.0,30.333333
50,8489.0,1969-04-05,1968.0,51.0,15.666667
100,9395.0,1958-03-01,1957.0,62.0,12.0
200,10272.0,NaT,,,
500,11391.0,NaT,,,
1000,12212.0,NaT,,,


## Weekly

In [27]:
daily.head(2)

Unnamed: 0_level_0,LaggedDate,VicFalls,Conversion,Flow,Exceedance,Year,Month,Day,MonthId,WaterYear,WaterMonth,WaterDay,WaterWeek,Volume,Flow_difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1924-10-01,1924-10-12,100.0,1.114075,111.40748,0.999,1924,10,1,1924.1,1924,1,1,1,0.009626,
1924-10-02,1924-10-13,100.0,1.114075,111.40748,0.999,1924,10,2,1924.1,1924,1,2,1,0.009626,0.0


In [38]:
weekly=daily.groupby(["WaterYear","WaterWeek"]).mean().drop(['VicFalls','Conversion','Volume','Flow_difference','Month','WaterMonth','Year','MonthId','Day','WaterDay'],axis=1)
weekly=weekly.join(daily.groupby(["WaterYear","WaterWeek"]).agg(    
   Flow_max=('Flow', 'max'),
   Flow_min=('Flow', 'min'), 
   Volume=('Volume', 'sum')
))
weekly['Flow_difference']=np.abs(weekly['Flow']-weekly['Flow'].shift(1))
weekly

Unnamed: 0_level_0,Unnamed: 1_level_0,Flow,Exceedance,Flow_max,Flow_min,Volume,Flow_difference
WaterYear,WaterWeek,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1924,1,111.407480,0.999000,111.407480,111.407480,0.067379,
1924,2,111.407480,0.999000,111.407480,111.407480,0.067379,0.000000
1924,3,112.952876,0.999000,122.225257,111.407480,0.068314,1.545397
1924,4,125.656246,0.998000,126.773143,124.486097,0.075997,12.703370
1924,5,129.601800,0.997571,137.637775,125.962097,0.078383,3.945554
...,...,...,...,...,...,...,...
2016,49,250.464258,0.873286,259.367845,240.031205,0.151481,20.792553
2016,50,226.821410,0.915571,234.738655,220.039338,0.137182,23.642848
2016,51,215.657828,0.933429,220.039338,209.678917,0.130430,11.163582
2016,52,200.905599,0.953571,203.023550,200.552607,0.121508,14.752229


In [40]:
weekly.groupby(["WaterWeek"]).quantile(0.5)

Unnamed: 0_level_0,Flow,Exceedance,Flow_max,Flow_min,Volume,Flow_difference
WaterWeek,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,242.002583,0.885429,246.195424,236.194334,0.146363,6.683368
2,230.686445,0.906143,236.194334,222.282836,0.139519,8.914078
3,222.267266,0.922571,226.907998,220.530496,0.134427,7.957321
4,224.613579,0.918,229.365085,222.282836,0.135846,6.705832
5,226.998574,0.912143,234.76788,224.671047,0.137289,7.201642
6,238.819201,0.891143,249.79918,234.738655,0.144438,10.778844
7,258.797652,0.860571,270.262246,249.79918,0.156521,15.692956
8,284.601958,0.809143,295.765464,270.262246,0.172127,20.391229
9,310.643535,0.768857,331.611567,294.723267,0.187877,28.77942
10,352.146395,0.710286,373.010125,335.472818,0.212978,34.674378


In [48]:
waterweeks=weekly['Flow'].groupby(["WaterWeek"]).mean().to_frame()
waterweeks['Flow_P50']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.5)
waterweeks['Flow_P25']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.75)
waterweeks['Flow_P75']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.25)
waterweeks['Flow_P90']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.10)
waterweeks['Flow_P10']=weekly.reset_index()[['WaterWeek','Flow']].groupby(["WaterWeek"]).quantile(0.90)
waterweeks

Unnamed: 0_level_0,Flow,Flow_P50,Flow_P25,Flow_P75,Flow_P90,Flow_P10
WaterWeek,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,247.424567,242.002583,282.694814,206.979197,185.273867,332.674814
2,239.184116,230.686445,270.762559,199.767957,175.849187,321.052465
3,233.340808,222.267266,269.158894,196.059633,168.465652,301.444318
4,234.782733,224.613579,266.899664,193.438219,172.349166,312.289216
5,240.949615,226.998574,274.832185,198.019267,172.171366,324.686626
6,251.805066,238.819201,286.584663,204.064169,178.501496,339.104976
7,269.250848,258.797652,303.360052,217.378812,191.902165,372.393031
8,292.935813,284.601958,330.799756,234.294583,204.032193,397.864494
9,326.443075,310.643535,374.389797,263.462439,234.079908,434.990344
10,367.983624,352.146395,420.28893,292.494298,258.078831,490.386776


## Save the Data

In [49]:
daily.to_csv(output_data + 'ngonye_daily.csv')
monthly.to_csv(output_data + 'ngonye_flow_monthly.csv')
yearly.to_csv(output_data + 'ngonye_flow_yearly.csv')
calmonthly.to_csv(output_data + 'ngonye_flow_calmonthly.csv')
selected.to_csv(output_data + 'ngonye_flow_selected_years.csv')
fdc.to_csv(output_data + 'ngonye_flow_annual_exceedance.csv')
annual_fdcs.to_csv(output_data + 'ngonye_flow_annual_fdcs.csv')
flow_fdc.to_csv(output_data + 'ngonye_flow_fdc.csv')
monthly_fdcs.to_csv(output_data + 'ngonye_monthly_fdc.csv')
floods.to_csv(output_data + 'ngonye_floods.csv')
weekly.to_csv(output_data + 'ngonye_weekly.csv')
waterweeks.to_csv(output_data + 'ngonye_waterweekly.csv')


In [27]:
years=daily['WaterYear'].unique().tolist()

for year in years:
    days=daily.loc[daily.WaterYear==year]
    days.to_csv(output_data + '/years/daily_' + str(year) + '.csv')