# c) Ngonye Falls Flow Analysis

Load the synthetic historic daily flow series for Ngonye and produce various summary statistics for later presentation.

## Inputs

| Data                       | Source                                        | Description                                 |
|----------------------------|-----------------------------------------------|---------------------------------------------|
| ngonye_synthetic.csv  | Notebook: b_synthetic_flow_ngonye |Synthetic daily flow series for Ngonye  Falls 1924/25 - 2016/17  |
| selected_years.csv | Mott MacDonald - Ngonye Falls Hydropower Project - 2018 Feasibility Study Update - Final Report Version D | List of representative selected years |


## Outputs
| File                           | Description                                 |
|--------------------------------|---------------------------------------------|
| ngonye_flow_daily.csv          | Daily flow data  |
| ngonye_flow_monthly.csv        | Flow summaries by month  |
| ngonye_flow_yearly.csv         | Flow summaries by year  |
| ngonye_flow_calmonthly.csv     | Flow summaries by calendar month |
| ngonye_flow_selected_years.csv | Flow summaries for selected representative years  |



## Parameters

In [1]:
input_data='./input_data/'
output_data='./output_data/2020/'

## Libraries

In [2]:
import numpy as np
import pandas as pd
import datetime

## Load the Daily Data

In [3]:
daily = pd.read_csv(output_data + "ngonye_synthetic_2020.csv")
daily.tail(4)

Unnamed: 0,Date,LaggedDate,VicFalls,Conversion,Flow,Exceedance
34927,2020-05-17,2020-05-28,3438.1311,1.035186,3559.1039,0.057
34928,2020-05-18,2020-05-29,3393.3391,1.028636,3490.510716,0.059
34929,2020-05-19,2020-05-30,3304.8137,1.025935,3390.523841,0.063
34930,2020-05-20,2020-05-31,3217.6933,1.031443,3318.867319,0.07


Index by date and add some other columns for later use. 

Add a column for *WaterYear* which starts on 1st October and runs to 31st September the following year.

In [4]:
daily['Date']=pd.to_datetime(daily['Date'],format="%Y-%m-%d")#"%d/%m/%Y")
daily=daily.set_index(pd.DatetimeIndex(daily['Date']))


In [5]:
daily['Year']=daily.index.year
daily['Month']=daily.index.month
daily['Day']=daily.index.day
daily['MonthId']=daily['Year']+daily['Month']/100
daily['WaterYear']=daily.apply((lambda x: (x['Year'] if x['Month']>=10 else x['Year']-1)),axis=1)
daily['WaterMonth']=daily.apply((lambda x: (x['Month']-9 if x['Month']>=10 else x['Month']+3)),axis=1)
daily['WaterDay']=daily.apply(lambda x: (x['Date']-pd.Timestamp(x['WaterYear'], 10, 1)).days+1,axis=1)
daily['WaterWeek']=np.floor((daily['WaterDay']-1)/7)+1
daily['Volume']=daily['Flow']*60*60*24/(1000*1000*1000)
daily=daily.astype({'WaterWeek': 'int32'})
daily=daily.drop('Date',axis=1)
daily.head(8)

Unnamed: 0_level_0,LaggedDate,VicFalls,Conversion,Flow,Exceedance,Year,Month,Day,MonthId,WaterYear,WaterMonth,WaterDay,WaterWeek,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1924-10-01,1924-10-12,100.0,1.407305,140.730461,0.999,1924,10,1,1924.1,1924,1,1,1,0.012159
1924-10-02,1924-10-13,100.0,1.407305,140.730461,0.999,1924,10,2,1924.1,1924,1,2,1,0.012159
1924-10-03,1924-10-14,100.0,1.407305,140.730461,0.999,1924,10,3,1924.1,1924,1,3,1,0.012159
1924-10-04,1924-10-15,100.0,1.407305,140.730461,0.999,1924,10,4,1924.1,1924,1,4,1,0.012159
1924-10-05,1924-10-16,100.0,1.407305,140.730461,0.999,1924,10,5,1924.1,1924,1,5,1,0.012159
1924-10-06,1924-10-17,100.0,1.407305,140.730461,0.999,1924,10,6,1924.1,1924,1,6,1,0.012159
1924-10-07,1924-10-18,100.0,1.407305,140.730461,0.999,1924,10,7,1924.1,1924,1,7,1,0.012159
1924-10-08,1924-10-19,100.0,1.407305,140.730461,0.999,1924,10,8,1924.1,1924,1,8,2,0.012159


In [6]:
daily['Flow_difference']=np.abs(daily['Flow']-daily['Flow'].shift(1))

## Setup the Monthly Data

Load the monthly data.

In [7]:

monthly=daily.groupby(['MonthId','Year','Month']).size().to_frame(name="Days").reset_index(['Month','Year'])
monthly

Unnamed: 0_level_0,Year,Month,Days
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1924.10,1924,10,31
1924.11,1924,11,30
1924.12,1924,12,31
1925.01,1925,1,31
1925.02,1925,2,28
...,...,...,...
2020.01,2020,1,31
2020.02,2020,2,29
2020.03,2020,3,31
2020.04,2020,4,30


Set the index and add additional columns for later use.

In [8]:

monthly['Day']=1
monthly['DateStart']=pd.to_datetime(monthly[['Year','Month','Day']])
monthly=monthly.drop('Day',1)


monthly['WaterYear']=monthly.apply((lambda x: (x['Year'] if x['Month']>=10 else x['Year']-1)),axis=1)
monthly['WaterMonth']=monthly.apply((lambda x: (x['Month']-9 if x['Month']>=10 else x['Month']+3)),axis=1)


monthly

Unnamed: 0_level_0,Year,Month,Days,DateStart,WaterYear,WaterMonth
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1924.10,1924,10,31,1924-10-01,1924,1
1924.11,1924,11,30,1924-11-01,1924,2
1924.12,1924,12,31,1924-12-01,1924,3
1925.01,1925,1,31,1925-01-01,1924,4
1925.02,1925,2,28,1925-02-01,1924,5
...,...,...,...,...,...,...
2020.01,2020,1,31,2020-01-01,2019,4
2020.02,2020,2,29,2020-02-01,2019,5
2020.03,2020,3,31,2020-03-01,2019,6
2020.04,2020,4,30,2020-04-01,2019,7


## Monthly flow summaries

Add flow summaries to the monthly data

In [9]:
monthly['Flow_min']=daily[['MonthId','Flow']].groupby('MonthId').min()
monthly['Flow_mean']=daily[['MonthId','Flow']].groupby('MonthId').mean()
monthly['Flow_median']=daily[['MonthId','Flow']].groupby('MonthId').median()
monthly['Flow_max']=daily[['MonthId','Flow']].groupby('MonthId').max()
monthly['Volume']=daily[['MonthId','Volume']].groupby('MonthId').sum()
monthly['Flow_range']=monthly['Flow_max']-monthly['Flow_min']
monthly[['Flow_min','Flow_mean','Flow_median','Flow_max','Flow_range']]
monthly

Unnamed: 0_level_0,Year,Month,Days,DateStart,WaterYear,WaterMonth,Flow_min,Flow_mean,Flow_median,Flow_max,Volume,Flow_range
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1924.10,1924,10,31,1924-10-01,1924,1,136.426365,140.260227,140.730461,147.267478,0.375673,10.841113
1924.11,1924,11,30,1924-11-01,1924,2,138.842814,155.713684,145.877389,191.818302,0.403610,52.975488
1924.12,1924,12,31,1924-12-01,1924,3,193.853441,278.597425,274.563994,441.309291,0.746195,247.455850
1925.01,1925,1,31,1925-01-01,1924,4,415.093360,704.220852,603.438563,1201.809492,1.886185,786.716132
1925.02,1925,2,28,1925-02-01,1924,5,1136.130420,1216.016599,1183.088802,1436.348946,2.941787,300.218526
...,...,...,...,...,...,...,...,...,...,...,...,...
2020.01,2020,1,31,2020-01-01,2019,4,395.097857,516.568596,522.484418,650.839340,1.383577,255.741483
2020.02,2020,2,29,2020-02-01,2019,5,668.984658,853.254089,816.882894,1263.259054,2.137913,594.274396
2020.03,2020,3,31,2020-03-01,2019,6,1355.107206,3306.205061,3830.078120,4368.092483,8.855340,3012.985277
2020.04,2020,4,30,2020-04-01,2019,7,3788.601993,4293.912312,4350.407672,4744.909910,11.129821,956.307917


## Annual Flow

Create a data table for annual (water year) summaries and populate.

In [10]:
yearly=monthly[['WaterYear']].groupby('WaterYear').count()

yearly['Flow_min']=daily[['WaterYear','Flow']].groupby('WaterYear').min()
yearly['Flow_median']=daily[['WaterYear','Flow']].groupby('WaterYear').median()
yearly['Flow_mean']=daily[['WaterYear','Flow']].groupby('WaterYear').mean()
yearly['Flow_max']=daily[['WaterYear','Flow']].groupby('WaterYear').max()
yearly['Flow_range']=yearly['Flow_max']-yearly['Flow_min']
yearly['Volume']=monthly[['WaterYear','Volume']].groupby('WaterYear').sum()

yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1924,136.426365,572.393700,998.639190,3409.264637,3272.838272,31.493085
1925,161.301294,501.467555,1122.608229,4664.810479,4503.509184,35.402573
1926,193.853441,572.393700,995.784115,3302.442827,3108.589385,31.403048
1927,178.619675,514.847078,851.635409,2247.955945,2069.336270,26.930756
1928,202.597254,388.497980,570.892297,1581.297711,1378.700457,18.003659
...,...,...,...,...,...,...
2015,174.590793,486.691392,1017.250512,2992.244933,2817.654140,32.167903
2016,154.808811,577.070823,1085.153413,3882.710344,3727.901533,34.221398
2017,172.425625,816.882894,1612.795352,4867.713722,4695.288097,50.861114
2018,162.535334,458.925880,512.927211,1108.517240,945.981907,16.175673


In [11]:
Flow_mean_mean=yearly['Flow_mean'].describe()['mean']
Flow_max_mean=yearly['Flow_max'].describe()['mean']
Flow_min_mean=yearly['Flow_min'].describe()['mean']
Volume_mean=yearly['Volume'].describe()['mean']


yearly['Flow_mean_pct_var']=(yearly['Flow_mean']-Flow_mean_mean)/Flow_mean_mean*100
yearly['Flow_max_pct_var']=(yearly['Flow_max']-Flow_max_mean)/Flow_max_mean*100
yearly['Flow_min_pct_var']=(yearly['Flow_min']-Flow_min_mean)/Flow_min_mean*100
yearly['Volume_pct_var']=(yearly['Volume']-Volume_mean)/Volume_mean*100


Flow_mean_mean

1108.6000218703466

In [12]:
yearly['Flow_mean_5yr_mvCoefVar']=yearly['Flow_mean'].rolling(5,center=True).std()/Flow_mean_mean*100
yearly.loc[:,['Flow_mean_pct_var','Volume_pct_var']]

Unnamed: 0_level_0,Flow_mean_pct_var,Volume_pct_var
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1
1924,-9.918891,-9.490960
1925,1.263594,1.744648
1926,-10.176430,-9.749722
1927,-23.179200,-22.602794
1928,-48.503312,-48.258676
...,...,...
2015,-8.240078,-7.551580
2016,-2.114975,-1.649970
2017,45.480364,46.171471
2018,-53.731986,-53.512189


In [13]:
daily.head(2)

Unnamed: 0_level_0,LaggedDate,VicFalls,Conversion,Flow,Exceedance,Year,Month,Day,MonthId,WaterYear,WaterMonth,WaterDay,WaterWeek,Volume,Flow_difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1924-10-01,1924-10-12,100.0,1.407305,140.730461,0.999,1924,10,1,1924.1,1924,1,1,1,0.012159,
1924-10-02,1924-10-13,100.0,1.407305,140.730461,0.999,1924,10,2,1924.1,1924,1,2,1,0.012159,0.0


In [14]:
mins=daily[['Year','Flow']].groupby('Year').idxmin()
mins=mins.reset_index()
mins['DaysToStart']=mins.apply(lambda x: x['Flow']-pd.Timestamp(datetime.date(x['Year'], 10, 1)),axis=1)
mins=mins.set_index('Year')
yearly['DaysToStart']=mins['DaysToStart']
yearly['SeasonStart']=mins['Flow']
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1924,136.426365,572.393700,998.639190,3409.264637,3272.838272,31.493085,-9.918891,-6.145478,-37.001095,-9.490960,,24 days,1924-10-25
1925,161.301294,501.467555,1122.608229,4664.810479,4503.509184,35.402573,1.263594,28.418766,-25.514361,1.744648,,29 days,1925-10-30
1926,193.853441,572.393700,995.784115,3302.442827,3108.589385,31.403048,-10.176430,-9.086203,-10.482445,-9.749722,19.071064,23 days,1926-10-24
1927,178.619675,514.847078,851.635409,2247.955945,2069.336270,26.930756,-23.179200,-38.115443,-17.517087,-22.602794,19.587987,8 days,1927-10-09
1928,202.597254,388.497980,570.892297,1581.297711,1378.700457,18.003659,-48.503312,-56.468049,-6.444731,-48.258676,15.564574,28 days,1928-10-29
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015,174.590793,486.691392,1017.250512,2992.244933,2817.654140,32.167903,-8.240078,-17.625721,-19.377542,-7.551580,30.997613,30 days,2015-10-31
2016,154.808811,577.070823,1085.153413,3882.710344,3727.901533,34.221398,-2.114975,6.888130,-28.512457,-1.649970,37.928583,35 days,2016-11-05
2017,172.425625,816.882894,1612.795352,4867.713722,4695.288097,50.861114,45.480364,34.004541,-20.377373,46.171471,41.144721,18 days,2017-10-19
2018,162.535334,458.925880,512.927211,1108.517240,945.981907,16.175673,-53.731986,-69.483344,-24.944507,-53.512189,,41 days,2018-11-11


In [15]:
annual_fdcs=pd.DataFrame(index=np.arange(0,1.01,0.01),columns=np.arange(yearly.index.min(),yearly.index.max()+1,1))
for col in annual_fdcs.columns:
    annual_fdcs[col]=np.percentile(daily.loc[daily['WaterYear']==col]['Flow'],((1-annual_fdcs.index)*100))

annual_fdcs

Unnamed: 0,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0.00,3409.264637,4664.810479,3302.442827,2247.955945,1581.297711,2181.923359,3326.534014,3783.435520,1608.156213,5557.926859,...,4433.227139,3596.589500,3650.390173,3618.237985,1471.949472,2992.244933,3882.710344,4867.713722,1108.517240,4744.909910
0.01,3371.659300,4545.565643,3240.071165,2247.955945,1562.244149,2141.437375,3302.442827,3759.062598,1581.297711,5484.566992,...,4433.227139,3566.761817,3623.885134,3587.440057,1471.949472,2992.244933,3845.121914,4787.713433,1081.482358,4692.840808
0.02,3319.788482,4471.995632,3102.433104,2240.765840,1549.203591,2089.940644,3279.532620,3685.318934,1525.102636,5345.915252,...,4382.560537,3490.510716,3592.219865,3531.109177,1461.939405,2983.563498,3813.852929,4744.909910,1061.857902,4605.919841
0.03,3302.442827,4433.327484,3004.697032,2210.564144,1509.079191,2046.427120,3210.218265,3668.764083,1509.079191,5160.121318,...,4352.066546,3423.250538,3580.983662,3472.353421,1444.862405,2963.306816,3754.287002,4695.530045,1033.551910,4574.967121
0.04,3258.054322,4380.539789,2864.847098,2210.564144,1477.086791,1951.027131,3116.130426,3635.751696,1481.578650,4977.516460,...,4340.279270,3340.427258,3568.730995,3428.968569,1434.014704,2963.306816,3614.320917,4582.178079,1022.186861,4524.750179
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.96,140.730461,164.933200,204.918708,183.647871,220.239392,195.551050,220.239392,244.245354,202.597254,170.784690,...,239.285342,255.743131,241.300237,209.026642,239.285342,184.713081,174.590793,184.035945,176.567499,141.970967
0.97,140.730461,164.919431,202.597254,183.647871,207.552529,189.441599,219.866099,244.245354,202.597254,170.784690,...,239.285342,252.332454,236.359216,206.487458,231.528691,183.860458,166.973942,180.526210,174.417579,141.772327
0.98,138.858569,164.919431,202.597254,183.647871,207.552529,188.394575,209.662008,244.245354,200.443268,164.933200,...,236.779262,252.332454,231.528691,205.606486,231.528691,177.090301,166.973942,180.526210,172.425625,139.343439
0.99,136.426365,161.988851,200.443268,182.093677,204.176532,187.987399,207.552529,244.245354,195.793566,161.988851,...,231.528691,248.282741,226.147943,203.637391,224.199928,175.875652,163.066768,176.567499,166.395536,137.000087


In [16]:
monthly_fdcs=pd.DataFrame(index=np.arange(0,1.01,0.01),columns=[1,2,3,4,5,6,7,8,9,10,11,12])
for col in monthly_fdcs.columns:
    monthly_fdcs[col]=np.percentile(daily.loc[daily['WaterMonth']==col]['Flow'],((1-monthly_fdcs.index)*100))

monthly_fdcs

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
0.00,476.849854,663.943467,1146.569578,3878.779701,9912.101075,9508.124796,8507.274690,5686.865286,3943.549770,1861.476158,915.865709,560.629111
0.01,424.490533,549.530498,899.518328,2191.746071,5317.622296,7989.937157,6286.498298,5300.780788,2842.810203,1394.480330,726.801911,485.596811
0.02,395.807458,501.467555,833.428499,1818.304856,4732.736457,6956.444306,5978.768490,4741.501343,2555.561612,1238.646740,655.090742,465.615458
0.03,377.169560,469.415459,790.342813,1564.610012,4517.092752,6485.681433,5646.279239,4385.553942,2383.534305,1192.011550,616.740655,441.309291
0.04,358.718949,441.309291,752.060053,1430.369646,3977.686984,6036.851183,5503.720706,4162.406886,2286.821706,1142.918223,601.275201,434.623561
...,...,...,...,...,...,...,...,...,...,...,...,...
0.96,149.404467,169.055353,248.282741,401.810067,561.389392,777.790697,917.878188,833.428499,404.824287,302.673278,239.342408,189.441599
0.97,145.364787,162.535334,244.245354,388.683378,528.892296,755.871088,900.696801,780.621185,383.846587,289.823512,224.895479,178.619675
0.98,144.524391,151.431209,232.909260,373.238431,485.596811,738.771730,862.284925,686.658627,357.524261,270.998952,215.573229,166.941809
0.99,140.730461,145.162444,214.040774,330.919326,458.887389,618.543978,823.390218,562.527241,321.375420,249.828480,204.983022,158.672129


In [17]:
yearly['MeanQ3070']=annual_fdcs.loc[(annual_fdcs.index>=0.3) & (annual_fdcs.index<=0.7)].mean()
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart,MeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1924,136.426365,572.393700,998.639190,3409.264637,3272.838272,31.493085,-9.918891,-6.145478,-37.001095,-9.490960,,24 days,1924-10-25,674.120031
1925,161.301294,501.467555,1122.608229,4664.810479,4503.509184,35.402573,1.263594,28.418766,-25.514361,1.744648,,29 days,1925-10-30,567.629337
1926,193.853441,572.393700,995.784115,3302.442827,3108.589385,31.403048,-10.176430,-9.086203,-10.482445,-9.749722,19.071064,23 days,1926-10-24,691.695756
1927,178.619675,514.847078,851.635409,2247.955945,2069.336270,26.930756,-23.179200,-38.115443,-17.517087,-22.602794,19.587987,8 days,1927-10-09,562.225391
1928,202.597254,388.497980,570.892297,1581.297711,1378.700457,18.003659,-48.503312,-56.468049,-6.444731,-48.258676,15.564574,28 days,1928-10-29,430.512862
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015,174.590793,486.691392,1017.250512,2992.244933,2817.654140,32.167903,-8.240078,-17.625721,-19.377542,-7.551580,30.997613,30 days,2015-10-31,568.992668
2016,154.808811,577.070823,1085.153413,3882.710344,3727.901533,34.221398,-2.114975,6.888130,-28.512457,-1.649970,37.928583,35 days,2016-11-05,707.440683
2017,172.425625,816.882894,1612.795352,4867.713722,4695.288097,50.861114,45.480364,34.004541,-20.377373,46.171471,41.144721,18 days,2017-10-19,1066.181537
2018,162.535334,458.925880,512.927211,1108.517240,945.981907,16.175673,-53.731986,-69.483344,-24.944507,-53.512189,,41 days,2018-11-11,459.175086


In [18]:
fdc=pd.DataFrame({'Exceedance': np.arange(0,1.001,0.001)}).set_index('Exceedance')
fdc['Mean']=np.percentile(yearly['Flow_mean'],((1-fdc.index)*100))
fdc['Max']=np.percentile(yearly['Flow_max'],((1-fdc.index)*100))
fdc['Min']=np.percentile(yearly['Flow_min'],((1-fdc.index)*100))
fdc['Median']=np.percentile(yearly['Flow_median'],((1-fdc.index)*100))
fdc['MeanQ3070']=np.percentile(yearly['MeanQ3070'],((1-fdc.index)*100))

fdc

Unnamed: 0_level_0,Mean,Max,Min,Median,MeanQ3070
Exceedance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.000,2338.136797,9912.101075,341.844823,1116.017127,1267.364371
0.001,2320.680606,9838.050221,341.421203,1108.956166,1260.022389
0.002,2303.224415,9763.999367,340.997582,1101.895205,1252.680407
0.003,2285.768225,9689.948513,340.573961,1094.834245,1245.338425
0.004,2268.312034,9615.897659,340.150341,1087.773284,1237.996443
...,...,...,...,...,...
0.996,386.097394,869.748439,134.682339,275.589310,297.979637
0.997,381.545584,862.682083,134.565470,274.928664,296.440237
0.998,376.993775,855.615727,134.448601,274.268019,294.900837
0.999,372.441966,848.549371,134.331731,273.607374,293.361437


In [19]:
yearly['ExceedanceMean']=pd.merge_asof(yearly.reset_index().sort_values('Flow_mean'),fdc.reset_index().sort_values('Mean'),left_on='Flow_mean',right_on='Mean').set_index('WaterYear')['Exceedance']
yearly['ExceedanceMedian']=pd.merge_asof(yearly.reset_index().sort_values('Flow_median'),fdc.reset_index().sort_values('Median'),left_on='Flow_median',right_on='Median').set_index('WaterYear')['Exceedance']
yearly['ExceedanceMeanQ3070']=pd.merge_asof(yearly.reset_index().sort_values('MeanQ3070'),fdc.reset_index().sort_values('MeanQ3070'),left_on='MeanQ3070',right_on='MeanQ3070').set_index('WaterYear')['Exceedance']
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,DaysToStart,SeasonStart,MeanQ3070,ExceedanceMean,ExceedanceMedian,ExceedanceMeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1924,136.426365,572.393700,998.639190,3409.264637,3272.838272,31.493085,-9.918891,-6.145478,-37.001095,-9.490960,,24 days,1924-10-25,674.120031,0.569,0.583,0.506
1925,161.301294,501.467555,1122.608229,4664.810479,4503.509184,35.402573,1.263594,28.418766,-25.514361,1.744648,,29 days,1925-10-30,567.629337,0.432,0.733,0.737
1926,193.853441,572.393700,995.784115,3302.442827,3108.589385,31.403048,-10.176430,-9.086203,-10.482445,-9.749722,19.071064,23 days,1926-10-24,691.695756,0.579,0.583,0.464
1927,178.619675,514.847078,851.635409,2247.955945,2069.336270,26.930756,-23.179200,-38.115443,-17.517087,-22.602794,19.587987,8 days,1927-10-09,562.225391,0.695,0.713,0.769
1928,202.597254,388.497980,570.892297,1581.297711,1378.700457,18.003659,-48.503312,-56.468049,-6.444731,-48.258676,15.564574,28 days,1928-10-29,430.512862,0.927,0.958,0.948
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015,174.590793,486.691392,1017.250512,2992.244933,2817.654140,32.167903,-8.240078,-17.625721,-19.377542,-7.551580,30.997613,30 days,2015-10-31,568.992668,0.548,0.769,0.727
2016,154.808811,577.070823,1085.153413,3882.710344,3727.901533,34.221398,-2.114975,6.888130,-28.512457,-1.649970,37.928583,35 days,2016-11-05,707.440683,0.485,0.544,0.432
2017,172.425625,816.882894,1612.795352,4867.713722,4695.288097,50.861114,45.480364,34.004541,-20.377373,46.171471,41.144721,18 days,2017-10-19,1066.181537,0.106,0.179,0.053
2018,162.535334,458.925880,512.927211,1108.517240,945.981907,16.175673,-53.731986,-69.483344,-24.944507,-53.512189,,41 days,2018-11-11,459.175086,0.958,0.821,0.906


## Calendar months

Produce summaries of flow by calendar month

In [20]:
calmonthly=pd.DataFrame({'WaterMonth': [1,2,3,4,5,6,7,8,9,10,11,12],'MonthName': ['Oct','Nov','Dec','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep'], 'Month':[10,11,12,1,2,3,4,5,6,7,8,9]})
calmonthly=calmonthly.set_index('WaterMonth')
calmonthly['Flow_min']=daily[['WaterMonth','Flow']].groupby('WaterMonth').min()
calmonthly['Flow_mean']=daily[['WaterMonth','Flow']].groupby('WaterMonth').mean()
calmonthly['Flow_median']=daily[['WaterMonth','Flow']].groupby('WaterMonth').median()
calmonthly['Flow_max']=daily[['WaterMonth','Flow']].groupby('WaterMonth').max()
calmonthly['Flow_std']=daily[['WaterMonth','Flow']].groupby('WaterMonth').std()
calmonthly['Flow_coefvar']=(calmonthly['Flow_std']/calmonthly['Flow_mean']*100).round(1)
calmonthly['Flow_difference_median']=daily[['WaterMonth','Flow_difference']].groupby('WaterMonth').median()
calmonthly['Flow_difference_mean']=daily[['WaterMonth','Flow_difference']].groupby('WaterMonth').mean()

calmonthly

Unnamed: 0_level_0,MonthName,Month,Flow_min,Flow_mean,Flow_median,Flow_max,Flow_std,Flow_coefvar,Flow_difference_median,Flow_difference_mean
WaterMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Oct,10,134.214862,240.452311,227.997436,476.849854,58.753131,24.4,0.443581,2.03981
2,Nov,11,136.426365,276.637425,258.138454,663.943467,80.254088,29.0,2.641179,4.248738
3,Dec,12,189.441599,433.247875,404.824287,1146.569578,142.627648,32.9,5.444278,8.763367
4,Jan,1,274.563994,725.402991,642.529939,3878.779701,342.674456,47.2,8.852725,16.18948
5,Feb,2,388.49798,1355.143532,963.878739,9912.101075,1156.475167,85.3,14.245631,34.420494
6,Mar,3,514.847078,2543.016796,2181.923359,9508.124796,1654.483306,65.1,27.002723,55.891871
7,Apr,4,705.966848,2988.165995,2963.306816,8507.27469,1377.110714,46.1,23.067929,36.125255
8,May,5,417.792093,2242.323533,2141.437375,5686.865286,959.300196,42.8,31.536647,35.600122
9,Jun,6,274.563994,1216.476143,1146.569578,3943.54977,557.011857,45.8,26.736915,29.559349
10,Jul,7,222.608408,600.540714,542.940107,1861.476158,244.075683,40.6,8.810637,11.787291


## Calendar Month Flow exceedance

Flow exceedance values by calendar month.

P90 is flow which is exceeded for 90% of the time.

In [21]:

calmonthly['Flow_P95']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.05)
calmonthly['Flow_P90']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.1)
#calmonthly['Flow_P80']=monthly[['Month','Flow_mean']].groupby('Month').quantile(0.2)
calmonthly['Flow_P75']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.25)
calmonthly['Flow_P50']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.5)
calmonthly['Flow_P25']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.75)
#calmonthly['Flow_P20']=monthly[['Month','Flow_mean']].groupby('Month').quantile(0.8)
calmonthly['Flow_P10']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.9)
calmonthly['Flow_P05']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.95)

calmonthly

Unnamed: 0_level_0,MonthName,Month,Flow_min,Flow_mean,Flow_median,Flow_max,Flow_std,Flow_coefvar,Flow_difference_median,Flow_difference_mean,Flow_P95,Flow_P90,Flow_P75,Flow_P50,Flow_P25,Flow_P10,Flow_P05
WaterMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,Oct,10,134.214862,240.452311,227.997436,476.849854,58.753131,24.4,0.443581,2.03981,160.75733,172.463161,202.494118,228.260591,276.845152,309.339052,343.863249
2,Nov,11,136.426365,276.637425,258.138454,663.943467,80.254088,29.0,2.641179,4.248738,179.29013,202.479836,223.162735,266.603874,310.139389,376.351774,410.339154
3,Dec,12,189.441599,433.247875,404.824287,1146.569578,142.627648,32.9,5.444278,8.763367,288.511573,308.423202,344.791328,402.183584,508.380546,569.976033,700.486554
4,Jan,1,274.563994,725.402991,642.529939,3878.779701,342.674456,47.2,8.852725,16.18948,442.928771,478.53384,547.589889,632.716633,825.467477,999.655326,1308.047513
5,Feb,2,388.49798,1355.143532,963.878739,9912.101075,1156.475167,85.3,14.245631,34.420494,642.176981,695.145508,827.420317,991.360655,1437.473802,2430.523435,3188.201287
6,Mar,3,514.847078,2543.016796,2181.923359,9508.124796,1654.483306,65.1,27.002723,55.891871,857.95964,916.559886,1193.508241,2350.538497,3177.664195,4433.549176,5642.031254
7,Apr,4,705.966848,2988.165995,2963.306816,8507.27469,1377.110714,46.1,23.067929,36.125255,1068.823441,1259.100493,1912.072459,2962.148565,4014.390232,4743.372546,5145.84121
8,May,5,417.792093,2242.323533,2141.437375,5686.865286,959.300196,42.8,31.536647,35.600122,922.708436,1134.529109,1679.686792,2251.580328,2686.491351,3351.68759,3869.161261
9,Jun,6,274.563994,1216.476143,1146.569578,3943.54977,557.011857,45.8,26.736915,29.559349,453.516844,646.076229,864.271631,1186.311958,1483.164033,1827.855511,2091.334224
10,Jul,7,222.608408,600.540714,542.940107,1861.476158,244.075683,40.6,8.810637,11.787291,329.570313,371.861621,451.614156,552.128587,718.185358,882.303425,1028.43382


## Prepare the Representative Years Summaries

In [22]:
selected = pd.read_csv(input_data + "selected_years.csv").rename(columns={"Year": "WaterYear"}).set_index('WaterYear')
selected['Flow_min']=yearly['Flow_min']
selected['Flow_mean']=yearly['Flow_mean']
selected['Flow_max']=yearly['Flow_max']
selected['Volume']=yearly['Volume']
selected['ExceedanceMean']=yearly['ExceedanceMean']
selected['ExceedanceMedian']=yearly['ExceedanceMedian']
selected['ExceedanceMeanQ3070']=yearly['ExceedanceMeanQ3070']


selected

Unnamed: 0_level_0,Class,Flow_Exceedance,Flow_min,Flow_mean,Flow_max,Volume,ExceedanceMean,ExceedanceMedian,ExceedanceMeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1967,Very Wet,Q3,255.78397,1868.638585,5539.057623,59.090837,0.043,0.062,0.022
2013,Wet,Q12,201.49239,1342.719247,3618.237985,42.343994,0.253,0.142,0.116
2002,Median,Q50,198.679712,1078.181769,3846.264235,34.00154,0.506,0.443,0.485
1990,Dry,Q90,204.918708,774.650885,2286.126948,24.42939,0.769,0.916,0.885
1996,Very Dry,Q97,136.426365,556.633594,1411.936742,17.553997,0.937,0.969,0.958


In [23]:
flow_fdc=pd.DataFrame({'Exceedance': np.arange(0,1.001,0.001)}).set_index('Exceedance')
flow_fdc['Flow']=np.percentile(daily['Flow'],((1-flow_fdc.index)*100))
flow_fdc

Unnamed: 0_level_0,Flow
Exceedance,Unnamed: 1_level_1
0.000,9912.101075
0.001,8760.450163
0.002,7798.747217
0.003,6905.534712
0.004,6402.292338
...,...
0.996,145.266063
0.997,144.428820
0.998,141.399969
0.999,139.932640


In [24]:
floods = pd.read_csv(input_data + "flood_return.csv").set_index('ReturnYears')
floods[['LastDate','WaterYear']]=pd.merge_asof(daily.reset_index().sort_values('Flow'),floods.reset_index(),left_on='Flow',right_on='Flow')[['Date','WaterYear','ReturnYears']].groupby('ReturnYears').max()
floods['YearsSince']=2019-floods['WaterYear']
floods


Unnamed: 0_level_0,Flow,LastDate,WaterYear,YearsSince
ReturnYears,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,3418.0,2020-05-18,2019.0,0.0
5,5124.0,2010-04-28,2009.0,10.0
10,6218.0,1978-04-22,1977.0,42.0
15,6724.0,1969-04-21,1968.0,51.0
20,7231.0,1969-04-18,1968.0,51.0
50,8489.0,1969-04-04,1968.0,51.0
100,9395.0,1958-03-01,1957.0,62.0
200,10272.0,NaT,,
500,11391.0,NaT,,
1000,12212.0,NaT,,


In [25]:
for flood in floods.reset_index().itertuples():
    if flood.ReturnYears<=100:     
        floods.at[flood.ReturnYears,'MeanDays']=daily.loc[daily['Flow']>=flood.Flow].groupby('WaterYear').count().mean()['Flow']
floods

Unnamed: 0_level_0,Flow,LastDate,WaterYear,YearsSince,MeanDays
ReturnYears,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,3418.0,2020-05-18,2019.0,0.0,46.934783
5,5124.0,2010-04-28,2009.0,10.0,27.238095
10,6218.0,1978-04-22,1977.0,42.0,33.2
15,6724.0,1969-04-21,1968.0,51.0,29.25
20,7231.0,1969-04-18,1968.0,51.0,30.0
50,8489.0,1969-04-04,1968.0,51.0,15.0
100,9395.0,1958-03-01,1957.0,62.0,12.0
200,10272.0,NaT,,,
500,11391.0,NaT,,,
1000,12212.0,NaT,,,


## Save the Data

In [26]:
daily.to_csv(output_data + 'ngonye_daily.csv')
monthly.to_csv(output_data + 'ngonye_flow_monthly.csv')
yearly.to_csv(output_data + 'ngonye_flow_yearly.csv')
calmonthly.to_csv(output_data + 'ngonye_flow_calmonthly.csv')
selected.to_csv(output_data + 'ngonye_flow_selected_years.csv')
fdc.to_csv(output_data + 'ngonye_flow_annual_exceedance.csv')
annual_fdcs.to_csv(output_data + 'ngonye_flow_annual_fdcs.csv')
flow_fdc.to_csv(output_data + 'ngonye_flow_fdc.csv')
monthly_fdcs.to_csv(output_data + 'ngonye_monthly_fdc.csv')
floods.to_csv(output_data + 'ngonye_floods.csv')

In [27]:
years=daily['WaterYear'].unique().tolist()

for year in years:
    days=daily.loc[daily.WaterYear==year]
    days.to_csv(output_data + '/years/daily_' + str(year) + '.csv')