# c) Ngonye Falls Flow Analysis

Load the synthetic historic daily flow series for Ngonye and produce various summary statistics for later presentation.

## Inputs

| Data                       | Source                                        | Description                                 |
|----------------------------|-----------------------------------------------|---------------------------------------------|
| ngonye_synthetic.csv  | Notebook: b_synthetic_flow_ngonye |Synthetic daily flow series for Ngonye  Falls 1924/25 - 2016/17  |
| selected_years.csv | Mott MacDonald - Ngonye Falls Hydropower Project - 2018 Feasibility Study Update - Final Report Version D | List of representative selected years |


## Outputs
| File                           | Description                                 |
|--------------------------------|---------------------------------------------|
| ngonye_flow_daily.csv          | Daily flow data  |
| ngonye_flow_monthly.csv        | Flow summaries by month  |
| ngonye_flow_yearly.csv         | Flow summaries by year  |
| ngonye_flow_calmonthly.csv     | Flow summaries by calendar month |
| ngonye_flow_selected_years.csv | Flow summaries for selected representative years  |



## Parameters

In [25]:
input_data='./input_data/'
output_data='./output_data/2020/'

## Libraries

In [2]:
import numpy as np
import pandas as pd

## Load the Daily Data

In [3]:
daily = pd.read_csv(output_data + "ngonye_synthetic_2020.csv")
daily.tail(4)

Unnamed: 0,Date,LaggedDate,VicFalls,Conversion,Flow,Exceedance
34927,2020-05-17,2020-05-28,3438.1311,1.025158,3524.628422,0.057
34928,2020-05-18,2020-05-29,3393.3391,1.031077,3498.793989,0.058
34929,2020-05-19,2020-05-30,3304.8137,1.041461,3441.835365,0.063
34930,2020-05-20,2020-05-31,3217.6933,1.034753,3329.517841,0.07


Index by date and add some other columns for later use. 

Add a column for *WaterYear* which starts on 1st October and runs to 31st September the following year.

In [4]:
daily['Date']=pd.to_datetime(daily['Date'],format="%Y-%m-%d")#"%d/%m/%Y")
daily=daily.set_index(pd.DatetimeIndex(daily['Date']))


In [5]:
daily['Year']=daily.index.year
daily['Month']=daily.index.month
daily['Day']=daily.index.day
daily['MonthId']=daily['Year']+daily['Month']/100
daily['WaterYear']=daily.apply((lambda x: (x['Year'] if x['Month']>=10 else x['Year']-1)),axis=1)
daily['WaterMonth']=daily.apply((lambda x: (x['Month']-9 if x['Month']>=10 else x['Month']+3)),axis=1)
daily['WaterDay']=daily.apply(lambda x: (x['Date']-pd.Timestamp(x['WaterYear'], 10, 1)).days+1,axis=1)
daily['WaterWeek']=np.floor((daily['WaterDay']-1)/7)+1
daily['Volume']=daily['Flow']*60*60*24/(1000*1000*1000)
daily=daily.astype({'WaterWeek': 'int32'})
daily=daily.drop('Date',axis=1)
daily.head(8)

Unnamed: 0_level_0,LaggedDate,VicFalls,Conversion,Flow,Exceedance,Year,Month,Day,MonthId,WaterYear,WaterMonth,WaterDay,WaterWeek,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1924-10-01,1924-10-12,100.0,1.114075,111.40748,0.999,1924,10,1,1924.1,1924,1,1,1,0.009626
1924-10-02,1924-10-13,100.0,1.114075,111.40748,0.999,1924,10,2,1924.1,1924,1,2,1,0.009626
1924-10-03,1924-10-14,100.0,1.114075,111.40748,0.999,1924,10,3,1924.1,1924,1,3,1,0.009626
1924-10-04,1924-10-15,100.0,1.114075,111.40748,0.999,1924,10,4,1924.1,1924,1,4,1,0.009626
1924-10-05,1924-10-16,100.0,1.114075,111.40748,0.999,1924,10,5,1924.1,1924,1,5,1,0.009626
1924-10-06,1924-10-17,100.0,1.114075,111.40748,0.999,1924,10,6,1924.1,1924,1,6,1,0.009626
1924-10-07,1924-10-18,100.0,1.114075,111.40748,0.999,1924,10,7,1924.1,1924,1,7,1,0.009626
1924-10-08,1924-10-19,100.0,1.114075,111.40748,0.999,1924,10,8,1924.1,1924,1,8,2,0.009626


In [6]:
daily['Flow_difference']=np.abs(daily['Flow']-daily['Flow'].shift(1))

## Setup the Monthly Data

Load the monthly data.

In [7]:

monthly=daily.groupby(['MonthId','Year','Month']).size().to_frame(name="Days").reset_index(['Month','Year'])
monthly

Unnamed: 0_level_0,Year,Month,Days
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1924.10,1924,10,31
1924.11,1924,11,30
1924.12,1924,12,31
1925.01,1925,1,31
1925.02,1925,2,28
...,...,...,...
2020.01,2020,1,31
2020.02,2020,2,29
2020.03,2020,3,31
2020.04,2020,4,30


Set the index and add additional columns for later use.

In [8]:

monthly['Day']=1
monthly['DateStart']=pd.to_datetime(monthly[['Year','Month','Day']])
monthly=monthly.drop('Day',1)


monthly['WaterYear']=monthly.apply((lambda x: (x['Year'] if x['Month']>=10 else x['Year']-1)),axis=1)
monthly['WaterMonth']=monthly.apply((lambda x: (x['Month']-9 if x['Month']>=10 else x['Month']+3)),axis=1)


monthly

Unnamed: 0_level_0,Year,Month,Days,DateStart,WaterYear,WaterMonth
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1924.10,1924,10,31,1924-10-01,1924,1
1924.11,1924,11,30,1924-11-01,1924,2
1924.12,1924,12,31,1924-12-01,1924,3
1925.01,1925,1,31,1925-01-01,1924,4
1925.02,1925,2,28,1925-02-01,1924,5
...,...,...,...,...,...,...
2020.01,2020,1,31,2020-01-01,2019,4
2020.02,2020,2,29,2020-02-01,2019,5
2020.03,2020,3,31,2020-03-01,2019,6
2020.04,2020,4,30,2020-04-01,2019,7


## Monthly flow summaries

Add flow summaries to the monthly data

In [9]:
monthly['Flow_min']=daily[['MonthId','Flow']].groupby('MonthId').min()
monthly['Flow_mean']=daily[['MonthId','Flow']].groupby('MonthId').mean()
monthly['Flow_median']=daily[['MonthId','Flow']].groupby('MonthId').median()
monthly['Flow_max']=daily[['MonthId','Flow']].groupby('MonthId').max()
monthly['Volume']=daily[['MonthId','Volume']].groupby('MonthId').sum()
monthly['Flow_range']=monthly['Flow_max']-monthly['Flow_min']
monthly[['Flow_min','Flow_mean','Flow_median','Flow_max','Flow_range']]
monthly

Unnamed: 0_level_0,Year,Month,Days,DateStart,WaterYear,WaterMonth,Flow_min,Flow_mean,Flow_median,Flow_max,Volume,Flow_range
MonthId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1924.10,1924,10,31,1924-10-01,1924,1,111.407480,116.382415,111.407480,126.773143,0.311719,15.365663
1924.11,1924,11,30,1924-11-01,1924,2,128.245150,153.980092,145.350957,191.219380,0.399116,62.974230
1924.12,1924,12,31,1924-12-01,1924,3,192.895237,276.400204,272.319956,440.033111,0.740310,247.137874
1925.01,1925,1,31,1925-01-01,1924,4,417.857772,692.739318,595.709061,1182.748622,1.855433,764.890850
1925.02,1925,2,28,1925-02-01,1924,5,1107.711819,1193.681452,1151.412417,1436.997257,2.887754,329.285437
...,...,...,...,...,...,...,...,...,...,...,...,...
2020.01,2020,1,31,2020-01-01,2019,4,392.775102,510.932210,517.454361,632.978157,1.368481,240.203055
2020.02,2020,2,29,2020-02-01,2019,5,648.880158,832.339221,794.743484,1256.155461,2.085509,607.275303
2020.03,2020,3,31,2020-03-01,2019,6,1359.127800,3274.999361,3846.356144,4284.908210,8.771758,2925.780410
2020.04,2020,4,30,2020-04-01,2019,7,3808.307964,4212.960813,4256.346149,4536.802269,10.919994,728.494305


## Annual Flow

Create a data table for annual (water year) summaries and populate.

In [10]:
yearly=monthly[['WaterYear']].groupby('WaterYear').count()

yearly['Flow_min']=daily[['WaterYear','Flow']].groupby('WaterYear').min()
yearly['Flow_median']=daily[['WaterYear','Flow']].groupby('WaterYear').median()
yearly['Flow_mean']=daily[['WaterYear','Flow']].groupby('WaterYear').mean()
yearly['Flow_max']=daily[['WaterYear','Flow']].groupby('WaterYear').max()
yearly['Flow_range']=yearly['Flow_max']-yearly['Flow_min']
yearly['Volume']=monthly[['WaterYear','Volume']].groupby('WaterYear').sum()

yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1924,111.407480,562.828075,992.228244,3452.248382,3340.840902,31.290910
1925,158.541897,494.681083,1112.050278,4501.386001,4342.844104,35.069618
1926,192.895237,562.828075,990.540954,3313.040641,3120.145404,31.237700
1927,179.826510,512.117820,848.305420,2261.108088,2081.281577,26.825453
1928,195.207704,389.165134,564.806554,1589.152359,1393.944655,17.811739
...,...,...,...,...,...,...
2015,176.218173,481.566763,1013.597943,2975.524395,2799.306221,32.052400
2016,156.550907,565.558109,1081.527227,3889.991232,3733.440325,34.107043
2017,173.839355,794.743484,1594.220416,4719.752686,4545.913331,50.275335
2018,159.330426,458.329494,506.338559,1088.140256,928.809830,15.967893


In [11]:
Flow_mean_mean=yearly['Flow_mean'].describe()['mean']
Flow_max_mean=yearly['Flow_max'].describe()['mean']
Flow_min_mean=yearly['Flow_min'].describe()['mean']
Volume_mean=yearly['Volume'].describe()['mean']


yearly['Flow_mean_pct_var']=(yearly['Flow_mean']-Flow_mean_mean)/Flow_mean_mean*100
yearly['Flow_max_pct_var']=(yearly['Flow_max']-Flow_max_mean)/Flow_max_mean*100
yearly['Flow_min_pct_var']=(yearly['Flow_min']-Flow_min_mean)/Flow_min_mean*100
yearly['Volume_pct_var']=(yearly['Volume']-Volume_mean)/Volume_mean*100


Flow_mean_mean

1100.6951155961453

In [12]:
yearly['Flow_mean_5yr_mvCoefVar']=yearly['Flow_mean'].rolling(5,center=True).std()/Flow_mean_mean*100
yearly.loc[:,['Flow_mean_pct_var','Volume_pct_var']]

Unnamed: 0_level_0,Flow_mean_pct_var,Volume_pct_var
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1
1924,-9.854397,-9.429875
1925,1.031636,1.507424
1926,-10.007691,-9.583890
1927,-22.930028,-22.354937
1928,-48.686376,-48.444725
...,...,...
2015,-7.912924,-7.225777
2016,-1.741435,-1.278706
2017,44.837602,45.519686
2018,-53.998291,-53.781656


In [14]:
annual_fdcs=pd.DataFrame(index=np.arange(0,1.01,0.01),columns=np.arange(yearly.index.min(),yearly.index.max()+1,1))
for col in annual_fdcs.columns:
    annual_fdcs[col]=np.percentile(daily.loc[daily['WaterYear']==col]['Flow'],((1-annual_fdcs.index)*100))

annual_fdcs

Unnamed: 0,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0.00,3452.248382,4501.386001,3313.040641,2261.108088,1589.152359,2187.823513,3355.917451,3803.114618,1617.395612,5588.707650,...,4355.928308,3544.189347,3614.802471,3574.946119,1460.253290,2975.524395,3889.991232,4719.752686,1088.140256,4536.802269
0.01,3441.005216,4444.488798,3210.822806,2261.108088,1563.480637,2155.197370,3313.040641,3773.148260,1589.152359,5438.818601,...,4355.928308,3526.092281,3580.525700,3539.407167,1460.253290,2975.524395,3857.302531,4573.851003,1075.646599,4525.309128
0.02,3343.911944,4385.141126,3103.529575,2249.359418,1544.955083,2094.974659,3244.545456,3670.338944,1523.864286,5299.444589,...,4305.723319,3496.298115,3539.883375,3508.024769,1448.576499,2971.684495,3825.614209,4536.802269,1060.081632,4488.876741
0.03,3313.040641,4355.316528,3006.520556,2217.372658,1511.071200,2043.732163,3187.836794,3639.447142,1511.071200,5068.368068,...,4269.187465,3466.295110,3528.810877,3484.505752,1438.767729,2962.724728,3750.866579,4526.228579,1045.013484,4469.569335
0.04,3228.508182,4305.170853,2867.157776,2217.372658,1466.370536,1958.743556,3116.989107,3600.306705,1474.502881,4838.446655,...,4235.066569,3384.867235,3526.468702,3462.403074,1430.124168,2962.724728,3572.412990,4476.614167,1036.439624,4429.179165
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.96,111.407480,166.264580,199.092559,185.778706,220.783389,192.895237,220.783389,237.172286,195.207704,171.137982,...,233.153796,257.673822,234.588362,201.936335,233.153796,187.940800,176.218173,184.649250,178.520398,126.507336
0.97,111.407480,166.264580,195.207704,185.778706,201.170113,189.239605,220.084834,237.172286,195.207704,171.137982,...,233.153796,251.832705,230.479847,200.552607,227.354006,184.495221,166.873518,181.568679,176.027868,124.470848
0.98,111.407480,164.544380,195.207704,185.778706,201.170113,188.865366,202.164813,237.172286,194.795674,163.875413,...,230.751659,251.832705,227.354006,200.013130,227.354006,178.913516,166.873518,181.568679,173.839355,118.342898
0.99,111.407480,162.781303,194.795674,182.700509,198.433482,188.719829,201.170113,237.172286,192.895237,162.781303,...,227.354006,244.545839,225.662391,196.059633,223.977081,177.714619,163.040615,178.520398,165.949459,113.751854


In [15]:
monthly_fdcs=pd.DataFrame(index=np.arange(0,1.01,0.01),columns=[1,2,3,4,5,6,7,8,9,10,11,12])
for col in monthly_fdcs.columns:
    monthly_fdcs[col]=np.percentile(daily.loc[daily['WaterMonth']==col]['Flow'],((1-monthly_fdcs.index)*100))

monthly_fdcs

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
0.00,470.178949,647.648985,1127.726717,3886.053218,9912.101075,9530.170695,8539.833006,5672.879600,3920.473588,1871.701607,905.421193,550.998323
0.01,422.469819,542.008751,878.981546,2195.511587,5273.950802,8020.515563,6307.949994,5236.347071,2845.510365,1390.481080,702.656016,477.366576
0.02,394.981833,494.681083,817.734860,1832.296133,4525.276921,6953.746517,5965.180906,4535.484003,2554.691947,1208.735316,636.925788,459.260443
0.03,376.756612,462.565067,766.466607,1560.949468,4422.214010,6467.112964,5654.653489,4305.170853,2384.832211,1164.539248,607.363264,440.033111
0.04,360.348801,440.033111,728.497070,1431.851745,3944.569950,6023.131598,5449.661652,4056.544869,2294.660340,1117.925236,589.231835,435.496885
...,...,...,...,...,...,...,...,...,...,...,...,...
0.96,145.350957,169.193340,244.545839,402.902377,552.204909,749.779955,906.559998,817.734860,408.004546,303.400625,232.598692,189.239605
0.97,126.318127,163.040615,237.482935,389.557247,524.444293,731.590438,885.109448,751.665588,383.513195,288.094701,225.387386,179.826510
0.98,121.080110,150.346644,227.950669,373.010125,477.366576,712.083701,843.538372,667.143018,353.907049,267.629258,212.051449,166.861340
0.99,111.407480,142.595563,208.234672,329.718341,455.655156,608.795064,803.129825,554.339898,322.450257,247.688229,199.600945,155.869765


In [16]:
yearly['MeanQ3070']=annual_fdcs.loc[(annual_fdcs.index>=0.3) & (annual_fdcs.index<=0.7)].mean()
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,MeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1924,111.407480,562.828075,992.228244,3452.248382,3340.840902,31.290910,-9.854397,-4.439773,-47.785776,-9.429875,,664.141945
1925,158.541897,494.681083,1112.050278,4501.386001,4342.844104,35.069618,1.031636,24.600961,-25.694917,1.507424,,560.328498
1926,192.895237,562.828075,990.540954,3313.040641,3120.145404,31.237700,-10.007691,-8.293124,-9.594265,-9.583890,19.094174,683.544820
1927,179.826510,512.117820,848.305420,2261.108088,2081.281577,26.825453,-22.930028,-37.411224,-15.719289,-22.354937,19.619004,553.400644
1928,195.207704,389.165134,564.806554,1589.152359,1393.944655,17.811739,-48.686376,-56.011346,-8.510463,-48.444725,15.723668,426.301190
...,...,...,...,...,...,...,...,...,...,...,...,...
2015,176.218173,481.566763,1013.597943,2975.524395,2799.306221,32.052400,-7.912924,-17.635768,-17.410436,-7.225777,30.868596,561.543688
2016,156.550907,565.558109,1081.527227,3889.991232,3733.440325,34.107043,-1.741435,7.677201,-26.628050,-1.278706,37.813369,698.948108
2017,173.839355,794.743484,1594.220416,4719.752686,4545.913331,50.275335,44.837602,30.645477,-18.525336,45.519686,40.788510,1058.100404
2018,159.330426,458.329494,506.338559,1088.140256,928.809830,15.967893,-53.998291,-69.879650,-25.325350,-53.781656,,453.461034


In [17]:
fdc=pd.DataFrame({'Exceedance': np.arange(0,1.001,0.001)}).set_index('Exceedance')
fdc['Mean']=np.percentile(yearly['Flow_mean'],((1-fdc.index)*100))
fdc['Max']=np.percentile(yearly['Flow_max'],((1-fdc.index)*100))
fdc['Min']=np.percentile(yearly['Flow_min'],((1-fdc.index)*100))
fdc['Median']=np.percentile(yearly['Flow_median'],((1-fdc.index)*100))
fdc['MeanQ3070']=np.percentile(yearly['MeanQ3070'],((1-fdc.index)*100))

fdc

Unnamed: 0_level_0,Mean,Max,Min,Median,MeanQ3070
Exceedance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.000,2334.081716,9912.101075,339.448893,1089.456049,1258.614313
0.001,2316.528625,9840.061868,339.178049,1085.759883,1251.440725
0.002,2298.975535,9768.022662,338.907205,1082.063716,1244.267137
0.003,2281.422444,9695.983455,338.636362,1078.367550,1237.093549
0.004,2263.869353,9623.944248,338.365518,1074.671383,1229.919961
...,...,...,...,...,...
0.996,379.236395,852.009896,104.726966,272.335352,296.001392
0.997,374.457818,843.825908,104.218197,271.858527,294.431659
0.998,369.679242,835.641919,103.709428,271.381702,292.861925
0.999,364.900666,827.457930,103.200658,270.904876,291.292192


In [18]:
yearly['ExceedanceMean']=pd.merge_asof(yearly.reset_index().sort_values('Flow_mean'),fdc.reset_index().sort_values('Mean'),left_on='Flow_mean',right_on='Mean').set_index('WaterYear')['Exceedance']
yearly['ExceedanceMedian']=pd.merge_asof(yearly.reset_index().sort_values('Flow_median'),fdc.reset_index().sort_values('Median'),left_on='Flow_median',right_on='Median').set_index('WaterYear')['Exceedance']
yearly['ExceedanceMeanQ3070']=pd.merge_asof(yearly.reset_index().sort_values('MeanQ3070'),fdc.reset_index().sort_values('MeanQ3070'),left_on='MeanQ3070',right_on='MeanQ3070').set_index('WaterYear')['Exceedance']
yearly

Unnamed: 0_level_0,Flow_min,Flow_median,Flow_mean,Flow_max,Flow_range,Volume,Flow_mean_pct_var,Flow_max_pct_var,Flow_min_pct_var,Volume_pct_var,Flow_mean_5yr_mvCoefVar,MeanQ3070,ExceedanceMean,ExceedanceMedian,ExceedanceMeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1924,111.407480,562.828075,992.228244,3452.248382,3340.840902,31.290910,-9.854397,-4.439773,-47.785776,-9.429875,,664.141945,0.569,0.583,0.495
1925,158.541897,494.681083,1112.050278,4501.386001,4342.844104,35.069618,1.031636,24.600961,-25.694917,1.507424,,560.328498,0.443,0.734,0.737
1926,192.895237,562.828075,990.540954,3313.040641,3120.145404,31.237700,-10.007691,-8.293124,-9.594265,-9.583890,19.094174,683.544820,0.579,0.583,0.464
1927,179.826510,512.117820,848.305420,2261.108088,2081.281577,26.825453,-22.930028,-37.411224,-15.719289,-22.354937,19.619004,553.400644,0.695,0.713,0.769
1928,195.207704,389.165134,564.806554,1589.152359,1393.944655,17.811739,-48.686376,-56.011346,-8.510463,-48.444725,15.723668,426.301190,0.927,0.958,0.948
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015,176.218173,481.566763,1013.597943,2975.524395,2799.306221,32.052400,-7.912924,-17.635768,-17.410436,-7.225777,30.868596,561.543688,0.548,0.769,0.727
2016,156.550907,565.558109,1081.527227,3889.991232,3733.440325,34.107043,-1.741435,7.677201,-26.628050,-1.278706,37.813369,698.948108,0.485,0.544,0.432
2017,173.839355,794.743484,1594.220416,4719.752686,4545.913331,50.275335,44.837602,30.645477,-18.525336,45.519686,40.788510,1058.100404,0.116,0.179,0.053
2018,159.330426,458.329494,506.338559,1088.140256,928.809830,15.967893,-53.998291,-69.879650,-25.325350,-53.781656,,453.461034,0.958,0.805,0.906


## Calendar months

Produce summaries of flow by calendar month

In [19]:
calmonthly=pd.DataFrame({'WaterMonth': [1,2,3,4,5,6,7,8,9,10,11,12],'MonthName': ['Oct','Nov','Dec','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep'], 'Month':[10,11,12,1,2,3,4,5,6,7,8,9]})
calmonthly=calmonthly.set_index('WaterMonth')
calmonthly['Flow_min']=daily[['WaterMonth','Flow']].groupby('WaterMonth').min()
calmonthly['Flow_mean']=daily[['WaterMonth','Flow']].groupby('WaterMonth').mean()
calmonthly['Flow_median']=daily[['WaterMonth','Flow']].groupby('WaterMonth').median()
calmonthly['Flow_max']=daily[['WaterMonth','Flow']].groupby('WaterMonth').max()
calmonthly['Flow_std']=daily[['WaterMonth','Flow']].groupby('WaterMonth').std()
calmonthly['Flow_coefvar']=(calmonthly['Flow_std']/calmonthly['Flow_mean']*100).round(1)
calmonthly['Flow_difference_median']=daily[['WaterMonth','Flow_difference']].groupby('WaterMonth').median()
calmonthly['Flow_difference_mean']=daily[['WaterMonth','Flow_difference']].groupby('WaterMonth').mean()

calmonthly

Unnamed: 0_level_0,MonthName,Month,Flow_min,Flow_mean,Flow_median,Flow_max,Flow_std,Flow_coefvar,Flow_difference_median,Flow_difference_mean
WaterMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Oct,10,104.715941,237.193288,226.452123,470.178949,60.561395,25.5,0.41203,2.009348
2,Nov,11,102.691889,274.242236,259.557224,647.648985,80.532715,29.4,2.599778,4.290208
3,Dec,12,189.239605,429.122491,408.004546,1127.726717,138.019113,32.2,5.336509,8.579225
4,Jan,1,272.319956,714.220385,623.809012,3886.053218,342.777187,48.0,8.227209,15.968647
5,Feb,2,389.165134,1340.767869,958.390495,9912.101075,1152.988935,86.0,14.24339,34.357388
6,Mar,3,512.11782,2528.186738,2187.823513,9530.170695,1646.895287,65.1,27.263577,55.56737
7,Apr,4,684.66279,2969.188296,2962.724728,8539.833006,1359.475597,45.8,23.099901,35.62507
8,May,5,420.574478,2237.21896,2155.19737,5672.8796,951.149569,42.5,30.637837,35.299461
9,Jun,6,272.319956,1209.220152,1127.726717,3920.473588,561.953925,46.5,27.75109,29.844775
10,Jul,7,222.607729,592.199272,536.696602,1871.701607,239.872202,40.5,8.181238,11.583101


## Calendar Month Flow exceedance

Flow exceedance values by calendar month.

P90 is flow which is exceeded for 90% of the time.

In [20]:

calmonthly['Flow_P95']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.05)
calmonthly['Flow_P90']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.1)
#calmonthly['Flow_P80']=monthly[['Month','Flow_mean']].groupby('Month').quantile(0.2)
calmonthly['Flow_P75']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.25)
calmonthly['Flow_P50']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.5)
calmonthly['Flow_P25']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.75)
#calmonthly['Flow_P20']=monthly[['Month','Flow_mean']].groupby('Month').quantile(0.8)
calmonthly['Flow_P10']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.9)
calmonthly['Flow_P05']=monthly[['WaterMonth','Flow_mean']].groupby('WaterMonth').quantile(0.95)

calmonthly

Unnamed: 0_level_0,MonthName,Month,Flow_min,Flow_mean,Flow_median,Flow_max,Flow_std,Flow_coefvar,Flow_difference_median,Flow_difference_mean,Flow_P95,Flow_P90,Flow_P75,Flow_P50,Flow_P25,Flow_P10,Flow_P05
WaterMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,Oct,10,104.715941,237.193288,226.452123,470.178949,60.561395,25.5,0.41203,2.009348,160.506283,173.228949,196.231899,225.341699,274.697694,308.297527,342.357097
2,Nov,11,102.691889,274.242236,259.557224,647.648985,80.532715,29.4,2.599778,4.290208,178.861236,201.240836,219.366759,264.468186,308.883776,375.778965,409.615009
3,Dec,12,189.239605,429.122491,408.004546,1127.726717,138.019113,32.2,5.336509,8.579225,286.202487,306.888789,343.674551,400.676463,502.057756,560.017195,683.819026
4,Jan,1,272.319956,714.220385,623.809012,3886.053218,342.777187,48.0,8.227209,15.968647,439.708967,474.433201,539.640992,617.679945,806.426143,992.730272,1300.117042
5,Feb,2,389.165134,1340.767869,958.390495,9912.101075,1152.988935,86.0,14.24339,34.357388,626.769736,678.978131,808.50457,979.420581,1431.520239,2415.596187,3174.242367
6,Mar,3,512.11782,2528.186738,2187.823513,9530.170695,1646.895287,65.1,27.263577,55.56737,836.966305,903.283596,1185.328597,2351.263733,3164.216827,4343.289642,5612.883016
7,Apr,4,684.66279,2969.188296,2962.724728,8539.833006,1359.475597,45.8,23.099901,35.62507,1060.174534,1246.603713,1916.404587,2966.095833,3962.193734,4649.230528,5072.221228
8,May,5,420.574478,2237.21896,2155.19737,5672.8796,951.149569,42.5,30.637837,35.299461,907.22854,1119.670169,1683.20294,2254.719484,2687.946063,3342.973946,3823.200562
9,Jun,6,272.319956,1209.220152,1127.726717,3920.473588,561.953925,46.5,27.75109,29.844775,451.028911,632.223272,850.154752,1178.300695,1479.139615,1829.64109,2092.681423
10,Jul,7,222.607729,592.199272,536.696602,1871.701607,239.872202,40.5,8.181238,11.583101,328.789937,371.456899,448.899176,543.301914,704.482862,866.197284,1012.322114


## Prepare the Representative Years Summaries

In [21]:
selected = pd.read_csv(input_data + "selected_years.csv").rename(columns={"Year": "WaterYear"}).set_index('WaterYear')
selected['Flow_min']=yearly['Flow_min']
selected['Flow_mean']=yearly['Flow_mean']
selected['Flow_max']=yearly['Flow_max']
selected['Volume']=yearly['Volume']
selected['ExceedanceMean']=yearly['ExceedanceMean']
selected['ExceedanceMedian']=yearly['ExceedanceMedian']
selected['ExceedanceMeanQ3070']=yearly['ExceedanceMeanQ3070']


selected

Unnamed: 0_level_0,Class,Flow_Exceedance,Flow_min,Flow_mean,Flow_max,Volume,ExceedanceMean,ExceedanceMedian,ExceedanceMeanQ3070
WaterYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1967,Very Wet,Q3,257.709689,1845.183184,5513.419394,58.349121,0.043,0.057,0.022
2013,Wet,Q12,195.06159,1340.744246,3574.946119,42.281711,0.253,0.142,0.116
2002,Median,Q50,193.637004,1072.993706,3846.356144,33.83793,0.506,0.443,0.485
1990,Dry,Q90,199.092559,771.196729,2293.963201,24.32046,0.769,0.916,0.874
1996,Very Dry,Q97,102.691889,547.304651,1399.622528,17.259799,0.937,0.969,0.958


In [22]:
flow_fdc=pd.DataFrame({'Exceedance': np.arange(0,1.001,0.001)}).set_index('Exceedance')
flow_fdc['Flow']=np.percentile(daily['Flow'],((1-flow_fdc.index)*100))
flow_fdc

Unnamed: 0_level_0,Flow
Exceedance,Unnamed: 1_level_1
0.000,9912.101075
0.001,8793.047209
0.002,7867.080491
0.003,6937.385641
0.004,6412.813597
...,...
0.996,138.742045
0.997,126.773143
0.998,124.169579
0.999,111.407480


In [23]:
floods = pd.read_csv(input_data + "flood_return.csv").set_index('ReturnYears')
floods[['LastDate','WaterYear']]=pd.merge_asof(daily.reset_index().sort_values('Flow'),floods.reset_index(),left_on='Flow',right_on='Flow')[['Date','WaterYear','ReturnYears']].groupby('ReturnYears').max()
floods['YearsSince']=2019-floods['WaterYear']
floods


Unnamed: 0_level_0,Flow,LastDate,WaterYear,YearsSince
ReturnYears,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,3418.0,2020-05-19,2019.0,0.0
5,5124.0,2010-04-26,2009.0,10.0
10,6218.0,1978-04-23,1977.0,42.0
15,6724.0,1969-04-22,1968.0,51.0
20,7231.0,1969-04-18,1968.0,51.0
50,8489.0,1969-04-05,1968.0,51.0
100,9395.0,1958-03-01,1957.0,62.0
200,10272.0,NaT,,
500,11391.0,NaT,,
1000,12212.0,NaT,,


In [24]:
for flood in floods.reset_index().itertuples():
    if flood.ReturnYears<=100:     
        floods.at[flood.ReturnYears,'MeanDays']=daily.loc[daily['Flow']>=flood.Flow].groupby('WaterYear').count().mean()['Flow']
floods

Unnamed: 0_level_0,Flow,LastDate,WaterYear,YearsSince,MeanDays
ReturnYears,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,3418.0,2020-05-19,2019.0,0.0,45.48
5,5124.0,2010-04-26,2009.0,10.0,25.190476
10,6218.0,1978-04-23,1977.0,42.0,33.6
15,6724.0,1969-04-22,1968.0,51.0,30.25
20,7231.0,1969-04-18,1968.0,51.0,30.333333
50,8489.0,1969-04-05,1968.0,51.0,15.666667
100,9395.0,1958-03-01,1957.0,62.0,12.0
200,10272.0,NaT,,,
500,11391.0,NaT,,,
1000,12212.0,NaT,,,


## Save the Data

In [26]:
daily.to_csv(output_data + 'ngonye_daily.csv')
monthly.to_csv(output_data + 'ngonye_flow_monthly.csv')
yearly.to_csv(output_data + 'ngonye_flow_yearly.csv')
calmonthly.to_csv(output_data + 'ngonye_flow_calmonthly.csv')
selected.to_csv(output_data + 'ngonye_flow_selected_years.csv')
fdc.to_csv(output_data + 'ngonye_flow_annual_exceedance.csv')
annual_fdcs.to_csv(output_data + 'ngonye_flow_annual_fdcs.csv')
flow_fdc.to_csv(output_data + 'ngonye_flow_fdc.csv')
monthly_fdcs.to_csv(output_data + 'ngonye_monthly_fdc.csv')
floods.to_csv(output_data + 'ngonye_floods.csv')

In [33]:
years=daily['WaterYear'].unique().tolist()

for year in years:
    days=daily.loc[daily.WaterYear==year]
    days.to_csv(output_data + '/years/daily_' + str(year) + '.csv')