In [1]:
# Notebook configuration: every user-tunable value is gathered in this cell.

# --- Observed series ---------------------------------------------------------
# Folder scanned for the per-station "<name>.csv" files.
data_folder = "/home/andrea/Projects/Niger/per_calib/data/"

# --- Analysis window ---------------------------------------------------------
# First and last timestamp of the analysis, written as "YYYY-MM-DD HH:MM".
start_analysis, end_analysis = "1980-01-01 00:00", "1983-12-31 23:00"
# pandas frequency alias of the common time index ("D" = daily).
freq = "D"

# --- Runoff coefficient ------------------------------------------------------
# Folder holding "average_rainfall_mm_h.txt" and "average_rainfall_m3_s.txt".
basin_rainfall_fld = "/home/andrea/Projects/Niger/rainfall_analysis/"
# Yearly coefficients are blanked when data availability (%) is <= this value.
min_percentage = 0

In [2]:
# Imports
%matplotlib inline

from ipywidgets import interactive,HBox
import pandas as pd
import numpy as np
from IPython.display import clear_output
import matplotlib.pyplot as plt
        
import ipywidgets as widgets
from IPython.display import display, HTML
import os, glob
import datetime as dt
import matplotlib.lines as mlines

# Define custom functions
def create_df(choices, folder=None, start=None, end=None, step=None):
    """Load one CSV per station and align them on a common, regular time index.

    Parameters
    ----------
    choices : iterable of str
        Station names; each one is read from ``<folder>/<name>.csv``.
    folder : str, optional
        Folder holding the CSV files. Defaults to the notebook-level
        ``data_folder``.
    start, end : datetime-like, optional
        Bounds of the target index. Default to the notebook-level
        ``start_time`` and ``end_time``.
    step : str, optional
        pandas frequency alias of the target index. Defaults to the
        notebook-level ``freq``.

    Returns
    -------
    pandas.DataFrame
        One column per requested station (in the requested order), indexed by
        ``pd.date_range(start, end, freq=step)``. Timestamps that are not
        present in a file are left as NaN.

    Raises
    ------
    ValueError
        If a file does not contain exactly one data column next to its
        date index.
    """
    # Fall back to the notebook settings only for the arguments left unset.
    folder = data_folder if folder is None else folder
    start = start_time if start is None else start
    end = end_time if end is None else end
    step = freq if step is None else step

    # The target index is identical for every station: build it once.
    index = pd.date_range(start, end, freq=step)
    choices = list(choices)
    df = pd.DataFrame(index=index, columns=choices)
    for name in choices:
        path = os.path.join(folder, name + ".csv")
        # First column is the (day-first) date index; empty fields are NaN.
        station = pd.read_csv(path, index_col=0, header=0, parse_dates=True, dayfirst=True, na_values='')
        if station.shape[1] != 1:
            raise ValueError(
                "%s: expected exactly one data column, found %d" % (path, station.shape[1])
            )
        # Pick the single column explicitly instead of assigning a whole frame.
        df[name] = station.iloc[:, 0].reindex(index)
    return df

def multiplot(widg):
    """Redraw the discharge plot for the stations picked in the multi-select.

    Parameters
    ----------
    widg : dict-like
        Change notification of the selector; only ``widg['new']`` (the new
        selection) is read.
    """
    choices = list(widg['new'] or ())
    if not choices:
        # Nothing selected: a frame without columns cannot be plotted, so just
        # empty the panel instead of surfacing a pandas error.
        output.clear_output()
        return

    # create_df already returns exactly the requested columns.
    data = create_df(choices)
    # wait=True keeps the previous figure until the new one is ready.
    output.clear_output(wait=True)
    with output:
        data.plot(figsize=(10, 7))
        plt.show()
        
def combinedplot(widg):
    """Plot rainfall and observed discharge of one section on twin y-axes.

    Parameters
    ----------
    widg : dict-like
        Change notification of the dropdown; only ``widg['new']`` (the newly
        selected section name) is read.

    Rainfall (``series["mm_h"]``) is drawn in blue against the left axis and
    discharge (``series["discharge"]``) in red against the right axis.
    """
    choices = widg['new']
    if not choices:
        # No section selected: there is nothing meaningful to draw (the
        # ``series`` dict itself is not plottable), so just empty the panel.
        output2.clear_output()
        return

    data = series["discharge"].loc[:, choices]
    data_rain = series["mm_h"].loc[:, choices]

    # wait=True keeps the previous figure until the new one is ready.
    output2.clear_output(wait=True)
    with output2:
        # Explicit axes: do not depend on which axes pyplot considers current.
        _, ax_rain = plt.subplots(figsize=(15, 7))
        data_rain.plot(ax=ax_rain, color='b')
        ax_rain.set_ylim(bottom=0)
        ax_rain.set_ylabel("mm")

        ax_flow = ax_rain.twinx()
        data.plot(ax=ax_flow, color='r')
        ax_flow.set_ylim(bottom=0)
        ax_flow.set_ylabel("m3/s")

        # Proxy artists so that a single legend covers both axes.
        blue_line = mlines.Line2D([], [], color='blue', label='rain')
        red_line = mlines.Line2D([], [], color='red', label='discharge')
        ax_flow.legend(handles=[blue_line, red_line])
        plt.show()

In [3]:
# Turn the textual analysis bounds from the settings into datetime objects
start_time, end_time = (
    dt.datetime.strptime(bound, "%Y-%m-%d %H:%M")
    for bound in (start_analysis, end_analysis)
)

In [4]:
# Read files and extract station names
files = glob.glob(os.path.join(data_folder, "*.csv"))
# splitext removes only the extension, so every name maps back to "<name>.csv"
opts = sorted(os.path.splitext(os.path.basename(path))[0] for path in files)

# Pre-select the second station when there is one, otherwise the first one
# (or nothing at all when the folder holds no CSV file).
default_selection = opts[1:2] or opts[:1]

# Generate list
selector = widgets.SelectMultiple(
    options=opts,
    value=default_selection,
    rows=len(opts),
    disabled=False,
)

output = widgets.Output()

# Set layout and display
form_item_layout = widgets.Layout(
    display='flex',
    flex_flow='row',
    justify_content='space-between'
)
display(widgets.HBox([selector, output], layout=form_item_layout))

# Re-generate multiplot whenever the selection changes
selector.observe(multiplot, names='value')

# observe() only reacts to changes, so draw the pre-selected station once
if selector.value:
    multiplot({'new': selector.value})

HBox(children=(SelectMultiple(index=(1,), options=('ADHI_1001', 'ADHI_1002', 'ADHI_1004', 'ADHI_185', 'ADHI_28…

## Runoff coefficient analysis

In [5]:
# Load the basin-average rainfall tables, one per unit suffix ("mm_h", "m3_s")
series = {}
for data_type in ["mm_h", "m3_s"]:
    table = pd.read_csv(os.path.join(basin_rainfall_fld, "average_rainfall_" + data_type + ".txt"), index_col=0, header=0, parse_dates=True)
    # Infinite values are replaced by 0
    series[data_type] = table.replace([np.inf, -np.inf], 0)

# Aggregate to the analysis frequency (skipped for "H"; presumably the files
# are already hourly - TODO confirm). skipna=False makes a period NaN as soon
# as one of its values is missing.
if freq != "H":
    series["mm_h"] = series["mm_h"].resample(freq).agg(pd.Series.sum, skipna=False)
    series["m3_s"] = series["m3_s"].resample(freq).agg(pd.Series.mean, skipna=False)

# Sections present in the rainfall table but without a "<name>.csv" file
missing_data = [i for i in series["mm_h"].columns if not os.path.isfile(os.path.join(data_folder, i + ".csv"))]
if missing_data:
    # Only warn when something is actually missing
    display("WARNING! Data for sections " + ", ".join(missing_data) + " are missing!")

series["discharge"] = create_df([i for i in series["mm_h"].columns if i not in missing_data])



In [6]:
 # Rainfall totals per section: the "mm_h" table summed over each "Y" resampling bin
 series["mm_h"].resample("Y").sum()

Unnamed: 0,ADHI_185,ADHI_280,ADHI_550,ADHI_551,ADHI_599,ADHI_619,ADHI_620,ADHI_1001,ADHI_1002,ADHI_1004,G4900,SIEREM_01,SIEREM_02,SIEREM_03,ADHI_merged_01
1979-12-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1980-12-31,1188.771284,641.154247,462.072259,443.533253,572.283071,513.35535,402.744905,552.932788,640.577254,777.934392,495.912465,732.040346,1024.595241,150.339913,648.180842
1981-12-31,1218.226619,632.341701,345.779324,442.353334,595.371294,532.775115,409.519233,524.464831,591.802322,784.358138,437.703574,725.090246,1044.108035,134.794522,539.157431
1982-12-31,1178.179326,606.386315,372.217659,339.281063,556.809714,521.809397,420.694615,530.578379,540.698925,729.921427,495.723658,652.285565,1013.790041,102.415644,483.962047
1983-12-31,964.723486,582.147537,250.073243,292.727577,497.947485,428.326842,284.003076,443.327446,562.156935,632.312398,321.741338,606.20197,846.467408,94.308306,436.743031


In [7]:
# Analyse series
# Drop the sections without a single discharge value in the analysis window
series["discharge"] = series["discharge"].dropna(axis=1, how="all")

# Availability: yearly percentage of time steps that carry a discharge value,
# relative to the number of time steps of the "m3_s" rainfall table.
yearly_observed = series["discharge"].resample("Y").count()
yearly_expected = series["m3_s"].resample("Y").count()
availability = (
    (100 * (yearly_observed / yearly_expected))
    .replace([np.inf, -np.inf], np.nan)   # no expected steps -> undefined
    .dropna(axis=1, how="all")            # sections absent from one of the tables
    .dropna(axis=0, how="all")            # years absent from one of the tables
    .fillna(0)                            # undefined ratio counts as 0 %, keeps the int cast safe
    .astype("int")
)

# Runoff coefficient: yearly discharge sum over yearly "m3_s" rainfall sum.
# NOTE(review): discharge is summed over the observed steps only, while the
# rainfall sum covers every step, so years with low availability are biased
# low - confirm whether rainfall should be limited to the observed steps.
yearly_discharge = series["discharge"].resample("Y").agg(pd.Series.sum, skipna=True)
yearly_rainfall = series["m3_s"].resample("Y").agg(pd.Series.sum, skipna=True)
runoff_coefficient = (
    (yearly_discharge / yearly_rainfall)
    .dropna(axis=1, how="all")
    .dropna(axis=0, how="all")
)

# Blank the coefficients of the years with too little data. The availability
# table is aligned on the coefficient table first, because both were trimmed
# independently; cells without an availability figure count as 0 %.
too_sparse = availability.reindex_like(runoff_coefficient).fillna(0) <= min_percentage
runoff_coefficient = runoff_coefficient.mask(too_sparse)

# Label the rows "<year>_avail" / "<year>_coeff" so both tables can sit side by side
availability.index = [str(i) + "_avail" for i in availability.index.year]
runoff_coefficient.index = [str(i) + "_coeff" for i in runoff_coefficient.index.year]

# Build the summary once, then show it and export it
coefficient_summary = pd.concat([availability.T, runoff_coefficient.T], axis=1)
with pd.option_context('display.float_format', '{:0.3f}'.format):
    display(coefficient_summary)
coefficient_summary.to_csv("/home/andrea/Desktop/coeff.csv")


Unnamed: 0,1980_avail,1981_avail,1982_avail,1983_avail,1980_coeff,1981_coeff,1982_coeff,1983_coeff
ADHI_1001,56,46,50,54,0.04,0.019,0.03,0.026
ADHI_1002,38,4,0,0,0.012,0.002,,
ADHI_1004,24,0,23,0,0.006,,0.005,
ADHI_185,91,92,90,50,0.094,0.057,0.022,0.017
ADHI_280,26,24,49,27,0.007,0.003,0.014,0.008
ADHI_550,13,36,33,28,0.015,0.04,0.083,0.093
ADHI_551,34,47,41,41,0.026,0.009,0.03,0.021
ADHI_599,46,40,34,45,0.069,0.05,0.026,0.037
ADHI_619,45,42,25,25,0.043,0.03,0.034,0.028
ADHI_620,12,20,21,4,0.029,0.045,0.051,0.005


In [8]:
# Generate list
# The columns of the availability table are the sections that can be analysed
section_names = list(availability.columns)
selector2 = widgets.Dropdown(
    options=section_names,
    # Guard against an empty table instead of failing on [0]
    value=section_names[0] if section_names else None,
)

output2 = widgets.Output()

# Set layout and display
form_item_layout = widgets.Layout(
    display='flex',
    justify_content='space-between'
)
display(widgets.VBox([selector2, output2], layout=form_item_layout))

# Re-generate the combined plot whenever another section is picked
selector2.observe(combinedplot, names='value')

# observe() only reacts to changes, so draw the default section once
if selector2.value is not None:
    combinedplot({'new': selector2.value})

VBox(children=(Dropdown(options=('ADHI_1001', 'ADHI_1002', 'ADHI_1004', 'ADHI_185', 'ADHI_280', 'ADHI_550', 'A…