In [1]:
%matplotlib inline
import matplotlib.pyplot as plt, seaborn as sn, mpld3
import pandas as pd, os, glob
sn.set_context('notebook')

In [2]:
# Parse Norwegian flows (all data)
nor_xls = r'\\niva-of5\osl-userdata$\JES\Documents\James_Work\Staff\Susi_S\ECOREG\Raw_Data\Norway\ECOREG discharge complete.xlsx'
nor_df = pd.read_excel(nor_xls, 
                       sheetname='discharge ECOREG',
                       index_col=0)

# Resample to daily
nor_df = nor_df.resample('D').mean()

# Interpolate
nor_df['S12.8'].interpolate(method='linear', inplace=True)

nor_df.head()

Unnamed: 0_level_0,S109.2,S109.21,S109.9,S12.137,S12.2,S12.207,S12.7,S12.8,S16.1,S16.128,...,S27.16,S30.8,S35.2,S36.31,S36.32,S50.11,S50.13,S6.10,S6.9,S8.2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-01,9.47282,0.51892,3.59022,20.14581,6.63797,0.92749,1.96617,,17.23073,1.37648,...,5.29065,,,,0.19093,0.97375,1.2915,0.09012,,2.27335
2000-01-02,7.72643,0.5172,2.67732,19.96283,6.63797,0.95418,1.60709,,16.96029,1.26817,...,10.30134,,,,0.2238,1.00387,1.26581,0.09012,,1.95569
2000-01-03,5.95121,0.50668,2.28116,19.98467,6.63797,0.90896,1.52404,,18.14732,1.1804,...,12.24407,,,,0.2829,1.12578,1.24016,0.09866,,1.73315
2000-01-04,5.95121,0.49346,2.07084,20.27129,6.25267,0.76776,1.51251,,19.49061,1.13079,...,10.02236,,,,0.28754,1.16399,1.21454,0.09866,,1.75367
2000-01-05,5.95121,0.50905,2.0863,19.97185,6.25267,0.8759,1.46788,,21.34705,0.98371,...,6.99554,,,,0.43283,1.14173,1.21454,0.09457,,1.55473


In [40]:
def process_iha(df):
    """ Processes the first five groups of IHA indicators.
    
    Args:
        df  Pandas data frame with DAILY resolution consisting
            of a single column entitled 'site_name' and a date-time
            index.
    
    Returns:
        Pandas data frame of IHA parameters calculated from the
        IHA R package.
    """
    # Set up connection to R. This all seems unnecessarily complicated!
    import rpy2.interactive as r
    import rpy2.interactive.packages
    from rpy2.robjects.packages import importr
    import pandas.rpy.common as com
    from rpy2.robjects import pandas2ri
    pandas2ri.activate()
    
    # Circular mean function from scipy (see above)
    from scipy.stats import circmean

    # Load necessary R packages
    importr('zoo', lib_loc="//niva-of5/osl-userdata$/JES/Documents/R/win-library/3.2")   
    importr('IHA', lib_loc="//niva-of5/osl-userdata$/JES/Documents/R/win-library/3.2")
    
    # Import R packages into interactive session
    zoo = r.packages.importr('zoo')
    iha = r.packages.importr('IHA')
    
    # Get path to package methods
    rlib = r.packages.packages

    # Convert df to 2 columns ['Dates', 'Flows']
    df2 = df.reset_index()

    # Convert Pandas df to R 
    ts = rlib.zoo.read_zoo(df2, format="%Y-%m-%d")
    
    # Processing for Group 1
    # Calculate group 1 stats.
    rg1 = rlib.IHA.group1(ts)

    # Convert back to Python
    grp1 = com.convert_robj(rg1)
    
    # Get stats
    grp1 = grp1.describe().T

    # Coefficient of dispersion
    grp1['CoD'] = (grp1['75%'] - grp1['25%']) / grp1['50%']

    # Format grp 1 df
    grp1.index.name = 'Indicator'
    grp1.reset_index(inplace=True)
    grp1['Group'] = 1
    grp1.index = [grp1['Group'], grp1['Indicator']]
    grp1 = grp1[['50%', 'CoD']]
    
    # Processing for Group 2
    # Calculate group 2 stats.
    rg2 = rlib.IHA.group2(ts)

    # Convert back to Python
    grp2 = com.convert_robj(rg2)

    # Get stats
    grp2 = grp2.describe().T

    # Coefficient of dispersion
    grp2['CoD'] = (grp2['75%'] - grp2['25%']) / grp2['50%']

    # Format grp 2 df
    grp2.index.name = 'Indicator'
    grp2.reset_index(inplace=True)
    grp2['Group'] = 2
    grp2 = grp2[grp2['Indicator'] != 'year']
    grp2.index = [grp2['Group'], grp2['Indicator']]
    grp2 = grp2[['50%', 'CoD']]

    # Processing for Group 3
    # Calculate group 3 stats.
    rg3 = rlib.IHA.group3(ts)

    # Convert back to Python
    grp3 = com.convert_robj(rg3)

    # Get stats using circular mean and assuming 366 days per year (as in IHA)
    c_av = circmean(grp3, high=366, low=0, axis=0)
    
    # We won't include a CoD for this stat.
    # Build df to store this info
    grp3 = pd.DataFrame(data=[c_av, [pd.np.nan, pd.np.nan]], 
                        columns=['Min', 'Max'],
                        index=['50%', 'CoD']).T

    # Format grp 2 df
    grp3.index.name = 'Indicator'
    grp3.reset_index(inplace=True)
    grp3['Group'] = 3
    grp3.index = [grp3['Group'], grp3['Indicator']]
    grp3 = grp3[['50%', 'CoD']]

    # Processing for Group 4
    # Calculate group 4 stats.
    rg4 = rlib.IHA.group4(ts)

    # Convert back to Python
    grp4 = com.convert_robj(rg4)

    # Get stats
    grp4 = grp4.describe().T

    # Coefficient of dispersion
    grp4['CoD'] = (grp4['75%'] - grp4['25%']) / grp4['50%']

    # Format grp 4 df
    grp4.index.name = 'Indicator'
    grp4.reset_index(inplace=True)
    grp4['Group'] = 4
    grp4.index = [grp4['Group'], grp4['Indicator']]
    grp4 = grp4[['50%', 'CoD']]  

    # Processing for Group 5
    # Calculate group 5 stats.
    rg5 = rlib.IHA.group5(ts)

    # Convert back to Python
    grp5 = com.convert_robj(rg5)
    return grp5
    # Get stats
    grp5 = grp5.describe().T

    # Coefficient of dispersion
    grp5['CoD'] = (grp5['75%'] - grp5['25%']) / grp5['50%']

    # Format grp 4 df
    grp5.index.name = 'Indicator'
    grp5.reset_index(inplace=True)
    grp5['Group'] = 5
    grp5.index = [grp5['Group'], grp5['Indicator']]
    grp5 = grp5[['50%', 'CoD']]  

    # Combine results
    iha_res = pd.concat([grp1, grp2, grp3, grp4, grp5], axis=0)
    
    # Rename 50% col (because not all values are actually medians)
    # Also add heirarchical index for site_name
    iha_res.columns = [[df.columns[0], df.columns[0]],['Cent_Est', 'CoD']]
   
    return iha_res

In [41]:
col = 'S50.11'
st = '2012-09-10'
end = '2013-09-10'

# Get series and trunctae
df = nor_df[[col]].truncate(before=st, after=end)

# Fill no data
df.interpolate(method='linear', inplace=True)

# Assert no missing values remain
assert df.isnull().sum(axis=0)[0] == 0, 'Dataframe has missing values'

# Append results
df2 = process_iha(df)

df2

Unnamed: 0,Rise rate,Fall rate,Reversals
2012,0.49733,-0.37923,7.0
2013,0.27711,-0.0565,106.0


In [12]:
col = 'S109.2'
st = '2012-09-15'
end = '2013-09-15'

# Get series and trunctae
df = nor_df[[col]].truncate(before=st, after=end)

# Fill no data
df.interpolate(method='linear', inplace=True)

# Assert no missing values remain
assert df.isnull().sum(axis=0)[0] == 0, 'Dataframe has missing values'

# Append results
df2 = process_iha(df)

df2

Unnamed: 0,October,November,December,January,February,March,April,May,June,July,August,September
2012,,,,,,,,,,,,14.84182
2013,10.3861,8.467465,5.46271,3.6946,2.632585,2.493,6.132395,78.11932,58.761195,23.99583,25.64333,11.12775
