In [None]:
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import os

In [None]:
# Project root: the parent of the directory the notebook is run from.
root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

In [None]:
# Site keys used to build the per-site file names below.
sites = [
    'extras',
    'ganga_damodar',
    'godavari',
    'kali',
    'kaveri',
    'krishna',
    'mahanadi',
    'narmada',
    'penner',
    'sharavati',
    'tapi',
]

In [None]:
import yaml

In [None]:
# Read the experiment configuration. The file is opened in a context manager
# so the handle is closed deterministically instead of being left to the GC.
with open(os.path.join(root, 'bin', 'experiments-lowest.yaml'), 'r') as config_file:
    sites_data = yaml.load(config_file, Loader=yaml.SafeLoader)

In [None]:
# Load the per-site error datasets for both variants (file names without a suffix).
preds_dir = os.path.join(root, 'data', 'final_preds')
ds_gconv, ds_noconv = {}, {}
for site in sites:
    ds_gconv[site] = xr.load_dataset(os.path.join(preds_dir, f'{site}-gconv-errors.nc'))
    ds_noconv[site] = xr.load_dataset(os.path.join(preds_dir, f'{site}-no_gconv-errors.nc'))

In [None]:
# Load the per-site error datasets whose file names carry a two-character
# suffix: the last two characters of the file stem recorded in
# sites_data[site]['gconv'] / sites_data[site]['no_gconv'].
preds_dir = os.path.join(root, 'data', 'final_preds')
ds_gconv, ds_noconv = {}, {}
for site in sites:
    gconv_tag = os.path.splitext(os.path.basename(sites_data[site]['gconv']))[0][-2:]
    noconv_tag = os.path.splitext(os.path.basename(sites_data[site]['no_gconv']))[0][-2:]
    ds_gconv[site] = xr.load_dataset(os.path.join(preds_dir, f'{site}-gconv-{gconv_tag}-errors.nc'))
    ds_noconv[site] = xr.load_dataset(os.path.join(preds_dir, f'{site}-no_gconv-{noconv_tag}-errors.nc'))

In [None]:
# Merge the 'pearson-r' variable of every site into one dataset per variant.
gconv_scores = [ds_gconv[site]['pearson-r'] for site in sites]
noconv_scores = [ds_noconv[site]['pearson-r'] for site in sites]
all_pearson_gconv = xr.merge(gconv_scores)
all_pearson_noconv = xr.merge(noconv_scores)

In [None]:
# Inspect the 'step' values carried by the 0.25 quantile taken across sites.
all_pearson_gconv.quantile(q=0.25, dim='site')['step'].values

In [None]:
# Inspect the 0.25 quantile of 'pearson-r' across sites, with size-1 axes removed.
all_pearson_gconv.quantile(q=0.25, dim='site')['pearson-r'].values.squeeze()

In [None]:
from matplotlib.patches import Patch
from matplotlib.lines import Line2D

In [None]:
# Site-averaged 'pearson-r' versus step for both variants, each with a shaded
# band between the 0.33 and 0.67 quantiles across sites; saved as a PDF.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))

# (dataset, mean-line colour, band fill colour) for each variant.
variants = [
    (all_pearson_gconv, 'g', '#d9ead3'),
    (all_pearson_noconv, 'b', '#cfe2f3'),
]

# Mean curves first, then the bands, matching the original call order.
for variant_ds, line_colour, band_colour in variants:
    variant_ds.mean(dim='site').drop('model')['pearson-r'].plot(ax=ax, c=line_colour)
for variant_ds, line_colour, band_colour in variants:
    ax.fill_between(
        # x values are simply the step coordinate; no quantile is needed to get them.
        variant_ds['step'].values.squeeze(),
        variant_ds.quantile(dim='site', q=0.33)['pearson-r'].values.squeeze(),
        variant_ds.quantile(dim='site', q=0.67)['pearson-r'].values.squeeze(),
        color=band_colour,
        alpha=0.5,
    )

# NOTE(review): the plotted variable is 'pearson-r' but the axis is labelled as a
# coefficient of determination -- confirm which quantity the files hold.
ax.set_ylabel('Coefficient of Determination')
ax.set_xlabel('Prediction Horizon [days]')  # closing bracket was missing

# Raw strings keep the TeX backslashes intact ('\s' and '\m' are invalid escape
# sequences in a normal literal); '\pm' replaces '\mp' so the legend reads +/-.
# NOTE(review): the bands are 0.33-0.67 quantiles, which the legend describes as
# +/- 0.5 sigma -- confirm that this approximation is intended.
legend_elements = [
    Line2D([0], [0], color='g', lw=2, label='Baseline Mean'),
    Line2D([0], [0], color='b', lw=2, label='GConv Ablation Mean'),
    Patch(facecolor='#d9ead3', edgecolor=None, label=r'Baseline $\pm 0.5\sigma$'),
    Patch(facecolor='#cfe2f3', edgecolor=None, label=r'GConv Abl. $\pm 0.5\sigma$'),
]
ax.legend(handles=legend_elements, loc='lower center', ncol=2, bbox_to_anchor=(0.5, -0.45))
fig.savefig('./gconv_prediction_horizon.pdf', bbox_inches='tight')

In [None]:
# Mean over all steps of (ablation - baseline) site-averaged 'pearson-r'.
(
    all_pearson_noconv.mean(dim='site').drop('model')['pearson-r']
    - all_pearson_gconv.mean(dim='site').drop('model')['pearson-r']
).mean()

In [None]:
# Site-averaged 'pearson-r' of each variant as a transposed pandas object.
df_gconv, df_noconv = (
    variant_ds.mean(dim='site').drop('model')['pearson-r'].to_pandas().T
    for variant_ds in (all_pearson_gconv, all_pearson_noconv)
)

In [None]:
# Overlay the 5-row rolling means of both variants on one axis.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
df_gconv.rolling(window=5).mean().plot(ax=ax, c='g')
df_noconv.rolling(window=5).mean().plot(ax=ax, c='c')

In [None]:
# Step windows used to group the violin plots.
slices = [
    slice(start, stop)
    for start, stop in [(0, 5), (5, 15), (15, 30), (30, 50), (50, 75), (75, 90)]
]

In [None]:
def violin(arr, c_face, c_edge, c_edge_mean, step_slices=None):
    """Draw one violin per step window of the per-site 'pearson-r' values.

    Parameters
    ----------
    arr : dataset with a 'pearson-r' variable and 'step' and 'model' dimensions.
    c_face : face colour of the violin bodies.
    c_edge : edge colour of the min / max / bar lines.
    c_edge_mean : edge colour of the mean lines (drawn with linewidth 3).
    step_slices : optional sequence of ``slice`` objects selecting 'step'
        windows; defaults to the module-level ``slices``.

    Returns
    -------
    The matplotlib figure containing the plot.
    """
    if step_slices is None:
        step_slices = slices

    fig, ax = plt.subplots(1, 1, figsize=(6, 3))
    # One array per window: the mean over the window's steps and over 'model'.
    data = [
        arr.sel({'step': s}).mean(dim=['step', 'model'])['pearson-r'].values
        for s in step_slices
    ]
    collections = ax.violinplot(data, showmeans=True)

    for body in collections['bodies']:
        body.set_facecolor(c_face)
    for part in ('cmaxes', 'cmins', 'cbars'):
        collections[part].set_edgecolor(c_edge)
    mean_lines = collections['cmeans']
    mean_lines.set_edgecolor(c_edge_mean)
    mean_lines.set_linewidth(3)

    ax.set_ylim(-0.1, 0.8)

    # violinplot puts the bodies at x = 1..N by default. Pin the ticks there so
    # each label sits under its violin instead of depending on whatever ticks
    # the automatic locator happens to pick.
    labels = [f'{s.start}-{s.stop}' for s in step_slices]
    if labels:
        labels[0] += ' days'
    ax.set_xticks(list(range(1, len(labels) + 1)))
    ax.set_xticklabels(labels)

    # NOTE(review): data is 'pearson-r' but the label says coefficient of
    # determination -- confirm which quantity is stored.
    ax.set_ylabel('Coefficient of Determination')
    return fig

In [None]:
# Violin figure for the gconv results, saved with a transparent background.
gconv_fig = violin(
    all_pearson_gconv,
    c_face='#f9cbab',
    c_edge='#e97f34',
    c_edge_mean='#e06666',
)
gconv_fig.savefig('./gconv-allsites.png', transparent=True, bbox_inches='tight')

In [None]:
# Violin figure for the no-gconv results. The original passed all_pearson_gconv
# here, so the file saved as 'no_gconv-allsites.png' actually showed the gconv data.
no_gconv_fig = violin(all_pearson_noconv, '#cfe2f3ff', '#3d85c6', '#e06666')
no_gconv_fig.savefig('./no_gconv-allsites.png', bbox_inches='tight', transparent=True)

In [None]:
# Mean 'pearson-r' over the 75-90 step window.
all_pearson_gconv.sel(step=slice(75, 90))['pearson-r'].mean()  # 37.2 33.7

In [None]:
import json

In [None]:
# Load data/<site>.json for every site. Each file is opened in a context
# manager so its handle is closed as soon as it has been parsed.
site_res = {}
for site in sites:
    with open(os.path.join(root, 'data', f'{site}.json'), 'r') as site_file:
        site_res[site] = json.load(site_file)

In [None]:
# Display the parsed per-site JSON contents.
site_res

In [None]:
# One line per site (alphabetical): entry count, site key, entries with '_' shown as spaces.
for site in sorted(sites):
    entries = site_res[site]
    readable = [entry.replace('_', ' ') for entry in entries]
    print(len(entries), site, ', '.join(readable))

In [None]:
# Step windows used for the summary table columns.
tables_slices = [
    slice(start, stop)
    for start, stop in [(0, 5), (5, 20), (20, 50), (50, 90)]
]

In [None]:
# Print one '&'-separated row per site: mean 'pearson-r' of that site's entries
# within each table window.
for site in sites:
    cells = []
    for step_window in tables_slices:
        window_mean = all_pearson_gconv.sel({'site': site_res[site], 'step': step_window})['pearson-r'].mean().values
        cells.append(f'{window_mean:02.2f}')
    print(site, ' & '.join(cells))

In [None]:
# For every site, print its entries ranked by step-averaged 'pearson-r' (ascending).
for site in sites:
    print(site)
    ranked = (
        all_pearson_gconv
        .sel({'site': site_res[site]})
        .drop('model')
        .mean(dim='step')['pearson-r']
        .to_pandas()
        .T
        .sort_values(0)
    )
    print(ranked)

In [None]:
# Step-averaged 'pearson-r' for every entry of the 'site' dimension, sorted ascending.
(
    all_pearson_gconv
    .drop('model')
    .mean(dim='step')['pearson-r']
    .to_pandas()
    .T
    .sort_values(0)
)

In [None]:
# Display the merged gconv 'pearson-r' dataset.
all_pearson_gconv

In [None]:
# violin() requires three colour arguments; calling it with the dataset alone
# raises TypeError. Use the blue palette applied to the no-gconv figure.
violin(all_pearson_noconv, '#cfe2f3ff', '#3d85c6', '#e06666')

In [None]:
# Two stacked violin panels: gconv on top, no-gconv below, sharing one y-range.
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(6, 6))
top_ax, bottom_ax = axs

data_gconv = [
    all_pearson_gconv.sel({'step': s}).mean(dim=['step', 'model'])['pearson-r'].values
    for s in slices
]
data_noconv = [
    all_pearson_noconv.sel({'step': s}).mean(dim=['step', 'model'])['pearson-r'].values
    for s in slices
]

top_ax.violinplot(data_gconv, showmeans=True)
bottom_ax.violinplot(data_noconv, showmeans=True)

top_ax.set_ylim(-0.05, 0.8)
bottom_ax.set_ylim(-0.05, 0.8)

In [None]:
# Quick look at the 5-row rolling mean of the gconv curve.
df_gconv.rolling(window=5).mean().plot()

In [None]:
# Display the merged gconv 'pearson-r' dataset.
all_pearson_gconv

### Load the tracked-reservoir table to compare model skill against lake and upstream area

In [None]:
from google.cloud import bigquery
from shapely import geometry, wkt
import geopandas as gpd

In [None]:
# BigQuery client built with default arguments.
client = bigquery.Client()

In [None]:
# Query text: every column of the tracked-reservoirs table (no interpolation needed).
Q = """
    SELECT *
    FROM `oxeo-main.wave2web.tracked-reservoirs`
"""

In [None]:
# Run the query and pull the full result set into a DataFrame.
df = (
    client.query(Q)
    .result()
    .to_dataframe()
)

In [None]:
from area import area

In [None]:
# Parse the WKT strings in 'lake_geom' into geometry objects. The column is
# overwritten in place, so only string values are parsed: re-running the cell
# leaves already-parsed geometries untouched instead of failing in wkt.loads.
df['lake_geom'] = df['lake_geom'].apply(
    lambda value: wkt.loads(value) if isinstance(value, str) else value
)

In [None]:
# Parse the 'upstream_geom' WKT strings into a new 'geometry' column.
df['geometry'] = df['upstream_geom'].map(wkt.loads)

In [None]:
# Area of each lake geometry, computed by area() on its GeoJSON-style mapping.
df['lake_area'] = df['lake_geom'].map(lambda shape: area(geometry.mapping(shape)))

In [None]:
# Area of each upstream geometry, computed by area() on its GeoJSON-style mapping.
df['upstream_area'] = df['geometry'].map(lambda shape: area(geometry.mapping(shape)))

In [None]:
# 'pearson-r' averaged over the 'model' and 'step' dimensions, as a pandas object.
res_df = (
    all_pearson_gconv
    .mean(dim=['model', 'step'])['pearson-r']
    .to_pandas()
)

In [None]:
# Boolean mask: which index labels of res_df appear in the table's 'name' column.
res_df.index.isin(df['name'])

In [None]:
# Attach the scores to the reservoir table by matching 'name' against the
# index of res_df (left join, so rows without a score get NaN).
# - Any existing 'pearson-r' column is dropped first so re-running the cell
#   does not produce 'pearson-r_x' / 'pearson-r_y' duplicates.
# - The score column is named explicitly rather than via rename({0: ...}),
#   which only applies when the incoming Series is unnamed.
df = pd.merge(
    df.drop(columns=['pearson-r'], errors='ignore'),
    pd.DataFrame({'pearson-r': res_df}),
    how='left',
    left_on='name',
    right_index=True,
)

In [None]:
# Table ordered by lake area, smallest first.
df.sort_values(by='lake_area')  # indirasagar

In [None]:
# Scatter of score (x) against lake area (y) for rows that have a score.
has_score = df['pearson-r'].notna()
plt.scatter(df.loc[has_score, 'pearson-r'], df.loc[has_score, 'lake_area'])

In [None]:
# Scatter of score (x) against upstream area (y) for rows that have a score.
has_score = df['pearson-r'].notna()
plt.scatter(df.loc[has_score, 'pearson-r'], df.loc[has_score, 'upstream_area'])

In [None]:
# Rows that have a score, ordered by that score (ascending).
df.dropna(subset=['pearson-r']).sort_values(by='pearson-r')

In [None]:
# Wrap the table as a GeoDataFrame using the 'geometry' column as the active geometry.
# NOTE(review): no CRS is set here -- confirm whether one should be assigned.
gdf = gpd.GeoDataFrame(df, geometry='geometry')