Exploratory analysis of batch results

In [1]:
# (Re)load the autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel.
%reload_ext autoreload
# Mode 2: reload all modules before executing each cell.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from __future__ import division

In [3]:
import os
import sys
import numpy as np
import pandas as pd
import pickle
import tarfile
from glob import glob

from pandas.api.types import CategoricalDtype

# dirty hack for dev purposes only - when finished I will install the package properly
sys.path.insert(0, os.path.abspath(".."))
import camp_process

In [4]:
# Plot styling, currently disabled:
# sns.set_context("talk")
# sns.set_style("white")

# Revised dataset which includes SW-GW interaction
DATA_PATH = '../data'

# Short alias used when building file paths
pj = os.path.join

# Every gzipped batch archive found directly under DATA_PATH
batch_files = glob(os.path.join(DATA_PATH, '*.gz'))

In [5]:
def convert_indicators_to_pd(x):
    """Convert a CIM result dict to a single DataFrame.

    Each ``(key, value)`` pair of ``x`` is turned into its own DataFrame and
    all of them are then placed side by side (column-wise).

    Parameters
    ----------
    x : dict
        Mapping of result key to the data for that key. A value is first
        passed to ``pd.DataFrame.from_dict`` and transposed; if pandas raises
        ``ValueError`` (e.g. the value is a dict of scalars), the value is
        instead wrapped in a one-row frame labelled with the key and then
        transposed, so the key becomes the column name.

    Returns
    -------
    pandas.DataFrame
        The per-key frames concatenated along ``axis=1``. An empty DataFrame
        is returned when ``x`` has no entries.
    """
    frames = []
    for key, values in x.items():
        try:
            frame = pd.DataFrame.from_dict(values).T
        except ValueError:
            # pandas needs an explicit index when every value is a scalar
            frame = pd.DataFrame(values, index=[key]).T
        # End try

        frames.append(frame)
    # End for

    if not frames:
        # pd.concat refuses an empty sequence; keep returning an empty frame
        return pd.DataFrame()

    # Concatenate once instead of re-copying the growing frame on every pass
    return pd.concat(frames, axis=1)


def combine_df(x, y):
    """Stack two CIM result DataFrames vertically.

    The rows of ``y`` follow the rows of ``x``; the original row labels are
    discarded and the result is given a fresh 0..n-1 index.
    """
    frames = (x, y)
    stacked = pd.concat(frames, ignore_index=True)
    return stacked


In [6]:
import os

In [7]:
# Cached CSV of the extreme-scenario indicators for the no-irrigation run
output_fn = "{}/no_irrigation_extreme_results.csv".format(DATA_PATH)

# Only process the raw archive when the cached CSV does not exist yet
if not os.path.exists(output_fn):
    # Plain literal: this path is absolute and does not depend on DATA_PATH
    extremes_fn = "C:/development/campaspe/sa-comparison/no_irrig_extreme.gz"
    ext_indicators, ext_scen_info = camp_process.catchment_indicators(extremes_fn, timeseries=False)
    extremes_results = convert_indicators_to_pd(ext_indicators)
    extremes_results.to_csv(output_fn)

In [8]:
# Cached CSV of the extreme-scenario indicators for the with-irrigation run
output_fn = "{}/with_irrigation_extreme_results.csv".format(DATA_PATH)

# Only process the raw archive when the cached CSV does not exist yet
if not os.path.exists(output_fn):
    # Plain literal: this path is absolute and does not depend on DATA_PATH
    extremes_fn = "C:/development/campaspe/sa-comparison/with_irrig_extreme.gz"
    ext_indicators, ext_scen_info = camp_process.catchment_indicators(extremes_fn, timeseries=False)
    extremes_results = convert_indicators_to_pd(ext_indicators)
    extremes_results.to_csv(output_fn)

---

In [9]:
# Input archive for the no-irrigation OAT run.
# Kept in a different directory from DATA_PATH because these files are very
# large (e.g. > 4.5GB).
oat_no_irrigation_large_sample_fn = "C:/development/campaspe/sa-comparison/oat_no_irrigation_10.gz"

In [10]:
# Cached CSV of the no-irrigation OAT indicators
output_fn = '{}/oat_no_irrigation_10_results.csv'.format(DATA_PATH)

# Rebuild the CSV only when it has not been written yet
if not os.path.exists(output_fn):
    no_irrig_indicators, scenario_info = camp_process.catchment_indicators(
        oat_no_irrigation_large_sample_fn,
        timeseries=False
    )
    # Convert to a DataFrame and swap the row labels for a 0..n-1 index
    no_irrigation_results = convert_indicators_to_pd(no_irrig_indicators).reset_index(drop=True)
    no_irrigation_results.to_csv(output_fn)

In [11]:
# Input archive for the with-irrigation OAT run (absolute path, outside DATA_PATH).
oat_with_irrigation_large_sample_fn = "C:/development/campaspe/sa-comparison/oat_with_irrigation_10.gz"

In [12]:
# Cached CSV of the with-irrigation OAT indicators
output_fn = '{}/oat_with_irrigation_10_results.csv'.format(DATA_PATH)

# Skip the expensive extraction when the CSV has already been written,
# matching the guard used for the no-irrigation OAT run.
if not os.path.exists(output_fn):
    with_irrig_indicators, scenario_info = camp_process.catchment_indicators(oat_with_irrigation_large_sample_fn, timeseries=False)
    with_irrigation_results = convert_indicators_to_pd(with_irrig_indicators)
    with_irrigation_results = with_irrigation_results.reset_index(drop=True)

    with_irrigation_results.to_csv(output_fn)

---

In [13]:
# Morris OAT results

In [14]:
# Cached CSV of the no-irrigation Morris OAT indicators
output_fn = '{}/moat_no_irrigation_10_results.csv'.format(DATA_PATH)

# Rebuild the CSV only when it has not been written yet
if not os.path.exists(output_fn):
    # Input sits in a different data directory as these files are very large (e.g. > 4.5GB)
    moat_no_irrigation_fn = "C:/development/campaspe/sa-comparison/moat_no_irrig_10.gz"
    no_irrig_indicators, scenario_info = camp_process.catchment_indicators(
        moat_no_irrigation_fn,
        timeseries=False
    )
    # Convert to a DataFrame and swap the row labels for a 0..n-1 index
    no_irrigation_results = convert_indicators_to_pd(no_irrig_indicators).reset_index(drop=True)
    no_irrigation_results.to_csv(output_fn)

In [15]:
# Cached CSV of the with-irrigation Morris OAT indicators
output_fn = '{}/moat_with_irrigation_10_results.csv'.format(DATA_PATH)

# Rebuild the CSV only when it has not been written yet
if not os.path.exists(output_fn):
    moat_with_irrigation_fn = "C:/development/campaspe/sa-comparison/moat_with_irrig_10.gz"
    with_irrig_indicators, scenario_info = camp_process.catchment_indicators(
        moat_with_irrigation_fn,
        timeseries=False
    )
    # Convert to a DataFrame and swap the row labels for a 0..n-1 index
    with_irrigation_results = convert_indicators_to_pd(with_irrig_indicators).reset_index(drop=True)
    with_irrigation_results.to_csv(output_fn)

---

In [16]:
# Saltelli sampling results

In [17]:
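# The Saltelli inputs live in a different data directory because they are very large (e.g. > 4.5GB).

# Alternative (currently unused) location of the no-irrigation archive, kept for reference:
# saltelli_no_irrigation_fn = "D:/temp/oat_study/saltelli_no_irrig_10.gz"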


In [18]:
# Cached CSV of the no-irrigation Saltelli indicators
output_fn = '{}/saltelli_no_irrigation_10_results.csv'.format(DATA_PATH)

# Rebuild the CSV only when it has not been written yet
if not os.path.exists(output_fn):
    saltelli_no_irrigation_fn = "C:/development/campaspe/sa-comparison/saltelli_no_irrig_10.gz"
    no_irrig_indicators, scenario_info = camp_process.catchment_indicators(
        saltelli_no_irrigation_fn,
        timeseries=False
    )
    # Convert to a DataFrame and swap the row labels for a 0..n-1 index
    no_irrigation_results = convert_indicators_to_pd(no_irrig_indicators).reset_index(drop=True)
    no_irrigation_results.to_csv(output_fn)

In [19]:
# Cached CSV of the with-irrigation Saltelli indicators
output_fn = '{}/saltelli_with_irrigation_10_results.csv'.format(DATA_PATH)

# Rebuild the CSV only when it has not been written yet
if not os.path.exists(output_fn):
    saltelli_with_irrigation_fn = "C:/development/campaspe/sa-comparison/saltelli_with_irrig_10.gz"
    with_irrig_indicators, scenario_info = camp_process.catchment_indicators(
        saltelli_with_irrigation_fn,
        timeseries=False
    )
    # Convert to a DataFrame and swap the row labels for a 0..n-1 index
    with_irrigation_results = convert_indicators_to_pd(with_irrig_indicators).reset_index(drop=True)
    with_irrigation_results.to_csv(output_fn)

---

In [20]:
# Cached CSV of the no-irrigation radial-sampling indicators
output_fn = '{}/radial_no_irrigation_10_results.csv'.format(DATA_PATH)

# Rebuild the CSV only when it has not been written yet
if not os.path.exists(output_fn):
    radial_no_irrigation_fn = "C:/development/campaspe/sa-comparison/radial_no_irrig_10.gz"
    no_irrig_indicators, scenario_info = camp_process.catchment_indicators(
        radial_no_irrigation_fn,
        timeseries=False
    )
    # Convert to a DataFrame and swap the row labels for a 0..n-1 index
    no_irrigation_results = convert_indicators_to_pd(no_irrig_indicators).reset_index(drop=True)
    no_irrigation_results.to_csv(output_fn)

In [21]:
# Cached CSV of the with-irrigation radial-sampling indicators
output_fn = '{}/radial_with_irrigation_10_results.csv'.format(DATA_PATH)

# Rebuild the CSV only when it has not been written yet
if not os.path.exists(output_fn):
    radial_with_irrigation_fn = "C:/development/campaspe/sa-comparison/radial_with_irrig_10.gz"
    with_irrig_indicators, scenario_info = camp_process.catchment_indicators(
        radial_with_irrigation_fn,
        timeseries=False
    )
    # Convert to a DataFrame and swap the row labels for a 0..n-1 index
    with_irrigation_results = convert_indicators_to_pd(with_irrig_indicators).reset_index(drop=True)
    with_irrigation_results.to_csv(output_fn)