In [None]:
#import ipywidgets as widgets
#from ast import literal_eval
#import sys
#import os
#import subprocess
#import configparser
#import pandas as pd
#from ipywe.ipywe._utils import close, enable, disable
#from ipywe.ipywe.fileselector import FileSelectorPanel
#from domain_lister import dlist

%run -i __utils.py

If `use_import` is set to `True`, configurations will be loaded from the filename assigned to `ini_import`.
Regardless of the value of `use_import`, configurations will be saved to the filename assigned to `ini_export`.

In [None]:
# Whether to pre-fill the GUI from an existing .ini, and which files to read from / write to.
use_import = True
ini_import = "SOMOSPIE_input.ini"
ini_export = "SOMOSPIE_input.ini"
possible_domains = "../data/dom_list.csv" # Set this variable to change where the comprehensive list of domains is to be found
if use_import:
    defaultconf = ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the list of files
    # it actually parsed, so an empty result means there are no defaults to import.
    if not defaultconf.read(ini_import):
        print(f"Could not read '{ini_import}'; continuing without imported defaults.")
        # Later cells key off use_import; disabling it here avoids a KeyError on the missing options.
        use_import = False

The following cell can be used to setup/download satellite data for a year, upon setting the targ_year variable and running.

In [None]:
# The following has been moved into a function in __utils.py.
# It needs to be made into a script that runs each year's fetch in an independent shell, perhaps using tmux.
# NOTE(review): this cell switches the working directory to ../data and switches to ../code on its
# last line (assumes the notebook starts in the code directory); an error in between leaves the
# kernel in ../data.

targ_year = 2017 # Set this variable to determine what year will be retrieved/setup.

%cd ../data
# Fetch the ecoregion data only when either of the two expected directories is missing.
# NOTE(review): %run is normally used for Python scripts/notebooks; confirm it works for this .sh file.
if not os.path.exists("TerrestrialEcoregions_L2_Shapefile") or not os.path.exists("NEONDomains_0"):
    %run -t fetch_ecoregions.sh

# Accept the years 1979 through 2017 inclusive.
if (1978 < targ_year) and (targ_year < 2018):
    # Nothing to do when the year's monthly .rds file already exists.
    if not os.path.exists("ESA_CCI/{}_ESA_monthly.rds".format(targ_year)):
        # Download the raw year only if its directory is absent; the extraction step runs either way.
        if not os.path.exists("ESA_CCI/{}".format(targ_year)):
            !./fetch_soil_moisture.sh $targ_year
        !../code/extract_SM_monthly.R $targ_year ESA_CCI/
else:
    print("Full-year ESA-CCI soil moisture data only available 1979 through 2017.")
%cd ../code

Here's the dictionary we will use to build all the configuration lists and dictionaries below.

In [None]:
# Master table of configuration entries.
# Each row is (<NAME>, <category>, <type>, <description>) and becomes
# "<NAME>": {"category":"<category>", "type":"<type>", "description":"<description>"}
configuration_dictionary = {
    name: {"category": category, "type": kind, "description": description}
    for name, category, kind, description in (
        ("START", "setup", "dir",
         "The main working directory, containing relevant code and data subdirectories."),
        ("CODE", "setup", "dir",
         "The subdirectory with code."),
        ("DATA", "setup", "dir",
         "The subdirectory for data."),
        ("OUTPUT", "setup", "dir",
         "The subdirectory for SOMOSPIE output."),
        ("DOMAINS", "data", "file",
         "The file with list list of all domains."),
        ("SM_FILE", "data", "file",
         "The file with soil moisture data."),
        ("COV_FILE", "data", "file",
         "The file with covariate data."),
        ("EVAL_FILE", "data", "file",
         "The file with evaluation data."),
        ("YEAR", "proc", "text",
         "A list of years."),
        ("MONTHS", "proc", "text",
         "A list of months."),
        ("MAKE_T_E", "proc", "bool",
         "True if you want to generate (using SM_FILE and COV_FILE) a train and eval file for each region "),
        ("USE_PCA", "proc", "bool",
         "True if you want to perform PCA dimension reduction on the covariate data."),
        ("VALIDATE", "proc", "float",
         "0 for no validation; 1.xx to compare predictions to xx% of the original data; 2 to compare prediction to the training data"),
        ("RAND_SEED", "proc", "int",
         "Specify a positive integer, or 0 to generate a new random seed."),
        ("USE_VIS", "proc", "bool",
         "True if you want to generate images for the predictions."),
        ("MIN_TEST_POINTS", "proc", "int",
         "The minimum number of test points required for a region to be used."),
        ("BUFFER", "proc", "int",
         "Specify a positive integer for the nubmer of kms you want to expand the training data around each region; 0 for no buffer."),
        ("SUPER", "proc", "bool",
         "If true, then training data for ESA-CCI ecoregions will be expanded to one higher leverl."),
        # The export cell treats the last entry specially, so MODICT stays at the end.
        ("MODICT", "method", "modict",
         "Dictionary of modeling methods and their parameter specifications"),
    )
}

In [None]:
# Group the configuration names by their declared "type", for easier config parsing.
title_dict = {}
for entry, spec in configuration_dictionary.items():
    # setdefault starts a fresh list the first time a type is seen, then the name is appended to it.
    title_dict.setdefault(spec["type"], []).append(entry)
#title_dict = {"dir": ["START", "CODE", "DATA", "OUTPUT"], \
#              "file": ["DOMAINS", "SM_FILE", "COV_FILE", "EVAL_FILE"], \
#              "text": ["YEAR", "MONTHS", "BUFFER"], \
#              "bool": ["MAKE_T_E", "USE_PCA", "USE_VIS", "SUPER"], \
#              "int": ["RAND_SEED", "MIN_TEST_POINTS"], \
#              "float": ["VALIDATE"], \
#              "modict": ["MODICT"]}

# Every configuration name, in declaration order.
titles = [*configuration_dictionary]

# Every directory and file setting starts out pointing at the parent directory.
paths = dict.fromkeys(title_dict["dir"] + title_dict["file"], "../")
#argnames = ["START", "CODE", "DATA", "DOMAINS", "OUTPUT", "SM_FILE", "COV_FILE", "EVAL_FILE"]
#paths = {key: "" for key in argnames}

def setPath(pathvar, newpath):
    """Store `newpath` under the key `pathvar` in the module-level `paths` dictionary."""
    paths.update({pathvar: newpath})
def setPathFunc(pathvar):
    """Return a one-argument callback that forwards its argument to `setPath` for `pathvar`."""
    def _store(x):
        # `pathvar` is captured from the enclosing call, so each callback targets its own key.
        return setPath(pathvar, x)
    return _store
## Replaced the below with the above lambda function
#def setStart(newpath):
#    paths["START"] = newpath
#def setCode(newpath):
#    paths["CODE"] = newpath
#def setData(newpath):
#    paths["DATA"] = newpath
#def setDomains(newpath):
#    paths["DOMAINS"] = newpath
#def setOutput(newpath):
#    paths["OUTPUT"] = newpath
#funcs = {"START":setStart, "CODE":setCode, "DATA":setData, "DOMAINS":setDomains, "OUTPUT":setOutput}

The following cell can be modified to determine usage of an imported ini, as well as change the names and paths of import and export files.

In [None]:
# Method names, spelled exactly as the .ini file expects them.
methods = ["1NN", "KKNN", "RF", "HYPPO", "UNMODEL"]

# Parameter name -> its str-type description, lifted out of the configuration table.
descriptions = {name: spec["description"] for name, spec in configuration_dictionary.items()}

# Sets up the file selector widgets as a dictionary, with or without default input.
# This is important, as the FileSelector is class with multiple widgets at work
# - we need an original instance to properly retrieve the input.
# Using the traditional ipython widget value returns incorrectly.
path_widgets = {}
if use_import:
    # Directories come straight from the imported .ini and are created when absent.
    for j in title_dict["dir"]:
        paths[j] = defaultconf["DEFAULT"][j]
        if not os.path.exists(paths[j]):
            # makedirs also creates missing parent directories (os.mkdir would fail on them),
            # and exist_ok tolerates the directory appearing between the check and the call.
            os.makedirs(paths[j], exist_ok=True)
    # File entries in the .ini are combined with the imported DATA directory.
    for j in title_dict["file"]:
        paths[j] = defaultconf["DEFAULT"]["DATA"] + "/" + defaultconf["DEFAULT"][j]
        
# One FileSelectorPanel per path-like setting: directory entries first, then file entries.
for group, selector_type in (("dir", "directory"), ("file", "file")):
    for name in title_dict[group]:
        # Directory selectors open at the stored path itself;
        # file selectors open at the folder that contains the stored file.
        if group == "dir":
            opening_dir = paths[name]
        else:
            opening_dir = os.path.dirname(paths[name])
        path_widgets[name] = FileSelectorPanel(
            descriptions[name],
            stay_alive=True,
            newdir_toolbar_button=True,
            next=setPathFunc(name),
            start_dir=opening_dir,
            type=selector_type,
        )

# Makes a VBox out of HBoxes of widgets for method arguments, for addition to the input GUI.
def boxpop(label):
    """Return the widgets for one method row: a label, a checkbox, and a text field."""
    return (widgets.Label(label), widgets.Checkbox(), widgets.Text())

# One row per method, stacked vertically in the order of `methods`.
items = [widgets.HBox(boxpop(name)) for name in methods]
modict = widgets.VBox(items)

# Widget constructor for each configuration type, applied in this order.
# Path-like entries wrap the selector panel built for them; the method entry reuses `modict`.
widget_makers = (
    ("dir", lambda name: widgets.VBox([path_widgets[name].panel])),
    ("file", lambda name: widgets.VBox([path_widgets[name].panel])),
    ("text", lambda name: widgets.Text()),
    ("bool", lambda name: widgets.Checkbox()),
    ("int", lambda name: widgets.IntText()),
    ("float", lambda name: widgets.FloatText()),
    ("modict", lambda name: modict),
)

# Variable name -> the widget that edits it.
child_type = {}
for kind, make_widget in widget_makers:
    for title in title_dict[kind]:
        child_type[title] = make_widget(title)

# The accordion lists the widgets in the same order as `titles`.
inputs = widgets.Accordion(children=[child_type[name] for name in titles])

# Assigns names to our Accordion entries.
for position, label in enumerate(titles):
    inputs.set_title(position, label)
    
# Pre-populate the widgets from the imported .ini, if one was imported.
if use_import:
    imported = defaultconf["DEFAULT"]
    # Every entry except the last one (the method dictionary) maps onto a single `.value`.
    # NOTE(review): for dir/file entries the accordion child is a VBox, so this assignment
    # presumably has no visible effect there - confirm.
    for position, name in enumerate(titles[:-1]):
        raw_value = imported[name]
        # Booleans are written to the .ini as integers, so go through int() before bool().
        inputs.children[position].value = bool(int(raw_value)) if name in title_dict["bool"] else raw_value

    # The last entry holds a dict literal keyed by method name.
    tempdict = literal_eval(imported[titles[-1]])
    if tempdict:
        # Rows of the method box are in the same order as `methods`.
        for row, method_name in zip(inputs.children[-1].children, methods):
            if method_name in tempdict:
                # Fill the parameter text first, then tick the method's checkbox.
                row.children[2].value = str(tempdict[method_name])
                row.children[1].value = True

display(inputs)

In [None]:
# `possible_domains` points at a csv exported from:
# https://docs.google.com/spreadsheets/d/13gw8jq1Hhtv8C4iDB8NJvIduGGusQZLECflvoD5e52A
# Everything is read as text, the csv's second line is skipped, the BOX column is removed,
# and missing cells become empty strings.
doms = (
    pd.read_csv(possible_domains, skiprows=[1], dtype=str)
    .drop(columns="BOX")
    .fillna('')
)
# Column name -> list of its non-empty cells.
lists = {column: [cell for cell in cells if cell] for column, cells in doms.to_dict('list').items()}
regions = dlist(lists, [widgets.Checkbox])
# Manual bounding-box entry: a longitude row and a latitude row, plus a checkbox that selects the box.
# FloatText has no min/max traits, so limits passed to it are not enforced; BoundedFloatText
# (with both bounds given, since its defaults are 0..100) actually clamps the coordinates.
XBOX_entry = widgets.HBox((widgets.HTML("Lon.: x1="),
                           widgets.BoundedFloatText(value=0, min=-180, max=180),
                           widgets.HTML("< x2="),
                           widgets.BoundedFloatText(value=0, min=-180, max=180)
                         ))
YBOX_entry = widgets.HBox((widgets.HTML("Lat.: y1="),
                           widgets.BoundedFloatText(value=0, min=-90, max=90),
                           widgets.HTML("< y2="),
                           widgets.BoundedFloatText(value=0, min=-90, max=90)
                         ))
BOX_box = widgets.VBox((XBOX_entry, YBOX_entry))
# The code that reads the regions looks at children[0] for the content and children[1] for the checkbox.
BOX_box_box = widgets.VBox((BOX_box, widgets.Checkbox()))
BOXES = widgets.VBox((BOX_box_box,))
# Append the BOX panel after the panels that dlist built, and title it.
regions.children += (BOXES,)
regions.set_title(doms.shape[1], "BOX") #ToDo: Make it so this isn't arbitrarily the last one. Low Priority.

display(regions)

In [None]:
# Building the output file.

config = ConfigParser()
config["DEFAULT"] = {}
section = config["DEFAULT"]
# Every entry except the last one (the method dictionary) is serialised from its widget or path.
for position, name in enumerate(titles[:-1]):
    if name in title_dict["bool"]:
        # Boolean variables are written as "0" / "1".
        setting = str(int(inputs.children[position].value))
    elif name in title_dict["file"]:
        # File paths from FileSelector are stored relative to the DATA directory.
        setting = os.path.relpath(paths[name], start=paths["DATA"])
        # If no file was specified, the above could result in ".", which can cause problems.
        if len(setting) < 2:
            setting = ""
    elif name in title_dict["dir"]:
        # Directory paths from FileSelector are stored as selected.
        setting = paths[name]
    else:
        # Everything else.
        setting = str(inputs.children[position].value)
    section[name] = setting

# Filling the ML methods output: one `"<method>":<parameters>,` line per ticked method row.
chosen_rows = [row for row in inputs.children[-1].children if row.children[1].value]
modict = "{\n" + "".join(f"\"{row.children[0].value}\":{row.children[2].value},\n" for row in chosen_rows) + "}"
section[titles[-1]] = modict # ToDo: Make it so this isn't arbitrarily the last one. Very low priority.

with open(ini_export, 'w') as handle:
    config.write(handle)
        

In [None]:
# Collect every ticked region as a (column name, identifier) pair and write the list to the DOMAINS file.
region_output = []
domain_names = doms.keys()
for i, region in enumerate(regions.children):
    for entry in region.children:
        # Only entries whose checkbox is ticked are exported.
        if not entry.children[1].value:
            continue
        if i >= len(domain_names):
            # Panels past the csv columns hold the manual bounding box: read both coordinate rows.
            lon_row = entry.children[0].children[0]
            lat_row = entry.children[0].children[1]
            x1, x2 = lon_row.children[1].value, lon_row.children[3].value
            y1, y2 = lat_row.children[1].value, lat_row.children[3].value
            region_output.append(("BOX", f"{x1}_{x2}_{y1}_{y2}"))
            continue
        column = domain_names[i]
        # The "CEC.1" and "CEC.2" columns are both reported under the part before the dot.
        appender = column.split(".")[0] if column in ["CEC.1", "CEC.2"] else column
        region_output.append((appender, entry.children[0].value))
with open(config["DEFAULT"]["DATA"] + "/" + config["DEFAULT"]["DOMAINS"], 'w') as file:
    file.write(str(region_output))

In [None]:
# Run the SOMOSPIE wrapper script in a shell, using whichever `python` the shell resolves.
# NOTE(review): presumably the wrapper picks up the .ini written above - confirm.
!python SOMOSPIE_wrapper.py

The following cells allow for output visualization, from any existing output in the SOMOSPIE out folder. This includes a run completed above, but does not explicitly select the one most recently completed.

In [None]:
class output_management:
    """Remembers which output directory is selected and reads parameter files from it."""

    def __init__(self):
        # Nothing is selected yet; `params` starts with an empty container per category.
        self.selected_out = ""
        self.params = dict(models={}, months=[], years=[], regions=[])

    def setOut(self, newpath):
        """Record `newpath` as the selected output directory."""
        self.selected_out = newpath

    def parseParamFile(self, fname):
        """Print the Python literal found on the first line of `fname` in the selected directory."""
        location = "/".join((self.selected_out, fname))
        with open(location, 'r') as handle:
            first_line = handle.readline()
        # Only the first line is evaluated; anything after it in the file is ignored.
        print(literal_eval(first_line))

# Shared manager instance; the selector below hands the chosen directory to its setOut method.
out_manager = output_management()

# Directory picker that opens at the configured OUTPUT path.
output_selector = FileSelectorPanel(
    "Output directory to visualize.",
    stay_alive=True,
    newdir_toolbar_button=True,
    next=out_manager.setOut,
    start_dir=paths["OUTPUT"],
    type="directory",
)
output_selector.show()

In [None]:
# Echo the currently selected output directory, then print the literal stored on the
# first line of the job.params file inside it.
print(out_manager.selected_out)
out_manager.parseParamFile("job.params")
