# ***DESCRIPTION*** 
## ***Run Tigramite (PCMCI) for SPCAM data with specified settings:***
### Fixed:
- PC-stable (i.e., MCI component not run)
- tau_min/tau_max = -1
- Significance: analytic
- experiments: '002_train_1_year'
- links: parents (state fields) -> children (parameterizations)
### Options:
- children (parameterizations)
- region: lat/lon limits (gridpoints to be used)
- levels: children's levels to be explored
- pc_alphas: list of value(s)

In [1]:
# Python packages
import sys, getopt, yaml, time, datetime
from datetime import datetime as dt
import numpy                  as np
from pathlib              import Path

# Utils
from   utils.constants    import SPCAM_Vars, DATA_FOLDER, ANCIL_FILE, OUTPUT_FILE_PATTERN
from   utils.constants    import tau_min, tau_max, significance, experiment
import utils.utils            as utils
import utils.links            as links
import utils.pcmci_algorithm  as algorithm

## Specifications

In [2]:
# Command-line handling. Only -c/--cfg_file is acted upon; -a/--add is
# accepted by the parser but ignored.
argv           = sys.argv[1:] #argv           = ['-c', 'cfg_pipeline.yml']
yml_cfgFilenm  = None  # remains None when -c/--cfg_file is not supplied
try:
    opts, args = getopt.getopt(argv,"hc:a",["cfg_file=","add="])
except getopt.GetoptError:
    print ('pipeline.py -c [cfg_file] -a [add]')
    sys.exit(2)
for opt, arg in opts:
    if opt == '-h':
        print ('pipeline.py -c [cfg_file]')
        sys.exit()
    elif opt in ("-c", "--cfg_file"):
        yml_cfgFilenm = arg
    elif opt in ("-a", "--add"):
        pass

# Without a config file there is nothing to run: report usage and stop here
# instead of failing later with a NameError on an unbound file name.
if yml_cfgFilenm is None:
    print ('pipeline.py -c [cfg_file] -a [add]')
    sys.exit(2)

# YAML config file (the context manager closes the handle after parsing)
# NOTE(review): FullLoader can build more than plain data types; if the config
# file could ever come from an untrusted source, consider yaml.safe_load.
with open(yml_cfgFilenm) as yml_cfgFile:
    yml_cfg       = yaml.load(yml_cfgFile, Loader=yaml.FullLoader)

# Load specifications (a missing key raises KeyError)
spcam_parents     = yml_cfg['spcam_parents']
spcam_children    = yml_cfg['spcam_children']
pc_alphas         = yml_cfg['pc_alphas']
region            = yml_cfg['region']
lim_levels        = yml_cfg['lim_levels']
target_levels     = yml_cfg['target_levels']
verbosity         = yml_cfg['verbosity']
output_folder     = yml_cfg['output_folder']
# Not configurable from the YAML file: existing result files are never redone.
overwrite         = False

In [3]:
## Region / Gridpoints
# A config value of `False` selects the default window (labelled "All").
region     = [ [-90,90] , [0,-.5] ] if region is False else region
gridpoints = utils.get_gridpoints(region)

## Children levels (parents includes all)
# Derive the target levels from the level limits only when limits were given
# and no explicit target levels were configured.
if target_levels is False and lim_levels is not False:
    target_levels = utils.get_levels(lim_levels)

In [4]:
## Model's grid
levels, latitudes, longitudes = utils.read_ancilaries(Path(DATA_FOLDER, ANCIL_FILE))

## Latitude / Longitude indexes
# Each gridpoint is a (latitude, longitude) pair; look up the index of the
# closest grid value for each coordinate.
idx_lats = [utils.find_closest_value(latitudes, point_lat)      for point_lat, _ in gridpoints]
idx_lons = [utils.find_closest_longitude(longitudes, point_lon) for _, point_lon in gridpoints]

## Level indexes (children & parents)
# Entries are [level value, level index]; parents always use every level.
parents_idx_levs = [
    [round(level_value, 2), level_index]
    for level_index, level_value in enumerate(levels)
] # All
if target_levels is False:
    # No explicit selection: children share the parents' list (same object).
    children_idx_levs = parents_idx_levs
else:
    children_idx_levs = [
        [target, utils.find_closest_value(levels, target)]
        for target in target_levels
    ]

In [5]:
## Variables
# Keep only the variables named in the config, then split them by role.
# Note: this rebinds the imported SPCAM_Vars to the filtered list.
spcam_vars_include = spcam_parents + spcam_children
SPCAM_Vars         = list(filter(lambda spcam_var: spcam_var.label in spcam_vars_include, SPCAM_Vars))
var_parents        = list(filter(lambda spcam_var: spcam_var.type == "in", SPCAM_Vars))
var_children       = list(filter(lambda spcam_var: spcam_var.type == "out", SPCAM_Vars))

### Processing

In [6]:
# Main loop: for every gridpoint, every child variable and every child level,
# find the causal links from all parents to that child and store them.
# Levels whose result file already exists are skipped unless `overwrite` is set.
len_grid = len(gridpoints)
t_start = time.time()
for i_grid, (i_lat, i_lon) in enumerate(gridpoints):

    t_start_gridpoint = time.time()
    data_parents = None  # loaded lazily, at most once per gridpoint

    idx_lat = idx_lats[i_grid]
    idx_lon = idx_lons[i_grid]

    for child in var_children:
        print(f"{dt.now()} Variable: {child.name}")
        if child.dimensions == 2:
            # 2D variables get a single [level value, level index] entry.
            child_levels = [[levels[-1],0]]
        elif child.dimensions == 3:
            child_levels = children_idx_levs
        else:
            # Fail loudly instead of hitting a NameError or silently reusing
            # the levels of the previously processed child.
            raise ValueError(
                f"Unsupported number of dimensions ({child.dimensions}) "
                f"for variable {child.name}; expected 2 or 3."
            )
        for level in child_levels:

            # NOTE(review): the file name is built here from level[-1]+1 and
            # int() coordinates, while save_results below receives level[-1]
            # and the raw coordinates; presumably it applies the same
            # formatting -- verify, otherwise the skip check never matches.
            results_filename = OUTPUT_FILE_PATTERN.format(
                    var_name   = child.name,
                    level      = level[-1]+1,
                    lat        = int(i_lat),
                    lon        = int(i_lon),
                    experiment = experiment
            )
            results_file = Path(output_folder, results_filename)

            if not overwrite and results_file.is_file():
                print(f"{dt.now()} Found file {results_file}, skipping.")
                continue # Ignore this level


            # Only load parents if necessary to analyze a child
            # they stay loaded until the next gridpoint
            if data_parents is None:

                print(f"{dt.now()} Gridpoint {i_grid+1}/{len_grid}: lat={latitudes[idx_lat]}"
                      + f" ({idx_lat}), lon={longitudes[idx_lon]} ({idx_lon})")

                print("Load Parents (state fields)...")
                t_before_load_parents = time.time()
                data_parents = utils.load_data(
                    var_parents,
                    experiment,
                    DATA_FOLDER,
                    parents_idx_levs,
                    idx_lat,
                    idx_lon)
                time_load_parents = datetime.timedelta(seconds = time.time() - t_before_load_parents)
                print(f"{dt.now()} All parents loaded. Time: {time_load_parents}")

            # Process child
            data_child = utils.load_data([child],
                                   experiment,
                                   DATA_FOLDER,
                                   [level],
                                   idx_lat,
                                   idx_lon)
            data = [*data_parents, *data_child]

            # Find links
            # NOTE(review): the third argument is hard-coded to 0 although a
            # `verbosity` value is read from the config and not used in this
            # loop -- confirm whether it was meant to be passed here.
            print(f"{dt.now()} Finding links for {child.name} at level {level[-1]+1}")
            t_before_find_links = time.time()
            results = algorithm.find_links(data, pc_alphas, 0)
            time_links = datetime.timedelta(seconds = time.time() - t_before_find_links)
            total_time = datetime.timedelta(seconds = time.time() - t_start)
            print(f"{dt.now()} Links found. Time: {time_links}" + f" Total time so far: {total_time}")

            # Store causal links
            utils.save_results(results, child, level[-1], i_lat, i_lon, experiment, output_folder)

    time_point = datetime.timedelta(seconds = time.time() - t_start_gridpoint)
    total_time = datetime.timedelta(seconds = time.time() - t_start)
    print(f"{dt.now()} All links in gridpoint found. Time: {time_point}."
          + f" Total time so far: {total_time}")
    print("")

# Computed here so the summary is defined even when `gridpoints` is empty.
total_time = datetime.timedelta(seconds = time.time() - t_start)
print(f"{dt.now()} Execution complete. Total time: {total_time}")

2021-02-22 08:36:13.064823 Variable: tphystnd
2021-02-22 08:36:13.082973 Found file causal_links/tphystnd_1_lat--87_lon-120_002_train_1_year.obj, skipping.
2021-02-22 08:36:13.090360 Found file causal_links/tphystnd_2_lat--87_lon-120_002_train_1_year.obj, skipping.
2021-02-22 08:36:13.093909 Found file causal_links/tphystnd_3_lat--87_lon-120_002_train_1_year.obj, skipping.
2021-02-22 08:36:13.100573 Found file causal_links/tphystnd_4_lat--87_lon-120_002_train_1_year.obj, skipping.
2021-02-22 08:36:13.105227 Found file causal_links/tphystnd_5_lat--87_lon-120_002_train_1_year.obj, skipping.
2021-02-22 08:36:13.106345 Found file causal_links/tphystnd_6_lat--87_lon-120_002_train_1_year.obj, skipping.
2021-02-22 08:36:13.107256 Found file causal_links/tphystnd_7_lat--87_lon-120_002_train_1_year.obj, skipping.
2021-02-22 08:36:13.108278 Found file causal_links/tphystnd_8_lat--87_lon-120_002_train_1_year.obj, skipping.
2021-02-22 08:36:13.113237 Found file causal_links/tphystnd_9_lat--87_lon-

KeyboardInterrupt: 