# ***DESCRIPTION*** 
## ***Run Tigramite (PCMCI) for SPCAM data with specified settings:***
### Fixed:
- PC-stable (i.e., MCI component not run)
- tau_min/tau_max = -1
- Significance: analytics
- experiments: '002_train_1_year'
- links: parents (state fields) -> children (parameterizations)
### Options:
- analysis: 'single': gridpoints individually
            'concat': gridpoints concatenated into a
                      single time-series
- children (parameterizations)
- region: lat/lon limits (gridpoints to be used)
- levels: children's levels to be explored
- pc_alphas: list of value(s)

In [1]:
# Python packages
import sys, getopt, yaml, time, datetime
from datetime import datetime as dt
import numpy                  as np
from pathlib              import Path

# Utils
from   utils.constants    import SPCAM_Vars, DATA_FOLDER, ANCIL_FILE #, OUTPUT_FILE_PATTERN
from   utils.constants    import tau_min, tau_max, significance, experiment
import utils.utils            as utils
import utils.processing       as proc
# import utils.links            as links
import utils.pcmci_algorithm  as algorithm

## Specifications

In [2]:
# Command-line arguments. When running interactively (e.g. in a notebook),
# override with: argv = ['-c', 'cfg_pipeline.yml']
argv           = sys.argv[1:]
yml_cfgFilenm  = None  # stays None when -c/--cfg_file is not supplied
try:
    opts, args = getopt.getopt(argv,"hc:a",["cfg_file=","add="])
except getopt.GetoptError:
    print ('pipeline.py -c [cfg_file] -a [add]')
    sys.exit(2)
for opt, arg in opts:
    if opt == '-h':
        print ('pipeline.py -c [cfg_file]')
        sys.exit()
    elif opt in ("-c", "--cfg_file"):
        yml_cfgFilenm = arg
    elif opt in ("-a", "--add"):
        # Accepted but currently ignored.
        pass

# The config file is mandatory: stop with the usage message instead of
# failing later with a NameError on an unbound file name.
if yml_cfgFilenm is None:
    print ('pipeline.py -c [cfg_file] -a [add]')
    sys.exit(2)

# YAML config file (closed as soon as it has been parsed)
# NOTE(review): yaml.FullLoader is kept as in the original; if the config can
# come from an untrusted source, consider yaml.safe_load instead.
with open(yml_cfgFilenm) as yml_cfgFile:
    yml_cfg       = yaml.load(yml_cfgFile, Loader=yaml.FullLoader)

# Load specifications
analysis            = yml_cfg['analysis']
spcam_parents       = yml_cfg['spcam_parents']
spcam_children      = yml_cfg['spcam_children']
pc_alphas           = yml_cfg['pc_alphas']
region              = yml_cfg['region']
lim_levels          = yml_cfg['lim_levels']
target_levels       = yml_cfg['target_levels']
verbosity           = yml_cfg['verbosity']
output_folder       = yml_cfg['output_folder']
# The output file pattern is selected per analysis type
output_file_pattern = yml_cfg['output_file_pattern'][analysis]
# Existing result files are never overwritten (hard-coded)
overwrite           = False

In [3]:
## Region / Gridpoints
# A region of False in the config selects the default limits below
# (marked "All" in the original configuration comment).
region     = [ [-90,90] , [0,-.5] ] if region is False else region
gridpoints = utils.get_gridpoints(region)

## Children levels (parents includes all)
# Derive the target levels from lim_levels only when lim_levels is given
# and no explicit target_levels were configured.
if not (lim_levels is False or target_levels is not False):
    target_levels = utils.get_levels(lim_levels)

In [4]:
## Model's grid (read from the ancillary file)
levels, latitudes, longitudes = utils.read_ancilaries(
    Path(DATA_FOLDER, ANCIL_FILE)
)

## Latitude / Longitude indexes
# One index per gridpoint; element 0 of a gridpoint is matched against the
# latitudes and element 1 against the longitudes.
idx_lats = [
    utils.find_closest_value(latitudes, point[0])
    for point in gridpoints
]
idx_lons = [
    utils.find_closest_longitude(longitudes, point[1])
    for point in gridpoints
]

## Level indexes (children & parents)
# Parents use every level, stored as [rounded level value, level index].
parents_idx_levs = [
    [round(level_value, 2), level_idx]
    for level_idx, level_value in enumerate(levels)
]
if target_levels is False:
    # No explicit targets: children share the parents' level list
    children_idx_levs = parents_idx_levs
else:
    children_idx_levs = [
        [target, utils.find_closest_value(levels, target)]
        for target in target_levels
    ]

In [5]:
## Variables
# Keep only the variables named in the config, then split them by type.
spcam_vars_include = spcam_parents + spcam_children
SPCAM_Vars         = [
    spcam_var
    for spcam_var in SPCAM_Vars
    if spcam_var.label in spcam_vars_include
]
var_parents        = [
    spcam_var for spcam_var in SPCAM_Vars if spcam_var.type == "in"
]
var_children       = [
    spcam_var for spcam_var in SPCAM_Vars if spcam_var.type == "out"
]

In [6]:
# Echo the resolved setup, one item per line
print(
    gridpoints,
    var_parents,
    var_children,
    parents_idx_levs,
    children_idx_levs,
    sep="\n",
)

[[4.185920533189154, 120.9375]]
[(tbp, 3, in), (ps, 2, in)]
[(fsns, 2, out)]
[[3.64, 0], [7.59, 1], [14.36, 2], [24.61, 3], [38.27, 4], [54.6, 5], [72.01, 6], [87.82, 7], [103.32, 8], [121.55, 9], [142.99, 10], [168.23, 11], [197.91, 12], [232.83, 13], [273.91, 14], [322.24, 15], [379.1, 16], [445.99, 17], [524.69, 18], [609.78, 19], [691.39, 20], [763.4, 21], [820.86, 22], [859.53, 23], [887.02, 24], [912.64, 25], [936.2, 26], [957.49, 27], [976.33, 28], [992.56, 29]]
[[3.64, 0], [7.59, 1], [14.36, 2], [24.61, 3], [38.27, 4], [54.6, 5], [72.01, 6], [87.82, 7], [103.32, 8], [121.55, 9], [142.99, 10], [168.23, 11], [197.91, 12], [232.83, 13], [273.91, 14], [322.24, 15], [379.1, 16], [445.99, 17], [524.69, 18], [609.78, 19], [691.39, 20], [763.4, 21], [820.86, 22], [859.53, 23], [887.02, 24], [912.64, 25], [936.2, 26], [957.49, 27], [976.33, 28], [992.56, 29]]


### Processing

In [7]:
def _load_concat_gridpoints(
    variables,
    idx_levs,
    idx_lats,
    idx_lons,
    n_grid,
    announce=None
):
    """Load `variables` at every gridpoint and join the pieces along axis 1.

    Parameters
    ----------
    variables : list
        Variables passed on to `utils.load_data_concat`.
    idx_levs : list
        Level entries passed on to `utils.load_data_concat`.
    idx_lats, idx_lons : sequence
        Latitude / longitude indexes, one per gridpoint.
    n_grid : int
        Number of gridpoints to load.
    announce : callable, optional
        Called as `announce(i_grid, idx_lat, idx_lon)` before each load
        (used for progress messages).

    Returns
    -------
    The data of the single gridpoint as returned by
    `utils.load_data_concat`, the concatenation (axis=1) of all gridpoints,
    or None when `n_grid` is 0.
    """
    chunks = []
    for i_grid in range(n_grid):
        idx_lat = idx_lats[i_grid]
        idx_lon = idx_lons[i_grid]
        if announce is not None:
            announce(i_grid, idx_lat, idx_lon)
        chunks.append(
            utils.load_data_concat(
                variables,
                experiment,
                DATA_FOLDER,
                idx_levs,
                idx_lat,
                idx_lon)
        )
    if not chunks:
        return None
    if len(chunks) == 1:
        return chunks[0]
    # Concatenate once instead of re-copying the growing array per gridpoint
    return np.concatenate(chunks, axis=1)


def concat(
    gridpoints,
    var_parents,
    var_children,
    pc_alphas,
    parents_idx_levs,
    children_idx_levs,
    idx_lats,
    idx_lons,
    output_file_pattern,
    output_folder,
    overwrite
          ):
    """Find links for each child/level with all gridpoints concatenated.

    For every child variable and each of its levels, the data of all
    gridpoints are concatenated, `algorithm.find_links` is run on parents
    plus child, and the results are stored with `utils.save_results`.
    A level is skipped when its results file already exists and
    `overwrite` is false.

    Parameters
    ----------
    gridpoints : list
        Gridpoints; the first and last entries are used to build the
        results file name.
    var_parents, var_children : list
        Parent and child variables. Children need `name` and `dimensions`
        attributes.
    pc_alphas : list
        Passed on to `algorithm.find_links`.
    parents_idx_levs, children_idx_levs : list
        Level entries for parents and for 3-dimensional children; the last
        element of an entry is used as the level index.
    idx_lats, idx_lons : list
        Latitude / longitude indexes, one per gridpoint.
    output_file_pattern : str
        Format pattern filled with var_name, level, lat1, lat2, lon1, lon2
        and experiment.
    output_folder : str or Path
        Folder of the results files.
    overwrite : bool
        Recompute levels whose results file already exists.

    Raises
    ------
    ValueError
        If a child has a number of dimensions other than 2 or 3.
    """

    ## Model's grid
    levels, latitudes, longitudes = utils.read_ancilaries(Path(DATA_FOLDER, ANCIL_FILE))

    ## Processing
    len_grid     = len(gridpoints)
    t_start      = time.time()
    data_parents = None

    def _announce_gridpoint(i_grid, idx_lat, idx_lon):
        # Progress message printed before each gridpoint's parents are loaded
        print(f"{dt.now()} Gridpoint {i_grid+1}/{len_grid}: lat={latitudes[idx_lat]}"
              + f" ({idx_lat}), lon={longitudes[idx_lon]} ({idx_lon})")

    for child in var_children:
        print(f"{dt.now()} Variable: {child.name}")
        if child.dimensions == 2:
            # 2-dimensional children get a single pseudo-level at index 0
            child_levels = [[levels[-1],0]]
        elif child.dimensions == 3:
            child_levels = children_idx_levs
        else:
            # Previously this left child_levels unbound (or stale from the
            # previous child); fail with an explicit message instead.
            raise ValueError(
                f"Unsupported number of dimensions for {child.name}: "
                f"{child.dimensions} (expected 2 or 3)"
            )
        for level in child_levels:

            results_filename = output_file_pattern.format(
                        var_name   = child.name,
                        level      = level[-1]+1,
                        lat1       = int(gridpoints[0][0]),
                        lat2       = int(gridpoints[-1][0]),
                        lon1       = int(gridpoints[0][-1]),
                        lon2       = int(gridpoints[-1][-1]),
                        experiment = experiment
                )
            results_file = Path(output_folder, results_filename)

            ## outFile exists?
            if not overwrite and results_file.is_file():
                print(f"{dt.now()} Found file {results_file}, skipping.")
                continue # Ignore this level

            # Only load parents if necessary to analyze a child;
            # once loaded they are reused for every remaining child/level
            if data_parents is None:
                print(); print("Load Parents (state fields)...")
                t_before_load_parents = time.time()
                data_parents = _load_concat_gridpoints(
                    var_parents,
                    parents_idx_levs,
                    idx_lats,
                    idx_lons,
                    len_grid,
                    announce=_announce_gridpoint
                )
                # Format data
                data_parents = utils.format_data(data_parents, var_parents, parents_idx_levs)

                time_load_parents = datetime.timedelta(seconds = time.time() - t_before_load_parents)
                print(f"{dt.now()} All parents loaded. Time: {time_load_parents}"); print("")

            # Process data child
            print(f"Load {child.name}...")
            t_before_load_child = time.time()
            data_child = _load_concat_gridpoints(
                [child],
                [level],
                idx_lats,
                idx_lons,
                len_grid
            )
            time_load_child = datetime.timedelta(seconds = time.time() - t_before_load_child)
            print(f"{dt.now()} Child loaded. Time: {time_load_child}"); print("")

            # Format data
            data_child = utils.format_data(data_child, [child], [level])
            data = [*data_parents, *data_child]

            # Find links
            print(f"{dt.now()} Finding links for {child.name} at level {level[-1]+1}")
            t_before_find_links = time.time()
            results = algorithm.find_links(data, pc_alphas, 0)
            time_links = datetime.timedelta(seconds = time.time() - t_before_find_links)
            total_time = datetime.timedelta(seconds = time.time() - t_start)
            print(f"{dt.now()} Links found. Time: {time_links}" + f" Total time so far: {total_time}")
            print()

            # Store causal links
            utils.save_results(results, results_filename, output_folder)


    total_time = datetime.timedelta(seconds = time.time() - t_start)
    print(f"{dt.now()} Execution complete. Total time: {total_time}")

In [8]:
# Both analysis types take the same positional arguments
run_args = (
    gridpoints,
    var_parents,
    var_children,
    pc_alphas,
    parents_idx_levs,
    children_idx_levs,
    idx_lats,
    idx_lons,
    output_file_pattern,
    output_folder,
    overwrite,
)

if analysis == 'single':
    # Gridpoints analysed individually
    proc.single(*run_args)
elif analysis == 'concat':
    # Gridpoints concatenated into a single time-series
    concat(*run_args)
else:
    print("Please specify a valid analysis, i.e., 'single' or 'concat'; stop script")
    # Exit with a non-zero status so callers can detect the failure;
    # the bare exit() helper is meant for interactive use and reported success.
    sys.exit(1)

2021-02-22 17:53:47.755003 Variable: fsns
2021-02-22 17:53:47.755476 Gridpoint 1/1: lat=4.185920533189154 (33), lon=120.9375 (43)
Load Parents (state fields)...
2021-02-22 17:54:09.268938 All parents loaded. Time: 0:00:21.513329
2021-02-22 17:54:09.952248 Finding links for fsns at level 1
2021-02-22 17:54:16.025105 Links found. Time: 0:00:06.072633 Total time so far: 0:00:28.270103
Saved results into "test_causal_links/fsns_1_lat-4_lon-120_002_train_1_year.obj"
2021-02-22 17:54:16.029378 All links in gridpoint found. Time: 0:00:28.274368. Total time so far: 0:00:28.274376

2021-02-22 17:54:16.029431 Execution complete. Total time: 0:00:28.274376
