In [None]:
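#Before running this notebook, you'll need to download the NSIDC data and CESM1 large ensemble data
#using the scripts and instructions in our quickstart guide.
#The preprocessing script will now map ... (TODO: this sentence was left unfinished; presumably it maps each dataset onto the reference grid file passed to it -- confirm)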

import scripts.preprocessing.preprocess 
import argparse
#There's an issue with 3 files from NSIDC v4 that will be quickly identified by the script below,
#but to save processing time, it's best to delete these files beforehand:
#[seaice_conc_monthly_nh_198407_n07_v04r00.nc, seaice_conc_monthly_nh_198801_f08_v04r00.nc, seaice_conc_monthly_nh_198712_f08_v04r00.nc]
#We're not sure why, but the values for all points in these files were set to 0. Once they are removed from the preprocessing step,
#the code will automatically interpolate based on the previous and next time steps.

In [None]:
#Preprocess every raw dataset into serialized output under the output directory.
#argparse is used so the arguments reach main() in the same form as from the command line script
#(scripts.preprocessing.preprocess.py).
parser = argparse.ArgumentParser()
#NOTE: argparse ignores default="" on the two required positionals below (a positional without
#nargs must always be supplied); the defaults are kept only to leave the parser definition unchanged.
parser.add_argument("input_directory", type=str, default="",
                    help="Directory containing data files")
parser.add_argument("reference_gridfile", type=str,
                    help="Filepath to grid description textfile")
parser.add_argument("output_directory", type=str, default="",
                    help="Directory to store serialized output")
parser.add_argument("--recursive", "-r", action="store_true",
                    help="If specified, recursively process directories "
                         "within the input directory")
parser.add_argument("--pattern", type=str, default="",
                    help="Optionally only include directories "
                         "with a pattern match")

#Every dataset below is processed with the same reference grid file and output directory,
#so they are named once here instead of being repeated in each call.
REFERENCE_GRIDFILE = "configs/grids/polar_stereographic_north_grid.txt"
PREPROCESSED_DIR = "/local/preprocessed/"

#Input directories, in the order they are processed.
PREPROCESS_INPUT_DIRS = [
    #NASA estimates of NSIDC data
    "/local/simdata/NSIDC/monthly/north_v4/",
    #SST observational data
    "/local/simdata/NSIDC/monthly/ORAS5/sst/",
    #currently only one of the CESM1 ensemble members (or averages) is loaded
    "/local/simdata/CESM1/monthly/002_files/SST/",
]

#One parse + main() call per dataset; add a new dataset by appending its directory to the list above.
for input_directory in PREPROCESS_INPUT_DIRS:
    preprocess_args = parser.parse_args([input_directory, REFERENCE_GRIDFILE, PREPROCESSED_DIR])
    scripts.preprocessing.preprocess.main(preprocess_args)

#The intent of this step is that additional loading methods can be added for new variables and new datasets,
#while all downstream analysis stays independent of them: Koopman models and prediction analytics can be run
#for any combination of 1d or 2d spatial datasets with monthly timesteps (soon any arbitrary time step).

In [None]:
#To remain consistent between simulation and observational data, we create a mask to make sure 
#that we skip spatial regions in the simulation where there is no data from observation
#you will only need to do this once per analysis regardless of the number of variables or files you wish to process 
#(assuming your analysis has a consistent spatial grid of interest)
import scripts.preprocessing.create_mask

#input_filepath should point to a single target .nc file with the grid of interest (use the first NSIDC file from the analysis above)
#variable describes the variable of interest for the mask. (use "cdr_seaice_conc_monthly" if NSIDC v4 file is your input)
#minimum_value and maximum value: any grid points where variable falls outside of this range for the target variable are masked
#output_filepath: location to write out the mask file.  You should make sure your configs/config.yml file is updated with this location
#Collect the mask settings in one mapping, then pass them to compute_mask as keyword arguments.
mask_settings = dict(
    #single target .nc file that carries the grid of interest
    input_filepath='/local/simdata/NSIDC/monthly/north_v4/seaice_conc_monthly_nh_197903_n07_v04r00.nc',
    #variable the mask is derived from
    variable="cdr_seaice_conc_monthly",
    #grid points whose value falls outside [minimum_value, maximum_value] are masked
    minimum_value=0,
    maximum_value=100,
    #where the mask file is written
    output_filepath="/local/preprocessed/north_nsidc_stereographic_mask.pkl",
)
scripts.preprocessing.create_mask.compute_mask(**mask_settings)
#update the config file to point to this mask file

In [None]:
#now that you have saved ClimateData objects for all data you wish to model/compare against
#You can train a koopman model on the dataset and time window of interest.
import scripts.training.train
import argparse
#You'll need to modify the default configs/configuration file to fit your data
#descriptions of all variables can be found there. (most important is to modify the serialized_dat_map)

# Training takes one positional command line argument: the path to the configuration file.
# (Swap in "configs/example_config.yml" below to train with the example configuration instead.)
training_config = "configs/planer_test.yaml"

parser = argparse.ArgumentParser()
parser.add_argument("config", type=str, help="Path to configuration file")
args = parser.parse_args([training_config])

# Hand the parsed namespace to the training entry point.
scripts.training.train.main(args)

In [None]:
#the above will generate a fully trained koopman model along with some diagnostic plots saved to the output_dir
#you can then run the prediction analysis to see what the koopman predictions look like
import scripts.prediction.predict
import os, argparse
#Run the prediction analysis for a trained model.
#argparse is used so the arguments reach main() in the same form as from the command line;
#this cell drives scripts.prediction.predict (presumably scripts/prediction/predict.py -- confirm).
parser = argparse.ArgumentParser()
parser.add_argument("config", type=str, help="Path to configuration file")
parser.add_argument("model", type=str, help="Path to serialized model")
#help text previously lacked its closing parenthesis
parser.add_argument("end_date", type=int, help="YYYYMMDD (e.g., 20101201)")
parser.add_argument("output_directory", type=str,
                    help="Where to output plots")
parser.add_argument("--upper_mode_bound", type=int, default=-1,
                    help="""Prediction will use modes 0 to this upperbound.                                                        
                    If not specified, all modes will be used.""")
#positional order: config, model, end_date, output_directory
args = parser.parse_args([
    "configs/planer_test.yaml",
    "/local/results_gallery/NSIDC_merged+sst_POLAR_19800101to20091201/koopman_model.pkl",
    "20201201",
    "/local/predictions/",
])

# verify that both input files exist before starting the prediction run
assert os.path.exists(args.config), \
    f"{args.config} not found!"
assert os.path.exists(args.model), \
    f"{args.model} not found!"
# create the output directory if it is missing; an existing directory is left as is
os.makedirs(args.output_directory, exist_ok=True)
scripts.prediction.predict.main(args)


In [None]:
#robustness analysis can similarly be performed, but works better using the command line utilities