# Advanced Parameter Handling

> Additional utilities to handle complex types that are not always supported by workflow frameworks.

In [1]:
# default_exp data_handler

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# export

import os
import sys
from dataclasses import dataclass
from importlib import import_module
from pathlib import Path
from typing import Any, Dict, List

import numpy as np
import pandas as pd
from nbdev.export import get_config

from sciflow.params import params_as_dict

In [4]:
# export

# Value types that extract_param_meta classifies as scalars.
supported_parameters = (str, int, float)
# Container types that extract_param_meta flags as JSON types.
supported_args = (Dict, List)
# Types that extract_param_meta inspects individually to pick a persist_type.
supported_conversion_args = (pd.Series, pd.DataFrame, np.ndarray, Path)
# Flat list of all of the above, in declaration order. The triple-"p" spelling
# is the exported name and is kept unchanged.
suppported_types = [
    *supported_parameters,
    *supported_args,
    *supported_conversion_args,
]

In [5]:
# export


@dataclass
class ParamMeta:
    """Metadata describing how one parameter value is classified.

    Instances are created by `extract_param_meta`, one per parameter name.
    """

    # Concrete runtime type of the value, i.e. ``type(val)``.
    instance_type: type
    # Set to True by extract_param_meta only for values matching supported_parameters.
    is_scalar: bool
    # Set to True by extract_param_meta only for values matching supported_args.
    is_json_type: bool
    # One of "pickle", "numpy", "pandas" or "unsupported" as assigned by extract_param_meta.
    persist_type: str
    # Presumably whether the value can be declared as a Metaflow parameter — TODO confirm.
    has_metaflow_param: bool
    # Presumably whether the value can be declared as a SageMaker parameter — TODO confirm.
    has_sagemaker_param: bool

In [6]:
# export


def load_module(fully_qualified_module_name):
    """Import a module by dotted name after making the project root importable.

    The project root is read from the nbdev config entry ``root_path`` and
    appended to ``sys.path`` when it is not already present, so repeated calls
    do not add duplicate entries.

    Args:
        fully_qualified_module_name: Dotted module path, e.g.
            ``"sciflow.test.test_data_handling"``.

    Returns:
        The module object returned by ``importlib.import_module``.

    Raises:
        ImportError: Propagated from ``importlib.import_module`` when the
            module cannot be imported.
    """
    root_path = str(get_config().path("root_path"))
    if root_path not in sys.path:
        sys.path.append(root_path)
    # Everything before the last dot; import_module only consults this anchor
    # when the requested name is relative.
    package = fully_qualified_module_name.rpartition(".")[0]
    return import_module(fully_qualified_module_name, package)

In [7]:
# Dotted module name used by the following cells to walk through load_module's steps by hand.
fully_qualified_module_name = "sciflow.test.test_data_handling"

In [8]:
# Same steps as load_module, run inline so the intermediate values can be inspected.
root_path = str(get_config().path("root_path"))
if root_path not in sys.path:
    sys.path.append(root_path)
# Text before the last dot is the parent package; text after it is the module's own name.
package = fully_qualified_module_name.rpartition(".")[0]
fully_qualified_module_name.rpartition(".")[-1]

'test_data_handling'

In [9]:
# Show the package prefix derived from the dotted module name.
package

'sciflow.test'

In [10]:
# Import by dotted name; `package` is passed as import_module's anchor argument.
module = import_module(fully_qualified_module_name, package)

In [11]:
# Display the imported module object.
module

<module 'sciflow.test.test_data_handling' from '/home/sagemaker-user/git/sciflow/sciflow/test/test_data_handling.py'>

In [12]:
# (module, attribute, expected value) triples verified by plain equality.
expected_attrs = [
    ("sciflow.test.test_data_handling", "int_param", 3),
    ("sciflow.test.test_data_handling", "float_param", 1.1),
    ("sciflow.test.test_clustering", "traffic_percent", 1),
    ("sciflow.test.test_clustering_no_params", "traffic_percent", 1),
    ("sciflow.test.test_export", "some_param", "test"),
    ("sciflow.test.test_module", "some_param", 2),
]
for module_name, attr_name, expected in expected_attrs:
    assert getattr(load_module(module_name), attr_name) == expected

# Exact type match (not isinstance) for the pandas parameter.
assert type(load_module("sciflow.test.test_data_handling").series_param) == pd.Series
# Modules outside the test package can be loaded as well.
assert load_module("sciflow.utils").odbc_connect is not None

In [14]:
# export


def _classify_param(val: Any) -> "ParamMeta":
    """Build the ParamMeta describing a single parameter value."""
    # Start from the "unsupported" profile; each branch overrides only what differs.
    traits = dict(
        is_scalar=False,
        is_json_type=False,
        persist_type="unsupported",
        has_metaflow_param=False,
        has_sagemaker_param=False,
    )
    if isinstance(val, supported_parameters):
        traits.update(
            is_scalar=True,
            persist_type="pickle",
            has_metaflow_param=True,
            has_sagemaker_param=True,
        )
    elif isinstance(val, supported_args):
        traits.update(
            is_json_type=True,
            persist_type="pickle",
            has_metaflow_param=True,
        )
    elif isinstance(val, supported_conversion_args):
        if isinstance(val, np.ndarray):
            traits.update(persist_type="numpy")
        elif isinstance(val, (pd.Series, pd.DataFrame)):
            traits.update(persist_type="pandas")
        elif isinstance(val, Path):
            traits.update(persist_type="pickle", has_metaflow_param=True)
        # A type listed in supported_conversion_args without a branch above
        # keeps the "unsupported" profile instead of being dropped.
    return ParamMeta(instance_type=type(val), **traits)


def extract_param_meta(
    module_name: str, params: Dict[str, Any]
) -> Dict[str, "ParamMeta"]:
    """Classify each named parameter by the value it has in a module.

    The module is imported with `load_module`, and every key of ``params`` is
    read from it as an attribute. Only the keys of ``params`` are used; the
    values in the dict are ignored.

    Args:
        module_name: Dotted name of the module that defines the parameters.
        params: Mapping whose keys are the parameter names to look up.

    Returns:
        A dict with one `ParamMeta` per key of ``params``. Values of a type
        that no branch recognises get ``persist_type="unsupported"``.

    Raises:
        AttributeError: If the module has no attribute for one of the keys.
    """
    module = load_module(module_name)
    return {name: _classify_param(getattr(module, name)) for name in params}

In [22]:
# Absolute path of the test notebook, resolved from the current working directory.
nb_path = str(Path(".").resolve() / "test" / "test_data_handling.ipynb")
params = params_as_dict(nb_path)

In [23]:
# Build per-parameter metadata from the values defined in sciflow.test.test_data_handling.
param_meta = extract_param_meta("sciflow.test.test_data_handling", params)

In [24]:
from pathlib import PosixPath

In [25]:
# Runtime type expected in the metadata of each checked parameter.
expected_types = {
    "int_param": int,
    "float_param": float,
    "str_param": str,
    "input_path": PosixPath,
    "model_path": PosixPath,
    "dict_param": dict,
    "list_param": list,
    "series_param": pd.Series,
    "df_param": pd.DataFrame,
}
# The list is built eagerly so every lookup happens before all() runs.
assert all(
    [param_meta[name].instance_type == kind for name, kind in expected_types.items()]
)

In [26]:
# Scalar parameters are expected to be flagged for both frameworks.
for scalar_name in ("int_param", "str_param", "float_param"):
    scalar_meta = param_meta[scalar_name]
    assert scalar_meta.has_metaflow_param
    assert scalar_meta.has_sagemaker_param

In [29]:
# List, dict and path parameters are expected to carry the Metaflow flag only.
for non_scalar_name in ("list_param", "dict_param", "input_path"):
    non_scalar_meta = param_meta[non_scalar_name]
    assert non_scalar_meta.has_metaflow_param
    assert not non_scalar_meta.has_sagemaker_param

In [19]:
# Repeat the extraction for the clustering test notebook and its module.
nb_path = str(Path(".").resolve() / "test" / "test_clustering.ipynb")
params = params_as_dict(nb_path)
param_meta = extract_param_meta("sciflow.test.test_clustering", params)

In [20]:
# List the parameter names that received metadata.
param_meta.keys()

dict_keys(['traffic_percent', 'workers', 'model_level', 'min_date'])