In [None]:
%reload_ext autoreload
%autoreload 2

import sys
import os

# A notebook has no usable __file__, so the extra import location is resolved
# against the current working directory: its parent directory is appended to
# sys.path (presumably so the local pinard checkout is importable - confirm).
sys.path.append(os.path.abspath(os.pardir))

import time
from pinard.presets.ref_models import decon, bacon, customizable_bacon, bacon_classification
from pinard.presets.preprocessings import decon_set, bacon_set
from pinard.data_splitters import KennardStoneSplitter
from pinard.transformations import StandardNormalVariate as SNV, SavitzkyGolay as SG, Gaussian as GS, Derivate as  Dv
from pinard.transformations import Rotate_Translate as RT, Spline_X_Simplification as SXS, Random_X_Operation as RXO
from pinard.transformations import CropTransformer
from pinard.core.runner import ExperimentRunner
from pinard.core.config import Config

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, RepeatedKFold, StratifiedKFold, RepeatedStratifiedKFold, ShuffleSplit, GroupKFold, StratifiedShuffleSplit, BaseCrossValidator, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

# Declarative description of a scikit-learn model: the estimator is named by
# its dotted import path and "model_params" carries its settings
# (presumably forwarded to the estimator constructor - confirm in pinard).
model_sklearn = {
    "class": "sklearn.cross_decomposition.PLSRegression",
    "model_params": dict(n_components=21),
}
    
# Fine-tuning experiment for the PLS model: "n_components" is described by the
# tuple ('int', 5, 20) (presumably an integer search range - confirm), with no
# extra training parameters and the 'sklearn' tuner.
finetune_pls_experiment = {
    "action": "finetune",
    "finetune_params": dict(
        model_params={"n_components": ("int", 5, 20)},
        training_params={},
        tuner="sklearn",
    ),
}

# Full-length training run with a cyclic learning rate between base_lr and max_lr.
bacon_train = {
    "action": "train",
    "training_params": dict(
        epochs=2000,
        batch_size=500,
        patience=200,
        cyclic_lr=True,
        base_lr=1e-6,
        max_lr=1e-3,
        step_size=400,
    ),
}

# Quick variant of the same setup for smoke tests.
# NOTE(review): patience (20) is larger than epochs (10) here - confirm that is intended.
bacon_train_short = {
    "action": "train",
    "training_params": dict(
        epochs=10,
        batch_size=500,
        patience=20,
        cyclic_lr=True,
        base_lr=1e-6,
        max_lr=1e-3,
        step_size=40,
    ),
}
# Short fine-tuning experiment: 5 trials over the filter counts of the three
# "filters_N" entries, each trained for 10 epochs.
bacon_finetune = {
    "action": "finetune",
    "finetune_params": {
        "n_trials": 5,
        # Every entry gets its own list object with the same candidates.
        "model_params": {
            f"filters_{layer}": [8, 16, 32, 64] for layer in (1, 2, 3)
        },
    },
    "training_params": {"epochs": 10},
}

# Large fine-tuning experiment (150 trials, up to 500 epochs with patience 100)
# over a wide set of model hyper-parameters. Commented-out entries are search
# dimensions that are currently disabled.
#
# NOTE(review): key spelling differs from bacon_finetune - this dict uses
# "nb_trials" / "filters1" where bacon_finetune uses "n_trials" / "filters_1",
# and ranges are written with the builtin `float` type whereas
# finetune_pls_experiment uses the string 'int'. Confirm which forms the tuner
# actually reads.
full_bacon_finetune = {
    "action": "finetune",
    "training_params": {"epochs": 500, "patience": 100},
    "finetune_params": {
        "nb_trials": 150,
        "model_params": {
            "spatial_dropout": (float, 0.01, 0.5),
            "filters1": [2 ** exp for exp in range(2, 9)],  # 4, 8, ..., 256
            "kernel_size1": list(range(3, 16, 2)),  # 3, 5, ..., 15
            # 'strides1': [1, 2, 3, 4, 5],
            # 'activation1': ['relu', 'selu', 'elu', 'swish'],
            "dropout_rate": (float, 0.01, 0.5),
            "filters2": [2 ** exp for exp in range(2, 9)],
            # 'kernel_size2': [3, 5, 7, 9, 11, 13, 15],
            # 'strides2': [1, 2, 3, 4, 5],
            "activation2": ["relu", "selu", "elu", "swish"],
            "normalization_method1": ["BatchNormalization", "LayerNormalization"],
            "filters3": [2 ** exp for exp in range(2, 9)],
            # 'kernel_size3': [3, 5, 7, 9, 11, 13, 15],
            # 'strides3': [1, 2, 3, 4, 5],
            "activation3": ["relu", "selu", "elu", "swish"],
            # 'normalization_method2': ['BatchNormalization', 'LayerNormalization'],
            # 'dense_units': [4, 8, 16, 32, 64, 128, 256],
            "dense_activation": ["relu", "selu", "elu", "swish"],
        },
        # "training_params": {
        #     "batch_size": [32, 64, 128, 256, 512],
        #     "cyclic_lr": [True, False],
        #     "base_lr": (float, 1e-6, 1e-2),
        #     "max_lr": (float, 1e-3, 1e-1),
        #     "step_size": (int, 500, 5000),
        # },
    },
}

# X pipeline used with the PLS model: robust scaling, a "features" step listing
# alternative transformations (single entries or chained lists), then min-max
# scaling.
# NOTE(review): GS(2, 1) is an instance while SG / SNV / Dv are passed as
# classes - confirm the runner accepts both forms.
x_pipeline_PLS = [
    RobustScaler(),
    # {"samples": [None, SXS, RXO]},
    # {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(2, 1),
            SG,
            SNV,
            Dv,
            [GS, SNV],
            [GS, GS],
            [GS, SG],
            [SG, SNV],
            [GS, Dv],
            [SG, Dv],
        ],
    },
    MinMaxScaler(),
]
                    


# Full X pipeline: robust scaling, a "samples" step (four None entries followed
# by SXS and RXO), a 3-fold RepeatedKFold "split" step, the "features"
# alternatives, then min-max scaling.
x_pipeline_full = [
    RobustScaler(),
    {"samples": [None] * 4 + [SXS, RXO]},
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(2, 1),
            SG,
            SNV,
            Dv,
            [GS, SNV],
            [GS, GS],
            [GS, SG],
            [SG, SNV],
            [GS, Dv],
            [SG, Dv],
        ],
    },
    MinMaxScaler(),
]

# Second copy of the full X pipeline, built from fresh transformer instances.
# NOTE(review): the steps are identical to x_pipeline_full - confirm whether a
# variation was intended here.
x_pipeline_full2 = [
    RobustScaler(),
    {"samples": [None] * 4 + [SXS, RXO]},
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(2, 1),
            SG,
            SNV,
            Dv,
            [GS, SNV],
            [GS, GS],
            [GS, SG],
            [SG, SNV],
            [GS, Dv],
            [SG, Dv],
        ],
    },
    MinMaxScaler(),
]

# Compact X pipeline used by most configurations: robust scaling, four
# "features" alternatives (all given as instances), then min-max scaling.
# The commented-out steps are alternatives kept for quick toggling.
x_pipeline = [
    RobustScaler(),
    # {"samples": [None, SXS]},
    # {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            [GS(), SNV()],
            SG(),
            GS(),
        ],
    },
    # {"features": [None, GS]},
    # {"features": [None, GS, SG, SNV, Dv, [GS, SNV], [GS, GS],[GS, SG],[SG, SNV], [GS, Dv], [SG, Dv]]},
    # {"features": [None, SG, GS, SNV, [SG, SNV], [GS, SNV], [SG, GS]]},
    # bacon_set(),
    MinMaxScaler(),
]
# Classification counterpart of the short fine-tuning experiment: 5 trials over
# the three "filters_N" entries, 5 training epochs each.
bacon_finetune_classif = {
    "action": "finetune",
    "task": "classification",
    "finetune_params": {
        "n_trials": 5,
        # Every entry gets its own list object with the same candidates.
        "model_params": {
            f"filters_{layer}": [8, 16, 32, 64] for layer in (1, 2, 3)
        },
    },
    "training_params": {"epochs": 5},
}

# Fine-tuning experiment for the random-forest classifier: "n_estimators" is
# described by ('int', 5, 20) (presumably an integer search range - confirm),
# using the 'sklearn' tuner and no extra training parameters.
finetune_randomForestclassifier = {
    "action": "finetune",
    "task": "classification",
    "finetune_params": dict(
        model_params={"n_estimators": ("int", 5, 20)},
        training_params={},
        tuner="sklearn",
    ),
}

# Base random seed; the classification configurations below pass seed * 2.
seed = 123459456

# NOTE(review): `datasets` is not referenced by any visible code in this cell.
datasets = "sample_data/mock_data3_classif"
y_pipeline = MinMaxScaler()

# Config arguments are positional; from the call sites they appear to be
# (dataset path, x pipeline, y pipeline, model, experiment, seed) - confirm
# against pinard.core.config.Config.

# --- Processing only (model and experiment are None) ---
config2 = Config(
    "sample_data/mock_data3",
    x_pipeline_full,
    y_pipeline,
    None,
    None,
    seed,
)

# --- Training: regression ---
config1 = Config(
    "sample_data/mock_data2",
    x_pipeline,
    y_pipeline,
    bacon,
    bacon_train_short,
    seed,
)
config4 = Config(
    "sample_data/mock_data3",
    x_pipeline_PLS,
    y_pipeline,
    model_sklearn,
    None,
    seed,
)

# --- Training: classification (no y pipeline) ---
config3 = Config(
    "sample_data/mock_data3_classif",
    x_pipeline,
    None,
    bacon_classification,
    {"task": "classification", "training_params": {"epochs": 5}},
    seed * 2,
)
config11 = Config(
    "sample_data/mock_data3_binary",
    x_pipeline,
    None,
    bacon_classification,
    {"task": "classification", "training_params": {"epochs": 5}},
    seed * 2,
)
config5 = Config(
    "sample_data/mock_data3_classif",
    x_pipeline,
    None,
    RandomForestClassifier,
    {"task": "classification"},
    seed * 2,
)
config10 = Config(
    "sample_data/mock_data3_binary",
    x_pipeline,
    None,
    RandomForestClassifier,
    {"task": "classification"},
    seed * 2,
)

# --- Fine-tuning: regression ---
config6 = Config(
    "sample_data/mock_data3",
    x_pipeline,
    y_pipeline,
    bacon,
    bacon_finetune,
    seed,
)
config7 = Config(
    "sample_data/mock_data3",
    x_pipeline,
    y_pipeline,
    model_sklearn,
    finetune_pls_experiment,
    seed,
)

# --- Fine-tuning: classification ---
config8 = Config(
    "sample_data/mock_data3_classif",
    x_pipeline,
    None,
    bacon_classification,
    bacon_finetune_classif,
    seed * 2,
)
config9 = Config(
    "sample_data/mock_data3_classif",
    x_pipeline,
    None,
    RandomForestClassifier,
    finetune_randomForestclassifier,
    seed * 2,
)


# Only the two classification training configurations are run for now.
# configs = [config1, config2, config3, config4, config5, config6, config7, config8, config9]
configs = [config3, config5]

# Run every selected configuration and report how long the whole run took.
# time.perf_counter() is used instead of time.time() because it is a monotonic
# clock meant for measuring intervals; it is not affected by system clock
# adjustments that may happen during a long training run.
start = time.perf_counter()
runner = ExperimentRunner(configs, resume_mode="restart")
dataset, model_manager = runner.run()
end = time.perf_counter()
print(f"Time elapsed: {end-start} seconds")


# print(dataset)
# print(dataset.raw_x_train.shape)
# print(dataset.to_str("union"))


# # chart all sample transformations
# sample_0 = dataset.x_train[0][0]
# print(sample_0.shape)
# import matplotlib.pyplot as plt
# import numpy as np
# fig, axs = plt.subplots(5, 5, figsize=(15, 5))
# for i, ax in enumerate(axs.flat):
#     ax.plot(sample_0[i])
#     ax.set_title(f"Sample {i}")
# plt.show()


# class AddVal(TransformerMixin, BaseEstimator):
#     def __init__(self, val):
#         self.val = val

#     def fit(self, X, y=None):
#         return self

#     def transform(self, X):
#         return X + self.val

In [None]:
# Show which pinard version this notebook is importing.
import pinard
print(pinard.__version__)

In [None]:
# ace_tools may not be installed outside the environment this cell was written
# in; treat it as optional so the table can still be built and shown.
try:
    import ace_tools as tools
except ImportError:
    tools = None
import pandas as pd

# Sklearn metrics list
sklearn_metrics = [
    "explained_variance", "r2", "max_error", "matthews_corrcoef",
    "neg_median_absolute_error", "neg_mean_absolute_error",
    "neg_mean_absolute_percentage_error", "neg_mean_squared_error",
    "neg_mean_squared_log_error", "neg_root_mean_squared_error",
    "neg_root_mean_squared_log_error", "neg_mean_poisson_deviance",
    "neg_mean_gamma_deviance", "d2_absolute_error_score", "accuracy",
    "top_k_accuracy", "roc_auc", "roc_auc_ovr", "roc_auc_ovo",
    "roc_auc_ovr_weighted", "roc_auc_ovo_weighted", "balanced_accuracy",
    "average_precision", "neg_log_loss", "neg_brier_score",
    "positive_likelihood_ratio", "neg_negative_likelihood_ratio",
    "adjusted_rand_score", "rand_score", "homogeneity_score",
    "completeness_score", "v_measure_score", "mutual_info_score",
    "adjusted_mutual_info_score", "normalized_mutual_info_score",
    "fowlkes_mallows_score"
]

# Tensorflow/keras metrics list
tensorflow_metrics = [
    "MeanSquaredError", "RootMeanSquaredError", "MeanAbsoluteError",
    "MeanAbsolutePercentageError", "MeanSquaredLogarithmicError",
    "CosineSimilarity", "LogCoshError", "R2Score", "AUC",
    "FalseNegatives", "FalsePositives", "Precision", "PrecisionAtRecall",
    "Recall", "RecallAtPrecision", "SensitivityAtSpecificity",
    "SpecificityAtSensitivity", "TrueNegatives", "TruePositives",
    "Hinge", "SquaredHinge", "CategoricalHinge", "KLDivergence",
    "Poisson", "BinaryCrossentropy", "CategoricalCrossentropy",
    "SparseCategoricalCrossentropy", "Accuracy", "BinaryAccuracy",
    "CategoricalAccuracy", "SparseCategoricalAccuracy",
    "TopKCategoricalAccuracy", "SparseTopKCategoricalAccuracy",
    "F1Score", "FBetaScore", "IoU", "BinaryIoU", "MeanIoU",
    "OneHotIoU", "OneHotMeanIoU"
]

# Metric name mapping: (tensorflow_name, sklearn_name, abbreviation, method_name)
# Initialize with the metrics that have a hand-written correspondence.
metrics_mapping = [
    ("MeanSquaredError", "neg_mean_squared_error", "mse", "Mean Squared Error"),
    ("RootMeanSquaredError", "neg_root_mean_squared_error", "rmse", "Root Mean Squared Error"),
    ("MeanAbsoluteError", "neg_mean_absolute_error", "mae", "Mean Absolute Error"),
    ("MeanAbsolutePercentageError", "neg_mean_absolute_percentage_error", "mape", "Mean Absolute Percentage Error"),
    ("MeanSquaredLogarithmicError", "neg_mean_squared_log_error", "msle", "Mean Squared Logarithmic Error"),
    ("CosineSimilarity", None, "cos_sim", "Cosine Similarity"),
    ("LogCoshError", None, "log_cosh", "Log Cosh Error"),
    ("R2Score", "r2", "r2", "R2 Score"),
    ("AUC", "roc_auc", "auc", "Area Under the Curve"),
    ("Precision", None, "prec", "Precision"),
    ("Recall", None, "recall", "Recall"),
    ("Accuracy", "accuracy", "acc", "Accuracy"),
    ("TopKCategoricalAccuracy", "top_k_accuracy", "top_k_acc", "Top K Categorical Accuracy"),
    ("BinaryCrossentropy", None, "bin_crossentropy", "Binary Crossentropy"),
    ("CategoricalCrossentropy", None, "cat_crossentropy", "Categorical Crossentropy"),
    ("SparseCategoricalCrossentropy", None, "sparse_cat_crossentropy", "Sparse Categorical Crossentropy"),
    ("F1Score", None, "f1", "F1 Score"),
    ("IoU", None, "iou", "Intersection over Union")
]

# Every string that already appears in any column of any row. A metric counts
# as "already mapped" when it equals one of these, which is the same check as
# scanning each row tuple, done once instead of once per metric.
_known_names = {name for row in metrics_mapping for name in row if name is not None}

# Add remaining metrics with None in the missing columns
for metric in sklearn_metrics:
    if metric not in _known_names:
        metrics_mapping.append((None, metric, None, None))
        _known_names.add(metric)

for metric in tensorflow_metrics:
    if metric not in _known_names:
        metrics_mapping.append((metric, None, None, None))
        _known_names.add(metric)

# Create dataframe
df = pd.DataFrame(metrics_mapping, columns=["tensorflow_name", "sklearn_name", "abbreviation", "method_name"])

# Display the dataframe to the user; fall back to plain text when the
# optional display helper is not available.
if tools is not None:
    tools.display_dataframe_to_user(name="Metric Comparison", dataframe=df)
else:
    print(df.to_string())