In [None]:
from main import *

from sklearn.metrics import f1_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV, ParameterGrid

from sklearn.pipeline import Pipeline
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor
from sktime.classification.sklearn import RotationForest


from sktime.pipeline import make_pipeline
from sktime.transformations.panel.rocket import MiniRocketMultivariate
from sklearn.linear_model import LogisticRegression

from sktime.classification.interval_based import DrCIF
from sktime.classification.shapelet_based import ShapeletTransformClassifier
from sktime.classification.dictionary_based import TemporalDictionaryEnsemble
from sktime.classification.deep_learning import InceptionTimeClassifier

import warnings
# Suppress DeprecationWarning messages for every cell executed after this one.
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
# Identifiers used to enumerate the dataset directories.
# Machines are M01..M03 and operations are OP00..OP14 (zero-padded to two digits).
machines = [f"M{idx:02d}" for idx in range(1, 4)]
process_names = [f"OP{idx:02d}" for idx in range(15)]
labels = ["good", "bad"]

# Absolute path of the dataset root, resolved against the current working directory.
path_to_dataset = Path("./data/").absolute()

In [None]:
# Accumulate whatever load_tool_research_data returns for every
# (operation, machine, label) combination, then pass both lists to get_df.
X_data, y_data = [], []

for process_name, machine, label in itertools.product(process_names, machines, labels):
    # Directory layout: <dataset root>/<machine>/<operation>/<label>
    data_path = os.path.join(path_to_dataset, machine, process_name, label)
    data_list, data_label = data_loader_utils.load_tool_research_data(
        data_path,
        label=label,
        add_additional_label=True,
        verbose=False,
    )
    X_data += data_list
    y_data += data_label

df = get_df(X_data, y_data)

In [None]:
# Cross-validated search over n_estimators for a Rotation Forest.
# NOTE(review): this is the only model routed through get_uni_cv_results
# instead of get_cv_results — confirm that is intentional.
rotf = RotationForest(n_jobs=-1)
param_grid_rotf = {"n_estimators": [50, 200]}

results_rotf, objects_rotf = get_uni_cv_results(rotf, param_grid_rotf, df)

# Persist the CV results; open() does not create directories, so
# cv_results/ must already exist.
with open("cv_results/rotf.pkl", "wb") as f:
    pickle.dump(results_rotf, f)

In [None]:
# Feature-based pipeline: tsfresh feature extraction ("efficient" parameter set)
# feeding a Rotation Forest classifier.
fp = Pipeline(
    steps=[
        (
            "transform",
            TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False),
        ),
        ("classifier", RotationForest()),
    ]
)

# The "classifier__" prefix addresses the pipeline step named "classifier".
param_grid_fp = {"classifier__n_estimators": [50, 200]}

cv_results_fp, gs_objects_fp = get_cv_results(fp, param_grid_fp, df)

# Persist the CV results; open() does not create directories, so
# cv_results/ must already exist.
with open("cv_results/fp.pkl", "wb") as f:
    pickle.dump(cv_results_fp, f)

In [None]:
# Cross-validated search over att_subsample_size for DrCIF.
drcif = DrCIF(
    time_limit_in_minutes=10,
    n_jobs=-1,
)
param_grid_drcif = {"att_subsample_size": [5, 10, 20]}

cv_results_drcif, gs_objects_drcif = get_cv_results(drcif, param_grid_drcif, df)

# Persist the CV results; open() does not create directories, so
# cv_results/ must already exist.
with open("cv_results/drcif.pkl", "wb") as f:
    pickle.dump(cv_results_drcif, f)

In [None]:
# MiniRocket transform followed by logistic regression, combined with
# sktime's make_pipeline.
trf = MiniRocketMultivariate(n_jobs=1)
clf = LogisticRegression(solver="liblinear", n_jobs=1)
MiniRocketLR = make_pipeline(trf, clf)

# NOTE(review): the grid keys carry no "<step>__" prefix — confirm that
# get_cv_results / the sktime pipeline resolves them to the right component.
param_grid_MiniRocketLR = {
    "num_kernels": [1000, 10000],
    'C': [0.01, 0.1, 1, 10, 100],
}

cv_results_mr, gs_objects_mr = get_cv_results(MiniRocketLR, param_grid_MiniRocketLR, df)

# Persist the CV results; open() does not create directories, so
# cv_results/ must already exist.
with open("cv_results/mr.pkl", "wb") as f:
    pickle.dump(cv_results_mr, f)

In [None]:
# Cross-validated search over three Temporal Dictionary Ensemble settings.
tde = TemporalDictionaryEnsemble(
    time_limit_in_minutes=1,
    n_jobs=-1,
)
param_grid_tde = {
    "n_parameter_samples": [50, 250],
    "max_ensemble_size": [5, 50],
    "randomly_selected_params": [5, 50],
}

cv_results_tde, gs_objects_tde = get_cv_results(tde, param_grid_tde, df)

# Persist the CV results; open() does not create directories, so
# cv_results/ must already exist.
with open("cv_results/tde.pkl", "wb") as f:
    pickle.dump(cv_results_tde, f)

In [None]:
# Cross-validated search over kernel_size and n_filters for InceptionTime.
itc = InceptionTimeClassifier(
    n_epochs=50,
    batch_size=16,
)
param_grid_itc = {
    "kernel_size": [10, 40],
    "n_filters": [6, 32],
}

# Unlike the other searches, n_jobs=1 is passed explicitly to get_cv_results here.
cv_results_itc, gs_objects_itc = get_cv_results(itc, param_grid_itc, df, n_jobs=1)

# Persist the CV results; open() does not create directories, so
# cv_results/ must already exist.
with open("cv_results/itc.pkl", "wb") as f:
    pickle.dump(cv_results_itc, f)

In [None]:
# Run extract_mean_rank on the CV results of the four time-series models.
# Each call returns a pair, unpacked into <model>_mean_ranks and best_<model>.
# NOTE(review): extract_mean_rank is defined outside this view; presumably the
# second value is the best-ranked parameter setting — confirm.
# The rotf and fp CV results (results_rotf, cv_results_fp) are not processed here.
mr_mean_ranks, best_mr = extract_mean_rank(cv_results_mr)
drcif_mean_ranks, best_drcif = extract_mean_rank(cv_results_drcif)
itc_mean_ranks, best_itc = extract_mean_rank(cv_results_itc)
tde_mean_ranks, best_tde = extract_mean_rank(cv_results_tde)

In [None]:
# Rotation Forest re-created with a fixed n_estimators (one of the values from
# the grid searched earlier).
rotf = RotationForest(
    n_estimators=200,
    n_jobs=-1,
)

In [None]:
# tsfresh + Rotation Forest pipeline re-created; the classifier is left at its
# default constructor arguments.
fp = Pipeline(
    steps=[
        (
            "transform",
            TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False),
        ),
        ("classifier", RotationForest()),
    ]
)

In [None]:
# MiniRocket + logistic regression pipeline re-created with fixed C and an
# explicit L2 penalty.
trf = MiniRocketMultivariate(n_jobs=1)
clf = LogisticRegression(
    C=0.01,
    penalty="l2",
    solver="liblinear",
    n_jobs=1,
)
MiniRocketLR = make_pipeline(trf, clf)

In [None]:
# DrCIF re-created with a fixed att_subsample_size (one of the values from the
# grid searched earlier).
drcif = DrCIF(
    att_subsample_size=5,
    time_limit_in_minutes=10,
    n_jobs=-1,
)

In [None]:
# Temporal Dictionary Ensemble re-created with fixed values for the three
# parameters that were grid-searched earlier.
tde = TemporalDictionaryEnsemble(
    max_ensemble_size=50,
    n_parameter_samples=50,
    randomly_selected_params=50,
    time_limit_in_minutes=1,
    n_jobs=-1,
)

In [None]:
# InceptionTime re-created with fixed kernel_size and n_filters (values from the
# grid searched earlier).
itc = InceptionTimeClassifier(
    kernel_size=10,
    n_filters=6,
    n_epochs=50,
    batch_size=16,
)

In [None]:
# Evaluate the MiniRocket + logistic regression pipeline.
# NOTE(review): no dataset is passed; get_test_results (defined outside this
# view) presumably obtains the data itself — confirm.
test_mr, mr_objects = get_test_results(MiniRocketLR)

In [None]:
# Evaluate the DrCIF model.
# NOTE(review): no dataset is passed; get_test_results (defined outside this
# view) presumably obtains the data itself — confirm.
test_drcif, drcif_objects = get_test_results(drcif)

In [None]:
# Evaluate the Temporal Dictionary Ensemble model.
# NOTE(review): no dataset is passed; get_test_results (defined outside this
# view) presumably obtains the data itself — confirm.
test_tde, tde_objects = get_test_results(tde)

In [None]:
# Evaluate the InceptionTime model.
# NOTE(review): no dataset is passed; get_test_results (defined outside this
# view) presumably obtains the data itself — confirm.
test_itc, itc_objects = get_test_results(itc)

In [None]:
# Summarise the test F1 scores of the evaluated models in one table.
#
# Only the four models that have a get_test_results(...) cell are included.
# The previous version also read test_fp["model_f1"], but test_fp is not
# assigned in any visible cell (NameError on a fresh kernel), and a fifth row
# would not match the four model labels (pandas raises a length-mismatch
# ValueError on the column assignment). Keeping labels and results in a single
# mapping prevents the two from drifting apart again; to add a model, add one
# entry here after computing its test results.
test_results_by_model = {
    "MR": test_mr,
    "DrCIF": test_drcif,
    "TDE": test_tde,
    "ITC": test_itc,
}

# One row per model, taken from its "model_f1" entry.
results = pd.DataFrame(
    tuple(test_result["model_f1"] for test_result in test_results_by_model.values())
)
results["model"] = list(test_results_by_model)
results = results.set_index("model")
# NOTE(review): assumes each "model_f1" holds three scores in the order
# machine, time, operation — confirm against get_test_results.
results = results.rename(columns={0: "machine", 1: "time", 2: "operation"})
# Row-wise mean over the score columns.
results["avg"] = results.mean(axis=1)