In [2]:
# Kept first on purpose: every explicit import below must keep taking precedence
# over whatever names the star import brings in.
from main import *

import json
import pickle
import time
from pathlib import Path

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV

from sktime.classification.deep_learning import InceptionTimeClassifier
from sktime.classification.dictionary_based import TemporalDictionaryEnsemble
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
#from sktime.classification.feature_based import FreshPRINCE
#from sktime.classification.hybrid import HIVECOTEV2
from sktime.classification.interval_based import DrCIF
#from sktime.classification.shapelet_based import ShapeletTransformClassifier
from sktime.dists_kernels.dtw import DtwDtaidistMultiv
from sktime.pipeline import make_pipeline
from sktime.transformations.panel.rocket import MiniRocketMultivariate

In [2]:
# Machine, operation and label names; joined (in that nesting) to build the
# per-sample folder paths underneath `path_to_dataset`.
machines = [f"M{idx:02d}" for idx in (1, 2, 3)]          # "M01" ... "M03"
process_names = [f"OP{idx:02d}" for idx in range(15)]    # "OP00" ... "OP14"
labels = ["good", "bad"]
path_to_dataset = Path("./data/").absolute()

# Split names and their splitter callables; the two lists are paired by position.
splits = ["machine", "time", "operation"]
split_functions = [machine_split, time_split, op_split]

In [3]:
X_data, y_data = [], []

# Walk every (operation, machine, label) combination and pool whatever the
# loader returns for that folder into two flat, parallel lists.
for process_name, machine, label in itertools.product(process_names, machines, labels):
    data_path = os.path.join(path_to_dataset, machine, process_name, label)
    data_list, data_label = data_loader_utils.load_tool_research_data(
        data_path,
        label=label,
        add_additional_label=True,
        verbose=False,
    )
    X_data += data_list
    y_data += data_label

In [4]:
# Cast every example to float64 and keep only its first 4096 timesteps and
# first three channels, then stack everything into one array.
X = np.array([sample.astype(np.float64)[:4096, :3] for sample in X_data])
# Binary target taken from the last "_"-separated token of each id: "good" -> 0, anything else -> 1.
y = np.array([int(sample_id.split("_")[-1] != "good") for sample_id in y_data])

# Reshape X into a dataframe that is compatible with MiniRocket transform:
# one column per axis, each cell holding that example's full series.
axis = ["X-axis", "Y-axis", "Z-axis"]
axisdict = {
    ax: [pd.Series(sample[:, channel]) for sample in X]
    for channel, ax in enumerate(axis)
}
X_df = pd.DataFrame(axisdict)

# Split each id on "_" into its tokens and attach them as metadata columns.
# NOTE(review): the column names suggest machine / month / year / operation /
# sample number / label, but the id format is defined by the loader — confirm.
a = [sample_id.split("_") for sample_id in y_data]
y_df = pd.DataFrame(a)
column_names = {0: "MC", 1: "MM", 2: "YY", 3: "OP", 4: "n", 5: "y"}
df = X_df.join(y_df).rename(columns=column_names)
# Encode the label column numerically: "bad" -> 1, everything else -> 0.
df["y"] = df["y"].apply(lambda tag: int(tag == "bad"))

In [5]:
def get_cv_results(clf, param_grid, df = df, splits = splits, split_functions = split_functions):
    """Run one grid search per data split and collect the outcomes.

    For every split name, the matching splitter is applied to ``df`` and a
    ``GridSearchCV`` (F1 scoring, 3-fold ``StratifiedKFold``, ``n_jobs=-1``)
    is fitted on the training part only. The held-out part returned by the
    splitter is not used here.

    Note that the default values of ``df``, ``splits`` and ``split_functions``
    are bound when this ``def`` is executed; re-run this cell after changing
    those globals, or pass them explicitly.

    Parameters
    ----------
    clf : estimator
        Estimator handed to ``GridSearchCV``.
    param_grid : dict
        Parameter grid handed to ``GridSearchCV``.
    df : pandas.DataFrame
        Data passed unchanged to every splitter.
    splits : iterable of str
        Names of the splits; used as keys of the returned dictionaries.
    split_functions : sequence of callables
        ``split_functions[k]`` belongs to the k-th split name. Each callable
        takes ``df`` and returns ``(X_train, X_test, y_train, y_test)``.

    Returns
    -------
    cv_results : dict
        Maps split name to the ``cv_results_`` of its fitted search.
    gs_objects : dict
        Maps split name to the fitted ``GridSearchCV`` object.

    Raises
    ------
    ValueError
        If there are fewer split functions than split names.
    """
    splits = list(splits)
    # Fail before any expensive search starts instead of part-way through.
    if len(split_functions) < len(splits):
        raise ValueError(
            f"Got {len(splits)} split names but only "
            f"{len(split_functions)} split functions."
        )

    cv_results = {}
    gs_objects = {}
    # zip stops at the last split name, so surplus split functions are ignored.
    for split, splitter in zip(splits, split_functions):
        print(f"Processing the {split}-wise split...")
        # Only the training portion takes part in the search.
        X_train, _, y_train, _ = splitter(df)
        gs = GridSearchCV(clf,
                          param_grid,
                          scoring = "f1",
                          n_jobs = -1,
                          cv = StratifiedKFold(n_splits = 3)
                         )
        gs.fit(X_train, y_train)

        cv_results[split] = gs.cv_results_
        gs_objects[split] = gs
    return cv_results, gs_objects

In [None]:
import pickle

# NOTE(review): `dictionary` is not assigned in any cell visible in this notebook;
# unless `from main import *` provides it, this cell raises NameError on a
# fresh kernel — confirm what was meant to be saved here.

# Serialise the object to disk ...
with open('saved_dictionary.pkl', 'wb') as sink:
    pickle.dump(dictionary, sink)

# ... and read the same file back into `loaded_dict`.
with open('saved_dictionary.pkl', 'rb') as source:
    loaded_dict = pickle.load(source)

In [None]:
# 1-nearest-neighbour baseline on top of the `DtwDtaidistMultiv` distance.
distance = DtwDtaidistMultiv(use_c=True)
knn = KNeighborsTimeSeriesClassifier(n_neighbors=1, distance=distance, n_jobs=-1)

# One score per split, appended in the order of `splits`.
knn_f1, knn_recall = [], []

for split, splitter in zip(splits, split_functions):
    print(f"Processing {split}-wise split...")

    # Fit on the training part of this split, score on its held-out part.
    X_train, X_test, y_train, y_test = splitter(df)
    knn.fit(X_train, y_train)
    y_preds = knn.predict(X_test)

    knn_f1.append(f1_score(y_test, y_preds))
    knn_recall.append(recall_score(y_test, y_preds))

In [None]:
# Create the output folder before the search starts, so a missing directory
# cannot make the final save fail after all the work has been done.
Path("cv_results").mkdir(parents=True, exist_ok=True)

# MiniRocket features followed by a logistic regression; both run single-threaded
# here while the grid search itself uses n_jobs=-1.
trf = MiniRocketMultivariate(n_jobs = 1)
clf = LogisticRegression(solver = "liblinear", n_jobs = 1)

MiniRocketLR = make_pipeline(trf, clf)

# NOTE(review): these keys carry no step prefix although the estimator is a
# pipeline; whether they reach the LogisticRegression step depends on how the
# installed sktime version resolves parameter names — confirm.
param_grid_MiniRocketLR = {
    "penalty" : ["l1", "l2"],
    'C': [0.01, 0.1, 1, 10, 100]
}

cv_results_MiniRocketLR, gs_objects_MiniRocketLR = get_cv_results(MiniRocketLR, param_grid_MiniRocketLR)

# Only the cv_results_ dictionaries are persisted; the fitted searches stay in memory.
with open("cv_results/MiniRocketLR.pkl", "wb") as f:
    pickle.dump(cv_results_MiniRocketLR, f)

In [None]:
# Create the output folder before the search starts, so a missing directory
# cannot make the final save fail after all the work has been done.
Path("cv_results").mkdir(parents=True, exist_ok=True)

# NOTE(review): the estimator's n_jobs=-1 is nested inside the grid search's own
# n_jobs=-1, whereas the MiniRocket cell uses n_jobs=1 — confirm which is intended.
drcif = DrCIF(n_jobs = -1)

param_grid_drcif = {
    'n_estimators': [50, 100, 200],
    "att_subsample_size": [5, 10, 20],
}

cv_results_drcif, gs_objects_drcif = get_cv_results(drcif, param_grid_drcif)

# NOTE(review): the file name is spelled "drdcif", not "drcif"; left unchanged
# because code elsewhere may read it under this name — confirm before renaming.
with open("cv_results/drdcif.pkl", "wb") as f:
    pickle.dump(cv_results_drcif, f)

In [None]:
# Create the output folder before the search starts, so a missing directory
# cannot make the final save fail after all the work has been done.
Path("cv_results").mkdir(parents=True, exist_ok=True)

# NOTE(review): the estimator's n_jobs=-1 is nested inside the grid search's own
# n_jobs=-1, whereas the MiniRocket cell uses n_jobs=1 — confirm which is intended.
tde = TemporalDictionaryEnsemble(n_jobs = -1)

param_grid_tde = {
    "n_parameter_samples" : [50, 250],
    "max_ensemble_size" : [5, 50],
    "randomly_selected_params" : [5, 50]
}

cv_results_tde, gs_objects_tde = get_cv_results(tde, param_grid_tde)

# Only the cv_results_ dictionaries are persisted; the fitted searches stay in memory.
with open("cv_results/tde.pkl", "wb") as f:
    pickle.dump(cv_results_tde, f)

In [None]:
# Create the output folder before the search starts, so a missing directory
# cannot make the final save fail after all the work has been done.
Path("cv_results").mkdir(parents=True, exist_ok=True)

# Training length and batch size are fixed; only the two architecture
# parameters below are searched.
itc = InceptionTimeClassifier(n_epochs = 50, batch_size = 16)

param_grid_itc = {
    "kernel_size" : [10, 40],
    "n_filters" : [6, 32],
}

cv_results_itc, gs_objects_itc = get_cv_results(itc, param_grid_itc)

# Only the cv_results_ dictionaries are persisted; the fitted searches stay in memory.
with open("cv_results/itc.pkl", "wb") as f:
    pickle.dump(cv_results_itc, f)