In [1]:
from main import *

from sklearn.model_selection import train_test_split, StratifiedKFold

from sktime.transformations.panel.rocket import MiniRocketMultivariate
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import f1_score, recall_score, confusion_matrix, ConfusionMatrixDisplay

In [2]:
# Machine identifiers; each is used as a directory name below the dataset root.
machines = [
    "M01",
    "M02",
    "M03",
]

# Process identifiers OP00 through OP14; also used as directory names.
process_names = [
    "OP00", "OP01", "OP02", "OP03", "OP04",
    "OP05", "OP06", "OP07", "OP08", "OP09",
    "OP10", "OP11", "OP12", "OP13", "OP14",
]

# Class labels; these double as the leaf directory names of the dataset.
labels = [
    "good",
    "bad",
]

# Dataset root as an absolute path, anchored at the current working directory.
path_to_dataset = Path("./data/").absolute()

In [3]:
# Accumulators for every recording and its identifier string across the dataset.
X_data, y_data = [], []

# Visit each <machine>/<process>/<label> folder. The process varies slowest and
# the label fastest, which fixes the order of the samples in X_data / y_data.
for process_name, machine, label in itertools.product(process_names, machines, labels):
    folder = os.path.join(path_to_dataset, machine, process_name, label)
    # NOTE(review): the loader is defined outside this notebook. With
    # add_additional_label=True the returned ids presumably end in "_<label>",
    # since later cells split each id on "_" and read the last field as the class.
    recordings, recording_ids = data_loader_utils.load_tool_research_data(
        folder,
        label=label,
        add_additional_label=True,
        verbose=False,
    )
    # Append this folder's samples to the global lists.
    X_data.extend(recordings)
    y_data.extend(recording_ids)

In [4]:
# Fixed input size: every recording is cropped to its first N_TIMESTEPS rows and
# first N_AXES columns so that all samples share one shape.
N_TIMESTEPS = 4096
N_AXES = 3

# Crop first, then cast, so only the retained window is copied to float64.
# np.stack requires identical shapes, so a recording shorter than N_TIMESTEPS
# raises here instead of (on older NumPy versions) silently producing a ragged
# object array that only breaks further downstream.
X = np.stack([rec[:N_TIMESTEPS, :N_AXES].astype(np.float64) for rec in X_data])

# Binary target: 0 when the last "_"-separated field of the id is "good", else 1.
y = np.array([0 if sample_id.split("_")[-1] == "good" else 1 for sample_id in y_data])

# Reshape X into a nested DataFrame (one column per axis, each cell holding the
# full pd.Series of one sample) so it can be passed to the MiniRocket transform.
axis = ["X-axis", "Y-axis", "Z-axis"]
axisdict = {
    axis_name: [pd.Series(sample[:, col]) for sample in X]
    for col, axis_name in enumerate(axis)
}

X_df = pd.DataFrame(axisdict)

# Split every id into its "_"-separated fields and attach them as metadata
# columns; the rows of y_df line up with X_df through the shared default index.
id_fields = [sample_id.split("_") for sample_id in y_data]
y_df = pd.DataFrame(id_fields)
df = X_df.join(y_df).rename(columns={0: "MC", 1: "MM", 2: "YY", 3: "OP", 4: "n", 5: "y"})

# Encode the label column numerically: "bad" -> 1, anything else -> 0.
df["y"] = df["y"].eq("bad").astype("int64")

In [None]:
from sklearn.model_selection import GridSearchCV
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

# machine_split is not defined in this notebook (presumably it comes from
# `main`); it returns the train/test features and targets, in that order.
X_train, X_test, y_train, y_test = machine_split(df)


# Baseline: 1-nearest-neighbour time-series classifier with DTW distance.
knn = KNeighborsTimeSeriesClassifier(n_neighbors=1, distance="dtw", n_jobs=-1)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Binary F1 happens
# to be symmetric, but the correct order keeps this call safe to copy for
# recall_score / confusion_matrix, where swapping the arguments changes the result.
f1_score(y_test, y_pred)

# Disabled hyper-parameter search, kept for reference:
#param_grid = {"n_neighbors": [1, 5], "distance": ["euclidean", "dtw"]}
#parameter_tuning_method = GridSearchCV(knn, param_grid, cv=StratifiedKFold(n_splits=4))

#parameter_tuning_method.fit(X_train, y_train)
#y_pred = parameter_tuning_method.predict(X_test)