In [2]:
from main import *


# StratifiedCrossValidation
from sklearn.model_selection import train_test_split, StratifiedKFold

from sktime.transformations.panel.rocket import MiniRocketMultivariate
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import f1_score, recall_score, confusion_matrix, ConfusionMatrixDisplay

In [3]:
machines = ["M01", "M02","M03"]
process_names = ["OP00","OP01","OP02","OP03","OP04","OP05","OP06","OP07","OP08","OP09","OP10","OP11","OP12","OP13","OP14"]
labels = ["good","bad"]
path_to_dataset = Path("./data/").absolute()

In [4]:
X_data = []
y_data = []

for process_name, machine, label in itertools.product(process_names, machines, labels):
    data_path = os.path.join(path_to_dataset, machine, process_name, label)
    data_list, data_label = data_loader_utils.load_tool_research_data(data_path, label=label, add_additional_label = True, verbose = False)
    #concatenating
    X_data.extend(data_list)
    y_data.extend(data_label)

In [5]:
X = [x.astype(np.float64) for x in X_data] # Ensure all examples have the same datatype
X = np.array([x[:4096, :3] for x in X]) # Select the first 4096 timesteps of all three dimensions for each row
y = np.array([0 if id.split("_")[-1] == "good" else 1 for id in y_data])

# Reshape X into a dataframe that is compatible with MiniRocket transform
axis = ["X-axis", "Y-axis", "Z-axis"] 
axisdict = {"X-axis": [], "Y-axis":[],  "Z-axis":[]}
for i, ax in enumerate(axis):
    for n in range(X.shape[0]):
        axisdict[ax].append(pd.Series(X[n][:,i]))

X_df = pd.DataFrame(axisdict)

In [6]:
a = [y.split("_") for y in y_data]
y_df = pd.DataFrame(a)
df = X_df.join(y_df).rename(columns = {0: "MC", 1: "MM", 2: "YY", 3: "OP", 4: "n", 5: "y"})
df["y"] = df["y"].apply(lambda x: 1 if x == "bad" else 0)

In [10]:
M01 = df[df["MC"] == "M01"]
X_M01, y_M01 = M01.iloc[:,0:3], M01.iloc[:,-1]

M02 = df[df["MC"] == "M02"]
X_M02, y_M02 = M02.iloc[:,0:3], M02.iloc[:,-1]

M03 = df[df["MC"] == "M03"]
X_M03, y_M03 = M03.iloc[:,0:3], M03.iloc[:,-1]


F1_machine = []
Recall_machine = []

for i in range(3):
    X_M01_train, X_M01_test, y_M01_train, y_M01_test = train_test_split(X_M01, y_M01, test_size = 0.9, stratify = y_M01)
    
    X_M02_train, X_M02_test, y_M02_train, y_M02_test = train_test_split(X_M02, y_M02, test_size = 0.9, stratify = y_M02)
    
    X_train = pd.concat((X_M01_train, X_M02_train))
    y_train = pd.concat((y_M01_train, y_M02_train))
    
    X_test = pd.concat((X_M01_test, X_M02_test, X_M03))
    y_test = pd.concat((y_M01_test, y_M02_test, y_M03))
    
    trf = MiniRocketMultivariate(n_jobs = -1) 
    trf.fit(X_train)
    X_train_trf = trf.transform(X_train)
    X_test_trf = trf.transform(X_test) 
    
    clf = LogisticRegression()
    
    clf.fit(X_train_trf, y_train)
    y_pred = clf.predict(X_test_trf)

    F1_machine.append(f1_score(y_test, y_pred))
    Recall_machine.append(recall_score(y_test, y_pred))

In [11]:
Feb_2019 = df[(df["MM"] == "Feb") & (df["YY"] == "2019")]
Aug_2019 = df[(df["MM"] == "Aug") & (df["YY"] == "2019")]
Feb_2020 = df[(df["MM"] == "Feb") & (df["YY"] == "2020")]
Aug_2020 = df[(df["MM"] == "Aug") & (df["YY"] == "2020")]
Feb_2021 = df[(df["MM"] == "Feb") & (df["YY"] == "2021")]
Aug_2021 = df[(df["MM"] == "Aug") & (df["YY"] == "2021")]

X_Feb_2019, y_Feb_2019 = Feb_2019.iloc[:,0:3], Feb_2019.iloc[:,-1]
X_Aug_2019, y_Aug_2019 = Aug_2019.iloc[:,0:3], Aug_2019.iloc[:,-1]
X_Feb_2020, y_Feb_2020 = Feb_2020.iloc[:,0:3], Feb_2020.iloc[:,-1]
X_Aug_2020, y_Aug_2020 = Aug_2020.iloc[:,0:3], Aug_2020.iloc[:,-1]
X_Feb_2021, y_Feb_2021 = Feb_2021.iloc[:,0:3], Feb_2021.iloc[:,-1]
X_Aug_2021, y_Aug_2021 = Aug_2021.iloc[:,0:3], Aug_2021.iloc[:,-1]

F1_time = []
Recall_time = []

for i in range(3):
    X_Feb_2019_train, X_Feb_2019_test, y_Feb_2019_train, y_Feb_2019_test = train_test_split(X_Feb_2019, y_Feb_2019, test_size = 0.9, stratify = y_Feb_2019)
        
    X_Aug_2019_train, X_Aug_2019_test, y_Aug_2019_train, y_Aug_2019_test = train_test_split(X_Aug_2019, y_Aug_2019, test_size = 0.9, stratify = y_Aug_2019)

    X_Feb_2020_train, X_Feb_2020_test, y_Feb_2020_train, y_Feb_2020_test = train_test_split(X_Feb_2020, y_Feb_2020, test_size = 0.9, stratify = y_Feb_2020)
    
    X_Feb_2021_train, X_Feb_2021_test, y_Feb_2021_train, y_Feb_2021_test = train_test_split(X_Feb_2021, y_Feb_2021, test_size = 0.9, stratify = y_Feb_2021)

    X_train = pd.concat((X_Feb_2019_train, X_Aug_2019_train, X_Feb_2020_train, X_Feb_2021_train))
    X_test = pd.concat((X_Feb_2019_test, X_Aug_2019_test, X_Feb_2020_test, X_Aug_2020, X_Feb_2021_test, X_Aug_2021))
    
    y_train = pd.concat((y_Feb_2019_train, y_Aug_2019_train, y_Feb_2020_train, y_Feb_2021_train))
    y_test = pd.concat((y_Feb_2019_test, y_Aug_2019_test, y_Feb_2020_test, y_Aug_2020, y_Feb_2021_test, y_Aug_2021))
    
    trf = MiniRocketMultivariate(n_jobs = -1) 
    trf.fit(X_train)
    X_train_trf = trf.transform(X_train)
    X_test_trf = trf.transform(X_test) 
    
    clf = LogisticRegression()
    
    clf.fit(X_train_trf, y_train)
    y_pred = clf.predict(X_test_trf)
    
    F1_time.append(f1_score(y_test, y_pred))
    Recall_time.append(recall_score(y_test, y_pred))


In [12]:
OP07 = df[df["OP"] == "OP07"]
OP01 = df[df["OP"] == "OP01"]
OP02 = df[df["OP"] == "OP02"]
OP10 = df[df["OP"] == "OP10"]
OP04 = df[df["OP"] == "OP04"]
OP = df[~df["OP"].isin(["OP07", "OP01", "OP02", "OP10", "OP04"])]

X_OP07, y_OP07 = OP07.iloc[:,0:3], OP07.iloc[:,-1]
X_OP01, y_OP01 = OP01.iloc[:,0:3], OP01.iloc[:,-1]
X_OP02, y_OP02 = OP02.iloc[:,0:3], OP02.iloc[:,-1]
X_OP10, y_OP10 = OP10.iloc[:,0:3], OP10.iloc[:,-1]
X_OP04, y_OP04 = OP04.iloc[:,0:3], OP04.iloc[:,-1]
X_OP, y_OP = OP.iloc[:,0:3], OP.iloc[:,-1]

F1_OP = []
Recall_OP = []

for i in range(3):
    X_OP07_train, X_OP07_test, y_OP07_train, y_OP07_test = train_test_split(X_OP07, y_OP07, test_size = 0.8, stratify = y_OP07)
    
    X_OP01_train, X_OP01_test, y_OP01_train, y_OP01_test = train_test_split(X_OP01, y_OP01, test_size = 0.8, stratify = y_OP01)
    
    X_OP02_train, X_OP02_test, y_OP02_train, y_OP02_test = train_test_split(X_OP02, y_OP02, test_size = 0.8, stratify = y_OP02)
    
    X_OP10_train, X_OP10_test, y_OP10_train, y_OP10_test = train_test_split(X_OP10, y_OP10, test_size = 0.8, stratify = y_OP10)
    
    X_OP04_train, X_OP04_test, y_OP04_train, y_OP04_test = train_test_split(X_OP04, y_OP04, test_size = 0.8, stratify = y_OP04)
    
    X_train = pd.concat((X_OP07_train, X_OP01_train, X_OP02_train, X_OP10_train, X_OP04_train))
    X_test = pd.concat((X_OP07_test, X_OP01_test, X_OP02_test, X_OP10_test, X_OP04_test, X_OP))
    
    y_train = pd.concat((y_OP07_train, y_OP01_train, y_OP02_train, y_OP10_train, y_OP04_train))
    y_test = pd.concat((y_OP07_test, y_OP01_test, y_OP02_test, y_OP10_test, y_OP04_test, y_OP))
    
    
    trf = MiniRocketMultivariate(n_jobs = -1) 
    trf.fit(X_train)
    X_train_trf = trf.transform(X_train)
    X_test_trf = trf.transform(X_test) 
    
    clf = LogisticRegression()
    
    clf.fit(X_train_trf, y_train)
    y_pred = clf.predict(X_test_trf)

    F1_OP.append(f1_score(y_test, y_pred))
    Recall_OP.append(recall_score(y_test, y_pred))

In [13]:
print("F1 Machine Wise")
print(np.mean(F1_machine))
print("Recall Machine Wise")
print(np.mean(Recall_machine))
print("F1 Time Wise")
print(np.mean(F1_time))
print("Recall Time Wise")
print(np.mean(Recall_time))
print("F1 OP Wise")
print(np.mean(F1_OP))
print("Recall OP Wise")
print(np.mean(Recall_OP))

F1 Machine Wise
0.973499280331894
Recall Machine Wise
0.9583333333333334
F1 Time Wise
0.9841269841269841
Recall Time Wise
0.96875
F1 OP Wise
0.9789570567244986
Recall OP Wise
0.96875
