In [1]:
from pyphm.datasets.milling import MillingPrepMethodA
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pandas as pd
from pathlib import Path
import seaborn as sns
import scipy.io as sio # for reading matlab files
import zipfile
import gdown
import os
import glob

import warnings
warnings.filterwarnings("ignore") # supress all the matplotlib deprecation warnings
from IPython.display import clear_output, display, Image # incase you want to clear the output of a cell
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
path_colab = Path.cwd().parent.parent / 'content'

if path_colab.exists():
    proj_dir = Path.cwd()
else:
    proj_dir = Path.cwd().parent

print(proj_dir)

c:\Users\stajyer\tspipe


In [3]:
sub_dir = proj_dir / 'data' / 'processed' / 'window1024_stride64_test'

df = pd.read_csv(sub_dir / 'milling_processed.csv.gz')
df.head()

Unnamed: 0,cut_id,cut_no,case,time,ae_spindle,ae_table,vib_spindle,vib_table,smcdc,smcac,tool_class
0,0_0,0,1,0.0,0.219727,0.272827,0.733643,2.116699,6.84082,0.124512,0
1,0_0,0,1,0.004,0.246582,0.322266,0.778809,2.277832,6.660156,-0.561523,0
2,0_0,0,1,0.008,0.294189,0.283813,0.758057,2.34375,6.508789,-2.099609,0
3,0_0,0,1,0.012,0.323486,0.26001,0.726318,2.44873,6.542969,-2.731934,0
4,0_0,0,1,0.016,0.290527,0.253296,0.653076,2.546387,6.621094,-3.505859,0


In [4]:
raw_data_dir = proj_dir / 'data' / 'raw'

raw_milling_data = pd.read_csv(raw_data_dir / 'milling' / 'mill.csv')
raw_milling_data = raw_milling_data.drop(['Unnamed: 0'], axis=1)

In [5]:
path_processed_dir = proj_dir / 'data' / 'processed' / 'window1024_stride64_test'
df_features_downloaded = pd.read_csv(path_processed_dir / 'milling_features_comp_stride64_len1024.csv')
df_features_downloaded.head()

Unnamed: 0,cut_id,smcac__time_reversal_asymmetry_statistic__lag_1,smcac__time_reversal_asymmetry_statistic__lag_2,smcac__time_reversal_asymmetry_statistic__lag_3,smcac__c3__lag_1,smcac__c3__lag_2,smcac__c3__lag_3,smcac__cid_ce__normalize_True,smcac__cid_ce__normalize_False,smcac__symmetry_looking__r_0.0,...,smcdc__query_similarity_count__query_None__threshold_0.0,"smcdc__matrix_profile__feature_""min""__threshold_0.98","smcdc__matrix_profile__feature_""max""__threshold_0.98","smcdc__matrix_profile__feature_""mean""__threshold_0.98","smcdc__matrix_profile__feature_""median""__threshold_0.98","smcdc__matrix_profile__feature_""25""__threshold_0.98","smcdc__matrix_profile__feature_""75""__threshold_0.98",cut_no,case,tool_class
0,87_20,-0.009862,-0.00882,0.002745,-0.997182,-0.733741,-0.35414,11.424614,17.72574,0.0,...,,3.789384,5.941307,4.691854,4.617381,3.912163,5.372661,87,11,1
1,87_21,-0.014233,-0.00694,0.003651,-1.006841,-0.739986,-0.355168,11.403917,17.642734,0.0,...,,3.022031,5.579988,3.749164,3.582706,3.339192,4.022824,87,11,1
2,87_22,-0.023152,-0.024342,-0.000312,-1.067194,-0.764072,-0.354402,11.388923,17.604872,0.0,...,,2.896524,5.421811,3.517452,3.349038,3.180649,3.620394,87,11,1
3,87_23,-0.008329,-0.001119,0.017044,-1.070455,-0.782522,-0.37435,11.327472,17.49656,0.0,...,,2.866619,5.212993,3.357202,3.245255,3.119372,3.488481,87,11,1
4,87_24,0.006926,0.018749,0.019649,-1.014428,-0.744961,-0.359926,11.280507,17.361086,0.0,...,,2.866619,4.435201,3.249217,3.202348,3.0327,3.403513,87,11,1


In [7]:
# get the percentage of each tool_class
df_p = df_features_downloaded.groupby("tool_class").size() / df_features_downloaded.shape[0] * 100
df_p = df_p.reset_index()
df_p.columns = ["tool_class", "percentage"]

# get the count of each tool_class
df_c = df_features_downloaded.groupby("tool_class").size().to_frame().reset_index()
df_c.columns = ["tool_class", "count"]

# merge the two dataframes
df_pc = df_p.merge(df_c, on="tool_class")[["tool_class", "count", "percentage"]]
df_pc["percentage"] = df_pc["percentage"].round(2)

# Turning the results into a binary classification problem. ###

df_b = {'tool_class': [0, 1], 
        'count': [df_pc['count'][0] + df_pc['count'][1], df_pc['count'][2]], 
        'percentage': [df_pc['percentage'][0] + df_pc['percentage'][1], df_pc['percentage'][2]]}

df_b = pd.DataFrame(data=df_b)

df_features_downloaded_b = df_features_downloaded
df_features_downloaded_b = df_features_downloaded_b.replace(1, 0)
df_features_downloaded_b = df_features_downloaded_b.replace(2, 1)

In [8]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from tsfresh.feature_selection import select_features
from tsfresh.feature_selection.relevance import calculate_relevance_table

imp = SimpleImputer(strategy='most_frequent')

df_features_downloaded_b = df_features_downloaded_b.dropna(axis=1, how='all')

imputed_df_tool_class = df_features_downloaded_b['tool_class']

imputed_df = imp.fit_transform(df_features_downloaded_b.drop(columns=['tool_class']))

scaler = StandardScaler()
imputed_df_sc = scaler.fit_transform(imputed_df)

imputed_df_sc = pd.DataFrame(imputed_df_sc, columns=df_features_downloaded_b.drop(columns=['tool_class']).columns)

relevance_table = calculate_relevance_table(imputed_df_sc, imputed_df_tool_class)

print(relevance_table)

relevance_table = relevance_table.sort_values("p_value", inplace=True)

                                                                                              feature  \
feature                                                                                                 
vib_spindle__quantile__q_0.7                                             vib_spindle__quantile__q_0.7   
vib_spindle__quantile__q_0.6                                             vib_spindle__quantile__q_0.6   
vib_spindle__fft_coefficient__attr_"abs"__coeff_0   vib_spindle__fft_coefficient__attr_"abs"__coeff_0   
vib_spindle__fft_coefficient__attr_"real"__coeff_0  vib_spindle__fft_coefficient__attr_"real"__coe...   
vib_spindle__quantile__q_0.8                                             vib_spindle__quantile__q_0.8   
...                                                                                               ...   
smcdc__number_crossing_m__m_0                                           smcdc__number_crossing_m__m_0   
smcdc__number_crossing_m__m_-1                         

In [9]:
df_selected = select_features(imputed_df_sc, imputed_df_tool_class)

print(df_selected)

      vib_spindle__quantile__q_0.7  vib_spindle__quantile__q_0.6  \
0                        -0.603003                     -0.603893   
1                        -0.603003                     -0.603893   
2                        -0.589502                     -0.603893   
3                        -0.589502                     -0.603893   
4                        -0.589502                     -0.603893   
...                            ...                           ...   
9035                     -0.514499                     -0.525499   
9036                     -0.514499                     -0.525499   
9037                     -0.529500                     -0.525499   
9038                     -0.529500                     -0.525499   
9039                     -0.529500                     -0.525499   

      vib_spindle__fft_coefficient__attr_"abs"__coeff_0  \
0                                             -0.609765   
1                                             -0.608637   
2     

In [10]:
### Imputed Raw Data ###
from sklearn.model_selection import train_test_split

#Forward selection

X = df_selected.iloc[:,:5]
y = imputed_df_tool_class

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

X_train.head()

Unnamed: 0,vib_spindle__quantile__q_0.7,vib_spindle__quantile__q_0.6,"vib_spindle__fft_coefficient__attr_""abs""__coeff_0","vib_spindle__fft_coefficient__attr_""real""__coeff_0",vib_spindle__quantile__q_0.8
6354,0.927056,0.948303,0.960278,0.960278,0.923971
1541,0.237029,0.227081,0.190128,0.190128,0.247192
3133,1.150564,1.183484,1.201322,1.201322,1.125565
322,-0.01948,-0.008101,0.00153,0.00153,-0.0408
2031,-0.184487,-0.180567,-0.172324,-0.172324,-0.184795


In [11]:
from keras import Model, Sequential, layers, ops

#Source: https://www.kaggle.com/code/manohar676/binary-classification-using-mlp-autoencoder

encoding_dim = [100, 50, 25, 12, 6]
actual_dim = X.shape[1]

input_attributes = layers.Input(shape=(actual_dim,))

#encoded = layers.Dense(encoding_dim, activation="relu")(input_attributes)
#decoded = layers.Dense(actual_dim, activation="sigmoid")(encoded)

encoded = layers.Dense(encoding_dim[0], activation="sigmoid")(input_attributes)
encoded = layers.Dense(encoding_dim[1], activation="sigmoid")(encoded)
encoded = layers.Dense(encoding_dim[2], activation="sigmoid")(encoded)
encoded = layers.Dense(encoding_dim[3], activation="sigmoid")(encoded)
encoded = layers.Dense(encoding_dim[4], activation="relu")(encoded)
decoded = layers.Dense(encoding_dim[3], activation="sigmoid")(encoded)
decoded = layers.Dense(encoding_dim[2], activation="sigmoid")(decoded)
decoded = layers.Dense(encoding_dim[1], activation="sigmoid")(decoded)
decoded = layers.Dense(encoding_dim[0], activation="sigmoid")(decoded)

output = layers.Dense(actual_dim, activation="relu")(decoded)

autoencoder = Model(input_attributes, output)

print(autoencoder.summary())

None


In [12]:
encoder = Model(input_attributes, encoded)

X_train_0 = X_train.iloc[np.where(y_train == 0)]
y_train_0 = y_train[y_train == 0]

X_train_nonLinear_features = encoder.predict(X_train_0)
X_test_nonLinear_features = encoder.predict(X_test)

X_train_0 = np.concatenate((X_train_0, X_train_nonLinear_features), axis=1)
X_test_0 = np.concatenate((X_test, X_test_nonLinear_features), axis=1)

[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 720us/step


In [13]:
perceptron_model = Sequential()

perceptron_model.add(layers.Dense(1, input_dim=X_train_0.shape[1], activation='sigmoid'))

perceptron_model.compile(loss='mse', optimizer='adam')

perceptron_model.fit(X_train_0, y_train_0, epochs=10)

Epoch 1/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 556us/step - loss: 0.2297
Epoch 2/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 543us/step - loss: 0.1738
Epoch 3/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 537us/step - loss: 0.1203
Epoch 4/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 617us/step - loss: 0.0911
Epoch 5/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 535us/step - loss: 0.0703
Epoch 6/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 559us/step - loss: 0.0552
Epoch 7/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 620us/step - loss: 0.0440
Epoch 8/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 636us/step - loss: 0.0356
Epoch 9/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 449us/step - loss: 0.0292
Epoch 10/10
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

<keras.src.callbacks.history.History at 0x2088eb12210>

In [14]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

test_pred=perceptron_model.predict(X_test_0)
rmse = np.sqrt((y_test.values - test_pred.flatten())**2)
y_pred_binary = (rmse >= 0.5).astype(int) #0.1531
#scores = cross_val_score(perceptron_model, X, y, cv=5)

accuracy = accuracy_score(y_test, y_pred_binary)
precision = precision_score(y_test, y_pred_binary, average='weighted')
recall = recall_score(y_test, y_pred_binary, average='weighted')
f1 = f1_score(y_test, y_pred_binary, average='weighted')
conf_matrix_model = confusion_matrix(y_test, y_pred_binary)
class_report = classification_report(y_test, y_pred_binary)

print("Accuracy: " + str(accuracy))
print("Precision: " + str(precision))
print("Recall: " + str(recall))
print("F1 Score: " + str(f1))
print("Confusion Matrix:")
print(conf_matrix_model)
print("Classification Report:")
print(class_report)

[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 754us/step
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
[[2506    0]
 [   0  206]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2506
           1       1.00      1.00      1.00       206

    accuracy                           1.00      2712
   macro avg       1.00      1.00      1.00      2712
weighted avg       1.00      1.00      1.00      2712

