In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [12]:
import pandas as pd
import os

def load_and_label_data(base_path, label, max_files=None):
    combined_100KHzdata = []
    combined_2000KHzdata = []

    # Initialize a counter
    file_counter = 0

    # Iterate over each timestamped folder
    for timestamp_folder in os.listdir(base_path):
        if max_files and file_counter >= max_files:
            break

        timestamp_folder_path = os.path.join(base_path, timestamp_folder, "raw")

        # Extract the full timestamp from the folder name and convert to datetime format
        timestamp = timestamp_folder.split('_')[0] + '_' + timestamp_folder.split('_')[1]
        timestamp = pd.to_datetime(timestamp, format='%Y.%m.%d_%H.%M.%S')
        print(timestamp_folder_path)
        # Load the 2000KHz data
        df_2000KHz = pd.read_parquet(os.path.join(timestamp_folder_path, "Sampling2000KHz_AEKi-0.parquet"))
        mean_2000KHz = df_2000KHz.mean().to_frame().T
        mean_2000KHz['timestamp'] = timestamp
        mean_2000KHz['label'] = label

        # Load the 100KHz data
        df_100KHz = pd.read_parquet(os.path.join(timestamp_folder_path, "Sampling100KHz_Irms_Grinding-Grinding spindle current L1-Grinding spindle current L2-Grinding spindle current L3-0.parquet"))
        mean_100KHz = df_100KHz.mean().to_frame().T
        mean_100KHz['timestamp'] = timestamp
        mean_100KHz['label'] = label

        # Append the mean data to the combined lists
        combined_100KHzdata.append(mean_100KHz)
        combined_2000KHzdata.append(mean_2000KHz)

        # Increment the counter
        file_counter += 1

    # Combine all the mean data into a single dataframe
    final_combined_100KHzdata = pd.concat(combined_100KHzdata, ignore_index=True)
    final_combined_2000KHzdata = pd.concat(combined_2000KHzdata, ignore_index=True)

    return final_combined_100KHzdata, final_combined_2000KHzdata


# Define the paths to the OK and NOK data directories
ok_data_path = '/content/gdrive/MyDrive/Data/OK_Measurements'
nok_data_path = '/content/gdrive/MyDrive/Data/NOK_Measurements'

# Load OK and NOK data
ok_100KHzdata, ok_2000KHzdata = load_and_label_data(ok_data_path, label=0)
nok_100KHzdata, nok_2000KHzdata = load_and_label_data(nok_data_path, label=1)
# Combine OK and NOK data
all_100KHzdata = pd.concat([ok_100KHzdata, nok_100KHzdata], ignore_index=True)
all_2000KHzdata = pd.concat([ok_2000KHzdata, nok_2000KHzdata], ignore_index=True)

# Print the first few rows of the combined data for inspection
print("Combined 100KHz Data Sample (Mean):")
print(all_100KHzdata.head())
print("\nCombined 2000KHz Data Sample (Mean):")
print(all_2000KHzdata.head())


/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.14.16_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.11.43_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.05.15_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.13.15_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.03.13_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.09.41_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.00.40_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.08.48_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.12.44_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.06.46_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.02.12_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.01.11_Grinding/raw
/content/gdrive/MyDrive/Data/OK_Measurements/2024.02.14_22.04.44

In [13]:
from sklearn.preprocessing import StandardScaler

# Separate features and labels for 100KHz data
features_100KHz = all_100KHzdata.drop(columns=['timestamp', 'label'])  # Exclude timestamp and label
timestamps_100KHz = all_100KHzdata['timestamp']
labels_100KHz = all_100KHzdata['label']

# Normalize features for 100KHz data
scaler_100KHz = StandardScaler()
normalized_features_100KHz = scaler_100KHz.fit_transform(features_100KHz)

# Combine normalized features with timestamps and labels
normalized_100KHzdata = pd.DataFrame(normalized_features_100KHz, columns=features_100KHz.columns)
normalized_100KHzdata.insert(0, 'timestamp', timestamps_100KHz)  # Add timestamp column back
normalized_100KHzdata['label'] = labels_100KHz.values  # Add label column back

# Separate features and labels for 2000KHz data
features_2000KHz = all_2000KHzdata.drop(columns=['timestamp', 'label'])  # Exclude timestamp and label
timestamps_2000KHz = all_2000KHzdata['timestamp']
labels_2000KHz = all_2000KHzdata['label']

# Normalize features for 2000KHz data
scaler_2000KHz = StandardScaler()
normalized_features_2000KHz = scaler_2000KHz.fit_transform(features_2000KHz)

# Combine normalized features with timestamps and labels
normalized_2000KHzdata = pd.DataFrame(normalized_features_2000KHz, columns=features_2000KHz.columns)
normalized_2000KHzdata.insert(0, 'timestamp', timestamps_2000KHz)  # Add timestamp column back
normalized_2000KHzdata['label'] = labels_2000KHz.values  # Add label column back

print("Normalized 100KHz Data Sample:")
print(normalized_100KHzdata.head())
print(len(normalized_100KHzdata))
print("\nNormalized 2000KHz Data Sample:")
print(normalized_2000KHzdata.head())
print(len(normalized_2000KHzdata))

Normalized 100KHz Data Sample:
            timestamp  Irms_Grinding_rate100000_clipping0_batch0  \
0 2024-02-14 22:14:16                                  -0.690665   
1 2024-02-14 22:11:43                                  -0.880084   
2 2024-02-14 22:05:15                                  -1.014171   
3 2024-02-14 22:13:15                                  -0.892442   
4 2024-02-14 22:03:13                                  -1.089597   

   Grinding spindle current L1_rate100000_clipping0_batch0  \
0                                           0.072306         
1                                           0.084977         
2                                           0.286297         
3                                          -0.288736         
4                                          -1.187396         

   Grinding spindle current L2_rate100000_clipping0_batch0  \
0                                          -1.032386         
1                                           0.108840         
2

In [14]:
import pandas as pd

# Concatenate the 100KHz and 2000KHz data
normalized_100KHzdata = normalized_100KHzdata.set_index('timestamp')
normalized_2000KHzdata = normalized_2000KHzdata.set_index('timestamp')

# Concatenate along columns
combined_data = pd.concat([normalized_100KHzdata, normalized_2000KHzdata], axis=1, join='inner').reset_index()

# Remove duplicate 'label' columns and keep the first one
combined_data = combined_data.loc[:, ~combined_data.columns.duplicated()]

# Ensure the label column is at the end
label = combined_data.pop('label')
combined_data['label'] = label

print("Combined Data Sample:")
print(combined_data.head())
print(len(combined_data))

Combined Data Sample:
            timestamp  Irms_Grinding_rate100000_clipping0_batch0  \
0 2024-02-14 22:14:16                                  -0.690665   
1 2024-02-14 22:11:43                                  -0.880084   
2 2024-02-14 22:05:15                                  -1.014171   
3 2024-02-14 22:13:15                                  -0.892442   
4 2024-02-14 22:03:13                                  -1.089597   

   Grinding spindle current L1_rate100000_clipping0_batch0  \
0                                           0.072306         
1                                           0.084977         
2                                           0.286297         
3                                          -0.288736         
4                                          -1.187396         

   Grinding spindle current L2_rate100000_clipping0_batch0  \
0                                          -1.032386         
1                                           0.108840         
2         

In [18]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [42]:

X=combined_data.iloc[:,1:-1]
y=combined_data['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=65)


In [43]:
from sklearn.metrics import accuracy_score, classification_report
svm = SVC(kernel='linear', C=1.0, random_state=42)
svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)

Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        15

    accuracy                           1.00        29
   macro avg       1.00      1.00      1.00        29
weighted avg       1.00      1.00      1.00        29



In [44]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate
from prettytable import PrettyTable
# Cross validation for hyper-parameter tuning
hyperparameter_score_list = []
for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
    for c in np.arange(0.5,3,0.5):
        svm = SVC(kernel=kernel, C=c)
        scores = cross_validate(svm, X_train, y_train, cv=10, scoring='accuracy')
        mean_score = np.mean(scores['test_score'])
        hyperparameter_score_list.append([kernel, c, mean_score])
# Choose the hyper-parameters (with highest average accuracy)
myTable = PrettyTable(["Kernel", "C", "Avg accuracy"])
for row in hyperparameter_score_list:
    myTable.add_row([row[0], row[1], round(row[2],3)])
print(myTable)

+---------+-----+--------------+
|  Kernel |  C  | Avg accuracy |
+---------+-----+--------------+
|  linear | 0.5 |     1.0      |
|  linear | 1.0 |     1.0      |
|  linear | 1.5 |     1.0      |
|  linear | 2.0 |     1.0      |
|  linear | 2.5 |     1.0      |
|   poly  | 0.5 |    0.967     |
|   poly  | 1.0 |    0.967     |
|   poly  | 1.5 |    0.967     |
|   poly  | 2.0 |    0.967     |
|   poly  | 2.5 |    0.967     |
|   rbf   | 0.5 |     1.0      |
|   rbf   | 1.0 |     1.0      |
|   rbf   | 1.5 |     1.0      |
|   rbf   | 2.0 |     1.0      |
|   rbf   | 2.5 |     1.0      |
| sigmoid | 0.5 |     1.0      |
| sigmoid | 1.0 |     1.0      |
| sigmoid | 1.5 |     1.0      |
| sigmoid | 2.0 |     1.0      |
| sigmoid | 2.5 |     1.0      |
+---------+-----+--------------+


In [45]:
svm = SVC(kernel='poly', C=1.0, random_state=42)
svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)

Accuracy: 0.9310344827586207

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.86      0.92        14
           1       0.88      1.00      0.94        15

    accuracy                           0.93        29
   macro avg       0.94      0.93      0.93        29
weighted avg       0.94      0.93      0.93        29

