In [4]:
import os
import pandas as pd
from pandas import json_normalize
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import warnings
warnings.filterwarnings("ignore")

# Function to read data from text (CSV) files
def read_data(file_name):
    df = pd.read_csv(file_name, names=['data_id', 'accel0X', 'accel0Y', 'accel0Z', 'accel1X', 'accel1Y', 'accel1Z', 'tension', 'timestamp'], dtype=str)
    df['time'] = pd.to_datetime(df['timestamp'], unit='ms')
    df[['accel0X', 'accel0Y', 'accel0Z', 'accel1X', 'accel1Y', 'accel1Z', 'tension']] = df[['accel0X', 'accel0Y', 'accel0Z', 'accel1X', 'accel1Y', 'accel1Z', 'tension']].apply(pd.to_numeric, errors='coerce')
    df['tension'] = 0.650 * (df['tension'] - 2166)
    return df

def establish_printing_start(file_name):
    df = pd.read_json(file_name, lines=True)
    df = json_normalize(df.to_dict('records'))
    return df[df.status == 'P'].head(1)['timestamp'].values[0]

# Add Label Encoder
label_encoder = LabelEncoder()

def process_and_label_data(base_dir):
    categories = ['arm_failure', 'bowden', 'plastic', 'proper', 'retraction_05', 'unstick']
    all_data = []

    for category in categories:
        txt_file = os.path.join(base_dir, category, 't.txt')
        json_file = os.path.join(base_dir, category, 'j.json')

        df = read_data(txt_file)
        start_time = establish_printing_start(json_file)
        df = df[df.time > start_time]
        
        # Add category label
        df['label'] = category

        all_data.append(df)

    combined_df = pd.concat(all_data, ignore_index=True)
    return combined_df

def zscore_normalize_data(df, columns):
    for column in columns:
        df[column] = df[column].astype(float)
        mean = df[column].mean()
        std = df[column].std()
        df[column] = (df[column] - mean) / std
    return df

def create_1d_cnn_model(input_shape, num_classes):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=256, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Call the function to process and label the data
base_directory = r'C:\MyFiles\AI\UNI\ML-DP-AI\Project\dataset\WithBase'
all_data = process_and_label_data(base_directory)

# Handle missing values using interpolation
all_data_int = all_data.interpolate(method='linear')

# Normalize the features
features = ['accel0X', 'accel0Y', 'accel0Z', 'accel1X', 'accel1Y', 'accel1Z', 'tension']


In [5]:
# This cell removes only one feature
# This cell removes only one feature
# This cell removes only one feature
# This cell removes only one feature
# This cell removes only one feature
# This cell removes only one feature

def run_experiment1(features_to_use):
    # Clone and preprocess data for a new experiment
    data = all_data_int.copy()
    data = zscore_normalize_data(data, features_to_use)
    
    # Reshape data into segments
    X = data[features_to_use].values
    y = data['label'].values

    time_steps = 100
    samples = len(X) // time_steps
    X = X[:samples * time_steps].reshape(samples, time_steps, len(features_to_use))

    # Encode categorical labels
    y = label_encoder.fit_transform(y)
    y = y[:samples * time_steps].reshape(samples, time_steps, -1)
    y = np.apply_along_axis(lambda x: np.bincount(x.astype(int)).argmax(), axis=1, arr=y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create the model
    input_shape = (X_train.shape[1], X_train.shape[2])
    num_classes = len(np.unique(y))
    model = create_1d_cnn_model(input_shape, num_classes)

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the model
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=0)

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    return test_accuracy

# Evaluate the baseline model with all features
baseline_accuracy = run_experiment1(features)
print(f'Baseline Test Accuracy with all features: {baseline_accuracy:.4f}')

# Evaluate the effect of each feature
results1 = {}
for feature in features:
    features_to_use = [f for f in features if f != feature]
    test_accuracy = run_experiment1(features_to_use)
    results1[feature] = test_accuracy
    print(f'Test Accuracy without {feature}: {test_accuracy:.4f}')

# Display a sorted list of feature importances
print("\nFeature Impact:")
for feature, accuracy in sorted(results1.items(), key=lambda item: baseline_accuracy - item[1], reverse=True):
    print(f"{feature}: {baseline_accuracy - accuracy:.4f} drop in accuracy")


Baseline Test Accuracy with all features: 0.9635
Test Accuracy without accel0X: 0.9459
Test Accuracy without accel0Y: 0.9406
Test Accuracy without accel0Z: 0.9314
Test Accuracy without accel1X: 0.9594
Test Accuracy without accel1Y: 0.9574
Test Accuracy without accel1Z: 0.9582
Test Accuracy without tension: 0.9429

Feature Impact:
accel0Z: 0.0320 drop in accuracy
accel0Y: 0.0229 drop in accuracy
tension: 0.0205 drop in accuracy
accel0X: 0.0176 drop in accuracy
accel1Y: 0.0061 drop in accuracy
accel1Z: 0.0053 drop in accuracy
accel1X: 0.0041 drop in accuracy


In [6]:
# This cell uses only one feature
# This cell uses only one feature
# This cell uses only one feature
# This cell uses only one feature
# This cell uses only one feature
# This cell uses only one feature
# This cell uses only one feature
# This cell uses only one feature

def run_experiment2(features_to_use):
    # Clone and preprocess data for a new experiment
    data = all_data_int.copy()
    data = zscore_normalize_data(data, features_to_use)
    
    # Reshape data into segments
    X = data[features_to_use].values
    y = data['label'].values

    time_steps = 100
    samples = len(X) // time_steps
    X = X[:samples * time_steps].reshape(samples, time_steps, len(features_to_use))

    # Encode categorical labels
    y = label_encoder.fit_transform(y)
    y = y[:samples * time_steps].reshape(samples, time_steps, -1)
    y = np.apply_along_axis(lambda x: np.bincount(x.astype(int)).argmax(), axis=1, arr=y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create the model
    input_shape = (X_train.shape[1], X_train.shape[2])
    num_classes = len(np.unique(y))
    model = create_1d_cnn_model(input_shape, num_classes)

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the model
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=0)

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    return test_accuracy

# Evaluate the baseline model with all features
baseline_accuracy = run_experiment2(features)
print(f'Baseline Test Accuracy with all features: {baseline_accuracy:.4f}')

# Evaluate the effect of each feature
results2 = {}
for feature in features:
    features_to_use = [f for f in features if f == feature]
    test_accuracy = run_experiment2(features_to_use)
    results2[feature] = test_accuracy
    print(f'Test Accuracy with only {feature}: {test_accuracy:.4f}')

# Display a sorted list of feature importances
print("\nFeature Impact:")
for feature, accuracy in sorted(results2.items(), key=lambda item: baseline_accuracy - item[1], reverse=True):
    print(f"{feature}: {baseline_accuracy - accuracy:.4f} drop in accuracy")


Baseline Test Accuracy with all features: 0.9647
Test Accuracy with only accel0X: 0.6431
Test Accuracy with only accel0Y: 0.7066
Test Accuracy with only accel0Z: 0.6281
Test Accuracy with only accel1X: 0.5546
Test Accuracy with only accel1Y: 0.6022
Test Accuracy with only accel1Z: 0.5558
Test Accuracy with only tension: 0.5954

Feature Impact:
accel1X: 0.4101 drop in accuracy
accel1Z: 0.4089 drop in accuracy
tension: 0.3692 drop in accuracy
accel1Y: 0.3625 drop in accuracy
accel0Z: 0.3366 drop in accuracy
accel0X: 0.3216 drop in accuracy
accel0Y: 0.2581 drop in accuracy


In [8]:
# This cell removes two features
# This cell removes two features
# This cell removes two features
# This cell removes two features
# This cell removes two features
# This cell removes two features

import random
def run_experiment3(features_to_use):
    # Clone and preprocess data for a new experiment
    data = all_data_int.copy()
    data = zscore_normalize_data(data, features_to_use)
    
    # Reshape data into segments
    X = data[features_to_use].values
    y = data['label'].values

    time_steps = 100
    samples = len(X) // time_steps
    X = X[:samples * time_steps].reshape(samples, time_steps, len(features_to_use))

    # Encode categorical labels
    y = label_encoder.fit_transform(y)
    y = y[:samples * time_steps].reshape(samples, time_steps, -1)
    y = np.apply_along_axis(lambda x: np.bincount(x.astype(int)).argmax(), axis=1, arr=y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create the model
    input_shape = (X_train.shape[1], X_train.shape[2])
    num_classes = len(np.unique(y))
    model = create_1d_cnn_model(input_shape, num_classes)

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the model
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=0)

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    return test_accuracy

# Evaluate the baseline model with all features
baseline_accuracy = run_experiment3(features)
print(f'Baseline Test Accuracy with all features: {baseline_accuracy:.4f}')

# Evaluate the effect of removing two features at a time for 5 trials
trials = 5
results3 = {}
for i in range(trials):
    features_to_remove = random.sample(features, 2)
    features_to_use = [f for f in features if f not in features_to_remove]
    test_accuracy = run_experiment3(features_to_use)
    results3[tuple(features_to_remove)] = test_accuracy
    print(f'Test Accuracy without {features_to_remove}: {test_accuracy:.4f}')

# Display a sorted list of feature importances
print("\nFeature Impact:")
for features_removed, accuracy in sorted(results3.items(), key=lambda item: baseline_accuracy - item[1], reverse=True):
    print(f"Removed {features_removed}: {baseline_accuracy - accuracy:.4f} drop in accuracy")


Baseline Test Accuracy with all features: 0.9677
Test Accuracy without ['accel1Z', 'accel0Z']: 0.9127
Test Accuracy without ['accel1X', 'tension']: 0.9337
Test Accuracy without ['accel0Z', 'accel1X']: 0.9164
Test Accuracy without ['accel1X', 'accel0X']: 0.9404
Test Accuracy without ['accel1X', 'tension']: 0.9365

Feature Impact:
Removed ('accel1Z', 'accel0Z'): 0.0550 drop in accuracy
Removed ('accel0Z', 'accel1X'): 0.0513 drop in accuracy
Removed ('accel1X', 'tension'): 0.0313 drop in accuracy
Removed ('accel1X', 'accel0X'): 0.0273 drop in accuracy


In [9]:
# This cell removes three features
# This cell removes three features
# This cell removes three features
# This cell removes three features
# This cell removes three features
# This cell removes three features
# This cell removes three features

def run_experiment4(features_to_use):
    # Clone and preprocess data for a new experiment
    data = all_data_int.copy()
    data = zscore_normalize_data(data, features_to_use)
    
    # Reshape data into segments
    X = data[features_to_use].values
    y = data['label'].values

    time_steps = 100
    samples = len(X) // time_steps
    X = X[:samples * time_steps].reshape(samples, time_steps, len(features_to_use))

    # Encode categorical labels
    y = label_encoder.fit_transform(y)
    y = y[:samples * time_steps].reshape(samples, time_steps, -1)
    y = np.apply_along_axis(lambda x: np.bincount(x.astype(int)).argmax(), axis=1, arr=y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create the model
    input_shape = (X_train.shape[1], X_train.shape[2])
    num_classes = len(np.unique(y))
    model = create_1d_cnn_model(input_shape, num_classes)

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the model
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=0)

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    return test_accuracy

# Evaluate the baseline model with all features
baseline_accuracy = run_experiment4(features)
print(f'Baseline Test Accuracy with all features: {baseline_accuracy:.4f}')

# Evaluate the effect of removing three features at a time for 5 trials
trials = 5
results4 = {}
for i in range(trials):
    features_to_remove = random.sample(features, 3)
    features_to_use = [f for f in features if f not in features_to_remove]
    test_accuracy = run_experiment4(features_to_use)
    results4[tuple(features_to_remove)] = test_accuracy
    print(f'Test Accuracy without {features_to_remove}: {test_accuracy:.4f}')

# Display a sorted list of feature importances
print("\nFeature Impact:")
for features_removed, accuracy in sorted(results4.items(), key=lambda item: baseline_accuracy - item[1], reverse=True):
    print(f"Removed {features_removed}: {baseline_accuracy - accuracy:.4f} drop in accuracy")


Baseline Test Accuracy with all features: 0.9612
Test Accuracy without ['accel0X', 'accel0Z', 'accel0Y']: 0.7660
Test Accuracy without ['accel0X', 'accel1Y', 'accel1X']: 0.9404
Test Accuracy without ['accel1X', 'accel0X', 'accel1Y']: 0.9425
Test Accuracy without ['accel1X', 'tension', 'accel0Z']: 0.8855
Test Accuracy without ['accel1X', 'tension', 'accel0X']: 0.9172

Feature Impact:
Removed ('accel0X', 'accel0Z', 'accel0Y'): 0.1952 drop in accuracy
Removed ('accel1X', 'tension', 'accel0Z'): 0.0758 drop in accuracy
Removed ('accel1X', 'tension', 'accel0X'): 0.0441 drop in accuracy
Removed ('accel0X', 'accel1Y', 'accel1X'): 0.0209 drop in accuracy
Removed ('accel1X', 'accel0X', 'accel1Y'): 0.0187 drop in accuracy


In [10]:
# This cell removes four features
# This cell removes four features
# This cell removes four features
# This cell removes four features
# This cell removes four features
# This cell removes four features


import random
def run_experiment5(features_to_use):
    # Clone and preprocess data for a new experiment
    data = all_data_int.copy()
    data = zscore_normalize_data(data, features_to_use)
    
    # Reshape data into segments
    X = data[features_to_use].values
    y = data['label'].values

    time_steps = 100
    samples = len(X) // time_steps
    X = X[:samples * time_steps].reshape(samples, time_steps, len(features_to_use))

    # Encode categorical labels
    y = label_encoder.fit_transform(y)
    y = y[:samples * time_steps].reshape(samples, time_steps, -1)
    y = np.apply_along_axis(lambda x: np.bincount(x.astype(int)).argmax(), axis=1, arr=y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create the model
    input_shape = (X_train.shape[1], X_train.shape[2])
    num_classes = len(np.unique(y))
    model = create_1d_cnn_model(input_shape, num_classes)

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the model
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=0)

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    return test_accuracy

# Evaluate the baseline model with all features
baseline_accuracy = run_experiment5(features)
print(f'Baseline Test Accuracy with all features: {baseline_accuracy:.4f}')

# Evaluate the effect of removing three features at a time for 5 trials
trials = 5
results5 = {}
for i in range(trials):
    features_to_remove = random.sample(features, 4)
    features_to_use = [f for f in features if f not in features_to_remove]
    test_accuracy = run_experiment5(features_to_use)
    results5[tuple(features_to_remove)] = test_accuracy
    print(f'Test Accuracy without {features_to_remove}: {test_accuracy:.4f}')

# Display a sorted list of feature importances
print("\nFeature Impact:")
for features_removed, accuracy in sorted(results5.items(), key=lambda item: baseline_accuracy - item[1], reverse=True):
    print(f"Removed {features_removed}: {baseline_accuracy - accuracy:.4f} drop in accuracy")


Baseline Test Accuracy with all features: 0.9635
Test Accuracy without ['accel1X', 'accel0Z', 'accel1Z', 'accel1Y']: 0.9149
Test Accuracy without ['accel0X', 'accel0Z', 'accel1X', 'accel1Y']: 0.8688
Test Accuracy without ['accel1Y', 'accel0Y', 'tension', 'accel1X']: 0.8774
Test Accuracy without ['accel1Z', 'accel0Y', 'accel1Y', 'accel0X']: 0.7681
Test Accuracy without ['accel0X', 'accel1Y', 'accel0Y', 'accel1X']: 0.7825

Feature Impact:
Removed ('accel1Z', 'accel0Y', 'accel1Y', 'accel0X'): 0.1954 drop in accuracy
Removed ('accel0X', 'accel1Y', 'accel0Y', 'accel1X'): 0.1809 drop in accuracy
Removed ('accel0X', 'accel0Z', 'accel1X', 'accel1Y'): 0.0947 drop in accuracy
Removed ('accel1Y', 'accel0Y', 'tension', 'accel1X'): 0.0861 drop in accuracy
Removed ('accel1X', 'accel0Z', 'accel1Z', 'accel1Y'): 0.0486 drop in accuracy


In [11]:
# This cell removes five features
# This cell removes five features
# This cell removes five features
# This cell removes five features
# This cell removes five features
# This cell removes five features
# This cell removes five features

import random
def run_experiment6(features_to_use):
    # Clone and preprocess data for a new experiment
    data = all_data_int.copy()
    data = zscore_normalize_data(data, features_to_use)
    
    # Reshape data into segments
    X = data[features_to_use].values
    y = data['label'].values

    time_steps = 100
    samples = len(X) // time_steps
    X = X[:samples * time_steps].reshape(samples, time_steps, len(features_to_use))

    # Encode categorical labels
    y = label_encoder.fit_transform(y)
    y = y[:samples * time_steps].reshape(samples, time_steps, -1)
    y = np.apply_along_axis(lambda x: np.bincount(x.astype(int)).argmax(), axis=1, arr=y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create the model
    input_shape = (X_train.shape[1], X_train.shape[2])
    num_classes = len(np.unique(y))
    model = create_1d_cnn_model(input_shape, num_classes)

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the model
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=0)

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    return test_accuracy

# Evaluate the baseline model with all features
baseline_accuracy = run_experiment6(features)
print(f'Baseline Test Accuracy with all features: {baseline_accuracy:.4f}')

# Evaluate the effect of removing three features at a time for 5 trials
trials = 5
results6 = {}
for i in range(trials):
    features_to_remove = random.sample(features, 5)
    features_to_use = [f for f in features if f not in features_to_remove]
    test_accuracy = run_experiment6(features_to_use)
    results6[tuple(features_to_remove)] = test_accuracy
    print(f'Test Accuracy without {features_to_remove}: {test_accuracy:.4f}')

# Display a sorted list of feature importances
print("\nFeature Impact:")
for features_removed, accuracy in sorted(results6.items(), key=lambda item: baseline_accuracy - item[1], reverse=True):
    print(f"Removed {features_removed}: {baseline_accuracy - accuracy:.4f} drop in accuracy")


Baseline Test Accuracy with all features: 0.9616
Test Accuracy without ['accel0X', 'accel1Y', 'accel1Z', 'accel0Y', 'accel1X']: 0.7631
Test Accuracy without ['accel1Y', 'accel0Y', 'tension', 'accel0Z', 'accel0X']: 0.6160
Test Accuracy without ['accel1X', 'accel0Y', 'accel0X', 'accel0Z', 'tension']: 0.6863
Test Accuracy without ['accel1Y', 'accel0Y', 'tension', 'accel1X', 'accel1Z']: 0.8498
Test Accuracy without ['accel1Z', 'accel1Y', 'accel0Z', 'tension', 'accel0Y']: 0.7213

Feature Impact:
Removed ('accel1Y', 'accel0Y', 'tension', 'accel0Z', 'accel0X'): 0.3456 drop in accuracy
Removed ('accel1X', 'accel0Y', 'accel0X', 'accel0Z', 'tension'): 0.2752 drop in accuracy
Removed ('accel1Z', 'accel1Y', 'accel0Z', 'tension', 'accel0Y'): 0.2402 drop in accuracy
Removed ('accel0X', 'accel1Y', 'accel1Z', 'accel0Y', 'accel1X'): 0.1984 drop in accuracy
Removed ('accel1Y', 'accel0Y', 'tension', 'accel1X', 'accel1Z'): 0.1118 drop in accuracy
