# In [None]:
# This cell trains a logistic regression classifier on binary Pass/Fail labels.

import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, log_loss
import seaborn as sns
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE

# Function to read data from text (CSV) files
def read_data(file_name):
    """Load one sensor log into a DataFrame with numeric readings.

    The file is parsed as header-less CSV with nine columns: ``data_id``,
    six accelerometer axes, ``tension`` and ``timestamp``.

    Args:
        file_name: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame holding the nine parsed columns plus a ``time`` column,
        which is ``timestamp`` interpreted as milliseconds since the epoch.
        Sensor readings that cannot be parsed become NaN, and ``tension`` is
        rescaled as ``0.650 * (raw - 2166)``.

    Raises:
        ValueError: If a ``timestamp`` entry cannot be parsed as a number.
    """
    sensor_columns = ['accel0X', 'accel0Y', 'accel0Z', 'accel1X', 'accel1Y', 'accel1Z', 'tension']
    column_names = ['data_id', *sensor_columns, 'timestamp']

    # Everything is read as text first so that a malformed reading can be
    # coerced to NaN column by column instead of aborting the whole parse.
    frame = pd.read_csv(file_name, names=column_names, dtype=str)

    millis = pd.to_numeric(frame['timestamp'])
    frame['timestamp'] = millis
    frame['time'] = pd.to_datetime(millis, unit='ms')

    for name in sensor_columns:
        frame[name] = pd.to_numeric(frame[name], errors='coerce')

    # Convert the raw tension reading with the fixed offset and gain.
    frame['tension'] = 0.650 * (frame['tension'] - 2166)
    return frame

def establish_printing_start(file_name):
    """Return the timestamp of the first record whose status is ``'P'``.

    Args:
        file_name: Path or file-like object holding JSON-lines records that
            provide ``status`` and ``timestamp`` fields.

    Returns:
        The ``timestamp`` value of the first listed record with
        ``status == 'P'``.

    Raises:
        IndexError: If no record has status ``'P'``.
    """
    records = pd.read_json(file_name, lines=True).to_dict('records')
    # Re-normalizing the records flattens nested objects into top-level columns.
    flat = pd.json_normalize(records)
    is_printing = flat.status == 'P'
    return flat.loc[is_printing, 'timestamp'].values[0]

# Function to process and label data
def process_and_label_data(base_dir, categories=None):
    """Load every category's recording, trim it to the print, and label it.

    For each category the sensor log ``<base_dir>/<category>/t.txt`` and the
    status log ``<base_dir>/<category>/j.json`` are read. Rows recorded at or
    before the first ``'P'`` status timestamp are dropped, and the category
    name is stored in a ``label`` column.

    Args:
        base_dir: Directory containing one sub-directory per category.
        categories: Optional iterable of category (sub-directory) names.
            Defaults to the six categories used by this project.

    Returns:
        A single DataFrame with the rows of all categories concatenated in
        the order given, re-indexed from 0.
    """
    if categories is None:
        categories = ['arm_failure', 'bowden', 'plastic', 'proper', 'retraction_05', 'unstick']

    labelled_frames = []
    for category in categories:
        txt_file = os.path.join(base_dir, category, 't.txt')
        json_file = os.path.join(base_dir, category, 'j.json')

        df = read_data(txt_file)
        start_time = establish_printing_start(json_file)

        # .assign returns a new frame, so the label is never written into a
        # filtered slice of the original (avoids SettingWithCopyWarning).
        df = df[df.time > start_time].assign(label=category)

        labelled_frames.append(df)

    return pd.concat(labelled_frames, ignore_index=True)

# Function to compute windowed RMS
def window_rms(a, window_size):
    """Return the moving root-mean-square of ``a``, padded to ``len(a)``.

    The RMS is computed over every full window of ``window_size`` samples and
    the result is edge-padded on both sides so the output has the same length
    as the input.

    Args:
        a: One-dimensional sequence of numbers.
        window_size: Number of samples per window; must be at least 1. A
            window longer than the signal is clamped to the signal length,
            so the output is the RMS of the whole signal.

    Returns:
        1-D ``numpy.ndarray`` with ``len(a)`` elements (empty for empty input).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be at least 1, got {window_size}")

    values = np.asarray(a)
    n = len(values)
    if n == 0:
        return np.zeros(0, dtype=float)

    # np.convolve silently swaps its operands when the kernel is longer than
    # the signal, which would yield a mis-scaled result of the wrong length;
    # clamping keeps the kernel a true averaging window.
    effective_window = min(window_size, n)

    squared = np.power(values, 2)
    kernel = np.ones(effective_window) / float(effective_window)
    rms = np.sqrt(np.convolve(squared, kernel, 'valid'))

    # Pad the result to match the original length
    pad_length = n - len(rms)
    return np.pad(rms, (pad_length // 2, pad_length - pad_length // 2), 'edge')

# Apply the window_rms function to each feature
def apply_window_rms(df, features, window_size):
    """Replace each listed column of ``df`` with its moving RMS.

    Args:
        df: DataFrame containing the feature columns; modified in place.
        features: Iterable of column names to transform.
        window_size: Window length forwarded to ``window_rms``.

    Returns:
        The same ``df`` object, with the listed columns overwritten.
    """
    for column in features:
        raw_signal = df[column].values
        df[column] = window_rms(raw_signal, window_size)
    return df

# Normalizing the features using z-score
def zscore_normalize_data(df, columns):
    """Standardize the given columns of ``df`` with a fresh ``StandardScaler``.

    Args:
        df: DataFrame containing the columns; modified in place.
        columns: List of column names to standardize.

    Returns:
        The same ``df`` object, with the listed columns replaced by their
        standardized values.
    """
    standardized = StandardScaler().fit_transform(df[columns])
    df[columns] = standardized
    return df

# Directory containing the data
base_directory = r'C:\MyFiles\AI\UNI\ML-DP-AI\Project\dataset\WithBase'
all_data = process_and_label_data(base_directory)

# Sensor channels used as model inputs
features = ['accel0X', 'accel0Y', 'accel0Z', 'accel1X', 'accel1Y', 'accel1Z', 'tension']

# Handle missing values using interpolation.
# Only the numeric feature columns are interpolated: the frame also holds
# string columns (data_id, label), and interpolating object-dtype columns is
# deprecated in recent pandas. limit_direction='both' also fills gaps at the
# very start of the data, which a forward-only fill would leave as NaN.
all_data[features] = all_data[features].interpolate(method='linear', limit_direction='both')

# Apply moving RMS
# NOTE(review): the window runs over the concatenation of all categories, so
# samples near a category boundary mix two recordings - confirm this is intended.
window_size = 50000
all_data = apply_window_rms(all_data, features, window_size)

# Normalize the transformed features
# NOTE(review): the scaler is fit on the full data set, i.e. before the
# train/test split below - consider fitting on the training portion only.
all_data = zscore_normalize_data(all_data, features)

# Encode categorical labels
label_encoder = LabelEncoder()
all_data['label'] = label_encoder.fit_transform(all_data['label'])

# Convert labels to binary (pass/fail)
# Assuming 'proper' is the only pass class, and all others are fail.
# The encoded value of 'proper' is looked up once instead of once per row.
proper_code = label_encoder.transform(['proper'])[0]
all_data['label'] = (all_data['label'] == proper_code).astype(int)

# Reshape data into segments of `time_steps` consecutive rows; trailing rows
# that do not fill a whole segment are dropped.
time_steps = 100
samples = len(all_data) // time_steps
X = all_data[features].values[:samples * time_steps].reshape(samples, time_steps, len(features))
segment_labels = all_data['label'].values[:samples * time_steps].reshape(samples, time_steps)

# Majority vote per segment: Pass (1) only when more than half of the rows
# are Pass, so a tie resolves to Fail (0).
y = (segment_labels.sum(axis=1, keepdims=True) * 2 > time_steps).astype(int)

# Flatten the input for logistic regression
X_flat = X.reshape(samples, -1)
y_flat = y.flatten()

# Split data into training and testing sets.
# stratify keeps the Pass/Fail ratio identical in both splits, which matters
# because the classes are imbalanced (hence the SMOTE step below).
# NOTE(review): neighbouring segments come from a 50000-sample moving RMS and
# are therefore highly correlated; a random split likely leaks information
# between train and test - consider a grouped or chronological split.
X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y_flat, test_size=0.2, random_state=42, stratify=y_flat
)

# Apply SMOTE to the training data only, so the test set contains no
# synthetic samples.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Train logistic regression model
logistic_model = LogisticRegression(max_iter=1000)
logistic_model.fit(X_train_resampled, y_train_resampled)

# Predicted probability of the Pass class (label 1)
probabilities = logistic_model.predict_proba(X_test)[:, 1]

# Apply a custom threshold: predict Pass only when P(Pass) >= 0.7
custom_threshold = 0.7
y_pred = (probabilities >= custom_threshold).astype(int)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")

# Log loss is defined on predicted probabilities; feeding it the thresholded
# 0/1 predictions would only measure clipped hard decisions.
log_loss_value = log_loss(y_test, probabilities, labels=[0, 1])
print(f"Test Log Loss: {log_loss_value}")

# Print classification report.
# labels=[0, 1] pins the class order so it always matches the two target
# names, even if one class is absent from y_test and y_pred.
print(classification_report(y_test, y_pred, labels=[0, 1], target_names=['Fail', 'Pass']))

# Compute the confusion matrix (always 2x2 thanks to the explicit labels)
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Fail', 'Pass'], yticklabels=['Fail', 'Pass'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()



# In [None]:
# This cell trains a 1D-CNN for binary (Pass/Fail) classification.

import os
import pandas as pd
from pandas import json_normalize
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE

# Function to read data from text (CSV) files
def read_data(file_name):
    """Load one sensor log into a DataFrame with numeric readings.

    The file is parsed as header-less CSV with nine columns: ``data_id``,
    six accelerometer axes, ``tension`` and ``timestamp``.

    Args:
        file_name: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame holding the nine parsed columns plus a ``time`` column,
        which is ``timestamp`` interpreted as milliseconds since the epoch.
        Sensor readings that cannot be parsed become NaN, and ``tension`` is
        rescaled as ``0.650 * (raw - 2166)``.

    Raises:
        ValueError: If a ``timestamp`` entry cannot be parsed as a number.
    """
    sensor_columns = ['accel0X', 'accel0Y', 'accel0Z', 'accel1X', 'accel1Y', 'accel1Z', 'tension']
    column_names = ['data_id', *sensor_columns, 'timestamp']

    # Everything is read as text first so that a malformed reading can be
    # coerced to NaN column by column instead of aborting the whole parse.
    frame = pd.read_csv(file_name, names=column_names, dtype=str)

    millis = pd.to_numeric(frame['timestamp'])
    frame['timestamp'] = millis
    frame['time'] = pd.to_datetime(millis, unit='ms')

    for name in sensor_columns:
        frame[name] = pd.to_numeric(frame[name], errors='coerce')

    # Convert the raw tension reading with the fixed offset and gain.
    frame['tension'] = 0.650 * (frame['tension'] - 2166)
    return frame

def establish_printing_start(file_name):
    """Return the timestamp of the first record whose status is ``'P'``.

    Args:
        file_name: Path or file-like object holding JSON-lines records that
            provide ``status`` and ``timestamp`` fields.

    Returns:
        The ``timestamp`` value of the first listed record with
        ``status == 'P'``.

    Raises:
        IndexError: If no record has status ``'P'``.
    """
    records = pd.read_json(file_name, lines=True).to_dict('records')
    # Re-normalizing the records flattens nested objects into top-level columns.
    flat = pd.json_normalize(records)
    is_printing = flat.status == 'P'
    return flat.loc[is_printing, 'timestamp'].values[0]

# Function to process and label data
def process_and_label_data(base_dir, categories=None):
    """Load every category's recording, trim it to the print, and label it.

    For each category the sensor log ``<base_dir>/<category>/t.txt`` and the
    status log ``<base_dir>/<category>/j.json`` are read. Rows recorded at or
    before the first ``'P'`` status timestamp are dropped, and the category
    name is stored in a ``label`` column.

    Args:
        base_dir: Directory containing one sub-directory per category.
        categories: Optional iterable of category (sub-directory) names.
            Defaults to the six categories used by this project.

    Returns:
        A single DataFrame with the rows of all categories concatenated in
        the order given, re-indexed from 0.
    """
    if categories is None:
        categories = ['arm_failure', 'bowden', 'plastic', 'proper', 'retraction_05', 'unstick']

    labelled_frames = []
    for category in categories:
        txt_file = os.path.join(base_dir, category, 't.txt')
        json_file = os.path.join(base_dir, category, 'j.json')

        df = read_data(txt_file)
        start_time = establish_printing_start(json_file)

        # .assign returns a new frame, so the label is never written into a
        # filtered slice of the original (avoids SettingWithCopyWarning).
        df = df[df.time > start_time].assign(label=category)

        labelled_frames.append(df)

    return pd.concat(labelled_frames, ignore_index=True)

# Function to compute windowed RMS
def window_rms(a, window_size):
    """Return the moving root-mean-square of ``a``, padded to ``len(a)``.

    The RMS is computed over every full window of ``window_size`` samples and
    the result is edge-padded on both sides so the output has the same length
    as the input.

    Args:
        a: One-dimensional sequence of numbers.
        window_size: Number of samples per window; must be at least 1. A
            window longer than the signal is clamped to the signal length,
            so the output is the RMS of the whole signal.

    Returns:
        1-D ``numpy.ndarray`` with ``len(a)`` elements (empty for empty input).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be at least 1, got {window_size}")

    values = np.asarray(a)
    n = len(values)
    if n == 0:
        return np.zeros(0, dtype=float)

    # np.convolve silently swaps its operands when the kernel is longer than
    # the signal, which would yield a mis-scaled result of the wrong length;
    # clamping keeps the kernel a true averaging window.
    effective_window = min(window_size, n)

    squared = np.power(values, 2)
    kernel = np.ones(effective_window) / float(effective_window)
    rms = np.sqrt(np.convolve(squared, kernel, 'valid'))

    # Pad the result to match the original length
    pad_length = n - len(rms)
    return np.pad(rms, (pad_length // 2, pad_length - pad_length // 2), 'edge')

# Apply the window_rms function to each feature
def apply_window_rms(df, features, window_size):
    """Replace each listed column of ``df`` with its moving RMS.

    Args:
        df: DataFrame containing the feature columns; modified in place.
        features: Iterable of column names to transform.
        window_size: Window length forwarded to ``window_rms``.

    Returns:
        The same ``df`` object, with the listed columns overwritten.
    """
    for column in features:
        raw_signal = df[column].values
        df[column] = window_rms(raw_signal, window_size)
    return df

# Normalizing the features using z-score
def zscore_normalize_data(df, columns):
    """Standardize the given columns of ``df`` with a fresh ``StandardScaler``.

    Args:
        df: DataFrame containing the columns; modified in place.
        columns: List of column names to standardize.

    Returns:
        The same ``df`` object, with the listed columns replaced by their
        standardized values.
    """
    standardized = StandardScaler().fit_transform(df[columns])
    df[columns] = standardized
    return df

# Directory containing the data
base_directory = r'C:\MyFiles\AI\UNI\ML-DP-AI\Project\dataset\WithBase'
all_data = process_and_label_data(base_directory)

# Sensor channels used as model inputs, and the recording categories
features = ['accel0X', 'accel0Y', 'accel0Z', 'accel1X', 'accel1Y', 'accel1Z', 'tension']
categories = ['arm_failure', 'bowden', 'plastic', 'proper', 'retraction_05', 'unstick']

# Handle missing values using interpolation.
# Only the numeric feature columns are interpolated: the frame also holds
# string columns (data_id, label), and interpolating object-dtype columns is
# deprecated in recent pandas. limit_direction='both' also fills gaps at the
# very start of the data, which a forward-only fill would leave as NaN.
all_data[features] = all_data[features].interpolate(method='linear', limit_direction='both')

# Apply moving RMS
# NOTE(review): the window runs over the concatenation of all categories, so
# samples near a category boundary mix two recordings - confirm this is intended.
window_size = 50000
all_data = apply_window_rms(all_data, features, window_size)

# Normalize the transformed features
# NOTE(review): the scaler is fit on the full data set, i.e. before the
# train/test split below - consider fitting on the training portion only.
all_data = zscore_normalize_data(all_data, features)

# Encode categorical labels
label_encoder = LabelEncoder()
all_data['label'] = label_encoder.fit_transform(all_data['label'])

# Convert labels to binary (pass/fail)
# Assuming 'proper' is the only pass class, and all others are fail.
# The encoded value of 'proper' is looked up once instead of once per row.
proper_code = label_encoder.transform(['proper'])[0]
all_data['label'] = (all_data['label'] == proper_code).astype(int)

# Reshape data into segments of `time_steps` consecutive rows; trailing rows
# that do not fill a whole segment are dropped.
time_steps = 100
samples = len(all_data) // time_steps
X = all_data[features].values[:samples * time_steps].reshape(samples, time_steps, len(features))
segment_labels = all_data['label'].values[:samples * time_steps].reshape(samples, time_steps)

# Majority vote per segment: Pass (1) only when more than half of the rows
# are Pass, so a tie resolves to Fail (0).
y = (segment_labels.sum(axis=1, keepdims=True) * 2 > time_steps).astype(int)

# Flatten each segment to one row: the split and SMOTE steps below work on
# 2-D feature matrices. The CNN input shape is restored after resampling.
X_flat = X.reshape(samples, -1)
y_flat = y.flatten()  # Flatten y to make it 1-dimensional

# Split data into training and testing sets.
# stratify keeps the Pass/Fail ratio identical in both splits, which matters
# because the classes are imbalanced (hence the SMOTE step below).
# NOTE(review): neighbouring segments come from a 50000-sample moving RMS and
# are therefore highly correlated; a random split likely leaks information
# between train and test - consider a grouped or chronological split.
X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y_flat, test_size=0.2, random_state=42, stratify=y_flat
)

# Apply SMOTE to the training data only, so the test set contains no
# synthetic samples.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Reshape the flattened rows back to (segments, time_steps, channels) to fit
# the 1D CNN
X_train_resampled = X_train_resampled.reshape(-1, time_steps, len(features))
X_test = X_test.reshape(-1, time_steps, len(features))

# Define the 1D-CNN model
def create_1d_cnn_model(input_shape, num_classes):
    """Build and compile the 1D convolutional classifier.

    The network stacks three Conv1D + MaxPooling1D stages (64, 128 and 256
    filters, kernel size 3, pool size 2), followed by a 100-unit dense layer
    with 50% dropout and a softmax output.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer, sparse
        categorical cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=128, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=256, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation='relu'),
        Dropout(0.5),
        # Sparse loss below expects integer class ids, one output per class.
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network

# Create the model
input_shape = (X_train_resampled.shape[1], X_train_resampled.shape[2])
num_classes = 2  # Since we now have binary classification (Pass or Fail)
model = create_1d_cnn_model(input_shape, num_classes)

# Early stopping: stop after 10 epochs without val_loss improvement and
# restore the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Shuffle once before fitting. Keras takes the validation_split fraction from
# the END of the arrays (before its own per-epoch shuffling), and SMOTE
# appends its synthetic samples after the original ones. Without this shuffle
# the validation set would be dominated by synthetic minority-class samples,
# which skews val_loss and therefore early stopping.
shuffle_order = np.random.default_rng(42).permutation(len(X_train_resampled))

# Train the model
history = model.fit(
    X_train_resampled[shuffle_order],
    y_train_resampled[shuffle_order],
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Score the trained network on the held-out test segments
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy}')
print(f'Test Loss: {test_loss}')

# Persist the trained network to disk
model.save('1d_cnn_model.h5')

# Training curves: one panel per metric, train and validation in each
plt.figure(figsize=(12, 6))

# (subplot position, training key, validation key, title, y-axis label)
panels = [
    (1, 'accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    (2, 'loss', 'val_loss', 'Model Loss', 'Loss'),
]
for position, train_key, val_key, panel_title, axis_label in panels:
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(axis_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

plt.show()

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from tensorflow.keras.models import load_model

# Load the trained model back from disk
model = load_model('1d_cnn_model.h5')

# Make predictions on the test data; each row holds one softmax score per
# class, so the arg-max over axis 1 is the predicted class id.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate and print the accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Test Accuracy: {accuracy:.5f}')

# Generate and print the classification report.
# labels=[0, 1] pins the class order so it always matches the two target
# names, even if one class is absent from y_test and the predictions.
report = classification_report(y_test, y_pred_classes, labels=[0, 1], target_names=['Fail', 'Pass'])
print('Classification Report:')
print(report)

# Generate and plot the confusion matrix (always 2x2 thanks to the explicit
# labels, so the tick labels line up with the cells)
conf_matrix = confusion_matrix(y_test, y_pred_classes, labels=[0, 1])
plt.figure(figsize=(7, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Fail', 'Pass'], yticklabels=['Fail', 'Pass'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()


# Evaluate the reloaded model on the same test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy}')
print(f'Test Loss: {test_loss}')