In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Dataset parameters
dataset_path = "data"
image_size = (120, 160)  # As per the paper
image_extensions = ["bmp"]  # Update this if other extensions exist

# Function to load images from the LG folder (following the exact structure)
def load_images_from_folder(folder_path):
    images = []
    labels = []
    names = []
    corruptedFiles = []
    image_extensions = ["bmp"]

    # Traverse through the group -> subject -> LG -> session -> anotherfolder -> images
    for group_folder in os.listdir(folder_path):
        group_path = os.path.join(folder_path, group_folder)
        if os.path.isdir(group_path):  # If it is a group folder
            for subject_folder in os.listdir(group_path):
                subject_path = os.path.join(group_path, subject_folder)
                if os.path.isdir(subject_path):  # If it is a subject folder
                    lg_folder = os.path.join(subject_path, "LG")  # Looking specifically for the 'LG' folder
                    if os.path.isdir(lg_folder):  # Only process if 'LG' folder exists
                        # Iterate over sessions inside 'LG' folder
                        for session_folder in os.listdir(lg_folder):
                            session_path = os.path.join(lg_folder, session_folder)
                            if os.path.isdir(session_path):  # Check each session
                                # Iterate over each subfolder inside the session
                                for subfolder in os.listdir(session_path):
                                    subfolder_path = os.path.join(session_path, subfolder)
                                    if os.path.isdir(subfolder_path):  # If it's a subfolder containing images
                                        # Now we go through all the image files
                                        for file in os.listdir(subfolder_path):
                                            if any(file.lower().endswith(ext) for ext in image_extensions):
                                                img_path = os.path.join(subfolder_path, file)
                                                try:
                                                    # Read image in grayscale and resize
                                                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                                                    if img is not None:
                                                        img = cv2.resize(img, image_size)
                                                        images.append(img)
                                                        labels.append(file[6] != '0')  # Assuming the label is based on the file name (0 or 1)
                                                        names.append(file)
                                                except Exception as e:
                                                    corruptedFiles.append((img_path, str(e)))

    print(f"Following {len(corruptedFiles)} files are corrupt or encountered error: \n {corruptedFiles}")
    
    # Return the images, labels, and names if any images were found
    if images:
        return np.array(images), np.array(labels), np.array(names)
    else:
        print("No images were loaded.")
        return np.array([]), np.array([]), np.array([])  # Return empty arrays if no images are found

# Load dataset (only from the LG folder)
images, labels, names = load_images_from_folder(dataset_path)
print(f"Read {len(images)} images from the LG folder with shape {images.shape}")
# Flatten images to vectors
images = images.reshape(images.shape[0], -1)  # Shape (N, 19200)

# Split dataset into train (60%) and test (40%)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.4, random_state=42)

Following 0 files are corrupt or encountered error: 
 []
Read 2820 images from the LG folder with shape (2820, 160, 120)


In [4]:
from sklearn.preprocessing import StandardScaler

# Step 2: Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Step 3: Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
import tensorflow as tf


model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(1, activation='linear', kernel_regularizer=tf.keras.regularizers.l2(0.01))  # linear activation
])

# Step 5: Compile the model
model.compile(optimizer='adam', loss='hinge', metrics=['accuracy']) 

In [17]:
model.fit(X_train, y_train, epochs=25, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/25
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 79ms/step - accuracy: 0.8555 - loss: 0.3529 - val_accuracy: 0.7394 - val_loss: 0.6974
Epoch 2/25
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 77ms/step - accuracy: 0.8785 - loss: 0.2729 - val_accuracy: 0.7429 - val_loss: 0.6926
Epoch 3/25
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 73ms/step - accuracy: 0.8883 - loss: 0.2758 - val_accuracy: 0.7713 - val_loss: 0.6856
Epoch 4/25
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 81ms/step - accuracy: 0.9605 - loss: 0.1442 - val_accuracy: 0.6826 - val_loss: 0.9481
Epoch 5/25
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - accuracy: 0.8752 - loss: 0.3047 - val_accuracy: 0.7340 - val_loss: 0.8513
Epoch 6/25
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - accuracy: 0.9474 - loss: 0.1752 - val_accuracy: 0.7057 - val_loss: 0.9650
Epoch 7/25
[1m71/71[0m [32m━━━━

<keras.src.callbacks.history.History at 0x2cefdf610>

In [18]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.7517 - loss: 0.7975
Test accuracy: 0.7482269406318665


In [14]:
predictions = model.predict(X_test)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)

report = classification_report(true_classes, predicted_classes, target_names=['Fit for Duty', 'Alcoholic'])
print(report)

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


AxisError: axis 1 is out of bounds for array of dimension 1

In [None]:
# Perform 5-fold cross-validation to tune hyperparameters
svm = SVC(kernel='rbf')
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1],
}
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best parameters and model
print("Best parameters from CV:", grid_search.best_params_)
best_svm = grid_search.best_estimator_

# Train the best model on the full training set
best_svm.fit(X_train, y_train)

# Evaluate on the test set
y_pred = best_svm.predict(X_test)
print("Accuracy on Test Set:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
