In [4]:
# Import necessary libraries
import numpy as np
import cv2
import scipy.io
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.metrics import classification_report

# Load the dataset from the MATLAB file; the 'facedat' entry is indexed as
# [0, i] and each cell is treated as a stack of 2-D images along axis 2.
data_path = 'umist_cropped.mat'
data = scipy.io.loadmat(data_path)

# Flatten the stacks into two parallel lists: one 2-D image per sample and
# the index of the cell it came from as its class label.
images = []
labels = []
subject_stacks = data['facedat']
for subject_idx in range(subject_stacks.shape[1]):
    stack = subject_stacks[0, subject_idx]
    n_frames = stack.shape[2]
    images.extend(stack[:, :, frame] for frame in range(n_frames))
    labels.extend([subject_idx] * n_frames)

# Convert images and labels to numpy arrays
images = np.array(images)
labels = np.array(labels)

# Preprocessing functions
def convert_to_grayscale(image):
    """Return a single-channel version of *image*.

    An array with exactly two dimensions, or one whose third axis has
    length 1, is returned unchanged (the very same object). Any other
    input is handed to OpenCV's BGR-to-gray conversion.
    """
    rank = len(image.shape)
    already_gray = rank == 2 or (rank > 2 and image.shape[2] == 1)
    if not already_gray:
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return image

def normalize_image(image):
    """Return *image* divided by 255.0 (the input is not modified)."""
    scaled = image / 255.0
    return scaled

# Preprocess images: grayscale conversion followed by division by 255,
# then stack the per-image results into a single array.
preprocessed_images = np.array(
    [normalize_image(convert_to_grayscale(raw_image)) for raw_image in images]
)

# Apply K-means clustering to the images, each flattened to one row vector.
# NOTE(review): `clusters` is not referenced by any later code visible in this
# cell -- confirm whether the clustering result is still needed.
n_clusters = 5
flattened_images = preprocessed_images.reshape(preprocessed_images.shape[0], -1)
# n_init is set explicitly: leaving it unset triggers scikit-learn's
# FutureWarning about the default changing from 10 to 'auto', and the
# clustering result would silently change once that happens.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
clusters = kmeans.fit_predict(flattened_images)


# Split data into training, validation, and test sets.
# First hold out 30% of the samples, then halve that hold-out into validation
# and test; both splits are stratified on the class labels.
X_train, X_temp, y_train, y_temp = train_test_split(
    preprocessed_images, labels, test_size=0.3, random_state=42, stratify=labels
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Reshape images for CNN input by appending a single channel axis
image_height, image_width = preprocessed_images.shape[1], preprocessed_images.shape[2]
X_train_cnn = X_train.reshape(X_train.shape[0], image_height, image_width, 1)
X_val_cnn = X_val.reshape(X_val.shape[0], image_height, image_width, 1)
X_test_cnn = X_test.reshape(X_test.shape[0], image_height, image_width, 1)

# Convert labels to categorical.
# The one-hot width is fixed from the full label set rather than inferred per
# split: without num_classes, to_categorical uses max(label) + 1 of whatever it
# is given, so a split missing the highest class would produce a narrower
# matrix than the network's output layer expects.
num_classes = len(np.unique(labels))
y_train_cat = to_categorical(y_train, num_classes=num_classes)
y_val_cat = to_categorical(y_val, num_classes=num_classes)
y_test_cat = to_categorical(y_test, num_classes=num_classes)

# Data Augmentation: random rotations, shifts, shears, zooms and horizontal
# flips; the settings are collected in one mapping and unpacked into the
# generator's constructor.
augmentation_options = {
    'rotation_range': 30,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
datagen = ImageDataGenerator(**augmentation_options)

# Learning Rate Scheduler
def lr_scheduler(epoch):
    """Return the learning rate for *epoch*: 0.001 scaled by exp(-epoch / 10)."""
    decay_factor = np.exp(-epoch / 10)
    return 0.001 * decay_factor

# Define the CNN model architecture: two Conv -> BatchNorm -> ReLU -> MaxPool
# stages, then a dense classifier head with dropout and a softmax output that
# has one unit per distinct label.
model = Sequential([
    # Convolutional stage 1
    Conv2D(32, (3, 3), input_shape=(image_height, image_width, 1)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Convolutional stage 2
    Conv2D(64, (3, 3)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(np.unique(labels)), activation='softmax'),
])

# Compile the model
optimizer = Adam(learning_rate=0.001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Learning Rate Scheduler Callback
lr_schedule = LearningRateScheduler(lr_scheduler)

# Train the model with data augmentation
batch_size = 32
# steps_per_epoch must be a whole number of batches; the previous expression
# (len / 32) produced a float. Rounding up includes the final partial batch,
# so every training sample is drawn once per epoch.
steps_per_epoch = int(np.ceil(len(X_train_cnn) / batch_size))
history = model.fit(
    datagen.flow(X_train_cnn, y_train_cat, batch_size=batch_size),
    validation_data=(X_val_cnn, y_val_cat),
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    verbose=1,
    callbacks=[lr_schedule],
)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_cnn, y_test_cat)
print("Test accuracy:", test_accuracy)

# Generate classification report from the arg-max of the predicted
# class probabilities versus the arg-max of the one-hot test labels.
y_pred = model.predict(X_test_cnn)
y_pred_classes = np.argmax(y_pred, axis=1)

# zero_division=0 keeps the reported value at 0.0 for classes that received no
# predictions, but without emitting an UndefinedMetricWarning for each of them.
classification_rep = classification_report(
    np.argmax(y_test_cat, axis=1),
    y_pred_classes,
    zero_division=0,
)
print(classification_rep)


  super()._check_params_vs_input(X, default_n_init=10)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test accuracy: 0.12643678486347198
              precision    recall  f1-score   support

           0       1.00      0.67      0.80         6
           1       0.00      0.00      0.00         5
           2       0.00      0.00      0.00         4
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         3
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         4
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         5
          10       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
