In [None]:
import os
import cv2
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Root folder of the image dataset; it is expected to contain one
# sub-folder per class (see `categories`). The path is relative to the
# notebook's working directory.
dataset_path = "drive/MyDrive/Vanderbilt/SSDA/ML/Image classification"

In [None]:
# Class names; each one is also the name of a sub-folder of the dataset.
# NOTE: the order of this list is NOT the order of the encoded class
# indices -- scikit-learn's LabelEncoder sorts class names alphabetically.
categories = [
    'recto',
    'verso',
    'double',
]

In [None]:
# Function to load and preprocess the images
def load_images(dataset_path, categories, image_size=(150, 150)):
    """Load every image under ``dataset_path/<category>`` as a scaled grayscale array.

    Parameters
    ----------
    dataset_path : str
        Folder that contains one sub-folder per category.
    categories : iterable of str
        Sub-folder names; each name is also used as the label of its images.
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize``. OpenCV interprets it as
        ``(width, height)``, so each loaded array has shape
        ``(image_size[1], image_size[0])``.

    Returns
    -------
    tuple of np.ndarray
        ``(data, labels)``: the stacked images, divided by 255.0, and the
        category name of each image in the same order.

    Notes
    -----
    Files that cannot be read or resized are reported with ``print`` and
    skipped; they never abort the load.
    """
    data = []
    labels = []

    for category in categories:
        path = os.path.join(dataset_path, category)
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for img_name in sorted(os.listdir(path)):
            img_path = os.path.join(path, img_name)
            try:
                # Load image as single-channel grayscale
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    # cv2.imread signals failure by returning None instead
                    # of raising; report it explicitly rather than letting
                    # cv2.resize fail with an opaque assertion error.
                    print(f"Error loading image {img_name}: not a readable image file")
                    continue
                # Resize and scale pixel values by 1/255
                img = cv2.resize(img, image_size)
                data.append(img / 255.0)
                labels.append(category)
            except Exception as e:
                print(f"Error loading image {img_name}: {e}")

    return np.array(data), np.array(labels)

In [None]:
# Read the whole dataset into memory: X holds the images, y the category names
X, y = load_images(dataset_path, categories)
# Append a trailing axis of length 1 so each grayscale image carries an
# explicit channel dimension
X = X[..., np.newaxis]

In [None]:
# Turn the string labels into one-hot vectors: LabelEncoder maps each class
# name to an integer index, to_categorical expands the indices to one-hot rows.
label_encoder = LabelEncoder()
y_encoded = to_categorical(
    label_encoder.fit_transform(y),
    num_classes=len(categories),
)

In [None]:
# Split the dataset into training and testing sets (80% / 20%).
# The split is stratified on the string labels `y` so that each class keeps
# the same proportion in both subsets; with a small dataset a purely random
# split can leave a class under-represented in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is repeatable
    stratify=y,
)

In [None]:
# Define the CNN by handing the full layer stack to Sequential
model = Sequential([
    # Input: 150x150 image with a single (grayscale) channel
    Input(shape=(150, 150, 1)),

    # Three convolution + max-pooling stages; the number of 3x3 filters
    # doubles at every stage (32 -> 64 -> 128) while each 2x2 pooling step
    # downsamples the feature maps.
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: flatten the feature maps into a vector, pass it through
    # one hidden dense layer, apply 50% dropout as regularisation, and finish
    # with a softmax over the classes.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(categories), activation='softmax'),
])

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# targets are one-hot vectors), and accuracy as the reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Augmentation generator: applies random geometric perturbations to the
# images it is asked to yield
datagen = ImageDataGenerator(
    # random rotation of up to 20 degrees
    rotation_range=20,
    # random shifts of up to 20% of the width / height
    width_shift_range=0.2,
    height_shift_range=0.2,
    # random shear and zoom
    shear_range=0.15,
    zoom_range=0.15,
    # pixels uncovered by a transform are filled from the nearest edge pixel
    fill_mode="nearest",
    # no mirroring, to avoid label ambiguity
    horizontal_flip=False,
)

In [None]:
# Train the model
batch_size = 32
epochs = 15

# Draw the training batches from the augmentation generator so the transforms
# configured in `datagen` are actually applied during training.
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)

# `steps_per_epoch` is deliberately not set: the iterator knows its own length
# and yields every batch (including the final partial one) each epoch.
# Forcing steps_per_epoch = len(X_train) // batch_size on in-memory data made
# every second epoch run on the single leftover batch only and triggered an
# "ran out of data" warning.
# NOTE(review): the test split doubles as validation data here, so the final
# test accuracy is not measured on data unseen during model selection.
history = model.fit(train_batches,
                    validation_data=(X_test, y_test),
                    epochs=epochs)

# Save the model
model.save('folio_classifier_model.keras')

Epoch 1/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2s/step - accuracy: 0.3875 - loss: 1.4722 - val_accuracy: 0.7179 - val_loss: 1.0454
Epoch 2/15
[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m5s[0m 2s/step - accuracy: 0.3571 - loss: 1.1241

  self.gen.throw(typ, value, traceback)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 448ms/step - accuracy: 0.3571 - loss: 1.1241 - val_accuracy: 0.5128 - val_loss: 0.9916
Epoch 3/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.4552 - loss: 1.0137 - val_accuracy: 0.5128 - val_loss: 0.8968
Epoch 4/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 142ms/step - accuracy: 0.4643 - loss: 0.9226 - val_accuracy: 0.5128 - val_loss: 0.8513
Epoch 5/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2s/step - accuracy: 0.5917 - loss: 0.8176 - val_accuracy: 0.9231 - val_loss: 0.5612
Epoch 6/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 152ms/step - accuracy: 0.8214 - loss: 0.6573 - val_accuracy: 1.0000 - val_loss: 0.4476
Epoch 7/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.9083 - loss: 0.4733 - val_accuracy: 1.0000 - val_los

In [None]:
# Measure loss and accuracy on the held-out split and report the accuracy
# as a percentage
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test accuracy: {test_accuracy * 100:.2f}%")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 1.0000 - loss: 1.0334e-04
Test accuracy: 100.00%


In [None]:
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array

# Define a function to preprocess the input image and predict the class label
def predict_folio_class(image_path, model, class_labels=None, image_size=(150, 150)):
    """Predict the folio class of a single image file.

    The image goes through the same steps as the training data: grayscale
    load, resize, scaling by 1/255, then a batch axis is added.

    Parameters
    ----------
    image_path : str
        Path of the image to classify.
    model : object
        Trained model exposing ``predict(batch)`` that returns one row of
        class probabilities per input image.
    class_labels : sequence of str, optional
        Label for each output index of the model. Defaults to
        ``['double', 'recto', 'verso']`` (alphabetical order, which is how
        scikit-learn's ``LabelEncoder`` orders classes). Pass
        ``label_encoder.classes_`` to stay in sync with training.
    image_size : tuple of int, optional
        Size the image is resized to; must match the model's input size.

    Returns
    -------
    str
        The label whose predicted probability is highest.

    Raises
    ------
    ValueError
        If the file at ``image_path`` cannot be read as an image.
    """
    if class_labels is None:
        # Same labels and order the function has always used
        class_labels = ['double', 'recto', 'verso']

    # Load the image from the provided path, convert to grayscale
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or not decodable; fail here with a message that names the file.
        raise ValueError(f"Could not read image file: {image_path}")

    # Resize the image to match the input size of the model
    image = cv2.resize(image, image_size)

    # Convert to a float array with a trailing channel axis and normalize
    image = img_to_array(image) / 255.0

    # Add the leading batch axis: (1, height, width, 1)
    image = np.expand_dims(image, axis=0)

    # Use the model to predict the class label probabilities
    predictions = model.predict(image)

    # Index of the highest probability for the single image in the batch
    predicted_class_index = int(np.argmax(predictions, axis=1)[0])

    # Map the predicted index to the corresponding class label
    return class_labels[predicted_class_index]

In [None]:
predict_folio_class("drive/MyDrive/Vanderbilt/SSDA/ML/Image classification/recto/239746-0029.jpg", model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step


'recto'