In [1]:
# Step 1: Import Libraries
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split

# Check TensorFlow version
print("TensorFlow version:", tf.__version__)

# Step 2: Define Paths and Gather Filepaths
# Assuming you have unzipped the SOCOFing dataset and it resides in a folder named "SOCOFing/Real"
data_dir = 'SOCOFing/Real'  # Change this if your dataset folder is located elsewhere
file_paths = glob.glob(os.path.join(data_dir, '*.bmp'))
print(f"Found {len(file_paths)} images.")

# Step 3: Simulate Blood Group Labels
def simulate_blood_group(file_name):
    """
    This function extracts the finger type from the filename and returns a simulated blood group.
    Example filename: "101__M_Left_index_finger.bmp"
    """
    base_name = os.path.basename(file_name)
    parts = base_name.split('_')

    # Extract the finger information; here we assume the finger name is in the fourth part.
    # Adjust the index if your file naming is different.
    if len(parts) >= 4:
        finger = parts[3].lower()  # e.g., "index", "thumb", "middle", or "ring"
    else:
        return 'Unknown'

    # Simulated mapping (for demonstration purposes only)
    if 'index' in finger:
        return 'A+'
    elif 'thumb' in finger:
        return 'B+'
    elif 'middle' in finger:
        return 'O+'
    elif 'ring' in finger:
        return 'AB+'
    else:
        return 'Unknown'

# Create a DataFrame with file paths and simulated blood group labels
df = pd.DataFrame({'filepath': file_paths})
df['blood_group'] = df['filepath'].apply(simulate_blood_group)

# Remove any rows with 'Unknown' labels (if any)
df = df[df['blood_group'] != 'Unknown']
print("Sample data:")
print(df.head())

# Step 4: Create Image Data Generators for Training and Validation
img_height, img_width = 224, 224
batch_size = 32

# Data augmentation and rescaling; using validation_split to reserve 20% for validation.
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)

train_generator = datagen.flow_from_dataframe(
    dataframe=df,
    x_col="filepath",
    y_col="blood_group",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical",
    subset='training',
    shuffle=True
)

validation_generator = datagen.flow_from_dataframe(
    dataframe=df,
    x_col="filepath",
    y_col="blood_group",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical",
    subset='validation',
    shuffle=True
)

# Step 5: Build a CNN Model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Step 6: Train the Model
epochs = 10  # Adjust the number of epochs as needed
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator
)

# Step 7: Visualize Training History
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)

plt.figure(figsize=(14, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()


TensorFlow version: 2.18.0
Found 0 images.
Sample data:
Empty DataFrame
Columns: [filepath, blood_group]
Index: []


KeyError: 'blood_group'