In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [2]:
# Path to the dataset
dataset_path = './dataset-doctor-bills/'
forged_dir = os.path.join(dataset_path, 'forged')
genuine_dir = os.path.join(dataset_path, 'genuine')

In [3]:
# Hyperparameters
window_size = (128, 128)  # Sliding window size (input size for the model)
step_size = 64  # Step size for the sliding window
batch_size = 32
epochs = 10
learning_rate = 0.0001

In [4]:
# Function to apply sliding window without resizing the full image
def sliding_window(image, stepSize, windowSize):
    for y in range(0, image.shape[0] - windowSize[1] + 1, stepSize):
        for x in range(0, image.shape[1] - windowSize[0] + 1, stepSize):
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])


In [5]:
# Preprocess the dataset (load and extract patches)
def load_and_extract_patches(directory):
    images = []
    labels = []
    
    for label, folder in enumerate(['forged', 'genuine']):
        folder_path = os.path.join(directory, folder)
        for img_file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img_file)
            image = cv2.imread(img_path)
            if image is not None:
                # Directly apply sliding window on the original high-resolution image
                for (x, y, window) in sliding_window(image, stepSize=step_size, windowSize=window_size):
                    if window.shape[0] == window_size[1] and window.shape[1] == window_size[0]:
                        images.append(window)
                        labels.append(label)  # 0 for forged, 1 for genuine
    return np.array(images), np.array(labels)

In [6]:
# Load dataset
X, y = load_and_extract_patches(dataset_path)

In [7]:
import os
import cv2
import numpy as np
from tensorflow.keras.utils import Sequence

# Custom generator to load images and extract patches on the fly
class PatchDataGenerator(Sequence):
    def __init__(self, image_paths, labels, batch_size, window_size=(128, 128), step_size=64):
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.window_size = window_size
        self.step_size = step_size
        self.indexes = np.arange(len(self.image_paths))
        
    def __len__(self):
        # Total number of batches per epoch
        return int(np.floor(len(self.image_paths) / self.batch_size))
    
    def __getitem__(self, index):
        # Get batch indexes
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        
        # Load and preprocess a batch of images
        images, labels = self.__data_generation(batch_indexes)
        
        return np.array(images), np.array(labels)
    
    def __data_generation(self, batch_indexes):
        images = []
        labels = []
        
        for i in batch_indexes:
            img_path = self.image_paths[i]
            image = cv2.imread(img_path)
            label = self.labels[i]  # 0 for forged, 1 for genuine
            if image is not None:
                # Apply sliding window
                for (x, y, window) in sliding_window(image, stepSize=self.step_size, windowSize=self.window_size):
                    if window.shape[0] == self.window_size[1] and window.shape[1] == self.window_size[0]:
                        window = window / 255.0  # Normalize
                        images.append(window)
                        labels.append(label)
        return images, labels


In [9]:
# Define dataset path
dataset_path = 'dataset-doctor-bills'  # Adjust as necessary

# Get image paths and labels
image_paths = []
labels = []

for label, folder in enumerate(['forged', 'genuine']):
    folder_path = os.path.join(dataset_path, folder)
    for img_file in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_file)
        image_paths.append(img_path)
        labels.append(label)  # 0 for forged, 1 for genuine

# Define batch size
batch_size = 16

# Split the dataset into training and validation sets
train_image_paths, val_image_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.2, random_state=42  # 20% validation data
)

# Create training generator
train_generator = PatchDataGenerator(train_image_paths, train_labels, batch_size=batch_size)

# Create validation generator
val_generator = PatchDataGenerator(val_image_paths, val_labels, batch_size=batch_size)

# Define the ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
x = base_model.output
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the ResNet50 layers (optional)
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
learning_rate = 0.001  # Ensure this is defined
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model using the generators
model.fit(
    train_generator,
    epochs=10,
    steps_per_epoch=len(train_generator),
    validation_data=val_generator,
    validation_steps=len(val_generator)
)

NameError: name 'train_test_split' is not defined