In [1]:
import cv2
import os
import numpy as np

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.model_selection import train_test_split






In [2]:
import os
from PIL import Image
import numpy as np

# Paths
# NOTE(review): these are absolute, machine-specific locations; consider deriving
# them from a configurable data directory so the notebook runs on other machines.
forged_path = 'C:/Users/Saahil/Documents/Projects/HackrX/dataset-doctor-bills/forged'
genuine_path = 'C:/Users/Saahil/Documents/Projects/HackrX/dataset-doctor-bills/genuine'

# (width, height) every image is resized to. It matches the input_shape=(224, 224, 3)
# of the ResNet50 model built later in this notebook, and guarantees that the list
# of images can be stacked into a single 4-D array.
IMAGE_SIZE = (224, 224)


def load_images_from_dir(directory, label, target_size=IMAGE_SIZE):
    """Load every readable image below ``directory`` as an RGB array.

    Args:
        directory: Root folder that is walked recursively.
        label: Integer class label attached to every image found.
        target_size: ``(width, height)`` each image is resized to. The aspect
            ratio is not preserved.

    Returns:
        A ``(images, labels)`` pair of equally long lists; ``images`` holds one
        ``(height, width, 3)`` NumPy array per file that could be opened.
    """
    loaded_images, loaded_labels = [], []
    for root, dirs, files in os.walk(directory):
        # os.walk yields entries in arbitrary filesystem order; sorting keeps the
        # dataset order (and therefore the seeded train/test split) reproducible.
        dirs.sort()
        for file in sorted(files):
            path = os.path.join(root, file)
            try:
                # The context manager closes the file handle; convert() has
                # already read the pixel data by then.
                with Image.open(path) as source_image:
                    resized = source_image.convert('RGB').resize(target_size)
            except OSError as err:
                # Non-image files (e.g. Thumbs.db) or corrupt images: report and
                # skip them instead of aborting the whole load.
                print(f"Skipping unreadable file {path}: {err}")
                continue
            loaded_images.append(np.array(resized))
            loaded_labels.append(label)
    return loaded_images, loaded_labels


# Initialize lists to store data and labels
images = []
labels = []

# Label 1 = forged, label 0 = genuine.
for class_dir, class_label in ((forged_path, 1), (genuine_path, 0)):
    class_images, class_labels = load_images_from_dir(class_dir, class_label)
    if not class_images:
        # os.walk silently yields nothing for a missing or empty folder; fail here
        # rather than training on a dataset with a missing class.
        raise FileNotFoundError(f"No readable images found under {class_dir}")
    images.extend(class_images)
    labels.extend(class_labels)


In [3]:
# Use the tf.keras implementation, like every other Keras import in this notebook,
# instead of rebinding ImageDataGenerator to the standalone `keras` package.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentation settings applied to images drawn from this generator.
# NOTE(review): horizontal_flip mirrors the documents (and their text); confirm
# that this is a desirable augmentation for bill images.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [4]:
# Turn the accumulated Python lists into NumPy arrays for splitting and training.
images, labels = np.array(images), np.array(labels)

In [4]:
# Split data into train/test (30% held out for testing).
# The images are first copied to float32 and passed through ResNet50's
# preprocess_input, so that both the training and the test arrays are in the form
# the ImageNet-pretrained backbone expects. np.array(...) always copies, so
# `images` itself stays untouched and this cell can be re-run safely.
# stratify=labels keeps the forged/genuine ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    preprocess_input(np.array(images, dtype='float32')),
    labels,
    test_size=0.3,
    stratify=labels,
    random_state=42,
)

# Data augmentation: only batches drawn through datagen.flow(...) are augmented,
# so the test arrays are never altered.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# No datagen.fit(X_train) call is needed: fit() only computes statistics for
# featurewise_center, featurewise_std_normalization and zca_whitening, none of
# which are enabled above.

In [7]:
# Report how many samples and labels ended up in each split.
split_report = [
    f"Number of training samples (X_train): {len(X_train)}",
    f"Number of test samples (X_test): {len(X_test)}",
    f"Number of training labels (y_train): {len(y_train)}",
    f"Number of test labels (y_test): {len(y_test)}",
]
print("\n".join(split_report))

Number of training samples (X_train): 74
Number of test samples (X_test): 32
Number of training labels (y_train): 74
Number of test labels (y_test): 32


In [6]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from tensorflow.keras.metrics import Precision, Recall
from sklearn.metrics import accuracy_score, precision_score, recall_score

# ImageNet-pretrained ResNet50 backbone without its original classification head.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Keep the backbone weights fixed; only the layers added below are trained.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Custom head stacked on the backbone output:
# flatten -> L2-regularised dense (512, ReLU) -> batch normalisation -> dropout.
# NOTE(review): Flatten feeds every spatial position of the backbone output into
# the dense layer; a global pooling layer would give a much smaller head - consider.
head_layers = [
    Flatten(),
    Dense(512, activation='relu', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Dropout(0.5),
]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# One sigmoid unit for the binary decision (labels: 1 = forged, 0 = genuine).
predictions = Dense(1, activation='sigmoid')(x)

# Full network: backbone input -> custom head output.
model = Model(inputs=base_model.input, outputs=predictions)

# Adam with a small learning rate; accuracy, precision and recall are tracked.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(), Recall()],
)

# Stop once val_loss has not improved for 5 epochs (restoring the best weights),
# and cut the learning rate by 10x after 3 epochs without improvement.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)







In [7]:
# Hold out part of the training data for validation. Early stopping (with
# restore_best_weights) and the learning-rate schedule both select on val_loss;
# using the test set for that would make the final test metrics optimistic.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=42
)

# Train on augmented batches; the validation data is passed through unaugmented.
# seed fixes the shuffling/augmentation randomness of the generator.
history = model.fit(
    datagen.flow(X_fit, y_fit, batch_size=32, seed=42),
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, reduce_lr]
)




Epoch 1/50




: 

In [None]:
# Evaluate the model on the test data.
# evaluate() returns the loss followed by the metrics in the order they were passed
# to model.compile(): accuracy, precision, recall.
test_loss, test_accuracy, test_precision, test_recall = model.evaluate(X_test, y_test, verbose=1)

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")


def predict_labels(inputs, threshold=0.5):
    """Return hard 0/1 predictions for ``inputs`` as a flat int32 array.

    The sigmoid outputs of ``model.predict`` are thresholded and flattened with
    ravel() so the result lines up element-wise with the 1-D label arrays.
    """
    return (model.predict(inputs) > threshold).astype("int32").ravel()


# Predict on the test set for detailed metrics
y_pred = predict_labels(X_test)

# Calculate metrics using sklearn for better insights.
# zero_division=0 reports 0 (without a warning) when a score is undefined, e.g.
# precision when no sample was predicted as forged.
train_accuracy = accuracy_score(y_train, predict_labels(X_train))
test_accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)

print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")