In [1]:
import os
import cv2
import numpy as np

base_dir = r"G:\fraud_document_ai\data\processed\roi"
img_size = 128

X = []
y = []

for label, class_id in [("genuine", 0), ("fraud", 1)]:
    folder = os.path.join(base_dir, label)
    for file in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, file), cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (img_size, img_size))
        img = img / 255.0
        X.append(img)
        y.append(class_id)

X = np.array(X).reshape(-1, img_size, img_size, 1)
y = np.array(y)

print("ROI Dataset shape:", X.shape)
print("Labels shape:", y.shape)

ROI Dataset shape: (70, 128, 128, 1)
Labels shape: (70,)


In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(128,128,1)),
    MaxPooling2D(2,2),

    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),

    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

history = model.fit(
    X_train, y_train,
    epochs=12,
    batch_size=8,
    validation_data=(X_test, y_test)
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/12
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 130ms/step - accuracy: 0.5510 - loss: 1.1166 - val_accuracy: 0.7619 - val_loss: 0.5519
Epoch 2/12
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 86ms/step - accuracy: 0.6122 - loss: 0.7387 - val_accuracy: 0.7619 - val_loss: 0.6009
Epoch 3/12
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step - accuracy: 0.6122 - loss: 0.6574 - val_accuracy: 0.7619 - val_loss: 0.5713
Epoch 4/12
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step - accuracy: 0.6735 - loss: 0.7093 - val_accuracy: 0.7619 - val_loss: 0.5607
Epoch 5/12
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - accuracy: 0.6327 - loss: 0.6638 - val_accuracy: 0.7619 - val_loss: 0.5745
Epoch 6/12
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - accuracy: 0.6939 - loss: 0.6272 - val_accuracy: 0.7619 - val_loss: 0.5442
Epoch 7/12
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━

In [3]:
loss, accuracy = model.evaluate(X_test, y_test)
print("ROI CNN Test Accuracy:", accuracy)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step - accuracy: 0.3810 - loss: 0.6558
ROI CNN Test Accuracy: 0.380952388048172


In [4]:
import pandas as pd

preds = model.predict(X_test).flatten()

df = pd.DataFrame({
    "True_Label": y_test,
    "Fraud_Score": preds
})

print(df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step
    True_Label  Fraud_Score
0            0     0.547043
1            0     0.597466
2            0     0.075917
3            0     0.577033
4            1     0.576555
5            0     0.547043
6            0     0.547043
7            0     0.532060
8            0     0.056586
9            0     0.551532
10           0     0.540482
11           0     0.547043
12           1     0.603266
13           0     0.551532
14           1     0.551532
15           0     0.574218
16           1     0.547043
17           0     0.577478
18           1     0.554526
19           0     0.096505
20           0     0.591526
