# Scientific Image Forgery Detection In Biomedical (ForensicAI)

In [6]:
# Essential Libraries related to Data Analysis

import os # To check the existence of directories
import sys # Exit the program with error codes
import argparse # When running the script it will hanndle the data, output with flags and automatically generate help txt when running script
import warnings # Control Python warning message
import time # Measure how long image processing takes
from pathlib import Path # File path handling

import numpy as np # Numerical counting
import cv2 # OpenCV (Open Source Computer Vision Library)
import pandas as pd # Data manipulation and analysis
import matplotlib
matplotlib.use('Agg') # Configure matplotlib to work without a display
import matplotlib.pyplot as plt # Core plotting library (like MATLAB's plotting)
import matplotlib.patches as matches # Graphical shapes and patches
from matplotlib.gridspec import GridSpec # Advanced subplot layout
from matplotlib.ticker import MaxNLocator # Control axis tick marks

# Install a blanket 'ignore' filter: no category/module is given, so every
# Python warning raised after this line is suppressed.
# NOTE(review): this also hides deprecation notices from the libraries imported
# above - consider narrowing it to specific categories once the notebook is stable.
warnings.filterwarnings('ignore')

In [3]:
# Optional dependency: tqdm (terminal progress bars)
# Example of what it renders: [████████  ] 80% 4000/5000
# HAS_TQDM records whether the import worked so later code can branch on it.
try:
    from tqdm import tqdm
except ImportError:
    HAS_TQDM = False
    print("[INFO] Install tqdm for progress bars: pip install tqdm")
else:
    HAS_TQDM = True

# Optional dependency: seaborn (statistical plots layered on matplotlib)
# HAS_SEABORN plays the same role as HAS_TQDM above.
try:
    import seaborn as sns
except ImportError:
    HAS_SEABORN = False
    print("[INFO] Install seaborn for enhanced plots: pip install seaborn")
else:
    HAS_SEABORN = True

#### Global Configuration

In [4]:
# Colour palette (dark forensics theme) - all values are hex colour strings.

# Surfaces
BG = '#0a0e1a'     # page background
PANEL = '#111827'  # panel background
GRID = '#1e2d3d'   # gridlines

# Text
TEXT = '#e2e8f0'   # primary text
MUTED = '#64748b'  # secondary text

# Class colours
AUTH_C = '#00d4aa'  # authentic class colour
FORG_C = '#ff4d6d'  # forged class colour

# Accents
ACC1 = '#f9c846'  # accent gold
ACC2 = '#7c83fd'  # accent lavender

# Per-channel colours
CH_R = '#ff6b6b'  # red channel
CH_G = '#6bcb77'  # green channel
CH_B = '#4d96ff'  # blue channel

In [15]:
# File suffixes treated as images: lower-case, with the leading dot.
# Kept as a plain set, so membership checks do not depend on ordering.
IMG_EXTS = {
    '.bmp',
    '.jpeg', '.jpg',
    '.png',
    '.tif', '.tiff',
}

#### Section 1 - Dataset Discovery

In [16]:
# Scan the dataset folders and build `image_list`: a list of (file_path, label)
# tuples where label is 'authentic' or 'forged'.
# DATASET_PATH is read here but not set in this cell - it must already be
# defined by an earlier cell.
print("="*70)
print("  SCANNING DATASET")
print("="*70 + "\n")

auth_dir = os.path.join(DATASET_PATH, "authentic")
forg_dir = os.path.join(DATASET_PATH, "forged")

# Reuse the extension set from the configuration cell rather than keeping a
# second literal that could drift out of sync with it.
valid_ext = IMG_EXTS


def _collect_images(class_dir, label, extensions):
    """Recursively collect image files below ``class_dir``.

    Args:
        class_dir: Directory to walk.
        label: Class label attached to every file found.
        extensions: Collection of lower-case suffixes (with leading dot)
            that count as images.

    Returns:
        A list of ``(path, label)`` tuples. Sub-directories and file names
        are visited in sorted order so the result does not depend on the
        order the filesystem happens to return entries in. The list is empty
        if ``class_dir`` does not exist.
    """
    if not os.path.isdir(class_dir):
        # os.walk() silently yields nothing for a missing directory; say so.
        print(f"   [WARN] Directory not found: {class_dir}")
        return []

    found = []
    for root, dirs, files in os.walk(class_dir):
        dirs.sort()  # in-place sort controls the order os.walk descends in
        for fname in sorted(files):
            if Path(fname).suffix.lower() in extensions:
                found.append((os.path.join(root, fname), label))
    return found


# Scan authentic
print("📂 Scanning authentic/...")
auth_images = _collect_images(auth_dir, 'authentic', valid_ext)
n_auth = len(auth_images)
print(f"   Found {n_auth} images")

# Scan forged
print("\n📂 Scanning forged/...")
forg_images = _collect_images(forg_dir, 'forged', valid_ext)
n_forg = len(forg_images)
print(f"   Found {n_forg} images")

# Authentic entries first, then forged - same layout as before
image_list = auth_images + forg_images
n_total = len(image_list)

# Fail with a clear message instead of a ZeroDivisionError in the percentage
# lines below when nothing was found (typically a wrong DATASET_PATH).
if n_total == 0:
    raise FileNotFoundError(
        f"No images with extensions {sorted(valid_ext)} found under "
        f"'{auth_dir}' or '{forg_dir}'. Check DATASET_PATH."
    )

# Summary
print("\n" + "="*70)
print("  ✅ DATASET READY")
print("="*70)
print(f"\n  Total: {n_total} images")
print(f"    Authentic: {n_auth} ({n_auth/n_total*100:.1f}%)")
print(f"    Forged:    {n_forg} ({n_forg/n_total*100:.1f}%)")

# Show up to three sample file names per class
print(f"\n  Sample authentic files:")
for path, label in auth_images[:3]:
    print(f"    • {os.path.basename(path)}")

print(f"\n  Sample forged files:")
for path, label in forg_images[:3]:
    print(f"    • {os.path.basename(path)}")

print("\n" + "="*70)
print(f"\n✨ Variable 'image_list' is ready with {n_total} images!")

  SCANNING DATASET

📂 Scanning authentic/...
   Found 2377 images

📂 Scanning forged/...
   Found 2751 images

  ✅ DATASET READY

  Total: 5128 images
    Authentic: 2377 (46.4%)
    Forged:    2751 (53.6%)

  Sample authentic files:
    • 10.png
    • 10015.png
    • 10017.png

  Sample forged files:
    • 10.png
    • 10015.png
    • 10017.png


✨ Variable 'image_list' is ready with 5128 images!
