In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os
from PIL import Image
import random
from skimage import measure

import cv2
from shapely.geometry import Polygon


# Read images from dir

In [None]:
# Dataset locations: every directory hangs off the same competition input root.
DATA_ROOT = "/kaggle/input/recodai-luc-scientific-image-forgery-detection"

authentic_images_dir = f"{DATA_ROOT}/train_images/authentic"
forged_images_dir = f"{DATA_ROOT}/train_images/forged"
forged_images_mask_dir = f"{DATA_ROOT}/train_masks"

In [None]:
# Alphabetically sorted file names of each dataset directory, so that a given
# index always refers to the same file from run to run.
authentic_images_files, forged_images_files, forged_images_mask_files = (
    sorted(os.listdir(directory))
    for directory in (authentic_images_dir, forged_images_dir, forged_images_mask_dir)
)

In [None]:
# Report how many files were found in each directory (one line per directory).
print(
    f"Length of Authentic files: {len(authentic_images_files)}",
    f"Length of Forged files: {len(forged_images_files)}",
    f"Length of Forged mask files: {len(forged_images_mask_files)}",
    sep="\n",
)

In [None]:
def _read_image(path):
    """Load the image at ``path`` into a NumPy array, closing the file afterwards."""
    with Image.open(path) as img:
        return np.array(img)


def _buffered_outlines(contours, buffer_distance):
    """Yield ``(x, y)`` coordinate arrays outlining each contour grown by ``buffer_distance``."""
    for contour in contours:
        # A polygon ring needs at least three distinct vertices; shorter
        # contours (e.g. open ones clipped by the image border) would make
        # Polygon() raise, so they are skipped.
        if len(contour) < 3:
            continue

        # find_contours returns (row, col) pairs; flip them to (x, y).
        buffered = Polygon(contour[:, ::-1]).buffer(buffer_distance)
        if buffered.is_empty:
            continue

        # buffer() may hand back several polygons; only single polygons
        # expose `.exterior`, so iterate over the parts in that case.
        parts = buffered.geoms if buffered.geom_type == "MultiPolygon" else [buffered]
        for part in parts:
            yield np.array(part.exterior.coords)


def plot_sample(n=1, i=None, buffer_distance=7):
    """Plot an authentic image, its forged counterpart and the forgery mask side by side.

    The sample is selected by its position in the module-level
    ``forged_images_mask_files`` list; the authentic and forged images are
    looked up as ``<mask file stem>.png`` in ``authentic_images_dir`` and
    ``forged_images_dir``. Contours of the binarised mask are grown by
    ``buffer_distance`` and drawn as dashed red outlines over the forged image.
    The shapes of the three loaded arrays are printed before plotting.

    Parameters
    ----------
    n : int, default 1
        Only used when ``i`` is None: the random index is drawn from
        ``[0, len(forged_images_mask_files) - n]``.
    i : int or None, default None
        Index into ``forged_images_mask_files``. A random index is used when None.
    buffer_distance : float, default 7
        Distance (in pixel coordinates) by which each contour polygon is
        expanded before being drawn.
    """
    if i is None:
        # `i` indexes the mask list below, so the random range must be
        # bounded by that list rather than by the authentic image list.
        i = random.randint(0, len(forged_images_mask_files) - n)

    mask_name = forged_images_mask_files[i]
    # splitext strips only the final extension, so stems containing dots survive.
    stem = os.path.splitext(mask_name)[0]

    mask_path = os.path.join(forged_images_mask_dir, mask_name)
    authentic_path = os.path.join(authentic_images_dir, f"{stem}.png")
    forged_path = os.path.join(forged_images_dir, f"{stem}.png")

    # Read images
    authentic_img = _read_image(authentic_path)
    forged_img = _read_image(forged_path)
    mask_img = np.load(mask_path)

    print(authentic_img.shape, forged_img.shape, mask_img.shape)

    # assume mask_img[0] is the binary mask
    # NOTE(review): only the first slice along axis 0 is used. If a mask file
    # stacks one slice per forged region, the remaining regions are not shown.
    # Confirm against the shapes printed above.
    mask = mask_img[0] if mask_img.ndim > 2 else mask_img
    mask = (mask > 0.5).astype(np.uint8)

    # find contours (boundaries) of forged areas
    contours = measure.find_contours(mask, level=0.5)

    # Plot side-by-side
    fig, (ax_authentic, ax_forged, ax_mask) = plt.subplots(1, 3, figsize=(12, 4))

    ax_authentic.imshow(authentic_img)
    ax_authentic.set_title("Authentic Image")
    ax_authentic.axis('off')

    ax_forged.imshow(forged_img)
    ax_forged.set_title("Forged Image (Highlighted)")
    ax_forged.axis('off')

    # draw the grown contours over the forged image
    for outline in _buffered_outlines(contours, buffer_distance):
        ax_forged.plot(outline[:, 0], outline[:, 1], color='red', linestyle='--')

    ax_mask.imshow(mask, cmap='gray')
    ax_mask.set_title("Forged Mask")
    ax_mask.axis('off')

    fig.tight_layout()
    plt.show()

In [None]:
# Render a spread of hand-picked samples from across the mask list.
for sample_index in (11, 51, 101, 501, 1001, 1502, 2001):
    plot_sample(i=sample_index)

# First Impression

1. The forged images in the training dataset are mostly created with the Copy & Move technique.
2. The copied part / entity from the image is rotated at random angles.
3. Majority of the images are from microscopic slides.
4. Multiple entities are copied & moved in the same forged image.
5. Mistake in corn forged images: when multiple entities are copied & moved, only one entity is masked.


## Put simply
If some entity is repeated within an image, then the image is forged.

---