In [1]:
!pip install pillow scikit-image tqdm








In [1]:
import io
import os

import numpy as np
from PIL import Image, ImageChops, ImageEnhance, ImageOps
from skimage.feature import local_binary_pattern
from tqdm import tqdm

# Size passed to `Image.resize` for every image before any feature is computed,
# so all feature values are measured on the same pixel grid.
IMG_SIZE = (256, 256)

def preprocess_image(image_path):
    """Load an image as a normalised grayscale PIL image.

    The file is converted to 8-bit grayscale (mode ``'L'``), resized to
    ``IMG_SIZE`` and histogram-equalised.

    Args:
        image_path: Path of an image file that Pillow can open.

    Returns:
        The equalised grayscale ``PIL.Image.Image`` of size ``IMG_SIZE``.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been converted, instead of leaving that to the garbage collector while
    # a whole folder of images is being processed.
    with Image.open(image_path) as source:
        gray = source.convert('L')
    resized = gray.resize(IMG_SIZE)
    return ImageOps.equalize(resized)

def compute_ela_score(image_path, quality=90):
    """Compute Error Level Analysis (ELA) statistics for one image.

    The image is converted to RGB, resized to ``IMG_SIZE`` and re-encoded as
    JPEG at ``quality``. The per-pixel absolute difference between the resized
    image and its re-encoded version is brightness-scaled so that the largest
    band difference maps to 255, then reduced to grayscale.

    Args:
        image_path: Path of an image file that Pillow can open.
        quality: JPEG quality used for the re-encoding step.

    Returns:
        A ``(mean, max)`` tuple of the scaled grayscale difference image.
    """
    with Image.open(image_path) as source:
        original = source.convert('RGB').resize(IMG_SIZE)

    # Re-encode in memory rather than writing `<image_path>.ela.jpg` beside the
    # input: no write access to the dataset folder is needed, nothing can be
    # left behind if a later step raises, and no existing file is overwritten.
    buffer = io.BytesIO()
    original.save(buffer, 'JPEG', quality=quality)
    buffer.seek(0)
    with Image.open(buffer) as recompressed:
        ela_image = ImageChops.difference(original, recompressed)

    # For a multi-band image getextrema() yields one (min, max) pair per band.
    max_diff = max(band_max for _, band_max in ela_image.getextrema())
    if max_diff == 0:
        # Identical images: avoid dividing by zero when computing the scale.
        max_diff = 1
    scale = 255.0 / max_diff
    ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)
    ela_array = np.array(ela_image.convert('L'))  # grayscale for the ELA stats
    return np.mean(ela_array), np.max(ela_array)

def compute_lbp_score(image_path, P=8, R=1):
    """Summarise the uniform Local Binary Pattern histogram of an image.

    The preprocessed grayscale image is run through ``local_binary_pattern``
    (``method='uniform'``), the resulting codes are counted into ``P + 2``
    unit-width bins, and the counts are normalised by their sum.

    Args:
        image_path: Path of the image to analyse.
        P: Number of neighbour points passed to ``local_binary_pattern``.
        R: Neighbourhood radius passed to ``local_binary_pattern``.

    Returns:
        A ``(mean, std)`` tuple over the normalised histogram bins.

    Note:
        Because the histogram is normalised to sum to (almost) 1 over
        ``P + 2`` bins, the returned mean is ~``1 / (P + 2)`` whenever every
        code falls inside a bin; only the std carries image information.
    """
    gray_pixels = np.array(preprocess_image(image_path))
    lbp_codes = local_binary_pattern(gray_pixels, P, R, method='uniform')

    bin_edges = np.arange(0, P + 3)
    counts, _ = np.histogram(lbp_codes.ravel(), bins=bin_edges, range=(0, P + 2))

    # The small epsilon keeps the division defined even for an empty histogram.
    as_float = counts.astype("float")
    normalised = as_float / (as_float.sum() + 1e-6)
    return normalised.mean(), normalised.std()

def compute_pixel_stats(image_path):
    """Return ``(mean, std)`` of the preprocessed image's pixel intensities.

    The statistics are taken over the output of ``preprocess_image``, not over
    the raw file contents.
    """
    pixels = np.array(preprocess_image(image_path))
    return pixels.mean(), pixels.std()

def collect_features(folder):
    """Extract the ELA, LBP and pixel statistics of every image in a folder.

    Only direct entries of ``folder`` whose name ends in a known image suffix
    (case-insensitive) are processed; sub-folders are not searched.

    Args:
        folder: Directory containing the image files.

    Returns:
        A list with one dict per image, holding the keys ``filename``,
        ``ela_mean``, ``ela_max``, ``lbp_mean``, ``lbp_std``, ``pixel_mean``
        and ``pixel_std``. Rows are ordered by file name.

    Raises:
        FileNotFoundError: If ``folder`` does not exist (from ``os.listdir``).
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp')
    # os.listdir() returns entries in arbitrary order; filtering and sorting
    # first makes the row order reproducible and lets the progress bar count
    # only the files that are actually processed.
    image_names = sorted(
        name for name in os.listdir(folder)
        if name.lower().endswith(image_suffixes)
    )

    features = []
    for fname in tqdm(image_names):
        path = os.path.join(folder, fname)
        ela_mean, ela_max = compute_ela_score(path)
        lbp_mean, lbp_std = compute_lbp_score(path)
        pix_mean, pix_std = compute_pixel_stats(path)
        features.append({
            'filename': fname,
            'ela_mean': ela_mean,
            'ela_max': ela_max,
            'lbp_mean': lbp_mean,
            'lbp_std': lbp_std,
            'pixel_mean': pix_mean,
            'pixel_std': pix_std
        })
    return features

def get_features_for_split(base_dir):
    """Collect labelled feature rows for one dataset split.

    ``base_dir`` is expected to contain a ``Real`` and a ``Fake`` sub-folder.
    Each row returned by ``collect_features`` gets a ``label`` key naming the
    sub-folder it came from.

    Args:
        base_dir: Directory of the split.

    Returns:
        A single list with the ``Real`` rows followed by the ``Fake`` rows.
    """
    labelled_rows = []
    for label in ('Real', 'Fake'):
        rows = collect_features(os.path.join(base_dir, label))
        for row in rows:
            row['label'] = label
        labelled_rows = labelled_rows + rows
    return labelled_rows

def find_best_threshold(features, key):
    """Find the cut-off on one feature that best separates Fake from Real.

    100 evenly spaced candidate thresholds between the smallest and largest
    observed value are tried. A row is predicted ``'Fake'`` when its value is
    strictly greater than the threshold and ``'Real'`` otherwise. The first
    candidate reaching the highest accuracy wins.

    Args:
        features: Sequence of dicts, each holding ``key`` and a ``'label'``
            entry (``'Real'`` or ``'Fake'``).
        key: Name of the numeric feature to threshold.

    Returns:
        A ``(best_threshold, best_accuracy)`` tuple.

    Raises:
        ValueError: If ``features`` is empty.
    """
    rows = list(features)
    if not rows:
        raise ValueError("find_best_threshold() needs at least one feature row")

    values = np.asarray([row[key] for row in rows], dtype=float)
    labels = [row['label'] for row in rows]
    is_fake = np.array([label == 'Fake' for label in labels])
    is_real = np.array([label == 'Real' for label in labels])

    # Start below any reachable accuracy so a threshold is always returned,
    # even when every candidate scores 0 (e.g. identical values, all 'Fake').
    best_acc = -1.0
    best_thresh = None
    for thresh in np.linspace(values.min(), values.max(), 100):
        predicted_fake = values > thresh
        # A row is correct only when the prediction matches its exact label;
        # rows with any other label never count as correct.
        correct = (predicted_fake & is_fake) | (~predicted_fake & is_real)
        acc = np.mean(correct)
        if acc > best_acc:
            best_acc = acc
            best_thresh = thresh
    return best_thresh, best_acc

In [11]:
# Experiment driver: for each document type, fit a single-feature threshold on
# the train split and measure how well it transfers to the test split.
import pandas as pd

if __name__ == "__main__":
    # NOTE(review): absolute, machine-specific dataset locations; each
    # <base>/<doc_type> folder must contain `Real` and `Fake` sub-folders.
    train_base = r"E:\sem4\proj\NamedDataset\images\Train"
    test_base = r"E:\sem4\proj\NamedDataset\images\Test"
    doc_types = ['IndianCurrency', 'SlovakIDCard']  # Add more if needed

    feature_keys = ['ela_mean', 'ela_max', 'lbp_mean', 'lbp_std', 'pixel_mean', 'pixel_std']
    results = []
    for doc_type in doc_types:
        print(f"\n=== Document Type: {doc_type} ===")
        train_dir = os.path.join(train_base, doc_type)
        test_dir = os.path.join(test_base, doc_type)

        print("Extracting features from train set...")
        train_features = get_features_for_split(train_dir)
        print("Extracting features from test set...")
        test_features = get_features_for_split(test_dir)

        # The labels do not depend on the feature, so read them once.
        test_labels = [f['label'] for f in test_features]

        for key in feature_keys:
            print(f"\nFinding threshold for {key.upper()}...")
            thresh, acc = find_best_threshold(train_features, key)
            print(f"Best threshold: {thresh:.4f}, Train accuracy: {acc:.4f}")

            # Evaluate on the test set with the same rule used during fitting:
            # strictly above the threshold means 'Fake'.
            test_values = [f[key] for f in test_features]
            test_preds = ['Fake' if v > thresh else 'Real' for v in test_values]
            test_acc = np.mean([p == l for p, l in zip(test_preds, test_labels)])
            print(f"Test accuracy: {test_acc:.4f}")

            results.append({
                'document_type': doc_type,
                'feature_key': key,
                'threshold': thresh,
                'train_accuracy': acc,
                'test_accuracy': test_acc
            })

    # One row per (document type, feature); built where `results` is defined
    # so the table and its source list always live in the same scope.
    df_results = pd.DataFrame(results)


=== Document Type: IndianCurrency ===
Extracting features from train set...


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'E:\\sem4\\proj\\NamedDataset\\images\\Train\\IndianCurrency\\Real'

In [3]:
# Show the summary table of thresholds and train/test accuracies built by the
# experiment cell (one row per document type and feature).
df_results

Unnamed: 0,document_type,feature_key,threshold,train_accuracy,test_accuracy
0,IndianCurrency,ela_mean,35.749466,0.625,0.60221
1,IndianCurrency,ela_max,255.0,0.625,0.60221
2,IndianCurrency,lbp_mean,0.1,0.625,0.60221
3,IndianCurrency,lbp_std,0.039862,0.75,0.734807
4,IndianCurrency,pixel_mean,127.421957,0.6725,0.60221
5,IndianCurrency,pixel_std,74.686326,0.64,0.574586
6,SlovakIDCard,ela_mean,15.163803,0.5,0.49
7,SlovakIDCard,ela_max,126.0,0.5,0.505
8,SlovakIDCard,lbp_mean,0.1,0.505,0.525
9,SlovakIDCard,lbp_std,0.035266,0.5,0.5


In [6]:
import os
import numpy as np
from PIL import Image, ImageChops, ImageEnhance
from skimage.feature import local_binary_pattern
from tqdm import tqdm

def evaluate_images(image_dir, feature_key=None, threshold=None):
    """Extract features for every image in a folder and optionally classify.

    Feature extraction is delegated to ``collect_features`` so this function
    and the training pipeline always compute exactly the same values. One
    line per image is printed once extraction has finished.

    Args:
        image_dir: Directory containing the images to evaluate.
        feature_key: Name of the feature to threshold (e.g. ``'lbp_std'``).
            When falsy, no prediction is made.
        threshold: Cut-off for ``feature_key``; values strictly above it are
            predicted ``'Fake'``, all others ``'Real'``. When ``None``, no
            prediction is made.

    Returns:
        A list with one feature dict per image. Each dict also carries a
        ``'prediction'`` key when both ``feature_key`` and ``threshold`` were
        given.

    Raises:
        KeyError: If ``feature_key`` is not one of the extracted features.
    """
    print(f"Evaluating images in: {image_dir}")
    # Decide once whether predictions are requested; it is the same for all rows.
    make_prediction = bool(feature_key) and threshold is not None

    results = collect_features(image_dir)
    for result in results:
        fname = result['filename']
        if make_prediction:
            value = result[feature_key]
            prediction = 'Fake' if value > threshold else 'Real'
            result['prediction'] = prediction
            print(f"{fname}: {feature_key}={value:.4f} -> Prediction: {prediction}")
        else:
            print(f"{fname}: ELA_MEAN={result['ela_mean']:.2f}, ELA_MAX={result['ela_max']:.2f}, "
                  f"LBP_MEAN={result['lbp_mean']:.4f}, LBP_STD={result['lbp_std']:.4f}, "
                  f"PIXEL_MEAN={result['pixel_mean']:.2f}, PIXEL_STD={result['pixel_std']:.2f}")
    return results
# Example usage: score every image in one folder with a single
# feature/threshold pair and show the per-image results.
# Replace this path with your folder containing images.
image_folder = r"E:\sem4\proj\Dataset\images\Eval"
# NOTE(review): 0.0399 looks like the rounded IndianCurrency `lbp_std`
# threshold from the summary table — confirm before reusing it for other
# document types.
evaluate_images(image_folder, feature_key='lbp_std', threshold=0.0399)

Evaluating images in: E:\sem4\proj\Dataset\images\Eval


 88%|████████▊ | 7/8 [00:00<00:00, 18.31it/s]

Fake_IndianCurrency_00123.png: lbp_std=0.0865 -> Prediction: Fake
Fake_IndianCurrency_007.jpg: lbp_std=0.0317 -> Prediction: Real
Fake_SlovakID_100.png: lbp_std=0.0336 -> Prediction: Real
Fake_SlovakID_101.png: lbp_std=0.0351 -> Prediction: Real
Fake_SpanishID_070.jpg: lbp_std=0.0423 -> Prediction: Fake
Real_IndianCurrency_079.jpg: lbp_std=0.0360 -> Prediction: Real
Real_SlovakID_100.png: lbp_std=0.0345 -> Prediction: Real


100%|██████████| 8/8 [00:00<00:00, 14.98it/s]

Real_SpanishID_020.jpg: lbp_std=0.0431 -> Prediction: Fake





[{'filename': 'Fake_IndianCurrency_00123.png',
  'ela_mean': np.float64(21.278564453125),
  'ela_max': np.uint8(218),
  'lbp_mean': np.float64(0.09999999999847413),
  'lbp_std': np.float64(0.08651942826014546),
  'pixel_mean': np.float64(153.82676696777344),
  'pixel_std': np.float64(83.32816915729389),
  'prediction': 'Fake'},
 {'filename': 'Fake_IndianCurrency_007.jpg',
  'ela_mean': np.float64(17.25677490234375),
  'ela_max': np.uint8(99),
  'lbp_mean': np.float64(0.09999999999847414),
  'lbp_std': np.float64(0.03165887450529336),
  'pixel_mean': np.float64(127.04475402832031),
  'pixel_std': np.float64(73.69703761286412),
  'prediction': 'Real'},
 {'filename': 'Fake_SlovakID_100.png',
  'ela_mean': np.float64(12.759109497070312),
  'ela_max': np.uint8(100),
  'lbp_mean': np.float64(0.09999999999847413),
  'lbp_std': np.float64(0.0335922310170125),
  'pixel_mean': np.float64(125.82546997070312),
  'pixel_std': np.float64(73.52961308016037),
  'prediction': 'Real'},
 {'filename': 'Fa