# In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree recursively and print the full path of every
# file found (directory names themselves are not printed).
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# In [None]:
import numpy as np
import pandas as pd
import os
from pathlib import Path
import json
import cv2
from tqdm import tqdm
from scipy.fftpack import dct
from scipy.ndimage import median_filter, gaussian_filter
from scipy import stats
from skimage.measure import label, regionprops
from skimage.feature import local_binary_pattern, hog, graycomatrix, graycoprops
from skimage.filters import sobel
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor, as_completed
import warnings
warnings.filterwarnings('ignore')

import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_sample_weight
import joblib
import torch

# Report whether CUDA is usable (and which device), then print the run banner.
if torch.cuda.is_available():
    print("GPU Available: True")
    print(f"GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU Available: False")

print(
    "=" * 80,
    "ACCURACY-OPTIMIZED FORGERY DETECTION WITH ENSEMBLE",
    "=" * 80,
    sep="\n",
)

# ============================================================================
# CONFIGURATION
# ============================================================================

class Config:
    """Static settings shared by data discovery, training and prediction."""

    # ---- Dataset locations -------------------------------------------------
    BASE_PATH = Path('/kaggle/input/recodai-luc-scientific-image-forgery-detection')
    TRAIN_IMAGES_DIR = BASE_PATH.joinpath('train_images')
    TRAIN_MASKS_DIR = BASE_PATH.joinpath('train_masks')
    TEST_IMAGES_DIR = BASE_PATH.joinpath('test_images')
    SAMPLE_SUB_PATH = BASE_PATH.joinpath('sample_submission.csv')

    # ---- Patch sampling ----------------------------------------------------
    PATCH_SIZE = 64                   # side length of the square patches, in pixels
    PATCHES_PER_IMAGE_AUTHENTIC = 25  # random patches drawn per authentic image
    PATCHES_PER_IMAGE_FORGED = 80     # max forged-pixel-centred patches per forged image

    # ---- Training-set size limits -------------------------------------------
    MAX_TRAIN_IMAGES = 800     # cap applied to each image class separately
    MAX_TRAIN_SAMPLES = 60000  # above this, samples are subsampled per class

    # ---- Training switches ---------------------------------------------------
    USE_GPU = True
    USE_ENSEMBLE = True        # also train the Random Forest next to XGBoost
    USE_AUGMENTATION = True
    USE_CLASS_WEIGHTS = True   # Instead of SMOTE

    # ---- XGBoost hyper-parameters -------------------------------------------
    XGBOOST_PARAMS = dict(
        n_estimators=400,
        max_depth=10,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        min_child_weight=3,
        gamma=0.1,
        reg_alpha=0.5,
        reg_lambda=2.0,
    )

    # ---- Random Forest hyper-parameters --------------------------------------
    RF_PARAMS = dict(
        n_estimators=250,
        max_depth=18,
        min_samples_split=5,
        min_samples_leaf=2,
        n_jobs=-1,
        class_weight='balanced',
    )

    # ---- Prediction ----------------------------------------------------------
    STRIDE = 28               # sliding-window step, in pixels
    FORGERY_THRESHOLD = 0.45  # per-pixel probability cut-off for the binary mask
    MIN_REGION_AREA = 120     # connected regions smaller than this are dropped

    # ---- Parallel processing ---------------------------------------------------
    NUM_WORKERS = 6

    # ---- Visualisation ---------------------------------------------------------
    VISUALIZE_SAMPLES = True
    MAX_VIZ_SAMPLES = 2

# ============================================================================
# DATA DISCOVERY
# ============================================================================

def discover_data(config):
    """Find the training images, training masks and test images on disk.

    Args:
        config: Object exposing ``TRAIN_IMAGES_DIR``, ``TRAIN_MASKS_DIR`` and
            ``TEST_IMAGES_DIR`` (path objects) and ``MAX_TRAIN_IMAGES`` (int).

    Returns:
        A 4-tuple ``(authentic_images, forged_images, mask_files, test_images)``:
        sorted lists of ``*.png`` paths from the ``authentic`` and ``forged``
        sub-folders (each truncated to ``MAX_TRAIN_IMAGES``; empty when the
        folder is missing), a dict mapping each ``*.npy`` mask's file stem to
        its path, and the sorted list of ``*.png`` test image paths.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("DATA DISCOVERY")
    print(rule)

    def _first_pngs(folder):
        # A missing class folder simply contributes no images.
        if not folder.exists():
            return []
        return sorted(folder.glob('*.png'))[:config.MAX_TRAIN_IMAGES]

    authentic_images = _first_pngs(config.TRAIN_IMAGES_DIR / 'authentic')
    forged_images = _first_pngs(config.TRAIN_IMAGES_DIR / 'forged')

    print(f"üìÅ Training - Authentic: {len(authentic_images)}, Forged: {len(forged_images)}")

    # Masks are keyed by file stem so callers can match them to image stems.
    if config.TRAIN_MASKS_DIR.exists():
        mask_files = {npy.stem: npy for npy in config.TRAIN_MASKS_DIR.glob('*.npy')}
    else:
        mask_files = {}

    print(f"üìÅ Masks: {len(mask_files)}")

    test_images = sorted(config.TEST_IMAGES_DIR.glob('*.png'))
    print(f"üìÅ Test: {len(test_images)}")
    print(rule)

    return authentic_images, forged_images, mask_files, test_images

# ============================================================================
# COMPREHENSIVE FEATURE EXTRACTION
# ============================================================================

def extract_comprehensive_features(patch):
    """Compute a hand-crafted feature vector for one image patch.

    The vector concatenates, in this order: per-channel colour statistics and
    channel ratios/differences (3-D input only), grayscale statistics, 2-D DCT
    coefficients and band-energy ratios, Sobel gradient statistics, Canny edge
    densities (whole patch plus a 3x3 grid), a uniform-LBP histogram, GLCM
    properties, noise residual statistics, quadrant statistics, the first 32
    HOG values (only when both sides are >= 16), skimage Sobel statistics, and
    skewness/kurtosis.

    Args:
        patch: 2-D (grayscale) or 3-D array. For 3-D input the first three
            channels are treated as R, G, B (the grayscale conversion uses
            ``cv2.COLOR_RGB2GRAY``).

    Returns:
        1-D float array of features, or ``None`` when either spatial side of
        ``patch`` is shorter than 8 pixels. Non-finite values are replaced by
        0 so downstream scalers/classifiers never see NaN or infinity.
    """
    if patch.shape[0] < 8 or patch.shape[1] < 8:
        return None

    features = []

    if patch.ndim == 3:
        gray = cv2.cvtColor(patch, cv2.COLOR_RGB2GRAY)
    else:
        gray = patch

    gray = gray.astype(np.float32)

    # 1. Color features
    if patch.ndim == 3:
        # Work in float32: on uint8 data `g + 1` wraps 255 -> 0 (division by
        # zero -> inf/NaN) and `r - g` wraps modulo 256.
        rgb = patch.astype(np.float32)

        for channel in range(3):
            ch = rgb[:, :, channel]
            features.extend([
                np.mean(ch), np.std(ch), np.median(ch),
                np.percentile(ch, 25), np.percentile(ch, 75),
                np.min(ch), np.max(ch), np.var(ch)
            ])

        r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
        features.extend([
            np.mean(r / (g + 1)), np.mean(g / (b + 1)), np.mean(b / (r + 1)),
            np.std(r - g), np.std(g - b), np.std(b - r)
        ])

    # 2. Grayscale statistics
    features.extend([
        np.mean(gray), np.std(gray), np.median(gray),
        np.percentile(gray, 10), np.percentile(gray, 25),
        np.percentile(gray, 75), np.percentile(gray, 90),
        np.min(gray), np.max(gray), np.var(gray), np.ptp(gray)
    ])

    # 3. DCT features (separable 2-D DCT: transform columns, then rows)
    dct_result = dct(dct(gray.T, norm='ortho').T, norm='ortho')
    features.extend(dct_result[:8, :8].flatten())

    # Band energies. The DCT puts the DC/lowest frequencies at index (0, 0),
    # so the bands are nested top-left squares (not windows around the centre,
    # which would be the layout of a shifted FFT spectrum). Slices clip
    # automatically for patches smaller than 16 pixels.
    dct_abs = np.abs(dct_result)
    low_freq = np.sum(dct_abs[:8, :8])
    mid_freq = np.sum(dct_abs[:16, :16]) - low_freq
    high_freq = np.sum(dct_abs) - mid_freq - low_freq
    total = low_freq + mid_freq + high_freq + 1e-10  # epsilon avoids 0/0 on all-zero patches
    features.extend([low_freq/total, mid_freq/total, high_freq/total])

    # 4. Gradient features
    grad_x = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
    grad_mag = np.sqrt(grad_x**2 + grad_y**2)
    grad_dir = np.arctan2(grad_y, grad_x)

    features.extend([
        np.mean(np.abs(grad_x)), np.std(np.abs(grad_x)), np.max(np.abs(grad_x)),
        np.mean(np.abs(grad_y)), np.std(np.abs(grad_y)), np.max(np.abs(grad_y)),
        np.mean(grad_mag), np.std(grad_mag), np.max(grad_mag),
        np.percentile(grad_mag, 90), np.std(grad_dir)
    ])

    # 5. Edge features: overall Canny edge density plus density per 3x3 grid cell
    edges = cv2.Canny(gray.astype(np.uint8), 50, 150)
    edge_density = np.sum(edges > 0) / edges.size

    h, w = edges.shape
    h_third, w_third = max(1, h // 3), max(1, w // 3)
    edge_regions = []
    for i in range(3):
        for j in range(3):
            region = edges[i*h_third:(i+1)*h_third, j*w_third:(j+1)*w_third]
            if region.size > 0:
                edge_regions.append(np.sum(region > 0) / region.size)

    features.append(edge_density)
    features.extend(edge_regions)

    # 6. LBP (uniform patterns with P=8 give codes 0..9 -> 10 histogram bins)
    try:
        lbp = local_binary_pattern(gray, 8, 1, method='uniform')
        lbp_hist, _ = np.histogram(lbp.ravel(), bins=10, range=(0, 10), density=True)
        features.extend(lbp_hist)
    except Exception:
        # Keep the vector length fixed if the descriptor fails.
        features.extend([0] * 10)

    # 7. GLCM: 5 properties x 4 angles = 20 values
    try:
        gray_uint = gray.astype(np.uint8)
        glcm = graycomatrix(gray_uint, [1], [0, np.pi/4, np.pi/2, 3*np.pi/4],
                            levels=256, symmetric=True, normed=True)

        for prop in ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']:
            features.extend(graycoprops(glcm, prop).ravel())
    except Exception:
        features.extend([0] * 20)

    # 8. Noise analysis: residuals after median and Gaussian smoothing
    denoised = median_filter(gray, size=3)
    noise = gray - denoised

    features.extend([
        np.std(noise), np.mean(np.abs(noise)),
        np.percentile(np.abs(noise), 90),
        np.percentile(np.abs(noise), 95),
        np.max(np.abs(noise))
    ])

    gaussian_denoised = gaussian_filter(gray, sigma=1.0)
    high_freq_noise = gray - gaussian_denoised
    features.extend([np.std(high_freq_noise), np.mean(np.abs(high_freq_noise))])

    # 9. Texture variance: mean/std/var of each quadrant
    h, w = gray.shape
    h_half, w_half = h // 2, w // 2

    if h_half > 0 and w_half > 0:
        subregions = [
            gray[:h_half, :w_half], gray[:h_half, w_half:],
            gray[h_half:, :w_half], gray[h_half:, w_half:]
        ]
        for region in subregions:
            if region.size > 0:
                features.extend([np.mean(region), np.std(region), np.var(region)])

    # 10. HOG (first 32 values only; skipped entirely for patches under 16 px)
    if gray.shape[0] >= 16 and gray.shape[1] >= 16:
        try:
            hog_features = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                               cells_per_block=(2, 2), visualize=False, feature_vector=True)
            features.extend(hog_features[:32])
        except Exception:
            features.extend([0] * 32)

    # 11. Sobel edges
    try:
        sobel_edges = sobel(gray)
        features.extend([
            np.mean(sobel_edges), np.std(sobel_edges),
            np.max(sobel_edges), np.percentile(sobel_edges, 90)
        ])
    except Exception:
        features.extend([0] * 4)

    # 12. Statistical moments
    features.extend([stats.skew(gray.ravel()), stats.kurtosis(gray.ravel())])

    # Constant patches yield NaN for some statistics (e.g. skewness, GLCM
    # correlation); map every non-finite value to 0, matching the zero
    # fallbacks used above.
    return np.nan_to_num(
        np.asarray(features, dtype=np.float64), nan=0.0, posinf=0.0, neginf=0.0
    )

# ============================================================================
# DATA AUGMENTATION
# ============================================================================

def augment_patch(patch):
    """Return the patch together with three fixed geometric variants.

    Despite the section title, nothing here is random: the result is always
    ``[original, cv2.flip(patch, 1), cv2.flip(patch, 0), 90-degree clockwise
    rotation]``, in that order.
    """
    return [
        patch,
        cv2.flip(patch, 1),
        cv2.flip(patch, 0),
        cv2.rotate(patch, cv2.ROTATE_90_CLOCKWISE),
    ]

# ============================================================================
# TRAINING DATA GENERATION
# ============================================================================

def process_image_patches_advanced(img_path, mask_path, config, is_forged):
    """Extract labelled feature vectors from the patches of one training image.

    Authentic images (``is_forged`` false) contribute
    ``config.PATCHES_PER_IMAGE_AUTHENTIC`` randomly placed patches labelled 0.

    Forged images contribute up to ``config.PATCHES_PER_IMAGE_FORGED`` patches
    centred (as far as the image border allows) on randomly chosen forged
    pixels, labelled 1 and optionally augmented, plus up to
    ``config.PATCHES_PER_IMAGE_AUTHENTIC // 2`` random patches whose forged
    fraction is below 5 %, labelled 0.

    Args:
        img_path: Path of the image file to read.
        mask_path: Path of the ``.npy`` forgery mask, or ``None``.
        config: Settings object (``PATCH_SIZE``, ``PATCHES_PER_IMAGE_*``,
            ``USE_AUGMENTATION``).
        is_forged: Whether the image belongs to the forged class.

    Returns:
        List of ``(features, label)`` tuples. The list is empty when the image
        cannot be read, is smaller than one patch, or is forged but has no
        mask; it holds whatever was collected so far when an error occurs.
    """
    patches_data = []
    size = config.PATCH_SIZE

    def _collect(patch, label, augment=False):
        # Featurise one patch (plus its augmented variants) and store the result.
        if patch.shape[0] != size or patch.shape[1] != size:
            return
        variants = augment_patch(patch) if augment else [patch]
        for variant in variants:
            features = extract_comprehensive_features(variant)
            if features is not None:
                patches_data.append((features, label))

    try:
        img = cv2.imread(str(img_path))
        if img is None:
            return patches_data
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        h, w = img.shape[:2]
        if h < size or w < size:
            return patches_data

        if is_forged:
            if not mask_path:
                # Without a mask the forged pixels are unknown; sampling this
                # image as "authentic" would feed wrongly labelled patches
                # into training, so skip it.
                return patches_data

            mask = np.load(str(mask_path))
            if mask.ndim == 3 and mask.shape[:2] != (h, w) and mask.shape[1:] == (h, w):
                # Stack stored channels-first, i.e. (n, H, W): merge over axis 0.
                # NOTE(review): on-disk mask layout is not visible here - confirm.
                mask = mask.max(axis=0)
            elif mask.ndim > 2:
                mask = mask[:, :, 0] if mask.shape[2] == 1 else mask.max(axis=2)

            # Binarise before resizing: cv2.resize rejects bool arrays, and
            # nearest-neighbour resampling of a binary mask stays binary.
            mask = (mask > 0).astype(np.uint8)
            if mask.shape[:2] != (h, w):
                mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)

            forged_coords = np.argwhere(mask > 0)

            if len(forged_coords) > 0:
                n_samples = min(config.PATCHES_PER_IMAGE_FORGED, len(forged_coords))
                indices = np.random.choice(len(forged_coords), n_samples, replace=False)

                for idx in indices:
                    y_center, x_center = forged_coords[idx]
                    # Centre the window on the forged pixel, clamped to the image.
                    y = max(0, min(y_center - size // 2, h - size))
                    x = max(0, min(x_center - size // 2, w - size))

                    # Roughly half of the forged patches get augmented.
                    augment = config.USE_AUGMENTATION and np.random.rand() > 0.5
                    _collect(img[y:y+size, x:x+size], 1, augment=augment)

            # Negative examples taken from the untouched part of the forged image.
            for _ in range(config.PATCHES_PER_IMAGE_AUTHENTIC // 2):
                y = np.random.randint(0, h - size + 1)
                x = np.random.randint(0, w - size + 1)
                patch_mask = mask[y:y+size, x:x+size]

                if patch_mask.sum() / patch_mask.size < 0.05:
                    _collect(img[y:y+size, x:x+size], 0)
        else:
            for _ in range(config.PATCHES_PER_IMAGE_AUTHENTIC):
                y = np.random.randint(0, h - size + 1)
                x = np.random.randint(0, w - size + 1)
                _collect(img[y:y+size, x:x+size], 0)

    except Exception as e:
        # Best effort: one bad image must not abort the whole run, but the
        # failure should be visible instead of silently swallowed.
        print(f"   Warning: skipped {img_path}: {type(e).__name__}: {e}")

    return patches_data

def generate_training_data_advanced(authentic_images, forged_images, mask_files, config):
    """Build the patch-level training set using a pool of worker threads.

    Args:
        authentic_images: Paths of authentic training images.
        forged_images: Paths of forged training images.
        mask_files: Dict mapping an image stem to its mask path; forged images
            without an entry are submitted with ``None`` as mask path.
        config: Settings object (``NUM_WORKERS``, ``MAX_TRAIN_SAMPLES``, plus
            whatever ``process_image_patches_advanced`` reads).

    Returns:
        ``(X_train, y_train)`` arrays of features and 0/1 labels. Both are
        empty arrays when no patch could be extracted. When more than
        ``MAX_TRAIN_SAMPLES`` samples were collected, each class is randomly
        reduced to at most half that number and the result is shuffled.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("GENERATING TRAINING DATA")
    print(rule)

    collected = []

    def _run_jobs(jobs, label):
        # Fan the per-image work out to the pool and gather results as they finish.
        with ThreadPoolExecutor(max_workers=config.NUM_WORKERS) as pool:
            pending = [pool.submit(process_image_patches_advanced, *job) for job in jobs]
            for finished in tqdm(as_completed(pending), total=len(pending), desc=label):
                collected.extend(finished.result())

    print("\nüìä Processing authentic images...")
    _run_jobs([(path, None, config, False) for path in authentic_images], "Authentic")

    print("\nüìä Processing forged images...")
    _run_jobs(
        [(path, mask_files.get(path.stem), config, True) for path in forged_images],
        "Forged",
    )

    if not collected:
        return np.array([]), np.array([])

    X_train = np.array([feats for feats, _ in collected])
    y_train = np.array([target for _, target in collected])

    # Balanced sampling
    if len(X_train) > config.MAX_TRAIN_SAMPLES:
        quota = config.MAX_TRAIN_SAMPLES // 2
        kept = []
        for cls in (0, 1):
            cls_idx = np.where(y_train == cls)[0]
            if len(cls_idx) > quota:
                cls_idx = np.random.choice(cls_idx, quota, replace=False)
            kept.append(cls_idx)

        order = np.concatenate(kept)
        np.random.shuffle(order)

        X_train, y_train = X_train[order], y_train[order]

    n_total = len(y_train)
    n_authentic = np.sum(y_train == 0)
    n_forged = np.sum(y_train == 1)

    print(f"\n‚úì Training samples: {len(X_train)}")
    print(f"  - Authentic: {n_authentic} ({100*n_authentic/n_total:.1f}%)")
    print(f"  - Forged: {n_forged} ({100*n_forged/n_total:.1f}%)")
    print(f"  - Features: {X_train.shape[1]}")

    return X_train, y_train

# ============================================================================
# MODEL TRAINING
# ============================================================================

def train_ensemble_model(X_train, y_train, config):
    """Fit the feature scaler and the classifier ensemble.

    Args:
        X_train: 2-D feature matrix.
        y_train: 0/1 label vector aligned with ``X_train``.
        config: Settings object (``USE_CLASS_WEIGHTS``, ``USE_GPU``,
            ``USE_ENSEMBLE``, ``XGBOOST_PARAMS``, ``RF_PARAMS``).

    Returns:
        ``(models, scaler)`` where ``models`` is a list of ``(name, estimator)``
        tuples (XGBoost first, Random Forest second when ``USE_ENSEMBLE`` is
        set) and ``scaler`` is the fitted ``StandardScaler`` that must be
        applied to features before prediction.
    """
    print("\n" + "="*80)
    print("TRAINING ENSEMBLE")
    print("="*80)

    # Standardize
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)

    # Compute sample weights for class imbalance
    sample_weights = None
    if config.USE_CLASS_WEIGHTS:
        sample_weights = compute_sample_weight('balanced', y_train)
        print(f"\nüìä Using class weights for balance")

    models = []

    # XGBoost with GPU. Only request CUDA when a device is actually present;
    # otherwise USE_GPU=True would fail on a CPU-only session.
    use_cuda = bool(config.USE_GPU) and torch.cuda.is_available()
    print("\nüöÄ Training XGBoost (GPU)...")
    if config.USE_GPU and not use_cuda:
        print("   CUDA not available - XGBoost will train on CPU")

    xgb_params = {
        'tree_method': 'hist',
        'device': 'cuda' if use_cuda else 'cpu',
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'random_state': 42,
        **config.XGBOOST_PARAMS
    }

    xgb_model = xgb.XGBClassifier(**xgb_params)
    fit_kwargs = {} if sample_weights is None else {'sample_weight': sample_weights}
    xgb_model.fit(X_train, y_train, verbose=50, **fit_kwargs)
    models.append(('XGBoost', xgb_model))

    # Random Forest (takes its balancing from RF_PARAMS, not from sample_weights)
    if config.USE_ENSEMBLE:
        print("\nüå≤ Training Random Forest...")
        rf_model = RandomForestClassifier(**config.RF_PARAMS, random_state=42)
        rf_model.fit(X_train, y_train)
        models.append(('RandomForest', rf_model))

    # Evaluate. NOTE: this is accuracy on the training data itself, so it is an
    # optimistic sanity check, not a generalisation estimate.
    print("\nüìä Model Performance:")
    for name, model in models:
        score = model.score(X_train, y_train)
        print(f"   {name}: {score:.4f}")

    return models, scaler

# ============================================================================
# PREDICTION
# ============================================================================

def predict_image_ensemble(image, models, scaler, config):
    """Predict a binary forgery mask for one image with a sliding window.

    Patch features are extracted once and shared by all models. Each model's
    per-patch probability is spread over the patch area and averaged where
    patches overlap; the per-model maps are then averaged, thresholded at
    ``config.FORGERY_THRESHOLD``, cleaned with morphological close/open, and
    filtered by region area and compactness.

    Args:
        image: H x W (x C) image array.
        models: List of ``(name, estimator)`` tuples exposing ``predict_proba``.
        scaler: Fitted scaler applied to the features before prediction.
        config: Settings object (``PATCH_SIZE``, ``STRIDE``,
            ``FORGERY_THRESHOLD``, ``MIN_REGION_AREA``).

    Returns:
        ``(mask, confidence)``: a uint8 H x W mask of 0/1 values and the mean
        ensemble probability over pixels with non-zero probability (0.0 when
        there is none, or when the image is smaller than one patch).
    """
    h, w = image.shape[:2]
    size = config.PATCH_SIZE

    if h < size or w < size:
        return np.zeros((h, w), dtype=np.uint8), 0.0

    def _window_starts(extent):
        # Regular grid plus one final window flush with the far border, so the
        # right/bottom margins are scored instead of being left uncovered.
        starts = list(range(0, extent - size + 1, config.STRIDE))
        if starts[-1] != extent - size:
            starts.append(extent - size)
        return starts

    # Features do not depend on the model: extract them a single time.
    patches = []
    positions = []
    for y in _window_starts(h):
        for x in _window_starts(w):
            features = extract_comprehensive_features(image[y:y+size, x:x+size])
            if features is not None:
                patches.append(features)
                positions.append((y, x))

    if len(patches) == 0 or len(models) == 0:
        return np.zeros((h, w), dtype=np.uint8), 0.0

    X = scaler.transform(np.array(patches))

    # Number of patches covering each pixel (identical for every model).
    count_map = np.zeros((h, w), dtype=np.float32)
    for y, x in positions:
        count_map[y:y+size, x:x+size] += 1
    covered = count_map > 0

    prediction_maps = []
    for _model_name, model in models:
        predictions = model.predict_proba(X)[:, 1]

        prediction_map = np.zeros((h, w), dtype=np.float32)
        for (y, x), pred in zip(positions, predictions):
            prediction_map[y:y+size, x:x+size] += pred

        # `out=` is required with `where=`: without it the uncovered pixels
        # would hold uninitialised memory rather than 0.
        averaged = np.divide(prediction_map, count_map,
                             out=np.zeros_like(prediction_map), where=covered)
        prediction_maps.append(averaged)

    ensemble_mask = np.mean(prediction_maps, axis=0)

    if ensemble_mask.max() > 0:
        confidence = float(np.mean(ensemble_mask[ensemble_mask > 0]))
    else:
        confidence = 0.0

    binary_mask = (ensemble_mask > config.FORGERY_THRESHOLD).astype(np.uint8)

    # Close small holes first, then remove small isolated specks.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel, iterations=3)
    binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel, iterations=2)

    labeled = label(binary_mask)
    regions = regionprops(labeled)

    # Keep only regions that are large enough and not extremely thin/ragged
    # (compactness 4*pi*A/P^2 equals 1 for a perfect disc).
    refined_mask = np.zeros_like(binary_mask)
    for region in regions:
        if region.area >= config.MIN_REGION_AREA:
            if region.perimeter > 0:
                compactness = 4 * np.pi * region.area / (region.perimeter ** 2)
                if compactness > 0.05:
                    coords = region.coords
                    refined_mask[coords[:, 0], coords[:, 1]] = 1

    return refined_mask, confidence

# ============================================================================
# VISUALIZATION
# ============================================================================

def visualize_detection(image, mask, case_id, confidence, save_path='detection.png'):
    """Save a three-panel figure: original image, mask, and red-tinted overlay.

    Args:
        image: Image array shown in the first panel and used as overlay base.
        mask: 2-D mask; where it has any positive value, ``mask * 255`` is
            written to channel 0 of the overlay layer.
        case_id: Identifier shown in the first panel's title.
        confidence: Value shown (3 decimals) in the third panel's title.
        save_path: Output file path for the figure.
    """
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    original_ax, mask_ax, overlay_ax = axes

    # Blend a red layer over the image only when something was detected.
    if mask.max() > 0:
        red_layer = np.zeros_like(image)
        red_layer[:, :, 0] = mask * 255
        blended = cv2.addWeighted(image.copy(), 0.7, red_layer, 0.3, 0)
    else:
        blended = image.copy()

    original_ax.imshow(image)
    original_ax.set_title(f'Original ({case_id})')

    mask_ax.imshow(mask, cmap='hot')
    mask_ax.set_title('Mask')

    overlay_ax.imshow(blended)
    overlay_ax.set_title(f'Conf: {confidence:.3f}')

    for panel in (original_ax, mask_ax, overlay_ax):
        panel.axis('off')

    plt.tight_layout()
    plt.savefig(save_path, dpi=100, bbox_inches='tight')
    plt.close()
    print(f"   üíæ Saved: {save_path}")

# ============================================================================
# RLE ENCODING
# ============================================================================

def rle_encode(mask):
    """Run-length encode the pixels of ``mask`` that equal 1.

    The mask is traversed in column-major order (``mask.T.flatten()``) and
    positions are 1-based.

    Args:
        mask: Array-like mask; only elements equal to 1 are encoded.

    Returns:
        Flat list of plain Python ints ``[start1, length1, start2, length2,
        ...]`` (JSON-serialisable); an empty list when no element equals 1.
    """
    flat = np.asarray(mask).T.flatten()
    pixels = np.flatnonzero(flat == 1)
    if pixels.size == 0:
        return []

    # A new run begins wherever consecutive foreground indices are not adjacent.
    breaks = np.flatnonzero(np.diff(pixels) > 1) + 1
    starts = pixels[np.concatenate(([0], breaks))]
    ends = pixels[np.concatenate((breaks - 1, [pixels.size - 1]))]

    run_lengths = np.empty(2 * starts.size, dtype=np.int64)
    run_lengths[0::2] = starts + 1        # convert to 1-based positions
    run_lengths[1::2] = ends - starts + 1
    return run_lengths.tolist()

# ============================================================================
# MAIN
# ============================================================================

def main():
    """Run the whole pipeline: discover data, train, predict, write submission.

    Side effects: writes ``ensemble_models.pkl``, ``feature_scaler.pkl``,
    ``submission.csv`` and up to ``Config.MAX_VIZ_SAMPLES`` detection figures
    to the current directory.

    Returns:
        The submission ``DataFrame`` (columns ``case_id`` and ``annotation``),
        or ``None`` when no training data could be generated.
    """
    config = Config()

    authentic_images, forged_images, mask_files, test_images = discover_data(config)

    X_train, y_train = generate_training_data_advanced(authentic_images, forged_images, mask_files, config)

    if len(X_train) == 0:
        print("\n‚ùå ERROR: No training data!")
        return None

    models, scaler = train_ensemble_model(X_train, y_train, config)

    joblib.dump(models, 'ensemble_models.pkl')
    joblib.dump(scaler, 'feature_scaler.pkl')
    print("\n‚úì Models saved")

    print("\n" + "="*80)
    print("PREDICTING")
    print("="*80)

    sample_sub = pd.read_csv(config.SAMPLE_SUB_PATH)
    results = []
    viz_count = 0
    detection_summary = {'authentic': 0, 'forgery': 0, 'confidences': []}

    # Index the test images by stem once instead of scanning the list for
    # every submission row (first occurrence wins, as with the linear scan).
    test_image_by_id = {}
    for img_path in test_images:
        test_image_by_id.setdefault(img_path.stem, img_path)

    def _record_authentic(case_id):
        results.append({'case_id': int(case_id), 'annotation': 'authentic'})
        detection_summary['authentic'] += 1

    for _, row in tqdm(sample_sub.iterrows(), total=len(sample_sub), desc="üîç"):
        case_id = str(row['case_id'])
        test_img_path = test_image_by_id.get(case_id)

        if not (test_img_path and test_img_path.exists()):
            # No image for this case: fall back to the default answer.
            _record_authentic(case_id)
            continue

        try:
            img = cv2.imread(str(test_img_path))
            if img is None:
                # imread signals failure by returning None; report it clearly.
                raise ValueError(f"could not read image {test_img_path}")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            print(f"\nüì∑ {case_id}: {img.shape[1]}x{img.shape[0]}px")

            mask, confidence = predict_image_ensemble(img, models, scaler, config)
            detection_summary['confidences'].append(confidence)

            # Image-level decision uses the configured threshold (0.45 in
            # Config) rather than a second hard-coded copy of the value.
            is_forgery = (
                confidence >= config.FORGERY_THRESHOLD
                and mask.sum() >= config.MIN_REGION_AREA
            )

            run_lengths = rle_encode(mask) if is_forgery else []

            if not is_forgery:
                _record_authentic(case_id)
                print(f"   ‚úì AUTHENTIC ({confidence:.3f})")
            elif len(run_lengths) > 0:
                results.append({'case_id': int(case_id), 'annotation': json.dumps([int(x) for x in run_lengths])})
                detection_summary['forgery'] += 1
                print(f"   ‚ö†Ô∏è  FORGERY ({confidence:.3f}), area: {mask.sum()}px")
            else:
                _record_authentic(case_id)

            if config.VISUALIZE_SAMPLES and viz_count < config.MAX_VIZ_SAMPLES:
                visualize_detection(img, mask, case_id, confidence, f'detection_{case_id}.png')
                viz_count += 1

        except Exception as e:
            # Best effort: a failing case is reported and submitted as authentic.
            print(f"   ‚ùå Error: {e}")
            _record_authentic(case_id)

    # Explicit columns keep the CSV header even when there are no rows.
    submission_df = pd.DataFrame(results, columns=['case_id', 'annotation'])
    submission_df.to_csv('submission.csv', index=False)

    print("\n" + "="*80)
    print("COMPLETE")
    print("="*80)
    print(f"‚úì Total: {len(submission_df)}")
    print(f"‚úì Authentic: {detection_summary['authentic']}, Forgeries: {detection_summary['forgery']}")
    print(f"üíæ Saved: submission.csv")
    print("="*80)

    return submission_df

# Run the pipeline only when this file is executed directly (not on import);
# the resulting DataFrame (or None) is kept in the module-level `submission`.
if __name__ == "__main__":
    submission = main()