In [None]:
!nvidia-smi

In [None]:
import os
import warnings
import logging

# Quiet TensorFlow before it is imported by a later cell.
# NOTE(review): the only shell command above is `!nvidia-smi`, which prints GPU
# status and suppresses nothing; all suppression in this cell comes from the lines below.

# 1. Environment variables, set before `import tensorflow` runs in a later cell.
# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native logging threshold.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Not log-related: asks CUDA to enumerate devices in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
# 2. Python-side: raise the 'tensorflow' logger threshold and hide UserWarning messages.
logging.getLogger('tensorflow').setLevel(logging.FATAL) 
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
# --- CONFIGURATION ---
# NOTE(review): the same constants are assigned again, with identical values,
# at the top of the next cell.

# Kaggle dataset folder name; the two roots below are derived from it.
COMPETITION_SLUG = "recodai-luc-scientific-image-forgery-detection"
# Directory of forged training images (<id>.png).
TRAIN_ROOT = f"/kaggle/input/{COMPETITION_SLUG}/train_images/forged"
# Directory of ground-truth masks (<id>.npy).
MASK_ROOT = f"/kaggle/input/{COMPETITION_SLUG}/train_masks"

# Side length, in pixels, that images and masks are resized to.
IMAGE_SIZE = 256
# Samples per training batch.
BATCH_SIZE = 16
# NOTE(review): not referenced anywhere else in the visible notebook.
N_FOLDS = 1 
# ---------------------

import os
import sys
import warnings
import logging
import numpy as np
import pandas as pd
from glob import glob
from tqdm.notebook import tqdm
import gc

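# 1. Redirect Python's sys.stderr while TensorFlow is imported.
# NOTE(review): this swaps only the Python-level stream object; text written straight to
# file descriptor 2 by native code (E0000, I0000, cuDNN, etc.) is presumably unaffected — confirm.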
class SuppressTFLogs:
    """Context manager that points ``sys.stderr`` at the null device inside a ``with`` block.

    On entry the current ``sys.stderr`` object is saved and replaced by a writable
    handle on ``os.devnull``; on exit the saved object is restored and the handle is
    closed. Only the Python-level ``sys.stderr`` name is swapped.

    Exceptions raised inside the block are not suppressed.
    """

    def __enter__(self):
        # Keep the stream so __exit__ can put it back.
        self.original_stderr = sys.stderr
        # Keep our own reference to the handle so it can be closed on exit
        # instead of being leaked.
        self._devnull = open(os.devnull, 'w')
        sys.stderr = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so anything reported during close() is visible again.
        sys.stderr = self.original_stderr
        self._devnull.close()

# 2. Set the native TensorFlow log threshold and the Python-side filters.
# These must run before the `import tensorflow` below to affect its start-up output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Not log-related: asks CUDA to enumerate devices in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
logging.getLogger('tensorflow').setLevel(logging.FATAL) 
warnings.filterwarnings("ignore", category=UserWarning) 

# --- Import TensorFlow within the suppressed context ---
# While this block runs, sys.stderr is replaced by the SuppressTFLogs context manager
# defined above; it is restored when the block exits.
with SuppressTFLogs():
    # Only import heavy libraries inside the suppressed block
    import tensorflow as tf
    from tensorflow.keras import layers, models, backend as K
    from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
    import cv2
    
# Seeds TensorFlow's global RNG only.
tf.random.set_seed(42)

In [None]:
# --- CONFIGURATION (training) ---

# Kaggle dataset folder name; the two roots below are derived from it.
COMPETITION_SLUG = "recodai-luc-scientific-image-forgery-detection"

# Input locations: forged training images (<id>.png) and their masks (<id>.npy).
TRAIN_ROOT = f"/kaggle/input/{COMPETITION_SLUG}/train_images/forged"
MASK_ROOT = f"/kaggle/input/{COMPETITION_SLUG}/train_masks"

# Side length, in pixels, that images and masks are resized to.
IMAGE_SIZE = 256
# Samples per training batch.
BATCH_SIZE = 16
# NOTE(review): not referenced anywhere else in the visible notebook.
N_FOLDS = 1 
# ---------------------

import numpy as np
import pandas as pd
import os
import warnings
import logging
import tensorflow as tf
from tensorflow.keras import layers, models, backend as K
from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
import cv2
from glob import glob
from tqdm.notebook import tqdm
import gc

# Seed both RNGs this cell draws from: TensorFlow's global RNG, and NumPy's global
# RNG, which the data generator uses for its per-epoch np.random.shuffle.
tf.random.set_seed(42)
np.random.seed(42)

# --- 1. Utility Functions, Metrics, and Callbacks ---

def rle_encode(mask):
    """Encodes a binary mask using Run Length Encoding (Defined for completeness).

    The mask is flattened in row-major order and encoded as space-separated
    ``start length`` pairs with 1-indexed starts. Foreground at the very first or
    very last position is preserved: the flattened mask is padded with a zero on
    each side instead of overwriting its edge pixels.

    Args:
        mask: array-like of 0/1 values (any shape).

    Returns:
        The RLE string; an empty string when the mask has no foreground or no elements.

    NOTE(review): confirm this string layout and pixel order against the
    competition's submission specification.
    """
    # Zero padding guarantees every run has both a start and an end transition.
    pixels = np.concatenate([[0], np.asarray(mask).flatten(), [0]])
    # Index i marks a change between padded[i] and padded[i + 1]; because of the
    # leading pad, i + 1 is the 1-indexed position in the unpadded mask.
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Convert each (start, end) pair into (start, length).
    runs[1::2] = runs[1::2] - runs[:-1:2]
    return ' '.join(str(x) for x in runs)

def dice_coef(y_true, y_pred, smooth=1e-7):
    """Dice coefficient computed over every element of both tensors at once.

    Both inputs are flattened to 1-D, so the score covers the whole batch rather
    than being averaged per sample. ``smooth`` is added to the numerator and the
    denominator, which keeps the ratio defined when both tensors sum to zero.
    """
    flat_true = K.flatten(y_true)
    flat_pred = K.flatten(y_pred)
    overlap = K.sum(flat_true * flat_pred)
    total = K.sum(flat_true) + K.sum(flat_pred)
    return (2. * overlap + smooth) / (total + smooth)

def dice_loss(y_true, y_pred):
    """Loss form of the Dice score: one minus ``dice_coef(y_true, y_pred)``."""
    score = dice_coef(y_true, y_pred)
    return 1 - score

class EpochStatReporter(Callback):
    """Callback to report total skipped (corrupt) samples after each epoch.

    Args:
        generator: object exposing a ``skipped_count`` attribute, read at the
            end of every epoch.
    """

    def __init__(self, generator):
        super().__init__()
        self.generator = generator

    @staticmethod
    def _format_value(value):
        """Render a log value with four decimals, falling back to ``str`` for non-numbers."""
        try:
            return f"{float(value):.4f}"
        except (TypeError, ValueError):
            return str(value)

    def on_epoch_end(self, epoch, logs=None):
        """Print one banner line holding every logged metric and the skip counter.

        Args:
            epoch: zero-based epoch index (printed one-based).
            logs: mapping of metric name to value; ``None`` is treated as empty.
        """
        # The signature defaults logs to None, so it must not be iterated blindly.
        logs = logs or {}
        skipped = self.generator.skipped_count
        log_message = f"Epoch {epoch + 1} finished: "
        for k, v in logs.items():
            log_message += f"{k}: {self._format_value(v)} "
        log_message += f"| TOTAL SAMPLES SKIPPED: {skipped}"

        print("\n" + "="*80)
        print(log_message)
        print("="*80 + "\n")

# --- 2. Forgery Feature Extraction (Optimized) ---

def get_forgery_features(image_id):
    """Build the noise-residual feature map (Stream 2 input) for one training image.

    The image ``<TRAIN_ROOT>/<image_id>.png`` is read as grayscale, its 5x5
    Gaussian-blurred copy is subtracted from it, and the difference is resized to
    IMAGE_SIZE x IMAGE_SIZE, min-max scaled, and repeated across three channels.

    Returns:
        float32 array of shape (IMAGE_SIZE, IMAGE_SIZE, 3); all zeros when the
        image cannot be read.
    """
    gray = cv2.imread(os.path.join(TRAIN_ROOT, f'{image_id}.png'), cv2.IMREAD_GRAYSCALE)

    # Unreadable file: hand back an empty feature map instead of failing.
    if gray is None:
        return np.zeros((IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)

    smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
    # Subtract in float32 so negative differences are kept rather than wrapping in uint8.
    noise = gray.astype(np.float32) - smoothed.astype(np.float32)
    noise = cv2.resize(noise, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_LINEAR)

    # Min-max scale; the epsilon guards against a constant residual.
    lowest = noise.min()
    highest = noise.max()
    noise = (noise - lowest) / (highest - lowest + 1e-7)

    return np.stack([noise, noise, noise], axis=-1).astype(np.float32)

# --- 3. Custom Dual-Stream Data Generator (WITH SKIPPED COUNT) ---

class DualStreamDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding ``((rgb, features), mask)`` batches for the dual-stream model.

    Each batch holds exactly ``batch_size`` samples: samples that fail to load are
    skipped (and counted in ``skipped_count``), and the batch is topped up by
    repeating its last valid sample. A batch with no valid sample at all is
    replaced by all-zero placeholder arrays.

    Args:
        df: DataFrame with an ``id`` column; image and mask paths are derived from it.
        batch_size: samples per batch.
        shuffle: reshuffle the sample order at the end of every epoch.
        **kwargs: forwarded to ``tf.keras.utils.Sequence``.

    Attributes:
        skipped_count: samples skipped since the last ``on_epoch_end`` call.
        indexes: current permutation of row positions in ``df``.
    """

    def __init__(self, df, batch_size=16, shuffle=True, **kwargs):
        super().__init__(**kwargs)
        self.df = df
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.skipped_count = 0
        # Builds the initial index order (and resets the counter again).
        self.on_epoch_end()

    def on_epoch_end(self):
        """Rebuild (and optionally shuffle) the index order and reset the skip counter."""
        self.indexes = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indexes)
        self.skipped_count = 0

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return len(self.df) // self.batch_size

    def _load_sample(self, image_id):
        """Load one ``(rgb, features, mask)`` triple, or return ``None`` if it is unusable.

        All three arrays are float32 with spatial size IMAGE_SIZE x IMAGE_SIZE.
        """
        img_path = os.path.join(TRAIN_ROOT, f'{image_id}.png')
        mask_path = os.path.join(MASK_ROOT, f'{image_id}.npy')

        # Deliberately broad: any unreadable or malformed file makes the sample
        # unusable, and the caller counts it as skipped instead of aborting training.
        try:
            img = cv2.imread(img_path)
            if img is None:
                return None
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            rgb = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_LINEAR) / 255.0
            # Division yields float64; cast so both streams and the placeholders agree.
            rgb = rgb.astype(np.float32)

            features = get_forgery_features(image_id)

            mask = np.squeeze(np.load(mask_path))
            if mask.ndim < 2 or 0 in mask.shape:
                return None
            # Cast before resizing: cv2.resize does not accept every NumPy dtype
            # (bool, for example), and a rejected dtype would silently skip the sample.
            mask = mask.astype(np.float32)
            mask_resized = cv2.resize(mask, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_NEAREST)
            # A mask that is still 3-D after squeeze fails this reshape and is skipped.
            # NOTE(review): confirm the on-disk mask layout and value range (0/1 expected).
            target = mask_resized.reshape(IMAGE_SIZE, IMAGE_SIZE, 1)
        except Exception:
            return None

        return rgb, features, target

    def __getitem__(self, index):
        """Return batch ``index`` as ``((rgb_batch, feature_batch), mask_batch)``."""
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_df = self.df.iloc[indexes]

        samples = []
        for row in batch_df.itertuples():
            sample = self._load_sample(row.id)
            if sample is None:
                self.skipped_count += 1
                continue
            samples.append(sample)

        if not samples:
            # Entire batch unusable: return zero placeholders in the same tuple format.
            placeholder_x1 = np.zeros((self.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
            placeholder_x2 = np.zeros((self.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
            placeholder_y = np.zeros((self.batch_size, IMAGE_SIZE, IMAGE_SIZE, 1), dtype=np.float32)
            return (placeholder_x1, placeholder_x2), placeholder_y

        # Pad with the last valid sample so every batch has a fixed size.
        while len(samples) < self.batch_size:
            samples.append(samples[-1])

        x1, x2, y = (np.stack(part) for part in zip(*samples))
        # The two inputs are returned as a tuple, matching the model's two Input layers.
        return (x1, x2), y

# --- 4. Dual-Stream U-Net Model ---
def build_dual_stream_unet(input_shape):
    """Build the two-input segmentation model.

    Each input ('rgb_input', 'feature_input') passes through its own Sequential
    stack ('rgb_stream', 'feature_stream') of Conv2D(32) -> MaxPooling2D ->
    Conv2D(64). The two outputs are concatenated, upsampled by 2, passed through
    Conv2D(128), and reduced to one sigmoid channel by a 1x1 convolution.

    Args:
        input_shape: shape (without batch axis) shared by both inputs.

    Returns:
        An uncompiled Keras Model taking ``[rgb, features]``.
    """
    # Layers are created in a fixed order; keep it stable so auto-generated layer
    # names stay the same between the training and inference copies of this function.
    rgb_in = layers.Input(shape=input_shape, name='rgb_input')
    rgb_encoder = models.Sequential(
        [
            layers.Conv2D(32, 3, activation='relu', padding='same'),
            layers.MaxPooling2D(),
            layers.Conv2D(64, 3, activation='relu', padding='same'),
        ],
        name='rgb_stream',
    )
    rgb_encoded = rgb_encoder(rgb_in)

    feat_in = layers.Input(shape=input_shape, name='feature_input')
    feat_encoder = models.Sequential(
        [
            layers.Conv2D(32, 3, activation='relu', padding='same'),
            layers.MaxPooling2D(),
            layers.Conv2D(64, 3, activation='relu', padding='same'),
        ],
        name='feature_stream',
    )
    feat_encoded = feat_encoder(feat_in)

    fused = layers.concatenate([rgb_encoded, feat_encoded])
    upsampled = layers.UpSampling2D(size=(2, 2))(fused)
    decoded = layers.Conv2D(128, 3, activation='relu', padding='same')(upsampled)
    mask_out = layers.Conv2D(1, 1, activation='sigmoid', padding='same')(decoded)

    return models.Model(inputs=[rgb_in, feat_in], outputs=mask_out)

# --- 5. Training Loop (Execution) ---

# Sorted so the list of ids does not depend on filesystem listing order.
all_files = sorted(glob(os.path.join(TRAIN_ROOT, '*.png')))
# splitext removes only the trailing extension; str.replace would also strip a
# '.png' that occurs inside a file name and produce an id that maps to no file.
df = pd.DataFrame([os.path.splitext(os.path.basename(f))[0] for f in all_files], columns=['id'])

# --- Final Checks ---
if len(df) == 0:
    raise RuntimeError(f"FATAL: The path '{TRAIN_ROOT}' is empty. Kernel environment failed to load data.")

# The generator only emits full batches, so fewer samples than one batch means zero steps.
if len(df) < BATCH_SIZE:
    raise RuntimeError(f"FATAL: Data size ({len(df)}) is less than BATCH_SIZE ({BATCH_SIZE}). Cannot train.")

# Initialize Generator and Model
train_gen = DualStreamDataGenerator(df, batch_size=BATCH_SIZE)
model = build_dual_stream_unet((IMAGE_SIZE, IMAGE_SIZE, 3))

model.compile(optimizer='adam', loss=dice_loss, metrics=[dice_coef, 'accuracy'])

# Stability callbacks. Both monitor the training loss: no validation data is
# passed to fit() below, so there is no 'val_loss' to watch.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1
)

early_stop = EarlyStopping(
    monitor='loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

# Prints the per-epoch metrics together with the generator's skipped-sample counter.
stat_reporter = EpochStatReporter(train_gen)
callbacks = [stat_reporter, reduce_lr, early_stop]

print(f"Generator will produce {len(train_gen)} batches per epoch.")

# verbose=0: the built-in progress output is off; EpochStatReporter prints instead.
model.fit(train_gen, epochs=100, verbose=0, callbacks=callbacks)

# Save weights to the temporary directory for Part 2 inference
model.save_weights('/tmp/model_new_scratch.weights.h5')

# Final cleanup
del model; del train_gen; gc.collect()

In [None]:
!ls /tmp/ 

In [None]:
# --- CONFIGURATION (inference) ---
# Must equal the IMAGE_SIZE used for training: the model is rebuilt with this input size.
IMAGE_SIZE = 256
# Cut-off applied to the sigmoid output to obtain a binary mask.
# NOTE(review): described as the optimal threshold from Part 1 validation, but the
# training cell in this notebook calls fit() without validation data — confirm where 0.45 comes from.
THRESHOLD = 0.45
# File the predictions are written to (relative to the working directory).
OUTPUT_FILENAME = "submission.csv"

# PATHS:
COMPETITION_SLUG = "recodai-luc-scientific-image-forgery-detection"
# Directory of test images (<id>.png).
TEST_ROOT = f"/kaggle/input/{COMPETITION_SLUG}/test_images" 
# Read for the list of test ids.
SAMPLE_SUBMISSION_FILE = f"/kaggle/input/{COMPETITION_SLUG}/sample_submission.csv"
# Same path the training cell passes to save_weights(); the file name must keep the
# '.weights.h5' suffix.
model_path = "/tmp/model_new_scratch.weights.h5" 

# ----------------------

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, backend as K
import os
import cv2
import gc
from tqdm.notebook import tqdm

tf.get_logger().setLevel('ERROR')

# --- 1. Utility Functions and Model Definition (CRITICAL: Must match Part 1) ---

def rle_encode(mask):
    """Encodes a binary mask using Run Length Encoding (REQUIRED FOR SUBMISSION).

    The mask is flattened in row-major order and encoded as space-separated
    ``start length`` pairs with 1-indexed starts. Foreground at the very first or
    very last position is preserved: the flattened mask is padded with a zero on
    each side instead of overwriting its edge pixels.

    Args:
        mask: array-like of 0/1 values (any shape).

    Returns:
        The RLE string; an empty string when the mask has no foreground or no elements.

    NOTE(review): confirm this string layout and pixel order against the
    competition's submission specification.
    """
    # Zero padding guarantees every run has both a start and an end transition.
    pixels = np.concatenate([[0], np.asarray(mask).flatten(), [0]])
    # Index i marks a change between padded[i] and padded[i + 1]; because of the
    # leading pad, i + 1 is the 1-indexed position in the unpadded mask.
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Convert each (start, end) pair into (start, length).
    runs[1::2] = runs[1::2] - runs[:-1:2]
    return ' '.join(str(x) for x in runs)

def get_forgery_features(image_id):
    """Build the noise-residual feature map for one test image.

    Same computation as the training-time function of the same name, but reading
    from TEST_ROOT: grayscale read, minus its 5x5 Gaussian blur, resized to
    IMAGE_SIZE x IMAGE_SIZE, min-max scaled, repeated across three channels.

    Returns:
        float32 array of shape (IMAGE_SIZE, IMAGE_SIZE, 3); all zeros when the
        image cannot be read.
    """
    gray = cv2.imread(os.path.join(TEST_ROOT, f'{image_id}.png'), cv2.IMREAD_GRAYSCALE)

    # Unreadable file: hand back an empty feature map instead of failing.
    if gray is None:
        return np.zeros((IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)

    smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
    # Subtract in float32 so negative differences are kept rather than wrapping in uint8.
    noise = gray.astype(np.float32) - smoothed.astype(np.float32)
    noise = cv2.resize(noise, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_LINEAR)

    # Min-max scale; the epsilon guards against a constant residual.
    lowest = noise.min()
    highest = noise.max()
    noise = (noise - lowest) / (highest - lowest + 1e-7)

    return np.stack([noise, noise, noise], axis=-1).astype(np.float32)

def build_dual_stream_unet(input_shape):
    """Rebuild the two-input segmentation model so the saved weights can be loaded.

    The architecture must stay identical to the training-time definition: two
    inputs ('rgb_input', 'feature_input'), each through a Sequential stack
    ('rgb_stream', 'feature_stream') of Conv2D(32) -> MaxPooling2D -> Conv2D(64),
    then concatenation, 2x upsampling, Conv2D(128), and a 1x1 sigmoid convolution.

    Args:
        input_shape: shape (without batch axis) shared by both inputs.

    Returns:
        An uncompiled Keras Model taking ``[rgb, features]``.
    """
    # Layers are created in a fixed order; keep it stable so auto-generated layer
    # names stay the same as in the training copy of this function.
    rgb_in = layers.Input(shape=input_shape, name='rgb_input')
    rgb_encoder = models.Sequential(
        [
            layers.Conv2D(32, 3, activation='relu', padding='same'),
            layers.MaxPooling2D(),
            layers.Conv2D(64, 3, activation='relu', padding='same'),
        ],
        name='rgb_stream',
    )
    rgb_encoded = rgb_encoder(rgb_in)

    feat_in = layers.Input(shape=input_shape, name='feature_input')
    feat_encoder = models.Sequential(
        [
            layers.Conv2D(32, 3, activation='relu', padding='same'),
            layers.MaxPooling2D(),
            layers.Conv2D(64, 3, activation='relu', padding='same'),
        ],
        name='feature_stream',
    )
    feat_encoded = feat_encoder(feat_in)

    fused = layers.concatenate([rgb_encoded, feat_encoded])
    upsampled = layers.UpSampling2D(size=(2, 2))(fused)
    decoded = layers.Conv2D(128, 3, activation='relu', padding='same')(upsampled)
    mask_out = layers.Conv2D(1, 1, activation='sigmoid', padding='same')(decoded)

    return models.Model(inputs=[rgb_in, feat_in], outputs=mask_out)

# --- 2. Setup and Model Loading (KEY ERROR FIX APPLIED) ---

submission_df = pd.read_csv(SAMPLE_SUBMISSION_FILE)

# Pick the ID column: prefer a known name, otherwise fall back to the first column.
# (The fallback always yields a name, so no "not found" check is needed afterwards.)
id_column = next(
    (name for name in ('id', 'Id', 'ImageId') if name in submission_df.columns),
    submission_df.columns[0],
)

test_image_ids = submission_df[id_column].unique()
# ----------------------------------------

try:
    model = build_dual_stream_unet((IMAGE_SIZE, IMAGE_SIZE, 3))
    # The path must keep the '.weights.h5' suffix used when the weights were saved.
    model.load_weights(model_path)
except Exception as e:
    print(f"FATAL: Could not load model weights from {model_path}. Submission will fail.")
    print(f"Cause: {type(e).__name__}: {e}")
    # Stop here: continuing would either predict with untrained weights or hit a
    # NameError on `model` in the loop below.
    raise

# --- 3. Prediction and RLE Encoding Loop ---
predictions = []
for image_id in tqdm(test_image_ids, desc="Generating Predictions"):

    img_path = os.path.join(TEST_ROOT, f'{image_id}.png')

    # 3.1 Prepare Inputs
    img = cv2.imread(img_path)

    if img is None:
        # Unreadable or missing image: fall back to the 'authentic' label.
        rle_string = 'authentic'
    else:
        source_h, source_w = img.shape[:2]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Same preprocessing as training: resize, scale to [0, 1], float32.
        X1 = (cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE)) / 255.0).astype(np.float32)
        X2 = get_forgery_features(image_id)

        X1_batch = np.expand_dims(X1, axis=0)
        X2_batch = np.expand_dims(X2, axis=0)

        # 3.2 Predict Mask (drop the batch and channel axes)
        mask_prediction = model.predict([X1_batch, X2_batch], verbose=0)[0, ..., 0]

        # 3.3 Threshold, restore the source resolution, and encode.
        binary_mask = (mask_prediction > THRESHOLD).astype(np.uint8)
        # RLE positions index into the flattened mask, so the mask is brought back
        # to the source image's size first (cv2.resize takes (width, height)).
        # NOTE(review): assumes masks are scored at the source resolution, as the
        # training masks are stored at native size — confirm with the competition rules.
        full_mask = cv2.resize(binary_mask, (source_w, source_h), interpolation=cv2.INTER_NEAREST)

        # No foreground left → report the image as authentic.
        if full_mask.sum() == 0:
            rle_string = 'authentic'
        else:
            rle_string = rle_encode(full_mask)

    predictions.append([image_id, rle_string])

# --- 4. Final Submission ---
# Two columns: the ID column name found above, and the prediction.
# NOTE(review): the prediction column is hard-coded as 'predicted'; confirm it
# matches the second column of sample_submission.csv.
final_submission_df = pd.DataFrame(predictions, columns=[id_column, 'predicted'])
final_submission_df.to_csv(OUTPUT_FILENAME, index=False)

# Clean up memory
del model; gc.collect()

In [None]:
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import Image, display # Import necessary display tools
import os

# Define the image path using the simplest logical structure
IMAGE_ID = '45'
IMAGE_PATH = f"/kaggle/input/recodai-luc-scientific-image-forgery-detection/test_images/{IMAGE_ID}.png"
# Separate name so the submission's OUTPUT_FILENAME constant is not overwritten.
PLOT_FILENAME = f'test_image_{IMAGE_ID}_plot.png'


def _predicted_label(image_id, submission_file='submission.csv'):
    """Return 'AUTHENTIC', 'FORGED' or 'UNKNOWN' for ``image_id`` from the saved submission.

    The first column is treated as the id and the second as the prediction.
    'UNKNOWN' is returned when the file is missing or unreadable, has fewer than
    two columns, has no row for the id, or the prediction cell is empty.
    """
    try:
        sub = pd.read_csv(submission_file, dtype=str)
    except (OSError, ValueError):
        # Missing file, or an empty or unparseable CSV.
        return 'UNKNOWN'
    if sub.shape[1] < 2:
        return 'UNKNOWN'
    matches = sub.loc[sub.iloc[:, 0] == str(image_id)]
    if matches.empty:
        return 'UNKNOWN'
    value = matches.iloc[0, 1]
    if not isinstance(value, str):
        return 'UNKNOWN'
    return 'AUTHENTIC' if value.strip().lower() == 'authentic' else 'FORGED'


# --- Image Plotting Logic ---
try:
    # 1. Load the image
    img = cv2.imread(IMAGE_PATH)

    if img is None:
        print(f"ERROR: Image not found at the path: {IMAGE_PATH}")
    else:
        # 2. Convert to RGB (OpenCV loads BGR; matplotlib expects RGB)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # 3. Plot the image; the title reports the label actually stored in the
        #    submission instead of a hard-coded one.
        plt.figure(figsize=(6, 6))
        plt.imshow(img_rgb)
        plt.title(f'Test Image {IMAGE_ID}.png (Predicted: {_predicted_label(IMAGE_ID)})')
        plt.axis('off')

        # 4. Save the figure to the working directory
        plt.savefig(PLOT_FILENAME)
        plt.close()

        # 5. Display the saved image file for immediate viewing in the notebook
        print(f"Image saved to: {PLOT_FILENAME}")
        display(Image(filename=PLOT_FILENAME)) # Force display of the saved file

except Exception as e:
    print(f"An error occurred during image plotting: {e}")

In [None]:
import pandas as pd
import numpy as np

def analyze_authenticity(submission_file='submission.csv'):
    """
    Reads the submission file and prints whether its first row was predicted
    to be authentic or forged.

    The first column is treated as the image id and the second as the prediction.
    Only the first row is analysed. Problems with the file (missing, empty, fewer
    than two columns) are reported with a printed message and the function returns.

    Args:
        submission_file: path of the CSV to analyse.

    Returns:
        None. The result is printed.
    """
    try:
        df = pd.read_csv(submission_file)
    except FileNotFoundError:
        print(f"ERROR: The file '{submission_file}' was not found.")
        print("Please ensure the Part 2 notebook ran and the file is in the current directory.")
        return
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header for read_csv to parse.
        print("ERROR: The submission file is empty.")
        return

    if df.shape[0] == 0:
        print("ERROR: The submission file is empty.")
        return

    if df.shape[1] < 2:
        print("ERROR: The submission file must contain an ID column and a prediction column.")
        return

    # 1. The first column is the ID, the second is the prediction.
    id_col = df.columns[0]
    pred_col = df.columns[1]

    # 2. Get the prediction for the first row.
    image_id = df.loc[0, id_col]
    raw_prediction = df.loc[0, pred_col]
    # An empty cell is read back as NaN (a float) and a purely numeric cell as a
    # number; normalise to text so the string checks below cannot fail.
    prediction = '' if pd.isna(raw_prediction) else str(raw_prediction)

    # 3. Determine authenticity
    # A numeric RLE string means 'Forged'. The word 'authentic' means 'Authentic'.
    if prediction.strip().lower() == 'authentic':
        result = "AUTHENTIC"
    elif prediction.replace(' ', '').replace('.', '').isdigit():
        # Only digits remain once spaces and dots are removed (RLE format)
        result = "FORGED"
    else:
        result = "UNCERTAIN/INVALID FORMAT"

    print("=" * 40)
    print(f"ANALYSIS OF TEST IMAGE {image_id}:")
    print("-" * 40)
    print(f"Model Prediction: {result}")
    print(f"Prediction Value: {prediction[:30]}...")
    print(f"Conclusion: Image {image_id}.png is predicted to be {result}.")
    print("=" * 40)

# Execute the analysis
analyze_authenticity()

In [None]:
import pandas as pd

# Load the submission from the working directory and print a short summary:
# the first rows, the share of each distinct prediction value, and the row count.
try:
    df = pd.read_csv("submission.csv")
except FileNotFoundError:
    print("FATAL: The 'submission.csv' file was not saved or is not accessible.")
else:
    print("\nSUCCESS: File Loaded.")
    print("--- First 5 Rows ---")
    print(df.head())
    print("\n--- Summary Stats ---")
    # Relative frequency of each prediction string, most common first.
    prediction_shares = df['predicted'].value_counts(normalize=True)
    print(prediction_shares.head())
    print(f"\nTotal rows: {len(df)}")

In [None]:
# Preview only the first 2000 bytes: RLE rows can be very long, and dumping the
# whole file floods the cell output.
!head -c 2000 /kaggle/working/submission.csv