In [1]:
!python --version

Python 3.11.5


In [2]:
import os
import glob
import cv2
import numpy as np
import tensorflow as tf
from tqdm.notebook import tqdm # Use tqdm.notebook for Jupyter
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import Sequence # Sequence is directly in keras.utils
# from tensorflow.keras.utils import * # Avoid star imports if possible
import shutil # Import shutil for file copying
import random

In [3]:
gpus = tf.config.list_physical_devices('GPU')
print("Available GPUs:", gpus)

Available GPUs: []


In [6]:
# --- IMPORTANT: Set the correct path to your downloaded dataset ---
SOURCE_DATA_DIR = r"C:\Users\Shubham Kumavat\Downloads\archive\CoMoFoD_small_v2"
# ------------------------------------------------------------------

DEST_BASE_DIR = "dataset"
DEST_IMAGES_DIR = os.path.join(DEST_BASE_DIR, "images")
DEST_LABELS_DIR = os.path.join(DEST_BASE_DIR, "labels")
DEST_MASKS_DIR = os.path.join(DEST_BASE_DIR, "masks")

# Create destination directories if they don't exist
os.makedirs(DEST_LABELS_DIR, exist_ok=True)
os.makedirs(DEST_IMAGES_DIR, exist_ok=True)
os.makedirs(DEST_MASKS_DIR, exist_ok=True)

print(f"Dataset directories ensured/created in: '{DEST_BASE_DIR}'")
print(f"Source directory set to: '{SOURCE_DATA_DIR}'")

Dataset directories ensured/created in: 'dataset'
Source directory set to: 'C:\Users\Shubham Kumavat\Downloads\archive\CoMoFoD_small_v2'


In [7]:
try:
    source_images = os.listdir(SOURCE_DATA_DIR)
    print(f"Found {len(source_images)} files/folders in source directory: '{SOURCE_DATA_DIR}'")

    print("\nCopying files (this might take a moment)...")
    # Copying loop using shutil (safer and platform-independent)
    copied_counts = {'labels': 0, 'masks': 0, 'images': 0, 'skipped': 0, 'errors': 0}

    for image_filename in tqdm(source_images):
        source_path = os.path.join(SOURCE_DATA_DIR, image_filename)

        # Skip if it's not a file (e.g., a sub-directory) or doesn't exist
        if not os.path.isfile(source_path):
            print(f"Skipping non-file item: {image_filename}")
            copied_counts['skipped'] += 1
            continue

        try:
            # Determine destination based on filename structure
            parts = image_filename.split('_')
            copied = False
            if len(parts) == 2:
                if "B." in image_filename: # More specific check for label mask
                    shutil.copy(source_path, DEST_LABELS_DIR)
                    copied_counts['labels'] += 1
                    copied = True
                elif "M." in image_filename: # More specific check for cropped mask
                    shutil.copy(source_path, DEST_MASKS_DIR)
                    copied_counts['masks'] += 1
                    copied = True
            elif len(parts) > 2:
                 # Assuming 'F' (forged) and 'O' (original?) go to images
                if "_F_" in image_filename or "_O_" in image_filename:
                     shutil.copy(source_path, DEST_IMAGES_DIR)
                     copied_counts['images'] += 1
                     copied = True

            if not copied:
                 #print(f"Skipping file (doesn't match naming convention): {image_filename}")
                 copied_counts['skipped'] += 1


        except Exception as e:
            print(f"ERROR copying {source_path}: {e}")
            copied_counts['errors'] += 1

    # Verification - Count files in destination directories
    print("\n--- Dataset Preparation Complete ---")
    print(f"Total Labels copied: {copied_counts['labels']} (Actual files: {len(os.listdir(DEST_LABELS_DIR))})")
    print(f"Total Masks copied: {copied_counts['masks']} (Actual files: {len(os.listdir(DEST_MASKS_DIR))})")
    print(f"Total Forged/Original Images copied: {copied_counts['images']} (Actual files: {len(os.listdir(DEST_IMAGES_DIR))})")
    print(f"Files skipped: {copied_counts['skipped']}")
    print(f"Errors during copy: {copied_counts['errors']}")


except FileNotFoundError:
     print(f"ERROR: Source directory not found: '{SOURCE_DATA_DIR}'")
     print("Please ensure the path in Cell 3 is correct and the data exists.")
except Exception as e:
     print(f"An error occurred during dataset preparation: {e}")

Exception ignored in: <function tqdm.__del__ at 0x0000025387A07400>
Traceback (most recent call last):
  File "C:\Users\Shubham Kumavat\venv-py310-tf\lib\site-packages\tqdm\std.py", line 1148, in __del__
    self.close()
  File "C:\Users\Shubham Kumavat\venv-py310-tf\lib\site-packages\tqdm\notebook.py", line 279, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


Found 10402 files/folders in source directory: 'C:\Users\Shubham Kumavat\Downloads\archive\CoMoFoD_small_v2'

Copying files (this might take a moment)...
An error occurred during dataset preparation: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
