In [1]:
# Installing extra dependencies into drive
!pip install lightning



In [2]:
import pytorch_lightning as L

In [3]:
import sys
import os

def is_colab_env():
    """Return True when `google.colab` is among the already-imported modules."""
    colab_module_name = "google.colab"
    loaded_modules = sys.modules
    return colab_module_name in loaded_modules

def mount_google_drive(drive_dir="/content/drive/", repo_dir="MyDrive/repositories/deepfake-detection"):
    """Mount Google Drive and make the repository the working directory.

    Args:
        drive_dir: Mount point handed to ``google.colab.drive.mount``.
        repo_dir: Location of the repository relative to ``drive_dir``.

    Side effects:
        Changes the process working directory to ``drive_dir``/``repo_dir``
        and prints that directory's entries as a quick sanity check.
    """
    # Imported lazily: `google.colab` only exists inside a Colab runtime.
    from google.colab import drive
    import os

    drive.mount(drive_dir)

    # Join the parts properly so the result is correct whether or not
    # `drive_dir` ends with a path separator.
    repo_path = os.path.join(drive_dir, repo_dir)
    os.chdir(repo_path)
    print(os.listdir()) # verify content

def resolve_path(levels_deep=3):
    """Prepare the environment so the project modules can be located.

    When ``is_colab_env()`` is true, ``mount_google_drive()`` is called with
    its default arguments. Otherwise the directory ``levels_deep`` levels
    above the current working directory is added to ``sys.path``.

    Args:
        levels_deep: How many parent directories to climb from the working
            directory before registering the result on ``sys.path``.
    """
    if is_colab_env():
        mount_google_drive()
        return

    # Start from the working directory. (Resolving the literal string
    # '__file__' yields a name inside the working directory, so its parent
    # is this same location.)
    target_dir = os.path.abspath(os.getcwd())

    # Climb to the requested ancestor directory.
    for _ in range(levels_deep):
        target_dir = os.path.dirname(target_dir)

    # Guard against stacking duplicate entries when the cell is re-run.
    if target_dir not in sys.path:
        sys.path.append(target_dir)
    print(sys.path)

# Set up the working directory / import path before the project imports in the following cells.
resolve_path()

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
['README.md', 'src', 'environment.yml', '.git', '.gitignore', 'config.py', '__pycache__', 'playground', 'reports']


In [4]:
# import local config
import config

In [5]:
from src.transforms.ela import ela
from src.models.resnet import ResNetClassifier, DEFAULT_DATA_TRANSFORMS
from src.adapters.datasets.sida import SidADataModule

In [6]:
# Name of this benchmark run and the directory derived from it
# (sub-folder of the benchmark root taken from the local config).
model_id = "ela_resnet_benchmark_sida"
model_benchmark_dir = "{}/{}".format(config.BENCHMARK_DIR, model_id)

In [7]:
from src.adapters.datasets.wilddeepfake import load_streaming_dataset, create_data_loaders
import config

# Settings for the WildDeepfake streaming loaders. The loader construction
# further down is commented out, so none of these values are consumed by the
# cells visible in this part of the notebook.
dataset_name = "xingjunm/WildDeepfake"
max_samples = 50000  # cap on samples for quick development; remove for the full dataset
batch_size = 16
num_workers = 2
seed = config.SEED

# Disabled: load the streaming dataset and build train/val/test loaders,
# passing `ela` as an additional transform.
# datasets = load_streaming_dataset(
#     dataset_name,
#     max_samples=max_samples,
#     seed=seed
# )
# train_loader, val_loader, test_loader = create_data_loaders(
#     datasets,
#     batch_size=batch_size,
#     num_workers=num_workers,
#     additional_transforms=ela
# )

In [8]:
# Checkpoint to evaluate; the path is relative to the current working directory.
checkpoint_path = "./reports/ela_resnet/lightning_logs/version_2/checkpoints/epoch=0-step=625.ckpt"

In [9]:
# Rebuild the classifier from the checkpoint file selected above.
ela_resnet_model = ResNetClassifier.load_from_checkpoint(checkpoint_path=checkpoint_path)

In [10]:
# Single-device Lightning trainer whose root directory is the benchmark
# folder for this run.
trainer = L.Trainer(
    default_root_dir=model_benchmark_dir,
    devices=1,
    max_steps=31_250,
    log_every_n_steps=10,
    callbacks=[],
    profiler="simple",  # report time spent per stage
    # limit_train_batches=1000,  # cap on batches per training "epoch"
    # limit_val_batches=200,  # cap on validation batches per "epoch"
)

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [11]:
# Data module for the SidA dataset, using the ResNet default transforms with `ela` as an extra transform.
# NOTE(review): the seed is hard-coded to 42 here, while an earlier cell sets `seed = config.SEED`;
# confirm the two agree, or pass the configured seed instead.
sida_data_module = SidADataModule(seed=42, transforms=DEFAULT_DATA_TRANSFORMS, additional_transforms=ela)

In [None]:
# Run the test loop for the checkpointed model using the data provided by the SidA data module.
trainer.test(ela_resnet_model, datamodule=sida_data_module)

Resolving data files:   0%|          | 0/249 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/34 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/249 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/34 [00:00<?, ?it/s]

/usr/local/lib/python3.12/dist-packages/pytorch_lightning/utilities/data.py:123: Your `IterableDataset` has `__len__` defined. In combination with multi-process data loading (when num_workers > 1), `__len__` could be inaccurate if each worker is not configured independently to avoid having duplicate data.


Testing: |          | 0/? [00:00<?, ?it/s]