In [1]:
%pip install pytorch-lightning

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.5.6-py3-none-any.whl.metadata (20 kB)
Collecting torchmetrics>0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Downloading pytorch_lightning-2.5.6-py3-none-any.whl (831 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m831.6/831.6 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightning-utilities, torchmetrics, pytorch-lightning
Successfully installed lightning-utilities-0.15.2 pytorch-lightning-2.5.6 torchmetrics-1.8.2


In [2]:
import sys
import os

def is_colab_env():
    """Report whether this interpreter is running inside Google Colab.

    Detection relies solely on ``google.colab`` already being present in
    ``sys.modules``; nothing is imported by this check.

    Returns:
        bool: True when ``google.colab`` is loaded, False otherwise.
    """
    loaded_modules = sys.modules
    return "google.colab" in loaded_modules

def mount_google_drive(drive_dir="/content/drive/", repo_dir="MyDrive/repositories/deepfake-detection"):
    """Mount Google Drive and make the repository the working directory.

    Args:
        drive_dir: Mount point handed to ``google.colab.drive.mount``. May be
            given with or without a trailing slash.
        repo_dir: Location of the repository relative to ``drive_dir``.

    Returns:
        str: Path of the repository (``drive_dir`` joined with ``repo_dir``).

    Raises:
        ImportError: If ``google.colab`` cannot be imported.
        OSError: If the repository directory does not exist (from ``os.chdir``).
    """
    # Imported lazily so the function can be defined outside of Colab.
    from google.colab import drive

    drive.mount(drive_dir)

    # Join with os.path.join instead of string concatenation: concatenation
    # silently produced a wrong path when drive_dir had no trailing slash.
    # Leading slashes are stripped from repo_dir so it stays relative to the
    # mount point rather than replacing it.
    repo_path = os.path.join(drive_dir, repo_dir.lstrip("/"))

    # change to correct working directory
    os.chdir(repo_path)
    print(os.listdir())  # verify content
    return repo_path

def resolve_path(levels_deep=3):
    """Locate the project root and make it importable.

    In Colab the work is delegated to ``mount_google_drive()``. Elsewhere the
    root is found by climbing ``levels_deep`` parent directories from the
    current working directory, and that directory is added to ``sys.path``.

    Args:
        levels_deep: Number of parent directories to climb from the working
            directory. Not used in the Colab branch.

    Returns:
        str: Path of the project root.
    """
    if is_colab_env():
        return mount_google_drive()

    # A notebook has no ``__file__``; the working directory stands in for
    # "the directory of this notebook". (The previous
    # ``dirname(abspath('__file__'))`` on a string literal evaluated to
    # exactly this.)
    project_root = os.path.abspath(os.curdir)

    # Construct the path to the parent directory
    for _ in range(levels_deep):
        project_root = os.path.dirname(project_root)

    # Add the parent directory to sys.path only once, so re-running the cell
    # does not keep appending duplicates.
    if project_root not in sys.path:
        sys.path.append(project_root)
    print(sys.path)
    return project_root

# Project root used to build every path below: the Drive-mounted repository in
# Colab, otherwise an ancestor of the current working directory.
proj_dir = resolve_path()

Mounted at /content/drive/
['src', '.git', 'playground', 'reports', 'analysis', 'dct_mean_real_fake.png', 'faceforensics_download.py', 'freqnet_image.ipynb', 'hf_wdf.sh', 'run_jupyter.sh', 'setup', 'environment.yml', 'README.md', 'config.py', '__pycache__', '.gitignore', '.cache', 'xet', '.tmp', '.hf_datasets_tmp']


In [3]:
# Dataset downloads and the dataset cache both live under the project root.
data_dir = proj_dir + "/.datasets/"
cache_dir = proj_dir + "/.cache/"

In [4]:
# add this to prevent huggingface from downloading cache in local storage in colab
import os
import tempfile
from pathlib import Path

# Create a directory for temporary files within proj_dir
# (which is on Google Drive when running in Colab)
tmp_dir = Path(proj_dir) / ".tmp"
tmp_dir.mkdir(parents=True, exist_ok=True)

# Set the TMPDIR environment variable to this new directory
os.environ["TMPDIR"] = str(tmp_dir)

# tempfile remembers its directory after the first lookup, so a TMPDIR change
# made afterwards is ignored in this process. Clearing the cached value makes
# the next lookup re-read TMPDIR.
tempfile.tempdir = None

print(f"TMPDIR set to: {os.environ['TMPDIR']}")

TMPDIR set to: /content/drive/MyDrive/repositories/deepfake-detection/.tmp


In [5]:
# Directory used as the Hugging Face home (exported as HF_HOME in the next cell).
# NOTE(review): absolute, Colab-specific path that does not depend on proj_dir;
# it will not exist when the notebook runs outside Colab.
hf_output = "/content/drive/MyDrive/.hf/"

In [6]:
# Export HF_HOME for this process. This runs before `datasets` is imported
# further down, presumably so the library picks it up at import time - TODO confirm.
os.environ['HF_HOME'] = hf_output

In [7]:
# `!export VAR=...` runs in a throwaway subshell, so it never reached this
# kernel, and it assigned the literal words "cache_dir"/"data_dir" instead of
# the Python values. Set the variables on the kernel's own environment instead.
os.environ["HF_DATASETS_CACHE"] = cache_dir
os.environ["HF_DATASETS_DOWNLOADED_DATASETS_PATH"] = data_dir

In [8]:
import datasets
from pathlib import Path
# Override the library's module-level path settings after import so that
# downloaded datasets and the dataset cache use the project directories.
# NOTE(review): other settings derived from these at import time (if any) are
# not updated by these assignments - confirm against datasets.config.
datasets.config.DOWNLOADED_DATASETS_PATH = Path(data_dir)
datasets.config.HF_DATASETS_CACHE = Path(cache_dir)

In [9]:
# import local config
import config

In [10]:
# import library dependencies
import numpy as np

In [11]:
# pytorch
import torch
import pytorch_lightning as L

In [12]:
# import local dependencies
from src.adapters.datasets.wilddeepfake import WildDeepfakeDataModule
from src.models.resnet import ResNetClassifier

In [13]:
from src.transforms.frequency import get_transforms
# Transform pipeline selected by the "dct" key; it is handed to the data
# module below.
# NOTE(review): presumably a discrete-cosine-transform preprocessing - confirm
# in src.transforms.frequency.
dct_transforms = get_transforms("dct")

In [14]:
seed = config.SEED_2

# Set seeds for reproducibility.
# seed_everything seeds Python's `random`, NumPy and PyTorch in one call (the
# previous torch/numpy-only seeding left `random` untouched); workers=True
# also gives each DataLoader worker a seed derived from this one.
L.seed_everything(seed, workers=True)

# Determine device (GPU or CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


In [15]:
# Experiment switches.
# model_weights: in the cells shown here it is only used to choose the
# "pretrained"/"fullfinetune" label in model_id; it is not passed to the model.
model_weights = None
# freeze_features: forwarded to ResNetClassifier(freeze_features=...).
freeze_features = False

In [16]:
# Derive a run identifier and the directory that will hold its checkpoints.
if model_weights is None:
    training_method = "fullfinetune"
else:
    training_method = "pretrained"

model_id = f"dct_resnet18_{training_method}_seed{seed}"
model_checkpoint_dir = f"{proj_dir}/{config.CHECKPOINTS_DIR}/{model_id}"

In [17]:
# Data-loading settings
batch_size = 32
num_workers = 2

# Data module: receives the DCT transforms, the shared seed and the dataset
# cache directory defined earlier.
wilddeepfake_data_module = WildDeepfakeDataModule(
    transforms=dct_transforms,
    dataset_cache_dir=cache_dir,
    seed=seed,
    batch_size=batch_size,
    num_workers=num_workers,
)

In [18]:
# Set Hugging Face Datasets specific temporary directory under proj_dir
# (Google Drive in Colab), creating it if needed.
# Intended to keep temporary files created during dataset processing
# (e.g., mapping, caching) on Drive.
# NOTE(review): this runs after `datasets` has been imported and after the
# data module was constructed; confirm that the library actually reads
# HF_DATASETS_TEMP_DIR, and that it does so late enough for this to matter.
os.environ["HF_DATASETS_TEMP_DIR"] = str(Path(proj_dir) / ".hf_datasets_tmp")
Path(os.environ["HF_DATASETS_TEMP_DIR"]).mkdir(parents=True, exist_ok=True)
print(f"HF_DATASETS_TEMP_DIR set to: {os.environ['HF_DATASETS_TEMP_DIR']}")

HF_DATASETS_TEMP_DIR set to: /content/drive/MyDrive/repositories/deepfake-detection/.hf_datasets_tmp


In [19]:
# Early stopping: give up once val_loss stops improving.
early_stop_callback = L.callbacks.EarlyStopping(
    monitor="val_loss",  # metric to track
    mode="min",          # lower val_loss is better
    patience=3,          # epochs to wait for improvement
    verbose=True,
)

In [20]:
# define lightning checkpoint
# Keeps only the single checkpoint (save_top_k=1) with the lowest monitored
# val_loss. No output directory is given here.
best_loss_checkpoint = L.callbacks.ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

In [21]:
# define model
# Built with in_channels=1 and the freeze_features switch set above.
# NOTE(review): in_channels=1 presumably matches a single-channel output of
# the DCT transforms - confirm against src.transforms.frequency.
deepfake_detector = ResNetClassifier(in_channels=1, freeze_features=freeze_features)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 69.2MB/s]


In [22]:
# define trainer
max_epochs = 100  # upper bound; early stopping may end training sooner

trainer = L.Trainer(
    max_epochs=max_epochs,
    devices=1,
    default_root_dir=model_checkpoint_dir,
    callbacks=[early_stop_callback, best_loss_checkpoint],
    log_every_n_steps=100,
    profiler="simple",  # track time taken
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores


In [None]:
# train model
# Runs training with the data module defined above; the trainer's
# early-stopping and checkpoint callbacks are active during this call.
trainer.fit(deepfake_detector, datamodule=wilddeepfake_data_module)

Resolving data files:   0%|          | 0/963 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/157 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/124 [00:00<?, ?it/s]

In [None]:
# test dataset on unseen samples
# Evaluate the checkpoint with the lowest val_loss (kept by
# best_loss_checkpoint) instead of the in-memory weights, which after early
# stopping come from epochs trained past the best one.
# Requires trainer.fit to have been run in this session.
trainer.test(deepfake_detector, datamodule=wilddeepfake_data_module, ckpt_path="best")