In [None]:
!pip install -q kaggle
!pip install denoising_diffusion_pytorch
clear_output()

In [None]:
# Notebook-wide imports: filesystem / dataframe / array helpers, PyTorch data
# utilities, torchvision transforms, PIL image loading and the diffusion model
# classes from denoising_diffusion_pytorch.
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
# NOTE(review): numpy is imported a second time here; redundant but harmless.
import numpy as np
from IPython.display import clear_output
from PIL import Image
from denoising_diffusion_pytorch import Unet, GaussianDiffusion
# Clears anything the imports above may have printed (e.g. warnings).
clear_output()

In [None]:
# Colab-only: open the browser upload dialog. The next cell expects the
# uploaded file to be named kaggle.json and to land in the working directory.
from google.colab import files
files.upload()
# Clear the cell output produced by the upload widget.
clear_output()

In [None]:
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d freddiegraboski/btd-mri-and-ct-deepfake-test-sets
! unzip "btd-mri-and-ct-deepfake-test-sets.zip"
clear_output()

In [None]:

class MedicalDeepfakeDataset(Dataset):
    """Patch dataset driven by the CT / MRI CSV index files.

    Every CSV row provides an image ``path``, a point ``(x, y)`` in pixel
    coordinates and a ``type`` label. ``__getitem__`` loads the image,
    rescales its intensities and returns a square crop of side ``patch_size``
    centered on that point.

    Args:
        root_dir: Directory that contains the ``CT/`` and ``MRI/`` folders.
            It is only used to locate the CSV index; the ``path`` values read
            from the CSV are opened as-is.
        modality: ``'CT'`` (arrays read with ``np.load``) or ``'MRI'``
            (image files opened with PIL and converted to grayscale).
        attack_type: Used to build the CSV file name, e.g. ``'injection'``
            or ``'removal'``.
        tamper_type: CT only. When truthy, keep only the rows whose
            ``'{tamper_type}-CT-{attack_type}'`` column equals 1. Ignored
            for MRI.
        patch_size: Side length of the returned square patch, in pixels.
        transform: Optional callable applied to the MRI PIL image; its result
            must support ``.squeeze().numpy()`` (e.g. a torch tensor). It is
            not applied to CT arrays.

    Raises:
        ValueError: If ``modality`` is neither ``'CT'`` nor ``'MRI'``.
    """

    # Intensity window applied before scaling to [0, 1].
    # Presumably a Hounsfield-unit window for CT - confirm with the data source.
    _CLIP_MIN = -700
    _CLIP_MAX = 2000

    def __init__(self, root_dir, modality, attack_type, tamper_type, patch_size, transform=None):
        self.root_dir = root_dir
        self.modality = modality
        self.attack_type = attack_type
        self.tamper_type = tamper_type
        self.patch_size = patch_size
        self.transform = transform

        # Load the appropriate CSV file
        if modality == 'CT':
            csv_path = os.path.join(root_dir, f'CT/{attack_type}/CT_{attack_type}.csv')
            self.data_frame = pd.read_csv(csv_path)
            if tamper_type:
                # One flag column per tampering method; keep the rows marked 1.
                self.data_frame = self.data_frame[self.data_frame[f'{tamper_type}-CT-{attack_type}'] == 1]
        elif modality == 'MRI':
            csv_path = os.path.join(root_dir, f'MRI/MRI_{attack_type}.csv')
            self.data_frame = pd.read_csv(csv_path)
        else:
            raise ValueError('Invalid modality. Choose either "CT" or "MRI".')

    def __len__(self):
        """Return the number of (filtered) CSV rows."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``{'image': ndarray (1, patch_size, patch_size), 'label': row['type']}``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Positional lookup: the frame keeps its original index after filtering.
        row = self.data_frame.iloc[idx]
        x, y = row[['x', 'y']].astype(int)

        image = self._load_image(row['path'])
        patch = self._extract_patch(image, x, y)
        patch = patch.reshape(1, self.patch_size, self.patch_size)  # Add channel dimension

        return {'image': patch, 'label': row['type']}

    def _load_image(self, file_path):
        """Load one image as a 2-D array and map the clip window onto [0, 1]."""
        if self.modality == 'CT':
            image = np.load(file_path)
        elif self.modality == 'MRI':
            # The context manager releases the file handle deterministically.
            with Image.open(file_path) as handle:
                image = handle.convert('L')  # Convert to grayscale
            if self.transform:
                image = self.transform(image).squeeze().numpy()
            else:
                # Explicit float conversion: adding 700 to a uint8 array
                # raises OverflowError under NumPy 2 promotion rules.
                image = np.asarray(image, dtype=np.float64)
            # NOTE(review): the CT window below is also applied to MRI. For
            # inputs already in [0, 1] (e.g. the output of torchvision's
            # ToTensor) every pixel ends up in roughly [0.2593, 0.2596].
            # Behavior is kept unchanged because downstream models may rely
            # on it - confirm whether this is intended.
        else:
            raise ValueError('Invalid modality. Choose either "CT" or "MRI".')

        image = np.clip(image, self._CLIP_MIN, self._CLIP_MAX)
        return (image - self._CLIP_MIN) / (self._CLIP_MAX - self._CLIP_MIN)

    def _extract_patch(self, image, x, y):
        """Crop a ``patch_size`` square centered on ``(x, y)``, zero-filled outside the image.

        The part of the window that falls outside the image is left at zero
        on the side where it was clipped, so ``(x, y)`` stays at the patch
        center even next to the top/left border. A window that lies entirely
        outside the image produces an all-zero patch instead of an error.
        For an odd ``patch_size`` the window now spans the full
        ``patch_size`` pixels rather than ``patch_size - 1`` plus one padded
        row/column.
        """
        size = self.patch_size
        half_patch = size // 2
        height, width = image.shape[0], image.shape[1]

        # Window origin in image coordinates; may be negative near the border.
        top, left = y - half_patch, x - half_patch

        # Part of the window that actually overlaps the image.
        src_top, src_bottom = max(0, top), min(height, top + size)
        src_left, src_right = max(0, left), min(width, left + size)

        patch = np.zeros((size, size), dtype=image.dtype)
        if src_top < src_bottom and src_left < src_right:
            patch[src_top - top:src_bottom - top, src_left - left:src_right - left] = \
                image[src_top:src_bottom, src_left:src_right]
        return patch

In [None]:

# Root directory handed to the datasets for locating the CSV index files;
# the empty string means paths are resolved from the working directory.
root_dir = ''

# Transform handed to every dataset: converts a PIL image into a torch tensor.
transform = transforms.Compose([transforms.ToTensor()])

In [None]:

# Settings shared by the six test sets below.
BATCH_SIZE = 32
CT_PATCH_SIZE = 96
MRI_PATCH_SIZE = 128


def _make_dataset_and_loader(modality, attack_type, tamper_type, patch_size, batch_size=BATCH_SIZE):
    """Build one MedicalDeepfakeDataset and a shuffling DataLoader over it.

    Uses the notebook-level ``root_dir`` and ``transform``.

    Args:
        modality: ``'CT'`` or ``'MRI'``.
        attack_type: ``'injection'`` or ``'removal'``.
        tamper_type: Tampering method used to filter CT rows; ``None`` for MRI.
        patch_size: Side length of the extracted square patch.
        batch_size: Number of samples per batch.

    Returns:
        A ``(dataset, dataloader)`` tuple.
    """
    dataset = MedicalDeepfakeDataset(
        root_dir=root_dir,
        modality=modality,
        attack_type=attack_type,
        tamper_type=tamper_type,
        patch_size=patch_size,
        transform=transform,
    )
    return dataset, DataLoader(dataset, batch_size=batch_size, shuffle=True)


# ## CTGAN-CT-Inject
# This dataset contains CT scans with cancer injected using the CTGAN method.
ctgan_ct_inject_dataset, ctgan_ct_inject_dataloader = _make_dataset_and_loader(
    'CT', 'injection', 'CTGAN', CT_PATCH_SIZE)

# ## SD-CT-Inject
# This dataset contains CT scans with cancer injected using the Stable Diffusion method.
sd_ct_inject_dataset, sd_ct_inject_dataloader = _make_dataset_and_loader(
    'CT', 'injection', 'SD', CT_PATCH_SIZE)

# ## CTGAN-CT-Remove
# This dataset contains CT scans with cancer removed using the CTGAN method.
ctgan_ct_remove_dataset, ctgan_ct_remove_dataloader = _make_dataset_and_loader(
    'CT', 'removal', 'CTGAN', CT_PATCH_SIZE)

# ## SD-CT-Remove
# This dataset contains CT scans with cancer removed using the Stable Diffusion method.
sd_ct_remove_dataset, sd_ct_remove_dataloader = _make_dataset_and_loader(
    'CT', 'removal', 'SD', CT_PATCH_SIZE)

# ## SD-MRI-Inject
# This dataset contains MRI scans with cancer injected using the Stable Diffusion method.
# No specific tamper type is needed for MRI, hence tamper_type=None.
sd_mri_inject_dataset, sd_mri_inject_dataloader = _make_dataset_and_loader(
    'MRI', 'injection', None, MRI_PATCH_SIZE)

# ## SD-MRI-Remove
# This dataset contains MRI scans with cancer removed using the Stable Diffusion method.
sd_mri_remove_dataset, sd_mri_remove_dataloader = _make_dataset_and_loader(
    'MRI', 'removal', None, MRI_PATCH_SIZE)
