In [1]:
import os
import re

import pandas as pd
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [2]:
class DeepfakeDataset(Dataset):
    """Image/label dataset driven by a CSV index file.

    Each CSV row describes one sample. Columns are read by position: the
    column at position 1 holds the image path relative to ``root_dir`` and
    the column at position 2 holds the label.

    Args:
        csv_file: Path to the CSV index; loaded with ``pandas.read_csv``.
        root_dir: Directory the image paths in the CSV are relative to.
            A trailing path separator is optional.
        transform: Optional callable applied to the RGB image of each
            sample. When ``None`` the image is returned as loaded.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample stored at row ``idx``.

        Returns:
            An ``(image, label)`` tuple: the RGB image (passed through
            ``transform`` when one was supplied) and the label wrapped in a
            ``torch.Tensor``.
        """
        # os.path.join tolerates a root_dir with or without a trailing
        # separator; plain string concatenation silently built a wrong path
        # when the separator was missing.
        img_path = os.path.join(self.root_dir, self.data.iloc[idx, 1])

        # convert() returns an independent copy, so the underlying file
        # handle can be released as soon as the conversion is done.
        with Image.open(img_path) as source:
            image = source.convert("RGB")

        label = self.data.iloc[idx, 2]

        # transform is optional (defaults to None); only call it when given.
        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor(label)

In [3]:
# Preprocessing applied to every image: resize to 128x128, then convert the
# PIL image to a tensor.
transform = transforms.Compose(
    [transforms.Resize((128, 128)), transforms.ToTensor()]
)

# Training set: the CSV index and the directory its image paths are relative
# to both live in the Kaggle input folder.
dataset = DeepfakeDataset(
    "/kaggle/input/ai-vs-human-generated-dataset/train.csv",
    "/kaggle/input/ai-vs-human-generated-dataset/",
    transform,
)

# Batches of 32 samples, drawn in shuffled order.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)