In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the CSV files for the three splits.
train_df = pd.read_csv("../data/train.csv")
val_df = pd.read_csv("../data/validation.csv")
test_df = pd.read_csv("../data/test.csv")

# Quick inspection: shape and first rows of the train and validation splits.
for heading, split_df in (("Train:", train_df), ("Validation:", val_df)):
    print(heading, split_df.shape)
    print(split_df.head(), "\n")

# Class distribution in the training split: raw counts, then a count plot.
label_counts = train_df['label'].value_counts()
print(label_counts)
sns.countplot(data=train_df, x='label')
plt.title("Distribuția claselor în train")
plt.show()


In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os

# 2.1. Transforms
def _build_transforms(augment):
    """Assemble the image preprocessing pipeline.

    Every pipeline resizes to 224x224, converts to a tensor and normalises each
    channel with a fixed mean/std. When ``augment`` is true, a random horizontal
    flip and a random rotation of up to 15 degrees are inserted after the resize.
    """
    steps = [transforms.Resize((224,224))]
    if augment:
        steps.append(transforms.RandomHorizontalFlip())
        steps.append(transforms.RandomRotation(15))
    steps.append(transforms.ToTensor())
    steps.append(
        transforms.Normalize(mean=[0.485,0.456,0.406],
                             std=[0.229,0.224,0.225])
    )
    return transforms.Compose(steps)


# Training uses augmentation; validation applies the deterministic steps only.
train_transforms = _build_transforms(augment=True)
val_transforms = _build_transforms(augment=False)

# 2.2. Custom dataset
class DeepfakeDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Each row of ``df`` must provide an ``image_id`` column (the file name,
    without extension, of a PNG inside ``img_dir``) and a ``label`` column.

    Args:
        df: DataFrame with ``image_id`` and ``label`` columns. It is stored
            re-indexed from 0 so positional and label-based lookups agree.
        img_dir: Directory containing the ``<image_id>.png`` files.
        transform: Optional callable applied to the RGB PIL image before it
            is returned.
    """

    def __init__(self, df, img_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return ``(image, label)``.

        The image is opened from ``<img_dir>/<image_id>.png``, converted to
        RGB and passed through ``transform`` when one was given. The label is
        returned as a ``torch.long`` tensor.
        """
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, f"{row['image_id']}.png")
        # Image.open is lazy and holds the file open; convert() returns an
        # independent image, so the handle can be closed right after it.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        label = torch.tensor(row["label"], dtype=torch.long)
        return img, label

# 2.3. DataLoaders
# NOTE(review): the CSV files in this notebook are read from "../data/...",
# while the image folders below are given as "data/..." relative to the
# working directory — confirm that both locations are intended.
_loader_opts = dict(batch_size=32, num_workers=4)

train_ds = DeepfakeDataset(train_df, "data/train", transform=train_transforms)
val_ds = DeepfakeDataset(val_df, "data/validation", transform=val_transforms)

# Only the training loader shuffles; the validation loader does not.
train_loader = DataLoader(train_ds, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_ds, shuffle=False, **_loader_opts)

# Report how many batches each loader yields per pass.
print("Batch-uri train:", len(train_loader))
print("Batch-uri val:  ", len(val_loader))


Batch-uri train: 391
Batch-uri val:   40


In [None]:
import numpy as np

def imshow_batch(images, labels, nrow=4, figsize=(12, 6)):
    """Show a batch of normalised image tensors in a grid, titled by label.

    Args:
        images: Tensor of shape (N, 3, H, W) that was normalised with the
            mean/std hard-coded below; the normalisation is undone for display.
        labels: Iterable of N labels (tensor elements or plain values).
        nrow: Number of images per grid row, i.e. the number of columns.
        figsize: Figure size passed to ``plt.subplots``.

    Raises:
        ValueError: If ``nrow`` is smaller than 1 or ``images`` is empty.
    """
    if nrow < 1:
        raise ValueError("nrow must be a positive integer")

    # detach()/cpu() let this accept tensors that track gradients or live on
    # an accelerator; a bare .numpy() raises for both.
    imgs = images.detach().cpu().numpy().transpose((0, 2, 3, 1))
    if len(imgs) == 0:
        raise ValueError("images is empty; nothing to display")

    # Undo the per-channel normalisation and clamp into the displayable range.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    imgs = np.clip(std * imgs + mean, 0, 1)

    # Ceiling division: floor division would drop the trailing images of a
    # batch that is not a multiple of nrow (and request 0 rows for N < nrow).
    n_rows = -(-len(imgs) // nrow)
    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid.
    _fig, axes = plt.subplots(n_rows, nrow, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    # Hide every axis up front so unused cells in the last row stay blank.
    for ax in axes:
        ax.axis('off')
    for img, lbl, ax in zip(imgs, labels, axes):
        ax.imshow(img)
        value = lbl.item() if hasattr(lbl, "item") else lbl
        ax.set_title(f"Label: {value}")
    plt.tight_layout()
    plt.show()

# Fetch the first batch from the training loader and display it.
# iter() creates a fresh iterator over train_loader and next() pulls a single
# (images, labels) pair from it; the iterator itself is not kept.
images, labels = next(iter(train_loader))
imshow_batch(images, labels, nrow=4)
