In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from PIL import Image
from torchvision import transforms, datasets, models
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, classification_report


In [2]:
class ImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = ['real', 'fake']
        self.image_paths = []
        self.labels = []

        # Load real images
        real_dir = os.path.join(root_dir, 'real')
        for img_name in os.listdir(real_dir):
            self.image_paths.append(os.path.join(real_dir, img_name))
            self.labels.append(0)
        
        fake_dir = os.path.join(root_dir, 'fake')
        for img_name in os.listdir(fake_dir):
            self.image_paths.append(os.path.join(fake_dir, img_name))
            self.labels.append(1)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)
        
        return image, label


In [3]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

data_dir = r'E:\datasets\realfake\real_vs_fake\real-vs-fake\train'

dataset = datasets.ImageFolder(root=data_dir, transform=transform)
print(f"Found {len(dataset)} images, classes: {dataset.classes}")

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)



Found 20000 images, classes: ['fake', 'real']


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')


Using device: cuda


In [5]:
model = models.vit_b_16(pretrained=True)
model.heads.head = nn.Linear(model.heads.head.in_features, 2)
model = model.to(device)


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to C:\Users\jai54/.cache\torch\hub\checkpoints\vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:16<00:00, 20.9MB/s] 


In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)


def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs=10):
    best_acc = 0.0
    train_losses, test_losses, test_accuracies = [], [], []
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        
        for images, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        train_losses.append(running_loss / len(train_loader))
        
        model.eval()
        test_loss = 0.0
        all_preds, all_labels = [], []
        
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                
                preds = outputs.argmax(dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        acc = accuracy_score(all_labels, all_preds)
        test_losses.append(test_loss / len(test_loader))
        test_accuracies.append(acc)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}, Test Acc: {acc:.4f}')
        
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), 'best_vit_model.pth')
    
    return train_losses, test_losses, test_accuracies


In [7]:
train_losses, test_losses, test_accuracies = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=10
)

Epoch 1/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [1/10], Train Loss: 0.3521, Test Loss: 0.2666, Test Acc: 0.8908


Epoch 2/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [2/10], Train Loss: 0.1221, Test Loss: 0.1961, Test Acc: 0.9175


Epoch 3/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [3/10], Train Loss: 0.0813, Test Loss: 0.1146, Test Acc: 0.9577


Epoch 4/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [4/10], Train Loss: 0.0597, Test Loss: 0.1572, Test Acc: 0.9497


Epoch 5/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [5/10], Train Loss: 0.0489, Test Loss: 0.1769, Test Acc: 0.9380


Epoch 6/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [6/10], Train Loss: 0.0480, Test Loss: 0.1696, Test Acc: 0.9500


Epoch 7/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [7/10], Train Loss: 0.0424, Test Loss: 0.1347, Test Acc: 0.9557


Epoch 8/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [8/10], Train Loss: 0.0408, Test Loss: 0.1416, Test Acc: 0.9473


Epoch 9/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [9/10], Train Loss: 0.0348, Test Loss: 0.1269, Test Acc: 0.9510


Epoch 10/10:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [10/10], Train Loss: 0.0342, Test Loss: 0.1841, Test Acc: 0.9405


In [12]:
print(f"Test accuracy: {test_accuracies[-1]:.4f}")


Test accuracy: 0.9405


In [13]:
from PIL import Image
import torch
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    )
])

def predict_image(model, image_path, device):
    model.eval()
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        outputs = model(image)
        probabilities = torch.softmax(outputs, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1).item()
        confidence = probabilities[0][predicted_class].item()
    
    label = "Real" if predicted_class == 0 else "Fake"
    
    return label, confidence


In [27]:
predict_image(model,r"C:\Users\jai54\OneDrive\dev\humanVS\iamge_2.png",device)

('Fake', 0.9995105266571045)

In [28]:
predict_image(model,r"C:\Users\jai54\OneDrive\dev\humanVS\image.png",device)

('Fake', 0.9999622106552124)