This notebook fine-tunes the Robust CLIP model, based on "https://github.com/chs20/RobustVLM".

In [None]:
%pip install open-clip-torch

In [None]:
import os
from tqdm import tqdm
from PIL import Image
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from open_clip import create_model_and_transforms

In [None]:
# Load the checkpoint identified by 'hf-hub:chs20/fare2-clip' through open_clip.
# The call returns three values: the first is kept as `model`, the second is
# discarded, and the third is kept as `image_processor`.
# NOTE(review): `image_processor` is not referenced anywhere in the cells
# visible here — confirm whether it was meant to be the dataset transform.
model, _, image_processor = create_model_and_transforms('hf-hub:chs20/fare2-clip')

In [None]:
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# Preprocessing applied to every image before it reaches the model:
# resize to 224x224, convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std below are hard-coded rather than taken from the
# loaded checkpoint's own preprocessing — confirm they match what the
# pretrained encoder expects.
data_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [None]:
class CustomDataset(Dataset):
    """Image dataset read from ``<root_dir>/REAL`` and ``<root_dir>/FAKE``.

    Images found in ``REAL`` get label 0 and images found in ``FAKE`` get
    label 1. Only files whose name ends in one of ``IMAGE_EXTENSIONS``
    (case-insensitive) are included.

    Args:
        root_dir: Directory that contains the ``REAL`` and ``FAKE`` folders.
        transform: Optional callable applied to each RGB PIL image before it
            is returned.

    Raises:
        FileNotFoundError: If either class folder does not exist (raised by
            ``os.listdir``).
    """

    # Folder order defines the label: index 0 -> REAL, index 1 -> FAKE.
    CLASS_FOLDERS = ("REAL", "FAKE")
    # Leading dots so that a name merely ending in "jpg" is not mistaken
    # for an image file.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # List of (image path, integer label) pairs.
        self.data = []

        for label, folder in enumerate(self.CLASS_FOLDERS):
            folder_path = os.path.join(root_dir, folder)
            # os.listdir returns entries in arbitrary order; sorting keeps
            # sample indices stable between runs and machines.
            for img_file in sorted(os.listdir(folder_path)):
                if img_file.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.data.append((os.path.join(folder_path, img_file), label))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        img_path, label = self.data[idx]
        # The context manager closes the underlying file as soon as the
        # pixels have been converted, instead of waiting for garbage
        # collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, label


In [None]:
root_dir = "/workspace/content/CIFAKE_final/train"
dataset = CustomDataset(root_dir=root_dir, transform=data_transform)

# Hold out 20% of the images for evaluation; the remainder is used for training.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split so that re-running this cell reproduces the same partition
# (for an unchanged dataset order). With an unseeded split, re-running the
# cell after training would move training images into the held-out set and
# inflate the reported metrics.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Create Data Loaders for training and testing
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Binary Classifier Model using CLIP image embeddings
class BinaryClassifier(torch.nn.Module):
    """Small dense head on top of an image encoder used as a fixed feature extractor.

    The encoder is called through ``base_model.encode_image`` inside
    ``torch.no_grad()``, so no gradients reach it; only ``dense_layers`` is
    trained. The head ends in a ``Sigmoid``, so ``forward`` returns
    probabilities in ``[0, 1]`` rather than logits: pair it with a loss that
    expects probabilities (e.g. ``torch.nn.BCELoss``) and threshold at 0.5.

    Args:
        base_model: Module exposing ``encode_image(images)``.
        embed_dim: Size of the feature vector returned by ``encode_image``.
        hidden_dim: Width of the hidden dense layer.
        dropout: Dropout probability applied after the hidden activation.
    """

    def __init__(self, base_model, embed_dim=768, hidden_dim=512, dropout=0.5):
        super().__init__()
        self.base_model = base_model
        # Layer order (and therefore state_dict keys) is kept as
        # Linear / GELU / Dropout / Linear / Sigmoid.
        self.dense_layers = torch.nn.Sequential(
            torch.nn.Linear(embed_dim, hidden_dim),
            torch.nn.GELU(),
            torch.nn.Dropout(dropout),
            torch.nn.Linear(hidden_dim, 1),
            torch.nn.Sigmoid(),
        )

    def forward(self, images):
        """Return one probability per image, shape ``(batch, 1)``."""
        # The encoder is not trained: skip autograd bookkeeping for it.
        with torch.no_grad():
            # Cast to float32 so the features match the head's parameters.
            image_features = self.base_model.encode_image(images).float()

        return self.dense_layers(image_features)


In [None]:
combined_model = BinaryClassifier(model).to(device)

# BinaryClassifier already ends in a Sigmoid, so it emits probabilities.
# BCEWithLogitsLoss would apply a second sigmoid on top of them, squashing
# the values into roughly [0.5, 0.73] and distorting the loss; BCELoss
# consumes probabilities directly.
criterion = torch.nn.BCELoss()
# The encoder runs under torch.no_grad() inside BinaryClassifier.forward and
# never receives gradients, so only the dense head is handed to the optimiser.
optimizer = torch.optim.Adam(combined_model.dense_layers.parameters(), lr=1e-4)


In [None]:
# Training loop: run `epochs` passes over train_loader and report the mean
# batch loss after each pass.

epochs = 3

for epoch in range(epochs):
    combined_model.train()
    train_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False)

    for images, labels in train_bar:
        images = images.to(device)
        # Targets are cast to float and given a trailing axis of size 1.
        labels = labels.float().to(device).unsqueeze(1)

        # Clear gradients left over from the previous step, then do one
        # forward/backward/update cycle.
        optimizer.zero_grad()
        outputs = combined_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the running total and the
        # progress bar.
        batch_loss = loss.item()
        train_loss += batch_loss
        train_bar.set_postfix(loss=batch_loss)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss / len(train_loader)}")


In [None]:
# Evaluation Loop (Testing)

combined_model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    test_bar = tqdm(test_loader, desc="Testing", leave=False)
    for images, labels in test_bar:
        images = images.to(device)
        labels = labels.cpu().numpy()

        outputs = combined_model(images).cpu().numpy()
        # The classifier ends in a Sigmoid, so outputs are probabilities and
        # 0.5 is the decision threshold. The model output has a trailing
        # axis of size 1; flatten it so each prediction is a scalar that
        # lines up one-to-one with its label instead of a 1-element array.
        predictions = (outputs > 0.5).astype(int).ravel()

        y_true.extend(labels.tolist())
        y_pred.extend(predictions.tolist())

# Metrics
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
# Fix the label set (0 = REAL, 1 = FAKE) so the matrix is always 2x2, even if
# one class happens to be absent from the labels and predictions.
conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])

print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Confusion Matrix:\n{conf_matrix}")