## Zero-shot without classes incorporated

In [11]:
import torch
from PIL import Image
import requests
from transformers import CLIPProcessor, CLIPModel
import os
from torcheval.metrics import BinaryAccuracy, BinaryF1Score, BinaryConfusionMatrix, BinaryPrecisionRecallCurve

def clip_pred(imgs):
    """
    Score images against the two zero-shot prompts with CLIP.

    The pretrained "openai/clip-vit-base-patch32" model and processor are
    loaded on every call.

    Args:
        imgs: image or list of images accepted by ``CLIPProcessor``.

    Returns:
        Tensor of softmax probabilities taken over dim 1 of the image-text
        similarity logits. Column 0 belongs to the "synthetic" prompt and
        column 1 to the "real" prompt (the order of the ``text`` list).
    """
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    inputs = processor(
        text=["a synthetic image created by AI", "a real image taken by a human"],
        images=imgs,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: without no_grad the forward pass records an autograd
    # graph and the returned probabilities would keep it alive.
    with torch.no_grad():
        outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # image-text similarity score
    return logits_per_image.softmax(dim=1)

In [12]:
def load_images_from_folder(folder):
    """
    Load every regular file in ``folder`` as a PIL image.

    Args:
        folder: path of the directory to read.

    Returns:
        Tuple ``(images, labels)``: the list of loaded PIL images and an int
        tensor of zeros with one entry per image.
    """
    images = []

    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        # Sub-directories cannot be opened as images; skip them.
        if not os.path.isfile(img_path):
            continue
        # Image.open is lazy and keeps the file open. Read the pixel data
        # inside the context manager so the handle is released right away;
        # the loaded image stays usable afterwards.
        with Image.open(img_path) as img:
            img.load()
        images.append(img)

    labels = torch.zeros(len(images), dtype=torch.int)
    return images, labels

In [13]:
def metrics(ys, ts):
    """
    Compute binary accuracy, F1 score and confusion matrix.

    Args:
        ys: predictions/scores passed as the first argument of each
            metric's ``update``.
        ts: targets passed as the second argument of each metric's ``update``.

    Returns:
        Tuple ``(accuracy, f1, confusion_matrix)`` of the computed metrics.
    """
    scorers = (BinaryAccuracy(), BinaryF1Score(), BinaryConfusionMatrix())
    for scorer in scorers:
        scorer.update(ys, ts)
    return tuple(scorer.compute() for scorer in scorers)


In [14]:
# Machine-specific absolute path to the folder of sample images.
folder_path = '/Users/lucialu/Github/DTU_Workspace/5_semester/02456-deep-learning-with-PyTorch/4_Convolutional/images'
# load_images_from_folder labels every image 0.
images, labels = load_images_from_folder(folder_path)
probs = clip_pred(images)
# Column 1 of probs (the "real image" prompt) is used as the score.
metrics(probs[:,1], labels)



(tensor(1.),
 tensor(0.),
 tensor([[2., 0.],
         [0., 0.]]))

### HPC adjusted zero-shot no classes

In [None]:
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import os
from torcheval.metrics import BinaryAccuracy, BinaryF1Score, BinaryConfusionMatrix

# Debug-oriented environment switches for CUDA, NCCL and the Python fault handler.
os.environ.update(
    {
        "CUDA_LAUNCH_BLOCKING": "1",
        "NCCL_DEBUG": "INFO",
        "PYTHONFAULTHANDLER": "1",
    }
)

# Report which GPU (if any) is visible and how much memory is in use.
if not torch.cuda.is_available():
    print("CUDA is not available. Check your GPU setup.")
else:
    print(f"CUDA is available. Device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Memory Allocated: {torch.cuda.memory_allocated() / (1024 ** 3):.2f} GB")
    print(f"CUDA Memory Reserved: {torch.cuda.memory_reserved() / (1024 ** 3):.2f} GB")

def clip_pred(imgs, model, processor):
    """
    Perform prediction using the CLIP model.

    Args:
        imgs: batch of images accepted by ``processor``.
        model: CLIP model, already placed on the device it should run on.
        processor: callable that turns the prompts and images into a mapping
            of tensors.

    Returns:
        Tensor of softmax probabilities taken over dim 1 of
        ``logits_per_image``, located on the model's device. Column 0 belongs
        to the "synthetic" prompt and column 1 to the "real" prompt.
    """
    inputs = processor(
        text=["a synthetic image created by AI", "a real image taken by a human"],
        images=imgs,
        return_tensors="pt",
        padding=True,
    )

    # Follow the model's own device instead of hard-coding "cuda", so the
    # function also works when the model lives on the CPU or another GPU.
    device = next(model.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():  # Disable gradient calculation for inference
        outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # Image-text similarity score
    return logits_per_image.softmax(dim=1)  # Probability over the two prompts

def load_images_from_folders(fake_folder, real_folder):
    """
    Collect RGB images from the FAKE and REAL folders together with labels.

    Args:
        fake_folder: directory holding the FAKE images (label 0).
        real_folder: directory holding the REAL images (label 1).

    Returns:
        Tuple ``(images, labels)``: the list of RGB PIL images (FAKE entries
        first, then REAL) and an int tensor with the matching labels.
    """
    images, labels = [], []

    # Same loading routine for both folders; only the label differs.
    for folder, label in ((fake_folder, 0), (real_folder, 1)):
        for filename in os.listdir(folder):
            img_path = os.path.join(folder, filename)
            if not os.path.isfile(img_path):
                continue  # ignore sub-directories and other non-files
            images.append(Image.open(img_path).convert("RGB"))
            labels.append(label)

    return images, torch.tensor(labels, dtype=torch.int)

def evaluate_model(images, labels, batch_size=64):
    """
    Evaluate the CLIP model using mini-batch processing and calculate metrics.

    Args:
        images: list of images to classify.
        labels: tensor of ground-truth labels, one per image.
        batch_size: number of images sent through the model at once.

    Returns:
        Tuple ``(accuracy, f1, confusion_matrix)``. The same values are also
        printed.

    Raises:
        ValueError: if ``images`` is empty.
    """
    # Fail before the model is loaded rather than inside torch.cat later on.
    if len(images) == 0:
        raise ValueError("evaluate_model needs at least one image.")

    # Load the CLIP model on the GPU when CUDA is available, otherwise on CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    # Process images in mini-batches. Each result is moved to the CPU so the
    # predictions end up on the same device as ``labels`` and per-batch
    # outputs do not pile up in GPU memory.
    probs = [
        clip_pred(images[i:i + batch_size], model, processor).cpu()
        for i in range(0, len(images), batch_size)
    ]
    probs = torch.cat(probs, dim=0)  # Combine all batches
    preds = torch.argmax(probs, dim=1)  # Predicted labels

    # Calculate metrics
    acc = BinaryAccuracy()
    f1 = BinaryF1Score()
    cm = BinaryConfusionMatrix()

    acc.update(preds, labels)
    f1.update(preds, labels)
    cm.update(preds, labels)

    # Compute each metric once and reuse it for printing and returning.
    accuracy, f1_score, confusion = acc.compute(), f1.compute(), cm.compute()

    print(f"Accuracy: {accuracy.item():.4f}")
    print(f"F1 Score: {f1_score.item():.4f}")
    print(f"Confusion Matrix: {confusion}")
    return accuracy, f1_score, confusion

# Cluster paths to the FAKE and REAL test folders
fake_folder = r'/dtu/blackhole/18/160664/test/FAKE/'
real_folder = r'/dtu/blackhole/18/160664/test/REAL/'

# Load all images into memory together with their labels (0 = FAKE, 1 = REAL)
images, labels = load_images_from_folders(fake_folder, real_folder)
print(f"Loaded {len(images)} images.")
print(f"Labels: {labels}")

# Run zero-shot classification in mini-batches and print the metrics
evaluate_model(images, labels)

## Zero-shot with classes implemented

In [2]:
import torch
from PIL import Image
import requests
from transformers import CLIPProcessor, CLIPModel
import os
from torcheval.metrics import BinaryAccuracy, BinaryF1Score, BinaryConfusionMatrix, BinaryPrecisionRecallCurve

def clip_pred(imgs, imgs_class):
    """
    Score each image against class-specific "generated" vs "real" prompts.

    The pretrained "openai/clip-vit-base-patch32" model and processor are
    loaded on every call. Images are processed one at a time because each
    image gets prompts built from its own class name.

    Args:
        imgs: sequence of images accepted by ``CLIPProcessor``.
        imgs_class: sequence of class names, indexed like ``imgs``.

    Returns:
        Tensor of softmax probabilities taken over dim 1 of the stacked
        ``logits_per_image`` rows. Column 0 belongs to the "artificially
        generated" prompt and column 1 to the "real" prompt.
    """
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    # Seeded with an empty (0, 2) tensor so an empty input still yields a
    # (0, 2) result; rows are concatenated once after the loop.
    logits = [torch.empty(0, 2)]

    for i, img in enumerate(imgs):
        img_class = imgs_class[i]
        inputs = processor(
            # The space before the class name was missing, which produced
            # prompts such as "a real image of acat".
            text=[
                f"an artificially generated image of a {img_class}",
                f"a real image of a {img_class}",
            ],
            images=img,
            return_tensors="pt",
            padding=True,
        )

        # Inference only: do not record an autograd graph.
        with torch.no_grad():
            output = model(**inputs)
        logits.append(output.logits_per_image)  # image-text similarity score

    return torch.cat(logits, dim=0).softmax(dim=1)

def load_images_from_folder(folder):
    """
    Load every regular file in ``folder`` and derive its class from the name.

    The class is chosen by looking for a marker such as ``(4)`` in the file
    name. Files without any marker are treated as ``'airplane'``.

    Args:
        folder: path of the directory to read.

    Returns:
        Tuple ``(images, labels, imgs_class)``: the loaded PIL images, an int
        tensor of zeros with one entry per image, and the class name of each
        image in the same order.
    """
    class_types = {'(2)':'automobile','(3)': 'bird', '(4)': 'cat', '(5)': 'deer', '(6)': 'dog', '(7)': 'frog', '(8)': 'horse', '(9)': 'ship', '(10)': 'truck'}
    images = []
    imgs_class = []

    # TODO: split the images by class up front instead of looping over every
    # single image, then run the model once per class.
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        # Sub-directories cannot be opened as images; skip them.
        if not os.path.isfile(img_path):
            continue
        # Image.open is lazy and keeps the file open. Read the pixel data
        # inside the context manager so the handle is released right away.
        with Image.open(img_path) as img:
            img.load()
        images.append(img)

        # Test every marker before falling back to 'airplane'. Previously the
        # loop gave up after the first marker, so anything that was not an
        # automobile was labelled 'airplane'.
        img_class = 'airplane'
        for marker, name in class_types.items():
            if marker in filename:
                img_class = name
                break
        imgs_class.append(img_class)

    labels = torch.zeros(len(images), dtype=torch.int)
    return images, labels, imgs_class

def load_image_classes_from_folder(folder):
    """
    Load the images in ``folder`` and group them by class.

    The class is chosen by looking for a marker such as ``(4)`` in the file
    name. Files without any marker are treated as ``'airplane'``.

    Args:
        folder: path of the directory to read.

    Returns:
        Dict mapping each class name to the list of PIL images of that class.
        Every class is present as a key, even when its list is empty.
    """
    class_types = {'(2)': 'automobile', '(3)':'bird', '(4)': 'cat', '(5)': 'deer', '(6)': 'dog', '(7)': 'frog', '(8)': 'horse', '(9)': 'ship', '(10)': 'truck'}
    images_by_class = {name: [] for name in class_types.values()}
    images_by_class['airplane'] = []

    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        # Sub-directories cannot be opened as images; skip them.
        if not os.path.isfile(img_path):
            continue
        # Image.open is lazy and keeps the file open. Read the pixel data
        # inside the context manager so the handle is released right away.
        with Image.open(img_path) as img:
            img.load()

        # Default to 'airplane' unless one of the markers is in the name.
        img_class = 'airplane'
        for marker, name in class_types.items():
            if marker in filename:
                img_class = name
                break

        images_by_class[img_class].append(img)

    return images_by_class

def metrics(ys, ts):
    """
    Compute binary accuracy, F1 score, confusion matrix and PR curve.

    Args:
        ys: predictions/scores passed as the first argument of each
            metric's ``update``.
        ts: targets passed as the second argument of each metric's ``update``.

    Returns:
        Tuple ``(accuracy, f1, confusion_matrix, precision_recall_curve)`` of
        the computed metrics.
    """
    scorers = (
        BinaryAccuracy(),
        BinaryF1Score(),
        BinaryConfusionMatrix(),
        BinaryPrecisionRecallCurve(),
    )
    for scorer in scorers:
        scorer.update(ys, ts)

    return tuple(scorer.compute() for scorer in scorers)

# Relative path to the folder of sample images.
folder_path = "images"
# Per-image pipeline, currently disabled in favour of the per-class grouping below.
# images, labels, imgs_class = load_images_from_folder(folder_path)
# probs = clip_pred(images, imgs_class)
# metrics(probs[:,1], labels)

# Group the images by class and display the resulting dict.
images_by_class = load_image_classes_from_folder(folder_path)
images_by_class

{'automobile': [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>],
 'bird': [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>],
 'cat': [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>],
 'deer': [],
 'dog': [],
 'frog': [],
 'horse': [],
 'ship': [],
 'truck': [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>],
 'airplane': []}

In [6]:
# Scratch version of load_image_classes_from_folder, run at cell level.
# Marker found in the file name -> class name; files without a marker are airplanes.
class_types = {'(2)': 'automobile', '(3)':'bird', '(4)': 'cat', '(5)': 'deer', '(6)': 'dog', '(7)': 'frog', '(8)': 'horse', '(9)': 'ship', '(10)': 'truck'}
images_by_class = {name: [] for name in [*class_types.values(), 'airplane']}

folder = 'images'

for filename in os.listdir(folder):
    img = Image.open(os.path.join(folder, filename))

    # Start from the fallback class and override it on the first marker hit.
    img_index = 'airplane'
    for key, value in class_types.items():
        if key in filename:
            img_index = value
            break

    images_by_class[img_index].append(img)

images_by_class

{'automobile': [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>],
 'bird': [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>],
 'cat': [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>],
 'deer': [],
 'dog': [],
 'frog': [],
 'horse': [],
 'ship': [],
 'truck': [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=32x32>],
 'airplane': []}

In [7]:
# Debug cell: print the marker -> class table once per entry in ``folder``.
# The images themselves are not needed for this, so they are not opened
# (opening them left one unused, unclosed file handle per entry).
for filename in os.listdir(folder):
    for key, value in class_types.items():
        print(key, value)

(2) automobile
(3) bird
(4) cat
(5) deer
(6) dog
(7) frog
(8) horse
(9) ship
(10) truck
(2) automobile
(3) bird
(4) cat
(5) deer
(6) dog
(7) frog
(8) horse
(9) ship
(10) truck
(2) automobile
(3) bird
(4) cat
(5) deer
(6) dog
(7) frog
(8) horse
(9) ship
(10) truck
(2) automobile
(3) bird
(4) cat
(5) deer
(6) dog
(7) frog
(8) horse
(9) ship
(10) truck
(2) automobile
(3) bird
(4) cat
(5) deer
(6) dog
(7) frog
(8) horse
(9) ship
(10) truck
(2) automobile
(3) bird
(4) cat
(5) deer
(6) dog
(7) frog
(8) horse
(9) ship
(10) truck


### Testing functions?

In [None]:
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import os
from torcheval.metrics import BinaryAccuracy, BinaryF1Score, BinaryConfusionMatrix

# Debug-oriented environment switches for CUDA, NCCL and the Python fault handler.
os.environ.update(
    {
        "CUDA_LAUNCH_BLOCKING": "1",
        "NCCL_DEBUG": "INFO",
        "PYTHONFAULTHANDLER": "1",
    }
)

# Report which GPU (if any) is visible and how much memory is in use.
if not torch.cuda.is_available():
    print("CUDA is not available. Check your GPU setup.")
else:
    print(f"CUDA is available. Device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Memory Allocated: {torch.cuda.memory_allocated() / (1024 ** 3):.2f} GB")
    print(f"CUDA Memory Reserved: {torch.cuda.memory_reserved() / (1024 ** 3):.2f} GB")

def clip_pred(imgs, model, processor):
    """
    Perform prediction using the CLIP model.

    Args:
        imgs: batch of images accepted by ``processor``.
        model: CLIP model, already placed on the device it should run on.
        processor: callable that turns the prompts and images into a mapping
            of tensors.

    Returns:
        Tensor of softmax probabilities taken over dim 1 of
        ``logits_per_image``, located on the model's device. Column 0 belongs
        to the "synthetic" prompt and column 1 to the "real" prompt.
    """
    inputs = processor(
        text=["a synthetic image created by AI", "a real image taken by a human"],
        images=imgs,
        return_tensors="pt",
        padding=True,
    )

    # Follow the model's own device instead of hard-coding "cuda", so the
    # function also works when the model lives on the CPU or another GPU.
    device = next(model.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():  # Disable gradient calculation for inference
        outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # Image-text similarity score
    return logits_per_image.softmax(dim=1)  # Probability over the two prompts

def load_images_from_folders(fake_folder, real_folder):
    """
    Collect RGB images from the FAKE and REAL folders together with labels.

    Args:
        fake_folder: directory holding the FAKE images (label 0).
        real_folder: directory holding the REAL images (label 1).

    Returns:
        Tuple ``(images, labels)``: the list of RGB PIL images (FAKE entries
        first, then REAL) and an int tensor with the matching labels.
    """
    images, labels = [], []

    # Same loading routine for both folders; only the label differs.
    for folder, label in ((fake_folder, 0), (real_folder, 1)):
        for filename in os.listdir(folder):
            img_path = os.path.join(folder, filename)
            if not os.path.isfile(img_path):
                continue  # ignore sub-directories and other non-files
            images.append(Image.open(img_path).convert("RGB"))
            labels.append(label)

    return images, torch.tensor(labels, dtype=torch.int)

def evaluate_model(images, labels, batch_size=64):
    """
    Evaluate the CLIP model using mini-batch processing and calculate metrics.

    Args:
        images: list of images to classify.
        labels: tensor of ground-truth labels, one per image.
        batch_size: number of images sent through the model at once.

    Returns:
        Tuple ``(accuracy, f1, confusion_matrix)``. The same values are also
        printed.

    Raises:
        ValueError: if ``images`` is empty.
    """
    # Fail before the model is loaded rather than inside torch.cat later on.
    if len(images) == 0:
        raise ValueError("evaluate_model needs at least one image.")

    # Load the CLIP model on the GPU when CUDA is available, otherwise on CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    # Process images in mini-batches. Each result is moved to the CPU so the
    # predictions end up on the same device as ``labels`` and per-batch
    # outputs do not pile up in GPU memory.
    probs = [
        clip_pred(images[i:i + batch_size], model, processor).cpu()
        for i in range(0, len(images), batch_size)
    ]
    probs = torch.cat(probs, dim=0)  # Combine all batches
    preds = torch.argmax(probs, dim=1)  # Predicted labels

    # Calculate metrics
    acc = BinaryAccuracy()
    f1 = BinaryF1Score()
    cm = BinaryConfusionMatrix()

    acc.update(preds, labels)
    f1.update(preds, labels)
    cm.update(preds, labels)

    # Compute each metric once and reuse it for printing and returning.
    accuracy, f1_score, confusion = acc.compute(), f1.compute(), cm.compute()

    print(f"Accuracy: {accuracy.item():.4f}")
    print(f"F1 Score: {f1_score.item():.4f}")
    print(f"Confusion Matrix: {confusion}")
    return accuracy, f1_score, confusion

# Cluster paths to the FAKE and REAL test folders
fake_folder = r'/dtu/blackhole/18/160664/test/FAKE/'
real_folder = r'/dtu/blackhole/18/160664/test/REAL/'

# Load all images into memory together with their labels (0 = FAKE, 1 = REAL)
images, labels = load_images_from_folders(fake_folder, real_folder)
print(f"Loaded {len(images)} images.")
print(f"Labels: {labels}")

# Run zero-shot classification in mini-batches and print the metrics
evaluate_model(images, labels)