In [19]:
import os
import torch
import json
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Load the CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# List of artifacts to check
artifact_list = [
    "Anatomically incorrect paw structures",
    "Impossible foreshortening in animal bodies",
    "Anatomically impossible joint configurations",
    "Misaligned body panels",
    "Cinematization Effects",
    "Artificial smoothness",
    "Movie-poster like composition of ordinary scenes",
    "Exaggerated characteristic features",
    "Incorrect Skin Tones",
    "Synthetic material appearance",
    "Inconsistent shadow directions",
    "Multiple light source conflicts",
    "Multiple inconsistent shadow sources",
    "Dramatic lighting that defies natural physics",
    "Distorted window reflections",
    "Ghosting effects: Semi-transparent duplicates of elements",
    "Regular grid-like artifacts in textures",
    "Repeated element patterns",
    "Scale inconsistencies within the same object class",
    "Unrealistic specular highlights",
    "Spatial relationship errors",
    "Aliasing along high-contrast edges",
    "Artificial enhancement artifacts",
    "Unnatural pose artifacts",
    "Color coherence breaks",
    "Unnatural color transitions",
    "Discontinuous surfaces",
    "Floating or disconnected components",
    "Irregular proportions in mechanical components",
    "Abruptly cut off objects",
    "Impossible mechanical connections",
    "Inconsistent scale of mechanical parts",
    "Physically impossible structural elements",
    "Scale inconsistencies within single object"
]

# Function to analyze a single image for artifacts and return results
def analyze_single_image(image_path):
    # Load, resize, and preprocess the image
    image = Image.open(image_path).convert("RGB").resize((32, 32))
    inputs = processor(text=artifact_list, images=image, return_tensors="pt", padding=True).to(device)

    # Get similarity scores from CLIP
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # Shape: (1, len(artifact_list))
    scores = logits_per_image.softmax(dim=1).squeeze()  # Convert to probabilities

    # Select the top artifacts based on a threshold
    threshold = 0.05  # Adjust based on desired sensitivity
    detected_artifacts = [
        artifact_list[i] for i in range(len(artifact_list)) if scores[i] > threshold
    ]

    return detected_artifacts

# Load predictions from the given JSON file
def load_predictions(json_file_path):
    with open(json_file_path, 'r') as file:
        predictions = json.load(file)
    print(f"Loaded predictions: {predictions[:5]}")  # Print first 5 entries to verify structure
    return predictions

# Filter predictions to include only "fake" images
def filter_fake_predictions(predictions):
    fake_predictions = [entry for entry in predictions if entry["prediction"].lower() == "fake"]
    print(f"Fake predictions: {fake_predictions}")  # Print first 5 fake predictions for verification
    return fake_predictions

# Function to analyze images with "fake" predictions and save results to JSON
def analyze_fake_images(directory_path, fake_predictions, output_json_path):
    results = []
    
    # Loop through the predictions that are "fake"
    for entry in fake_predictions:
        image_index = entry["index"]
        filename = f"{image_index}.png"  # Modify as per your filename format
        file_path = os.path.join(directory_path, filename)
        
        # Skip if the image doesn't exist
        if not os.path.exists(file_path):
            continue
        
        # Analyze the image for artifacts
        detected_artifacts = analyze_single_image(file_path)
        
        # Add to results if artifacts are detected
        if detected_artifacts:
            results.append({
                "index": image_index,  # Use the index from the prediction
                "explanation": detected_artifacts  # Only include the artifact names and explanations
            })
    
    # Save results to JSON file
    if results:
        with open(output_json_path, 'w') as json_file:
            json.dump(results, json_file, indent=4)
        print(f"Results saved to {output_json_path}")
    else:
        print("No results to save.")

# Example usage
input_predictions_json = "/kaggle/input/submission-json/json.json"  # Path to your prediction JSON file
directory_path = "/kaggle/input/adobe-test/perturbed_images_32"  # Replace with your directory path
output_json_path = "/kaggle/working/file1.json"  # Output path for the results

# Load and filter predictions
predictions = load_predictions(input_predictions_json)
fake_predictions = filter_fake_predictions(predictions)

# Analyze fake images and save results
analyze_fake_images(directory_path, fake_predictions, output_json_path)


Loaded predictions: [{'index': 176, 'prediction': 'real'}, {'index': 88, 'prediction': 'fake'}, {'index': 162, 'prediction': 'fake'}, {'index': 189, 'prediction': 'real'}, {'index': 77, 'prediction': 'real'}]
Fake predictions: [{'index': 88, 'prediction': 'fake'}, {'index': 162, 'prediction': 'fake'}, {'index': 215, 'prediction': 'fake'}, {'index': 62, 'prediction': 'fake'}, {'index': 76, 'prediction': 'fake'}, {'index': 163, 'prediction': 'fake'}, {'index': 89, 'prediction': 'fake'}, {'index': 175, 'prediction': 'fake'}, {'index': 48, 'prediction': 'fake'}, {'index': 217, 'prediction': 'fake'}, {'index': 202, 'prediction': 'fake'}, {'index': 216, 'prediction': 'fake'}, {'index': 49, 'prediction': 'fake'}, {'index': 75, 'prediction': 'fake'}, {'index': 158, 'prediction': 'fake'}, {'index': 65, 'prediction': 'fake'}, {'index': 71, 'prediction': 'fake'}, {'index': 206, 'prediction': 'fake'}, {'index': 207, 'prediction': 'fake'}, {'index': 213, 'prediction': 'fake'}, {'index': 70, 'predic