In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Flatten the directory walk into one lazy stream of full file paths,
# then print them one per line.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/pegasus/Screenshot 2024-11-24 161819.png
/kaggle/input/cifake-test/1018 (8).jpg
/kaggle/input/cifake-test/1017 (6).jpg
/kaggle/input/cifake-test/1018 (4).jpg
/kaggle/input/cifake-test/1017 (7).jpg
/kaggle/input/cifake-test/1018.jpg
/kaggle/input/cifake-test/1019 (2).jpg
/kaggle/input/cifake-test/1018 (10).jpg
/kaggle/input/cifake-test/1019 (4).jpg
/kaggle/input/cifake-test/1017 (9).jpg
/kaggle/input/cifake-test/1017 (4).jpg
/kaggle/input/cifake-test/1018 (9).jpg
/kaggle/input/cifake-test/1018 (2).jpg
/kaggle/input/cifake-test/1019 (5).jpg
/kaggle/input/artifact-interiit/Artifact_Description.csv
/kaggle/input/random-mirror-lady-fake-lol/Screenshot 2024-11-24 154214.png


# **Installing dependencies and setting up**

In [6]:
# Work from the writable Kaggle directory and clone the `v1.0` ref of the LLaVA fork.
%cd /kaggle/working/
!git clone -b v1.0 https://github.com/camenduru/LLaVA
%cd /kaggle/working/LLaVA

# Pin transformers, then install gradio together with the cloned LLaVA package
# (the "." target is the current directory, i.e. the checkout entered above).
# NOTE(review): the pip output logged for this cell reports
# "you have transformers 4.31.0", so the second install appears to replace the
# 4.36.2 pin -- confirm which version is actually imported.
!pip install -q transformers==4.36.2
!pip install -q gradio .

from transformers import AutoTokenizer, BitsAndBytesConfig
from llava.model import LlavaLlamaForCausalLM
import torch


/kaggle/working
Cloning into 'LLaVA'...
remote: Enumerating objects: 1960, done.[K
remote: Total 1960 (delta 0), reused 0 (delta 0), pack-reused 1960 (from 1)[K
Receiving objects: 100% (1960/1960), 13.60 MiB | 42.99 MiB/s, done.
Resolving deltas: 100% (1173/1173), done.
/kaggle/working/LLaVA
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 1.4.17 requires pydantic>=2.7.0, but you have pydantic 1.10.19 which is incompatible.
jupyterlab 4.2.5 requires httpx>=0.25.0, but you have httpx 0.24.0 which is incompatible.
jupyterlab 4.2.5 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.
jupyterlab-lsp 5.1.0 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.
kaggle-environments 1.14.15 requires transformers>=4.33.1, but you have transformers 4.31.0 which is incompatible.
pytorch-lightni

# **Using CLIP to select the top-k artifacts by cosine similarity**

In [31]:
import os
import json
import pandas as pd
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

# Load the artifact catalogue; malformed CSV rows are skipped instead of aborting.
csv_path = "/kaggle/input/artifact-interiit/Artifact_Description.csv"
data = pd.read_csv(csv_path, on_bad_lines="skip")

# CLIP gets dedicated names (`clip_model` / `clip_processor`). The LLaVA cell
# further down binds its own network to the global `model`; sharing that name
# meant the helpers below used whichever model had been loaded last whenever
# cells were re-run out of order.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# One text prompt per artifact: "<Artifact>: <Description>".
# Missing cells become empty strings so the tokenizer only ever receives `str`.
artifact_texts = (
    data['Artifact'].fillna("").astype(str)
    + ": "
    + data['Description'].fillna("").astype(str)
)
# truncation=True keeps over-long descriptions within the tokenizer's maximum
# length instead of failing inside the text encoder.
inputs_text = clip_processor(
    text=artifact_texts.tolist(),
    return_tensors="pt",
    padding=True,
    truncation=True,
)

# Embed every artifact prompt once; the embeddings are reused for all images.
with torch.no_grad():
    text_features = clip_model.get_text_features(**inputs_text)


def get_image_features(image_path):
    """Return the CLIP image embedding for the image stored at ``image_path``.

    Args:
        image_path: Path of an image file readable by Pillow.

    Returns:
        The tensor produced by ``clip_model.get_image_features`` for this image
        (computed without gradient tracking).
    """
    # Open inside a context manager so the file handle is released right away;
    # convert() returns an independent, fully loaded RGB copy.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    inputs_image = clip_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        image_features = clip_model.get_image_features(**inputs_image)
    return image_features


def find_artifacts(image_path, k=5):
    """Rank the catalogued artifacts by similarity to one image.

    Args:
        image_path: Path of the image to score.
        k: Maximum number of artifacts to return. Values larger than the number
            of catalogued artifacts are clamped to that number.

    Returns:
        pandas.DataFrame: A copy of the best-matching rows of ``data``, ordered
        from most to least similar, with an added ``Cosine_Similarity`` column.
    """
    image_features = get_image_features(image_path)
    # The image embedding is compared against the artifact text embeddings.
    similarities = torch.nn.functional.cosine_similarity(image_features, text_features)
    # topk() raises when asked for more entries than exist, so clamp k first.
    top = similarities.topk(min(k, similarities.numel()))
    results = data.iloc[top.indices.tolist()].copy()
    results['Cosine_Similarity'] = top.values.cpu().numpy()
    return results

# Process all images in a folder and save results to a JSON file
def process_folder(image_folder, output_json, k=5):
    """Score every file in a folder and write the rankings to a JSON file.

    Args:
        image_folder: Directory whose regular files are treated as images.
            Sub-directories are ignored.
        output_json: Path of the JSON file to write.
        k: Number of top artifacts requested per image.

    The JSON maps each file name to a list of records (one dict per artifact
    row). Files that fail to process are reported on stdout and left out of the
    output; the run continues with the remaining files.
    """
    results_dict = {}

    # os.listdir() makes no ordering guarantee; sort so that the log and the
    # key order of the JSON output are reproducible between runs.
    for image_name in sorted(os.listdir(image_folder)):
        image_path = os.path.join(image_folder, image_name)

        if not os.path.isfile(image_path):  # Skip sub-directories and the like
            continue

        print(f"Processing: {image_name}")
        try:
            top_artifacts = find_artifacts(image_path, k=k)
            # Convert DataFrame to a list of dicts for JSON serialization
            results_dict[image_name] = top_artifacts.to_dict(orient='records')
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not abort the batch.
            print(f"Error processing {image_name}: {e}")

    # Save the results dictionary to a JSON file
    with open(output_json, 'w') as json_file:
        json.dump(results_dict, json_file, indent=4)
    print(f"Results saved to {output_json}")

# Define the folder path and output JSON file
# Source directory of the images to score, and where the rankings are written
image_folder, output_json = (
    "/kaggle/input/cifake-test",  # Replace with your folder path
    "/kaggle/working/top_artifacts.json",
)

# Score the whole folder, keeping the five best-matching artifacts per image
process_folder(image_folder=image_folder, output_json=output_json, k=5)




Processing: 1018 (8).jpg
Processing: 1017 (6).jpg
Processing: 1018 (4).jpg
Processing: 1017 (7).jpg
Processing: 1018.jpg
Processing: 1019 (2).jpg
Processing: 1018 (10).jpg
Processing: 1019 (4).jpg
Processing: 1017 (9).jpg
Processing: 1017 (4).jpg
Processing: 1018 (9).jpg
Processing: 1018 (2).jpg
Processing: 1019 (5).jpg
Results saved to /kaggle/working/top_artifacts.json


# **Loading LLaVA with transformers**

In [8]:

# Hub checkpoint to load
model_path = "4bit/llava-v1.5-13b-3GB"

# Loading options: automatic device placement plus 4-bit NF4 quantization with
# double quantization and float16 compute.
# NOTE(review): `load_in_4bit` is passed both as a plain kwarg and inside the
# BitsAndBytesConfig; this looks redundant and newer transformers releases may
# reject the combination -- confirm before upgrading.
kwargs = dict(
    device_map="auto",
    load_in_4bit=True,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
    ),
)
model = LlavaLlamaForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)

# Ensure the vision tower is loaded, move it to the GPU, and keep a handle on
# its image processor for later preprocessing.
vision_tower = model.get_vision_tower()
if not vision_tower.is_loaded:
    vision_tower.load_model()
vision_tower.to(device='cuda')
image_processor = vision_tower.image_processor

import os
import requests
from PIL import Image
from io import BytesIO
from llava.conversation import conv_templates, SeparatorStyle
from llava.utils import disable_torch_init
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from llava.mm_utils import tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
from transformers import TextStreamer





config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/33.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/9 [00:00<?, ?it/s]

pytorch_model-00001-of-00009.bin:   0%|          | 0.00/2.97G [00:00<?, ?B/s]

pytorch_model-00002-of-00009.bin:   0%|          | 0.00/2.93G [00:00<?, ?B/s]

pytorch_model-00003-of-00009.bin:   0%|          | 0.00/2.89G [00:00<?, ?B/s]

pytorch_model-00004-of-00009.bin:   0%|          | 0.00/2.96G [00:00<?, ?B/s]

pytorch_model-00005-of-00009.bin:   0%|          | 0.00/2.89G [00:00<?, ?B/s]

pytorch_model-00006-of-00009.bin:   0%|          | 0.00/2.98G [00:00<?, ?B/s]

pytorch_model-00007-of-00009.bin:   0%|          | 0.00/2.87G [00:00<?, ?B/s]

pytorch_model-00008-of-00009.bin:   0%|          | 0.00/2.89G [00:00<?, ?B/s]

pytorch_model-00009-of-00009.bin:   0%|          | 0.00/2.72G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.76k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/9 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/154 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

In [9]:
def caption_image(image_file, prompt, timeout=30):
    """Ask the loaded LLaVA model a question about a single image.

    Args:
        image_file: Local path, or an ``http://`` / ``https://`` URL, of the image.
        prompt: Instruction text placed into the conversation after the image tokens.
        timeout: Seconds to wait for the HTTP download; unused for local paths.

    Returns:
        tuple: ``(image, output)`` where ``image`` is the RGB PIL image given to
        the model and ``output`` is the decoded reply with everything from the
        last ``</s>`` onwards removed.

    Raises:
        requests.HTTPError: If downloading a remote image yields an error status.
    """
    # Match full URL schemes only, so a local file whose name merely starts
    # with "http" is still opened from disk.
    if image_file.startswith(('http://', 'https://')):
        # A timeout prevents an unresponsive host from hanging the notebook, and
        # raise_for_status() surfaces HTTP errors instead of passing an error
        # page to the image decoder.
        response = requests.get(image_file, timeout=timeout)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert('RGB')
    else:
        # Close the file handle promptly; convert() returns a loaded copy.
        with Image.open(image_file) as local_image:
            image = local_image.convert('RGB')
    disable_torch_init()
    conv_mode = "llava_v0"
    conv = conv_templates[conv_mode].copy()
    roles = conv.roles
    image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].half().cuda()
    # Build the user turn: image placeholder tokens, newline, then the role-prefixed prompt.
    inp = f"{roles[0]}: {prompt}"
    inp = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + inp
    conv.append_message(conv.roles[0], inp)
    conv.append_message(conv.roles[1], None)  # empty slot for the model's reply
    raw_prompt = conv.get_prompt()
    input_ids = tokenizer_image_token(raw_prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
    # Stop generating once the conversation separator is produced.
    stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
    keywords = [stop_str]
    stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
    with torch.inference_mode():
        output_ids = model.generate(input_ids, images=image_tensor, do_sample=True, temperature=0.2,
                                    max_new_tokens=4096, use_cache=True, stopping_criteria=[stopping_criteria])
    # Decode only the newly generated tokens (everything after the prompt).
    outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
    conv.messages[-1][-1] = outputs
    output = outputs.rsplit('</s>', 1)[0]
    return image, output

# **Final descriptions of all Artifacts**

In [13]:
# Read back the saved artifact rankings and attach a generated explanation
# (stored under 'Img_desc') to every artifact entry of every image.
json_file_path = "/kaggle/working/top_artifacts.json"  # Path to your JSON file
with open(json_file_path, 'r') as file:
    ans = json.load(file)

for image_name, artifacts in ans.items():
    print(f"Image: {image_name}")
    image_path = f'/kaggle/input/cifake-test/{image_name}'
    for artifact in artifacts:
        question = f"Explain why this image is AI generated because it has {artifact['Description']}. Also provide where in the image is this violation happening"
        # caption_image returns (image, text); only the text is kept.
        artifact['Img_desc'] = caption_image(image_path, question)[1]
    print(f"{image_name} processed successfully!")


Image: 1018 (8).jpg
1018 (8).jpg processed successfully!
Image: 1017 (6).jpg
1017 (6).jpg processed successfully!
Image: 1018 (4).jpg
1018 (4).jpg processed successfully!
Image: 1017 (7).jpg
1017 (7).jpg processed successfully!
Image: 1018.jpg
1018.jpg processed successfully!
Image: 1019 (2).jpg
1019 (2).jpg processed successfully!
Image: 1018 (10).jpg
1018 (10).jpg processed successfully!
Image: 1019 (4).jpg
1019 (4).jpg processed successfully!
Image: 1017 (9).jpg
1017 (9).jpg processed successfully!
Image: 1017 (4).jpg
1017 (4).jpg processed successfully!
Image: 1018 (9).jpg
1018 (9).jpg processed successfully!
Image: 1018 (2).jpg
1018 (2).jpg processed successfully!
Image: 1019 (5).jpg
1019 (5).jpg processed successfully!


In [14]:
# Persist the enriched results as pretty-printed JSON in the working directory
output_json = "/kaggle/working/top_artifacts_with_description.json"
with open(output_json, mode='w') as json_file:
    json.dump(ans, fp=json_file, indent=4)
print("JSON dumped successfully, task 2 pack")


JSON dumped successfully, task 2 pack
