In [22]:
import pandas as pd
from PIL import Image
from io import BytesIO
from transformers import CLIPProcessor, CLIPModel
import torch
from transformers import pipeline
from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough

In [23]:
# Read the parquet shard (00000 of 00330 according to its file name) into a DataFrame.
df = pd.read_parquet(path="train-00000-of-00330.parquet")

In [24]:
# Run on the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the CLIP weights, then move them onto the chosen device.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
clip_model = clip_model.to(device)

# Matching preprocessor for the same checkpoint (image transforms + tokenizer).
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

In [25]:
#del model
#torch.cuda.empty_cache()

In [26]:
# Text prompts scored against each image. Order matters: classify_image
# reads index 0 as "inside" and index 1 as "outside".
prompts = [
    "A photo taken inside a building",
    "A photo taken outside in nature",
]

In [27]:
def classify_image(row):
    """Label one dataset row as "inside" or "outside" by scoring it with CLIP.

    Uses the notebook-level ``processor``, ``clip_model``, ``device`` and
    ``prompts`` objects. ``prompts[0]`` is treated as the "inside" prompt and
    ``prompts[1]`` as the "outside" prompt.

    Args:
        row: Mapping-like row (e.g. a pandas Series) providing ``mime_type``
            and ``image``, where ``row['image']['bytes']`` is the encoded image.

    Returns:
        dict with two keys:
            ``clip_label``: "inside", "outside", or None when the row was
                skipped (unsupported mime type) or processing failed.
            ``clip_confidence``: probability of the winning prompt, or 0.0
                when ``clip_label`` is None.

    Any exception is caught, printed, and reported as a None label so that a
    single bad row does not abort a DataFrame-wide ``apply``.
    """
    try:
        # Skip unsupported image types
        if not str(row['mime_type']).startswith(("image/jpeg", "image/png")):
            return {"clip_label": None, "clip_confidence": 0.0}

        # Extract raw image bytes and decode them to RGB
        img_bytes = row['image']['bytes']
        img = Image.open(BytesIO(img_bytes)).convert("RGB")

        # Run through CLIP. padding=True lets the processor batch prompts whose
        # token counts differ; without it, building the "pt" tensor fails as
        # soon as the prompts stop tokenizing to the same length.
        inputs = processor(
            images=img, text=prompts, return_tensors="pt", padding=True
        ).to(device)
        with torch.no_grad():
            outputs = clip_model(**inputs)
            # One image vs. all prompts -> softmax over the prompt axis
            probs = outputs.logits_per_image.softmax(dim=1).squeeze().cpu().tolist()

        return {
            "clip_label": "inside" if probs[0] > probs[1] else "outside",
            "clip_confidence": max(probs)
        }

    except Exception as e:
        print(f"Error processing row: {e}")
        return {"clip_label": None, "clip_confidence": 0.0}

In [28]:
# Work on a copy of the first 20 rows so the full frame is left untouched.
df_sample = df.iloc[:20].copy()

# classify_image returns a dict per row; "expand" turns its keys into columns.
results = df_sample.apply(classify_image, axis="columns", result_type="expand")

# Attach the CLIP columns next to the original ones.
df_sample = pd.concat((df_sample, results), axis="columns")

In [29]:
# Drop the notebook's reference to the CLIP model, then ask PyTorch to release
# cached CUDA memory it is no longer using.
# NOTE(review): classify_image looks up clip_model at call time, so calling it
# after this cell lands in its except branch and yields None labels.
del clip_model
torch.cuda.empty_cache()

In [10]:
# Independent copy of the first 20 rows.
# NOTE(review): nothing in the visible cells reads df_sample_llama; the caption
# step operates on df_sample instead — confirm whether this copy is still needed.
df_sample_llama = df.iloc[:20].copy()

In [11]:
# Prompt asking the model to label a caption; {caption} is the only placeholder.
caption_template = ChatPromptTemplate.from_template(
    'Classify the following image caption as either "inside" or "outside".\n'
    "Caption: {caption}\n"
    "Answer:"
)

In [13]:
# Chat model handle for the llama3.1 8B model.
llama_model = ChatOllama(model="llama3.1:8b")

# Pipeline: input value -> {"caption": <input value>} -> prompt -> model.
# The leading mapping forwards whatever is passed to invoke() unchanged under
# the "caption" key, which is the placeholder the template fills in.
caption_chain = {"caption": RunnablePassthrough()} | caption_template
caption_chain = caption_chain | llama_model

In [14]:
def classify_caption_lc(row):
    """Label a row as "inside" or "outside" from its caption via ``caption_chain``.

    Args:
        row: Mapping-like row (e.g. a pandas Series) providing
            ``caption_attribution_description``.

    Returns:
        dict with a single key ``llama_label`` whose value is "inside",
        "outside", or None. None is returned when the caption is missing,
        not a string, or blank; when the model reply mentions neither word;
        or when any exception is raised (the exception is printed).
    """
    try:
        caption = row["caption_attribution_description"]
        # Missing captions show up as None/NaN; blank strings carry no signal.
        if not isinstance(caption, str) or not caption.strip():
            return {"llama_label": None}

        # caption_chain's first step already maps its input to {"caption": ...},
        # so hand it the bare string. Passing a dict here would nest it and
        # render the dict's repr into the prompt instead of the caption text.
        reply = caption_chain.invoke(caption).content.strip().lower()

        # Use whichever label the reply mentions first, so an answer such as
        # "Outside - this was not taken inside" is not mislabelled "inside".
        inside_at = reply.find("inside")
        outside_at = reply.find("outside")
        if inside_at == -1 and outside_at == -1:
            return {"llama_label": None}
        if outside_at == -1 or (inside_at != -1 and inside_at < outside_at):
            return {"llama_label": "inside"}
        return {"llama_label": "outside"}

    except Exception as e:
        print("Error:", e)
        return {"llama_label": None}

In [15]:
# Classify each sampled row's caption; the returned dict expands into a
# "llama_label" column.
llama_results = df_sample.apply(classify_caption_lc, axis=1, result_type="expand")

# Drop result columns left behind by an earlier run of this cell so that
# re-running replaces "llama_label" instead of appending a duplicate column.
df_sample = pd.concat(
    [
        df_sample.drop(columns=list(llama_results.columns), errors="ignore"),
        llama_results,
    ],
    axis=1,
)

In [21]:
# Remove the llama_model name and clear PyTorch's cached CUDA memory.
# NOTE(review): caption_chain was built from llama_model and still references
# it, so this `del` alone does not release the object. ChatOllama presumably
# talks to a separate Ollama server process, in which case empty_cache() has
# nothing of the LLM's to free here — TODO confirm.
del llama_model
torch.cuda.empty_cache()

In [16]:
# Display the sample with the CLIP labels and the caption-based labels side by side.
df_sample

Unnamed: 0,image,image_url,embedding,metadata_url,original_height,original_width,mime_type,caption_attribution_description,wit_features,clip_label,clip_confidence,llama_label
0,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,https://upload.wikimedia.org/wikipedia/commons...,"[1.4784087, 2.8710432, 0.0, 0.51603067, 1.3256...",http://commons.wikimedia.org/wiki/File:Scolope...,3000.0,4000.0,image/jpeg,"English: Puerto Rican Giant Centipede, Scolope...","{'language': ['ro', 'vi', 'sk', 'zh-TW', 'ar',...",outside,0.994741,outside
1,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,https://upload.wikimedia.org/wikipedia/commons...,"[4.219804, 7.780965, 1.8214827, 1.2291162, 0.0...",http://commons.wikimedia.org/wiki/File:Moulay-...,800.0,1200.0,image/jpeg,English: Moulay Ali Cherif Mausoleum in Rissan...,"{'language': ['ar', 'fr', 'it', 'pt'], 'page_u...",outside,0.640316,outside
2,{'bytes': b'\xff\xd8\xff\xdb\x00C\x00\x04\x03\...,https://upload.wikimedia.org/wikipedia/commons...,"[3.2203984, 7.894166, 0.0, 3.9915848, 4.106125...",http://commons.wikimedia.org/wiki/File:Lord_Wa...,462.0,692.0,image/svg+xml,,"{'language': ['uk', 'fr', 'fr', 'iw', 'en', 'd...",,0.0,
3,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,https://upload.wikimedia.org/wikipedia/commons...,"[15.76484, 1.4144201, 17.731287, 2.4145026, 1....",http://commons.wikimedia.org/wiki/File:Teucriu...,1704.0,2272.0,image/jpeg,"English: Teucrium corymbosum, Marysville, Vict...","{'language': ['en', 'vi', 'vi', 'de', 'pt'], '...",outside,0.999666,outside
4,{'bytes': b'\xff\xd8\xff\xdb\x00C\x00\x04\x03\...,https://upload.wikimedia.org/wikipedia/commons...,"[3.790454, 9.945694, 0.31239522, 0.0, 0.856851...",http://commons.wikimedia.org/wiki/File:Tropica...,133.0,129.0,image/jpeg,Tropical Storm Fausto at high latitude in the ...,"{'language': ['es'], 'page_url': ['https://es....",outside,0.997371,outside
5,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,https://upload.wikimedia.org/wikipedia/commons...,"[0.0, 0.1094743, 0.5533953, 1.9391636, 1.91426...",http://commons.wikimedia.org/wiki/File:Bers%C3...,4099.0,3548.0,image/jpeg,Bersée Nord Nord-Pas-de-Calais-Picardie France.,"{'language': ['fr'], 'page_url': ['https://fr....",outside,0.999879,outside
6,{'bytes': b'\xff\xd8\xff\xdb\x00C\x00\x04\x03\...,https://upload.wikimedia.org/wikipedia/commons...,"[3.624626, 6.1326885, 4.302157, 0.28337502, 12...",http://commons.wikimedia.org/wiki/File:PortaRo...,4096.0,3072.0,image/jpeg,English: Igreja do Rosário,"{'language': ['pt'], 'page_url': ['https://pt....",inside,0.841995,inside
7,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,https://upload.wikimedia.org/wikipedia/commons...,"[1.5762159, 2.586473, 0.6373471, 1.7306752, 4....",http://commons.wikimedia.org/wiki/File:Blaine_...,4256.0,2832.0,image/jpeg,"English: Blaine Luetkemeyer, member of the Uni...","{'language': ['hu', 'hu', 'it', 'en', 'sv', 'e...",outside,0.708823,inside
8,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,https://upload.wikimedia.org/wikipedia/commons...,"[4.413841, 11.25443, 1.8598346, 0.52995455, 8....",http://commons.wikimedia.org/wiki/File:Jastrow...,650.0,1143.0,image/jpeg,Deutsch: Optische Täuschung: Jastrow-Illusion,"{'language': ['en', 'ru', 'de', 'ca', 'cs'], '...",outside,0.892977,outside
9,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,https://upload.wikimedia.org/wikipedia/commons...,"[5.164383, 9.40226, 0.7800921, 6.873645, 6.908...",http://commons.wikimedia.org/wiki/File:ZP_05_K...,300.0,387.0,image/jpeg,"English: Konkalisahak, wayang kulit figure fro...","{'language': ['de'], 'page_url': ['https://de....",outside,0.936048,inside


In [19]:
# URL of the image in the row labelled 6, for a manual look at the picture.
df_sample.loc[6, "image_url"]

'https://upload.wikimedia.org/wikipedia/commons/d/d4/PortaRosario.jpg'