In [None]:
!pip install transformers pillow torch pandas
# Install the required libraries
!pip install onnxruntime oracledb pillow numpy

import os
import torch
import pandas as pd
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Local checkpoint directory (Kaggle input) holding the CLIP weights and
# the matching preprocessing configuration.
model_id = "/kaggle/input/openaiclip-vit-base-patch32/pytorch/default/1"

# The processor prepares images for the model; both are loaded from the
# same checkpoint directory so they stay consistent with each other.
processor = CLIPProcessor.from_pretrained(model_id)
model = CLIPModel.from_pretrained(model_id)

# Root of the dataset: one sub-directory per class, named after its label.
dataset_path = "/kaggle/input/fingerprint-based-blood-group-detection/dataset"

# Parallel lists: data[i] is the embedding (list of floats) of an image
# whose class label is labels[i].
data = []
labels = []

# Loop over dataset folders (A+, A-, AB+, ...).
# os.listdir() returns entries in arbitrary order, so both listings are
# sorted to make the order of the collected rows reproducible across runs.
for label in sorted(os.listdir(dataset_path)):
    class_dir = os.path.join(dataset_path, label)
    if not os.path.isdir(class_dir):
        # Stray files next to the class folders are not classes.
        continue

    for fname in sorted(os.listdir(class_dir)):
        fpath = os.path.join(class_dir, fname)
        try:
            # The context manager closes the underlying file handle as soon
            # as the pixels have been read; convert() returns a separate
            # copy, so `img` stays usable after the file is closed.
            with Image.open(fpath) as raw_img:
                img = raw_img.convert("RGB")
            inputs = processor(images=img, return_tensors="pt")

            # Inference only: no gradients are needed for feature extraction.
            with torch.no_grad():
                embedding = model.get_image_features(**inputs)

            # Drop the size-1 dimensions and convert to plain Python floats.
            embedding = embedding.squeeze().tolist()

        except Exception as e:
            # Best effort: report the unreadable/unprocessable file and
            # carry on with the rest of the dataset.
            print("Error processing:", fpath, e)
        else:
            # Append both together, and only on success, so the two lists
            # always stay aligned.
            data.append(embedding)
            labels.append(label)

# Build a two-column table: the class label and, in a single cell, the
# whole embedding list for that image. Then write it out without the index.
columns = {"label": labels, "embedding": data}
df = pd.DataFrame(columns)
df.to_csv(path_or_buf="fingerprint_embeddings.csv", index=False)

print("✅ Saved embeddings to fingerprint_embeddings.csv")
