In [1]:
import numpy as np
import torch
from transformers import CLIPModel, CLIPProcessor
from PIL import Image
from pathlib import Path
import shutil

class ChartSegregator:
    def __init__(self, model_name="openai/clip-vit-base-patch32"):
        print("Loading CLIP model...")
        self.model = CLIPModel.from_pretrained(model_name)
        self.processor = CLIPProcessor.from_pretrained(model_name)
        self.model.eval()
        self.labels = ["pie chart", "bar chart", "line chart", "table"]
        self.target_folders = ["pie_chart", "bar_chart", "line_chart", "table"]

    def segregate(self, image_folder):
        image_folder = Path(image_folder)
        # Create subfolders if not exist
        for folder in self.target_folders:
            (image_folder / folder).mkdir(exist_ok=True)
        
        # Gather all image paths
        image_paths = []
        for ext in ['*.jpg', '*.jpeg', '*.png']:
            image_paths.extend(image_folder.glob(ext))
        print(f"Found {len(image_paths)} images.")

        # Precompute label embeddings
        with torch.no_grad():
            text_inputs = self.processor(text=self.labels, return_tensors="pt", padding=True)
            text_embs = self.model.get_text_features(**text_inputs)
            text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True)

        # Process and segregate images
        for path in image_paths:
            try:
                img = Image.open(path).convert("RGB")
                inputs = self.processor(images=img, return_tensors="pt")
                with torch.no_grad():
                    img_emb = self.model.get_image_features(**inputs)
                    img_emb = img_emb / img_emb.norm(dim=-1, keepdim=True)
                # Compute similarities
                sims = (img_emb @ text_embs.T).cpu().numpy().flatten()
                pred_idx = int(np.argmax(sims))
                target_folder = self.target_folders[pred_idx]
                print(f"{path.name}: {self.labels[pred_idx]} (sim={sims[pred_idx]:.2f})")
                # Move file
                shutil.move(str(path), str(image_folder / target_folder / path.name))
            except Exception as e:
                print(f"Error processing {path.name}: {e}")

if __name__ == "__main__":
    input_folder = "data/images"  # change as needed
    engine = ChartSegregator()
    engine.segregate(input_folder)


Loading CLIP model...


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'data\\images\\pie_chart'