In [1]:
import os
import time
import json
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Define paths
# DATASET_PATH is the folder scanned for input images; RESULTS_PATH is the
# folder the JSON report is written to.
# NOTE: both are absolute, machine-specific paths -- adjust before running
# on another machine.
DATASET_PATH = r"C:\Users\Patrick\Documents\thesis\Dataset\OwnDataSet"
RESULTS_PATH = r"C:\Users\Patrick\Documents\thesis\Dataset\Results"

def load_image(image_file):
    """Open an image and return it converted to RGB.

    Args:
        image_file: Path (or file object) accepted by ``Image.open``.

    Returns:
        A new ``PIL.Image.Image`` in mode ``'RGB'``.
    """
    # Image.open is lazy and keeps the underlying file open; the context
    # manager releases the handle once convert() has produced its own copy.
    with Image.open(image_file) as img:
        return img.convert('RGB')

def process_images(model, tokenizer, dataset_path):
    """Generate a short description for every image in *dataset_path*.

    Files are selected by extension (``.png``, ``.jpg``, ``.jpeg``,
    case-insensitive) and processed in sorted filename order so repeated
    runs produce results in the same order.

    Args:
        model: Object exposing ``chat(image=, msgs=, context=, tokenizer=,
            **params)`` that returns a 3-tuple whose first item is the
            response text.
        tokenizer: Object exposing ``encode(text)``; passed to ``model.chat``
            and used to count the tokens of the response.
        dataset_path: Directory containing the images.

    Returns:
        A list with one dict per image, holding the keys ``filename``,
        ``processing_time`` (seconds, image loading included),
        ``output_tokens`` and ``alternative_text``.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")

    # Prompt and generation settings are identical for every image, so they
    # are built once instead of on each iteration.
    question = "Please describe the image shortly Maximum 150 characters."
    params = {
        "sampling": True,
        "top_p": 0.8,
        "top_k": 100,
        "temperature": 0.7,
        "repetition_penalty": 1.05,
        "max_new_tokens": 35,
    }

    # lower() so that files such as "IMG_01.JPG" are not silently skipped;
    # sorted() because os.listdir() returns entries in arbitrary order.
    filenames = sorted(
        name
        for name in os.listdir(dataset_path)
        if name.lower().endswith(image_suffixes)
    )

    results = []
    for filename in filenames:
        img_path = os.path.join(dataset_path, filename)

        # perf_counter() is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start_time = time.perf_counter()

        # Load image
        image = load_image(img_path)

        # Generate description; a fresh message list is built per image in
        # case the model mutates the conversation it is given.
        msgs = [{"role": "user", "content": question}]
        response, _, _ = model.chat(
            image=image,
            msgs=msgs,
            context=None,
            tokenizer=tokenizer,
            **params,
        )

        processing_time = time.perf_counter() - start_time

        # NOTE(review): encode() may add special tokens (e.g. BOS) depending
        # on the tokenizer, which would inflate this count -- confirm.
        output_tokens = len(tokenizer.encode(response))

        results.append({
            "filename": filename,
            "processing_time": processing_time,
            "output_tokens": output_tokens,
            "alternative_text": response,
        })

        print(f"Processed {filename}")

    return results

def save_results(results, output_path):
    """Write *results* as indented JSON into the *output_path* directory.

    The directory is created if it does not exist. The report is always
    named ``MiniCPM-V2_analysis_results.json`` and replaces any existing
    file of that name.
    """
    os.makedirs(output_path, exist_ok=True)
    target = os.path.join(output_path, "MiniCPM-V2_analysis_results.json")
    with open(target, "w") as handle:
        handle.write(json.dumps(results, indent=2))
    print(f"Results saved to {target}")

def main():
    """Run MiniCPM-V-2 over the dataset, print a summary and save the results.

    Loads the model and tokenizer, moves the model to CUDA when available
    (CPU otherwise), describes every image under ``DATASET_PATH`` and writes
    the per-image results to ``RESULTS_PATH``. If no images are found, a
    message is printed and nothing is saved.
    """
    # Load model and tokenizer
    model_path = 'openbmb/MiniCPM-V-2'
    model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype=torch.bfloat16)
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device=device)
    model.eval()

    # Process images
    results = process_images(model, tokenizer, DATASET_PATH)
    num_images = len(results)

    # Print summary
    print(f"Processed {num_images} images")
    if num_images == 0:
        # Without this guard the per-image averages below would raise
        # ZeroDivisionError for an empty or image-free dataset folder.
        print(f"No images found in {DATASET_PATH}; nothing to summarize or save.")
        return

    total_time = sum(r["processing_time"] for r in results)
    total_output_tokens = sum(r["output_tokens"] for r in results)

    print(f"Total processing time: {total_time:.2f} seconds")
    print(f"Average time per image: {total_time/num_images:.2f} seconds")
    print(f"Total output tokens: {total_output_tokens}")
    print(f"Average output tokens per image: {total_output_tokens/num_images:.2f}")

    # Save results
    save_results(results, RESULTS_PATH)

# Run the benchmark only when this file is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.45s/it]


Processed 01.jpg
Processed 02.jpg
Processed 03.jpg
Processed 04.jpg
Processed 05.jpg
Processed 06.png
Processed 07.png
Processed 08.png
Processed 09.png
Processed 10.png
Processed 11.png
Processed 12.png
Processed 13.png
Processed 14.jpg
Processed 15.png
Processed 15 images
Total processing time: 55.46 seconds
Average time per image: 3.70 seconds
Total output tokens: 486
Average output tokens per image: 32.40
Results saved to C:\Users\Patrick\Documents\thesis\Dataset\Results\MiniCPM-V2_analysis_results.json
