### Libraries and Device

In [None]:
import warnings
warnings.filterwarnings("ignore")

import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
import traceback
import os
from tqdm import tqdm
import pandas as pd

from util import preprocess_image

# Pick the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"[INFO] Using device: {device}")

### Qwen model

In [None]:
# Hub identifier of the checkpoint and local folder holding the saved processor.
qwen_model_name = "Qwen/Qwen2.5-VL-3B-Instruct"
qwen_processor_folder = os.path.join("..", "assets", "qwen_processor")

# The processor is read from the local assets folder, not from the hub id.
print("[INFO] Loading Qwen processor...")
qwen_processor = AutoProcessor.from_pretrained(
    qwen_processor_folder,
    trust_remote_code=True,
    use_fast=True,
    verbose=False,
)
print("[INFO] Qwen processor loaded.\n")

# Load the full checkpoint in half precision, keep only its `visual`
# sub-module (the part called for embedding extraction) and move it to the
# selected device.
# NOTE(review): float16 is used even when `device` is "cpu" — confirm this is
# intended for CPU-only runs.
print("[INFO] Loading Qwen model...")
qwen_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    qwen_model_name, torch_dtype=torch.float16
).visual.to(device)
print("[INFO] Qwen model loaded.")

## Extract an image's embedding with Qwen

In [None]:
def get_qwen_embedding(image_path, img_size=384):
    """
    Compute the Qwen embedding of a single image.

    The image is loaded through ``preprocess_image``, converted to tensors by
    the image processor of the global ``qwen_processor``, and fed to the
    global ``qwen_model``. Every failure is reported on stdout and turned
    into a ``None`` return value; nothing is raised to the caller.

    Args:
        image_path (str): Path to the input image.
        img_size (int): Size forwarded to ``preprocess_image`` (default is 384).

    Returns:
        torch.Tensor: The model output moved to the CPU, or None when the
        model/processor is unavailable, the image cannot be loaded, the model
        returns nothing/an empty tensor, or an error occurs during extraction.
    """

    # Guard: both the model and the processor must be usable
    if not (qwen_model and qwen_processor):
        print("[ERROR] Qwen model or processor not loaded.")
        return None

    # Step 1: load and preprocess the image
    try:
        prepared_image = preprocess_image(image_path, img_size)
    except FileNotFoundError:
        print(f"[ERROR] File {image_path} not found.")
        return None
    except Exception as e:
        print(f"[ERROR] Unable to open image {image_path}: {e}")
        return None

    # Pre-bind everything the `finally` clause deletes, so the cleanup is
    # valid no matter where the extraction fails.
    pixels = None
    grid_thw = None
    raw_output = None
    result = None

    # Step 2: build the model inputs and run the forward pass
    try:
        processed = qwen_processor.image_processor(images=[prepared_image], return_tensors="pt")
        del prepared_image

        pixels = processed['pixel_values'].to(device)
        grid_thw = processed['image_grid_thw'].to(device)
        del processed

        # Inference only: no gradients are needed
        with torch.no_grad():
            raw_output = qwen_model(pixels, grid_thw=grid_thw)

            if raw_output is None or raw_output.nelement() == 0:
                print(f"[ERROR] No output or empty output from model for image {image_path}.")
                result = None
            else:
                # Move the embedding off the device before returning it
                result = raw_output.cpu()

    except RuntimeError as e:
        print(f"[ERROR] Runtime error during extraction for {image_path}: {e}")
        traceback.print_exc()
        result = None

    except Exception as e:
        print(f"[ERROR] Unexpected error during extraction for {image_path}: {e}")
        traceback.print_exc()
        result = None

    finally:
        # Drop the device-side references and release cached CUDA memory
        del pixels, grid_thw, raw_output
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return result

## Extract embeddings from the miniImageNet images

In [None]:
# Images settings
img_size = 384  # size forwarded to get_qwen_embedding for image resizing

# Paths
images_folder = os.path.join("..", "..", "CV_data", "miniImageNet")
save_path = os.path.join("..", "..", "CV_data", "separated_embeddings", "qwen_384")
train_images_csv = os.path.join("..", "assets", "train_images.csv")

# Make sure the output folder exists before the loop starts: torch.save does
# not create missing parent directories, so every save would fail otherwise.
os.makedirs(save_path, exist_ok=True)

# Extract list of image names from the 'filename' column of the CSV
df = pd.read_csv(train_images_csv)
image_names = df['filename'].tolist()

# Loop through images and extract one embedding file per image
loop = tqdm(image_names, desc="[LOOP] Processing images", unit="image")
for file_name in loop:
    image_path = os.path.join(images_folder, file_name)
    embedding = get_qwen_embedding(image_path, img_size=img_size)

    # Save embedding if successfully obtained
    if embedding is not None:
        # Strip only the final extension: splitting on the first "." would
        # truncate names that contain extra dots and could make different
        # images overwrite the same output file.
        torch.save(embedding, os.path.join(save_path, os.path.splitext(file_name)[0] + ".pt"))
    else:
        print(f"[ERROR] Failed to get embedding for {file_name}, skipping.")

    # Drop the embedding reference and release cached CUDA memory before
    # moving on to the next image
    del embedding
    if torch.cuda.is_available():
        torch.cuda.empty_cache()