In [None]:
import os
import torch
import pandas as pd
import numpy as np
import timm
from torchvision import transforms
from PIL import Image
from tqdm import tqdm

In [None]:


# Set device: prefer the GPU when PyTorch can see one, otherwise use the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pretrained Swin Transformer as a feature extractor.
# num_classes=0 asks timm to build the model without a classifier while
# keeping its global pooling, so each image yields one pooled feature vector.
# Overwriting `model.head` with Identity instead is version-fragile: on timm
# releases where the head module also owns the pooling, that removes the
# pooling too and the model returns an unpooled feature map.
model = timm.create_model(
    'swin_base_patch4_window7_224',
    pretrained=True,
    num_classes=0,
)
model.to(device)
model.eval()  # evaluation mode for inference

# Image transformation.
# Images are stretched to the 224x224 input size of the model, converted to
# float tensors in [0, 1], and normalized per channel.
# The normalization uses the ImageNet channel statistics, which is what timm's
# pretrained config for swin_base_patch4_window7_224 specifies; normalizing
# with 0.5/0.5 instead feeds the network a shifted input distribution and
# degrades the extracted embeddings.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Dataset path: root directory holding one sub-folder per modality plus labels.csv.
main_dir = "/home/stud1/Desktop/PIL_MAIN/Leaf Dataset"
subfolders = ["365NoUV", "395NoUV", "WhiteNoUV", "365UV", "395UV", "WhiteUV"]

# Load labels.
# The CSV path is built relative to main_dir. (Joining main_dir with an
# absolute path makes os.path.join discard main_dir, so the location would be
# hard-coded twice and silently ignore any change to main_dir.)
labels_csv_path = os.path.join(main_dir, "labels.csv")

# Read the filename column as text so identifiers keep their exact spelling:
# numeric parsing would turn "007" into 7 (or 7.0 when the column has gaps),
# and such keys would never match the image file stems looked up later.
labels_df = pd.read_csv(labels_csv_path, dtype={"filename": str})
filename_to_label = dict(zip(labels_df["filename"].astype(str), labels_df["label"]))

# Output directory for embeddings (created if it does not exist yet).
embedding_dir = "embeddingssss"
os.makedirs(embedding_dir, exist_ok=True)

# Iterate over each modality (folder).
# For every folder this embeds each labeled image with `model` and writes three
# aligned arrays to `embedding_dir`: <folder>_embeddings.npy, <folder>_labels.npy
# and <folder>_file_ids.npy (row i of each array refers to the same image).
image_extensions = ('.jpg', '.jpeg', '.png')

for folder in subfolders:
    print(f"\n📁 Processing: {folder}")
    folder_path = os.path.join(main_dir, folder)

    # A missing modality folder should not abort the remaining modalities.
    if not os.path.isdir(folder_path):
        print(f"⚠️ Skipping {folder}: {folder_path} is not a directory")
        continue

    # Filter up front so the progress bar counts images only; sorting keeps
    # the row order of the saved arrays deterministic.
    image_files = [
        name for name in sorted(os.listdir(folder_path))
        if name.lower().endswith(image_extensions)
    ]

    embeddings = []
    labels = []
    file_ids = []
    unlabeled_count = 0

    for file in tqdm(image_files):
        file_id = os.path.splitext(file)[0]  # e.g., "0"
        if file_id not in filename_to_label:
            # No label for this image: leave it out, but remember how many.
            unlabeled_count += 1
            continue

        img_path = os.path.join(folder_path, file)

        try:
            # The context manager releases the file handle; convert() returns
            # an independent in-memory RGB copy that outlives it.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
        except Exception as e:
            # Deliberately broad: one unreadable or corrupt image must not
            # stop the whole extraction run.
            print(f"❌ Failed to open {img_path}: {e}")
            continue

        # Add the batch dimension the model expects and move to its device.
        image_tensor = transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            # Collapse the model output to a single 1-D vector per image.
            embedding = model(image_tensor).squeeze().flatten().cpu().numpy()

        embeddings.append(embedding)
        labels.append(filename_to_label[file_id])
        file_ids.append(file_id)

    if unlabeled_count:
        print(f"⚠️ {folder}: skipped {unlabeled_count} image(s) without a label")
    if not embeddings:
        print(f"⚠️ {folder}: no images were embedded; saving empty arrays")

    # Save folder-specific files
    base_path = os.path.join(embedding_dir, folder.replace(" ", "_"))
    np.save(f"{base_path}_embeddings.npy", np.array(embeddings))
    np.save(f"{base_path}_labels.npy", np.array(labels))
    np.save(f"{base_path}_file_ids.npy", np.array(file_ids))

    print(f"✅ Saved: {folder} → embeddings, labels, file_ids")

print("\n🎉 Done! All modality-wise embeddings saved.")
