In [None]:
!pip install pennylane

Collecting pennylane
  Downloading PennyLane-0.39.0-py3-none-any.whl.metadata (9.2 kB)
Collecting rustworkx>=0.14.0 (from pennylane)
  Downloading rustworkx-0.15.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting appdirs (from pennylane)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting autoray>=0.6.11 (from pennylane)
  Downloading autoray-0.7.0-py3-none-any.whl.metadata (5.8 kB)
Collecting pennylane-lightning>=0.39 (from pennylane)
  Downloading PennyLane_Lightning-0.39.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (26 kB)
Downloading PennyLane-0.39.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading autoray-0.7.0-py3-none-any.whl (930 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m930.0/930.0 kB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PennyLane_Lightning-0.39.0-cp310

In [None]:
pip install medmnist

Collecting medmnist
  Downloading medmnist-3.0.2-py3-none-any.whl.metadata (14 kB)
Collecting fire (from medmnist)
  Downloading fire-0.7.0.tar.gz (87 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/87.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading medmnist-3.0.2-py3-none-any.whl (25 kB)
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l[?25hdone
  Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=d1ea1ba0a0f5716d9ff7616ffa643a8f56dc184bc8ce55260fc812a9fc4b76b5
  Stored in directory: /root/.cache/pip/wheels/19/39/2f/2d3cadc408a8804103f1c34ddd4b9f6a93497b11fa96fe738e
Successfully built fire
Installing collected packages: fire, medmnist
Successfully installed fire-0.7.0 medmnist-3.0.2


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, models
from transformers import CLIPProcessor, CLIPModel
import pennylane as qml
import numpy as np
import copy
from medmnist import INFO
import medmnist
from sklearn.metrics import f1_score, accuracy_score, precision_score
from sklearn.metrics.pairwise import cosine_similarity

# Seed the NumPy and PyTorch global RNGs so weight initialisation, dataset
# subsampling and shuffling are repeatable between runs.
np.random.seed(42)
torch.manual_seed(42)

# Quantum Circuit Configuration
# n_qubits: number of wires (and of image features fed to the circuit).
# q_depth:  number of variational layers applied by `qnode`.
n_qubits, q_depth = 4, 4
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="torch")
def qnode(inputs, weights):
    """Variational circuit mapping ``n_qubits`` angles to Pauli-Z expectations.

    Args:
        inputs: Tensor whose last dimension holds ``n_qubits`` rotation angles
            (one per wire). These are the classical features being encoded.
        weights: Tensor with ``q_depth * 2 * n_qubits`` trainable parameters;
            it is viewed as ``(q_depth, 2 * n_qubits)``. For each layer the
            first ``n_qubits`` entries drive the RY rotations and the last
            ``n_qubits`` entries drive the ring of CRX gates.

    Returns:
        A list with one ``<Z>`` expectation value per wire.
    """
    weights = weights.view(q_depth, 2 * n_qubits)

    # Data encoding: load each feature as an RY angle on its own wire.
    # Previously `inputs` was never applied, so the circuit output was the
    # same for every sample and carried no information about the image.
    for i in range(n_qubits):
        qml.RY(inputs[..., i], wires=i)

    for layer in range(q_depth):
        # Trainable single-qubit rotations.
        for i in range(n_qubits):
            qml.RY(weights[layer, i], wires=i)
        # Trainable entangling ring: wire i controls wire (i + 1) mod n_qubits.
        for i in range(n_qubits):
            qml.CRX(weights[layer, n_qubits + i], wires=[i, (i + 1) % n_qubits])
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

# Custom QuantumCLIP Model
class QuantumCLIP(nn.Module):
    """CLIP-style dual encoder whose image branch runs through a quantum circuit.

    Text branch: pretrained CLIP text features followed by a linear projection
    to ``latent_dim``.
    Image branch: ResNet-18 backbone -> linear layer to ``n_qubits`` features ->
    LayerNorm -> ``qnode`` -> linear layer to ``latent_dim``, plus a linear
    skip connection from the normalised pre-circuit features.

    Args:
        latent_dim: Size of the shared text/image embedding space.
        n_qubits: Number of features fed to the circuit. ``qnode`` reads the
            module-level ``n_qubits``, so this value must match it.
        q_depth: Number of variational layers. ``qnode`` reads the module-level
            ``q_depth``, so this value must match it.
    """

    def __init__(self, latent_dim=128, n_qubits=4, q_depth=4):
        super(QuantumCLIP, self).__init__()
        self.text_encoder = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        # `get_text_features` returns the *projected* text embedding, whose
        # width is `projection_dim`, not the text transformer's hidden size.
        # Both are 512 for this checkpoint, so existing weights stay compatible.
        self.text_projection = nn.Linear(self.text_encoder.config.projection_dim, latent_dim, bias=True)

        resnet = models.resnet18(pretrained=True)
        # Drop ResNet's own pooling and classification head; re-add a global
        # average pool so the backbone emits one 512-d vector per image.
        self.image_encoder = nn.Sequential(
            *list(resnet.children())[:-2],
            nn.AdaptiveAvgPool2d((1, 1))
        )
        self.pre_net = nn.Linear(512, n_qubits)
        self.pre_norm = nn.LayerNorm(n_qubits)
        self.q_params = nn.Parameter(torch.randn(q_depth, 2 * n_qubits) * 0.01)
        self.post_net = nn.Linear(n_qubits, latent_dim, bias=True)
        self.skip_projection = nn.Linear(n_qubits, latent_dim, bias=True)

    def _run_circuit(self, angles):
        """Evaluate ``qnode`` for one sample, keeping the autograd graph intact."""
        result = qnode(angles, self.q_params)
        # The QNode returns one expectation value per wire. Stack them instead
        # of rebuilding a tensor from their values, which would detach the
        # output from `q_params` and from the upstream image features.
        if isinstance(result, (list, tuple)):
            result = torch.stack(list(result))
        return result.to(device=angles.device, dtype=angles.dtype)

    def forward(self, text_inputs, images):
        """Embed a batch of tokenised captions and images.

        Args:
            text_inputs: Mapping of keyword arguments accepted by
                ``CLIPModel.get_text_features``.
            images: Image batch consumed by the ResNet backbone.

        Returns:
            ``(text_latent, image_latent)``, each with ``latent_dim`` features
            per sample.
        """
        text_embeddings = self.text_encoder.get_text_features(**text_inputs)
        text_latent = self.text_projection(text_embeddings)

        image_features = self.image_encoder(images)
        image_features = image_features.view(image_features.size(0), -1)

        pre_out = self.pre_net(image_features)
        pre_out = self.pre_norm(pre_out)
        # Squash to (-pi, pi) so the features are valid rotation angles.
        q_in = torch.tanh(pre_out) * np.pi

        # One circuit evaluation per sample; results stay on the same device
        # as the inputs rather than relying on a global `device` variable.
        q_out = torch.stack([self._run_circuit(sample) for sample in q_in])
        skip_out = self.skip_projection(pre_out)
        image_latent = self.post_net(q_out) + skip_out

        return text_latent, image_latent

# MedMNIST Dataset Loader
class MedMNISTDataset(Dataset):
    """PathMNIST wrapper that yields ``(image, caption, label)`` triples.

    The caption is built from the class name of the sample's label.
    """

    def __init__(self, split, transform=None):
        """Download/load the requested ``split`` and remember ``transform``."""
        self.data_flag = "pathmnist"
        meta = INFO[self.data_flag]
        self.task = meta["task"]
        self.n_channels = meta["n_channels"]

        label_map = meta["label"]
        self.n_classes = len(label_map)
        # INFO stores label ids as strings; index by int instead.
        self.class_names = {int(key): name for key, name in label_map.items()}

        dataset_cls = getattr(medmnist, meta["python_class"])
        self.data = dataset_cls(split=split, download=True)
        self.transform = transform

    def __len__(self):
        """Number of samples in the wrapped split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image, its caption and its label."""
        img, label = self.data[idx]
        # Labels may arrive as one-element arrays; unwrap to a plain scalar.
        label = label.item() if isinstance(label, np.ndarray) else label
        img = self.transform(img) if self.transform else img
        return img, f"This is a {self.class_names[label]}.", label

from sklearn.metrics import f1_score

def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, scheduler, num_epochs):
    """Train ``model`` contrastively and return it with the best validation weights.

    Each batch builds a text-vs-image cosine-similarity matrix and applies
    ``criterion`` with the diagonal as targets (caption ``i`` should match
    image ``i``).

    Args:
        model: Module returning ``(text_latent, image_latent)`` from
            ``model(text_inputs, images)``.
        dataloaders: Dict with ``"train"`` and ``"validation"`` loaders that
            yield ``(images, texts, labels)`` batches.
        dataset_sizes: Unused; kept for interface compatibility.
        criterion: Loss applied to ``(logits, targets)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch after the train phase.
        num_epochs: Number of passes over both phases.

    Returns:
        ``model`` loaded with the weights from the epoch that achieved the
        highest validation accuracy.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # Use the model's own device instead of depending on a global `device`.
    run_device = next(model.parameters()).device
    # Loading the processor is expensive and loop-invariant: do it once
    # rather than once per batch.
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("-" * 10)

        for phase in ["train", "validation"]:
            if phase == "train":
                model.train()
            else:
                model.eval()

            total_loss = 0.0
            total_corrects = 0
            total_samples = 0
            all_preds = []
            all_labels = []

            for images, texts, labels in dataloaders[phase]:
                images, labels = images.to(run_device), labels.to(run_device)
                text_inputs = processor(
                    text=list(texts), return_tensors="pt", padding=True, truncation=True
                )
                text_inputs = {k: v.to(run_device) for k, v in text_inputs.items()}

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == "train"):
                    text_latent, image_latent = model(text_inputs, images)
                    # Row i holds the similarity of caption i to every image
                    # in the batch.
                    logits = torch.matmul(F.normalize(text_latent), F.normalize(image_latent).T)
                    # NOTE(review): captions are generated from the class name,
                    # so two samples of the same class share an identical
                    # caption and the diagonal target is ambiguous for them.
                    targets = torch.arange(len(logits)).to(run_device)
                    loss = criterion(logits, targets)

                    # argmax gives the *batch position* of the best-matching
                    # image, not a class id. Map it to that image's class so
                    # it can be compared with the ground-truth labels.
                    retrieved = torch.argmax(logits, dim=1)
                    preds = labels[retrieved]

                    if phase == "train":
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
                        optimizer.step()

                total_loss += loss.item() * images.size(0)  # Scale loss by batch size
                total_corrects += torch.sum(preds == labels).item()
                total_samples += labels.size(0)

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

            epoch_loss = total_loss / total_samples
            epoch_acc = total_corrects / total_samples
            epoch_f1 = f1_score(all_labels, all_preds, average="weighted")

            print(f"{phase.capitalize()} Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}, F1-Score: {epoch_f1:.4f}")

            # Keep a snapshot of the best-performing weights on validation.
            if phase == "validation" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

            if phase == "train":
                scheduler.step()

    model.load_state_dict(best_model_wts)
    return model

# Evaluation Function
def evaluate_model(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` and print/return summary metrics.

    For every batch the caption-vs-image cosine-similarity matrix is computed;
    each caption's predicted class is the class of the image it matches best
    within the batch.

    Args:
        model: Module returning ``(text_latent, image_latent)`` from
            ``model(text_inputs, images)``.
        dataloader: Loader yielding ``(images, texts, labels)`` batches.

    Returns:
        ``(accuracy, weighted_f1, weighted_precision)`` over all samples.
    """
    model.eval()
    all_preds = []
    all_labels = []
    all_cosine_sims = []

    # Use the model's own device instead of depending on a global `device`.
    run_device = next(model.parameters()).device
    # Loading the processor is loop-invariant: do it once, not per batch.
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    with torch.no_grad():
        for images, texts, labels in dataloader:
            images, labels = images.to(run_device), labels.to(run_device)
            text_inputs = processor(
                text=list(texts), return_tensors="pt", padding=True, truncation=True
            )
            text_inputs = {k: v.to(run_device) for k, v in text_inputs.items()}

            text_latent, image_latent = model(text_inputs, images)

            # Similarity between each caption and its own paired image.
            cosine_sim = F.cosine_similarity(text_latent, image_latent, dim=1)
            all_cosine_sims.extend(cosine_sim.cpu().numpy())

            logits = torch.matmul(F.normalize(text_latent), F.normalize(image_latent).T)
            # argmax gives the batch position of the best-matching image, not
            # a class id. Map it to that image's class before scoring against
            # the ground-truth labels.
            retrieved = torch.argmax(logits, dim=1)
            preds = labels[retrieved]
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_cosine_sim = np.mean(all_cosine_sims)
    avg_accuracy = accuracy_score(all_labels, all_preds)
    avg_f1 = f1_score(all_labels, all_preds, average="weighted")
    avg_precision = precision_score(all_labels, all_preds, average="weighted")

    print(f"Average Cosine Similarity: {avg_cosine_sim:.4f}")
    print(f"Average Accuracy: {avg_accuracy:.4f}")
    print(f"Average F1-Score: {avg_f1:.4f}")
    print(f"Average Precision: {avg_precision:.4f}")

    return avg_accuracy, avg_f1, avg_precision

# Main Script
if __name__ == "__main__":
    # Resize to the 224x224 input used here and normalise each channel with
    # the mean/std values below before feeding the ResNet backbone.
    transform = transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )

    train_dataset = MedMNISTDataset(split="train", transform=transform)
    test_dataset = MedMNISTDataset(split="test", transform=transform)

    # Work on small random subsets (500 train / 100 test samples); the
    # remainder of each split is discarded.
    train_subset, _ = random_split(train_dataset, [500, len(train_dataset) - 500])
    test_subset, _ = random_split(test_dataset, [100, len(test_dataset) - 100])

    dataset_sizes = dict(train=len(train_subset), validation=len(test_subset))

    # The "validation" loader is the subset drawn from the test split.
    dataloaders = dict(
        train=DataLoader(train_subset, batch_size=16, shuffle=True),
        validation=DataLoader(test_subset, batch_size=16, shuffle=False),
    )

    # `device` is a module-level name read by the model and training helpers,
    # so it has to exist before they are called.
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = QuantumCLIP(latent_dim=128, n_qubits=n_qubits, q_depth=q_depth)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=2)

    model = train_model(
        model, dataloaders, dataset_sizes, criterion, optimizer, scheduler, num_epochs=5
    )

    # Final report on the same loader used for model selection.
    avg_accuracy, avg_f1, avg_precision = evaluate_model(model, dataloaders["validation"])

Using downloaded and verified file: /root/.medmnist/pathmnist.npz
Using downloaded and verified file: /root/.medmnist/pathmnist.npz




Epoch 1/5
----------
Train Loss: 2.4875, Accuracy: 0.0880, F1-Score: 0.1095
Validation Loss: 2.3613, Accuracy: 0.0600, F1-Score: 0.0834
Epoch 2/5
----------
Train Loss: 2.3331, Accuracy: 0.0420, F1-Score: 0.0540
Validation Loss: 2.3034, Accuracy: 0.0100, F1-Score: 0.0043
Epoch 3/5
----------
Train Loss: 2.2739, Accuracy: 0.0620, F1-Score: 0.0744
Validation Loss: 2.2956, Accuracy: 0.0300, F1-Score: 0.0310
Epoch 4/5
----------
Train Loss: 2.2627, Accuracy: 0.0960, F1-Score: 0.1215
Validation Loss: 2.2927, Accuracy: 0.0300, F1-Score: 0.0310
Epoch 5/5
----------
Train Loss: 2.2547, Accuracy: 0.0500, F1-Score: 0.0599
Validation Loss: 2.2783, Accuracy: 0.0400, F1-Score: 0.0366
Average Cosine Similarity: 0.4810
Average Accuracy: 0.0600
Average F1-Score: 0.0834
Average Precision: 0.1655
