# Recursos computacionais da GAN MOSAIQ

Este notebook estima o custo computacional do modelo MOSAIQ aplicado ao BreastMNIST. A metodologia replica o estudo feito para a GAN quântica, medindo tempo de treinamento por classe, quantidade de parâmetros do gerador e tempo médio de inferência de uma amostra sintética.


In [1]:
import time
from statistics import mean

import medmnist
from medmnist import INFO
import pandas as pd
import torch
from torch.utils.data import Subset
from torchvision import transforms

from medmnist_data import load_medmnist_data
from quantum_gan_medmnist import (
    MosaiqDiscriminator,
    MosaiqQuantumGenerator,
    create_mosaiq_pca_loaders,
    train_mosaiq_gan,
)


In [2]:
# --- Experiment configuration -------------------------------------------------
DATA_FLAG = "breastmnist"
BATCH_SIZE = 128
NUM_EPOCHS = 50

# MOSAIQ hyperparameters. PCA_DIMS is tied to the generator layout: one PCA
# component per (sub-generator, qubit) pair.
TARGET_IMG_SIZE = 8
N_GENERATORS = 8
N_QUBITS = 5
Q_DEPTH = 6
PCA_DIMS = N_GENERATORS * N_QUBITS

# Query CUDA availability once and derive both representations from the same
# answer, so the torch.device object and its string form can never disagree.
device_str = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_str)

# Preprocessing for the low-resolution copy of the data: resize every image to
# TARGET_IMG_SIZE x TARGET_IMG_SIZE, convert it to a tensor, then normalize
# with mean 0.5 and std 0.5 (single channel).
transform_lowres = transforms.Compose([
    transforms.Resize((TARGET_IMG_SIZE, TARGET_IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Same preprocessing without the resize step, so images keep the resolution
# delivered by the dataset.
transform_highres = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Load the dataset through the project helper using the low-resolution
# transform. Below, only the training split and the label-name mapping are
# read from the returned bundle.
bundle = load_medmnist_data(
    data_flag=DATA_FLAG,
    batch_size=BATCH_SIZE,
    download=True,
    transform=transform_lowres,
    shuffle_train=True,
)

train_dataset = bundle.train_dataset
# Cast the label keys to int; label_ids is the sorted list of those ids and
# drives the per-class training loop.
label_names = {int(k): v for k, v in bundle.label_names.items()}
label_ids = sorted(label_names.keys())

# Look up the MedMNIST dataset class by name in the INFO registry and
# instantiate its training split with the no-resize transform.
dataset_class = getattr(medmnist, INFO[DATA_FLAG]["python_class"])
train_dataset_highres = dataset_class(
    split="train",
    transform=transform_highres,
    download=True,
)

# Build the loaders consumed by training; train_loaders is indexed by label id.
# NOTE(review): presumably the helper downsamples to target_size and projects
# the images onto pca_dims PCA components — confirm in quantum_gan_medmnist.
train_loaders, tensor_pca, labels, pca_model = create_mosaiq_pca_loaders(
    train_dataset_highres,
    batch_size=BATCH_SIZE,
    target_size=TARGET_IMG_SIZE,
    pca_dims=PCA_DIMS,
)


Using downloaded and verified file: /home/mahlow/.medmnist/breastmnist.npz
Using downloaded and verified file: /home/mahlow/.medmnist/breastmnist.npz
Using downloaded and verified file: /home/mahlow/.medmnist/breastmnist.npz


  return torch._C._cuda_getDeviceCount() > 0


In [3]:
def subset_by_label(dataset, label):
    """Return a ``Subset`` of *dataset* holding only samples labelled *label*.

    Args:
        dataset: Object supporting ``len()`` and exposing an indexable
            ``labels`` attribute whose entries can be converted with ``int``.
        label: Integer class id to keep.

    Returns:
        A ``torch.utils.data.Subset`` over the positions whose label matches,
        in ascending index order.
    """
    selected = []
    for position in range(len(dataset)):
        if int(dataset.labels[position]) == label:
            selected.append(position)
    return Subset(dataset, selected)


def count_parameters(model):
    """Return the total number of scalar entries across ``model.parameters()``.

    Args:
        model: Object exposing a ``parameters()`` iterable of tensors.

    Returns:
        The sum of ``numel()`` over all parameters (``0`` when there are none).
    """
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total


def measure_inference_time(generator, *, latent_dim, num_runs=32):
    """Return the average seconds taken by one single-sample generator call.

    The generator is switched to eval mode and called ``num_runs`` times under
    ``torch.no_grad()``, each time with a fresh ``(1, latent_dim)`` noise tensor
    drawn uniformly from ``[0, pi/2)`` on the module-level ``device``. Noise
    creation is part of the timed region.

    Args:
        generator: Callable module exposing ``eval()``; left in eval mode.
        latent_dim: Size of the noise vector fed to the generator.
        num_runs: Number of timed forward passes to average over.

    Returns:
        Elapsed time divided by ``num_runs``, in seconds.

    Raises:
        ZeroDivisionError: If ``num_runs`` is 0.
    """
    generator.eval()
    # CUDA kernels launch asynchronously; synchronize before and after the
    # timed loop so queued GPU work is attributed correctly. No-op without CUDA.
    sync = torch.cuda.synchronize if torch.cuda.is_available() else (lambda: None)
    with torch.no_grad():
        sync()
        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start = time.perf_counter()
        for _ in range(num_runs):
            noise = torch.rand(1, latent_dim, device=device) * (torch.pi / 2)
            generator(noise)
        sync()
        elapsed = time.perf_counter() - start
    return elapsed / num_runs


def measure_inference_time_per_label(generators):
    """Time single-sample inference for every generator in *generators*.

    Args:
        generators: Mapping from label id to a trained generator.

    Returns:
        A ``(average, rows)`` tuple. ``rows`` has one dict per label with the
        label id, its name from ``label_names`` and the measured seconds per
        image; ``average`` is the mean of those timings, or NaN when
        *generators* is empty.
    """
    rows = []
    for label, generator in generators.items():
        seconds_per_image = measure_inference_time(generator, latent_dim=N_QUBITS)
        row = {
            "Label_ID": label,
            "Label_Nome": label_names[label],
            "Tempo_inferência_img_seg": seconds_per_image,
        }
        rows.append(row)

    # Guard clause: statistics.mean rejects empty input, so report NaN instead.
    if not rows:
        return float("nan"), rows

    average = mean([row["Tempo_inferência_img_seg"] for row in rows])
    return average, rows


In [4]:
def run_mosaiq_gan():
    """Train one MOSAIQ GAN per class label and collect cost measurements.

    For every id in ``label_ids`` a fresh generator/discriminator pair is built
    and trained on ``train_loaders[label]`` for ``NUM_EPOCHS`` epochs. After all
    classes are trained, single-sample inference is timed for each generator.

    Returns:
        A ``(summary, per_label_training, per_label_inference)`` tuple:

        * ``summary``: dict with the model name, the total seconds spent in the
          per-class loop (model construction plus training; the inference
          benchmark is excluded), the parameter count of the first generator,
          and the mean seconds per generated image.
        * ``per_label_training``: list of dicts with each label's training
          time in seconds.
        * ``per_label_inference``: list of dicts with each label's inference
          time in seconds.
    """
    generators = {}
    per_label_training = []
    # perf_counter is monotonic, so long training intervals are not distorted
    # by system clock adjustments the way time.time() differences can be.
    start_total = time.perf_counter()

    for label in label_ids:
        generator = MosaiqQuantumGenerator(
            N_GENERATORS,
            N_QUBITS,
            Q_DEPTH,
        )
        discriminator = MosaiqDiscriminator(input_dim=PCA_DIMS)

        start_label = time.perf_counter()
        train_mosaiq_gan(
            train_loaders[label],
            generator,
            discriminator,
            epochs=NUM_EPOCHS,
            device=device_str,
        )
        elapsed_label = time.perf_counter() - start_label

        per_label_training.append(
            {
                "Label_ID": label,
                "Label_Nome": label_names[label],
                "Tempo_treinamento_classe_seg": elapsed_label,
            }
        )
        generators[label] = generator.eval()

    total_time = time.perf_counter() - start_total
    avg_inference, per_label_inference = measure_inference_time_per_label(generators)

    # Every generator is built with the same hyperparameters, so the first one
    # is used for the parameter count. With no labels there is no generator;
    # report 0 rather than raising StopIteration, mirroring the NaN fallback
    # used for the average inference time.
    first_generator = next(iter(generators.values()), None)
    generator_params = (
        count_parameters(first_generator) if first_generator is not None else 0
    )

    summary = {
        "GAN": "MOSAIQ-GAN",
        "Tempo_treinamento_seg": total_time,
        "Parametros_Gerador": generator_params,
        "Tempo_inferência_img_seg": avg_inference,
    }

    return summary, per_label_training, per_label_inference


In [None]:
# Train every per-class GAN and gather the three result collections.
summary, per_label_training, per_label_inference = run_mosaiq_gan()

# One-row overview table; the derived columns express training time in minutes
# and inference time in milliseconds.
df_summary = pd.DataFrame([summary]).assign(
    **{
        "Tempo_treinamento_min": lambda frame: frame["Tempo_treinamento_seg"] / 60,
        "Tempo_inferência_img_ms": lambda frame: frame["Tempo_inferência_img_seg"] * 1_000,
    }
)
df_summary


In [None]:
# Per-class training times, with an extra column expressed in minutes.
df_treinamento = pd.DataFrame(per_label_training).assign(
    **{
        "Tempo_treinamento_classe_min": lambda frame: frame["Tempo_treinamento_classe_seg"] / 60,
    }
)
df_treinamento


In [None]:
# Per-class inference times, with an extra column expressed in milliseconds.
df_inferencia = pd.DataFrame(per_label_inference).assign(
    **{
        "Tempo_inferência_img_ms": lambda frame: frame["Tempo_inferência_img_seg"] * 1_000,
    }
)
df_inferencia
