# Experimentos: Baseline com QSVM

Este notebook estabelece baselines de desempenho com QSVM (Quantum Support Vector Machine) em datasets de classificação.

Os experimentos usam seleção de features para reduzir a dimensionalidade dos dados antes do treinamento.


# Dataset: Breast Cancer Wisconsin (Diagnostic)

In [None]:
from sklearn.datasets import load_breast_cancer 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import numpy as np
import pandas as pd


from qiskit.circuit.library import ZZFeatureMap
from qiskit.primitives import StatevectorSampler    
from qiskit_machine_learning.kernels import FidelityQuantumKernel  
from qiskit_machine_learning.algorithms import QSVC
import time

## Loading data and initial analysis

In [17]:
# 2. Loading data
# Load the dataset a single time and reuse the returned Bunch for the feature
# matrix, the labels and the column names (a second load_breast_cancer() call
# just to read feature_names re-reads the whole dataset for nothing).
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target
feature_names = dataset.feature_names

# Keep the features in a DataFrame so columns can later be selected by name.
df_X = pd.DataFrame(X, columns=feature_names)

print(f"Dataset carregado com {X.shape[0]} amostras e {X.shape[1]} features.")

Dataset carregado com 569 amostras e 30 features.


## Split data into training and testing sets

In [18]:
# Hold out 25% of the samples for evaluation. The split is stratified on the
# labels and seeded so that re-running the notebook yields the same partition.
X_train_full, X_test_full, y_train, y_test = train_test_split(
    df_X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)
print("Dados divididos em treino e teste.")

Dados divididos em treino e teste.


## Applying feature selection by correlation (30 -> 5 features)

In [19]:
# Correlation-based feature selection, computed on the training split only so
# that the test split has no influence on which columns are kept.
train_df = X_train_full.copy()
train_df["diagnosis"] = y_train

# Absolute correlation of every column with the target, strongest first.
# This series still contains the target's own self-correlation entry.
correlation = train_df.corr(numeric_only=True)["diagnosis"].abs().sort_values(ascending=False)

# Remove the target by label instead of assuming it sorted into position 0:
# a positional [1:6] slice would keep "diagnosis" as a feature (and drop a real
# one) if any column tied with it at |corr| == 1.0. Then keep the 5 strongest.
top_features = correlation.drop("diagnosis").head(5).index.tolist()

print("Top 5 features selecionadas:", top_features)

# Apply the same column subset to both the training and the test split.
X_train = X_train_full[top_features]
X_test = X_test_full[top_features]



Top 5 features selecionadas: ['worst concave points', 'mean concave points', 'worst perimeter', 'worst radius', 'mean perimeter']


## Normalize data

In [20]:
# Standardize the selected features. The scaler's statistics are learned from
# the training split alone and then reused, unchanged, on the test split.
# NOTE(review): the scaled values are later fed to ZZFeatureMap; standardized
# values are unbounded, so a bounded rescaling may suit the encoding better --
# confirm before changing, since it would alter the reported baseline.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("Dados normalizados.")





Dados normalizados.


# Training QSVM model

In [21]:
# Build the QSVM: a ZZFeatureMap with one qubit per selected feature defines
# the fidelity quantum kernel that the QSVC classifier uses.
num_features = X_train_scaled.shape[1]
feature_map = ZZFeatureMap(feature_dimension=num_features, reps=1)
fidelity_kernel = FidelityQuantumKernel(feature_map=feature_map)
qsvc = QSVC(quantum_kernel=fidelity_kernel, random_state=42)

print("Treinando QSVM...")

# Time the fit with a monotonic clock: time.time() follows the system wall
# clock and can jump (NTP sync, manual adjustment) during a multi-minute run,
# which would corrupt the reported duration.
start_time = time.perf_counter()
qsvc.fit(X_train_scaled, y_train)
end_time = time.perf_counter()
print(f"Modelo treinado com sucesso em {end_time - start_time:.2f} segundos.")

# Evaluate on the held-out test split.
predictions = qsvc.predict(X_test_scaled)
print("\n--- Relatório de Classificação Final (QSVM com seleção de features) ---")
print(classification_report(y_test, predictions, target_names=load_breast_cancer().target_names))

Treinando QSVM...
Modelo treinado com sucesso em 587.86 segundos.

--- Relatório de Classificação Final (QSVM com seleção de features) ---
              precision    recall  f1-score   support

   malignant       0.80      0.77      0.79        53
      benign       0.87      0.89      0.88        90

    accuracy                           0.85       143
   macro avg       0.84      0.83      0.83       143
weighted avg       0.85      0.85      0.85       143

