In [141]:
import numpy as np

from kl.utils import load_fx

# Window length forwarded to load_fx; kept as a module-level name because it is
# a tunable of the experiment.
window_size = 11

# Load rows 0..5000 of the FX data set.
# NOTE(review): exact semantics of `shift` and the layout of X / y / returns are
# defined in kl.utils.load_fx, which is not visible here - confirm before relying on them.
X, y, returns = load_fx(
    data_start=0,
    data_end=5000,
    window_size=window_size,
    shift=1,
)

In [142]:
import torch
# Pick the best available accelerator, falling back to the CPU.
# `torch.backends.mps` does not exist on older PyTorch builds, so it is looked
# up defensively instead of assuming the attribute is present.
_mps_backend = getattr(torch.backends, "mps", None)

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA for GPU acceleration")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
    print("Using MPS (Apple Silicon) for GPU acceleration")
else:
    device = torch.device("cpu")
    print("Using CPU")

# Uncomment to force CPU execution regardless of the detection above:
# device = torch.device("cpu")

In [143]:
import torch
import torch.nn as nn
import torch.optim as optim

# SVM-like classifier (linear model)
class SVMClassifier(nn.Module):
    """Linear decision function that emits one raw score per sample.

    No activation is applied, so the output is an unbounded margin-style score
    of shape (..., 1).
    """

    def __init__(self, input_dim):
        super().__init__()
        # Single affine map: input_dim features -> 1 score.
        self.fc = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return the raw linear score for each row of ``x``."""
        score = self.fc(x)
        return score

# Hinge loss function (for SVM-like behavior)
def hinge_loss_fn(y_pred, y_true):
    """Return the mean hinge loss ``mean(max(0, 1 - y_pred * y_true))``.

    The product is element-wise (with broadcasting), so the formula only
    behaves like an SVM hinge loss when ``y_true`` holds signed targets
    (e.g. -1 / +1) - the caller is responsible for that encoding.
    """
    margins = 1 - y_pred * y_true
    return torch.clamp(margins, min=0).mean()

In [144]:
import pennylane as qml
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.svm import SVC
from sklearn.metrics import hinge_loss
from kl.utils import load_fx
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

def reparameterize(mu, log_var):
    """Sample ``z = mu + sigma * eps`` with ``eps ~ N(0, I)``.

    ``sigma`` is recovered from the log-variance as ``exp(0.5 * log_var)``;
    drawing the noise separately keeps the sample differentiable with respect
    to ``mu`` and ``log_var``.
    """
    sigma = (0.5 * log_var).exp()
    noise = torch.randn_like(sigma)
    return mu + noise * sigma


# Quantum feature map: one qubit per input feature (the qubit count is len(x)).
def quantum_circuit(params, x):
    """Encode ``x`` with per-qubit rotations and measure Pauli-Z on every qubit.

    Qubit ``i`` receives RX(x[i]) followed by RY(params[0]) and RZ(params[1]);
    the same two angles are shared by all qubits and no entangling gates are
    applied. Returns one Pauli-Z expectation value per qubit.
    """
    wires = range(len(x))

    for wire, feature in zip(wires, x):
        qml.RX(feature, wires=wire)    # data-dependent rotation
        qml.RY(params[0], wires=wire)  # shared angle
        qml.RZ(params[1], wires=wire)  # shared angle

    return [qml.expval(qml.PauliZ(wire)) for wire in wires]

class QuantumEncoder(nn.Module):
    """Encoder that maps inputs through a quantum circuit to ``(mu, log_var)``.

    Each sample is evaluated by ``quantum_circuit`` on a ``default.qubit``
    device, and the per-qubit expectation values are projected to the latent
    mean and log-variance by two linear layers.

    The two circuit rotation angles are sampled once at construction and kept
    in the ``circuit_params`` buffer. Previously they were re-drawn on every
    forward pass, so the same input produced different circuit features on
    each call and the linear layers were fitted to a moving target. The angles
    are not trained: the circuit is evaluated on detached NumPy data, so no
    gradient reaches it.

    Args:
        n_qubits: number of wires; also the input width of the linear heads.
        n_features: stored for reference only.
        latent_dim: width of the returned ``mu`` / ``log_var``.
    """

    def __init__(self, n_qubits, n_features, latent_dim):
        super().__init__()
        self.n_qubits = n_qubits
        self.n_features = n_features
        self.latent_dim = latent_dim

        # Quantum device and circuit, sized by n_qubits.
        self.dev = qml.device("default.qubit", wires=n_qubits)
        self.qcircuit = qml.QNode(quantum_circuit, self.dev)

        # Bottleneck heads: circuit features -> latent mean / log-variance.
        self.fc_mu = nn.Linear(n_qubits, latent_dim)
        self.fc_logvar = nn.Linear(n_qubits, latent_dim)

        # Fixed (RY, RZ) angles shared by all qubits. A buffer follows the
        # module across .to(device) and is saved with the state dict, so the
        # feature map is reproducible between training and inference.
        self.register_buffer("circuit_params", torch.randn(2))

    def forward(self, x):
        """Return ``(mu, log_var)`` for a batch ``x`` (one sample per row)."""
        # The circuit runs on host NumPy data; move the batch over once.
        params = self.circuit_params.detach().cpu().numpy()
        batch = x.detach().cpu().numpy()

        outputs = [np.array(self.qcircuit(params, sample)) for sample in batch]
        outputs_np = np.stack(outputs, axis=0)

        # Back to a float32 tensor on the caller's device.
        circuit_features = torch.tensor(outputs_np, dtype=torch.float32).to(x.device)

        mu = self.fc_mu(circuit_features)
        log_var = self.fc_logvar(circuit_features)
        return mu, log_var
    
# Define the classical decoder
class ClassicalDecoder(nn.Module):
    """Two-layer MLP: input -> 128 hidden units (ReLU) -> ``n_features``.

    Despite its name, ``n_qubits`` is simply the width of the vector fed to the
    decoder; in this notebook ``QuantumVAE`` passes the latent dimension.
    """

    def __init__(self, n_qubits, n_features):
        super().__init__()
        hidden_units = 128
        self.fc1 = nn.Linear(n_qubits, hidden_units)
        self.fc2 = nn.Linear(hidden_units, n_features)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of input vectors to reconstructed feature vectors."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


In [145]:
class QuantumVAE(nn.Module):
    """VAE with a quantum encoder, a classical decoder and a linear classifier.

    Both the decoder and the classifier operate on the sampled latent vector,
    whose width is ``latent_dim``.
    """

    def __init__(self, n_qubits, n_features, latent_dim):
        super().__init__()
        self.encoder = QuantumEncoder(n_qubits, n_features, latent_dim)
        self.decoder = ClassicalDecoder(latent_dim, n_features)
        self.classifier = SVMClassifier(latent_dim)

    def forward(self, x):
        """Return ``(latent, reconstruction, class_score, mu, log_var)``.

        ``mu`` and ``log_var`` are returned so the caller can compute the KL
        term. The latent vector is always sampled, regardless of train/eval mode.
        """
        mu, log_var = self.encoder(x)
        z = reparameterize(mu, log_var)
        return z, self.decoder(z), self.classifier(z), mu, log_var

In [146]:
def compute_class_variance(latent_vectors, labels):
    """Return the mean per-dimension variance of the latent vectors of each class.

    Args:
        latent_vectors: tensor with one latent vector per row.
        labels: tensor of class labels, one per row of ``latent_vectors``.

    Returns:
        dict mapping each distinct label (as a Python scalar) to a 0-dim
        tensor: the variance along dim 0, averaged over latent dimensions.

    A class with a single sample has no defined unbiased variance (torch
    returns NaN, which would propagate into the training loss), so for such
    classes the population variance is used instead, which is exactly 0.
    """
    class_variances = {}

    for label in torch.unique(labels):
        class_latents = latent_vectors[labels == label]
        # Bessel's correction needs at least two samples.
        use_unbiased = class_latents.shape[0] > 1
        per_dim_variance = torch.var(class_latents, dim=0, unbiased=use_unbiased)
        class_variances[label.item()] = per_dim_variance.mean()

    return class_variances

In [147]:
def _variance_weighted_hinge(svm_predictions, labels, class_variances):
    """Sum per-class hinge losses, each weighted by its share of latent variance."""
    scores = svm_predictions.reshape(-1)
    flat_labels = labels.reshape(-1)

    total_variance = sum(class_variances.values())
    # False for a zero total and for NaN; fall back to uniform weights then
    # instead of dividing by it.
    has_valid_total = bool(total_variance > 0)
    uniform_weight = 1.0 / max(len(class_variances), 1)

    loss = 0
    for label, variance in class_variances.items():
        class_scores = scores[flat_labels == label]
        # Hinge loss needs signed targets. Labels > 0 map to +1 and everything
        # else to -1, matching the `score > 0` decision rule used by
        # `inference` in this notebook.
        # NOTE(review): assumes binary labels encoded as {0, 1} or {-1, +1} - confirm.
        target_sign = 1.0 if label > 0 else -1.0
        weight = variance / total_variance if has_valid_total else uniform_weight
        loss = loss + weight * hinge_loss_fn(class_scores, target_sign)
    return loss


def train_vae_with_svm(vae, dataloader, optimizer, n_epochs=10):
    """Jointly train the VAE and its embedded hinge-loss classifier.

    The per-batch objective is::

        recon_loss + beta * kl_divergence + 0.1 * svm_loss

    where ``recon_loss`` is the MSE between input and reconstruction,
    ``kl_divergence`` is the summed KL term of the diagonal Gaussian posterior,
    and ``svm_loss`` is a hinge loss computed per class and weighted by each
    class's share of the latent variance. ``beta`` warms up linearly from 0 to
    1 over the first 10 epochs.

    Fixes relative to the previous revision:
      * the hinge target is now the signed class (+1 / -1), not the class
        membership mask, which pushed every class towards a positive score;
      * the KL term is scaled by the computed-but-unused ``beta`` rather than
        the unbounded ``epoch * epoch``;
      * a zero or NaN total variance no longer produces NaN weights;
      * batches are moved to the model's own device instead of a global.

    Args:
        vae: model returning ``(latent, reconstruction, score, mu, log_var)``.
        dataloader: iterable yielding ``(x, labels)`` batches.
        optimizer: optimizer over ``vae``'s parameters.
        n_epochs: number of passes over ``dataloader``.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    vae.train()
    model_device = next(vae.parameters()).device
    mse = nn.MSELoss()  # stateless, so build it once

    for epoch in range(n_epochs):
        beta = min(1.0, epoch / 10)  # KL warm-up
        total_loss = 0.0
        n_batches = 0

        for x, labels in dataloader:
            x, labels = x.to(model_device), labels.to(model_device)

            # Forward pass
            latent_vectors, reconstructed_x, svm_predictions, mu, log_var = vae(x)

            recon_loss = mse(reconstructed_x, x)
            kl_divergence = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())

            class_variances = compute_class_variance(latent_vectors, labels)
            svm_loss = _variance_weighted_hinge(svm_predictions, labels, class_variances)

            total_batch_loss = recon_loss + beta * kl_divergence + 0.1 * svm_loss

            # Backpropagation
            optimizer.zero_grad()
            total_batch_loss.backward()
            optimizer.step()

            total_loss += total_batch_loss.item()
            n_batches += 1

        if n_batches == 0:
            raise ValueError("dataloader yielded no batches")

        # KL / SVM / reconstruction values are those of the last batch of the epoch.
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss/n_batches:.4f}, KL: {kl_divergence.item():.6f}, SVM Loss: {svm_loss:.4f}, Reconstruction Loss: {recon_loss:.4f}')

In [148]:
# Build the Quantum VAE with one qubit per input column and a 2-D latent space,
# then place it on the selected device.
n_inputs = X.shape[1]
vae_model = QuantumVAE(n_qubits=n_inputs, n_features=n_inputs, latent_dim=2).to(device)

# Adam over every trainable parameter of the model.
optimizer = optim.Adam(vae_model.parameters(), lr=1e-3)

# Shuffled mini-batches of (features, labels). A larger batch_size (e.g. 128)
# may make better use of a GPU.
features_tensor = torch.tensor(X, dtype=torch.float32)
labels_tensor = torch.tensor(y, dtype=torch.long)
dataloader = DataLoader(
    TensorDataset(features_tensor, labels_tensor),
    batch_size=64,
    shuffle=True,
)

# Train the VAE together with its embedded SVM-style classifier.
train_vae_with_svm(vae_model, dataloader, optimizer, n_epochs=50)

In [None]:
def inference(vae, x):
    """Run ``vae`` on ``x`` without gradients and threshold the classifier score.

    The model is switched to eval mode and the input is moved to the global
    ``device``. A sample is labelled 1.0 when its score is > 0, else 0.0.

    Returns:
        ``(reconstructed_x, class_labels, latent_vectors, mu, log_var)``.

    Note: for the ``QuantumVAE`` defined in this notebook the latent vector is
    still sampled in eval mode, so repeated calls can give different results.
    """
    vae.eval()

    with torch.no_grad():
        outputs = vae(x.to(device))

    latent_vectors, reconstructed_x, svm_predictions, mu, log_var = outputs
    class_labels = (svm_predictions > 0).float()  # binary decision at score 0
    return reconstructed_x, class_labels, latent_vectors, mu, log_var

In [None]:
# NOTE(review): the evaluation set is the training data itself, so any metric
# computed from these predictions is an in-sample figure.
X_test = X
new_data = torch.tensor(X_test, dtype=torch.float32)

# Run the trained model over the whole evaluation set in one batch.
inference_outputs = inference(vae_model, new_data)
reconstructed_data, predicted_labels, latent_vectors, mu, log_var = inference_outputs

In [None]:
# Bring the inference outputs back to host memory as NumPy arrays.
predicted_labels_np, reconstructed_data_np, latent_vectors_np = (
    tensor.detach().cpu().numpy()
    for tensor in (predicted_labels, reconstructed_data, latent_vectors)
)

In [None]:
# Persist the inference outputs and the reference labels as plain-text files
# in the current working directory.
for filename, array in (
    ('reconstructed_data.txt', reconstructed_data_np),
    ('latent_vectors.txt', latent_vectors_np),
    ('predicted_labels.txt', predicted_labels_np),
    ('y_np.txt', y),
):
    np.savetxt(filename, array)
# Print results
# print("Reconstructed Data:\n", reconstructed_data)
# print("Predicted Labels:\n", predicted_labels)
# print("Latent Vectors:\n", latent_vectors)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
# Fraction of samples whose thresholded prediction matches the reference label.
acc = accuracy_score(y_true=y, y_pred=predicted_labels_np)
print(f"Accuracy: {acc}")