<a href="https://colab.research.google.com/github/Anni1808/MINed_Hackoholics/blob/main/QuantumML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pennylane torch numpy pandas scikit-learn


Collecting pennylane
  Downloading PennyLane-0.40.0-py3-none-any.whl.metadata (10 kB)
Collecting rustworkx>=0.14.0 (from pennylane)
  Downloading rustworkx-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting tomlkit (from pennylane)
  Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)
Collecting appdirs (from pennylane)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting autoray>=0.6.11 (from pennylane)
  Downloading autoray-0.7.0-py3-none-any.whl.metadata (5.8 kB)
Collecting pennylane-lightning>=0.40 (from pennylane)
  Downloading PennyLane_Lightning-0.40.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (27 kB)
Collecting diastatic-malt (from pennylane)
  Downloading diastatic_malt-2.15.2-py3-none-any.whl.metadata (2.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu1

In [None]:
import pennylane as qml
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

# Read the training and test tables from CSV files located relative to the
# working directory (alignment and preprocessing happen further down).
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("final_merged_dataset.csv", "aligned_test.csv")
)

def align_features(train_df, test_df, target_col='Type'):
    """Restrict both tables to the feature columns they share.

    The shared feature columns are returned in sorted order. When the training
    table contains the label column it is appended as the last column of the
    training result, and of the test result as well if the test table has it.
    An unlabeled test table therefore no longer raises ``KeyError``.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Training table.
    test_df : pandas.DataFrame
        Test table; the label column may be absent.
    target_col : str, default 'Type'
        Name of the label column, which is never treated as a feature.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_aligned, test_aligned)`` with identical feature columns.
    """
    shared = sorted(
        (set(train_df.columns) & set(test_df.columns)) - {target_col}
    )

    # The label is only carried along when the training table has it.
    keep_target = target_col in train_df.columns
    train_cols = shared + [target_col] if keep_target else shared
    test_cols = (
        shared + [target_col]
        if keep_target and target_col in test_df.columns
        else shared
    )
    return train_df[train_cols], test_df[test_cols]

def preprocess_features(df):
    df_processed = df.copy()
    for column in df_processed.columns:
        if column != 'Type':
            if df_processed[column].dtype == 'object':
                df_processed[column] = df_processed[column].fillna('missing')
                df_processed[column] = pd.util.hash_array(df_processed[column].values)
                min_val = df_processed[column].min()
                max_val = df_processed[column].max()
                if min_val != max_val:
                    df_processed[column] = (df_processed[column] - min_val) / (max_val - min_val)
                else:
                    df_processed[column] = 0
            else:
                df_processed[column] = df_processed[column].fillna(df_processed[column].mean())
    return df_processed

# Column counts before alignment; the "- 1" leaves one column (the label) out.
print("Original feature counts:")
print(f"Training features: {train_df.shape[1] - 1}")
print(f"Test features: {test_df.shape[1] - 1}")

# Keep only the columns both tables share.
train_df, test_df = align_features(train_df, test_df)
print(f"Number of features after alignment: {train_df.shape[1] - 1}")

# Impute / encode the feature columns, one table at a time.
train_df, test_df = preprocess_features(train_df), preprocess_features(test_df)

# Integer-encode the labels; the encoder is fit on the training labels only
# and then reused for the test labels.
label_encoder = LabelEncoder()
train_df['Type'] = label_encoder.fit_transform(train_df['Type'])
test_df['Type'] = label_encoder.transform(test_df['Type'])

# Separate each table into a label vector and a feature matrix.
y_train = train_df['Type'].values
X_train = train_df.drop(columns=['Type']).values
y_test = test_df['Type'].values
X_test = test_df.drop(columns=['Type']).values

# Drop every column whose standard deviation over the training rows is not
# positive; the same column mask is applied to the test matrix.
feature_std = np.std(X_train, axis=0)
non_constant_features = feature_std > 0
X_train, X_test = X_train[:, non_constant_features], X_test[:, non_constant_features]
print(f"Features after removing constants: {X_train.shape[1]}")

# Standardize with statistics taken from the training matrix.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Hand everything to torch: float32 features, int64 class indices.
X_train, X_test = (
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(X_test, dtype=torch.float32),
)
y_train, y_test = (
    torch.tensor(y_train, dtype=torch.long),
    torch.tensor(y_test, dtype=torch.long),
)

# Quantum settings: the circuit width, and a PennyLane "default.qubit" device
# with that many wires.
n_qubits = 4
dev = qml.device("default.qubit", wires=n_qubits)
print(f"Using {n_qubits} qubits")

@qml.qnode(dev, interface="torch")
def quantum_circuit(inputs, weights):
    """Variational circuit: RY angle encoding, entangling layers, Pauli-Z readout.

    Args:
        inputs: Tensor whose last axis holds at least ``n_qubits`` rotation
            angles; only the first ``n_qubits`` entries are used. A single
            sample is 1-D. Indexing the last axis (``inputs[..., i]``) leaves
            any leading batch axis in place, so a ``(batch, features)`` tensor
            is encoded per sample instead of being indexed along the batch axis.
            NOTE(review): batched execution relies on PennyLane's parameter
            broadcasting - confirm on the installed version.
        weights: Trainable parameters handed to ``qml.StronglyEntanglingLayers``;
            ``QuantumLayer`` creates them with shape ``(3, n_qubits, 3)``.

    Returns:
        A list with one ``PauliZ`` expectation value per wire.
    """
    # Encode feature i as an RY rotation angle on wire i.
    for wire in range(n_qubits):
        qml.RY(inputs[..., wire], wires=wire)

    # Trainable entangling block over all wires.
    qml.StronglyEntanglingLayers(weights, wires=range(n_qubits))

    return [qml.expval(qml.PauliZ(wire)) for wire in range(n_qubits)]

class QuantumLayer(nn.Module):
    """Torch module that evaluates ``quantum_circuit`` on each sample of a batch.

    Args:
        n_qubits: Number of leading features fed to the circuit, and the middle
            dimension of the ``(3, n_qubits, 3)`` weight tensor. The circuit
            itself is built from the module-level ``n_qubits``, so the two
            values must agree.
    """

    def __init__(self, n_qubits):
        super().__init__()
        # Stored so forward() slices with the same width that sized the weights,
        # rather than reading the module-level variable of the same name.
        self.n_qubits = n_qubits
        weight_shapes = {"weights": (3, n_qubits, 3)}
        self.ql = qml.qnn.TorchLayer(quantum_circuit, weight_shapes)

    def forward(self, x):
        """Run the circuit on the first ``n_qubits`` features of every row.

        Args:
            x: Tensor of shape ``(batch, n_features)``.

        Returns:
            Tensor with one row per sample, stacked from the circuit outputs.

        Raises:
            ValueError: If ``x`` has fewer than ``n_qubits`` feature columns.
        """
        if x.shape[1] < self.n_qubits:
            raise ValueError(
                f"QuantumLayer needs at least {self.n_qubits} features per sample, "
                f"got {x.shape[1]}"
            )

        x = x[:, :self.n_qubits]  # Take first n_qubits features

        # torch.stack rejects an empty list, so an empty batch is answered directly.
        if x.shape[0] == 0:
            return x.new_zeros((0, self.n_qubits))

        # Process each input in the batch separately
        return torch.stack([self.ql(sample) for sample in x])

class QuantumClassifier(nn.Module):
    """Hybrid classifier: a ``QuantumLayer`` followed by a small dense head.

    Args:
        n_qubits: Width passed to ``QuantumLayer``; also the input size of the
            first linear layer.
        n_classes: Number of output logits.
    """

    def __init__(self, n_qubits, n_classes):
        super().__init__()
        hidden_units = 32
        self.quantum_layer = QuantumLayer(n_qubits)
        head_layers = [
            nn.Linear(n_qubits, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_classes),
        ]
        self.post_process = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class logits for a batch ``x``."""
        return self.post_process(self.quantum_layer(x))

# Seed the global RNGs before anything random happens, so that weight
# initialisation and the per-epoch torch.randperm shuffle repeat across runs.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Initialize model
# y_train is a torch tensor at this point, so count its distinct labels with torch.
n_classes = int(torch.unique(y_train).numel())
model = QuantumClassifier(n_qubits, n_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training settings
batch_size = 32
epochs = 20
best_loss = float('inf')  # lowest average epoch loss seen so far
patience = 5              # epochs without improvement tolerated before stopping
patience_counter = 0

# Training loop
# Snapshot of the weights belonging to the best epoch so far. It is restored
# after the loop so the model does not keep the weights of the later,
# non-improving epochs that triggered early stopping.
best_state = {
    name: value.detach().clone() for name, value in model.state_dict().items()
}

for epoch in range(epochs):
    model.train()
    total_loss = 0
    n_batches = 0

    # Draw a fresh sample order for this epoch. Batches are looked up through
    # the permutation, so X_train / y_train themselves are never overwritten.
    indices = torch.randperm(len(X_train))

    for i in range(0, len(X_train), batch_size):
        batch_idx = indices[i:i+batch_size]
        batch_X = X_train[batch_idx]
        batch_y = y_train[batch_idx]

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        n_batches += 1

        if n_batches % 10 == 0:
            print(f"Epoch {epoch+1}, Batch {n_batches}, Loss: {loss.item():.4f}")

    avg_loss = total_loss / n_batches
    print(f"Epoch {epoch+1}/{epochs}, Average Loss: {avg_loss:.4f}")

    # Early stopping on the average training loss of the epoch
    if avg_loss < best_loss:
        best_loss = avg_loss
        patience_counter = 0
        best_state = {
            name: value.detach().clone() for name, value in model.state_dict().items()
        }
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered")
            break

# Continue with the weights of the best epoch.
model.load_state_dict(best_state)

# Evaluation
model.eval()
batch_predictions = []
# Gradients are only unnecessary for the forward passes, so only those run
# inside no_grad; the metrics below work on plain tensors.
with torch.no_grad():
    for i in range(0, len(X_test), batch_size):
        batch_X = X_test[i:i+batch_size]
        outputs = model(batch_X)
        batch_predictions.append(outputs.argmax(dim=1))

y_pred = torch.cat(batch_predictions)
accuracy = (y_pred == y_test).sum().item() / len(y_test)
print(f"\nTest Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix
# Passing the full label list keeps the matrix n_classes x n_classes even when
# a class is absent from both the test labels and the predictions.
cm = confusion_matrix(
    y_test.numpy(), y_pred.numpy(), labels=list(range(n_classes))
)
print("\nConfusion Matrix:")
print(cm)

Original feature counts:
Training features: 357
Test features: 356
Number of features after alignment: 356
Features after removing constants: 305
Using 4 qubits
Epoch 1, Batch 10, Loss: 1.9301
Epoch 1, Batch 20, Loss: 1.8164
Epoch 1, Batch 30, Loss: 1.8936
Epoch 1/20, Average Loss: 1.8576
Epoch 2, Batch 10, Loss: 1.8315
Epoch 2, Batch 20, Loss: 1.7053
Epoch 2, Batch 30, Loss: 1.7377
Epoch 2/20, Average Loss: 1.7819
Epoch 3, Batch 10, Loss: 1.6849
Epoch 3, Batch 20, Loss: 1.7959
Epoch 3, Batch 30, Loss: 1.6166
Epoch 3/20, Average Loss: 1.7093
Epoch 4, Batch 10, Loss: 1.7425
Epoch 4, Batch 20, Loss: 1.7062
Epoch 4, Batch 30, Loss: 1.7922
Epoch 4/20, Average Loss: 1.6494
Epoch 5, Batch 10, Loss: 1.5698
Epoch 5, Batch 20, Loss: 1.5380
Epoch 5, Batch 30, Loss: 1.4659
Epoch 5/20, Average Loss: 1.5974
Epoch 6, Batch 10, Loss: 1.5530
Epoch 6, Batch 20, Loss: 1.7105
Epoch 6, Batch 30, Loss: 1.5856
Epoch 6/20, Average Loss: 1.5725
Epoch 7, Batch 10, Loss: 1.5186
Epoch 7, Batch 20, Loss: 1.8421
E