<a href="https://colab.research.google.com/github/JhaAyushCanCode/Quantum-Classical_Hybrid_ML/blob/main/QML_QML_Load_MainData_1L.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow tensorflow-datasets pennylane cirq tensorflow-quantum transformers seaborn



In [None]:
import pennylane as qml
from pennylane import numpy as np
import numpy as onp
import tensorflow as tf
import tensorflow_datasets as tfds
from transformers import BertTokenizer, TFBertModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns



In [None]:
# Load GoEmotions Dataset
dataset, info = tfds.load('goemotions', with_info=True)
train_dataset = dataset['train']

# Every feature except the raw text is treated as a per-emotion flag.
emotion_keys = [key for key in info.features.keys() if key != 'comment_text']

texts, labels = [], []
n_unlabeled = 0

for example in tfds.as_numpy(train_dataset):
    # Index of the first emotion flag that is set. If several flags are set,
    # only the first one is kept, which turns the task into single-label.
    label_index = next(
        (i for i, key in enumerate(emotion_keys) if example[key]),
        None,
    )
    if label_index is None:
        # No flag set: skip the example (text AND label) instead of letting
        # next() raise StopIteration and leaving texts/labels misaligned.
        n_unlabeled += 1
        continue

    texts.append(example['comment_text'].decode('utf-8'))
    labels.append(label_index)

if n_unlabeled:
    print(f"Skipped {n_unlabeled} examples with no emotion label")

print(f"Total samples: {len(texts)}")
print(f"Number of classes: {len(emotion_keys)}")


Total samples: 43410
Number of classes: 28


In [None]:
# Text embedding using BERT, run in small batches so the whole corpus never
# has to pass through the model at once.

from transformers import BertTokenizer, TFBertModel
# NOTE: this rebinds `np` (imported from pennylane at the top of the notebook)
# to plain NumPy for every cell that runs after this one.
import numpy as np

# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

batch_size = 64  # Start safe
log_every = 50   # progress line every N batches instead of one line per batch
# Ceiling division: `len // batch_size + 1` over-counts by one whenever the
# corpus size is an exact multiple of the batch size.
n_batches = (len(texts) + batch_size - 1) // batch_size
embeddings = []

for batch_no, start in enumerate(range(0, len(texts), batch_size), start=1):
    batch_texts = texts[start:start + batch_size]
    inputs = tokenizer(batch_texts, return_tensors='tf', padding=True, truncation=True, max_length=64)

    # Forward pass through BERT; keep only position 0 (CLS token embedding)
    batch_embeddings = bert_model(inputs)[0][:, 0, :].numpy()
    embeddings.append(batch_embeddings)

    if batch_no % log_every == 0 or batch_no == n_batches:
        print(f"Processed batch {batch_no} / {n_batches}")

# Combine all batches into one (n_samples, hidden_dim) array
embeddings = np.vstack(embeddings)
print(f"Final embedding shape: {embeddings.shape}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch 

Processed batch 1 / 679
Processed batch 2 / 679
Processed batch 3 / 679
Processed batch 4 / 679
Processed batch 5 / 679
Processed batch 6 / 679
Processed batch 7 / 679
Processed batch 8 / 679
Processed batch 9 / 679
Processed batch 10 / 679
Processed batch 11 / 679
Processed batch 12 / 679
Processed batch 13 / 679
Processed batch 14 / 679
Processed batch 15 / 679
Processed batch 16 / 679
Processed batch 17 / 679
Processed batch 18 / 679
Processed batch 19 / 679
Processed batch 20 / 679
Processed batch 21 / 679
Processed batch 22 / 679
Processed batch 23 / 679
Processed batch 24 / 679
Processed batch 25 / 679
Processed batch 26 / 679
Processed batch 27 / 679
Processed batch 28 / 679
Processed batch 29 / 679
Processed batch 30 / 679
Processed batch 31 / 679
Processed batch 32 / 679
Processed batch 33 / 679
Processed batch 34 / 679
Processed batch 35 / 679
Processed batch 36 / 679
Processed batch 37 / 679
Processed batch 38 / 679
Processed batch 39 / 679
Processed batch 40 / 679
Processed

In [None]:
# Splitting

n_classes = len(emotion_keys)  # 27 emotions + neutral

# NOTE(review): the original split arguments are not present in this notebook;
# test_size and random_state below are assumptions — confirm before comparing
# against previously reported numbers.
X_train, X_test, y_train, y_test = train_test_split(
    embeddings,
    onp.asarray(labels),
    test_size=0.2,
    random_state=42,
)


def _scale_rows_to_pi(X):
    """Rescale every row of X to have L2 norm pi.

    All-zero rows are returned unchanged (still zero) instead of producing NaNs.
    """
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    norms = np.where(norms == 0, 1.0, norms)  # avoid division by zero
    return (X / norms) * np.pi


# Normalize inputs for quantum circuit.
# NOTE(review): the norm is taken over the full embedding, while the circuit
# only reads the first n_qubits entries — confirm that this is intended.
X_train = _scale_rows_to_pi(X_train)
X_test = _scale_rows_to_pi(X_test)

In [None]:
# One-Hot Encoding Labels

# Derive the class count here so this cell does not depend on a value that is
# only set (if at all) in some other cell.
n_classes = len(emotion_keys)

# Row k of the identity matrix is the one-hot vector for class k.
_identity = onp.eye(n_classes)
y_train_onehot = _identity[onp.asarray(y_train, dtype=int)]
y_test_onehot = _identity[onp.asarray(y_test, dtype=int)]

In [None]:
# Quantum device configuration

# Number of wires shared by the embedding, the entangling layers and the readout.
n_qubits = 8

# Simulator backend on which the QNode below is executed.
_backend = "default.qubit"
dev = qml.device(_backend, wires=n_qubits)

In [None]:
# Variational circuit: angle-encode the inputs, then apply entangling layers.

@qml.qnode(dev)
def quantum_circuit(inputs, weights):
    """Run the variational circuit for one sample.

    Args:
        inputs: feature vector; only its first ``n_qubits`` entries are used.
        weights: parameters passed to ``StronglyEntanglingLayers``.

    Returns:
        A list with one Pauli-Z expectation value per wire.
    """
    wires = range(n_qubits)
    # Encode the leading n_qubits features as Y-rotation angles, one per wire.
    qml.templates.AngleEmbedding(inputs[:n_qubits], wires=wires, rotation='Y')
    qml.templates.StronglyEntanglingLayers(weights, wires=wires)
    return [qml.expval(qml.PauliZ(wire)) for wire in wires]

In [None]:
# Defining Prediction Logic

from pennylane import math as qml_math

def softmax(x):
    """Softmax along axis 1 of a 2-D array of scores.

    Each row is shifted by its own maximum before exponentiating, which leaves
    the result unchanged but keeps the exponentials from overflowing.
    """
    row_max = qml_math.max(x, axis=1, keepdims=True)
    exps = qml_math.exp(x - row_max)
    row_sums = qml_math.sum(exps, axis=1, keepdims=True)
    return exps / row_sums


def predict(X, weights, W=None, b=None):
    """Compute raw class scores (logits) for a batch of samples.

    Args:
        X: iterable of per-sample feature vectors fed to ``quantum_circuit``.
        weights: circuit parameters forwarded to ``quantum_circuit``.
        W: optional readout matrix; defaults to the module-level ``W_output``.
        b: optional readout bias; defaults to the module-level ``b_output``.

    Returns:
        ``features @ W + b`` where row i of ``features`` holds the circuit
        outputs for ``X[i]``. No softmax is applied.
    """
    # Accepting the readout parameters explicitly lets callers pass (and
    # differentiate) them instead of always reading the globals.
    W = W_output if W is None else W
    b = b_output if b is None else b

    # Stack each sample's list of measurements into one vector first, so the
    # outer stack receives a flat list of arrays rather than a list of lists.
    features = qml_math.stack(
        [qml_math.stack(quantum_circuit(x, weights)) for x in X]
    )
    return qml_math.matmul(features, W) + b  # Raw scores

In [None]:
# Initializing Parameters

n_layers = 3  # number of entangling layers in the circuit

np.random.seed(42)

# Draw the initial values first (same order as before, so the seeded values are
# unchanged), then wrap each one as a PennyLane array with requires_grad=True.
_circuit_shape = (n_layers, n_qubits, 3)
_init_angles = np.random.uniform(low=-np.pi, high=np.pi, size=_circuit_shape)
_init_readout = 0.1 * np.random.randn(n_qubits, n_classes)
_init_bias = 0.1 * np.random.randn(n_classes)

weights = qml.numpy.array(_init_angles, requires_grad=True)
W_output = qml.numpy.array(_init_readout, requires_grad=True)
b_output = qml.numpy.array(_init_bias, requires_grad=True)



In [None]:
# Training loop: one randomly drawn mini-batch per step, with periodic
# accuracy monitoring.

opt = qml.AdamOptimizer(stepsize=0.01)
epochs = 100          # number of optimisation steps (one mini-batch each)
batch_size = 64
eval_every = 5        # the accuracy plot assumes one recorded point per 5 steps
eval_samples = 256    # size of the monitoring subset drawn from the test set
accuracy_history = []

# Monitoring on the full test set means simulating the circuit for every test
# sample at each check, which dwarfs the cost of the training step itself.
# A fixed subset (private RNG, so the mini-batch sequence is unaffected) keeps
# the curve comparable across steps; the full test set is still used for the
# final evaluation.
_eval_rng = onp.random.RandomState(0)
eval_index = _eval_rng.choice(len(X_test), size=min(eval_samples, len(X_test)), replace=False)
X_eval = X_test[eval_index]
y_eval = onp.asarray(y_test)[eval_index]

for epoch in range(epochs):
    batch_index = onp.random.randint(0, len(X_train), batch_size)
    X_batch = X_train[batch_index]
    y_batch = y_train_onehot[batch_index]

    def cost(weights, W_out=None, b_out=None):
        """Mean over the batch of the squared error between one-hot targets and logits.

        W_out / b_out default to the module-level readout parameters so that
        ``cost(weights)`` keeps working; the optimizer passes them explicitly
        so that they are differentiated and updated as well.
        """
        W_out = W_output if W_out is None else W_out
        b_out = b_output if b_out is None else b_out
        features = qml_math.stack(
            [qml_math.stack(quantum_circuit(x, weights)) for x in X_batch]
        )
        logits = qml_math.matmul(features, W_out) + b_out
        return qml_math.mean(qml_math.sum((y_batch - logits) ** 2, axis=1))

    # Update the circuit weights AND the classical readout layer. Passing only
    # `weights` leaves W_output / b_output at their random initial values.
    weights, W_output, b_output = opt.step(cost, weights, W_output, b_output)

    if epoch % eval_every == 0:
        # predict() reads the (just updated) module-level W_output / b_output.
        y_pred_logits = predict(X_eval, weights)
        acc = accuracy_score(y_eval, onp.argmax(y_pred_logits, axis=1))
        accuracy_history.append(acc)
        print(f"Epoch {epoch}: Test Accuracy = {acc:.2f}")

Epoch 0: Test Accuracy = 0.03




Epoch 5: Test Accuracy = 0.03




Epoch 10: Test Accuracy = 0.03




KeyboardInterrupt: 

In [None]:
# Gradient diagnostics for the circuit weights.
# `cost` is the closure left behind by the training loop (it captures the last
# mini-batch), so the training cell must have been run before this one.
gradients = qml.grad(cost)(weights)

# Iterating over the gradient yields one slice per entry along its first axis.
layer_norms = [qml_math.linalg.norm(g) for g in gradients]
print("Gradient norms by layer:", layer_norms)

# Norm of the whole gradient: combine the per-layer norms in quadrature.
# Simply adding them up over-states the magnitude and is not a norm of the
# full gradient.
print("Overall gradient norm:", onp.sqrt(sum(float(n) ** 2 for n in layer_norms)))

Gradient norms by layer: [0.0, 0.0, 0.0]
Overall gradient norm: 0.0


In [None]:
# Final Evaluation

y_pred_final = predict(X_test, weights)

# Convert once and reuse for both metrics.
y_true = onp.array(y_test)
y_pred_labels = onp.argmax(y_pred_final, axis=1)

final_acc = accuracy_score(y_true, y_pred_labels)
print(f"Final Test Accuracy: {final_acc:.2f}")


print("Detailed Classification Report:")
# zero_division=0 reports 0.00 for classes that are never predicted without
# emitting an undefined-metric warning for each metric.
print(classification_report(y_true, y_pred_labels, zero_division=0))

Final Test Accuracy: 0.02
Detailed Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       839
           1       0.00      0.00      0.00       461
           2       0.00      0.00      0.00       319
           3       0.00      0.00      0.00       406
           4       0.00      0.00      0.00       492
           5       0.00      0.00      0.00       164
           6       0.00      0.00      0.00       258
           7       0.00      0.00      0.00       370
           8       0.00      0.00      0.00       100
           9       0.00      0.00      0.00       216
          10       0.00      0.00      0.00       331
          11       0.00      0.00      0.00       115
          12       0.00      0.00      0.00        45
          13       0.00      0.00      0.00       149
          14       0.00      0.00      0.00        98
          15       0.00      0.00      0.00       427
          16       0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
#Accuracy Plot

# Accuracy is recorded once every 5 epochs by the training loop. Build the
# x-axis from the number of points actually recorded, so the plot still works
# when training was stopped early (range(0, epochs, 5) would then be longer
# than accuracy_history and matplotlib would raise on the length mismatch).
eval_epochs = [5 * k for k in range(len(accuracy_history))]

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(eval_epochs, accuracy_history, marker='o', color='blue')
ax.set_xlabel("Epochs")
ax.set_ylabel("Test Accuracy")
ax.set_title("Quantum-Classical Hybrid Model Accuracy Over Epochs")
ax.grid(True)
plt.show()