In [34]:
import torch

# Pick the compute device once (GPU when CUDA is usable, CPU otherwise) and
# report which one was chosen.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print("CUDA is available.")
    print('Running on the GPU:', torch.cuda.get_device_name())
else:
    print("CUDA is not available.")
    print('Running on the CPU')

CUDA is available.
Running on the GPU: NVIDIA GeForce RTX 3090


In [45]:
# Gymnasium environment id shared by every environment created in this notebook.
env_name = "CartPole-v1"

import gymnasium as gym
import tianshou as ts
import numpy as np

from tianshou.policy import DQNPolicy, QRDQNPolicy, C51Policy, RainbowPolicy
from tianshou.env import DummyVectorEnv
from tianshou.data import Collector, VectorReplayBuffer, PrioritizedVectorReplayBuffer
from tianshou.trainer import offpolicy_trainer
from tianshou.utils.net.discrete import NoisyLinear

# Stand-alone environment instance (later cells read its spaces and spec).
env = gym.make(env_name)

# Vectorised environments: 10 copies for data collection, 100 for evaluation.
# Each list entry is a zero-argument factory that DummyVectorEnv calls itself.
train_envs = DummyVectorEnv(
    [(lambda: gym.make(env_name)) for _ in range(10)]
)
test_envs = DummyVectorEnv(
    [(lambda: gym.make(env_name)) for _ in range(100)]
)

In [46]:
import os
import random
from collections import namedtuple
from datetime import datetime
import copy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.parameter import Parameter
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F
from tqdm.autonotebook import tqdm

import pennylane as qml

In [47]:
def encode(n_qubits, inputs):
    """Angle-encode the input vector onto the circuit.

    Applies one RX rotation per wire, for wires ``0 .. n_qubits - 1``, using
    ``inputs[wire]`` as the rotation angle of that wire.

    Args:
        n_qubits: number of wires to encode onto.
        inputs: indexable sequence holding at least ``n_qubits`` angles.
    """
    for wire in range(n_qubits):
        angle = inputs[wire]
        qml.RX(angle, wires=wire)


def layer(n_qubits, y_weight, z_weight):
    """Apply one variational layer: RY rotations, RZ rotations, then a CZ ring.

    Args:
        n_qubits: number of wires; determines the CZ ring.
        y_weight: iterable of RY angles, the i-th angle acting on wire i.
        z_weight: iterable of RZ angles, the i-th angle acting on wire i.
    """
    for qubit, theta in enumerate(y_weight):
        qml.RY(theta, wires=qubit)
    for qubit, phi in enumerate(z_weight):
        qml.RZ(phi, wires=qubit)

    # Entangle each wire with its successor; the modulo closes the ring by
    # pairing the last wire with wire 0.
    ring = [(q, (q + 1) % n_qubits) for q in range(n_qubits)]
    for control, target in ring:
        qml.CZ(wires=[control, target])


def measure(n_qubits):
    """Return the two readout measurements of the circuit.

    The readout is fixed to wires 0-3: the expectation value of Z0*Z1 followed
    by the expectation value of Z2*Z3. ``n_qubits`` is accepted for signature
    symmetry with the other circuit helpers but is not used.
    """
    zz_first_pair = qml.expval(qml.PauliZ(0) @ qml.PauliZ(1))
    zz_second_pair = qml.expval(qml.PauliZ(2) @ qml.PauliZ(3))
    return [zz_first_pair, zz_second_pair]


def get_model(n_qubits, n_layers, data_reupload, return_val=True):
    """Build the variational circuit and wrap it as a Torch layer.

    Args:
        n_qubits: number of wires of the ``default.qubit`` device.
        n_layers: number of variational layers; also the leading dimension of
            both weight tensors.
        data_reupload: when truthy the inputs are re-encoded before every
            layer, otherwise only before the first one.
        return_val: when truthy the circuit returns ``measure(n_qubits)``;
            otherwise it returns the von Neumann entropy of wire 0.

    Returns:
        A ``qml.qnn.TorchLayer`` with trainable ``y_weights`` and ``z_weights``,
        each of shape ``(n_layers, n_qubits)``.
    """
    weight_shape = (n_layers, n_qubits)
    shapes = {"y_weights": weight_shape, "z_weights": weight_shape}
    dev = qml.device("default.qubit", wires=n_qubits)

    @qml.qnode(dev, interface='torch')
    def circuit(inputs, y_weights, z_weights):
        for depth in range(n_layers):
            # The first layer always encodes the data; later layers only do
            # so when data re-uploading is enabled.
            if data_reupload or depth == 0:
                encode(n_qubits, inputs)
            layer(n_qubits, y_weights[depth], z_weights[depth])

        if not return_val:
            return qml.vn_entropy(wires=[0])
        return measure(n_qubits)

    return qml.qnn.TorchLayer(circuit, shapes)

# Module-level accumulator: QuantumDQN.forward appends one list of per-sample
# entropy values to it on every call.
entropy_out = list()

In [48]:
class QuantumDQN(nn.Module):
    """Q-network whose body is a variational quantum circuit.

    The forward pass optionally scales the observation by a trainable vector,
    squashes it with ``atan``, runs every sample through the circuit built by
    ``get_model`` and rescales the circuit outputs into Q-values.

    Args:
        n_qubits: number of circuit wires (one per observation feature).
        n_actions: number of Q-values; sizes the trainable output scaling.
        n_layers: number of variational layers in the circuit.
        w_input: when truthy, learn a per-feature input scaling (``w_input2``).
        w_output: when truthy, learn a per-action output scaling
            (``w_output2``); otherwise outputs are multiplied by a fixed 90.
        data_reupload: forwarded to ``get_model``.

    NOTE(review): ``self.entropy`` is a second, independently initialised
    circuit. Its output never contributes to the returned Q-values, so its
    weights receive no gradient and are not tied to ``self.q_layers``. The
    recorded entropies therefore describe that separate circuit - confirm this
    is intended.
    """

    def __init__(self, n_qubits, n_actions, n_layers, w_input, w_output, data_reupload):
        super().__init__()
        self.n_qubits = n_qubits
        self.n_actions = n_actions
        self.n_layers = n_layers
        # Plain boolean flags, kept so __deepcopy__ can rebuild the module.
        self.w_input = w_input
        self.w_output = w_output
        self.data_reupload = data_reupload
        self.q_layers = get_model(n_qubits=self.n_qubits,
                                  n_layers=n_layers,
                                  data_reupload=data_reupload,
                                  return_val=True)
        self.entropy = get_model(n_qubits=self.n_qubits,
                                 n_layers=n_layers,
                                 data_reupload=data_reupload,
                                 return_val=False)

        # The optional scaling parameters live under 'w_input2' / 'w_output2'.
        # They must be registered under those same names when disabled:
        # 'w_input' / 'w_output' are already taken by the flags above (which
        # makes register_parameter raise KeyError), and forward() reads the
        # '...2' attributes unconditionally.
        if w_input:
            self.w_input2 = Parameter(torch.empty(self.n_qubits))
            nn.init.normal_(self.w_input2, mean=0.)
        else:
            self.register_parameter('w_input2', None)
        if w_output:
            self.w_output2 = Parameter(torch.empty(self.n_actions))
            nn.init.normal_(self.w_output2, mean=90.)
        else:
            self.register_parameter('w_output2', None)

    def forward(self, inputs, **kwargs):
        """Compute Q-values for a batch of observations.

        Args:
            inputs: array-like or tensor whose first dimension is the batch.
            **kwargs: accepted and ignored.

        Returns:
            ``(q_values, None)`` where ``q_values`` stacks the per-sample
            circuit outputs along dimension 0.

        Side effects:
            Appends a list with one detached entropy tensor per sample to the
            module-level ``entropy_out`` list.
        """
        # Convert once for the whole batch instead of re-wrapping every row.
        batch = torch.as_tensor(inputs, dtype=torch.float32)
        if self.w_input2 is not None:
            batch = batch * self.w_input2
        batch = torch.atan(batch)

        outputs = []
        entropy = []
        # The circuit is evaluated one sample at a time.
        for input_i in batch:
            output_i = self.q_layers(input_i)
            entropy_i = self.entropy(input_i)
            # (1 + x) / 2 maps the Pauli expectation range [-1, 1] to [0, 1].
            outputs.append((1 + output_i) / 2)
            # Detach so the global log does not keep autograd graphs alive.
            entropy.append(entropy_i.detach())

        outputs = torch.stack(outputs)  # Stack outputs along the batch dimension
        entropy_out.append(entropy)

        if self.w_output2 is not None:
            outputs = outputs * self.w_output2
        else:
            # Fixed scale matching the mean used to initialise w_output2. The
            # stacked tensor already has one row per sample, so no reshape.
            outputs = 90 * outputs

        return outputs, None

    def __deepcopy__(self, memodict=None):
        """Return an independent copy of this network (e.g. a target network).

        A new instance is constructed with the same hyper-parameters, the two
        circuit layers are deep-copied, and the optional input/output scaling
        parameters are copied so the clone starts numerically identical.
        """
        if memodict is None:
            memodict = {}

        new_instance = QuantumDQN(n_qubits=self.n_qubits,
                                  n_actions=self.n_actions,
                                  n_layers=self.n_layers,
                                  w_input=self.w_input,
                                  w_output=self.w_output,
                                  data_reupload=self.data_reupload)
        # Record the clone so nested references to self resolve to it.
        memodict[id(self)] = new_instance

        # Assign the quantum parts after copying
        new_instance.q_layers = copy.deepcopy(self.q_layers, memodict)
        new_instance.entropy = copy.deepcopy(self.entropy, memodict)

        # Without this the clone would keep the fresh random initialisation
        # drawn by the constructor above.
        with torch.no_grad():
            for name in ('w_input2', 'w_output2'):
                source = getattr(self, name)
                if source is not None:
                    getattr(new_instance, name).copy_(source)

        new_instance.train(self.training)
        return new_instance

In [49]:
# Size the network from the environment's spaces.
state_shape = env.observation_space.shape[0]  # equivalent to 4 for CartPole-v1
action_shape = env.action_space.n  # equivalent to 2 for CartPole-v1

net = QuantumDQN(
    n_qubits=state_shape,
    n_actions=action_shape,
    n_layers=3,
    w_input=True,
    w_output=True,
    data_reupload=True,
)
# net = net.to(args.device)

# NOTE: this rebinds `optim`, which an earlier cell imported as an alias for
# the `torch.optim` module; after this line `optim` is the optimizer instance.
optim = torch.optim.RMSprop(net.parameters(), lr=0.001)
policy = DQNPolicy(
    net,
    optim,
    discount_factor=0.99,
    estimation_step=5,
    target_update_freq=320,
    is_double=False,
)
# policy = policy.to(args.device)

# Replay buffer of 20000 transitions in total, split across 10 sub-buffers
# (one per training environment).
buffer = VectorReplayBuffer(total_size=20000, buffer_num=10)
train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
test_collector = Collector(policy, test_envs, exploration_noise=True)

In [50]:
from torch.utils.tensorboard import SummaryWriter
from tianshou.utils import TensorboardLogger
# TensorBoard logging for this run.
writer = SummaryWriter(f'log/CartPole_Quantum_DQN')
logger = TensorboardLogger(writer)

# Run off-policy training: epsilon is 0.1 while collecting and 0.05 while
# testing, and training stops early once the mean test reward reaches the
# environment's reward threshold.
result = offpolicy_trainer(
    policy=policy,
    train_collector=train_collector,
    test_collector=test_collector,
    max_epoch=1,
    step_per_epoch=1000,
    step_per_collect=10,
    episode_per_test=1000,
    batch_size=64,
    update_per_step=0.1,
    train_fn=lambda epoch, env_step: policy.set_eps(0.1),
    test_fn=lambda epoch, env_step: policy.set_eps(0.05),
    stop_fn=lambda mean_rewards: mean_rewards >= env.spec.reward_threshold,
    logger=logger,
)

print(f'Finished training! Use {result["duration"]}')

Epoch #1: 1001it [14:41,  1.14it/s, env_step=1000, len=10, loss=1139.431, n/ep=2, n/st=10, rew=10.00]


Epoch #1: test_reward: 9.592000 ± 0.988704, best_reward: 9.657000 ± 1.060826 in #0
Finished training! Use 1620.69s


In [56]:
import re
import pandas as pd

def _entropy_to_float(value):
    """Convert one recorded entropy value to a Python float.

    Numeric scalars (Python numbers, NumPy scalars, single-element tensors)
    are converted directly, which keeps full precision, sign and exponent.
    Anything that cannot be converted that way is parsed from its string form
    as a fallback.

    Raises:
        ValueError: if no decimal number can be found in the string form.
    """
    try:
        return float(value)
    except (TypeError, ValueError, RuntimeError):
        # Fallback for values only available as text, e.g. "tensor(0.6693)".
        match = re.search(r'[-+]?\d+\.\d+(?:[eE][-+]?\d+)?', str(value))
        if match is None:
            raise ValueError(f'cannot parse an entropy value from {value!r}')
        return float(match.group())


# One inner list per forward call, one entry per sample in that call.
numbers = [[_entropy_to_float(value) for value in sublist] for sublist in entropy_out]

# Build the frame with one row per forward call, then transpose so that each
# forward call becomes a column and each row is a position within the batch.
df = pd.DataFrame(numbers).transpose()

print(df)

       0       1       2       3       4       5       6       7       8    \
0   0.6693  0.6675  0.6663  0.6656  0.6651  0.6642  0.6628  0.6607  0.6582   
1   0.6723  0.6703  0.6719  0.6699  0.6686  0.6679  0.6673  0.6664  0.6649   
2   0.6723  0.6702  0.6690  0.6683  0.6677  0.6667  0.6651  0.6629  0.6603   
3   0.6697  0.6677  0.6665  0.6658  0.6652  0.6642  0.6627  0.6605  0.6579   
4   0.6738  0.6719  0.6708  0.6703  0.6697  0.6687  0.6671  0.6648  0.6751   
..     ...     ...     ...     ...     ...     ...     ...     ...     ...   
95  0.6718  0.6699  0.6688  0.6681  0.6675  0.6665  0.6649  0.6627  0.6711   
96  0.6685  0.6666  0.6654  0.6647  0.6641  0.6632  0.6617  0.6595  0.6570   
97  0.6751  0.6732  0.6721  0.6715  0.6710  0.6703  0.6688  0.6667  0.6641   
98  0.6738  0.6720  0.6708  0.6702  0.6698  0.6690  0.6677  0.6657  0.6631   
99  0.6720  0.6698  0.6685  0.6678  0.6672  0.6662  0.6646  0.6624  0.6700   

       9    ...     595     596     597     598     599     600

In [20]:
# qml.math.vn_entropy([0.4781+0.j, 0.4778+0.j, 0.4782+0.j, 0.4778+0.j], indices=[0])

0.08233635023100414

In [21]:
# qml.math.vn_entropy([0.4781+0.j, 0.4778+0.j, 0.4782+0.j, 0.4778+0.j], indices=[1])

0.08233635023100414

In [31]:
# qml.math.vn_entropy([0.4781+0.j, 0.4778+0.j, 0.4782+0.j, 0.4782+0.j], indices=[1])

0.08198868931856183

In [28]:
# import csv
# file_path = f'/scratch/connectome/justin/log_0/PennyLane_CartPole_Quantum_DQN_entropy_00.csv'
# with open(file_path, mode='w', newline='') as file:
#     csv_writer = csv.writer(file)
#     csv_writer.writerow(entropy_out)