In [23]:
import matplotlib.pyplot as plt
%matplotlib inline

In [24]:
import numpy as np
import gym
from qiskit import QuantumCircuit, transpile, execute, Aer
import qiskit
from scipy.optimize import minimize

In [25]:

# Tabular Q-learning hyperparameters.
num_episodes = 500      # number of training episodes to run
learning_rate = 0.5     # weight of the new target in each Q-table update
discount_factor = 0.99  # multiplier applied to the best next-state Q-value
epsilon = 0.1           # probability of overriding the chosen action with a random one

# FrozenLake environment the agent interacts with.
env = gym.make('FrozenLake-v1')


In [26]:
# Circuit width and action-space size.
num_qubits = 4  # adjust this based on the specific VQC design
num_actions = 4  # number of possible actions in the environment

# Q-table: one row per environment state, one column per action, all zeros
# before training.
num_states = env.observation_space.n
q_table = np.zeros(shape=(num_states, num_actions))

# Simulator backend on which the circuits are executed.
backend = Aer.get_backend('qasm_simulator')

In [27]:
from math import atan

In [28]:
# Build the variational part of the circuit from Rx rotations and CZ entanglers
def build_variational_circuit(params):
    """Return a TwoLocal circuit with ``params`` bound to its parameters.

    The template spans ``num_qubits`` qubits and is configured with "rx"
    rotation blocks, "cz" entanglement blocks, circular entanglement, a single
    repetition, and barriers between layers.

    Args:
        params: Values handed to ``assign_parameters`` to bind the template's
            free parameters. The caller in this notebook passes the 8-element
            list produced by ``get_params``.

    Returns:
        The circuit returned by ``assign_parameters``.
    """
    ansatz_template = qiskit.circuit.library.TwoLocal(
        num_qubits,
        "rx",
        "cz",
        entanglement="circular",
        reps=1,
        insert_barriers=True,
    )
    return ansatz_template.assign_parameters(params)

# Map an environment state to the parameter values of the variational circuit
def get_params(state):
    """Return eight circuit parameters derived from ``state``.

    ``state`` is formatted as a zero-padded 4-character binary string. Each of
    its first four characters contributes one value: ``atan(state)`` when the
    character is '1', otherwise a fresh ``np.random.rand()`` draw. The four
    values are then repeated once, so the result has eight entries whose two
    halves are equal.

    Note:
        Values for non-'1' positions are re-drawn on every call, so the result
        is only reproducible if NumPy's global random state is seeded.

    Args:
        state: Integer state index.

    Returns:
        list: Eight floats (four values followed by the same four values).
    """
    bits = format(state, '04b')
    half = [atan(state) if bit == '1' else np.random.rand() for bit in bits[:4]]
    return half + half

# Q-learning loop in which the action is read off a measured quantum circuit
def _encode_state(state):
    """Return the encoding circuit for ``state`` on ``num_qubits`` qubits.

    The state index is written as a zero-padded binary string of width
    ``num_qubits``; an X gate is applied to qubit ``i`` when character ``i`` of
    that string is '1'. A Hadamard is then applied to every qubit, with
    barriers before and after the Hadamard layer.
    """
    state_binary = format(state, f'0{num_qubits}b')
    encoding_circuit = QuantumCircuit(num_qubits)
    for i in range(num_qubits):
        if state_binary[i] == '1':
            encoding_circuit.x(i)

    encoding_circuit.barrier()
    # Cover every qubit of the circuit instead of a hard-coded index list.
    encoding_circuit.h(list(range(num_qubits)))
    encoding_circuit.barrier()
    return encoding_circuit


def _greedy_action_from_counts(counts):
    """Return the action encoded by the most frequent 2-character outcome prefix.

    Counts are summed per leading two characters of each outcome string, and
    the prefix with the largest total is parsed as a base-2 integer. Ties go to
    the first prefix in the order '00', '01', '10', '11'.
    """
    # NOTE(review): Qiskit prints bitstrings with the highest-index bit first,
    # so these two characters presumably belong to the last two qubits - confirm.
    action_counts = {'00': 0, '01': 0, '10': 0, '11': 0}
    for outcome, count in counts.items():
        prefix = outcome[:2]
        if prefix in action_counts:
            action_counts[prefix] += count
    return int(max(action_counts, key=action_counts.get), 2)


def objective_function():
    """Run the Q-learning loop and return the total reward of every episode.

    At each step the current state is encoded into a circuit, composed with
    ``build_variational_circuit(get_params(state))``, measured with 32768 shots
    on ``backend``, and the most frequent 2-bit outcome prefix becomes the
    action. With probability ``epsilon`` that action is replaced by a random
    sample from the environment's action space. The module-level ``q_table`` is
    updated in place after every step; it is not consulted when choosing
    actions.

    The loop relies on ``env.reset()`` returning the observation directly and
    ``env.step()`` returning a 4-tuple ``(next_state, reward, done, info)``.

    Returns:
        list: One total reward per episode, in episode order.

    Side effects:
        Prints the episode index, mutates ``q_table``, and calls
        ``env.close()`` on the module-level environment when training ends.
    """
    rewards = []

    for ep in range(num_episodes):
        # Exactly one reset per episode. The previous version reset again right
        # after the terminal step and then discarded that state here.
        state = env.reset()
        total_reward = 0
        done = False
        print(f"Episode: {ep}")
        while not done:
            # Encode the current state into the quantum state
            encoding_circuit = _encode_state(state)

            # Build the variational circuit with parameters derived from the state
            fixed_params = get_params(state)
            variational_circuit = build_variational_circuit(fixed_params)

            # Combine encoding and variational parts of the circuit
            quantum_circuit = encoding_circuit.compose(variational_circuit)
            quantum_circuit.measure_all()

            # Simulate the circuit to obtain measurement outcomes
            transpiled_circuit = transpile(quantum_circuit, backend)
            result = execute(transpiled_circuit, backend, shots=32768).result()
            counts = result.get_counts(quantum_circuit)

            # Most frequent outcome prefix decides the action ...
            action = _greedy_action_from_counts(counts)

            # ... unless an exploratory random action is drawn instead.
            if np.random.rand() < epsilon:
                action = env.action_space.sample()  # Explore

            # Take the selected action and observe the next state and reward
            next_state, reward, done, _ = env.step(action)

            # Update the Q-table using Q-learning update rule
            q_table[state, action] = (1 - learning_rate) * q_table[state, action] + \
                learning_rate * (reward + discount_factor * np.max(q_table[next_state, :]))

            total_reward += reward
            state = next_state

        # The while loop only exits once the episode is done.
        rewards.append(total_reward)

    env.close()
    return rewards

# Disabled: initial guess for a parameter vector (only needed by the disabled
# optimizer call below).
#initial_params = np.random.rand(num_qubits * 2)

# Run the training loop once; `rewards` receives the list it returns
# (one total reward per episode).
rewards = objective_function()
# Disabled: use a classical optimizer to find the best parameters.
# As written, objective_function() takes no arguments and returns a list, so it
# would need a parameter argument and a scalar return value before it could be
# passed to minimize().
# result = minimize(objective_function, initial_params, method='COBYLA')
# best_params = result.x

# After optimization, best_params would contain the optimized parameter values
# print("Optimal parameters:", best_params)


Episode: 0
Episode: 1
Episode: 2
Episode: 3
Episode: 4
Episode: 5
Episode: 6
Episode: 7
Episode: 8
Episode: 9
Episode: 10
Episode: 11
Episode: 12
Episode: 13
Episode: 14
Episode: 15
Episode: 16
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
Episode: 22
Episode: 23
Episode: 24
Episode: 25
Episode: 26
Episode: 27
Episode: 28
Episode: 29
Episode: 30
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39
Episode: 40
Episode: 41
Episode: 42
Episode: 43
Episode: 44
Episode: 45
Episode: 46
Episode: 47
Episode: 48
Episode: 49
Episode: 50
Episode: 51
Episode: 52
Episode: 53
Episode: 54
Episode: 55
Episode: 56
Episode: 57
Episode: 58
Episode: 59
Episode: 60
Episode: 61
Episode: 62
Episode: 63
Episode: 64
Episode: 65
Episode: 66
Episode: 67
Episode: 68
Episode: 69
Episode: 70
Episode: 71
Episode: 72
Episode: 73
Episode: 74
Episode: 75
Episode: 76
Episode: 77
Episode: 78
Episode: 79
Episode: 80
Episode: 81
Episode: 82
Episode: 83
Ep

In [29]:
# Display the per-episode total rewards returned by objective_function().
rewards

[0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0

In [30]:
# Display the Q-table after training (one row per state, one column per action).
q_table

array([[0.59501087, 0.60489871, 0.59501765, 0.61580329],
       [0.28523553, 0.55572162, 0.19261581, 0.6388441 ],
       [0.63596222, 0.63672867, 0.64143689, 0.64230339],
       [0.4022617 , 0.32544664, 0.25882899, 0.64526069],
       [0.58945631, 0.4061966 , 0.56862045, 0.29489849],
       [0.        , 0.        , 0.        , 0.        ],
       [0.49362583, 0.17599671, 0.63566828, 0.09574807],
       [0.        , 0.        , 0.        , 0.        ],
       [0.25471347, 0.51431466, 0.47107354, 0.56944348],
       [0.59749872, 0.59052423, 0.2538739 , 0.4558636 ],
       [0.60711689, 0.31457526, 0.17318847, 0.38502245],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.17761918, 0.6774429 , 0.72634513, 0.06802637],
       [0.61329499, 0.55606031, 0.9080225 , 0.63092353],
       [0.        , 0.        , 0.        , 0.        ]])

In [31]:
import pickle

# Persist the Q-table to disk as a pickle file.
file_path = "q_table3.pkl"
with open(file_path, mode='wb') as file:
    pickle.dump(q_table, file)