# Scheduling operations
In this notebook we will cover the QGym `Scheduling` environment.

This environment is aimed at solving the problem of scheduling operations in the best possible way, whilst taking hardware constraints and commutation rules into account.

In [None]:
%matplotlib inline
import numpy as np
import networkx as nx
from networkx.generators import fast_gnp_random_graph
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from IPython.display import clear_output

from qgym.envs.scheduling import Scheduling
from qgym.envs.scheduling.scheduling_rewarders import EpisodeRewarder

In [None]:
def render_rgb(step, rgb_array):
    """Replace the current cell output with a large rendering of one frame.

    Args:
        step: Value shown in the figure title as ``Step {step}``.
        rgb_array: Image data handed to ``imshow``.
    """
    # Drop the previous frame only once the new one is ready to be shown,
    # so consecutive calls look like an animation instead of flickering.
    clear_output(wait=True)

    fig, ax = plt.subplots(figsize=(40, 20))
    ax.set_title(f"Step {step}", fontsize=40)
    ax.imshow(rgb_array)
    ax.axis("off")
    plt.show()

In [None]:
# Description of the target hardware that is handed to the Scheduling
# environment.
hardware_spec = dict(
    qubit_number=3,
    # One integer per supported gate.
    # NOTE(review): presumably the gate duration in machine cycles - confirm
    # against the QGym documentation.
    gates=dict(
        prep=1,
        x=2,
        y=2,
        z=2,
        h=2,
        cnot=4,
        swap=3,
        measure=10,
    ),
    machine_restrictions=dict(
        same_start={"measure"},
        # Each of x, y and z lists the other two gates as excluded from its
        # cycle, i.e. the three are pairwise mutually exclusive.
        not_in_same_cycle={
            gate: [other for other in "xyz" if other != gate] for gate in "xyz"
        },
    ),
)


In [None]:
# Build the scheduling environment for the hardware described above and
# swap in an EpisodeRewarder with an illegal-action penalty of -10.
env = Scheduling(hardware_spec, max_gates=10)
env.rewarder = EpisodeRewarder(illegal_action_penalty=-10)

# Let Stable-Baselines3 check the environment before any training starts.
check_env(env, warn=True)

# Train a PPO agent for two million timesteps (long-running cell).
model = PPO(policy="MultiInputPolicy", env=env, verbose=1)
model.learn(total_timesteps=2_000_000)

In [None]:
# Roll out the trained policy for at most 1000 steps, drawing every step and
# stopping early as soon as the environment reports that it is done.
obs = env.reset()
for i in range(1000):
    # deterministic=False: per the Stable-Baselines3 docs the action is then
    # sampled from the policy rather than chosen deterministically.
    action, states = model.predict(obs, deterministic=False)
    obs, rewards, done, info = env.step(action)

    frame = env.render(mode="rgb_array")
    render_rgb(i, frame)

    if done:
        break