# Produce Paper Plots from Data
This notebook produces plots in the style of Figs. 3, 4 and 5 of the manuscript.
#### Import modules

In [None]:
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
sys.path.append(os.path.join('..','src'))
from matplotlib.lines import Line2D
import matplotlib
import matplotlib.gridspec as gridspec
import sac
import sac_tri
import plotting
import colorsys
from pathlib import Path
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib import colors as mcolors
from matplotlib.offsetbox import AnchoredOffsetbox, TextArea, HPacker

## Two-level Engine (Fig. 3)

### Visualize data
We first visualize the full logged data. In all cells below, replace `log_dir` with the path to the folder containing the corresponding training data.

In [None]:
# Folder with the logged training data of the two-level engine run.
log_dir = "../paper_plot_data/main/2021_02_01-14_12_31_two_level_engine/"

# Visualize the full logged data of this run.
plotting.plot_sac_logs(
    log_dir,
    is_tri=True,
    plot_to_file_line=None,
    actions_per_log=6000,
    suppress_show=False,
    save_plot=False,
    actions_ylim=None,
    actions_to_plot=300,
)

### Export final deterministic protocol
We load the saved policy, test it on the environment, and export a file with the deterministic policy. The upper panel shows the running reward, exponentially weighted with a very large gamma; this validates convergence and gives a good estimate of the power. The second panel shows the deterministic policy over the last steps. Finally, the average output power is printed.

In [None]:
# Folder with the logged training data of the two-level engine run.
log_dir = "../paper_plot_data/main/2021_02_01-14_12_31_two_level_engine/"

# Restore the trained model from the log folder (no_train=True is passed to
# the loader).
loaded_train = sac_tri.SacTrain()
loaded_train.load_train(log_dir, no_train=True)

# Evaluate the deterministic policy and save it as "det_policy.txt".
loaded_train.evaluate_current_policy(
    deterministic=True,
    steps=6000,
    gamma=0.9999,
    actions_to_plot=70,
    save_policy_to_file_name="det_policy.txt",
    actions_ylim=[0.3,1.],
)

### Produce final plot (Fig. 3)

In [None]:
# --- input / output locations -------------------------------------------------
log_dir = "../paper_plot_data/main/2021_02_01-14_12_31_two_level_engine/"
det_policy_sublocation = "saved_policies/det_policy.txt"
plot_file_name = "two_level_engine.pdf"

# --- protocol (action) panels -------------------------------------------------
actions_to_plot_large = 10
actions_to_plot_small = 30
actions_per_log = 6000
# the three act_* values are handed to sac_paper_plot right after the
# deterministic-policy location
act_0 = 6000-2
act_1 = 240000-2
act_2 = 492000-2
prot_linewidth = 4.
small_action_ylim = [0.3,1]
large_action_ylim = [0.45,0.8]

# --- reward panel -------------------------------------------------------------
reward_ylabel = r"$100\times\ev*{P_{[\text{E}]}}_\gamma$"
# extra dashed horizontal line at 0.93, second entry of the reward legend
reward_plot_extra_args = ([0,500000], [0.93,0.93])
reward_plot_extra_kwargs = dict(color='black',linewidth=0.8, dashes=(4,4))
reward_legend_labels = ["RL Cycle", "Exact Bound"]

# --- action legend ------------------------------------------------------------
# one thick proxy line per legend entry, in the same order as the legend text
action_legend_lines = [
    Line2D([0], [0], color=legend_color, linewidth=4)
    for legend_color in ('orange', 'cornflowerblue', 'limegreen')
]
action_legend_text = ["Hot","Cold","None"]
action_legend_location = [0.22, 1.]

# --- produce the figure -------------------------------------------------------
plotting.sac_paper_plot(
    log_dir,
    det_policy_sublocation,
    act_0,
    act_1,
    act_2,
    is_tri=True,
    actions_to_plot_large=actions_to_plot_large,
    actions_to_plot_small=actions_to_plot_small,
    actions_per_log=actions_per_log,
    prot_linewidth=prot_linewidth,
    plot_file_name=plot_file_name,
    small_action_ylim=small_action_ylim,
    large_action_ylim=large_action_ylim,
    reward_ylabel=reward_ylabel,
    reward_plot_extra_args=reward_plot_extra_args,
    reward_plot_extra_kwargs=reward_plot_extra_kwargs,
    reward_legend_labels=reward_legend_labels,
    action_legend_lines=action_legend_lines,
    action_legend_text=action_legend_text,
    action_legend_location=action_legend_location,
)

## Superconducting Qubit Refrigerator (Fig. 4)

### Visualize data
We first visualize the full logged data

In [None]:
# Folder with the logged training data of the qubit refrigerator run.
log_dir = "../paper_plot_data/main/2021_02_05-16_13_38_superconducting_qubit_refrigerator/"

# Visualize the full logged data of this run (is_tri=False for this case).
plotting.plot_sac_logs(
    log_dir,
    is_tri=False,
    plot_to_file_line=None,
    actions_per_log=6000,
    suppress_show=False,
    save_plot=False,
    actions_ylim=None,
    actions_to_plot=60,
)

### Export final deterministic protocol
We load the saved policy, test it on the environment, and export a file with the deterministic policy. The upper panel shows the running reward, exponentially weighted with a very large gamma; this validates convergence and gives a good estimate of the power. The second panel shows the deterministic policy over the last steps. Finally, the average output power is printed.

In [None]:
# Folder with the logged training data of the qubit refrigerator run.
log_dir = "../paper_plot_data/main/2021_02_05-16_13_38_superconducting_qubit_refrigerator/"

# Restore the trained model; this run uses the `sac` module rather than
# `sac_tri`.
loaded_train = sac.SacTrain()
loaded_train.load_train(log_dir, no_train=True)

# Evaluate the deterministic policy and save it as "det_policy.txt".
loaded_train.evaluate_current_policy(
    deterministic=True,
    steps=8001,
    gamma=0.9999,
    actions_to_plot=70,
    save_policy_to_file_name="det_policy.txt",
    actions_ylim=None,
)

### Produce final plot (Fig. 4)

In [None]:
# ------------------------------------------------------------------------------
# Configuration
# ------------------------------------------------------------------------------
log_dir = "../paper_plot_data/main/2021_02_05-16_13_38_superconducting_qubit_refrigerator/"
det_policy_sublocation = "saved_policies/det_policy.txt"
# Build the path with os.path.join so it stays correct even if `log_dir` is
# replaced by a folder name without a trailing separator.
det_policy_file = os.path.join(log_dir, det_policy_sublocation)
actions_to_plot_large = 100
actions_to_plot_small = 60
actions_per_log = 6000
# used as `plot_to_file_line` for the three snapshot panels (b) and, divided
# by `actions_per_log`, as the lines marked on the reward curve (a)
act_0 = 6000-2
act_1 = 180000-2
act_2 = 490000-2
prot_linewidth=3.
small_action_ylim = [-0.05, 0.8]
large_action_ylim = [-0.05,0.55]
reward_ylabel= r"$10^4\times\ev*{P_{[\text{R}]}}_\gamma$"
# extra dashed horizontal line at 2 on the reward panel (second legend entry)
reward_plot_extra_args = ([0,500000], [2,2])
reward_plot_extra_kwargs = dict(color='black',linewidth=0.8, dashes=(4,4))
# reference curve passed as `extra_cycles` for the final-protocol panel;
# presumably [function, x-range, color] -- confirm against plotting.plot_actions_on_axis
extra_cycles = [lambda x,a=2,omega=0.065,dt=0.982: 0.25*(1. + np.tanh(a*np.cos(omega*x*dt))/np.tanh(a)),
               [0,100],"black"]
extra_cycles_linewidth = 0.8
reward_legend_labels=['RL Cycle', 'Trapezoidal Cycle']
plot_file_name = "qubit_refrigerator.pdf"

#parameters used for this figure (read as globals by the coupling functions below)
g = 1
q = 30
e0 = 1
d = 0.12
wh = 1.03
wc = 0.24
bh = 10/3
bc = 2*bh
ec = 0.24  # NOTE(review): defined but not used anywhere in this cell


# ------------------------------------------------------------------------------
# Coupling strength functions for panel (d)
# ------------------------------------------------------------------------------
def s(de, b, w):
    """One-sided coupling strength at energy `de`.

    `b` and `w` are presumably the inverse temperature and the resonance
    frequency of the bath (they receive bh/bc and wh/wc below) -- confirm.
    Reads the globals `g` and `q`.
    """
    return g/2 * 1/(1 + q**2*(de/w - w/de)**2)*de/(np.exp(b*de) - 1)


def s_tot(de, b, w):
    """Sum of `s` evaluated at `+de` and at `-de`."""
    return s(de,b,w)+s(-de,b,w)


def s_hot(de):
    """`s_tot` evaluated with the hot-bath parameters `bh`, `wh`."""
    return s_tot(de,bh,wh)


def s_cold(de):
    """`s_tot` evaluated with the cold-bath parameters `bc`, `wc`."""
    return s_tot(de,bc,wc)


def de(u):
    """Energy `2*e0*sqrt(d**2 + u**2)` as a function of the control value `u`."""
    return 2*e0*np.sqrt(d**2 + u**2)


# ------------------------------------------------------------------------------
# Figure
# ------------------------------------------------------------------------------
#get location of files
running_reward_file, _, actions_file = \
                                plotting.sac_logs_file_location(log_dir, False, None,None,None)

#font size (LaTeX text rendering is needed for \ev and \text in the labels)
matplotlib.rcParams.update({'font.size': 14, "text.usetex": True,
                            'text.latex.preamble' : r'\usepackage{amsmath}\usepackage{physics}'})

#create the axis (subplots)
fig = plt.figure(constrained_layout=True, figsize=(6,5))
gs = gridspec.GridSpec(3, 4, figure=fig, height_ratios = [1,0.7,1],width_ratios=[0.5,0.5,0.3,0.2])
reward_ax = fig.add_subplot(gs[0, :])
prot_0_ax = fig.add_subplot(gs[1, 0])
prot_1_ax = fig.add_subplot(gs[1, 1],sharey=prot_0_ax)
prot_2_ax = fig.add_subplot(gs[1, 2:],sharey=prot_0_ax)
prot_final_ax = fig.add_subplot(gs[2, 0:3])
coupling_ax = fig.add_subplot(gs[2, 3],sharey=prot_final_ax)
# panels sharing a y axis with their left neighbour do not repeat the tick labels
for shared_y_ax in (prot_1_ax, prot_2_ax, coupling_ax):
    plt.setp(shared_y_ax.get_yticklabels(), visible=False)

#set the reward axis
plotting.plot_running_reward_on_axis(running_reward_file, reward_ax, plot_to_file_line = None, linewidth=2.7,
                                    custom_color = "black",
                                    lines_to_mark = [plotting.nearest_int(act/actions_per_log)
                                                     for act in (act_0, act_1, act_2)],
                                    custom_mark_color="black",ylim=None,ylabel=reward_ylabel,
                                    plot_extra_args=reward_plot_extra_args, plot_extra_kwargs=reward_plot_extra_kwargs,
                                    legend_labels=reward_legend_labels)

#set the three actions axis; only the first panel sets the y limits and keeps
#the default y label, the other two share its y axis
snapshot_panels = [
    (prot_0_ax, act_0, dict(actions_ylim=small_action_ylim)),
    (prot_1_ax, act_1, dict(ylabel="")),
    (prot_2_ax, act_2, dict(ylabel="")),
]
for snapshot_ax, snapshot_line, panel_kwargs in snapshot_panels:
    plotting.plot_actions_on_axis(actions_file, snapshot_ax, is_tri=False, actions_to_plot=actions_to_plot_small,
                                plot_to_file_line=snapshot_line, constant_steps=True,
                                linewidth = prot_linewidth, two_xticks=True, **panel_kwargs)

#set the final protocol axis
plotting.plot_actions_on_axis(det_policy_file, prot_final_ax, False, actions_to_plot=actions_to_plot_large,
                            actions_ylim=large_action_ylim,plot_to_file_line=None,constant_steps=True,
                            k_notation=False, x_count_from_zero=True,linewidth = prot_linewidth,
                            xlabel="$t[dt]$",extra_cycles=extra_cycles, extra_cycles_linewidth=extra_cycles_linewidth)

#plot panel d: coupling on the x axis, control value u on the (shared) y axis
u_vals = np.linspace(0,0.6,100)
hot_coupling = s_hot(de(u_vals))
cold_coupling = s_cold(de(u_vals))
coupling_ax.plot(hot_coupling, u_vals, color="red")
coupling_ax.plot(cold_coupling, u_vals, color="blue")
coupling_ax.set_xlim([0.,0.55])

#add the colored label to panel d (two differently colored text pieces side by side)
xbox1 = TextArea(r"$\gamma^{(\text{C})}_{u(t)},$", textprops=dict(color="b", ha='center',va='bottom'))
xbox2 = TextArea(r"$\gamma^{(\text{H})}_{u(t)}$", textprops=dict(color="r", ha='center',va='bottom'))
xbox = HPacker(children=[xbox1, xbox2],align="bottom", pad=0, sep=5)
anchored_xbox = AnchoredOffsetbox(loc=3, child=xbox, pad=0., frameon=False, bbox_to_anchor=(0.2, -0.6), 
                                  bbox_transform=coupling_ax.transAxes, borderpad=0.)
coupling_ax.add_artist(anchored_xbox)

#add the (a) (b) (c) (d) labels; (d) is positioned in prot_final_ax coordinates
reward_ax.text(-0.12,-0.38, r'\textbf{(a)}', transform=reward_ax.transAxes )
prot_0_ax.text(-0.4,-0.55, r'\textbf{(b)}', transform=prot_0_ax.transAxes )
prot_final_ax.text(-0.16,-0.43, r'\textbf{(c)}', transform=prot_final_ax.transAxes )
prot_final_ax.text(0.95,-0.43, r'\textbf{(d)}', transform=prot_final_ax.transAxes )

#save if necessary (inside the plot sub-folder of log_dir)
if plot_file_name is not None:
    plot_folder = os.path.join(log_dir, plotting.PLOT_DIR_NAME)
    Path(plot_folder).mkdir(parents=True, exist_ok=True)
    plot_file_name = os.path.join(plot_folder, plot_file_name)
    plt.savefig(plot_file_name)

#show
plt.show()

## Quantum Harmonic Oscillator Engine (Fig. 5)

### Visualize data of left panel
We first visualize the full logged data

In [None]:
# Folder with the logged training data of the harmonic engine run (left panel).
log_dir = "../paper_plot_data/main/2021_02_02-18_41_42_harmonic_engine/"

# Visualize the full logged data of this run.
plotting.plot_sac_logs(
    log_dir,
    is_tri=True,
    plot_to_file_line=None,
    actions_per_log=6000,
    suppress_show=False,
    save_plot=False,
    actions_ylim=None,
    actions_to_plot=300,
)

### Export final deterministic protocol of left panel
We load the saved policy, test it on the environment, and export a file with the deterministic policy. The upper panel shows the running reward, exponentially weighted with a very large gamma; this validates convergence and gives a good estimate of the power. The second panel shows the deterministic policy over the last steps. Finally, the average output power is printed.

In [None]:
# Folder with the logged training data of the harmonic engine run (left panel).
log_dir = "../paper_plot_data/main/2021_02_02-18_41_42_harmonic_engine/"

# Restore the trained model from the log folder.
loaded_train = sac_tri.SacTrain()
loaded_train.load_train(log_dir, no_train=True)

# Evaluate the deterministic policy and save it as "det_policy.txt".
loaded_train.evaluate_current_policy(
    deterministic=True,
    steps=8000-2,
    gamma=0.9999,
    actions_to_plot=70,
    save_policy_to_file_name="det_policy.txt",
    actions_ylim=[0.3,1.05],
)

### Visualize data of right panel
We first visualize the full logged data

In [None]:
# Folder with the logged training data of the larger-range harmonic engine run
# (right panel).
log_dir = "../paper_plot_data/main/2021_02_03-09_42_46_harmonic_engine_larger_range/"

# Visualize the full logged data of this run.
plotting.plot_sac_logs(
    log_dir,
    is_tri=True,
    plot_to_file_line=None,
    actions_per_log=6000,
    suppress_show=False,
    save_plot=False,
    actions_ylim=None,
    actions_to_plot=300,
)

### Export final deterministic protocol of right panel
We load the saved policy, test it on the environment, and export a file with the deterministic policy. The upper panel shows the running reward, exponentially weighted with a very large gamma; this validates convergence and gives a good estimate of the power. The second panel shows the deterministic policy over the last steps. Finally, the average output power is printed.

In [None]:
# Folder with the logged training data of the larger-range harmonic engine run
# (right panel).
log_dir = "../paper_plot_data/main/2021_02_03-09_42_46_harmonic_engine_larger_range/"

# Restore the trained model from the log folder.
loaded_train = sac_tri.SacTrain()
loaded_train.load_train(log_dir, no_train=True)

# Evaluate the deterministic policy and save it as "det_policy.txt".
loaded_train.evaluate_current_policy(
    deterministic=True,
    steps=8760-150,
    gamma=0.9999,
    actions_to_plot=200,
    save_policy_to_file_name="det_policy.txt",
    actions_ylim=None,
)

### Produce final plot (Fig. 5)
It will be located in the Jupyter Notebook directory

In [None]:
# ------------------------------------------------------------------------------
# Configuration (index 0 = left column, index 1 = right column)
# ------------------------------------------------------------------------------
log_0_dir = "../paper_plot_data/main/2021_02_02-18_41_42_harmonic_engine/"
log_1_dir = "../paper_plot_data/main/2021_02_03-09_42_46_harmonic_engine_larger_range/"
det_policy_sublocation = "saved_policies/det_policy.txt"
# Build the paths with os.path.join so they stay correct even if a log folder
# is given without a trailing separator.
det_policy_file_0 = os.path.join(log_0_dir, det_policy_sublocation)
det_policy_file_1 = os.path.join(log_1_dir, det_policy_sublocation)
actions_to_plot_0 = 66
actions_to_plot_1 = 98
actions_per_log = 6000
custom_colors=["orange","cornflowerblue","limegreen"]
prot_linewidth=4.
reward_linewidth = None
action_ylim = [0.25,1.6]
reward_ylim = [-2.1,2.5]
reward_ylabel= r"$10\times\ev*{P_{[\text{E}]}}_\gamma$"
# extra dashed horizontal line at 1.31 on each reward panel
reward_plot_extra_args_0 = ([0,250000], [1.31,1.31])
reward_plot_extra_kwargs_0 = dict(color='black',linewidth=0.8, dashes=(4,4))
reward_plot_extra_args_1 = ([0,600000], [1.31,1.31])
reward_plot_extra_kwargs_1 = dict(color='black',linewidth=0.8, dashes=(4,4))
# Reference Otto cycles drawn on top of the final protocols. Both use the same
# parameters; only the time range differs, spanning as many steps of length
# `otto_dt` as actions are plotted in the corresponding panel.
otto_dt = 0.2
otto_cycle_kwargs = dict(u_min=0.5,u_max=1.,t1=2.9,t2=1.79,t3=3.2,t4=1.67,dt=otto_dt)
extra_cycles_0 = plotting.produce_otto_cycle(t_range=[0,actions_to_plot_0*otto_dt], **otto_cycle_kwargs)
extra_cycles_linewidth_0 = 1.5
extra_cycles_1 = plotting.produce_otto_cycle(t_range=[0,actions_to_plot_1*otto_dt], **otto_cycle_kwargs)
extra_cycles_linewidth_1 = 1.5
# one thick proxy line per legend entry, in the colors used for the protocol
cycle_legend_lines=[Line2D([0], [0], color=legend_color, linewidth=4) for legend_color in custom_colors]
cycle_legend_text=["Hot","Cold","None"]
cycle_legend_location=[-0.63,0.05]
# relative file name: the figure is saved in the current working directory
plot_file_location = "harmonic_engine.pdf"


#get location of files
running_reward_file_0, running_loss_file_0, actions_file_0 = \
                                plotting.sac_logs_file_location(log_0_dir,True,None,None,None)
running_reward_file_1, running_loss_file_1, actions_file_1 = \
                                plotting.sac_logs_file_location(log_1_dir,True,None,None,None)

#font size (LaTeX text rendering is needed for \ev and \text in the labels)
matplotlib.rcParams.update({'font.size': 14, "text.usetex": True,
                            'text.latex.preamble' : r'\usepackage{amsmath}\usepackage{physics}'})

#create the axis (subplots): rewards on the top row, final protocols below
fig = plt.figure(constrained_layout=True, figsize=(6,3.5))
gs = gridspec.GridSpec(2, 2, figure=fig, height_ratios = [1,1])
reward_0_ax = fig.add_subplot(gs[0, 0])
reward_1_ax = fig.add_subplot(gs[0, 1],sharey=reward_0_ax)
prot_final_0_ax = fig.add_subplot(gs[1, 0])
prot_final_1_ax = fig.add_subplot(gs[1, 1],sharey=prot_final_0_ax)
# the right column shares its y axes with the left one, so hide its tick labels
for shared_y_ax in (reward_1_ax, prot_final_1_ax):
    plt.setp(shared_y_ax.get_yticklabels(), visible=False)


#set the reward axis (only the left panel carries y label, y ticks and legend)
plotting.plot_running_reward_on_axis(running_reward_file_0, reward_0_ax, plot_to_file_line = None, 
                                     linewidth=reward_linewidth,custom_color = "black",ylim=reward_ylim,
                                     ylabel=reward_ylabel,plot_extra_args=reward_plot_extra_args_0, 
                                     plot_extra_kwargs=reward_plot_extra_kwargs_0,yticks=[-2.,0.,2.5],
                                     legend_labels=['RL Cycle', 'Otto Cycle'])

plotting.plot_running_reward_on_axis(running_reward_file_1, reward_1_ax, plot_to_file_line = None, 
                                     linewidth=reward_linewidth,custom_color = "black",ylim=reward_ylim,
                                     ylabel="",plot_extra_args=reward_plot_extra_args_1, 
                                     plot_extra_kwargs=reward_plot_extra_kwargs_1)


#set the final protocol axis (only the left panel carries the legend and y label)
plotting.plot_actions_on_axis(det_policy_file_0, prot_final_0_ax, is_tri=True,
                            actions_to_plot=actions_to_plot_0, actions_ylim=action_ylim,plot_to_file_line=None,
                            custom_colors=custom_colors, constant_steps=True, k_notation=False, x_count_from_zero=True,
                            linewidth = prot_linewidth,extra_cycles=extra_cycles_0,
                            extra_cycles_linewidth=extra_cycles_linewidth_0,xlabel="$t[dt]$",
                            legend_lines=cycle_legend_lines,legend_text=cycle_legend_text,
                            legend_location=cycle_legend_location,legend_cols=1)
plotting.plot_actions_on_axis(det_policy_file_1, prot_final_1_ax, is_tri=True,
                            actions_to_plot=actions_to_plot_1, actions_ylim=action_ylim,plot_to_file_line=None,
                            custom_colors=custom_colors, constant_steps=True, k_notation=False, x_count_from_zero=True,
                            linewidth = prot_linewidth,extra_cycles=extra_cycles_1,
                            extra_cycles_linewidth=extra_cycles_linewidth_1,xlabel="$t[dt]$",ylabel="")

#add the (a) (b) (c) (d) labels, each at the same offset below its own panel
panel_labels = (
    (reward_0_ax, r'\textbf{(a)}'),
    (reward_1_ax, r'\textbf{(b)}'),
    (prot_final_0_ax, r'\textbf{(c)}'),
    (prot_final_1_ax, r'\textbf{(d)}'),
)
for panel_ax, panel_label in panel_labels:
    panel_ax.text(0.,-0.46, panel_label, transform=panel_ax.transAxes )

#save file
plt.savefig(plot_file_location)
#show
plt.show()
