In [1]:
import numpy as np
import pandas as pd

In [2]:
import sys
import yaml
from pathlib import Path
# Add the parent of the current working directory to the import search path
# (presumably where the `grid` package imported in the next cell lives — confirm).
sys.path.append(str(Path.cwd().parent))

In [3]:
from grid.policy import occupation_measure, get_ratio
from grid.utils import load_data, load_all
from grid.plotting import plot_policy, plot_occupation_measure, plot_reward, plot_ratio

In [None]:
# Path of the YAML configuration file, resolved against the working directory.
configs_file = 'configs.yaml'

# Parse the configuration with the safe loader. A YAML syntax error is
# reported on stdout and then re-raised so the notebook stops here.
try:
    with open(configs_file, 'r') as stream:
        configs = yaml.safe_load(stream)
except yaml.YAMLError as err:
    print('({}) ERROR ... Could not load configs yaml.'.format('root'))
    raise err

# Notebook Sections

* * *

1. Grid world.<br>
2. Load single data object.<br>
    a. either from local file system.<br>
    b. or from COS bucket.<br>
3. Retrieve simulation information.<br>
4. Load data objects iteratively.

* * *

## 1. Grid World

* * *

Consider an $m \times n$ grid. Each tile of the grid constitutes a state. 

$$ \begin{array}{|c|c|c|}
\hline (0,0) & \cdots & (0,n-1) \\ \hline
\hline \vdots & \ddots & \vdots \\ \hline 
\hline (m-1,0) & \cdots & (m-1,n-1) \\
\hline
\end{array} $$

The actions $\mathcal{A} = \{U,D,L,R,N\}$ correspond to moving *up*, *down*, *left*, *right*, or *none* (staying put). 

When the move is possible, an action $a \in \{U,D,L,R\}$ moves the agent in the chosen direction with probability $1-\delta$. With the remaining small probability $\delta$, the agent transitions to one of the other states instead.

The goal is to reach a pre-determined terminal cell.

* * *

## 2. Load Single Data Object

Load a single generated policy data object:
* either from local file system.
* or from COS bucket.

Specify ```policy_data_obj_file``` to load.

```is_local = True``` if loading from local system. Then specify file ```root_in``` and ```root_out```.

```is_local = False``` if loading from COS bucket. Then specify ```configs.yaml``` path.

In [4]:
# Storage switch for the single-object load: True reads from the local file
# system, False reads from the COS bucket.
is_local = False

In [5]:
# Name of the policy data object to load. While this is empty, the cells
# guarded by `len(policy_data_obj_file)>0` do nothing.
policy_data_obj_file = ''

In [6]:
if is_local:
    # Local mode: input and output roots are read from the parsed configs.
    root_in, root_out = (
        configs['local']['buckets'][key] for key in ('data', 'data_dest')
    )

Load pickled policy data object.

In [7]:
if policy_data_obj_file:

    # Pick the loader arguments by storage backend: a local root directory,
    # or the 'data_dest' bucket together with the configs file path.
    data = (
        load_data(policy_data_obj_file, root=root_in)
        if is_local
        else load_data(policy_data_obj_file, bucket='data_dest', configs=configs_file)
    )

    # Stop early if the loader handed back nothing.
    assert data is not None, 'Data object load failed!'

## 3. Retrieve Information

Retrieve relevant information from policy data object.

Grid specs.

In [8]:
if policy_data_obj_file:

    # Grid dimensions, plus the action labels and flat state indices
    # (0 .. m*n - 1) built from them.
    m, n = (data[key] for key in ('m', 'n'))

    actions = list('UDLRN')
    states = list(range(m * n))

    state_num, action_num = len(states), len(actions)

    discount, noise = (data[key] for key in ('discount', 'noise'))

    # atol is the absolute order of magnitude of tol, reduced by two.
    tol = data['tol']
    atol = abs(int(np.log10(tol))) - 2

    final, obstacles, state_dist, init_state = (
        data[key] for key in ('final', 'obstacles', 'state_dist', 'init_state')
    )

    P, r, d = (data[key] for key in ('P', 'r', 'd'))

    # Occupation measure of the stored optimal policy, and the
    # reward / risk / ratio figures computed from it.
    p_opt = data['p_opt']
    rho_opt = occupation_measure(p_opt, state_dist, discount, P, tol)
    reward_opt, risk_opt, ratio_opt = get_ratio(rho_opt, r, d)

    print(
        'Reward in optimal policy: {:.3f}\n'
        'Risk in optimal policy: {:.3f}\n'
        'Ratio in optimal policy: {:.3f}'.format(reward_opt, risk_opt, ratio_opt)
    )

    plot_policy(p_opt, states, actions, m, n, final, obstacles)

Data Policy: The data policy is not deterministic; it is a noisy version of the optimal policy.

In [9]:
if policy_data_obj_file:

    # Same evaluation as for the optimal policy, applied to the data policy.
    p_data = data['p_data']
    rho_data = occupation_measure(p_data, state_dist, discount, P, tol)
    reward_data, risk_data, ratio_data = get_ratio(rho_data, r, d)

    print(
        'Reward in data policy: {:.3f}\n'
        'Risk in data policy: {:.3f}\n'
        'Ratio in data policy: {:.3f}'.format(reward_data, risk_data, ratio_data)
    )

    plot_policy(p_data, states, actions, m, n, final, obstacles)
    plot_occupation_measure(rho_data, states, actions, m, n, tol=1e-8)

Pareto Front.

In [10]:
if policy_data_obj_file:

    rewards = data['pareto_rewards']
    risks = data['pareto_risks']

    # One line per stored (reward, risk) pair: index, reward, risk and
    # the reward-to-risk ratio.
    for step, (reward, risk) in enumerate(zip(rewards, risks)):
        print('Iteration {}: reward {:.3f}, risk {:.3f}, ratio: {:.3f}'.format(step, reward, risk, reward / risk))

    plot_reward(risks, rewards, atol=2, figsize=(17,5))
    plot_ratio(risks, rewards, atol=2, figsize=(17,5))

## 4. Load Iteratively

Load all policy data objects in the ```data``` bucket.

Choose whether to move each data object from its current bucket to a destination bucket by specifying `is_move`.

Limit the number of retrieved data objects by specifying `N`.

In [11]:
# Storage switch for the bulk load: True uses the local roots from configs,
# False uses the COS bucket names set in the next cell.
is_local = False

In [12]:
if not is_local:
    # COS mode: bucket names used by the load_all call in a later cell.
    source_bucket, archive_bucket = 'data', 'data_dest'
else:
    # Local mode: input and output roots are read from the parsed configs.
    root_in, root_out = (
        configs['local']['buckets'][key] for key in ('data', 'data_dest')
    )

In [13]:
# Whether loaded objects should be moved to the destination location.
# In local mode this decides whether `root_out` is passed to load_all.
is_move = False

In [14]:
# Load up to N=5 policy data objects from the selected backend.
# A destination (root_out / bucket_out) is only handed to load_all when
# is_move is set, so that is_move=False leaves the source untouched.
if is_local:

    if is_move:
        datas = load_all(root_in=root_in, root_out=root_out, N=5)
    else:
        datas = load_all(root_in=root_in, N=5)

else:

    # Previously bucket_out was always passed here, so is_move was ignored
    # for COS loads. This now mirrors the local branch.
    # NOTE(review): assumes load_all does not move objects when bucket_out
    # is omitted — confirm against grid.utils.load_all.
    if is_move:
        datas = load_all(configs=configs_file, N=5, bucket_in=source_bucket, bucket_out=archive_bucket)
    else:
        datas = load_all(configs=configs_file, N=5, bucket_in=source_bucket)

In [15]:
# Report how many objects came back from load_all, or that none did.
print(
    'Uploaded {} objects.'.format(len(datas))
    if len(datas) > 0
    else 'Nothing uploaded.'
)

Uploaded 5 objects.
