# model
> Model of the environment | System-Under-Steer
- order: 4

In [None]:
#| default_exp model

In [None]:
!which python

/Users/kobus/aishiftscheduler/.venv/bin/python


In [None]:
# Set up autoreload
%load_ext autoreload
%autoreload

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from collections import defaultdict
import pandas as pd
# import matplotlib as mpl
# import matplotlib.pyplot as plt
from copy import copy
# import time
# import math
# from pprint import pprint
## !pip install -U "ray"
# import ray
# import json

from fastcore.basics import patch
import aishiftscheduler.config as cf
# import aishiftscheduler.policy as pol
import aishiftscheduler.visualization as vis

/Users/kobus/aishiftscheduler/aishiftscheduler/shift_scheduler_data.xlsx
/Users/kobus/aishiftscheduler/aishiftscheduler/AIShiftScheduler1.png

len(thNAMES)=5
thNAMES=['thCumSlots', 'thSickProb', 'thCumMerits', 'thContSlots', 'thSelect']


In [None]:
pd.__version__

'2.0.3'

In [None]:
#| export
## Display configuration: floats with thousands separators and four decimals;
## no truncation of columns, rows or cell contents when a frame is shown.
pd.set_option('display.float_format', '{:,.4f}'.format)
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [None]:
class MyClass(int):
    "Throwaway `int` subclass used to try out fastcore's `@patch` decorator."

In [None]:
@patch
def func(self: MyClass, a):
    "Demo method attached to `MyClass` through `@patch`; returns `self + a`."
    return self + a

In [None]:
mc = MyClass(3)

In [None]:
mc.func(10)

13

### 4.3 Mathematical Model | SUS Design

A Python class is used to implement the model for the SUS (System Under Steer):

```
class Model():
  def __init__(self, S_0_info):
    ...
    ...
```

#### 4.3.1 State variables

We will have the *learnable* parameters:

$$(\theta^{CumSlots}, \theta^{SickProb}, \theta^{CumMerits}, \theta^{ContSlots}, \theta^{Select})$$

The *state variables* represent *what we need to know*.

- $R^{Avail}_t = (R^{Avail}_{ta})_{a \in \cal A}$ where $\cal{A} = \{\alpha_1, \alpha_2, ... \alpha_{10}\}$
  - $R^{Avail}_{ta}$ = Boolean indicator for whether this resource (with attribute $a$) is available at $t$ for allocation to a shift
  - $\alpha_1$ = 1_Courtesy
  - $\alpha_2$ = 2_Courtesy
  - $\alpha_3$ = 3_Courtesy
  - ...
  - $\alpha_{15}$ = 9_Stocker
  - $\alpha_{16}$ = 10_Stocker
- $R^{CumShifts}_t = (R^{CumShifts}_{ta})_{a \in \cal A}$ where $\cal{A} = \{\alpha_1, \alpha_2, ... \alpha_{10}\}$
  - $R^{CumShifts}_{ta}$ = Number of shifts this resource (with attribute $a$), has worked at $t$
  - $\alpha_1$ = 1_Courtesy
  - $\alpha_2$ = 2_Courtesy
  - $\alpha_3$ = 3_Courtesy
  - ...
  - $\alpha_{15}$ = 9_Stocker
  - $\alpha_{16}$ = 10_Stocker
- $R^{CumMerits}_t = (R^{CumMerits}_{ta})_{a \in \cal A}$ where $\cal{A} = \{\alpha_1, \alpha_2, ... \alpha_{10}\}$
  - $R^{CumMerits}_{ta}$ = Number of net merits this resource (with attribute $a$), has gained at $t$
  - $\alpha_1$ = 1_Courtesy
  - $\alpha_2$ = 2_Courtesy
  - $\alpha_3$ = 3_Courtesy
  - ...
  - $\alpha_{15}$ = 9_Stocker
  - $\alpha_{16}$ = 10_Stocker  
- $D^{Shift}_t = (D^{Shift}_{tb})_{b \in \cal B}$ where $\cal{B} = \{\beta_1, \beta_2\}$
  - $D^{Shift}_{tb}$ = Number of demands for this resource (with attribute $b$), at $t$
  - $\beta_1$ = Courtesy
  - $\beta_2$ = Stocker
- $n^{Merits}_t = (n^{Merits}_{tb})_{b \in \cal B}$ where $\cal{B} = \{\beta_1, \beta_2\}$
  - $n^{Merits}_{tb}$ = Number of merits for this resource (with attribute $b$), at $t$
  - $\beta_1$ = Courtesy
  - $\beta_2$ = Stocker  

The state is:

$$
\begin{aligned}
S_t &= (R^{Avail}_t, R^{CumShifts}_t, R^{CumMerits}_t, D^{Shift}_t, n^{Merits}_t)
\end{aligned}
$$

#### 4.3.2 Decision variables

The *decision variables* represent *what we control*.

The decision vector is given by:

- $x_t = (x_{tab})_{a\in \cal A, b\in \cal B}$ where
  - $\cal{A} = \{\alpha_1, \alpha_2, ... \alpha_{10}\}$
  - $\cal{B} = \{\beta_1, \beta_2\}$
  - $x_{tab}$ is a boolean indicator of whether the resource with attribute $a$ is allocated to the demand with attribute $b$ at $t$

- Decisions are made with a policy (TBD below):
  - $X^{\pi}(S_t)$

#### 4.3.3 Exogenous information variables

The *exogenous information variables* represent *what we did not know (when we made a decision)*. These are the variables that we cannot control directly. The information in these variables becomes available *after* we make the decision $x_t$.

When we assume that the demand in each time period is revealed, without any model to predict the demand based on past demands, we have, using approach 1:

$$
\begin{aligned}
D_{t+1} &= W_{t+1} \\
        &= \hat{D}_{t+1}
\end{aligned}         
$$

Alternatively, when we assume that we observe the *change* in demand $\hat{D}_{t+1}=D_{t+1}-D_{t}$, we have, using approach 2:

$$
\begin{aligned}
D_{t+1} &= D_t + W_{t+1} \\
        &= D_t + \hat{D}_{t+1}
\end{aligned}
$$

We will make use of approach 1 which means that the exogenous information, $W_{t+1}$, is the directly observed demands of the resources.

Similarly, for the earned merits, we have

$$
\begin{aligned}
n_{t+1} &= W_{t+1} \\
        &= \hat{n}_{t+1}
\end{aligned}         
$$

The exogenous information is obtained by calls to

`DemandSimulator.simulate(...)`

`MeritSimulator.simulate(...)`

#### 4.3.4 Transition function

The *transition function* describes how the state variables evolve over time. We have the equations:

$$
R^{Avail}_{t+1} =
\begin{cases}
  1 & \text{if resource with attribute $a$ has not been allocated} \\
  0 & \text{if resource with attribute $a$ has been allocated  }
\end{cases}
$$

$$
R^{CumShifts}_{t+1} =
\begin{cases}
  R^{CumShifts}_{t} + 1 & \text{if resource was allocated} \\
  R^{CumShifts}_{t} & \text{if resource was not allocated  }
\end{cases}
$$

$$
R^{CumMerits}_{t+1} = R^{CumMerits}_{t} + n^{Merits}_t
$$

Collectively, they represent the general transition function:

$$
S_{t+1} = S^M(S_t,X^{\pi}(S_t),W_{t+1})
$$

#### 4.3.5 Objective function

The *objective function* captures the performance metrics of the solution to the problem.

We can write the state-dependent reward (also called the contribution, due to the allocation of a resource with attribute $b$):

$$
C(S_t,x_t) =
\begin{cases}
  1 & \text{if resource was allocated} \\
  -1 & \text{if resource was not allocated  }
\end{cases}
$$

We have the objective function:

$$
\max_{\pi}\mathbb{E}\{\sum_{t=0}^{T}C(S_t,x_t,W_{t+1}) \}
$$

The learned parameters are:

$$(\theta^{CumSlots}, \theta^{SickProb}, \theta^{CumMerits}, \theta^{ContSlots}, \theta^{Select})$$

#### 4.3.6 Implementation of the System Under Steer (SUS) Model

In [None]:
#| export
class Model:
    "Model of the System Under Steer (SUS); its methods are attached further down with `@patch`."

In [None]:
#| export
@patch
def __init__(self:Model, pars, W_fn=None, S__M_fn=None, C_fn=None):
    """Build the initial state `S_t`, the initial decision `x_t` and the running accumulators.

    `pars` must provide `RESOURCE_IDS`, `TYPES`, `RESOURCE_TYPES`, `abNAMES`,
    `START_DATE_TIME` and `get_availabilities(datetime)`.
    `W_fn`, `S__M_fn` and `C_fn` are accepted but not used by this constructor.
    """
    n_resources = len(pars.TYPES) ## one row per resource
    n_types = len(pars.RESOURCE_TYPES) ## one row per resource type
    n_combs = len(pars.abNAMES) ## one row per combination name

    def zeros(n):
        return [0]*n

    def nothing_allocated():
        ## fresh allocation table: every combination starts unallocated
        return pd.DataFrame({
          'Comb': pars.abNAMES, ##Combination
          'Allocd_t': [False]*n_combs, ##Allocated
        })

    self.params = pars

    resources = pd.DataFrame({
      'ResourceId': pars.RESOURCE_IDS,
      'Type': pars.TYPES,
      'RAvail_t': pars.get_availabilities(pd.to_datetime(pars.START_DATE_TIME)),
      'RCumSlots_t': zeros(n_resources), ##cumulative allocs (for T)
      'RCumMerits_t': zeros(n_resources),
      'RComplete_t': zeros(n_resources), ## 1/0: daily slot-run complete or not
    })
    demands = pd.DataFrame({
      'Type': pars.RESOURCE_TYPES,
      'DSlot_t': [1]*n_types, ## base demand, due to D_n (open question in the original: why 1?)
      'DBusy_t': zeros(n_types), ## demand due to busyness, i.e. foot traffic
      'DVolume_t': zeros(n_types), ## demand due to volume (number of items)
      'DRevenue_t': zeros(n_types), ## demand due to revenue
      'DMax_t': zeros(n_types), ## demand max due to all sources
    })
    merits = pd.DataFrame({
      'ResourceId': pars.RESOURCE_IDS,
      'Type': pars.TYPES,
      'nMerits_t': zeros(n_resources),
    })

    self.S_t = {
      'R_t': resources, ## Resources
      'D_t': demands, ## Demands
      'M_t': merits, ## Merits
      'B_t': pd.DataFrame({'Busy_t': [0]}), ## Busyness
      'V_t': pd.DataFrame({'Volume_t': [0]}), ## Volume
      'N_t': pd.DataFrame({'Revenue_t': [0]}), ## Revenue
      'xAlloc_t_1': nothing_allocated(), ##previous allocation
    }
    self.x_t = {'xAlloc_t': nothing_allocated()}

    ## reward components and their weighted total
    self.Ccum_CumSlots = 0.0
    self.Ccum_SickProb = 0.0
    self.Ccum_CumMerits = 0.0
    self.Ccum_ContSlots = 0.0
    self.Ccum = 0.0 ##cumulative reward

    ## cumulative unallocated/unmet demands: overall and per resource type
    self.Ucum_Total = 0
    self.Ucum = dict.fromkeys(pars.RESOURCE_TYPES, 0)
    ## latest exogenous information; stays empty until it is assigned elsewhere
    self.W_tt1 = {}

In [None]:
# M = Model()

In [None]:
# @patch
# def reset(self:Model):
#   self.Ccum = 0.0
#   self.Ucum = 0

In [None]:
#| export
@patch
def W_fn(self:Model, t, dt): ## exogenous information
    """Look up the exogenous information for datetime `dt` in the table `cf.EXOG_INFO`.

    `t` is not used; the lookup is keyed on `dt` only (matched against the 'Date' column).

    Returns a dict with the keys:
      'demands'        : {resource type: value of column 'D_<i>'}
      'merits'         : {aNAME: value of column 'M_<i>'}
      'availabilities' : result of `self.params.get_availabilities(dt)`
      'busyness', 'volume', 'revenue' : values of the columns of the same name
    """
    ## filter the table once, instead of re-evaluating the same mask for every column
    rows_dt = cf.EXOG_INFO.loc[cf.EXOG_INFO['Date'] == dt]

    def lookup(col):
        ## `.squeeze()` collapses the selection to a scalar when exactly one row matches `dt`
        return rows_dt[col].squeeze()

    n_types = len(self.params.RESOURCE_TYPES)
    n_resources = len(self.params.RESOURCE_IDS)
    return {
      ## 'demands': DEM.simulate(),
      ## 'demands': {'Courtesy': 2, 'Stocker': 1},
      'demands': { ## TAB function
        self.params.RESOURCE_TYPES[d]: lookup(f'D_{d}') for d in range(n_types)
      },
      ## 'merits': MER.simulate(),
      'merits': { ## TAB function
        self.params.aNAMES[m]: lookup(f'M_{m}') for m in range(n_resources)
      },
      'availabilities': self.params.get_availabilities(dt), ## TAB function
      'busyness': lookup('busyness'),
      'volume': lookup('volume'),
      'revenue': lookup('revenue'),
    }

In [None]:
# t = 7

In [None]:
# dt = pd.to_datetime(cf.START_DATE_TIME); dt

In [None]:
# M.W_fn(t, dt)

In [None]:
#| export
@patch
def update_Ccum(self:Model, t, dt, S_t, x_t, theta):
    """Add the reward of the current allocation `x_t` to the running totals on `self`.

    Parameters:
      t     : time-step index; used only to derive the day-of-week for the capacity lookup
      dt    : datetime of the step; `dt.month_name()` selects the column of `cf.SICK_PROBS`
      S_t   : state dict; reads `S_t['R_t']` and `S_t['xAlloc_t_1']`
      x_t   : decision dict; reads `x_t['xAlloc_t']`
      theta : needs `thCumSlots`, `thSickProb`, `thCumMerits` and `thContSlots`

    Side effects: increments `self.Ccum_CumSlots`, `self.Ccum_SickProb`,
    `self.Ccum_CumMerits`, `self.Ccum_ContSlots` and `self.Ccum`; then subtracts the
    expense of the allocated resources from `self.Ccum` and adds `self.W_tt1['revenue']`.
    Returns None.
    """
    ## dow, hod = ((t + 1)//24)%7, (t + 1)%24; ##print(f'{dow=}, {hod=}')
    ## dow, qod = ((t + 1)//96)%7, (t + 1)%96; ##print(f'{dow=}, {qod=}')
    ## only the day-of-week is needed here
    dow = ((t + 1)//self.params.SLOTS_PER_DAY)%7

    ## current allocation joined (by row index) with the per-resource state
    m1 = x_t['xAlloc_t'].merge(
      S_t['R_t'][['RCumSlots_t', 'RAvail_t', 'RCumMerits_t']],
      left_index=True, right_index=True)

    ## merge in previous allocations; both frames carry 'Allocd_t', hence the _x/_y suffixes
    m1a = S_t['xAlloc_t_1'].merge(
      m1[['Allocd_t', 'RCumSlots_t', 'RAvail_t', 'RCumMerits_t']],
      left_index=True, right_index=True)
    m1a.rename(columns={'Allocd_t_x': 'Allocd_t_1', 'Allocd_t_y': 'Allocd_t'}, inplace=True)

    ## sick probability for the month of `dt`
    m2 = m1a.merge(
      cf.SICK_PROBS[[dt.month_name()]],
      left_index=True, right_index=True)
    m2.rename(columns={dt.month_name(): 'SickProb'}, inplace=True)

    ## slots component: capacity minus the slots already worked
    m2['Capacity'] = pd.DataFrame({'Capacity': self.params.get_capacities(dow)})
    m2['Ccum_CumSlots'] = -(m2['RCumSlots_t'] - m2['Capacity'])

    ## sick-probability component: negated deviation from the mean sick probability
    ## (earlier variants, kept for reference, scaled this by 100)
    ## m2['Ccum_SickProb'] = -100*m2['SickProb']
    ## m2['Ccum_SickProb'] = -100*(m2['SickProb'] - m2['SickProb'].mean())
    m2['Ccum_SickProb'] = -1*(m2['SickProb'] - m2['SickProb'].mean())

    m2['Ccum_CumMerits'] = m2['RCumMerits_t']

    ## contiguous slots: reward if a shift is continued without interruption.
    ## This assignment is the only place the column is written; rows that do not
    ## match stay NaN, which `sum` below skips.
    m2.loc[
      (m2['Allocd_t'] == True) & \
      (m2['Allocd_t'] == m2['Allocd_t_1']),
      ['Ccum_ContSlots']
    ] = cf.CONTIGUOUS_REWARD

    ## only allocated rows contribute to the reward
    summables = m2.loc[
      m2['Allocd_t'] == True,
      ['Ccum_CumSlots', 'Ccum_SickProb', 'Ccum_CumMerits', 'Ccum_ContSlots'],
    ]
    Ccum_CumSlots, Ccum_SickProb, Ccum_CumMerits, Ccum_ContSlots = summables.sum(axis=0)
    Ccum = \
      theta.thCumSlots*Ccum_CumSlots + \
      theta.thSickProb*Ccum_SickProb + \
      theta.thCumMerits*Ccum_CumMerits + \
      theta.thContSlots*Ccum_ContSlots
    self.Ccum_CumSlots += Ccum_CumSlots
    self.Ccum_SickProb += Ccum_SickProb
    self.Ccum_CumMerits += Ccum_CumMerits
    self.Ccum_ContSlots += Ccum_ContSlots
    self.Ccum += Ccum

    ##resource expense (what you pay them)
    ## The first '_'-separated field of a combination name is the resource type.
    ## A plain sum is used instead of row-wise `DataFrame.apply`, because `apply(axis=1)`
    ## on an empty frame returns a DataFrame, which broke the column assignment
    ## whenever nothing was allocated; the sum is simply 0 in that case.
    allocd_combs = x_t['xAlloc_t'].loc[x_t['xAlloc_t']['Allocd_t'] == True, 'Comb']
    expense = sum(self.params.RESOURCE_EXPENSE[comb.split('_')[0]] for comb in allocd_combs)
    self.Ccum -= expense

    ## real revenue or forecast
    ## NOTE(review): reads `self.W_tt1`, which `__init__` leaves empty and `step` fills;
    ## calling this before the first `step` raises KeyError -- confirm the call order.
    self.Ccum += self.W_tt1['revenue']

In [None]:
# m = Model(); m

In [None]:
# t = 8
# dt = pd.to_datetime(cf.START_DATE_TIME); dt

In [None]:
# S_t = m.S_t; S_t

In [None]:
# x_t = m.x_t; x_t

In [None]:
# P = pol.Policy(M)

In [None]:
# theta = P.build_theta({
#   'thCumSlots': .1,
#   'thSickProb': .3,
#   'thCumMerits': .2,
#   'thContSlots': 1 - (.6),
#   'thSelect': 'random'
# }); theta

In [None]:
#| export
@patch
def performAllocDecision(self:Model, S_t, x_t, theta):
    """Apply the allocation held in `x_t` to the resource table `S_t['R_t']`, in place.

    Every resource whose combination is flagged in `x_t['xAlloc_t']` gets
    `RAvail_t` set to 0 and `RCumSlots_t` incremented by 1. `theta` is not used.
    """
    alloc = x_t['xAlloc_t']
    chosen_combs = alloc.loc[alloc['Allocd_t']==True, 'Comb']
    ## the second '_'-separated field of a combination name is the ResourceId
    chosen_ids = [fields[1] for fields in chosen_combs.str.split('_')]; ##print(f'{chosen_ids=}')

    ## update state of allocs
    resources = S_t['R_t']
    is_chosen = resources['ResourceId'].isin(chosen_ids)
    resources.loc[is_chosen, ['RAvail_t']] = 0
    resources.loc[is_chosen, ['RCumSlots_t']] += 1

    ## the reward is not updated here (a count-based update was left disabled):
    ## self.Ccum += len(chosen_ids) #number of allocations

In [None]:
#| export
@patch
def S__M_fn(self:Model, t, dt, S_t, x_t, W_tt1, theta):
    """Transition function: advance the state `S_t` in place and return a flat record of it.

    Steps, in order:
      1. apply the decision `x_t` through `performAllocDecision`;
      2. copy the exogenous information `W_tt1` (merits, availabilities, demands,
         busyness, volume, revenue) into the state frames;
      3. set `DMax_t` to the row-wise maximum of the four demand columns;
      4. add the new merits to `RCumMerits_t`;
      5. flag `RComplete_t = 1` for resources allocated in the previous step but not in this one;
      6. store a copy of the current allocation as `S_t['xAlloc_t_1']`;
      7. when the next slot is the first of a day (`sod == 0`), reset the daily counters.

    Parameters:
      t     : time-step index, used to derive the slot-of-day
      dt    : datetime of the step; only written into the returned record
      S_t   : state dict of DataFrames, mutated in place
      x_t   : decision dict; reads `x_t['xAlloc_t']`
      W_tt1 : dict with keys 'merits', 'availabilities', 'demands', 'busyness', 'volume', 'revenue'
      theta : only forwarded to `performAllocDecision`

    Returns `record_t`, a flat list: `[t, dt]`, the state columns, the unmet-demand
    counters, the reward accumulators, and the current `Allocd_t` flags.
    """
    ## dow, hod = ((t + 1)//24)%7, (t + 1)%24; ##print(f'{dow=}, {hod=}')
    ## dow, qod = ((t + 1)//96)%7, (t + 1)%96; ##print(f'{dow=}, {qod=}')
    ## day-of-week and slot-of-day of step t+1; `dow` is only referenced in the
    ## commented-out weekly reset further down
    dow, sod = ((t + 1)//self.params.SLOTS_PER_DAY)%7, (t + 1)%self.params.SLOTS_PER_DAY; ##print(f'{dow=}, {sod=}')

    ## perform decision taken this morning
    self.performAllocDecision(S_t, x_t, theta)

    ## Update state from exogenous information
    ## the second '_'-separated field of an aNAME is the ResourceId
    for an in self.params.aNAMES:
      resId = an.split('_')[1]
      merits = W_tt1['merits'][an]
      S_t['M_t'].loc[S_t['M_t']['ResourceId'] == resId, 'nMerits_t'] = merits
    ## replaces the whole RAvail_t column, including the zeros just written by performAllocDecision
    S_t['R_t']['RAvail_t'] = W_tt1['availabilities']
    ## base demands
    for rt in self.params.RESOURCE_TYPES:
      rt_demands = W_tt1['demands'][rt]
      S_t['D_t'].loc[S_t['D_t']['Type'] == rt, 'DSlot_t'] = rt_demands
    ## busyness demands: per-type factor DEMAND_PER_BUSYNESS times the observed busyness
    busyness = W_tt1['busyness'] 
    S_t['B_t']['Busy_t'] = busyness
    for rt in self.params.RESOURCE_TYPES:
      rt_demands = self.params.DEMAND_PER_BUSYNESS[rt]*busyness
      S_t['D_t'].loc[S_t['D_t']['Type'] == rt, 'DBusy_t'] = rt_demands
    ## volume demands: per-type factor DEMAND_PER_VOLUME times the observed volume
    volume = W_tt1['volume'] 
    S_t['V_t']['Volume_t'] = volume
    for rt in self.params.RESOURCE_TYPES:
      rt_demands = self.params.DEMAND_PER_VOLUME[rt]*volume
      S_t['D_t'].loc[S_t['D_t']['Type'] == rt, 'DVolume_t'] = rt_demands
    ## revenue demands: per-type factor DEMAND_PER_REVENUE times the observed revenue
    revenue = W_tt1['revenue']
    S_t['N_t']['Revenue_t'] = revenue
    for rt in self.params.RESOURCE_TYPES:
      rt_demands = self.params.DEMAND_PER_REVENUE[rt]*revenue
      S_t['D_t'].loc[S_t['D_t']['Type'] == rt, 'DRevenue_t'] = rt_demands    

    ## overall demand per type: the largest of the four demand sources
    for rt in self.params.RESOURCE_TYPES:
      number = S_t['D_t'].loc[
        S_t['D_t']['Type']==rt,
        ['DSlot_t', 'DBusy_t', 'DVolume_t', 'DRevenue_t']
      ].max(axis=1).squeeze() ##max the contributions of each demand
      ## ].sum(axis=1).squeeze() ##sum the contributions of each demand
      S_t['D_t'].loc[S_t['D_t']['Type'] == rt, 'DMax_t'] = number

    ## Update cumulative merits of all resources
    S_t['R_t']['RCumMerits_t'] += S_t['M_t']['nMerits_t']

    ## Update RComplete_t
    ## join previous and current allocation by row index; both frames carry
    ## 'Comb' and 'Allocd_t', hence the _x/_y suffixes renamed below
    m1 = \
      S_t['xAlloc_t_1'].merge(
      x_t['xAlloc_t'],
      left_index=True, right_index=True)
    m1.rename(columns={'Comb_x': 'Comb', 'Allocd_t_x': 'Allocd_t_1', 'Allocd_t_y': 'Allocd_t'}, inplace=True)
    m1.drop('Comb_y', inplace=True, axis=1)
    ## combinations that were allocated in the previous step but are not allocated now
    m2 = m1.loc[(m1['Allocd_t_1'] == True) & (m1['Allocd_t'] == False)]
    ## (the same condition is applied a second time here; it selects every row of m2)
    resourceIds = m2.loc[
      (m2['Allocd_t_1'] == True) & (m2['Allocd_t'] == False),
      ['Comb']
    ]['Comb'].str.split('_').str[1:2].tolist(); ##print(f'{resourceIds=}')
    resourceIds_flat = [e[0] for e in resourceIds]; ##print(f'{resourceIds_flat=}')
    S_t['R_t'].loc[
      S_t['R_t']['ResourceId'].isin(resourceIds_flat),
      ['RComplete_t']
    ] = 1

    ## Update 'xAlloc_t_1'
    S_t['xAlloc_t_1'] = copy(x_t['xAlloc_t']) ## SG

    # ## Reset for new month
    # if dt.is_month_start:
    #   S_t['R_t']['RMonthCumSlots_t'] = 0

    # ## Reset for new week
    # if dow == 0:
    #   S_t['R_t']['RMonthCumSlots_t'] += S_t['R_t']['RWeekCumSlots_t']
    #   S_t['R_t']['RWeekCumSlots_t'] = 0

    ## Reset for new day
    ## runs before the record is assembled, so the record of such a step holds the reset values
    if sod == 0: ## slot-of-day
      S_t['R_t']['RCumSlots_t'] = 0; ##print(f'%%% Resetting RCumSlots_t ...')
      ## S_t['R_t']['RWeekCumSlots_t'] += S_t['R_t']['RCumSlots_t']
      ## S_t['R_t']['RCumSlots_t'] = 0; ##print(f'%%% Resetting RCumSlots_t ...')

      S_t['R_t']['RComplete_t'] = 0
      self.Ucum = {rt: 0 for rt in self.params.RESOURCE_TYPES}
      self.Ucum_Total = 0
      self.Ccum_CumSlots = 0.0
      ## the sick-probability, merits and total accumulators are not reset daily
      ## self.Ccum_SickProb = 0.0
      ## self.Ccum_CumMerits = 0.0
      self.Ccum_ContSlots = 0.0
      ## self.Ccum = 0.0

    ## flat record of this step
    ## NOTE(review): the element order presumably has to match the column labels used
    ## when records are turned into a DataFrame -- confirm against the caller
    record_t = [t, dt] + \
      list(S_t['R_t']['RAvail_t']) + \
      list(S_t['R_t']['RCumSlots_t']) + \
      list(S_t['R_t']['RCumMerits_t']) + \
      list(S_t['R_t']['RComplete_t']) + \
      list(S_t['D_t']['DSlot_t']) + \
      list(S_t['D_t']['DBusy_t']) + \
      list(S_t['D_t']['DVolume_t']) + \
      list(S_t['D_t']['DRevenue_t']) + \
      list(S_t['D_t']['DMax_t']) + \
      list(S_t['B_t']['Busy_t']) + \
      list(S_t['V_t']['Volume_t']) + \
      list(S_t['N_t']['Revenue_t']) + \
      [self.Ucum[rt] for rt in self.params.RESOURCE_TYPES] + \
      [self.Ucum_Total] + \
      [self.Ccum_CumSlots] + \
      [self.Ccum_SickProb] + \
      [self.Ccum_CumMerits] + \
      [self.Ccum_ContSlots] + \
      [self.Ccum] + \
      list(x_t['xAlloc_t']['Allocd_t'])
    return record_t

In [None]:
#| export
@patch
def C_fn(self:Model, S_t, x_t, W_tt1, theta):
    "Placeholder for the contribution function: ignores its arguments and returns `None`."
    return None

In [None]:
#| export
@patch
def step(self:Model, t, dt, theta):
    """Advance the model by one time step and return the record of that step.

    Fetches the exogenous information for (`t`, `dt`) with `W_fn`, stores it on
    `self.W_tt1`, and passes it together with the model's own `S_t` and `x_t`
    to the transition function `S__M_fn`, whose result is returned.
    """
    exog_info = self.W_fn(t, dt)
    self.W_tt1 = exog_info

    ## update state & reward
    return self.S__M_fn(t, dt, self.S_t, self.x_t, exog_info, theta)

## Usage

In [None]:
# # hide
# # QUICK STEP
# def print_S_t():
#   print(f'M.S_t["R_t"]=\n{M.S_t["R_t"]}')
#   print(f'M.S_t["D_t"]=\n{M.S_t["D_t"]}')

# def print_x_t():
#   # print(f'x_t.x_t= {x_t.x_t}')
#   print(f'M.x_t["xAlloc_t"]=\n{M.x_t["xAlloc_t"]}')

# record = []
# l = 1
# M = Model()
# P = pol.Policy(M)
# ## DEM = DemandSimulator(seed=SEED_TRAIN); print(f'{DEM.simulate()=}')
# ## MER = MeritSimulator(seed=SEED_TRAIN); print(f'{MER.simulate()=}')

# theta_test = P.build_theta({
#   'thCumSlots': .1,
#   'thSickProb': .3,
#   'thCumMerits': .2,
#   'thContSlots': 1 - (.6),
#   'thSelect': 'random'
# })
# record_l = [cf.piNAMES[0], theta_test, l]; print(f'{record_l=}')
# print_S_t()
# print_x_t()

In [None]:
# ## hide
# # T = 7*24
# # T = 2*24
# T = 2
# # T = 1*SLOTS_PER_DAY
# # T = 2*SLOTS_PER_DAY
# dt = pd.to_datetime(cf.START_DATE_TIME)
# dt_delta = pd.Timedelta(cf.DATE_TIME_DELTA)
# for t in range(T):
#   print(f'\n################# t={t}, dt={dt} #################')
#   # getattr(P, 'X__AllocBelow')(t, M.S_t, M.x_t, theta_test); #print(f'{x_t=}')
#   getattr(P, 'X__Alloc')(t, dt, M.S_t, M.x_t, theta_test); #print(f'{x_t=}')
#   print_S_t()
#   print_x_t()

#   # record_t = M.step(t, theta_test)
#   record_t = M.step(t, dt, theta_test)
#   print(f'\nAFTER STEP:')
#   record.append(record_l + record_t); #print(f'{record=}')
#   dt = dt + dt_delta
#   # print_S_t()
#   # print(f'{M.Ccum=}')

In [None]:
# ## hide
# V = vis.Visualization()

In [None]:
# df_test_n_t = pd.DataFrame.from_records(record, columns=cf.LABELS)
# df_test_n_t.tail()

In [None]:
# ## hide
# V.plot_records(
#   df=df_test_n_t,
#   df_non=None,
#   pars=defaultdict(str, {
#     # 'thetaAdj1': {a1n: theta_test.thAdj1[a1n] for a1n in a1NAMES},
#     # 'thetaAdj3': {a1n: theta_test.thAdj3[a1n] for a1n in a1NAMES},
#     # 'suptitle': f'TRAINING OF X__AdjBelow POLICY'+'\n'+f'(first {first_n_t} records)'+'\n'+ \
#     # f'L = {L}, T = {T}, '+ \
#     # r'$\theta^*=$'+f'{P.round_theta(best_theta_AdjBelow)}',
#     'T': T
#   }),
# )

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()