In [1]:
import import_ipynb
from shared_imports import *
from data_handling import *
from neural_networks import *
import gymnasium as gym
from gymnasium import spaces

importing Jupyter notebook from shared_imports.ipynb
importing Jupyter notebook from data_handling.ipynb
importing Jupyter notebook from neural_networks.ipynb


In [4]:
class Simulator(gym.Env):
    """
    Batched inventory simulator following the gymnasium ``Env`` interface.

    All tensors carry a leading batch dimension, so one call to ``step`` advances every sample
    of the batch by one period. Store inventories are indexed as (sample, store, position):
    position 0 is the inventory on hand, and an order placed with lead time ``l`` is written
    into position ``l - 1`` of the updated inventory tensor.
    """

    metadata = {"render_modes": None}

    def __init__(self, device='cpu'):
        """
        Arguments:
            device: str or torch.device
                device on which the tensors created by the simulator are placed
        """
        self.device = device
        self.problem_params, self.observation_params = None, None
        self.batch_size, self.n_stores, self.periods, self.observation, self._internal_data = None, None, None, None, None

        # placeholders, overridden in reset (observation and action spaces depend on the batch size,
        # which might change during execution)
        self.action_space = spaces.Dict({'stores': spaces.Box(low=0.0, high=np.inf, shape=(1, 1), dtype=np.float32)})
        self.observation_space = spaces.Dict({'stores': spaces.Box(low=0.0, high=np.inf, shape=(1, 1), dtype=np.float32)})

    def reset(self, periods, problem_params,  data, observation_params):
        """
        Reset the environment for a new batch, initialize the observation, and return the first observation.

        Arguments:
            periods: int
                number of periods after which ``step`` reports termination
            problem_params: dict
                problem description; this method reads 'n_stores' and 'n_warehouses'
            data: dict
                batch of tensors; this method reads 'initial_inventories', 'demands' and, depending
                on the configuration, 'initial_warehouse_inventories' and 'days_to_christmas'
            observation_params: dict
                configuration of what is included in the observation

        Returns:
            tuple (observation, None)
        """

        self.problem_params = problem_params
        self.observation_params = observation_params

        self.batch_size, self.n_stores, self.periods = len(data['initial_inventories']), problem_params['n_stores'], periods

        # data that can only be used by the simulator, e.g., all demands, days to christmas (including future)...
        self._internal_data = {
            'demands': data['demands'],
            'period_shift': observation_params['demand']['period_shift'],
            }

        if observation_params['include_days_to_christmas']:
            self._internal_data['days_to_christmas'] = data['days_to_christmas']
        self._internal_data['allocation_shift'] = self.initialize_shifts_for_allocation_put(data['initial_inventories'].shape).long().to(self.device)
        if self.problem_params['n_warehouses'] > 0:
            self._internal_data['warehouse_allocation_shift'] = self.initialize_shifts_for_allocation_put(data['initial_warehouse_inventories'].shape).long().to(self.device)
        self._internal_data['zero_allocation_tensor'] = self.initialize_zero_allocation_tensor(data['initial_inventories'].shape[: -1]).to(self.device)

        # _internal_data must be populated before this call: update_past_demands reads 'period_shift' from it
        self.observation = self.initialize_observation(data, observation_params)
        self.action_space = self.initialize_action_space(self.batch_size, problem_params, observation_params)
        self.observation_space = self.initialize_observation_space(self.observation, periods, problem_params,)

        return self.observation, None

    def initialize_shifts_for_allocation_put(self, shape):
        """
        Compute, for each (sample, store), the offset of its first inventory position inside the
        flattened (batch_size * n_stores * lead_time_max) inventory vector of the entire batch.
        Adding (lead_time - 1) to that offset gives the flat position in which an allocation is inserted.

        Arguments:
            shape: tuple (batch_size, n_stores, lead_time_max)

        Returns:
            tensor of shape batch_size x n_stores with the flat offsets
        """

        batch_size, n_stores, lead_time_max = shape

        # vector of length batch_size, where each entry is the first flat position of a given sample
        n_instance_store_shift = (
            torch.arange(batch_size) * (lead_time_max * n_stores)
            ).to(self.device)

        # tensor of shape batch_size x stores, where each entry is the number of positions to move
        # 'to the right' for each store, starting from the first position within a sample
        store_n_shift = (
            torch.arange(n_stores) * (lead_time_max)
            ).expand(batch_size, n_stores).to(self.device)

        # first flat position of each (sample, store)
        return n_instance_store_shift[:, None] + store_n_shift

    def initialize_zero_allocation_tensor(self, shape):
        """
        Return a tensor of zeros of the given shape, placed on the simulator's device
        """

        return torch.zeros(shape).to(self.device)

    def step(self, action):
        """
        Simulate one period: apply the demand of the current period and the given action, update
        the observation in place and advance the period counter.

        Arguments:
            action: dict
                must contain 'stores' (allocation per sample and store); 'warehouses' is used
                only when the observation tracks warehouse inventories

        Returns:
            tuple (observation, reward, terminated, None, None), where terminated is the result of
            comparing the period counter tensor against the number of periods given to reset
        """

        current_demands = self.get_current_demands(
            self._internal_data,
            current_period=self.observation['current_period'].item()
            )

        # calculate reward and update store inventories
        reward = self.calculate_store_reward_and_update_store_inventories(
            current_demands,
            action,
            self.observation
            )

        # update current period
        self.observation['current_period'] += 1

        terminated = self.observation['current_period'] >= self.periods

        return self.observation, reward, terminated, None, None

    def get_current_demands(self, data, current_period):
        """
        Return the demand slice (all samples, all stores) for the given period, shifted by 'period_shift'
        """

        return data['demands'][:, :, current_period + self._internal_data['period_shift']]

    def calculate_store_reward_and_update_store_inventories(self, current_demands, action, observation, calculate_profit=False):
        """
        Calculate the reward after demand is realized and update the inventories in ``observation``
        (in place) with the given action.

        Arguments:
            current_demands: tensor indexed as (sample, store)
            action: dict
                'stores' holds the store allocations; 'warehouses' is read only if the observation
                contains 'warehouse_inventories'
            observation: dict
                observation to read from and update; must contain 'store_inventories',
                'underage_costs', 'holding_costs' and 'lead_times'
            calculate_profit: bool
                if False, reward is underage cost + holding cost; if True, reward is
                -underage_costs * sales + holding cost

        Returns:
            reward tensor, computed element-wise from the on-hand inventory and the demands
            (assumes the cost tensors broadcast against (sample, store) -- TODO confirm)
        """

        # always read from the observation passed in, so reads and writes refer to the same dict
        store_inventory = observation['store_inventories']
        inventory_on_hand = store_inventory[:, :, 0]
        post_inventory_on_hand = inventory_on_hand - current_demands

        # reward given by underage_costs + holding_costs
        if not calculate_profit:
            reward = (
                observation['underage_costs'] * torch.clip(-post_inventory_on_hand, min=0) +
                observation['holding_costs'] * torch.clip(post_inventory_on_hand, min=0)
                )

        # reward given by -sales*price - holding_costs
        else:
            reward = (
                -observation['underage_costs'] * torch.minimum(inventory_on_hand, current_demands) +
                observation['holding_costs'] * torch.clip(post_inventory_on_hand, min=0)
                )

        if self.problem_params['lost_demand']:
            post_inventory_on_hand = torch.clip(post_inventory_on_hand, min=0)

        observation['store_inventories'] = self.update_inventory_for_heterogeneous_lead_times(
            store_inventory,
            post_inventory_on_hand,
            action['stores'],
            observation['lead_times'],
            self._internal_data['allocation_shift']
            )

        # warehouse inventories are only part of the observation when 'include_warehouse_inventory'
        # is set, and the action only carries 'warehouses' when the problem has warehouses
        if 'warehouse_inventories' in observation and 'warehouses' in action:
            observation['warehouse_inventories'] = self.update_warehouse_inventory(
                observation['warehouse_inventories'],
                action['warehouses']
                )

        return reward

    def initialize_observation(self, data, observation_params):
        """
        Build the initial observation dict from the batch data, according to observation_params
        """

        observation = {
            'store_inventories': data['initial_inventories'],
            'current_period': torch.tensor([0])
            }

        if observation_params['include_warehouse_inventory']:
            observation['warehouse_inventories'] = data['initial_warehouse_inventories']

        # include static features in observation (e.g., holding costs, underage costs, lead time and upper bounds)
        for k, v in observation_params['include_static_features'].items():
            if v:
                observation[k] = data[k]

        # initialize data for past observations of certain data (e.g., arrivals, orders)
        for k, v in observation_params['include_past_observations'].items():
            if v > 0:
                observation[k] = torch.zeros(self.batch_size, self.n_stores, v).to(self.device)

        if observation_params['demand']['past_periods'] > 0:
            observation['past_demands'] = self.update_past_demands(data, observation_params, self.batch_size, self.n_stores, current_period=0)

        if observation_params['include_days_to_christmas']:
            observation['days_to_christmas'] = self.update_days_to_christmas(data, observation_params, current_period=0)

        return observation

    def initialize_action_space(self, batch_size, problem_params, observation_params):
        """
        Initialize the action space by creating a dict with spaces.Box with shape batch_size x locations.
        'stores' is always present; 'warehouses' and 'extra_echelons' are added when the problem has any.
        """

        d = {'stores': spaces.Box(low=0.0, high=np.inf, shape=(batch_size, problem_params['n_stores']), dtype=np.float32)}

        for k1, k2 in zip(['warehouses', 'extra_echelons'], ['n_warehouses', 'n_extra_echelons']):
            if problem_params[k2] > 0:
                d[k1] = spaces.Box(low=0.0, high=np.inf, shape=(batch_size, problem_params[k2]), dtype=np.float32)

        return spaces.Dict(d)

    def initialize_observation_space(self, initial_observation, periods, problem_params):
        """
        Build the observation space with one spaces.Box per entry of the initial observation.

        Arguments:
            initial_observation: dict
                observation as returned by initialize_observation; only the shapes of its values are used
            periods: int
                upper bound for 'current_period'
            problem_params: dict
                'lost_demand' decides whether inventories/arrivals are bounded below by zero
        """

        inventory_low = 0 if problem_params['lost_demand'] else -np.inf
        box_values = {
            'arrivals': {'low': inventory_low, 'high': np.inf, 'dtype': np.float32},
            'holding_costs': {'low': 0, 'high': np.inf, 'dtype': np.float32},
            'lead_times': {'low': 0, 'high': 2*10, 'dtype': np.int8},
            # int8 cannot represent +-365, so a wider integer type is required
            'days_to_christmas': {'low': -365, 'high': 365, 'dtype': np.int16},
            'orders': {'low': 0, 'high': np.inf, 'dtype': np.float32},
            'past_demands': {'low': -np.inf, 'high': np.inf, 'dtype': np.float32},
            'store_inventories': {'low': inventory_low, 'high': np.inf, 'dtype': np.float32},
            'warehouse_inventories': {'low': 0, 'high': np.inf, 'dtype': np.float32},
            'extra_echelons_inventories': {'low': 0, 'high': np.inf, 'dtype': np.float32},
            'underage_costs': {'low': 0, 'high': np.inf, 'dtype': np.float32},
            # int64 matches the dtype of torch.tensor([0]) and does not overflow for periods > 127
            'current_period': {'low': 0, 'high': periods, 'dtype': np.int64},
        }
        # observation entries without explicit bounds (e.g., a static feature such as 'upper_bounds')
        # get an unbounded float box instead of raising KeyError
        default_box = {'low': -np.inf, 'high': np.inf, 'dtype': np.float32}

        boxes = {}
        for k, v in initial_observation.items():
            bounds = box_values.get(k, default_box)
            boxes[k] = spaces.Box(
                low=bounds['low'],
                high=bounds['high'],
                shape=tuple(v.shape),
                dtype=bounds['dtype']
                )

        return spaces.Dict(boxes)

    def update_inventory_for_heterogeneous_lead_times(self, inventory, inventory_on_hand, allocation, lead_times, allocation_shifter):
        """
        Update the inventory for heterogeneous lead times (something simpler can be done for homogeneous lead times).

        The inventory is first moved one position to the left (the quantity in position 1 arrives and is
        added to the inventory on hand, and a column of zeros is appended at the end). The allocation
        is then added by treating the tensor of the entire batch as one long vector: allocation_shifter
        gives the first position of each (sample, store), and adding (lead_time - 1) gives the actual
        position in which to insert the allocation.

        Arguments:
            inventory: tensor indexed as (sample, store, position); needs at least 2 positions
            inventory_on_hand: tensor (sample, store), on-hand inventory after demand
            allocation: tensor (sample, store), quantity ordered for each store
            lead_times: tensor broadcastable against allocation_shifter
            allocation_shifter: tensor (sample, store) from initialize_shifts_for_allocation_put

        Returns:
            new inventory tensor with the same shape as inventory
        """

        shifted_inventory = torch.stack(
            [
                inventory_on_hand + inventory[:, :, 1],
                *self.move_columns_left(inventory, 1, inventory.shape[2] - 1),
                torch.zeros_like(allocation)
                ],
            dim=2
            )

        flat_positions = (allocation_shifter + lead_times.long() - 1).flatten()

        # accumulate=True adds to whatever is already in the position (relevant for lead time 1,
        # where the allocation lands on top of the inventory on hand)
        return shifted_inventory.put(flat_positions, allocation.flatten(), accumulate=True)

    def update_warehouse_inventory(self, warehouse_inventory, action):
        """
        Update the warehouse inventory by adding the warehouse action to it
        """

        return warehouse_inventory + action

    def update_past_demands(self, data, observation_params, batch_size, stores, current_period):
        """
        Return the last 'past_periods' demands before the (shifted) current period, with shape
        batch_size x stores x past_periods. Periods before the start of the data are filled with zeros.
        """

        past_periods = observation_params['demand']['past_periods']
        current_period_shifted = current_period + self._internal_data['period_shift']

        if current_period_shifted == 0:
            past_demands = torch.zeros(batch_size, stores, past_periods).to(self.device)
        else:
            first_period = max(0, current_period_shifted - past_periods)
            past_demands = data['demands'][:, :, first_period: current_period_shifted]

            # left-pad with zeros when fewer than past_periods demands are available
            fill_with_zeros = past_periods - (current_period_shifted - first_period)
            if fill_with_zeros > 0:
                past_demands = torch.cat([
                    torch.zeros(batch_size, stores, fill_with_zeros).to(self.device),
                    past_demands
                    ],
                    dim=2)

        return past_demands

    def update_days_to_christmas(self, data, observation_params, current_period):
        """
        Return the entry of data['days_to_christmas'] for the current period, shifted by 'period_shift'
        """
        days_to_christmas = data['days_to_christmas'][current_period + observation_params['demand']['period_shift']]

        return days_to_christmas

    def move_columns_left(self, tensor_to_displace, start_index, end_index):
        """
        Return, as a list, the columns (last dimension) start_index + 1, ..., end_index of the given
        tensor, i.e., the columns that take positions start_index, ..., end_index - 1 after moving
        everything one position to the left
        """

        return [tensor_to_displace[:, :, i + 1] for i in range(start_index, end_index)]

    def update_observation(self, observation, action, demand):
        """
        Inventory update for homogeneous lead times: subtract demand from the inventory on hand,
        move the pipeline one position to the left and append the action in the last position.

        The lost-demand flag is read from problem_params and the pipeline length from the tensor
        itself, since the class keeps no separate lost_demand / lead_time attributes.

        Arguments:
            observation: tensor indexed as (sample, store, position)
            action: tensor (sample, store)
            demand: tensor (sample, store)
        """
        inventory_on_hand = observation[:, :,  0] - demand
        if self.problem_params['lost_demand']:
            inventory_on_hand = torch.clip(inventory_on_hand, min=0)

        return torch.stack([inventory_on_hand + observation[:, :, 1],
                            *self.move_columns_left(observation, 1, observation.shape[2] - 1),
                            action],
                            dim=2
                            )
    
    # def simulate_batch(self, model, demand_batch):
    #     """
    #     Simulate a batch of demand data
    #     """

    #     # initialize observation as a matrix of zeros
    #     observation = torch.zeros(demand_batch.shape[0], demand_batch.shape[1], self.lead_time).to(self.device)
    #     # print(f'initial observation: {observation.shape}')
    #     # initialize reward across batch
    #     batch_reward = 0
    #     reward_to_report = 0

    #     # loop through periods
    #     for period in range(self.periods):
    #         # get demand
    #         demand = demand_batch[:, :, period]
    #         # get action (i.e, order quantity)
    #         action = model({'x': observation})
    #         # action = model(observation)
    #         # print(f'observation: {observation[0]}')
    #         # print(f'action: {action[0]}')
    #         # print(f'demand: {demand[0]}')
    #         # print()
    #         # get new observation and reward
    #         observation, reward = self.step(observation, action, demand)
            

    #         batch_reward += reward
    #         # add reward to batch reward only after lead time has passed (as costs on first periods do not depend on agent's actions)
    #         if period >= 20:
    #         # if period >= self.lead_time:
    #             reward_to_report += reward

    #     # return reward
    #     return batch_reward, reward_to_report

In [3]:
# Build a small test scenario (datasets, loaders and model) when run as a script
if __name__ == '__main__':
    # NOTE(review): the saved output of this cell shows "KeyError: 'demand'"; the code raising it
    # is not visible in this notebook -- confirm which keys Scenario expects before relying on this cell.

    # one seed per sampled quantity
    # alternative naming kept for reference:
    # "seeds": {"underage": 28, "holding": 73, "mean": 33, "stds": 92, "demand_sequence": 57, "w_lead_time": 88, "perturbation": 84, "store_lead_times": 41}
    seeds = {
        "underage_cost": 28,
        "holding_cost": 73,
        "mean": 33,
        "coef_of_var": 92,
        "lead_time": 41,
        'demand': 57,
        "initial_inventory": 88,
    }

    problem_params = {
        'n_stores': 10,
        'n_warehouses': 1,
        'n_extra_echelons': 0,
        'periods': 50,
        'lost_demand': False,
    }
    num_samples = 512
    batch_size = 128

    # per-store parameters
    # (a Poisson alternative for demand: {'sample_across_stores': False, 'mean': [5.0], 'distribution': 'poisson', 'clip': True})
    store_params = {
        'demand': {
            'sample_across_stores': True,
            'mean_range': [2.5, 7.5],
            'coef_of_var_range': [0.16, 0.32],
            'distribution': 'normal',
            'correlation': 0.5,
            'clip': True,
        },
        # a fixed lead time can be given with 'value': 4 instead of 'range'
        'lead_time': {
            'sample_across_stores': True,
            'vary_across_samples': False,
            'range': [4, 7],
        },
        'holding_cost': {
            'sample_across_stores': False,
            'vary_across_samples': False,
            'expand': True,
            'value': 1,
        },
        # a fixed underage cost can be given with 'value': 5.0 instead of 'range'
        'underage_cost': {
            'sample_across_stores': True,
            'vary_across_samples': False,
            'expand': False,
            'range': [2.5, 7.5],
        },
        'initial_inventory': {
            'sample': False,
            'range_mult': [0, 1],
            'inventory_periods': 6,
        },
    }

    warehouse_params = {'holding_cost': 0.3, 'lead_time': 4}

    # use the first GPU when available
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"

    scenario = Scenario(problem_params, store_params, warehouse_params, num_samples, seeds)
    creator = DatasetCreator()
    train_dataset, dev_dataset = creator.create_datasets(
        scenario,
        split=True,
        by_sample_indexes=True,
        sample_index_for_split=400,
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)

    neurons_per_hidden_layer = [32, 32, 32]
    model = FullyConnectedNN(neurons_per_hidden_layer, output_size=problem_params['n_stores']).to(device)


KeyError: 'demand'

In [17]:
if __name__ == '__main__':
    # what the simulator exposes in the observation
    observation_params = dict(
        include_warehouse_inventory=False,
        # static features copied from the data into the observation when True
        include_static_features=dict(
            holding_costs=True,
            underage_costs=True,
            lead_times=True,
            upper_bounds=False,
        ),
        demand=dict(past_periods=0, period_shift=0),
        # number of past periods kept for each quantity (0 disables it)
        include_past_observations=dict(arrivals=0, orders=0),
        include_days_to_christmas=False,
    )

In [22]:
if __name__ == '__main__':
    # the simulator is created once; reset is called separately for every batch
    simulator = Simulator(device)

In [23]:
if __name__ == '__main__':
    # roll out the model on the first batch of train data and print the inventories of sample 0
    for i, data_batch in enumerate(train_loader):
        data_batch = {k: v.to(device) for k, v in data_batch.items()}
        # reset takes (periods, problem_params, data, observation_params); periods must be passed first
        observation, _ = simulator.reset(
            problem_params['periods'],
            problem_params,
            data_batch,
            observation_params
            )
        for t in range(problem_params['periods']):
            print(f't: {t}')
            print(f'store_inventories: {observation["store_inventories"][0]}')
            print()
            action = model(observation)
            observation, reward, terminated, _, _  = simulator.step(action)
            if terminated:
                print(f'terminated after t: {t}')
                break
        # break after first batch
        break

t: 0
store_inventories: tensor([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]], device='cuda:0')

current_demands.shape: torch.Size([128, 10])
store_inventories.shape: torch.Size([128, 10])
t: 1
store_inventories: tensor([[-2.0828,  0.0000,  0.0000,  1.1274,  0.0000,  0.0000],
        [-1.6807,  0.0000,  0.0000,  1.1841,  0.0000,  0.0000],
        [-3.4447,  0.0000,  0.0000,  0.0000,  0.0000,  0.9343],
        [-1.4319,  0.0000,  0.0000,  0.8862,  0.0000,  0.0000],
        [-4.8511,  0.0000,  0.0000,  0.0000,  1.0472,  0.0000],
        [-1.5835,  0.0000,  0.0000,  0.0000,  0.9497,  0.0000],
        [-1.1774,  0.0000,  0.0000,  0.0000,  1.0687,  0.0000],
        [-3.2697,  0.0000,  0.0000,  0.0000,  0.0000,