In [1]:
import import_ipynb
from shared_imports import *
from environment import *
from loss_functions import *

importing Jupyter notebook from shared_imports.ipynb


importing Jupyter notebook from environment.ipynb
importing Jupyter notebook from data_handling.ipynb
importing Jupyter notebook from neural_networks.ipynb
importing Jupyter notebook from loss_functions.ipynb


In [2]:
class Trainer():
    """
    Runs training, dev evaluation and test evaluation of a policy model against a simulator.

    Attributes:
        all_train_losses: reported per-period train loss, one entry per training epoch.
        all_dev_losses: reported per-period dev loss, one entry per training epoch
            (epochs without a dev evaluation repeat the latest measured value).
        all_test_losses: list reserved for test losses; nothing in this class appends to it.
        device: device every batch is moved to before it is simulated.
    """

    def __init__(self,  device='cpu'):

        self.all_train_losses = []
        self.all_dev_losses = []
        self.all_test_losses = []
        self.device = device

    def reset(self):
        """
        Clear every recorded loss history (train, dev and test).
        """

        self.all_train_losses = []
        # The dev history grows in lock-step with the train history, so it must be
        # cleared together with it or the two curves end up misaligned.
        self.all_dev_losses = []
        self.all_test_losses = []

    def train(self, epochs, loss_function, simulator, model, data_loaders, optimizer, problem_params, observation_params, params_by_dataset, trainer_params):
        """
        Train the model for `epochs` passes over the train set.

        Parameters:
            epochs: number of passes through data_loaders['train'].
            loss_function, simulator, model, optimizer, problem_params, observation_params:
                forwarded unchanged to do_one_epoch.
            data_loaders: mapping with at least the keys 'train' and 'dev'.
            params_by_dataset: mapping with 'train' and 'dev' entries, each providing
                'periods' and 'ignore_periods'.
            trainer_params: mapping providing 'do_dev_every_n_epochs' and
                'print_results_every_n_epochs'.

        Side effects:
            Appends one value per epoch to all_train_losses and all_dev_losses and
            prints progress every trainer_params['print_results_every_n_epochs'] epochs.
        """

        for epoch in range(epochs):  # make multiple passes through the dataset

            _, average_train_loss_to_report = self.do_one_epoch(
                optimizer,
                data_loaders['train'],
                loss_function,
                simulator,
                model,
                params_by_dataset['train']['periods'],
                problem_params,
                observation_params,
                train=True,
                ignore_periods=params_by_dataset['train']['ignore_periods']
                )

            self.all_train_losses.append(average_train_loss_to_report)

            if epoch % trainer_params['do_dev_every_n_epochs'] == 0:
                _, average_dev_loss_to_report = self.do_one_epoch(
                    optimizer,
                    data_loaders['dev'],
                    loss_function,
                    simulator,
                    model,
                    params_by_dataset['dev']['periods'],
                    problem_params,
                    observation_params,
                    train=False,
                    ignore_periods=params_by_dataset['dev']['ignore_periods']
                    )
            else:
                # No dev pass this epoch: carry the latest measured value forward so the
                # dev history stays aligned with the train history (epoch 0 always
                # evaluates, so a previous value exists) and so the progress report
                # shows a real measurement rather than a placeholder.
                average_dev_loss_to_report = self.all_dev_losses[-1]

            self.all_dev_losses.append(average_dev_loss_to_report)

            # periodically print the epoch number and the average per-period losses
            if epoch % trainer_params['print_results_every_n_epochs'] == 0:
                print()
                print(f'epoch: {epoch + 1}')
                print(f'Average per-period train loss: {average_train_loss_to_report}')
                print(f'Average per-period dev loss: {average_dev_loss_to_report}')

    def test(self, loss_function, simulator, model, data_loaders, optimizer, problem_params, observation_params, params_by_dataset, discrete_demand=False):
        """
        Evaluate the model on data_loaders['test'] without updating its parameters.

        Parameters:
            optimizer: accepted for signature compatibility; it is not stepped here.
            discrete_demand: accepted for signature compatibility; currently unused.
            Remaining arguments are forwarded to do_one_epoch, using the 'periods' and
            'ignore_periods' entries of params_by_dataset['test'].

        Returns:
            (average_test_loss, average_test_loss_to_report) as returned by do_one_epoch.
        """

        average_test_loss, average_test_loss_to_report = self.do_one_epoch(
                optimizer,
                data_loaders['test'],
                loss_function,
                simulator,
                model,
                params_by_dataset['test']['periods'],
                problem_params,
                observation_params,
                train=False,  # evaluation only: never take gradient steps on test data
                ignore_periods=params_by_dataset['test']['ignore_periods']
                )

        return average_test_loss, average_test_loss_to_report

    def do_one_epoch(self, optimizer, data_loader, loss_function, simulator, model, periods, problem_params, observation_params, train=True, ignore_periods=0):
        """
        Make one pass over data_loader, optionally taking a gradient step per batch.

        Parameters:
            optimizer: used (zero_grad / step) only when train is True.
            data_loader: iterable of batches exposing a `dataset` attribute with a length.
                Each batch is a dict whose values support `.to(device)`; the 'demands'
                entry is read for the batch size when training.
            periods: number of periods simulated per sample.
            train: when True, back-propagate the per-sample, per-period mean loss of each
                batch and step the optimizer.
            ignore_periods: number of initial periods excluded from the reported loss.

        Returns:
            Tuple of
              - loss summed over all batches divided by (total_samples * periods), and
              - loss accumulated from period `ignore_periods` onwards divided by
                (total_samples * (periods - ignore_periods)).
        """

        epoch_loss = 0
        epoch_loss_to_report = 0
        total_samples = len(data_loader.dataset)
        periods_tracking_loss = periods - ignore_periods  # number of periods for which we report the loss

        for data_batch in data_loader:  # loop through batches of data

            data_batch = self.move_batch_to_device(data_batch)

            if train:
                # zero-out the gradient
                optimizer.zero_grad()

            # forward pass
            total_reward, reward_to_report = self.simulate_batch(
                loss_function, simulator, model, periods, problem_params, data_batch, observation_params, ignore_periods
                )
            epoch_loss += total_reward.item()  # rewards from period 0
            epoch_loss_to_report += reward_to_report.item()  # rewards from period ignore_periods onwards

            # backward pass (to calculate gradient) and take gradient step
            if train:
                # The normalized loss is only needed for the gradient, so it is built
                # only when a step is actually going to be taken.
                mean_loss = total_reward/(len(data_batch['demands'])*periods)
                mean_loss.backward()
                optimizer.step()

        return epoch_loss/(total_samples*periods), epoch_loss_to_report/(total_samples*periods_tracking_loss)

    def simulate_batch(self, loss_function, simulator, model, periods, problem_params, data_batch, observation_params, ignore_periods=0):
        """
        Simulate for an entire batch of data, across the specified number of periods.

        The simulator is reset once, then for each period the model maps the current
        observation to an action, the simulator is stepped with it, and the loss function
        is applied to the resulting reward. The loop stops early if the simulator reports
        termination.

        Returns:
            Tuple (batch_reward, reward_to_report): the loss summed over all simulated
            periods, and the loss summed over periods t >= ignore_periods.
        """

        # initialize reward across batch
        batch_reward = 0
        reward_to_report = 0

        observation, _ = simulator.reset(periods, problem_params, data_batch, observation_params)
        for t in range(periods):
            action = model(observation)

            # No copy of the previous observation is kept; the loss function receives None.
            past_observation = None

            observation, reward, terminated, _, _  = simulator.step(action)

            total_reward = loss_function(past_observation, action, reward)

            batch_reward += total_reward
            if t >= ignore_periods:
                reward_to_report += total_reward

            if terminated:
                break

        return batch_reward, reward_to_report

    def plot_losses(self, ymin=None, ymax=None):
        """
        Plot the recorded train and dev losses for each epoch.

        The test history is drawn as well when it is non-empty. `ymin` and `ymax` bound
        the y-axis; either may be given on its own.
        """

        plt.plot(self.all_train_losses, label='train loss')
        # train() records the dev curve, so that is the one plotted next to the train curve.
        plt.plot(self.all_dev_losses, label='dev loss')
        if self.all_test_losses:
            plt.plot(self.all_test_losses, label='test loss')
        plt.legend()

        # A limit left as None is passed through unchanged, so one-sided limits work too.
        if ymin is not None or ymax is not None:
            plt.ylim(ymin, ymax)
        plt.show()

    def move_batch_to_device(self, data_batch):
        """
        Move a batch of data to the device (CPU or GPU).

        Returns a new dict with the same keys whose values are `v.to(self.device)`.
        """

        return {k: v.to(self.device) for k, v in data_batch.items()}

In [1]:
# Experiment setup: only executed when this notebook is run directly, not when it is imported.
if __name__ == '__main__':
    # Seeds handed to Scenario for the train/dev data; the test scenario uses the same
    # seeds shifted by 8.
    seeds = {
        "underage_cost": 28,
        "holding_cost": 73,
        "mean": 33,
        "coef_of_var": 92,
        "lead_time": 41,
        'demand': 57,
        "initial_inventory": 4839,
        }
    shifted_seeds = {key: val + 8 for key, val in seeds.items()}

    problem_params = {'n_stores': 1, 'n_warehouses': 0, 'n_extra_echelons': 0, 'lost_demand': False}

    # Per-split dataset size, batch size, simulated periods and the number of initial
    # periods left out of the reported loss.
    params_by_dataset = {
        'train': {'n_samples': 2**15, 'batch_size': 2**13, 'periods': 50, 'ignore_periods': 30},
        'dev': {'n_samples': 2**15, 'batch_size': 2**13, 'periods': 50, 'ignore_periods': 30},
        'test': {'n_samples': 2**15, 'batch_size': 2**13, 'periods': 500, 'ignore_periods': 300},
        }

    trainer_params = {'do_dev_every_n_epochs': 5, 'print_results_every_n_epochs': 10}

    observation_params = {
        'include_warehouse_inventory': False,
        'include_static_features': {
            'holding_costs': True,
            'underage_costs': True,
            'lead_times': True,
            'upper_bounds': False,
            },
        'demand': {
            'past_periods': 0,
            'period_shift': 0,
            },
        'include_past_observations': {
            'arrivals': 0,
            'orders': 0,
            },
        'include_days_to_christmas': False,
        }

    store_params = {
        'demand': {
            'sample_across_stores': False,
            'expand': True,
            'mean': [5.0],
            'std': [1.6],
            'distribution': 'normal',
            'clip': True,
            'decimals': 3,
            },
        'lead_time': {
            'sample_across_stores': False,
            'vary_across_samples': False,
            'expand': True,
            'value': 4,
            },
        'holding_cost': {
            'sample_across_stores': False,
            'vary_across_samples': False,
            'expand': True,
            'value': 1,
            },
        'underage_cost': {
            'sample_across_stores': False,
            'vary_across_samples': False,
            'expand': True,
            'value': 4.0,
            },
        'initial_inventory': {
            'sample': True,
            'range_mult': [0, 1],
            'inventory_periods': 4,
            },
        }

    warehouse_params = {'holding_cost': 0.3, 'lead_time': 4}

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    creator = DatasetCreator()

    # One scenario holds the train and dev samples together; it is then split by sample index.
    scenario = Scenario(
        params_by_dataset['train']['periods'],
        problem_params,
        store_params,
        warehouse_params,
        params_by_dataset['train']['n_samples'] + params_by_dataset['dev']['n_samples'],
        seeds,
        )

    train_dataset, dev_dataset = creator.create_datasets(
        scenario,
        split=True,
        by_sample_indexes=True,
        sample_index_for_split=params_by_dataset['dev']['n_samples'],
        )
    train_loader = DataLoader(train_dataset, batch_size=params_by_dataset['train']['batch_size'], shuffle=True)
    dev_loader = DataLoader(dev_dataset, batch_size=params_by_dataset['dev']['batch_size'], shuffle=False)

    # The test scenario has its own horizon and is generated from the shifted seeds.
    scenario = Scenario(
        params_by_dataset['test']['periods'],
        problem_params,
        store_params,
        warehouse_params,
        params_by_dataset['test']['n_samples'],
        shifted_seeds,
        )
    test_dataset = creator.create_datasets(scenario, split=False)
    test_loader = DataLoader(test_dataset, batch_size=params_by_dataset['test']['batch_size'], shuffle=False)

    data_loaders = {'train': train_loader, 'dev': dev_loader, 'test': test_loader}

    neurons_per_hidden_layer = [32, 32, 32]
    # Give the network the same device it is moved to instead of a hard-coded 'cpu'.
    # NOTE(review): assumes FullyConnectedNN uses its `device` argument to place tensors
    # it creates itself - confirm against neural_networks.ipynb.
    model = FullyConnectedNN(
        neurons_per_hidden_layer,
        output_size=problem_params['n_stores'],
        device=device,
        ).to(device)
    loss_function = PolicyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


NameError: name 'torch' is not defined

In [5]:
# optimizer = torch.optim.Adam(model.parameters(), lr=0.0000001)

In [18]:
if __name__ == '__main__':
    # Build the simulator and the trainer on the selected device, then run 1000 training epochs.
    simulator = Simulator(device=device)
    trainer = Trainer(device=device)
    trainer.train(
        epochs=1000,
        loss_function=loss_function,
        simulator=simulator,
        model=model,
        data_loaders=data_loaders,
        optimizer=optimizer,
        problem_params=problem_params,
        observation_params=observation_params,
        params_by_dataset=params_by_dataset,
        trainer_params=trainer_params,
        )


epoch: 1
Average per-period train loss: 306.3996780395508
Average per-period dev loss: 41.67693176269531

epoch: 11
Average per-period train loss: 6.318474388122558
Average per-period dev loss: 6.793968391418457

epoch: 21
Average per-period train loss: 5.594799423217774
Average per-period dev loss: 5.585102367401123

epoch: 31
Average per-period train loss: 5.366036128997803
Average per-period dev loss: 5.371010780334473

epoch: 41
Average per-period train loss: 5.247053432464599
Average per-period dev loss: 5.259267044067383

epoch: 51
Average per-period train loss: 5.191890716552734
Average per-period dev loss: 5.205890560150147

epoch: 61
Average per-period train loss: 5.149415397644043
Average per-period dev loss: 5.164308738708496

epoch: 71
Average per-period train loss: 5.109719562530517
Average per-period dev loss: 5.124061584472656

epoch: 81
Average per-period train loss: 5.07252254486084
Average per-period dev loss: 5.0866899490356445

epoch: 91
Average per-period train lo

KeyboardInterrupt: 