In [1]:
from ipynb.fs.full.shared_imports import *

In [2]:
class Scenario():
    """
    Class to generate a setting when there are parameters to be sampled.
    For example, we might sample the mean demand and std for each store, which is later going to be used to sample actual demand samples

    All random draws go through the global numpy RNG (np.random). Every generator re-seeds it with its
    own entry of `seeds` before drawing, so each quantity is reproducible independently of the others.
    """

    def __init__(self, periods, problem_params, store_params, warehouse_params, num_samples, seeds=None):
        """
        Generate every piece of data of the scenario.

        Parameters
        ----------
        periods: number of periods of demand to generate for each sample
        problem_params: dict, must contain 'n_stores'
        store_params: dict with entries 'demand', 'underage_cost', 'holding_cost', 'lead_time' and 'initial_inventory'
        warehouse_params: dict, must contain 'lead_time'
        num_samples: number of samples to generate
        seeds: dict with keys 'mean', 'coef_of_var' (only read when demand parameters are sampled), 'demand',
            'underage_cost', 'holding_cost', 'lead_time' and 'initial_inventory'.
            If None, the numpy RNG is never re-seeded (draws follow the current global RNG state)
        """

        self.problem_params = problem_params
        self.store_params = store_params
        self.warehouse_params = warehouse_params
        self.num_samples = num_samples
        self.periods = periods
        self.seeds = seeds

        # the generators below read self.num_samples and self.periods, so those must be set first
        self.demands = self.generate_demand_samples(problem_params, store_params['demand'], seeds)
        self.underage_costs = self.generate_data_for_samples_and_stores(problem_params, store_params['underage_cost'], self._seed_for(seeds, 'underage_cost'), discrete=False)
        self.holding_costs = self.generate_data_for_samples_and_stores(problem_params, store_params['holding_cost'], self._seed_for(seeds, 'holding_cost'), discrete=False)
        self.lead_times = self.generate_data_for_samples_and_stores(problem_params, store_params['lead_time'], self._seed_for(seeds, 'lead_time'), discrete=True)
        self.initial_inventories = self.generate_initial_inventories(problem_params, store_params, self.demands, self._seed_for(seeds, 'initial_inventory'))
        self.initial_warehouse_inventories = self.generate_initial_warehouse_inventory(warehouse_params)

        self.days_from_christmas = self.generate_days_from_christmas(store_params)
        self.split_by = self.define_how_to_split_data()

    @staticmethod
    def _seed_for(seeds, key):
        """
        Return seeds[key], or None when no seeds were provided at all.
        A seeds dict that lacks the key still raises KeyError, so that a typo cannot silently break reproducibility
        """

        return None if seeds is None else seeds[key]

    @staticmethod
    def _set_seed(seed):
        """
        Seed the global numpy RNG, unless seed is None (in which case the RNG state is left untouched)
        """

        if seed is not None:
            np.random.seed(seed)

    def get_data(self):
        """
        Return the generated data. Will be part of a Dataset
        Entries that are None (i.e., were not generated) are dropped, and the rest are cast to float
        """

        data = {'demands': self.demands,
                'underage_costs': self.underage_costs,
                'holding_costs': self.holding_costs,
                'lead_times': self.lead_times,
                'initial_inventories': self.initial_inventories,
                'initial_warehouse_inventories': self.initial_warehouse_inventories,
                'days_from_christmas': self.days_from_christmas
                }

        return {k: v.float() for k, v in data.items() if v is not None}

    def define_how_to_split_data(self):
        """
        Define how to split the data into different samples
        If demand comes from real data, the training and dev sets correspond to different periods.
        However, if it is generated, the split is according to sample indexes

        The names listed here are the keys of the dict returned by get_data()
        """

        split_by = {'sample_index': ['underage_costs', 'holding_costs', 'lead_times', 'initial_inventories', 'initial_warehouse_inventories'],
                    'period': []}

        if self.store_params['demand']['distribution'] == 'real':
            split_by['period'].append('demands')
        else:
            split_by['sample_index'].append('demands')
        if self.days_from_christmas is not None:
            split_by['period'].append('days_from_christmas')

        return split_by

    def generate_demand_samples(self, problem_params, demand_params, seeds):
        """
        Generate demand data
        Returns a tensor of shape (num_samples, n_stores, periods)
        Raises KeyError if demand_params['distribution'] is neither 'normal' nor 'poisson'
        """

        # sample parameters to generate demand if necessary (otherwise, does nothing)
        self.generate_demand_parameters(problem_params, demand_params, seeds)

        demand_generator_functions = {"normal": self.generate_normal_demand, 'poisson': self.generate_poisson_demand}

        # sample demand
        generator = demand_generator_functions[demand_params['distribution']]
        demand = generator(problem_params, demand_params, self._seed_for(seeds, 'demand'))

        if demand_params['clip']:
            # truncate negative demand at zero
            demand = np.clip(demand, 0, None)

        return torch.tensor(demand)

    def generate_demand_parameters(self, problem_params, demand_params, seeds):
        """
        Sample parameters of demand distribution, if necessary
        Note that the sampled 'mean' and 'std' are written into demand_params (the caller's dict is modified in place)
        """

        if demand_params['sample_across_stores']:  # only supported for normal demand
            demand_params.update(self.sample_normal_mean_and_std(problem_params, demand_params, seeds))

    def generate_normal_demand(self, problem_params, demand_params, seed):
        """
        Generate normal demand data
        With one store, demand is drawn from a univariate normal.
        With several stores, demand is drawn from a multivariate normal in which every pair of distinct stores
        has the same correlation (demand_params['correlation'])
        Returns an array of shape (num_samples, n_stores, periods)
        """

        # set seed
        self._set_seed(seed)

        if problem_params['n_stores'] == 1:
            demand = np.random.normal(demand_params['mean'],
                                      demand_params['std'],
                                      size=(self.num_samples, 1, self.periods)
                                      )
        else:
            # calculate covariance matrix and sample from multivariate normal
            # diagonal entries are the variances, off-diagonal ones are correlation * std_i * std_j
            correlation = demand_params['correlation']
            cov_matrix = [[correlation*v1*v2 if i != j else v1*v2
                           for i, v1 in enumerate(demand_params['std'])
                           ]
                          for j, v2 in enumerate(demand_params['std'])
                          ]
            demand = np.random.multivariate_normal(demand_params['mean'], cov=cov_matrix, size=(self.num_samples, self.periods))
            # draws come out as (num_samples, periods, n_stores), so move the store axis to position 1
            demand = np.transpose(demand, (0, 2, 1))

        return demand

    def generate_poisson_demand(self, problem_params, demand_params, seed):
        """
        Generate poisson demand data
        demand_params['mean'] can be a scalar (shared by all stores) or contain one mean per store
        Returns an array of shape (num_samples, n_stores, periods)
        """

        # set seed
        self._set_seed(seed)

        n_stores = problem_params['n_stores']
        mean = demand_params['mean']
        mean_array = np.asarray(mean)
        if mean_array.ndim == 1 and mean_array.size == n_stores:
            # a per-store vector has to be aligned with the store axis; left as is, numpy would broadcast it
            # against the last (period) axis
            mean = mean_array.reshape(n_stores, 1)

        return np.random.poisson(mean, size=(self.num_samples, n_stores, self.periods))

    def generate_data(self, demand_params, **kwargs):
        """
        Generate demand data
        NOTE(review): generate_normal_demand_for_one_store is not defined in this class, so this method raises
        AttributeError unless a subclass provides it. It is not called from within this class
        """
        demand_generator_functions = {"normal": self.generate_normal_demand_for_one_store}
        demand = demand_generator_functions[demand_params['distribution']](demand_params, **kwargs)

        if demand_params['clip']:
            demand = np.clip(demand, 0, None)

        return torch.tensor(demand)

    def sample_normal_mean_and_std(self, problem_params, demand_params, seeds):
        """
        Sample mean and std for normal demand
        For each store, the mean and the coefficient of variation are drawn uniformly from 'mean_range' and
        'coef_of_var_range', and the std is their product
        Returns a dict with keys 'mean' and 'std', each an array with one entry per store
        """

        n_stores = problem_params['n_stores']

        # set seed
        self._set_seed(self._seed_for(seeds, 'mean'))
        means = np.random.uniform(demand_params['mean_range'][0], demand_params['mean_range'][1], n_stores)

        # the coefficient of variation uses its own seed, so that it does not depend on how means were drawn
        self._set_seed(self._seed_for(seeds, 'coef_of_var'))
        coef_of_var = np.random.uniform(demand_params['coef_of_var_range'][0], demand_params['coef_of_var_range'][1], n_stores)

        stds = means * coef_of_var
        return {'mean': means, 'std': stds}

    def generate_data_for_samples_and_stores(self, problem_params, cost_params, seed, discrete=False):
        """
        Generate cost or lead time data, for each sample and store

        The first option that is set in cost_params is applied (options that are missing count as not set):
        - 'sample_across_stores': one value per store drawn from 'range', shared by all samples
        - 'vary_across_samples': one value per sample drawn from 'range', shared by all stores
        - 'expand': the fixed 'value' is repeated for every sample and store
        - otherwise, 'value' is returned as a tensor, without any expansion
        In the first three cases the result has shape (num_samples, n_stores)

        If discrete is True, values are drawn with np.random.randint (which excludes the upper end of 'range');
        otherwise with np.random.uniform
        """

        # set seed
        self._set_seed(seed)

        # sample uniformly from the range (discrete or continuous)
        this_sample_function = np.random.randint if discrete else np.random.uniform
        n_stores = problem_params['n_stores']

        if cost_params.get('sample_across_stores'):
            per_store = torch.tensor(this_sample_function(*cost_params['range'], n_stores))
            return per_store.expand(self.num_samples, -1)

        if cost_params.get('vary_across_samples'):
            per_sample = torch.tensor(this_sample_function(*cost_params['range'], self.num_samples))
            return per_sample.unsqueeze(1).expand(-1, n_stores)

        if cost_params.get('expand'):
            return torch.tensor([cost_params['value']]).expand(self.num_samples, n_stores)

        return torch.tensor(cost_params['value'])

    def generate_initial_inventories(self, problem_params, store_params, demands, seed):
        """
        Generate initial inventory data
        If store_params['initial_inventory']['sample'] is set, every entry is the average demand of its store
        (across samples and periods) times a multiplier drawn uniformly from 'range_mult'. Otherwise, it is all zeros
        Returns a tensor of shape (num_samples, n_stores, inventory_periods)
        """

        # set seed
        self._set_seed(seed)

        inventory_params = store_params['initial_inventory']
        shape = (self.num_samples,
                 problem_params['n_stores'],
                 inventory_params['inventory_periods']
                 )

        if not inventory_params['sample']:
            return torch.zeros(*shape)

        # average over periods and then over samples, which leaves one mean per store
        demand_mean = demands.float().mean(dim=2).mean(dim=0)
        demand_mults = np.random.uniform(*inventory_params['range_mult'], size=shape)

        # convert explicitly, instead of relying on implicit tensor * ndarray interoperability
        return demand_mean[None, :, None] * torch.from_numpy(demand_mults)

    def generate_initial_warehouse_inventory(self, warehouse_params):
        """
        Generate initial warehouse inventory data
        Returns a tensor of zeros of shape (num_samples, 1, warehouse lead time)
        """

        return torch.zeros(self.num_samples,
                           1,
                           warehouse_params['lead_time']
                           )

    def generate_days_from_christmas(self, store_params):
        """
        Generate the days from christmas for each period
        Only meaningful for real demand data (which is not implemented yet). For generated demand, returns None
        """

        if store_params['demand']['distribution'] == 'real':
            raise NotImplementedError
        else:
            return None

In [1]:
class MyDataset(Dataset):
    """
    Dataset backed by a dict of indexable containers.
    Indexing the dataset indexes every entry of the dict with the same index
    """

    def __init__(self, num_samples, data):
        """
        num_samples: value reported by len()
        data: dict mapping a name to an indexable container
        """
        self.num_samples = num_samples
        self.data = data

    def __len__(self):
        """
        Number of samples, as given at construction (it is not derived from data)
        """
        return self.num_samples

    def __getitem__(self, idx):
        """
        Return a dict with the same keys as data, holding the idx-th element of each entry
        """
        item = {}
        for name, values in self.data.items():
            item[name] = values[idx]
        return item

NameError: name 'Dataset' is not defined

In [79]:
class DatasetCreator():
    """
    Build datasets (optionally split into train and dev sets) from the data generated by a scenario
    """

    def __init__(self):

        pass

    def create_datasets(self, scenario, split=True, by_period=False, by_sample_indexes=False, period_for_split=None, sample_index_for_split=None):
        """
        Create the datasets for a scenario

        If split is False, returns a single dataset with all the data.
        If split is True, returns (train_dataset, dev_dataset), split either by period (by_period=True)
        or by sample index (by_sample_indexes=True). by_period takes precedence if both are set

        Raises NotImplementedError if split is True and no way of splitting is selected, or if by_period is
        selected (splitting by period is not implemented in this class; a subclass may provide a
        split_by_period(scenario, period_for_split) method returning (train_data, dev_data))
        """

        if not split:
            return self.create_single_dataset(scenario.get_data())

        if by_period:
            # look the method up dynamically, so that subclasses defining it keep working
            split_by_period = getattr(self, 'split_by_period', None)
            if split_by_period is None:
                raise NotImplementedError('Splitting by period is not implemented')
            train_data, dev_data = split_by_period(scenario, period_for_split)
        elif by_sample_indexes:
            train_data, dev_data = self.split_by_sample_index(scenario, sample_index_for_split)
        else:
            raise NotImplementedError

        return self.create_single_dataset(train_data), self.create_single_dataset(dev_data)

    def split_by_sample_index(self, scenario, sample_index_for_split):
        """
        Split dataset into dev and train sets by sample index
        We consider the first entries to correspond to the dev set (so that size of train set does not impact it)
        This should be used when demand is synthetic (otherwise, if demand is real, there would be data leakage)

        Returns (train_data, dev_data)
        Raises ValueError if sample_index_for_split is None
        """

        if sample_index_for_split is None:
            # slicing with None would silently put every sample in both the train and the dev set
            raise ValueError('sample_index_for_split must be provided to split by sample index')

        data = scenario.get_data()

        dev_data = {k: v[:sample_index_for_split] for k, v in data.items()}
        train_data = {k: v[sample_index_for_split:] for k, v in data.items()}

        return train_data, dev_data

    def create_single_dataset(self, data):
        """
        Create a single dataset
        The number of samples is taken from the length of data['initial_inventories']
        """

        num_samples = len(data['initial_inventories'])

        return MyDataset(num_samples, data)
    

In [86]:
# only execute if name is main
if __name__ == '__main__':
    # test the classes: build a synthetic scenario, split it by sample index and wrap both parts in loaders
    seeds = {"underage_cost": 28, "holding_cost": 73, "mean": 33, "coef_of_var": 92, "lead_time": 41, 'demand': 57, "initial_inventory": 88}
    # "seeds": {"underage": 28, "holding": 73, "mean": 33, "stds": 92, "demand_sequence": 57, "w_lead_time": 88, "perturbation": 84, "store_lead_times": 41}
    problem_params = {'n_stores': 10, 'periods': 50}
    num_samples = 512
    batch_size = 128
    store_params = {'demand': {'sample_across_stores': True,
                               'mean_range': [2.5, 7.5],
                               'coef_of_var_range': [0.16, 0.32],
                               'distribution': 'normal',
                               'correlation': 0.5,
                               'clip': True
                               },

                    # 'demand_1': {'sample_across_stores': False,
                    #              'mean': [5.0],
                    #              'distribution': 'poisson',
                    #              'clip': True
                    #              },

                    'lead_time': {'sample_across_stores': True,
                                  'vary_across_samples': False,
                                  'range': [4, 7]
                                  # 'value': 4
                                  },

                    'holding_cost': {'sample_across_stores': False,
                                     'vary_across_samples': False,
                                     'expand': True,
                                     'value': 1
                                     },

                    'underage_cost': {'sample_across_stores': True,
                                      'vary_across_samples': False,
                                      'expand': False,
                                      # 'value': 5.0,
                                      'range': [2.5, 7.5],
                                      },

                    'initial_inventory': {'sample': True,
                                          'range_mult': [0, 1],
                                          'inventory_periods': 6
                                          }
                    }

    warehouse_params = {'holding_cost': 0.3,
                        'lead_time': 4}

    # Scenario takes the number of periods as its first argument; leaving it out shifts every other argument
    scenario = Scenario(problem_params['periods'], problem_params, store_params, warehouse_params, num_samples, seeds)
    creator = DatasetCreator()
    # the first 400 samples go to the dev set, the remaining ones to the train set
    train_dataset, dev_dataset = creator.create_datasets(scenario, split=True, by_sample_indexes=True, sample_index_for_split=400)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)

In [80]:
if __name__ == "__main__":
    # instantiate the creator again (e.g., after re-running the cell that defines DatasetCreator)
    creator = DatasetCreator()

In [87]:
if __name__ == "__main__":
    # split the scenario by sample index and wrap each part in a loader (only the train set is shuffled)
    train_dataset, dev_dataset = creator.create_datasets(
        scenario,
        split=True,
        by_sample_indexes=True,
        sample_index_for_split=400,
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)