In [1]:
import numpy as np
import gym
# Libs
import logging
import pandas as pd
from collections import OrderedDict
import pickle
import torch
import gym
from gym import spaces

# Own Modules
from source_torch.mlca.mlca import mlca_mechanism
from source_torch.mlca.mlca_setup import set_value_model_parameters
from source_torch.util import initial_bids_mlca_predefined

  "Gym minimally supports python 3.6 as the python foundation not longer supports the version, please update your version to 3.7+"


/home/artem/.conda/envs/ica_new
MLCA NN Class imported
MLCA NN_MIP Class imported
WDP Class imported
MLCA_Economies imported
MLCA function imported


In [23]:
class AuctionWorldEnv(gym.Env):
    """Gym environment in which the agent picks initial bundles for an MLCA run.

    An action is an index into the list of all ``2**n_items`` binary item
    vectors. Each call to :meth:`step` appends the selected bundle to the
    running list, runs ``mlca_mechanism`` once and returns the mean of the
    reported ``'MLCA_Efficiency'`` values as the reward. The episode ends once
    ``Qinit`` bundles have been chosen.

    NOTE(review): no ``observation_space`` is defined and observations are
    dicts, so code that reads ``env.observation_space.n`` or indexes a table
    by the observation cannot be used with this environment as-is.
    """

    def __init__(self, n_items=5, Qinit = 10, Qmax = 10, seed=None):
        """Set up the action space and the per-episode bookkeeping.

        Args:
            n_items: number of items; the action space has ``2**n_items`` actions.
            Qinit: number of bundles per episode; also forwarded to MLCA as 'Qinit'.
            Qmax: forwarded to MLCA as 'Qmax'.
            seed: accepted for interface compatibility; currently unused.
        """
        self.n_items = n_items
        self.Qinit = Qinit
        self.Qmax = Qmax
        # combinations[a] is the 0/1 item vector selected by action a.
        self.combinations = self.generate_combinations(n_items)
        self.iter_Qinit = 0
        self.bundles = []

        # One discrete action per possible bundle (each item is either in or out).
        self.action_space = spaces.Discrete(2**n_items)

    def start_mlca(self, bundles, Qinit, Qmax):
        """Run the MLCA mechanism on the LSVM domain and collect its efficiency.

        Args:
            bundles: initial bids, placed into the config as
                ``[bundles, None]`` under 'init_bids_and_fitted_scaler'.
            Qinit: value for the 'Qinit' config entry.
            Qmax: value for the 'Qmax' config entry.

        Returns:
            List with one ``res['MLCA_Efficiency']`` entry per auction
            instance (currently a single instance with seed 0). The list is
            empty when the result carries no such entry.
        """
        lr = 0.01
        regn = 1e-5
        scores = []
        # i doubles as the SATS auction instance seed.
        for i in range(1):
            print(lr, i)

            # Drop existing root handlers so basicConfig below takes effect on
            # every call (basicConfig is a no-op when handlers already exist).
            for handler in logging.root.handlers[:]:
                logging.root.removeHandler(handler)
            # log debug to console
            logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:               %(message)s', datefmt='%H:%M:%S')

            configdict_mlca = OrderedDict([('SATS_domain_name','LSVM'),
                                ('SATS_auction_instance_seed', i),
                                ('bidders',0),
                                ('items',0),
                                ('bidder_ids',0),
                                ('Qinit', Qinit),
                                ('Qmax',Qmax),
                                ('Qround',0),
                                ('Starter','mlca_extra'),
                                ('epochs' , 30),
                                ('batch_size' , 30),
                                ('regularization_type' , 'l1_l2'),
                                ('layer_N' , [10, 10]),
                                ('layer_R' ,[32, 32] ),
                                ('layer_L' , [16, 16]),
                                ('NN_parameters',[]),
                                ('bigM',6000),
                                ('warm_start',False),
                                ('MIP_parameters',[]),
                                ('scaler',None),
                                ('init_bids_and_fitted_scaler',[bundles,None]),
                                ('return_allocation',True),
                                ('return_payments',True),
                                ('lr',lr),
                                ('regn',regn),
                                ('calc_efficiency_per_iteration',True),
                                ('active_learning_algorithm','predefined'),
                                ])

            configdict_mlca = set_value_model_parameters(configdict_mlca)

            res = mlca_mechanism(configdict = configdict_mlca)
            try:
                scores.append(res['MLCA_Efficiency'])
            except (KeyError, TypeError):
                # Best effort: a run without an efficiency entry contributes
                # no score, but the omission is reported instead of hidden.
                logging.warning("MLCA result has no 'MLCA_Efficiency' entry; score skipped")
        return scores

    def generate_combinations(self, n):
        """Return all ``2**n`` 0/1 vectors of length ``n`` in counting order.

        Entry ``i`` is the binary representation of ``i``, most significant
        bit first, left-padded with zeros to ``n`` digits.
        """
        return [
            [int(digit) for digit in bin(i)[2:].zfill(n)]
            for i in range(2**n)
        ]

    def _get_obs(self):
        """Build the observation dict from the current episode state."""
        # Return a copy of the bundle list: step() keeps appending to
        # self.bundles, which would otherwise silently change observations
        # that were handed out earlier in the episode.
        return {"bundles": list(self.bundles), "iter_Qinit": self.iter_Qinit}

    def reset(self, seed=None):
        """Start a new episode with no bundles selected and return the observation."""
        # We need the following line to seed self.np_random
        super().reset(seed=seed)

        # An episode always starts from an empty bundle list.
        self.bundles = []
        self.iter_Qinit = 0

        return self._get_obs()

    def step(self, action):
        """Add the bundle selected by ``action`` and evaluate it with MLCA.

        Args:
            action: index into ``self.combinations``.

        Returns:
            Tuple ``(observation, reward, terminated, scores)`` where
            ``reward`` is the mean of ``scores`` (NaN when ``scores`` is
            empty) and ``scores`` is the raw list from :meth:`start_mlca`.
        """
        # Store a copy so the shared combinations table cannot be mutated
        # through the episode's bundle list.
        self.bundles.append(list(self.combinations[action]))

        # NOTE(review): this result is never used; start_mlca below receives
        # the raw bundle list. The traceback captured in the notebook
        # ('list' object has no attribute 'keys') suggests MLCA expects a
        # mapping here - presumably the value computed on the next line.
        # Confirm the return shape of initial_bids_mlca_predefined and wire
        # it into start_mlca.
        bundles_values = initial_bids_mlca_predefined(self.bundles, self.Qinit, self.Qmax)

        self.iter_Qinit += 1

        scores = self.start_mlca(self.bundles, self.Qinit, self.Qmax)
        reward = np.mean(scores) if scores else np.nan

        # Always defined (the original only assigned it on the final step),
        # and stays True if the env is stepped past the end of the episode.
        terminated = self.iter_Qinit >= self.Qinit

        return self._get_obs(), reward, terminated, scores

In [24]:

def q_learning(env, num_episodes, learning_rate, discount_factor, epsilon):
    """Tabular epsilon-greedy Q-learning.

    The environment must expose ``observation_space.n`` and
    ``action_space.n``, ``reset()`` must return a state usable as a row index
    into the Q-table, and ``step(action)`` must return a 4-tuple
    ``(next_state, reward, done, info)``.

    Args:
        env: environment to learn on.
        num_episodes: number of episodes to run.
        learning_rate: step size of the Q-value update.
        discount_factor: weight of the bootstrapped next-state value.
        epsilon: probability of taking a random action instead of the greedy one.

    Returns:
        ``numpy.ndarray`` of shape ``(num_states, num_actions)`` with the
        learned Q-values.
    """
    # Initialize the Q-table
    num_states = env.observation_space.n
    num_actions = env.action_space.n
    Q = np.zeros((num_states, num_actions))

    for _episode in range(num_episodes):
        state = env.reset()
        done = False

        while not done:
            # Exploration-exploitation trade-off
            if np.random.uniform() < epsilon:
                action = env.action_space.sample()  # Choose a random action
            else:
                action = np.argmax(Q[state])  # Choose the best action based on current Q-values

            # Perform the selected action and observe the next state and reward
            next_state, reward, done, _ = env.step(action)

            # No future return exists after a terminal transition, so the
            # target is the reward alone; bootstrapping from Q[next_state]
            # there would inflate values whenever that row is non-zero.
            if done:
                target = reward
            else:
                target = reward + discount_factor * np.max(Q[next_state])

            # Update Q-values using the Q-learning update rule
            Q[state, action] = (1 - learning_rate) * Q[state, action] + learning_rate * target

            state = next_state

    return Q

In [26]:
# Smoke test: build an 18-item environment, start an episode and take one
# step with action 0 (the all-zero item vector).
env = AuctionWorldEnv(n_items=18, Qinit=10, Qmax=10)
env.reset()
env.step(0)

15:34:57:               START MLCA:
15:34:57:               -----------------------------------------------
15:34:57:               Model: LSVM
15:34:57:               Seed SATS Instance: 0
15:34:57:               Qinit: 10
15:34:57:               Qmax: 10
15:34:57:               Qround: 6

15:34:57:               Instantiate SATS Instance
15:34:58:               Set NN parameters
15:34:58:               Set MIP parameters
15:34:58:               INITIALIZE BIDS
15:34:58:               -----------------------------------------------

15:34:58:               Setting inputed initial bids of dimensions:


0.01 0

------------------------ SATS parameters ------------------------
Value Model: LSVM
Number of Bidders:  6
Number of BidderTypes:  2
Number of Items:  18
Scaler:  None

------------------------ DNN  parameters ------------------------
Epochs: 30
Batch Size: 30
Regularization: l1_l2

Bidder_0:
regularization: 1e-05
learning_rate: 0.01
architecture: [10, 10]
dropout: True
dropout_prob: 0.05
epochs: 30
batch_size: 30
regularization_type: l1_l2

Bidder_1:
regularization: 1e-05
learning_rate: 0.01
architecture: [32, 32]
dropout: True
dropout_prob: 0.05
epochs: 30
batch_size: 30
regularization_type: l1_l2

Bidder_2:
regularization: 1e-05
learning_rate: 0.01
architecture: [32, 32]
dropout: True
dropout_prob: 0.05
epochs: 30
batch_size: 30
regularization_type: l1_l2

Bidder_3:
regularization: 1e-05
learning_rate: 0.01
architecture: [32, 32]
dropout: True
dropout_prob: 0.05
epochs: 30
batch_size: 30
regularization_type: l1_l2

Bidder_4:
regularization: 1e-05
learning_rate: 0.01
architect

AttributeError: 'list' object has no attribute 'keys'