Here, each agent has a specialized instrument of production: it is either a sector 1 producer or a sector 2 producer.
This should incentivize agents to go to market, since no single agent can produce all of its own requirements.

But we see that the actions of the agents jump around and do not quite converge to anything.
The null decision even shows little spurts where it is attempted.

Now one possible explanation could be the following:

The agents are all using a single NN.

There is centralized learning and decentralized execution.

The agents do not have an input state telling them of their instrument of production.

So differing production policies are being trained on, and asked of, the single NN for all agents.

Remember, each sector is a unique output of the NN.

Adding a new input which is an agent index (agent ID) may help differentiate policies (each agent results in a unique input since each agent has a unique ID).

Alternatively, including productivity inputs may be useful (if not already done?), but this may get needlessly complicated, for the following reason.

Productivity has two aspects:
    * A quantity of goods which make up the instruments of production (assumed to be 1, and not explicitly modeled here)
    * A productivity matrix which maps labor sector inputs to product sector outputs.
        

In [1]:
import time
import seaborn as sns

In [2]:
import Agents

In [3]:
import Agents
import Instruments
import Markets

import Simulate
import Environment


import SimPlotting

import numpy as np
from scipy.stats import norm, uniform
import random

import matplotlib.pyplot as plt

In [4]:

def initialize_instruments(Nins_1, Nins_2, means, stds):
    """Build a population of single-sector (specialized) instruments.

    The first ``Nins_1`` instruments are created from the productivity vector
    ``[means[0], 0]`` and the following ``Nins_2`` from ``[0, means[1]]``, so
    each instrument has a non-zero productivity entry for one sector only.

    ``stds`` is accepted for signature compatibility but is not used: the
    productivities are deterministic.

    Returns the instruments as one list, sector-1 instruments first.
    """
    sector_1 = [Instruments.Universal([means[0], 0]) for _ in range(Nins_1)]
    sector_2 = [Instruments.Universal([0, means[1]]) for _ in range(Nins_2)]

    return sector_1 + sector_2

    
    

In [5]:
#Instruments.Universal([0,1]).matrix

In [6]:
def initialize_agents(initial_parameters, learning_parameters, n_agents=None) -> list:
    """Create the agent population from two keyword-argument dictionaries.

    ``initial_parameters`` and ``learning_parameters`` are merged into a single
    keyword dictionary (on duplicate keys the value from
    ``learning_parameters`` wins) and every agent is constructed as
    ``Agents.Agent(**kwargs)``.

    ``n_agents`` is the number of agents to create. When omitted, the
    notebook-level ``Nagents`` is used, which is what this function always did
    before the parameter existed.

    NOTE(review): all agents receive the *same* value objects from the merged
    dictionary (e.g. any numpy arrays in it). Whether ``Agents.Agent`` copies
    them is not visible here -- confirm if per-agent state is mutated in place.

    Initial endowments ``Q`` are not set here; the calling cell assigns them
    after construction.
    """
    if n_agents is None:
        # Backward-compatible default: fall back to the notebook global.
        n_agents = Nagents

    kwargs = {**initial_parameters, **learning_parameters}

    return [Agents.Agent(**kwargs) for _ in range(n_agents)]

def _initialize_instruments(Ninstruments, means, stds):
    """Create ``Ninstruments`` Universal instruments with random productivities.

    For every instrument, one productivity per sector is drawn from a normal
    distribution, pairing the entries of ``means`` and ``stds`` with ``zip``
    (so the shorter of the two sets the number of sectors). The resulting
    vector is handed to ``Instruments.Universal``.

    TODO: make more general to include different distributions.
    """

    def draw_productivities():
        # One normal draw per (mean, std) pair, in sector order.
        return [norm.rvs(loc=mu, scale=sigma) for mu, sigma in zip(means, stds)]

    return [Instruments.Universal(draw_productivities()) for _ in range(Ninstruments)]

def initialize_allocation(agent_list, instrument_list):
    """Give every agent one randomly chosen instrument, without replacement.

    Each agent's ``Ins`` attribute is set to an instrument picked at random
    from ``instrument_list``.

    ``instrument_list`` is mutated in place: every assigned instrument is
    removed from it, so only unallocated instruments remain afterwards (the
    list ends up empty when there is exactly one instrument per agent).

    Raises ``IndexError`` when the pool runs out before every agent has been
    served; agents processed before that point keep their instruments.
    """
    for agent in agent_list:
        if not instrument_list:
            raise IndexError(
                "initialize_allocation: fewer instruments than agents"
            )

        # Pop by position instead of choice() + remove(): remove() searches by
        # equality, so with equal-comparing instruments it could drop a
        # different object and leave the chosen one in the pool to be handed
        # out a second time.
        chosen_index = random.randrange(len(instrument_list))
        agent.Ins = instrument_list.pop(chosen_index)

Initialize Parameters

In [7]:
# Time step; passed as the first argument to Environment.Environment.
dt = 1

# One entry per product; forwarded to every agent through initial_parameters['cg'].
# NOTE(review): presumably a per-product consumption rate -- confirm in Agents.Agent.
cg = np.array([1.0, 1.0])

# The number of products is taken from the length of cg.
Nproducts = cg.shape[0]

# Population size; read as a global by initialize_agents.
Nagents = 2000

Nactions = 3 + Nproducts # One production decision for each product + null + exchange + consume

# Productivity used for sector 1 and sector 2 instruments respectively.
mean_productivities = [8, 5]
# Passed to initialize_instruments, which currently does not use it.
std_productivities = [0, 0]

# One instrument per agent.
Ninstruments = Nagents

# Share of instruments that are sector-1 producers; K1 is the resulting count
# and the remaining Ninstruments - K1 are sector-2 producers.
ratio = 0.5
K1 = int(ratio*Ninstruments)



In [8]:
# Keyword arguments describing each agent's starting state. initialize_agents
# merges this dict with learning_parameters and passes the result to
# Agents.Agent(**kwargs).
# NOTE(review): the same 'D' and 'cg' array objects are handed to every agent;
# if Agents.Agent stores them without copying and updates them in place, all
# agents would share that state -- confirm.
initial_parameters = {
    #'Q': np.random.random(Nproducts)*10,
    'D': np.zeros(Nproducts), #np.ones(Nproducts)*2,
    'cg': cg,
     'M': 10.,
    'n_actions': Nactions
}

# Keyword arguments for the learning side of the agent.
# 'n_actions' also appears in initial_parameters with the same value; on merge
# the entry from this dict is the one that is kept.
# NOTE(review): 'input_dims' is 3*Nproducts + 1 -- presumably three per-product
# vectors plus one scalar in the state; the notes at the top of this notebook
# say the instrument of production is not part of the input. Confirm against
# the state construction in Simulate/Agents.
learning_parameters = {
    'gamma': 0.9,
    #'gamma': 0.5,
    #'gamma': 0.1,
    'epsilon': 1.0,
    'eps_end': 0.01,
    'eps_dec': 0.01,
    'batch_size': 100,
    'input_dims': [3*Nproducts+1],
    #'lr':0.001,
    'lr': 0.005,
    'n_actions':Nactions,
}

Initialize Agents, Instruments, and Markets

In [9]:
# Build the population. Endowments are assigned further down, once, after
# instruments have been allocated (an earlier duplicate randomization here was
# overwritten before anything read it and has been removed).
agent_list = initialize_agents(initial_parameters, learning_parameters)

# K1 sector-1 instruments followed by Ninstruments - K1 sector-2 instruments.
instrument_list = initialize_instruments(K1, Ninstruments - K1, mean_productivities, std_productivities)
market = Markets.SimpleMarket(Nproducts, max_tries = 1000)

# NOTE(review): initialize_allocation removes each assigned instrument from
# instrument_list. With one instrument per agent the list is empty by the time
# it is passed to Environment below -- confirm that Environment only needs the
# unallocated instruments.
initialize_allocation(agent_list, instrument_list)

# Randomize agent's initial endowments
for agent in agent_list:
    agent.Q = np.random.random(Nproducts)*10
    # Store an independent copy: assigning the same array object would make
    # Q0 follow any in-place update of Q and lose the initial endowment.
    agent.Q0 = agent.Q.copy()

env = Environment.Environment(dt, agent_list, instrument_list, market)
sim = Simulate.Simulate(env)

In [10]:
agent = agent_list[0]

In [11]:
agent.Ins.matrix

array([[8, 0],
       [0, 0]])

In [12]:
agent.Q

array([5.95544297, 0.1157589 ])

In [13]:
agent.reset()

In [14]:
agent.Q

array([5.95544297, 0.1157589 ])

Run Simulation

In [None]:
# Value passed to sim.run_simulation.
Ntimes = 10000

# Wall-clock timestamps taken immediately before and after the run.
s = time.perf_counter()
sim.run_simulation(Ntimes)
e = time.perf_counter()

# Report the elapsed time in seconds, minutes and hours, one value per line.
elapsed = e - s
minutes = elapsed/60.
hours = minutes/60.
print(elapsed, minutes, hours, sep='\n')

  price = self.monetary_exchanged/self.quantity_exchanged


In [None]:
agent = agent_list[0]

agent.Q
#sim.get_state(agent)

Plot

In [None]:
# Wrap the simulation object in the project's plotting helper.
splt = SimPlotting.SimPlotting(sim)
# Use 500 dpi for the figures created from here on.
plt.rcParams['figure.dpi'] = 500

In [None]:
#splt.plotA(lw1=0.5)

In [None]:
splt.plotA(lw1=0.5, lw2=0.05, ewm=True, alpha=0.01)

In [None]:
# Quantity plots from the project plotting helper.
# NOTE(review): the *log flags and *min/*max limits are interpreted inside
# SimPlotting.plotQuantities; the names suggest log-scaled Q and D panels with
# explicit axis limits -- confirm there before changing values.
splt.plotQuantities(
    lw1=0.3, 
    lw2=0.05, 
    xmax=10, 
    tight_layout=True, 
    Qlog=True, 
    Dlog=True, 
    qmin=0, 
    qmax=6, 
    cmin=0, 
    cmax=6,
    Qmin = 1e-1,
    Qmax=1e3,
    Dmin=1e-1,
    Dmax=1e3,
)

In [None]:
# Market plots from the project plotting helper.
# NOTE(review): the *log flags are interpreted inside SimPlotting.plotMarket;
# presumably every panel except price (plog=False) is log-scaled -- confirm.
splt.plotMarket(lw1=0.3, 
                lw2=0.05, 
                Mlog=True, 
                mNlog=True, 
                mMlog=True, 
                mElog=True, 
                plog=False)

In [None]:
#plt.plot(splt.trange, splt.mMarray[:,0])


In [None]:
splt.plotPolicy(0)

In [None]:
splt.plotPolicy('L')

In [None]:
splt.plotPolicy(1)

In [None]:
splt.plotPolicy(2)

In [None]:
splt.plotPolicy('C')

In [None]:
splt.plotPolicy('E')

In [None]:
# Scatter a per-product labor-per-output ratio against the product's price.
for i in range(Nproducts):

    # NOTE(review): index i+1 presumably skips the null action at index 0 and
    # selects the production action for product i -- confirm the action
    # ordering used by SimPlotting.action_probabilities.
    prob_Li = splt.action_probabilities()[i+1]
    # Scale the action probability by the population size.
    sum_del = prob_Li*Nagents
    # NOTE(review): assumes splt.qarray is indexed (time, product, agent), so
    # this sums over agents for product i -- TODO confirm.
    sum_q   = splt.qarray[:,i].sum(axis=1)

    # Elementwise ratio; assuming numpy arrays, entries where sum_q is zero
    # come out as inf or nan rather than raising.
    li = sum_del/sum_q
    pi = splt.parray[:,i]

    plt.scatter(li, pi, s=0.5)

plt.xscale('log')
plt.yscale('log')
plt.xlabel('Direct Labor Coefficient $l_i$')
plt.ylabel('Price $p_i$')


In [None]:
# For every product: histogram of its prices, plus a vertical line at its mean
# labor-per-output ratio.
for i in range(Nproducts):

    # NOTE(review): index i+1 presumably selects the production action for
    # product i (index 0 being the null action) -- confirm in SimPlotting.
    prob_Li = splt.action_probabilities()[i+1]
    sum_del = prob_Li*Nagents
    # NOTE(review): assumes splt.qarray is indexed (time, product, agent) -- TODO confirm.
    sum_q   = splt.qarray[:,i].sum(axis=1)

    li = sum_del/sum_q
    # Mean that ignores nan entries (inf entries, if any, are still included).
    li = np.nanmean(li)
    pi = splt.parray[:,i]

    # axvline does not draw from the colour cycle, so without an explicit
    # colour every product's line has the same colour and cannot be matched
    # to its histogram. Use one cycle colour per product for both artists.
    color = f'C{i}'
    # Braced subscripts so indices with more than one digit render correctly.
    plt.axvline(li, color=color, label=f'$l_{{{i}}}$')
    plt.hist(pi, bins=np.logspace(-6, 3,50), histtype='step', color=color, label=f'$p_{{{i}}}$')

plt.xscale('log')
plt.yscale('log')
plt.xlabel('Value and Price')
plt.ylabel('Price Count')
plt.legend()
