In [None]:
import math
import time
import random
import numpy as np
import pandas as pd
from SRModel import SRModel
from KTHModel import KTHModel
from generic_routines import convertNumberBase, MaxLocBreakTies
import warnings
import pickle 
from multiprocessing import Pool
import os 
warnings.filterwarnings("ignore")
    
def generate_uIniPrice(numSessions, memory, numPlayers):
    uIniPrice = np.zeros((memory, numPlayers, numSessions))
    for iSession in range(numSessions):
        for iDepth in range(memory):
            for iPlayer in range(numPlayers):
                uIniPrice[iDepth, iPlayer, iSession] = random.uniform(0, 1)
    uIniPrice = np.array(uIniPrice)
    return uIniPrice


def generate_uExploreation(numPlayers):
    # Generating U(0,1) draws for price initialization
    uExploration = np.zeros((2, numPlayers))
    for iDecision in range(2):
        for iPlayer in range(numPlayers):
            uExploration[iDecision, iPlayer] = random.uniform(0, 1)
    return uExploration


def initQMatrices(game, delta, Profits, indexActions):
    Q = np.zeros((game.numStates, game.numiActions, game.numPlayers))
    # Randomize over the opponents decision
    for iPlayer in range(game.numPlayers):
        for iReport in range(game. numiActions):
            den = np.count_nonzero(indexActions[:, iPlayer] == iReport) * (1 - delta)
            Q[:, iReport, iPlayer] = np.ma.array(Profits[:, iPlayer],
                                                mask=(indexActions[:, iPlayer] != iReport)).sum() / den

    # Find initial optimal strategy
    strategyPrime = np.zeros((game.numStates, game.numPlayers))
    maxVal = np.zeros((game.numStates, game.numPlayers))
    for iPlayer in range(game.numPlayers):
        for iState in range(game.numStates):
            maxVal[iState, iPlayer], strategyPrime[iState, iPlayer] = \
                MaxLocBreakTies(game.numiActions, Q[iState, :, iPlayer].copy())
    return Q, strategyPrime, maxVal


def initState(game, u, numPrices):
    p = np.floor(numPrices * u)
    stateNumber = 0
    actionNumber = computeActionNumber(game, p[0, :])
    return p, stateNumber, actionNumber


def computePPrime(game, uExploration, strategyPrime, state, iters):
    pPrime = np.zeros(game.numPlayers)
    #  Greedy with probability 1-epsilon, with exponentially decreasing epsilon
    for iPlayer in range(game.numPlayers):
        u = uExploration[:, iPlayer]
        if u[0] <= np.exp(-game.beta[iPlayer]*iters):
            pPrime[iPlayer] = math.floor(game.numiActions*u[1])
        else:
            pPrime[iPlayer] = strategyPrime[state, iPlayer].copy()
    return pPrime


def computeStateNumber(game, p):
    statevector = np.reshape(p.copy(), (1, game.lengthStates))
    return np.sum(game.cActions * statevector)


def computeActionNumber(game, p):
    return np.sum(game.cActions * p)


def computeStrategyNumber(game, maxLocQ):
    # Given the maxLocQ vectors, computes the lengthStrategies-digit strategy number
    iu = 0
    strategyNumber = np.zeros(game.lengthStrategies)
    for i in range(game.numPlayers):
        il = iu
        iu = iu + game.numStates
        strategyNumber[il:iu] = maxLocQ[:, i]
    return strategyNumber


def q_learning(game, convergedtime):
    # Initializing various quantities
    converged = np.zeros(game.numSessions)
    indexStrategies = np.zeros((game.lengthStrategies, game.numSessions))
    indexConverge = np.zeros((game.numSessions,game.lengthStrategies))
    uIniPrice = generate_uIniPrice(game.numSessions, 1, 2)

    # Loop over numSessions
    for iSession in range(game.numSessions):
        #print('Session = ', iSession, ' started')
        start_time = time.time()
        # Learning Phase
        # Initializing Q matrices
        Q, strategyPrime, maxVal = initQMatrices(game, game.delta, game.Profits, game.indexActions)
        strategy = strategyPrime.copy()
        # Randomly initializing prices and state
        p, statePrime, actionPrime = initState(game, uIniPrice[:, :, iSession], game.numiActions)
        state = int(statePrime)
        # Loop
        itersInStrategy = 0
        convergedSession = -1

        strategyFix = np.zeros((game.numStates, game.numPlayers))
        for iters in range(game.maxIters):
            # Iterations counter

            # Generating exploration random numbers
            uExploration = generate_uExploreation(2)
            # Compute pPrime by balancing exploration vs. exploitation
            pPrime = computePPrime(game, uExploration, strategyPrime, state, iters)
            p[0, :] = pPrime.copy()
            statePrime = 0
            actionPrime = int(computeActionNumber(game, pPrime))
            for iPlayer in range(game.numPlayers):
                # Q matrices and strategies update
                oldq = Q[state, int(pPrime[iPlayer]), iPlayer]
                newq = oldq + game.alpha[iPlayer] * (game.Profits[actionPrime, iPlayer] + \
                                                     game.delta * maxVal[statePrime, iPlayer] - oldq)
                Q[state, int(pPrime[iPlayer]), iPlayer] = newq
                """             
                if newq > maxVal[state, iPlayer]:
                    maxVal[state, iPlayer] = newq
                    if strategyPrime[state, iPlayer] != pPrime[iPlayer]:
                        strategyPrime[state, iPlayer] = pPrime[iPlayer]
                """
			#need to update new strategyPrime and maxVal
            strategyPrime = np.zeros((game.numStates, game.numPlayers))
            maxVal = np.zeros((game.numStates, game.numPlayers))
            for iPlayer in range(game.numPlayers):
                for iState in range(game.numStates):
                    maxVal[iState, iPlayer], strategyPrime[iState, iPlayer] = \
                        MaxLocBreakTies(game.numiActions, Q[iState, :, iPlayer].copy())
 
			# Assessing convergence
            if np.array_equiv(strategyPrime[state, :], strategy[state, :]):
                itersInStrategy = itersInStrategy + 1
            else:
                # print(strategyPrime[state, :])
                # print(strategy)
                itersInStrategy = 1

            # Check for convergence in strategy
            if convergedSession == -1:
                # Maximum number of iterations exceeded
                if iters >= game.maxIters - 1:
                    convergedSession = 0
#                    strategyFix = strategy
                    strategyFix = strategy.copy()                #testing

                # Convergence in strategy reached
 #               if itersInStrategy == 100000:
                #if itersInStrategy == 10000:
                if itersInStrategy == convergedtime:
                    convergedSession = 1
#                    strategyFix = strategy
                    strategyFix = strategy.copy()               #testing
                    #print('iters: ', iters, " strategy: ", strategy)
                    #print(iters)
                    #print("maxVal: ", maxVal)
                    #print("strategyPrime: ", strategyPrime)
                    #print("pPrime: ", pPrime)
                    #print("Q: ", Q)

            # Check for loop exit criteria
            if convergedSession != -1:
                break
            # if no converge yet, update and iterate
            strategy[state, :] = strategyPrime[state, :].copy()
            state = statePrime

        converged[iSession] = convergedSession
        indexStrategies[:, iSession] = computeStrategyNumber(game, strategyFix)
        indexConverge[iSession] = (convergedSession, iters)
        # print(convergedSession)
        # if convergedSession == 1:
        #     print("Session =", iSession, "converged")
        # else:
        #     print("Session =", iSession, "did not converge")

        end_time = time.time()
        # print("session time:", end_time-start_time)
        # print('\n')
    return converged, indexStrategies, indexConverge


def single_process(alpha):
    c_list = [10000]
    #c_list = [10000]
    for cl in c_list:
        df = pd.DataFrame(columns=['alpha','beta','total_sessions',
                                   'converged_times','i ndexStrategie','indexConverge'])
        beta_list = np.linspace(0.0005,0.00001,2)
        path = 'KTH_parallel_result'
        if not os.path.exists(path):
            os.mkdir(path)
        
        for beta in beta_list:
        
            kth = KTHModel(alpha = alpha, beta = beta)
            converged, indexStrategies,indexConverge = q_learning(kth,convergedtime= cl)
            df = df.append({'alpha':alpha,'beta':beta,'total_sessions':kth.numSessions,
                            'converged_times':cl,'indexStrategie':indexStrategies,
                            'indexConverge':indexConverge },ignore_index=True)
            print('alpha:',alpha,' beta:',beta,' converged_times:',cl,' finish!')
        
        with open(path + '/' + 'a_'+ str(alpha) + 'b_'+ str(beta) + '_cl_' + str(cl) + '.pkl', 'wb') as f:
            pickle.dump(df, f)
            
def run_complex_operations(operation, input, pool):
    pool.map(operation, input)
      
processes_count = 10
     

alpha_list = np.linspace(0.025,0.25,2)
processes_pool = Pool(processes_count)
run_complex_operations(single_process,alpha_list , processes_pool)

Process SpawnPoolWorker-195:
Process SpawnPoolWorker-194:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/pool.py", line 114,

In [7]:
alpha_list = np.linspace(0.025,0.25,2)
processes_pool = Pool(processes_count)
run_complex_operations(single_process,alpha_list , processes_pool)

Process SpawnPoolWorker-101:
Process SpawnPoolWorker-102:
Traceback (most recent call last):
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'single_process' on <module '__main__' (built-in)>
Traceback (most recent call last):
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
 

KeyboardInterrupt: 

 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/queues.py", line 364, in get
    with self._rlock:
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
Process SpawnPoolWorker-110:
Process SpawnPoolWorker-109:
Process SpawnPoolWorker-108:
Process SpawnPoolWorker-107:
Traceback (most recent call last):
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/

    self._target(*self._args, **self._kwargs)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/queues.py", line 364, in get
    with self._rlock:
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
Process SpawnPoolWorker-103:
Traceback (most recent call last):
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/queues.py", line 364, in get
    with se

In [10]:
import multiprocessing as mp

def f(x):
    return x*x

with mp.Pool(5) as pool:
    print(pool.map(f, [1, 2, 3, 4, 5]))

Process SpawnPoolWorker-157:
Traceback (most recent call last):
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'f' on <module '__main__' (built-in)>
Process SpawnPoolWorker-158:
Traceback (most recent call last):
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users

  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/queues.py", line 365, in get
    res = self._reader.recv_bytes()
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/Users/manifect/opt/miniconda3/lib/python3.9/multi

KeyboardInterrupt: 

In [None]:
import KTH_parallel
def run_complex_operations(operation, input, pool):
    pool.map(operation, input)
      
processes_count = 2
     

alpha_list = np.linspace(0.025,0.25,2)
processes_pool = Pool(processes_count)
run_complex_operations(single_process,alpha_list , processes_pool)