In [21]:
import numpy as np
import pandas as pd
import random
from itertools import product
from generic_routines import convertNumberBase, MaxLocBreakTies

class AMModel(object):
    def __init__(self, **kwargs):
        self.numPlayers = 2
        # number of individual actions
        self.numiActions = 27
        self.eps = kwargs.get('eps', 0.05)
        self.memory = kwargs.get('memory', 0)
        self.numStates = self.numiActions ** (self.numPlayers * self.memory)
        self.numPeriods = self.numStates + 1
        self.numActions = self.numiActions ** self.numPlayers
        self.buyerInvestment = 75
        self.sellerInvestment = 25
        self.trueValue = self.init_TrueValue(self.sellerInvestment)
        self.trueCost = self.init_TrueCost(self.buyerInvestment)
        self.value = np.array([200, 250, 320])
        self.cost = np.array([130, 80, 10])
        self.buyerActions = np.array(list(product([0, 1, 2], repeat=3)))
        self.sellerActions = np.array(list(product([0, 1, 2], repeat=3)))
        self.indexActions = self.init_indexActions()
        self.Prices = self.init_Prices()
        self.Profits = self.init_Profits()

        # QL
        self.numSessions = 100
        self.maxIters = 1000000
        self.delta = kwargs.get('delta', 0.95)
        self.alpha = kwargs.get('alpha', 0.15) * np.ones(self.numPlayers)
        self.beta = kwargs.get('beta', 0.00001) * np.ones(self.numPlayers)
        self.lengthStates = self.numPlayers * self.memory
        self.lengthStrategies = self.numPlayers * self.numStates
        self.Q = self.init_Q()
        self.cStates = self.init_cStates()
        self.cActions = self.init_cActions()

    def init_TrueValue(self, i):
        if i == 0:
            return 200
        elif i == 25:
            return 250
        else:
            return 320

    def init_TrueCost(self, i):
        if i == 0:
            return 130
        elif i == 25:
            return 80
        else:
            return 10

    def init_Prices(self):
        pricesArray = np.zeros((3, 3))
        for cb in range(0, 3):
            for vs in range(0, 3):
                # (vs-200) - (130-cb) + 165
                pricesArray[cb][vs] = (self.value[vs] - 200) - (130 - self.cost[cb]) + 165
        return pricesArray

    def init_indexActions(self):
        indexActions = []
        for i in range(self.numActions):
            indexActions.append(convertNumberBase(i, self.numiActions, self.numPlayers))
        indexActions = np.array(indexActions)
        return indexActions

    def init_Profits(self):
        Profits = np.zeros((2, self.numActions, self.numPlayers))
        Actions = np.zeros((self.numActions, 6))
        for i in range(self.numActions):
            b = int(self.indexActions[i][0])
            s = int(self.indexActions[i][1])
            vb, cb, vb_a = self.buyerActions[b]
            vs, cs, cs_a = self.sellerActions[s]
            Actions[i] = vb, cb, vb_a, vs, cs, cs_a

            if (vb == vs) and (cb == cs):
                # if all report coincide
                # profit for buyer: true value - price - investment_buyer
                pb_na = self.trueValue - self.Prices[cb][vs] - self.buyerInvestment
                # profit for seller: price - true cost - investment_seller
                ps_na = self.Prices[cb][vs] - self.trueCost - self.sellerInvestment
            else:
                pb_na = 0 - self.buyerInvestment
                ps_na = 0 - self.sellerInvestment
            Profits[0][i] = [pb_na, ps_na]

            if vb_a == 0:
                # no_trade
                pb1 = 0 - self.buyerInvestment
                ps1 = 0 - self.sellerInvestment
            elif vb_a == 1:
                pb1 = 0.5 * 0 + 0.5 * (self.trueValue - 205) - self.buyerInvestment
                ps1 = 0.5 * 0 + 0.5 * (205 - self.trueCost) - self.sellerInvestment
            else:
                pb1 = 0.5 * (self.trueValue - 205) + 0.5 * (self.trueValue - 255) - self.buyerInvestment
                ps1 = 0.5 * (255 - self.trueCost) + 0.5 * (205 - self.trueCost) - self.sellerInvestment
            if vb_a == vs:
                ps1 = ps1 + 300
            else:
                ps1 = ps1 - 300

            if cs_a == 0:
                # no_trade
                pb2 = 0 - self.buyerInvestment
                ps2 = 0 - self.sellerInvestment
            elif cs_a == 1:
                pb2 = 0.5 * 0 + 0.5 * (self.trueValue - 125) - self.buyerInvestment
                ps2 = 0.5 * 0 + 0.5 * (125 - self.trueCost) - self.sellerInvestment
            else:
                pb2 = 0.5 * (self.trueValue - 125) + 0.5 * (self.trueValue - 75) - self.buyerInvestment
                ps2 = 0.5 * (125 - self.trueCost) + 0.5 * (75 - self.trueCost) - self.sellerInvestment
            if cs_a == cb:
                pb2 = pb2 + 300
            else:
                pb2 = pb2 - 300

            pb_a = 0.5 * (pb1 + pb2)
            ps_a = 0.5 * (ps1 + ps2)

            Profits[1][i] = [pb_a, ps_a]

        return Profits

    def init_Q(self):
        Q = np.zeros((self.numStates, self.numiActions, self.numPlayers))
        # Randomize over the opponents decision
        for iAgent in range(self.numPlayers):
            for iReport in range(self.numiActions):
                den = np.count_nonzero(self.indexActions[:, iAgent] == iReport) * (1 - self.delta)
                Q[:, iReport, iAgent] = np.ma.array((1 - self.eps) * self.Profits[0][:, iAgent] + \
                                                    self.eps * self.Profits[1][:, iAgent],
                                                    mask=(self.indexActions[:, iAgent] != iReport)).sum() / den
        return Q

    def init_cStates(self):
        """Initialize cStates"""
        x = np.arange(self.lengthStates - 1, -1, -1)
        cStates = self.numiActions ** x
        return cStates

    def init_cActions(self):
        """Initialize cActions"""
        x = np.arange(self.numPlayers - 1, -1, -1)
        cActions = self.numiActions ** x
        return cActions

if __name__ == '__main__':
    am = AMModel()
    print("Prices", am.Prices)
    print("Profits", am.Profits)
    #print("Q", am.Q[0])
    print("cStates", am.cStates)
    print("cActions", am.cActions)
    # profit1_df = pd.DataFrame(am.Profits[0])
    # profit1_df.to_excel("p1.xlsx")
    # profit2_df = pd.DataFrame(am.Profits[1])
    # profit2_df.to_excel("p2.xlsx")



Prices [[165. 215. 285.]
 [115. 165. 235.]
 [ 45.  95. 165.]]
Profits [[[  10.    130.  ]
  [  10.    130.  ]
  [  10.    130.  ]
  ...
  [  10.    130.  ]
  [  10.    130.  ]
  [  10.    130.  ]]

 [[  75.    125.  ]
  [-193.75  153.75]
  [-150.    170.  ]
  ...
  [-215.    235.  ]
  [-183.75  263.75]
  [ 160.    280.  ]]]
cStates []
cActions [27  1]


In [50]:
def convertNumberBase(n, b, l):
    '''
    Converts an integer n from base 10 to base b,
    generating a vector of integers of length l
    '''
    tmp = n
    ans = np.zeros(l)
    for i in range(1, l+1):
        ans[l-i] = int(tmp % b)
        tmp = np.floor(tmp/b)
    return ans


def MaxLocBreakTies(n, x):
    # MaxLocBreakTies: Given the n*1 array x, finds the maximum m and the position of m in x
    tied = []
    h = 0
    m = max(x)
    for i in range(0, n):
#        m = max(x)
        if x[i] == m:
            h = h + 1
            tied.append(i)
    if h > 1:
        u = random.uniform(0, 1)
        p = tied[int(h*u)]
    else:
        p = tied[0]
    return m, p




In [58]:
MaxLocBreakTies(10,np.array([1,2,9,4,5,6,7,8,9,0]))

(9, 8)

In [61]:
np.maximum(np.array([1,2,9,4,5,9,7,8,9,0]))

TypeError: maximum() takes from 2 to 3 positional arguments but 1 were given

In [3]:
import numpy as np
from generic_routines import convertNumberBase, MaxLocBreakTies

class KTHModel(object):
    def __init__(self, **kwargs):
        self.numPlayers = 2
        self.value = np.array([200, 250, 320])
        self.cost = np.array([130, 80, 10])
        # number of individual actions
        self.numiActions = 9
        self.buyerActions = np.array([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2], [2, 0], [2, 1], [2, 2]])
        self.sellerActions = np.array([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2], [2, 0], [2, 1], [2, 2]])
        self.memory = kwargs.get('memory', 0)
        self.numActions = self.numiActions ** self.numPlayers
        self.numStates = self.numiActions ** (self.numPlayers * self.memory)
        self.indexActions = self.init_indexActions()
        self.Prices = self.init_Prices()
        self.Profits = self.init_Profits()

        # QL
        self.numSessions = 100
        self.maxIters = 1000000
        self.delta = kwargs.get('delta', 0.95)
        self.alpha = kwargs.get('alpha', 0.15) * np.ones(self.numPlayers)
        self.beta = kwargs.get('beta', 100) * np.ones(self.numPlayers)
        self.lengthStates = self.numPlayers * self.memory
        self.lengthStrategies = self.numPlayers * self.numStates
        self.Q = self.init_Q()
        self.cStates = self.init_cStates()
        self.cActions = self.init_cActions()

    def init_indexActions(self):
        indexActions = []
        for i in range(self.numActions):
            indexActions.append(convertNumberBase(i, self.numiActions, self.numPlayers))
        indexActions = np.array(indexActions)
        return indexActions

    def init_Prices(self):
        pricesArray = np.zeros((3, 3))
        for cb in range(0, 3):
            for vs in range(0, 3):
                # (vs-200) - (130-cb) + 165
                pricesArray[cb][vs] = (self.value[vs] - 200) - (130 - self.cost[cb]) + 165
        return pricesArray

    def init_Profits(self):
        Profits = np.zeros((self.numActions, self.numPlayers))
        for i in range(self.numActions):
            b = int(self.indexActions[i][0])
            s = int(self.indexActions[i][1])
            vb, cb = self.buyerActions[b]
            vs, cs = self.sellerActions[s]
            # profit for buyer: vs - price - max(0, cs - cb)
            pb = self.value[vs] - self.Prices[cb][vs] - max(0, self.cost[cs] - self.cost[cb])
            # profit for seller: price - cb - max(0, vs - vb)
            ps = self.Prices[cb][vs] - self.cost[cb] - max(0, self.value[vs] - self.value[vb])
            Profits[i] = [pb, ps]
        return Profits

    def init_Q(self):
        Q = np.zeros((self.numActions, self.numiActions, self.numPlayers))
        for iPlayer in range(self.numPlayers):
            for iReport in range(self.numiActions):
                den = np.count_nonzero(self.indexActions[:, iPlayer] == iReport) * (1 - self.delta)
                Q[:, iReport, iPlayer] = np.ma.array(self.Profits[:, iPlayer],
                                            mask=(self.indexActions[:, iPlayer] != iReport)).sum() / den
        return Q

    def init_cStates(self):
        """Initialize cStates (used for q-learning)"""
        x = np.arange(self.lengthStates - 1, -1, -1)
        cStates = self.numiActions ** x
        return cStates

    def init_cActions(self):
        """Initialize cActions (used for q-learning)"""
        x = np.arange(self.numPlayers - 1, -1, -1)
        cActions = self.numiActions ** x
        return cActions

if __name__ == '__main__':
    kth = KTHModel()
    print("Prices", kth.Prices)
    #print("Profits", kth.Profits)
    print("Q", kth.Q[0])
    print(kth.Profits[65])



Prices [[165. 215. 285.]
 [115. 165. 235.]
 [ 45.  95. 165.]]
Q [[ 700.          700.        ]
 [1366.66666667  700.        ]
 [1833.33333333  700.        ]
 [ 700.         1366.66666667]
 [1366.66666667 1366.66666667]
 [1833.33333333 1366.66666667]
 [ 700.         1833.33333333]
 [1366.66666667 1833.33333333]
 [1833.33333333 1833.33333333]]
[85. 35.]


In [15]:
np.count_nonzero(np.array([0,1,1,2]))

3

In [42]:
kth.Q

array([[[ 700.        ,  700.        ],
        [1366.66666667,  700.        ],
        [1833.33333333,  700.        ],
        ...,
        [ 700.        , 1833.33333333],
        [1366.66666667, 1833.33333333],
        [1833.33333333, 1833.33333333]],

       [[ 700.        ,  700.        ],
        [1366.66666667,  700.        ],
        [1833.33333333,  700.        ],
        ...,
        [ 700.        , 1833.33333333],
        [1366.66666667, 1833.33333333],
        [1833.33333333, 1833.33333333]],

       [[ 700.        ,  700.        ],
        [1366.66666667,  700.        ],
        [1833.33333333,  700.        ],
        ...,
        [ 700.        , 1833.33333333],
        [1366.66666667, 1833.33333333],
        [1833.33333333, 1833.33333333]],

       ...,

       [[ 700.        ,  700.        ],
        [1366.66666667,  700.        ],
        [1833.33333333,  700.        ],
        ...,
        [ 700.        , 1833.33333333],
        [1366.66666667, 1833.33333333],
        [

In [26]:
np.ma.array(kth.Profits[:, 1],mask=(kth.indexActions[:, 1] != 2)).sum()

315.0

In [23]:
import numpy as np
import pandas as pd
from itertools import product
from generic_routines import convertNumberBase, MaxLocBreakTies

class SRModel(object):
    def __init__(self, **kwargs):
        self.numPlayers = 2
        # number of individual actions
        self.numiActions = 27
        self.memory = kwargs.get('memory', 0)
        self.numStates = self.numiActions ** (self.numPlayers * self.memory)
        self.numPeriods = self.numStates + 1
        self.numActions = self.numiActions ** self.numPlayers
        self.buyerInvestment = 75
        self.sellerInvestment = 25
        self.trueValue = self.init_TrueValue(self.sellerInvestment)
        self.trueCost = self.init_TrueCost(self.buyerInvestment)
        self.value = np.array([200, 250, 320])
        self.cost = np.array([130, 80, 10])
        self.buyerActions = np.array(list(product([0, 1, 2], repeat=3)))
        self.sellerActions = np.array(list(product([0, 1, 2], repeat=3)))
        self.indexActions = self.init_indexActions()
        self.Prices = self.init_Prices()
        self.Profits = self.init_Profits()

        # QL
        self.numSessions = 100
        self.maxIters = 1000000
        # self.delta = kwargs.get('delta', 0.95)
        # self.alpha = kwargs.get('alpha', 0.15) * np.ones(self.numPlayers)
#        self.beta = kwargs.get('beta', 0.0001) * np.ones(self.numPlayers)
        self.delta = kwargs.get('delta', 0.95)
        self.alpha = kwargs.get('alpha', 0.15) * np.ones(self.numPlayers)
        self.beta = kwargs.get('beta', 0.0001) * np.ones(self.numPlayers)
        self.lengthStates = self.numPlayers * self.memory
        self.lengthStrategies = self.numPlayers * self.numStates
        self.Q = self.init_Q()
        self.cStates = self.init_cStates()
        self.cActions = self.init_cActions()

    def init_TrueValue(self, i):
        if i == 0:
            return 200
        elif i == 25:
            return 250
        else:
            return 320

    def init_TrueCost(self, i):
        if i == 0:
            return 130
        elif i == 25:
            return 80
        else:
            return 10

    def init_Prices(self):
        pricesArray = np.zeros((3, 3))
        for cb in range(0, 3):
            for vs in range(0, 3):
                # (vs-200) - (130-cb) + 165
                pricesArray[cb][vs] = (self.value[vs] - 200) - (130 - self.cost[cb]) + 165
        return pricesArray

    def init_indexActions(self):
        indexActions = []
        for i in range(self.numActions):
            indexActions.append(convertNumberBase(i, self.numiActions, self.numPlayers))
        indexActions = np.array(indexActions)
        return indexActions

    def init_Profits(self):
        Profits = np.zeros((self.numActions, self.numPlayers))
        Actions = np.zeros((self.numActions, 6))
        for i in range(self.numActions):
            b = int(self.indexActions[i][0])
            s = int(self.indexActions[i][1])
            vb, cb, vb_a = self.buyerActions[b]
            vs, cs, cs_a = self.sellerActions[s]
            Actions[i] = vb, cb, vb_a, vs, cs, cs_a
            if (vb == vs) and (cb == cs):
                # if all report coincide
                # profit for buyer: true value - price - investment_buyer
                pb = self.trueValue - self.Prices[cb][vs] - self.buyerInvestment
                # profit for seller: price - true cost - investment_seller
                ps = self.Prices[cb][vs] - self.trueCost - self.sellerInvestment
            elif (vb != vs) and (cb == cs):
                # only the value reports differ
                # buyer enters into arbitration stage
                if vb_a == 0:
                    # no_trade
                    pb = 0 - self.buyerInvestment
                    ps = 0 - self.sellerInvestment
                elif vb_a == 1:
                    pb = 0.5 * 0 + 0.5 * (self.trueValue - 205) - self.buyerInvestment
                    ps = 0.5 * 0 + 0.5 * (205 - self.trueCost) - self.sellerInvestment
                else:
                    pb = 0.5 * (self.trueValue - 205) + 0.5 * (self.trueValue - 255) - self.buyerInvestment
                    ps = 0.5 * (255 - self.trueCost) + 0.5 * (205 - self.trueCost) - self.sellerInvestment

                # buyers is fined 300
                pb = pb - 300
                # the seller is rewarded a bonus of 300 if the second 
                # report of the buyer matches the first stage report
                # of seller. In other cases, the seller is also fined 300.
                if vb_a == vs:
                    ps = ps + 300
                else:
                    ps = ps - 300
            elif (vb == vs) and (cb != cs):
                # only the cost reports differ
                # seller enters into arbitration stage
                if cs_a == 0:
                    # no_trade
                    pb = 0 - self.buyerInvestment
                    ps = 0 - self.sellerInvestment
                elif cs_a == 1:
                    pb = 0.5 * 0 + 0.5 * (self.trueValue - 125) - self.buyerInvestment
                    ps = 0.5 * 0 + 0.5 * (125 - self.trueCost) - self.sellerInvestment
                else:
                    pb = 0.5 * (self.trueValue - 125) + 0.5 * (self.trueValue - 75) - self.buyerInvestment
                    ps = 0.5 * (125 - self.trueCost) + 0.5 * (75 - self.trueCost) - self.sellerInvestment

                # seller is fined 300
                ps = ps - 300
                # the buyer is rewarded a bonus of 300 if the second 
                # report of the seller matches the first stage report
                # of buyer. In other cases, the buyer is also fined 300.
                if cs_a == cb:
                    pb = pb + 300
                else:
                    pb = pb - 300
            else:
                if vb_a == 0:
                    # no_trade
                    pb1 = 0 - self.buyerInvestment
                    ps1 = 0 - self.sellerInvestment
                elif vb_a == 1:
                    pb1 = 0.5 * 0 + 0.5 * (self.trueValue - 205) - self.buyerInvestment
                    ps1 = 0.5 * 0 + 0.5 * (205 - self.trueCost) - self.sellerInvestment
                else:
                    pb1 = 0.5 * (self.trueValue - 205) + 0.5 * (self.trueValue - 255) - self.buyerInvestment
                    ps1 = 0.5 * (255 - self.trueCost) + 0.5 * (205 - self.trueCost) - self.sellerInvestment
                if vb_a == vs:
                    ps1 = ps1 + 300

                if cs_a == 0:
                    # no_trade
                    pb2 = 0 - self.buyerInvestment
                    ps2 = 0 - self.sellerInvestment
                elif cs_a == 1:
                    pb2 = 0.5 * 0 + 0.5 * (self.trueValue - 125) - self.buyerInvestment
                    ps2 = 0.5 * 0 + 0.5 * (125 - self.trueCost) - self.sellerInvestment
                else:
                    pb2 = 0.5 * (self.trueValue - 125) + 0.5 * (self.trueValue - 75) - self.buyerInvestment
                    ps2 = 0.5 * (125 - self.trueCost) + 0.5 * (75 - self.trueCost) - self.sellerInvestment
                if cs_a == cb:
                    pb2 = pb2 + 300

                pb = 0.5 * (pb1 + pb2) - 300
                ps = 0.5 * (ps1 + ps2) - 300
            Profits[i] = [pb, ps]
        action_df = pd.DataFrame(Actions)
        action_df.to_excel("action.xlsx")
        profit_df = pd.DataFrame(Profits)
        profit_df.to_excel("profit.xlsx")
        return Profits

    def init_Q(self):
        Q = np.zeros((self.numStates, self.numiActions, self.numPlayers))
        # Randomize over the opponents decision

        for iAgent in range(self.numPlayers):
            for iReport in range(self.numiActions):
                den = np.count_nonzero(self.indexActions[:, iAgent] == iReport) * (1 - self.delta)
 #               Q[:, iReport, iAgent] = np.ma.array(self.Profits[:, iAgent],
 #                                       mask=(self.indexActions[:, iAgent] != iReport)).sum() / den
                Q[:, iReport, iAgent] = np.ma.array(self.Profits[:, iAgent],
                                        mask=(self.indexActions[:, iAgent] != iReport)).sum() / den

        return Q

    def init_cStates(self):
        """Initialize cStates"""
        x = np.arange(self.lengthStates - 1, -1, -1)
        cStates = self.numiActions ** x
        return cStates

    def init_cActions(self):
        """Initialize cActions"""
        x = np.arange(self.numPlayers - 1, -1, -1)
        cActions = self.numiActions ** x
        return cActions

if __name__ == '__main__':
    sr = SRModel()
    sr.init_Q()
    print("Prices: ", sr.Prices)
    print("Profits: ", sr.Profits)
    #print("Q: ", sr.Q[0])
    print("cStates: ", sr.cStates)
    print("cActions: ", sr.cActions)
    print("sr.trueValue: ", sr.trueValue)
    print("sr.trueCost: ", sr.trueCost)
    print("sr.numStates: ", sr.numStates)
    Q_df = pd.DataFrame(sr.Q[0])
    Q_df.to_excel("Q.xlsx")

Prices:  [[165. 215. 285.]
 [115. 165. 235.]
 [ 45.  95. 165.]]
Profits:  [[ 10. 130.]
 [ 10. 130.]
 [ 10. 130.]
 ...
 [ 10. 130.]
 [ 10. 130.]
 [ 10. 130.]]
cStates:  []
cActions:  [27  1]
sr.trueValue:  250
sr.trueCost:  10
sr.numStates:  1


In [63]:
sr.indexActions.shape

(729, 2)

In [41]:
Q_df

Unnamed: 0,0,1
0,-4681.481481,-3214.814815
1,-4481.481481,-2703.703704
2,-4503.703704,-2414.814815
3,-4570.37037,-3325.925926
4,-4370.37037,-2814.814815
5,-4392.592593,-2525.925926
6,-4414.814815,-3481.481481
7,-4214.814815,-2970.37037
8,-4237.037037,-2681.481481
9,-4792.592593,-3103.703704


In [49]:
import numpy as np
import pandas as pd
import random
from itertools import product
from generic_routines import convertNumberBase, MaxLocBreakTies

class SPIModel(object):
    def __init__(self, **kwargs):
        self.numPlayers = 2
        # number of individual actions
        self.numiActions = 18
        self.eps = kwargs.get('eps', 0.05)
        self.memory = kwargs.get('memory', 0)
        self.numStates = self.numiActions ** (self.numPlayers * self.memory)
        self.numPeriods = self.numStates + 1
        self.numActions = self.numiActions ** self.numPlayers
        self.buyerInvestment = 75
        self.sellerInvestment = 25
        self.trueValue = self.init_TrueValue(self.sellerInvestment)
        self.trueCost = self.init_TrueCost(self.buyerInvestment)
        self.value = np.array([200, 250, 320])
        self.cost = np.array([130, 80, 10])
        self.buyerActions = np.array([[0, 0, 0],[0, 0, 1],[0, 1, 0],[0, 1, 1],[0, 2, 0],[0, 2, 1],
                                      [1, 0, 0],[1, 0, 1],[1, 1, 0],[1, 1, 1],[1, 2, 0],[1, 2, 1],
                                       [2, 0, 0],[2, 0, 1],[2, 1, 0],[2, 1, 1],[2, 2, 0],[2, 2, 1]])
        self.sellerActions = np.array([[0, 0, 0],[0, 0, 1],[0, 1, 0],[0, 1, 1],[0, 2, 0],[0, 2, 1],
                                      [1, 0, 0],[1, 0, 1],[1, 1, 0],[1, 1, 1],[1, 2, 0],[1, 2, 1],
                                       [2, 0, 0],[2, 0, 1],[2, 1, 0],[2, 1, 1],[2, 2, 0],[2, 2, 1]])
    
        self.indexActions = self.init_indexActions()
        self.Prices = self.init_Prices()
        self.Profits = self.init_Profits()

        # QL
        self.numSessions = 100
        self.maxIters = 1000000
        self.delta = kwargs.get('delta', 0.95)
        self.alpha = kwargs.get('alpha', 0.15) * np.ones(self.numPlayers)
        self.beta = kwargs.get('beta', 0.00001) * np.ones(self.numPlayers)
        self.lengthStates = self.numPlayers * self.memory
        self.lengthStrategies = self.numPlayers * self.numStates
        self.Q = self.init_Q()
        self.cStates = self.init_cStates()
        self.cActions = self.init_cActions()

    def init_TrueValue(self, i):
        if i == 0:
            return 200
        elif i == 25:
            return 250
        else:
            return 320

    def init_TrueCost(self, i):
        if i == 0:
            return 130
        elif i == 25:
            return 80
        else:
            return 10

    def init_Prices(self):
        pricesArray = np.zeros((3, 3))
        for cb in range(0, 3):
            for vs in range(0, 3):
                # (vs-200) - (130-cb) + 165
                pricesArray[cb][vs] = (self.value[vs] - 200) - (130 - self.cost[cb]) + 165
        return pricesArray

    def init_indexActions(self):
        indexActions = []
        for i in range(self.numActions):
            indexActions.append(convertNumberBase(i, self.numiActions, self.numPlayers))
        indexActions = np.array(indexActions)
        return indexActions

    def init_Profits(self):
        Profits = np.zeros((2, self.numActions, self.numPlayers))
        Actions = np.zeros((self.numActions, 6))
        for i in range(self.numActions):
            b = int(self.indexActions[i][0])
            s = int(self.indexActions[i][1])
            vb, cb, vb_a = self.buyerActions[b]
            vs, cs, cs_a = self.sellerActions[s]
            Actions[i] = vb, cb, vb_a, vs, cs, cs_a

            
            # if no arbitration
            # profit for buyer: true value - price - investment_buyer
            pb_na = self.trueValue - self.Prices[cb][vs] - self.buyerInvestment
            # profit for seller: price - true cost - investment_seller
            ps_na = self.Prices[cb][vs] - self.trueCost - self.sellerInvestment
            Profits[0][i] = [pb_na, ps_na]

            # if into arbitration
            if vb_a == 0:
                # no_trade
                pb1 = -300 - self.buyerInvestment
                ps1 = -300 - self.sellerInvestment
            elif vb_a == 1:
                # accept offer price
                pb1 = self.trueValue - (self.value[vb] + 5) - 300 - self.buyerInvestment
                ps1 = (self.value[vb] + 5) + 300 - self.trueCost - self.sellerInvestment

            if cs_a == 0:
                # no_trade
                pb2 = -300 - self.buyerInvestment
                ps2 = -300 - self.sellerInvestment
            elif cs_a == 1:
                pb2 = self.trueValue - (self.value[cs] - 5) + 300 - self.buyerInvestment
                ps2 = (self.value[cs] - 5) - 300 - self.trueCost - self.sellerInvestment


            pb_a = 0.5 * (pb1 + pb2)
            ps_a = 0.5 * (ps1 + ps2)

            Profits[1][i] = [pb_a, ps_a]

        return Profits

    def init_Q(self):
        Q = np.zeros((self.numStates, self.numiActions, self.numPlayers))
        # Randomize over the opponents decision
        for iAgent in range(self.numPlayers):
            for iReport in range(self.numiActions):
                den = np.count_nonzero(self.indexActions[:, iAgent] == iReport) * (1 - self.delta)
                Q[:, iReport, iAgent] = np.ma.array((1 - self.eps) * self.Profits[0][:, iAgent] + \
                                self.eps * self.Profits[1][:, iAgent],
                                mask=(self.indexActions[:, iAgent] != iReport)).sum() / den
        return Q

    def init_cStates(self):
        """Initialize cStates"""
        x = np.arange(self.lengthStates - 1, -1, -1)
        cStates = self.numiActions ** x
        return cStates

    def init_cActions(self):
        """Initialize cActions"""
        x = np.arange(self.numPlayers - 1, -1, -1)
        cActions = self.numiActions ** x
        return cActions

if __name__ == '__main__':
    spi = SPIModel()
    print("Prices", spi.Prices)
    print("Profits", spi.Profits)
    #print("Q", spi.Q[0])
    print("cStates", spi.cStates)
    print("cActions", spi.cActions)
    # profit1_df = pd.DataFrame(spi.Profits[0])
    # profit1_df.to_excel("p1.xlsx")
    # profit2_df = pd.DataFrame(spi.Profits[1])
    # profit2_df.to_excel("p2.xlsx")



Prices [[165. 215. 285.]
 [115. 165. 235.]
 [ 45.  95. 165.]]
Profits [[[  10.   130. ]
  [  10.   130. ]
  [  10.   130. ]
  ...
  [  10.   130. ]
  [  10.   130. ]
  [  10.   130. ]]

 [[-375.  -325. ]
  [ -47.5 -232.5]
  [-375.  -325. ]
  ...
  [-110.   250. ]
  [-412.5  132.5]
  [-145.   285. ]]]
cStates []
cActions [18  1]
