<a href="https://colab.research.google.com/github/IshtiSikder/Optimal-allocation-of-rescue-teams-for-hostage-situations/blob/test/toy_prob.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# CTMC MDP model for rescue operational system!!!

prob <- function(opSet = policySet, t, lam, eps, mu) {
  opTRM = matrix(0, nrow = t+1, ncol = t+1)
  for (i in 2:t) {
    opTRM[i, i-1] = sum(i*eps, opSet[i]*mu)
    opTRM[i, i+1] = lam
  }
  opTRM[1, 2] = lam
  opTRM[t+1, t] = sum(t*eps, opSet[t+1]*mu)

  k = t+1
  steady_prob <- matrix(0, nrow=1, ncol=k)
  state <- matrix(0, nrow=1, ncol=k-1)
  for (i in 1:(k-1)) {
    if (i-2 < 0) {
      state[i] <- (lam/opTRM[i+1,i])
    } else {
      state[i] <- state[i-1]*(lam/opTRM[i+1,i])
    }
  }

  steady_prob[1] <- (1+sum(state))^-1
  for (i in 2:k) {
    steady_prob[i] <- state[i-1]*steady_prob[1]
  }
  return(steady_prob)
}




# Check function to determine new policy is same with current or not
checkPolicy = function(policySet, policySetnew) {
  n = 0
  for (i in policySet == policySetnew) {
    if (i == 'FALSE') {
      n = 1
    }
  }
  if (n == 0) {
    return("TRUE")
  } else {
    return("FALSE")
  }
}


# Define MDP function (for basic policy)
MDP = function(lam, mu, eps, numRescue, numTerror, costT, RC, mult) {
  r = numRescue
  t = numTerror

  #calculating costR

  costR = mult*costT*(mu+eps)


  #########################################################
  #EDITS: 22ND FEB,2023
  #NEW SERVICE RATES STRUCTURE, ADD ORIGINAL ONE AS WELL
  aijMatrix = array(0, dim = c(t+1,t+1,r+1))
  for (k in 1:(r+1)) {
    for (i in 1:t) {
      aijMatrix[i,i+1,k] <- lam
      if (i > 1) {
        #if (k==4){
          #if(i==4){
            #cat('min(k-1,i-1)',min(k-1,i-1))}}
        aijMatrix[i,i-1,k] <- sum(min(k-1,i-1)*mu+(i-1)*eps)
      }
    }
    aijMatrix[t+1,t,k] <- (min(k-1,t)*mu+t*eps)
    aijMatrix[1,2,k] <- lam
}

  for (k in 1:(r+1)) {
    for (i in 1:(t+1)) {

      aijMatrix[i,i,k] <- -sum(aijMatrix[i,,k])
    }
  }
  #########################################################


  # Design the MDP algorithm
  # Value determination
  policy = array(0, dim = c(1,t+1))
  policyOld = array(0, dim = c(1,t+1))
  policyNew = array(1, dim=c(1,t+1)) # initially set all decision is 1 in first policy
  policyTrack = array(2, dim=c(1,t+1))
  iteration = 0
  gainMatrix = c()

  while (checkPolicy(policyOld, policyNew) == 'FALSE') {
    policyOld = policyNew
    policy = policyNew
    # Design the transition matrix regarding policy
    A = array(0, dim = c(t+1,t+1))
    for (i in 1:length(policy)) {
      A[i,] = aijMatrix[i,,(policy[i])]
    }

    # Design the cost matrix regarding policy

    q = array(0,dim=c(t+1,1))
    for (i in 1:length(policy)) {

      if (RC == 'L'){
      cost = (policy[i]-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'SR'){
      cost = sqrt(policy[i]-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'Q'){
      cost = ((policy[i]-1)^2)*costR + (i-1)*eps*costT
      }

      q[i,1] = cost
    }


    newA = cbind(array(1, dim=c(t+1,1)), -A[,1:t])
    solution = solve(newA) %*% q
    gain = solution[1] # First row of solution is gain value
    gainMatrix = append(gainMatrix, gain)

    # Policy improvement
    newsolution = array(0, dim=c(t+1,1))
    for (i in 1:t) {
      newsolution[i] = solution[i+1]
    }


    newsolution[t+1] = 0 # Set last v value as 0 to calcultate the test quantity in policy improvement
    improveMatrix = array(0, dim=c(t+1,r+1))
    cost = array(0,dim=c(t+1,r+1))

    for (i in 1:(t+1)) {
      for (j in 1:(r+1)) {
        # Cost when we have j rescue operation and i terrorists

        if (RC == 'L'){
      cost[i,j] = (j-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'SR'){
      cost[i,j] = sqrt(j-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'Q'){
      cost[i,j] = ((j-1)^2)*costR + (i-1)*eps*costT
      }

        improveMatrix[i,j] = cost[i,j] + aijMatrix[i,,j] %*% newsolution

      }
    }

    # pick the minimum value among the decision
    policyNew = array(1, dim = c(1,t+1))
    for (i in 1:length(policyNew)) {
      policyNew[i] = which.min(improveMatrix[i,])
    }
    iteration = iteration + 1
    policyTrack = rbind(policyTrack, policyNew)
    policyNew
  }
  for (i in 1:length(policyNew)) {
    policyNew[i] = policyNew[i] - 1
  }
  for (i in 1:nrow(policyTrack)) {
    for (j in 1:ncol(policyTrack)) {
      policyTrack[i,j] = policyTrack[i,j] - 1
    }

  steady = prob(opSet = policyNew, t=numTerror, lam = lam, eps = eps, mu = mu)

  gain = solution[1]

  }


   #return(list(policyNew,improveMatrix,cost,aijMatrix,newsolution))

   return (policyNew)

 }

In [2]:
MDP(lam=0.2763, mu=0.2829, eps=0.1718, numRescue=10, numTerror=10, costT=1, RC='L', mult=0.5316190142082907188481)

0,1,2,3,4,5,6,7,8,9,10
0,1,2,3,4,5,6,7,8,9,0


In [3]:
mult_test = function(cost_terror,rc,low_lim,up_lim,incr) {

prev_result = array(100,dim=c(1,11))

for (val in seq(low_lim,up_lim, by = incr)){

        current_result = MDP(lam = 0.2763, mu = 0.2829, eps = 0.1718, numRescue=10, numTerror=10, costT=cost_terror, RC=rc, mult=val)


        if (!(identical(current_result,prev_result))) {

          cat('\n\n','mult:',val,',','RC:',rc,',','\n\n')
          cat('optimal policy:\n')
          print(current_result)


          prev_result = current_result

        }

    }

}

In [80]:
#lowest possible increment possible in R is 0.00000000001, or 10^-11



mult_test(1,'L',0,3,0.0001)




In [None]:
#prob_search algo
#the if-else statements don't seem to be working logically for the actual prob.NEED TO FIX!
#check documentation for setequal(). You are probably applying it incorrectly.


low = 0
up = 0.532
go = 0

prev = 'a'

for (n in 1:100){

val = runif(1,low,up)


result = MDP(lam=0.2763, mu=0.2829, eps=0.1718, numRescue=10, numTerror=10, costT=1, RC='L', mult=val)


if (setequal(result == array(c(0,1,2,3,4,5,6,7,8,9,9)))){
  cat('JACKPOT:\n\n')
  print(val,digits=22)
  cat('result: ',result)
  cat('\n\n')
  go = 1
  break
}

else {
  if (setequal(result,array(c(0,1,2,3,4,5,6,7,8,9,10)))) {
  low = val
  if (!prev == 'a'){
    up = prev
  }
  next
}

else if (setequal(result,array(c(0,1,2,3,4,5,6,7,8,9,0)))){
  if (!prev == 'a'){
    low = prev
  }
  up = val
  next
}

else {
  next
}

}

prev = val


}

if (go == 0) {
cat('low: ')
print(low,digits=22)
cat('\n\n')

cat('up: ')
print(up,digits=22)
cat('\n\n')

cat('result',result)}

JACKPOT:

[1] 0.5314766429013112469804
result:  0 1 2 3 4 5 6 7 8 9 0



Due to the computational limitations in R, to run large and complicated while/for loops are next to impossible. As such, to find the mult value that yields our desired policy for this toy problem (0,1,1) under any cost structure, we can follow these steps:

1. First run the mult_test function to get tentative mult values for (0,1,2) and (0,1,0) while (0,1,1) doesn't yield.

2. Next, run prob_search algo with mult_(0,1,2) as low and mult_(0,1,0) as up value. While choosing the number of iterations under the for loop running for this algo, start from an arbitrary value (i.e 100) and lower the value by i.e 10 if code doesn't finish running in i.e 1 second. Keep doing this till your iteration value has been lowered to 1. The final low and up values generated should be quite closer to each other now.

3. From this point, I am kind of blank.

Actual problem has more than one intermediary policy. How to track them all without an exhaustive search, and with prob search ? I can track assuming there are as many intermediaries as I think there should be, but what happens when I am wrong ?

In [74]:
one = array(c(0,1,2))
one

In [75]:
two = array(c(0,1,2))
two

In [76]:
result = (one == two)

In [79]:
one = array(c(0,2,1))
two = array(c(0,1,2))

test = array(c(0,1,2))

ola = function(one,test){

comp = (one == two)

if (!all(comp)){
  print('nope')
}
}

[1] "nope"


In [21]:
result

In [55]:
runif(1,0,0.541)

In [69]:
# Construct a 5 x 6 matrix
my_matrix <- matrix(1:5, nrow = 5, ncol = 1)

# Calculate the sum of each column
col_sums <- apply(masti, 1, test)

# Print the result
print(col_sums)

      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
 [1,]    0    0    0    0    0    0    0    0    0     0     0     0     0
 [2,]    1    0    1    0    0    0    1    1    1     1     0     0     1
 [3,]    2    0    2    0    0    0    2    2    2     2     0     0     2
 [4,]    3    0    3    0    0    0    3    3    3     3     0     0     3
 [5,]    4    0    4    0    0    0    4    4    4     4     0     0     4
 [6,]    5    0    5    0    0    0    5    5    5     5     0     0     5
 [7,]    6    0    6    0    0    0    6    6    6     6     0     0     6
 [8,]    7    0    7    0    0    0    7    7    7     7     0     0     7
 [9,]    8    0    8    0    0    0    8    8    8     8     0     0     8
[10,]    9    0    9    0    0    0    9    9    9     9     0     0     9
[11,]   10    0   10    0    0    0   10   10   10    10     0     0    10
      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
 [1,]     0     0     

In [45]:
test <- function(val){
  MDP(lam=0.2763, mu=0.2829, eps=0.1718, numRescue=10, numTerror=10, costT=1, RC='L', mult=val)
}

In [67]:
masti = matrix(runif(1000,0,3), nrow = 1000)

In [68]:
masti

0
0.48383092
1.36336083
0.46131328
1.91540746
1.01669525
0.75430958
0.10018428
0.48660004
0.11838809
0.17017655


In [91]:
matrix(seq(0,3, by = 0.000001),nrow=length(seq(0,3, by = 0.000001)))

0
0.0e+00
1.0e-06
2.0e-06
3.0e-06
4.0e-06
5.0e-06
6.0e-06
7.0e-06
8.0e-06
9.0e-06
