<a href="https://colab.research.google.com/github/IshtiSikder/Optimal-allocation-of-rescue-teams-for-hostage-situations/blob/test/toy_prob.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [53]:
# CTMC MDP model for rescue operational system!!!

prob <- function(opSet = policySet, t, lam, eps, mu) {
  opTRM = matrix(0, nrow = t+1, ncol = t+1)
  for (i in 2:t) {
    opTRM[i, i-1] = sum(i*eps, opSet[i]*mu)
    opTRM[i, i+1] = lam
  }
  opTRM[1, 2] = lam
  opTRM[t+1, t] = sum(t*eps, opSet[t+1]*mu)

  k = t+1
  steady_prob <- matrix(0, nrow=1, ncol=k)
  state <- matrix(0, nrow=1, ncol=k-1)
  for (i in 1:(k-1)) {
    if (i-2 < 0) {
      state[i] <- (lam/opTRM[i+1,i])
    } else {
      state[i] <- state[i-1]*(lam/opTRM[i+1,i])
    }
  }

  steady_prob[1] <- (1+sum(state))^-1
  for (i in 2:k) {
    steady_prob[i] <- state[i-1]*steady_prob[1]
  }
  return(steady_prob)
}




# Check function to determine new policy is same with current or not
checkPolicy = function(policySet, policySetnew) {
  n = 0
  for (i in policySet == policySetnew) {
    if (i == 'FALSE') {
      n = 1
    }
  }
  if (n == 0) {
    return("TRUE")
  } else {
    return("FALSE")
  }
}


# Define MDP function (for basic policy)
MDP = function(lam, mu, eps, numRescue, numTerror, costT, RC, mult) {
  r = numRescue
  t = numTerror

  #calculating costR

  costR = mult*costT*(mu+eps)


  #########################################################
  #EDITS: 22ND FEB,2023
  #NEW SERVICE RATES STRUCTURE, ADD ORIGINAL ONE AS WELL
  aijMatrix = array(0, dim = c(t+1,t+1,r+1))
  for (k in 1:(r+1)) {
    for (i in 1:t) {
      aijMatrix[i,i+1,k] <- lam
      if (i > 1) {
        #if (k==4){
          #if(i==4){
            #cat('min(k-1,i-1)',min(k-1,i-1))}}
        aijMatrix[i,i-1,k] <- sum(min(k-1,i-1)*mu+(i-1)*eps)
      }
    }
    aijMatrix[t+1,t,k] <- (min(k-1,t)*mu+t*eps)
    aijMatrix[1,2,k] <- lam
}

  for (k in 1:(r+1)) {
    for (i in 1:(t+1)) {

      aijMatrix[i,i,k] <- -sum(aijMatrix[i,,k])
    }
  }
  #########################################################


  # Design the MDP algorithm
  # Value determination
  policy = array(0, dim = c(1,t+1))
  policyOld = array(0, dim = c(1,t+1))
  policyNew = array(1, dim=c(1,t+1)) # initially set all decision is 1 in first policy
  policyTrack = array(2, dim=c(1,t+1))
  iteration = 0
  gainMatrix = c()

  while (checkPolicy(policyOld, policyNew) == 'FALSE') {
    policyOld = policyNew
    policy = policyNew
    # Design the transition matrix regarding policy
    A = array(0, dim = c(t+1,t+1))
    for (i in 1:length(policy)) {
      A[i,] = aijMatrix[i,,(policy[i])]
    }

    # Design the cost matrix regarding policy

    q = array(0,dim=c(t+1,1))
    for (i in 1:length(policy)) {

      if (RC == 'L'){
      cost = (policy[i]-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'SR'){
      cost = sqrt(policy[i]-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'Q'){
      cost = ((policy[i]-1)^2)*costR + (i-1)*eps*costT
      }

      q[i,1] = cost
    }


    newA = cbind(array(1, dim=c(t+1,1)), -A[,1:t])
    solution = solve(newA) %*% q
    gain = solution[1] # First row of solution is gain value
    gainMatrix = append(gainMatrix, gain)

    # Policy improvement
    newsolution = array(0, dim=c(t+1,1))
    for (i in 1:t) {
      newsolution[i] = solution[i+1]
    }


    newsolution[t+1] = 0 # Set last v value as 0 to calcultate the test quantity in policy improvement
    improveMatrix = array(0, dim=c(t+1,r+1))
    cost = array(0,dim=c(t+1,r+1))

    for (i in 1:(t+1)) {
      for (j in 1:(r+1)) {
        # Cost when we have j rescue operation and i terrorists

        if (RC == 'L'){
      cost[i,j] = (j-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'SR'){
      cost[i,j] = sqrt(j-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'Q'){
      cost[i,j] = ((j-1)^2)*costR + (i-1)*eps*costT
      }

        improveMatrix[i,j] = cost[i,j] + aijMatrix[i,,j] %*% newsolution

      }
    }

    # pick the minimum value among the decision
    policyNew = array(1, dim = c(1,t+1))
    for (i in 1:length(policyNew)) {
      policyNew[i] = which.min(improveMatrix[i,])
    }
    iteration = iteration + 1
    policyTrack = rbind(policyTrack, policyNew)
    policyNew
  }
  for (i in 1:length(policyNew)) {
    policyNew[i] = policyNew[i] - 1
  }
  for (i in 1:nrow(policyTrack)) {
    for (j in 1:ncol(policyTrack)) {
      policyTrack[i,j] = policyTrack[i,j] - 1
    }

  steady = prob(opSet = policyNew, t=numTerror, lam = lam, eps = eps, mu = mu)

  gain = solution[1]

  }


   #return(list(policyNew,improveMatrix,cost,aijMatrix,newsolution))

   return (policyNew)

 }

In [54]:
MDP(lam=0.2763, mu=0.2829, eps=0.1718, numRescue=10, numTerror=10, costT=1, RC='L', mult=0.5316190142082907188481)

0,1,2,3,4,5,6,7,8,9,10
0,1,2,3,4,5,6,7,8,9,0


In [55]:
table_1 = function(t = 10,lam=0.2763,mu=0.2829,eps=0.1718,print_all = TRUE){

all_data = list()

for (r in 1:(t+1)){

  policy = c(1:(t+1))

  for (i in 1:(t+1)){

  policy[i] = min(r,policy[i])

  }

  current_data = prob(opSet = policy, t, lam, eps, mu)

  interim_lst = list()
  interim_lst = append(r,current_data)


  all_data = append(all_data,interim_lst)

  if (print_all == TRUE){

  cat('\n','# of teams:',r-1,"=>",current_data,'\n')

  }

}

return (all_data)

}


In [56]:
all_data = table_1 (t = 10,lam=0.2763,mu=0.2829,eps=0.1718,print_all = TRUE)


 # of teams: 0 => 0.6059274 0.2672271 0.09249008 0.02634266 0.006374005 0.001340593 0.0002493477 4.157048e-05 6.279549e-06 8.671295e-07 1.197401e-07 

 # of teams: 1 => 0.7129669 0.2166184 0.05535669 0.01220675 0.002367156 0.0004096488 6.400473e-05 9.114786e-06 1.192431e-06 1.442634e-07 1.745336e-08 

 # of teams: 2 => 0.7254089 0.2203986 0.04464198 0.008030848 0.001299364 0.0001910158 2.572889e-05 3.197738e-06 3.689235e-07 3.971386e-08 4.275116e-09 

 # of teams: 3 => 0.7266378 0.220772 0.04471761 0.006793202 0.0009429126 0.0001204804 1.42613e-05 1.572385e-06 1.622414e-07 1.573108e-08 1.525301e-09 

 # of teams: 4 => 0.7267465 0.220805 0.0447243 0.006794219 0.000825706 9.32984e-05 9.849967e-06 9.758492e-07 9.106871e-08 8.032653e-09 7.085146e-10 

 # of teams: 5 => 0.7267552 0.2208076 0.04472483 0.006794299 0.0008257158 8.362484e-05 7.967428e-06 7.166484e-07 6.104635e-08 4.938545e-09 3.995198e-10 

 # of teams: 6 => 0.7267558 0.2208078 0.04472487 0.006794305 0.0008257165 8.362491e-05 

In [57]:
call_data = function(no_of_teams,t = 10,lam=0.2763,mu=0.2829,eps=0.1718){

  no_of_teams = no_of_teams + 1

  all_data = table_1 (t,lam,mu,eps,FALSE)


  val = match(c(no_of_teams),all_data)

  data = all_data[(val+1):(val+(t+1))]

  return (data)
}

In [58]:
#steady state prob for 0 to 10 hostage incidents against i.e a policy of (6,6,6,...........)

STH = call_data(no_of_teams = 0,t = 10,lam=0.2763,mu=0.2829,eps=0.1718)

STH

In [59]:
table_2 = function(t = 10,lam=0.2763,mu=0.2829,eps=0.1718){


op_perf = list()

for (no_teams in 1:(t+1)){

steady_vals = call_data(no_of_teams = no_teams-1,t,lam,mu,eps)


policy = c(1:(t+1))

for (i in 1:(t+1)){

  policy[i] = min(no_teams,policy[i])

  }


expected_hostage = 0

for (i in 1:(t+1)){

  expected_hostage = expected_hostage + (steady_vals[[i]])*(i)

}


expected_teams = 0

for (i in 1:(t+1)){

  expected_teams = expected_teams + (steady_vals[[i]])*policy[i]

}


op_perf = append(op_perf,expected_teams/expected_hostage)

}

return (op_perf)

}


In [60]:
table_2(t = 10,lam=0.2763,mu=0.2829,eps=0.1718)

In [61]:
'''
colors = c('green','blue','red')

vals = c(0.5,1,2)

R = c(1:11)

x = seq(0,1, by = 0.1)

jpeg(file= '/content/figure5.jpeg')

for (i in vals){

    operational_performance = table_2 (t = 10,lam= i*0.2763,mu=0.2829,eps=0.1718)


    idx = match(i,vals)


    plot(R, operational_performance , yaxt = "n", type="l", col= colors[idx])

    legend(7,0.55, legend=c("less frequently(0.5λ)","Base(λ)", "more frequntly(2λ)"),
       fill = c('green','blue','red'))


    par(new=TRUE)

}

axis(2, at=x, las=2)

grid()


dev.off()

'''

ERROR: ignored

In [None]:
colors = c('green','blue','red')

vals = c(0.5,1,2)

teams = c(1:11)

jpeg(file= '/content/gain_2lambda.jpeg')

i=

    operational_performance = table_2 (t = 10,lam= i*0.2763,mu=0.2829,eps=0.1718)

    print(operational_performance)


    idx = match(i,vals)


    plot(teams-1, operational_performance , yaxt = "n", type="l", col= colors[idx],xlab='Total no. of rescue teams deployed',ylab='Operational performance')



    par(new=TRUE)

# X-axis
axis(1, at = teams)

Y = list()

for (i in operational_performance){

  Y = append(Y,round(i,2))

}

# Y-axis
axis(2, at = Y)

grid()


dev.off()

In [None]:
'''

costT = 1
eps=0.1718
mu=0.2829
mult = 1
costR = mult*costT*(mu+eps)

colors = c('green','blue','red')

#vals = c('L','SR','Q')

teams = c(1:11)
#lim = length(policies)

rc = 'SR'

cost = list()


x = 1:11

jpeg(file= '/content/diff_costStructures.png')

for (i in teams){

    if (rc == 'L'){
          c = (i-1)*costR + 10*eps*costT
          cost = append(cost,c)
          }

    if (rc == 'SR'){
          c = sqrt(i-1)*costR + 10*eps*costT
          cost = append(cost,c)
          }

    if (rc == 'Q'){
          c = ((i-1)^2)*costR + 10*eps*costT
          cost = append(cost,c)
          }
  }

    lst = mult_test(cost_terror=1,rc=rc,t=10,r=10,lam=0.2763,mu=0.2829,eps=0.1718,low_lim=0,up_lim=3,incr=0.01,policy=c(0,1,2,3,4,5,6,7,8,9,10))

    cost = unlist(cost)

    idx = match(rc,vals)

    print('teams:')

    print(length(teams))

    print('cost:')
    print(length(cost))

    print('team list:')
    print(teams)

    print('cost list:')
    print(cost)

    teams = teams-1


    plot(teams, cost , xaxt = 'n', yaxt = "n", xlab = 'Total no. of rescue teams deployed' , ylab = 'Total cost incurred',type="o", col= colors[idx])


    par(new=TRUE)



# X-axis
axis(1, at = teams)

# Y-axis
axis(2, at = c(0,25,50,75,100,125,150))

grid()


dev.off()

'''

# New Section

In [62]:
prob <- function(opSet = policySet, t, lam, eps, mu) {
  opTRM = matrix(0, nrow = t+1, ncol = t+1)
  for (i in 2:t) {
    opTRM[i, i-1] = sum(i*eps, opSet[i]*mu)
    opTRM[i, i+1] = lam
  }
  opTRM[1, 2] = lam
  opTRM[t+1, t] = sum(t*eps, opSet[t+1]*mu)

  k = t+1
  steady_prob <- matrix(0, nrow=1, ncol=k)
  state <- matrix(0, nrow=1, ncol=k-1)
  for (i in 1:(k-1)) {
    if (i-2 < 0) {
      state[i] <- (lam/opTRM[i+1,i])
    } else {
      state[i] <- state[i-1]*(lam/opTRM[i+1,i])
    }
  }

  steady_prob[1] <- (1+sum(state))^-1
  for (i in 2:k) {
    steady_prob[i] <- state[i-1]*steady_prob[1]
  }
  return(steady_prob)
}




# Check function to determine new policy is same with current or not
checkPolicy = function(policySet, policySetnew) {
  n = 0
  for (i in policySet == policySetnew) {
    if (i == 'FALSE') {
      n = 1
    }
  }
  if (n == 0) {
    return("TRUE")
  } else {
    return("FALSE")
  }
}


# Define MDP function (for basic policy)
MDP = function(lam, mu, eps, numRescue, numTerror, costT, RC, mult) {
  r = numRescue
  t = numTerror

  #calculating costR

  costR = mult*costT*(mu+eps)


  #########################################################
  #EDITS: 22ND FEB,2023
  #NEW SERVICE RATES STRUCTURE, ADD ORIGINAL ONE AS WELL
  aijMatrix = array(0, dim = c(t+1,t+1,r+1))
  for (k in 1:(r+1)) {
    for (i in 1:t) {
      aijMatrix[i,i+1,k] <- lam
      if (i > 1) {
        #if (k==4){
          #if(i==4){
            #cat('min(k-1,i-1)',min(k-1,i-1))}}
        aijMatrix[i,i-1,k] <- sum(min(k-1,i-1)*mu+(i-1)*eps)
      }
    }
    aijMatrix[t+1,t,k] <- (min(k-1,t)*mu+t*eps)
    aijMatrix[1,2,k] <- lam
}

  for (k in 1:(r+1)) {
    for (i in 1:(t+1)) {

      aijMatrix[i,i,k] <- -sum(aijMatrix[i,,k])
    }
  }
  #########################################################


  # Design the MDP algorithm
  # Value determination
  policy = array(0, dim = c(1,t+1))
  policyOld = array(0, dim = c(1,t+1))
  policyNew = array(1, dim=c(1,t+1)) # initially set all decision is 1 in first policy
  policyTrack = array(2, dim=c(1,t+1))
  iteration = 0
  gainMatrix = c()

  while (checkPolicy(policyOld, policyNew) == 'FALSE') {
    policyOld = policyNew
    policy = policyNew
    # Design the transition matrix regarding policy
    A = array(0, dim = c(t+1,t+1))
    for (i in 1:length(policy)) {
      A[i,] = aijMatrix[i,,(policy[i])]
    }

    # Design the cost matrix regarding policy

    q = array(0,dim=c(t+1,1))
    for (i in 1:length(policy)) {

      if (RC == 'L'){
      cost = (policy[i]-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'SR'){
      cost = sqrt(policy[i]-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'Q'){
      cost = ((policy[i]-1)^2)*costR + (i-1)*eps*costT
      }

      q[i,1] = cost
    }


    newA = cbind(array(1, dim=c(t+1,1)), -A[,1:t])
    solution = solve(newA) %*% q
    gain = solution[1] # First row of solution is gain value
    gainMatrix = append(gainMatrix, gain)

    # Policy improvement
    newsolution = array(0, dim=c(t+1,1))
    for (i in 1:t) {
      newsolution[i] = solution[i+1]
    }


    newsolution[t+1] = 0 # Set last v value as 0 to calcultate the test quantity in policy improvement
    improveMatrix = array(0, dim=c(t+1,r+1))
    cost = array(0,dim=c(t+1,r+1))

    for (i in 1:(t+1)) {
      for (j in 1:(r+1)) {
        # Cost when we have j rescue operation and i terrorists

        if (RC == 'L'){
      cost[i,j] = (j-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'SR'){
      cost[i,j] = sqrt(j-1)*costR + (i-1)*eps*costT
      }

      if (RC == 'Q'){
      cost[i,j] = ((j-1)^2)*costR + (i-1)*eps*costT
      }

        improveMatrix[i,j] = cost[i,j] + aijMatrix[i,,j] %*% newsolution

      }
    }

    # pick the minimum value among the decision
    policyNew = array(1, dim = c(1,t+1))
    for (i in 1:length(policyNew)) {
      policyNew[i] = which.min(improveMatrix[i,])
    }
    iteration = iteration + 1
    policyTrack = rbind(policyTrack, policyNew)
    policyNew
  }
  for (i in 1:length(policyNew)) {
    policyNew[i] = policyNew[i] - 1
  }
  for (i in 1:nrow(policyTrack)) {
    for (j in 1:ncol(policyTrack)) {
      policyTrack[i,j] = policyTrack[i,j] - 1
    }

  steady = prob(opSet = policyNew, t=numTerror, lam = lam, eps = eps, mu = mu)

  gain = solution[1]

  }


   #return(list(policyNew,improveMatrix,cost,aijMatrix,newsolution))

   return (policyNew)

 }




fixed = function(policy,mult,rc,costT,lam,mu,eps,t,r){

  policy = policy + 1

  aijMatrix = array(0, dim = c(t+1,t+1,r+1))
  for (k in 1:(r+1)) {
    for (i in 1:t) {
      aijMatrix[i,i+1,k] <- lam
      if (i > 1) {
        aijMatrix[i,i-1,k] <- sum(min(k-1,i-1)*mu+(i-1)*eps)
      }
    }
    aijMatrix[t+1,t,k] <- (min(k-1,t)*mu+t*eps)
    aijMatrix[1,2,k] <- lam
}

  for (k in 1:(r+1)) {
    for (i in 1:(t+1)) {

      aijMatrix[i,i,k] <- -sum(aijMatrix[i,,k])
    }
  }




  A = array(0, dim = c(t+1,t+1))

  for (i in 1:length(policy)) {

      A[i,] = aijMatrix[i,,(policy[i])]
    }

    # Design the cost matrix regarding policy

    q = array(0,dim=c(t+1,1))

    costR = mult*costT*(mu+eps)

    for (i in 1:length(policy)) {

      if (rc == 'L'){
      cost = (policy[i]-1)*costR + (i-1)*eps*costT
      }

      if (rc == 'SR'){
      cost = sqrt(policy[i]-1)*costR + (i-1)*eps*costT
      }

      if (rc == 'Q'){
      cost = ((policy[i]-1)^2)*costR + (i-1)*eps*costT
      }

      q[i,1] = cost
    }


    newA = cbind(array(1, dim=c(t+1,1)), -A[,1:t])
    solution = solve(newA) %*% q
    gain = solution[1]

    return(gain)


}





mult_test = function(cost_terror=1,rc,t=10,r=10,lam=0.2763,mu=0.2829,eps=0.1718,low_lim,up_lim,incr,policy=c(0,1,2,3,4,5,6,7,8,9,10)) {

prev_result = array(100,dim=c(1,t+1))


list_mult = list()
list_sum = list()
list_gain = list()
list_fixed = list()
list_policy = list()


aijMatrix = array(0, dim = c(t+1,t+1,r+1))
  for (k in 1:(r+1)) {
    for (i in 1:t) {
      aijMatrix[i,i+1,k] <- lam
      if (i > 1) {
        #if (k==4){
          #if(i==4){
            #cat('min(k-1,i-1)',min(k-1,i-1))}}
        aijMatrix[i,i-1,k] <- sum(min(k-1,i-1)*mu+(i-1)*eps)
      }
    }
    aijMatrix[t+1,t,k] <- (min(k-1,t)*mu+t*eps)
    aijMatrix[1,2,k] <- lam
}

  for (k in 1:(r+1)) {
    for (i in 1:(t+1)) {

      aijMatrix[i,i,k] <- -sum(aijMatrix[i,,k])
    }
  }

if (low_lim != up_lim){

for (val in seq(low_lim,up_lim, by = incr)){

        current_result = MDP(lam, mu, eps, numRescue=r, numTerror=t, costT=cost_terror, RC=rc, mult=val)


        if (!(identical(current_result,prev_result))) {

          list_policy = append(list_policy,list(current_result))

          steady_prob = prob(current_result, t, lam, mu, eps)

          sum = 0

          for (i in 1:length(current_result)){
            sum = sum + current_result[i]*steady_prob[i]
          }

          sum = sum*mu

          list_mult = append(list_mult,val)
          list_sum = append(list_sum,sum)

          current = current_result + 1

          A = array(0, dim = c(11,11))
          for (i in 1:length(current)) {
          A[i,] = aijMatrix[i,,(current[i])]
          }

          # Design the cost matrix regarding policy

          q = array(0,dim=c(11,1))

          costT = cost_terror

          costR = val*costT*(mu+eps)

          for (i in 1:length(current)) {

          if (rc == 'L'){
          cost = (current[i]-1)*costR + (i-1)*eps*costT
          }

          if (rc == 'SR'){
          cost = sqrt(current[i]-1)*costR + (i-1)*eps*costT
          }

          if (rc == 'Q'){
          cost = ((current[i]-1)^2)*costR + (i-1)*eps*costT
          }

          q[i,1] = cost
          }


          newA = cbind(array(1, dim=c(t+1,1)), -A[,1:t])
          solution = solve(newA) %*% q
          gain = solution[1]

          fixed_gain = fixed(policy,val,rc,cost_terror,lam,mu,eps,t,r)

          list_gain = append(list_gain,gain)
          list_fixed = append(list_fixed,fixed_gain)



          cat('\n\n','mult:')
          print(val)
          cat('\n','Rate of success/failure:')
          print(sum)
          cat('\n','gain:')
          print(gain)

          cat('\n','gain for fixed policy:')
          print(fixed_gain)

          cat('\n','change in gain compared to fixed policy:')
          print(gain-fixed_gain)


          cat('\nRC:',rc,',','\n\n')
          cat('optimal policy:\n\n')
          print(current_result)
          cat('\nsteady_prob:\n')
          print(steady_prob)



          prev_result = current_result

        }

    }

}

else {

  current_result = MDP(lam, mu, eps, numRescue=r, numTerror=t, costT=cost_terror, RC=rc, mult=low_lim)

  list_policy = append(list_policy,list(current_result))

  steady_prob = prob(current_result, t, lam, mu, eps)

  sum = 0

  for (i in 1:length(current_result)){
            sum = sum + current_result[i]*steady_prob[i]
          }

  sum = sum*mu

  list_mult = append(list_mult,low_lim)
  list_sum = append(list_sum,sum)

  current = current_result + 1

  A = array(0, dim = c(11,11))
  for (i in 1:length(current)) {
          A[i,] = aijMatrix[i,,(current[i])]
          }

  # Design the cost matrix regarding policy

  q = array(0,dim=c(11,1))

  costT = cost_terror

  costR = low_lim*costT*(mu+eps)

  for (i in 1:length(current)) {

  if (rc == 'L'){
          cost = (current[i]-1)*costR + (i-1)*eps*costT
          }

  if (rc == 'SR'){
          cost = sqrt(current[i]-1)*costR + (i-1)*eps*costT
          }

  if (rc == 'Q'){
          cost = ((current[i]-1)^2)*costR + (i-1)*eps*costT
          }

  q[i,1] = cost
          }


          newA = cbind(array(1, dim=c(t+1,1)), -A[,1:t])
          solution = solve(newA) %*% q
          gain = solution[1]

          fixed_gain = fixed(policy,low_lim,rc,cost_terror,lam,mu,eps,t,r)

          list_gain = append(list_gain,gain)
          list_fixed = append(list_fixed,fixed_gain)



          cat('\n\n','mult:')
          print(low_lim)
          cat('\n','Rate of success/failure:')
          print(sum)
          cat('\n','gain:')
          print(gain)

          cat('\n','gain for fixed policy:')
          print(fixed_gain)

          cat('\n','change in gain compared to fixed policy:')
          print(gain-fixed_gain)


          cat('\nRC:',rc,',','\n\n')
          cat('optimal policy:\n\n')
          print(current_result)
          cat('\nsteady_prob:\n')
          print(steady_prob)



          #prev_result = current_result


}

return (list(list_mult,list_sum,list_gain,list_fixed,list_policy))

}

In [None]:
#lowest possible increment possible in R is 0.00000000001, or 10^-11



lst = mult_test(cost_terror=1,rc='L',t=10,r=10,lam=0.2763,mu=0.2829,eps=0.1718,low_lim=0,up_lim=3,incr=0.01,policy=c(0,1,2,3,4,5,6,7,8,9,10))


In [None]:
costT = 1
eps=0.1718
mu=0.2829
mult = 1
costR = mult*costT*(mu+eps)

colors = c('green','blue','red')

#vals = c('L','SR','Q')

teams = c(1:11)
#lim = length(policies)

rc = 'SR'

cost = list()


x = 1:11

jpeg(file= '/content/diff_costStructures.png')

for (i in teams){

    if (rc == 'L'){
          c = (i-1)*costR + 10*eps*costT
          cost = append(cost,c)
          }

    if (rc == 'SR'){
          c = sqrt(i-1)*costR + 10*eps*costT
          cost = append(cost,c)
          }

    if (rc == 'Q'){
          c = ((i-1)^2)*costR + 10*eps*costT
          cost = append(cost,c)
          }
  }

    cost = unlist(cost)

    idx = match(rc,vals)

    print('teams:')

    print(length(teams))

    print('cost:')
    print(length(cost))

    print('team list:')
    print(teams)

    print('cost list:')
    print(cost)

    teams = teams-1


    plot(teams, cost , xaxt = 'n', yaxt = "n", xlab = 'Total no. of rescue teams deployed' , ylab = 'Total cost incurred',type="o", col= colors[idx])


    par(new=TRUE)



# X-axis
axis(1, at = teams)

# Y-axis
axis(2, at = c(0,25,50,75,100,125,150))

grid()


dev.off()

In [None]:


####################################### create base scatter plot

colors = c('green','blue','red')

vals = c('L','SR','Q')

policies = c(1:22)
lim = length(policies)

x = seq(0,3, by = 0.1)

jpeg(file= '/content/gain_compare.png')

for (i in vals){

    lst = mult_test(cost_terror=1,rc=i,t=10,r=10,lam=0.2763,mu=0.2829,eps=0.1718,low_lim=0,up_lim=3,incr=0.01,policy=c(0,1,2,3,4,5,6,7,8,9,10))

    y = lst[[3]]

    elem = length(y)

    last_value = rep(y[elem], lim-elem)

    y = c(y,last_value)

    gain = unlist(y)

    idx = match(i,vals)

    plot(policies, gain , yaxt = "n", type="l", col= colors[idx])

    legend(x="bottomright", legend=c("Linear cost increase","Squre root cost increase", "Quadratic cost increase"),
       fill = c('green','blue','red'))


    par(new=TRUE)

}

axis(2, at=x, las=2)

grid()


dev.off()

In [None]:
mult_val = unlist(lst[[1]])
rate_of_success_or_failure = unlist(lst[[2]])
gain = unlist(lst[[3]])
fixed = unlist(lst[[4]])
policy = lst[[5]]
steady_probs = lst[6]

In [141]:
cost_terror=1
lam=0.2763
mu=0.2829
eps=0.1718
t=10

lst = mult_test(cost_terror=1,rc='Q',t=10,r=10,lam=0.2763,mu=0.2829,eps=0.1718,low_lim=0,up_lim=3,incr=0.01,policy=c(0,1,2,3,4,5,6,7,8,9,10))

policy = lst[[5]]
gain = unlist(lst[[3]])



 mult:[1] 0

 Rate of success/failure:[1] 0.1149728

 gain:[1] 0.1043949

 gain for fixed policy:[1] 0.1043949

 change in gain compared to fixed policy:[1] 0

RC: Q , 

optimal policy:

     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
[1,]    0    1    2    3    4    5    6    7    8     9    10

steady_prob:
          [,1]     [,2]       [,3]        [,4]        [,5]         [,6]
[1,] 0.6765419 0.253428 0.05872865 0.009852292 0.001295232 0.0001399908
             [,7]         [,8]         [,9]        [,10]        [,11]
[1,] 1.284562e-05 1.024077e-06 7.217255e-08 4.557797e-09 2.769561e-10


 mult:[1] 0.02

 Rate of success/failure:[1] 0.1149728

 gain:[1] 0.1132787

 gain for fixed policy:[1] 0.1132787

 change in gain compared to fixed policy:[1] -1.797712e-11

RC: Q , 

optimal policy:

     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
[1,]    0    1    2    3    4    5    6    7    8     9     9

steady_prob:
          [,1]     [,2]       [,3]        [,4] 

In [144]:
prob_val = 0.95

big_list = list()

count_1 = 0
for (policy_i in policy){
  count_1 = count_1 + 1
  mu_list = array(0,dim=c(1,t+1))
  #cat('policy:',policy_i,'\n')
  std_prob = prob(policy_i, t, lam, eps, mu)
  for (j in 1:11){

    if (policy_i[j] != 0){

    policy_ij = policy_i[j]
    idx = j - 1
    #cat('idx:',idx,'\n')
    #prob_val = ((policy_ij*mu)/((policy_ij*mu)+(idx*eps)))
    #cat('prob_val:',prob_val,'\n')

    mu = (prob_val*idx*eps)/((1-prob_val)*policy_ij)

    mu_list[j] = mu

    }

    else {

       mu_list[j] = 0

    }


    }
    count_2 = 0
    cat('Policy',count_1,'\n')
    for (mu in mu_list){
      count_2 = count_2 + 1
      if (count_2 != 11) {
      cat('&  \\multicolumn{1}{c|}{',mu,'} ')
    }
    else {
      cat('& ', mu_list[11], '& ')
      cat(gain[count_1] ,'\\')
      cat('\\ \\hline','\n')
    }

}
}

Policy 1 
&  \multicolumn{1}{c|}{ 0 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  3.2642 & 0.1043949 \\ \hline 
Policy 2 
&  \multicolumn{1}{c|}{ 0 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  3.626889 & 0.1132787 \\ \hline 
Policy 3 
&  \multicolumn{1}{c|}{ 0 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.2642 } &  \multicolumn{1}{c|}{ 3.

In [132]:
#prob_val = 0.50

#big_list = list()

count_1 = 0
for (policy_i in policy){
  count_1 = count_1 + 1
  prob_list = array(0,dim=c(1,t+1))
  #cat('policy:',policy_i,'\n')
  #std_prob = prob(policy_i, t, lam, eps, mu)
  for (j in 1:11){

    if (policy_i[j] != 0){

    policy_ij = policy_i[j]
    idx = j - 1
    #cat('idx:',idx,'\n')
    prob_val = ((policy_ij*mu)/((policy_ij*mu)+(idx*eps)))
    #cat('prob_val:',prob_val,'\n')

    #mu = (prob_val*idx*eps)/((1-prob_val)*policy_ij)

    prob_list[j] = prob_val

    }

    else {

       prob_list[j] = 0

    }


    }
    count_2 = 0
    cat('Policy',count_1,'\n')
    for (prob_i in prob_list){
      count_2 = count_2 + 1
      if (count_2 != 11) {
      cat('&  \\multicolumn{1}{c|}{',prob_i,'} ')
    }
    else {
      cat('& ', prob_list[11], '& ')
      cat(gain[count_1] ,'\\')
      cat('\\ \\hline','\n')
    }

}
}

Policy 1 
&  \multicolumn{1}{c|}{ 0 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  0.6221685 & 0.1043949 \\ \hline 
Policy 2 
&  \multicolumn{1}{c|}{ 0 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  0.5971014 & 0.1132787 \\ \hline 
Policy 3 
&  \multicolumn{1}{c|}{ 0 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}{ 0.6221685 } &  \multicolumn{1}{c|}

In [98]:
mu_list[-1]

In [94]:
cat('Policy1','\n')
cat('&','\n')


cat('\\multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')

#\
cat('multicolumn){1}{c|}{')
0
cat('}       &')


Policy1 
& 
\multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &multicolumn){1}{c|}{

}       &

In [88]:
letters[1:8]

In [None]:
prob = 0/1
if (is.nan(prob)){
  print('yes')
  }

In [None]:
prob

In [None]:
t = 10
r = 10
lam=0.2763
mu=0.2829
eps=0.1718


aijMatrix = array(0, dim = c(t+1,t+1,r+1))
  for (k in 1:(r+1)) {
    for (i in 1:t) {
      aijMatrix[i,i+1,k] <- lam
      if (i > 1) {
        #if (k==4){
          #if(i==4){
            #cat('min(k-1,i-1)',min(k-1,i-1))}}
        aijMatrix[i,i-1,k] <- sum(min(k-1,i-1)*mu+(i-1)*eps)
      }
    }
    aijMatrix[t+1,t,k] <- (min(k-1,t)*mu+t*eps)
    aijMatrix[1,2,k] <- lam
}

  for (k in 1:(r+1)) {
    for (i in 1:(t+1)) {

      aijMatrix[i,i,k] <- -sum(aijMatrix[i,,k])
    }
  }

i

In [None]:
aijMatrix

In [None]:


####################################### create base scatter plot

colors = c('green','blue','red')

vals = c('L','SR','Q')

policies = c(1:22)
lim = length(policies)

x = seq(0,3, by = 0.1)

jpeg(file= '/content/gain_compare.png')

for (i in vals){

    lst = mult_test(cost_terror=1,rc=i,t=10,r=10,lam=0.2763,mu=0.2829,eps=0.1718,low_lim=0,up_lim=3,incr=0.01,policy=c(0,1,2,3,4,5,6,7,8,9,10))

    y = lst[[3]]

    elem = length(y)

    last_value = rep(y[elem], lim-elem)

    y = c(y,last_value)

    gain = unlist(y)

    idx = match(i,vals)

    plot(policies, gain , yaxt = "n", type="l", col= colors[idx])

    legend(x="bottomright", legend=c("Linear cost increase","Squre root cost increase", "Quadratic cost increase"),
       fill = c('green','blue','red'))


    par(new=TRUE)

}

axis(2, at=x, las=2)

grid()


dev.off()

In [None]:


####################################### create base scatter plot

colors = c('green','blue','red')

vals = c('L','SR','Q')

policies = c(1:22)
lim = length(policies)

x = seq(0,3, by = 0.1)

jpeg(file= '/content/gain_compare.png')

for (i in vals){

    lst = mult_test(cost_terror=1,rc=i,t=10,r=10,lam=0.2763,mu=0.2829,eps=0.1718,low_lim=0,up_lim=3,incr=0.01,policy=c(0,1,2,3,4,5,6,7,8,9,10))

    y = lst[[3]]

    elem = length(y)

    last_value = rep(y[elem], lim-elem)

    y = c(y,last_value)

    gain = unlist(y)

    idx = match(i,vals)

    plot(policies, gain , yaxt = "n", type="l", col= colors[idx])

    legend(x="bottomright", legend=c("Linear cost increase","Squre root cost increase", "Quadratic cost increase"),
       fill = c('green','blue','red'))


    par(new=TRUE)

}

axis(2, at=x, las=2)

grid()


dev.off()

In [None]:
plot(mult_val,gain,'s')

# overlay line plot

#points(mult_val, fixed,'s',col='green')

In [None]:
plot(mult_val,fixed,'s')

In [None]:
 #THE FOLLOWING CAN BE AN ALTERNATIVE TO THE mult_test
 #tried R's apply function to avoid the delay in for loops. This doesn't work properly.
 #For some reason, runif is not creating the assigned number of random values, but less.


 mult_vals = runif(10000000,0,0.5320000000000000284217)

  mult_vals = sort(mult_matrix, decreasing=FALSE)

  mult_matrix = matrix(mult_vals,nrow=length(mult_vals))

  MDP_mult <- function(val){
  MDP(lam=0.2763, mu=0.2829, eps=0.1718, numRescue=10, numTerror=10, costT=1, RC='L', mult=val)
}

policy_matrix <- apply(mult_matrix, 1, MDP_mult)

print(unique(policy_matrix,MARGIN = 2))

In [None]:
#prob_search algo
#the if-else statements don't seem to be working logically for the actual prob.NEED TO FIX!
#check documentation for setequal(). You are probably applying it incorrectly.

#update1 (27TH JUNE): Rather than setequal, use double equals between arrays to compare, and then use all() function to check if all same index elements are equal between both arrays.

#update2 (27TH JUNE) : For the actual prob, guess one possible intermediary policy between two found, and run the following to get close enough upper and lower bounds.
#Then using those bounds, use the mult_test alternative to check for a large number of values in between bounds. Might work, tho clunky af.

#update3(28th JUNE) :

#1. FOR MDP(lam=0.2763, mu=0.2829, eps=0.1718, numRescue=10, numTerror=10, costT=1, RC='L', mult=val), val = 0.5312087939634377864451 yields (0,1,2,.......,9)
#2. Need to figure out how does R handle large numbers, as in how many digits does it consider after the decimal point when doing any operation
#3. If possible, convert the following to apply method, cause for loops are still clunky when the lower and upper bounds get closer together.
#4. For 2, check this link: https://floating-point-gui.de/basic/

#update3(2nd July):

#1. R is precise roughly till 16 digits after the decimal point. Rest is noise.
#2. Check algo captured on iphone camera, try it out.
#3. Is there any way to figure out unique column indices from a R column, not just the columns themselves (using unique() with MARGIN=2 applied on the matrix)?

low = 0.5312087939634377864451
up = 0.5312087939634614341955

#go = 0

prev = 'a'

for (n in 1:3){

val = runif(1,low,up)


result = MDP(lam=0.2763, mu=0.2829, eps=0.1718, numRescue=10, numTerror=10, costT=1, RC='L', mult=val)

array_1 = array(c(0,1,2,3,4,5,6,7,8,9,10))
array_2 = array(c(0,1,2,3,4,5,6,7,8,9,0))



if (all(result == matrix(array_1,nrow=1,ncol=length(array_1)))){
  low = val
  if (!prev == 'a'){
    up = prev
  }
  next
}

else if (all(result == matrix(array_2,nrow=1,ncol=length(array_2)))){
  if (!prev == 'a'){
    low = prev
  }
  up = val
  next
}

else {
  cat('inter_MULT: ')
  print(val,digits=16)
  cat('\ninter_RESULT: ',result)
  cat('\n\n')
  #go = 1
 next

}



# #if (setequal(result == array(c(0,1,2,3,4,5,6,7,8,9,9)))){
# #  cat('JACKPOT:\n\n')
# #  print(val,digits=22)
# #  cat('result: ',result)
# #  cat('\n\n')
#   go = 1
#   break
# }

# # else {
# #   if (setequal(result,array(c(0,1,2,3,4,5,6,7,8,9,10)))) {
# #   low = val
# #   if (!prev == 'a'){
# #     up = prev
# #   }
# #   next
# # }

# else if (setequal(result,array(c(0,1,2,3,4,5,6,7,8,9,0)))){
#   if (!prev == 'a'){
#     low = prev
#   }
#   up = val
#   next
# }

# else {
#   next
# }

#}

prev = val


}

#if (go == 0) {
cat('low: ')
print(low,digits=16)
cat('\n\n')

cat('val: ')
print(val,digits=16)
cat('\n\n')

cat('up: ')
print(up,digits=)
cat('\n\n')

cat('result',result)
#}

In [None]:
MDP_mult <- function(val){
  MDP(lam=0.2763, mu=0.2829, eps=0.1718, numRescue=10, numTerror=10, costT=1, RC='L', mult=val)
}

In [None]:
low = 0
up = 3

vals = unique(sort(runif(10,low,up),decreasing = FALSE))
mat = matrix(vals,nrow=length(vals))
policy <- apply(mat, 1, MDP_mult)
print(mat)
print(policy)

In [None]:
sth = unique(policy,MARGIN=2)

In [None]:
sth

In [None]:
new = policy == sth[,1]

In [None]:
num_uniqCols = dim(sth)[2]



In [None]:
which(policy==sth)

Due to the computational limitations in R, to run large and complicated while/for loops are next to impossible. As such, to find the mult value that yields our desired policy for this toy problem (0,1,1) under any cost structure, we can follow these steps:

1. First run the mult_test function to get tentative mult values for (0,1,2) and (0,1,0) while (0,1,1) doesn't yield.

2. Next, run prob_search algo with mult_(0,1,2) as low and mult_(0,1,0) as up value. While choosing the number of iterations under the for loop running for this algo, start from an arbitrary value (i.e 100) and lower the value by i.e 10 if code doesn't finish running in i.e 1 second. Keep doing this till your iteration value has been lowered to 1. The final low and up values generated should be quite closer to each other now.

3. From this point, I am kind of blank.

Actual problem has more than one intermediary policy. How to track them all without an exhaustive search, and with prob search ? I can track assuming there are as many intermediaries as I think there should be, but what happens when I am wrong ?