In [None]:
import numpy as np

# Define grid size
rows, cols = 5, 5

# Initialize grid
grid = np.zeros((rows, cols))
# Define passenger pickup points and their rewards
regular_reward = 20
premium_reward = 30


locations={'A':(0,1),'B':(0,3),'C':(4,0),'D':(4,4)}

# Define restricted area (negative reward)
restricted_area = [(0, 2),(1,2),(2,2)]


# Define live-in reward
live_in_reward = -0.1

# Define possible actions: Up, Down, Left, Right
actions = [(-1, 0), (1, 0), (0, -1), (0, 1)]

# Define transition probabilities
intended_direction_probability = 0.8
unintended_direction_probability = 0.1  #its for one direction

start_position=[(3,1)]

# Define discount factor
gamma = 0.9


# Perform Value Iteration
def valueIterationAlogirthm(grid,restricted_area,pickup_points):
  num_iterations = 100
  for k in range(num_iterations):
      new_grid = np.copy(grid)
      for i in range(rows):
          for j in range(cols):
              if (i, j) not in  restricted_area and (i,j) not in pickup_points:
                  max_value = float('-inf')
                  for action in actions:
                      next_i, next_j = i + action[0], j + action[1]
                      if 0 <= next_i < rows and 0 <= next_j < cols:
                          if (next_i,next_j)  in pickup_points:
                            intended_value = grid[next_i, next_j] * 0.9
                          else:
                            intended_value = grid[next_i, next_j] * intended_direction_probability
                          unintended_value = sum(grid[i + a[0], j + a[1]] if 0 <= i + a[0] < rows and 0 <= j + a[1] < cols else 0 for a in actions if a != action) * unintended_direction_probability
                          value = (intended_value + unintended_value)* gamma
                          value += live_in_reward
                          if value > max_value:
                              max_value = value
                  new_grid[i, j] = max_value
      if np.max(new_grid-grid)<0.01:
        break
      grid = np.copy(new_grid)
  return grid


#Main function
def reinforcementAlgorithm(pickup_points):
  grid = np.zeros((rows, cols))
  pickups=[]
  for point,req in pickup_points.items():
    if req['type'] =='R':
      grid[req['pos']] = regular_reward
    else:
       grid[req['pos']] = premium_reward
    pickups.append(req['pos'])
  print("pickups",pickups)
  for i in restricted_area:
    grid[i] = -10
  value_grid=valueIterationAlogirthm(grid,restricted_area,pickups)
  return value_grid,pickups

In [None]:
#given grid this functions returns taxi path from its start location to pickuppoint
def getmeTaxiPath(start_location,grid,pickup_points):
  path = [start_location]
  while True:
    i=start_location[0]
    j=start_location[1]
    if start_location in pickup_points:
      break
    else:
      max_action = np.argmax([grid[i + a[0], j + a[1]] if 0 <= i + a[0] < rows and 0 <= j + a[1] < cols else 0  for a in actions])
      start_location=(i + actions[max_action][0],
          j + actions[max_action][1])
      path.append(start_location)
  return path

In [None]:
#R1 The agent should pick up a regular customer from position A. In this scenario only one customer has made a request to the agent. The taxi starts in position (2,2). To demonstrate that your code is working,
#produce the optimal policy.
pickup_points={'A':{'pos':(0,1),'type':'R'}}
value_grid,pickups=reinforcementAlgorithm(pickup_points)
print(value_grid)
start_location=(3,1)
print("TAXI PATH------->",getmeTaxiPath(start_location,value_grid,pickups))

pickups [(0, 1)]
[[ 17.50044097  20.         -10.           2.57629282   3.98317587]
 [ 15.5604549   17.88438051 -10.           4.47956492   5.34885679]
 [ 13.42972907  14.26599401 -10.           6.6197475    6.51965601]
 [ 11.58068439  13.11781668  10.12706305   9.21247157   7.69780117]
 [  9.23069603  11.0289028    9.48463814   8.13760122   6.45189901]]
TAXI PATH-------> [(3, 1), (2, 1), (1, 1), (0, 1)]


In [None]:
#R2 The agent should pick up a premium customer from position B. In this scenario only one customer has
#made a request to the agent. The taxi’s start position is (2,2). To demonstrate that your code is working,
#visualize the optimal policy using your preferred method.

pickup_points={'B':{'pos':(0,3),'type':'R'}}
value_grid,pickups=reinforcementAlgorithm(pickup_points)
print(value_grid)
start_location=(3,1)
print("TAXI PATH--------->",getmeTaxiPath(start_location,value_grid,pickups))


pickups [(0, 3)]
[[  3.98317587   2.57629282 -10.          20.          17.50044097]
 [  5.34885679   4.47956492 -10.          17.88438051  15.5604549 ]
 [  6.51965601   6.6197475  -10.          14.26599401  13.42972907]
 [  7.69780117   9.21247157  10.12706305  13.11781668  11.58068439]
 [  6.45189901   8.13760122   9.48463814  11.0289028    9.23069603]]
TAXI PATH---------> [(3, 1), (3, 2), (3, 3), (2, 3), (1, 3), (0, 3)]


In [None]:
#R3  The agent should pick up a premium customer from position B. In this scenario two customers have
#made requests to the agent, a regular customer at point A and a premium customer at position B. The
#taxi’s start position is (2,2). To demonstrate that your code is working, visualize the optimal policy using
#your preferred method.
pickup_points={'B':{'pos':(0,3),'type':'P'},'A':{'pos':(0,1),'type':'R'}}
value_grid,pickups=reinforcementAlgorithm(pickup_points)
print(value_grid)
start_location=(3,1)
print("TAXI PATH ..........",getmeTaxiPath(start_location,value_grid,pickups))

pickups [(0, 3), (0, 1)]
[[ 17.50406374  20.         -10.          30.          26.35362045]
 [ 15.60070278  17.91276822 -10.          27.48162527  23.92911475]
 [ 13.65025909  14.54114797 -10.          22.53338371  20.7845696 ]
 [ 13.43375309  15.73696013  16.80111895  20.84688487  18.08443914]
 [ 10.88812643  13.58197811  15.2996238   17.5904421   14.50395149]]
TAXI PATH .......... [(3, 1), (3, 2), (3, 3), (2, 3), (1, 3), (0, 3)]


In [None]:
#R4 : Run 1000 episodes with random locations of customers. At any given episode, a customer request
#originating at any of the 4 pickup points is generated at random. A further request (at a point also chosen
#at random but different from the first chosen point) may happen with probability 0.6. When two requests
#are received, one of these two (randomly chosen) requests happen to be from a premium customer with
#probability 0.3. The taxi position should also be chosen randomly from any location in the grid except for
#position R.
import random

index_locations={0:'A',1:'B',2:'C',3:'D'}
customer_type={0:'P',1:'R'}
probability_second_request=0.6

#genrate random requests of customers with probabilites mentioned
def generateRequest():
  requests={}
  value=random.randint(0,3)
  customer_pickUp_point=index_locations[value]
  customer_coordinates=locations[customer_pickUp_point]
  cust_type1=customer_type[random.randint(0,1)]
  cust_type2=''
  requests[customer_pickUp_point]={'pos':customer_coordinates,'type': cust_type1}
  if(random.random()<probability_second_request):
    exclude=[value]
    numbers = [num for num in range(0, 4) if num not in exclude]
    value=random.choice(numbers)
    customer_pickUp_point=index_locations[value]
    customer_coordinates=locations[customer_pickUp_point]
    if random.random() < 0.3:
             cust_type2 = 'P'
    else:
      cust_type2 = 'R'
    requests[customer_pickUp_point]={'pos':customer_coordinates,'type': cust_type2}
  return requests,[cust_type1,cust_type2]


# randomly generate taxi start location
def generateRandomTaxiStart_location():
   valid_cells = [(i, j) for i in range(4) for j in range(4) if (i, j) not in  restricted_area]
   random_value_index=random.randint(0,len(valid_cells)-1)
   return valid_cells[random_value_index]

count=0 # no of times premium customer has been preferred
number_times_request=0 # no of times two requests of Premium and Regular customer request has been sent
for i in range(0,1000):
  print("Episode..............", i)
  start_location=generateRandomTaxiStart_location()
  customer_requests,type_cust=generateRequest()
  value_grid,pickups=reinforcementAlgorithm(customer_requests)
  print(value_grid)
  current_location=getmeTaxiPath(start_location,value_grid,pickups)
  start_location=current_location[len(current_location)-1]
  if(len(pickups)==2 and 'P' in type_cust and 'R' in type_cust):
    number_times_request+=1
    tuple_values=customer_requests.values()
    for i in tuple_values:
      if i['pos']==start_location and i['type']=='P':
        count+=1

print("fraction in which agent chooses premium over regular----->",count/number_times_request)







[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 [ 16.72413857  21.08420515  24.65254524  28.88409698  30.        ]]
Episode.............. 286
pickups [(0, 3), (4, 4)]
[[  6.41201552   5.19738073 -10.          20.          17.53342809]
 [  8.40835967   7.80831962 -10.          18.00043765  15.92705108]
 [ 10.04171315  10.54113212 -10.          15.18923468  16.5415391 ]
 [ 11.72286868  13.79897601  14.7897295   18.16734973  19.2237938 ]
 [ 10.93714031  13.86539786  16.30454465  19.20246013  20.        ]]
Episode.............. 287
pickups [(4, 0)]
[[ 16.02982357  13.65437975 -10.           9.53977478  10.3973828 ]
 [ 20.69648827  18.34957234 -10.          13.33884779  13.40081864]
 [ 24.58658463  22.58135395 -10.          17.05589629  15.78405864]
 [ 28.87743015  27.38485856  22.8650435   21.43724877  18.26038478]
 [ 30.          28.88322817  24.65100652  21.08306843  16.72209062]]
Episode.............. 288
pickups [(0, 3)]
[[  3.98317587   2.57629282 -10.          20.  

In [None]:
!jupyter nbconvert --to pdf /content/AI_Project_3.ipynb

[NbConvertApp] Converting notebook /content/AI_Project_3.ipynb to pdf
[NbConvertApp] Writing 317595 bytes to notebook.tex
[NbConvertApp] Building PDF
[NbConvertApp] Running xelatex 3 times: ['xelatex', 'notebook.tex', '-quiet']
[NbConvertApp] Running bibtex 1 time: ['bibtex', 'notebook']
[NbConvertApp] PDF successfully created
[NbConvertApp] Writing 233795 bytes to /content/AI_Project_3.pdf
