# Robot Run

# TODO
* debugging
  * https://medium.com/@anthonypjshaw/python-3-7s-new-builtin-breakpoint-a-quick-tour-4f1aebc444c
  * %debug
  * https://davidhamann.de/2017/04/22/debugging-jupyter-notebooks/
* Noch einfacheres Maze for mini max
* minimax
    * why none, none as terminal state and depth
    * Path to best move wie bei Depth first
* alpha beta umsetzen

Local Installation:
* https://www.anaconda.com/download/
* git clone git@github.com:DJCordhose/haw.git
* cd haw/notebooks
* jupyter notebook

Or clone on Azure Notebooks
* https://notebooks.azure.com/djcordhose/libraries/ai-haw

## The Game
In a certain terrain a Robot (R) plays against a Human player (H)
* Both Human and Robot try to reach a goal which is at the same distance from both of them
* Blocks (B) and both players block each other

In [3]:
terrain = [
    ["_", "R", "_", "_"],
    ["H", "_", "B", "_"],
    ["_", "_", "B", "_"],
    ["B", "_", "G", "_"]   
]

## Basic Game Playing Code

In [102]:
from copy import deepcopy
from math import sqrt, pow

robot_symbol = 'R'
robot_win_symbol = '*'
goal_symbol = 'G'
human_symbol = 'H'
human_win_symbol = '#'
blank_symbol = '_'

def field_contains(state, symbol):
    for row in state:
        for field in row:
            if field == symbol:
                return True
    return False   

def is_robot_win(state):
    return field_contains(state, robot_win_symbol)  

def is_human_win(state):
    return field_contains(state, human_win_symbol)  

def as_string(state):
    s = ''
    for row in state:
        row_string = ''
        for field in row:
            row_string += field + ' '
        s += row_string + '\n'
    return s

def locate(state, what):
    for row_index, row in enumerate(state):
        for column_index, field in enumerate(row):
            if field == what:
                return (row_index, column_index)

def check_position(state, position):
    max_row = len(state) - 1
    max_column = len(state[0]) - 1
    if position[0] < 0 or position[0] > max_row or position[1] < 0 or position[1] > max_column:
        return False
    symbol = state[position[0]][position[1]]
    if symbol != blank_symbol and symbol != goal_symbol:
        return False
    return True
            
def player_moves(state, player_symbol):
    player = locate(state, player_symbol)
    left = (player[0], player[1] - 1)
    right = (player[0], player[1] + 1)
    up = (player[0] - 1, player[1])
    down = (player[0] + 1, player[1])
    valid_moves = [move for move in (left, right, down, up) if check_position(state, move)]
    return valid_moves
            
def place_player(state, player, player_symbol, player_win_symbol):
    old_player = locate(state, player_symbol)
    new_state = deepcopy(state)
    new_state[old_player[0]][old_player[1]] = blank_symbol
    if new_state[player[0]][player[1]] == goal_symbol:
        new_state[player[0]][player[1]] = player_win_symbol
    else:
        new_state[player[0]][player[1]] = player_symbol
    return new_state

def expand(state, player_symbol, player_win_symbol):
    valid_moves = player_moves(state, player_symbol)
    new_states = [(position, place_player(state, position, player_symbol, player_win_symbol)) for position in valid_moves]
    return new_states

def expand_robot(state):
    return expand(state, robot_symbol, robot_win_symbol)

def expand_human(state):
    return expand(state, human_symbol, human_win_symbol)

def distance(pos1, pos2):
    if pos1 and pos2:
        return sqrt(pow(pos1[0] - pos2[0], 2) + pow(pos1[1] - pos2[1], 2))
    else:
        return 0
    
def estimate_state(state):
    goal_position = locate(state, goal_symbol)

    robot_position = locate(state, robot_symbol)
    human_position = locate(state, human_symbol)
    
    robot_distance = distance(robot_position, goal_position)
    human_distance = distance(human_position, goal_position)

    estimated_value = human_distance - robot_distance 
    return estimated_value

## Depth first search as a recursive solution

In [103]:
# https://en.wikipedia.org/wiki/Depth-first_search
# 1  procedure DFS(G,v):
# 2      label v as discovered
# 3      for all edges from v to w in G.adjacentEdges(v) do
# 4          if vertex w is not labeled as discovered then
# 5              recursively call DFS(G,w)

def depth_first_search(state, max_depth=10, debug=False, closed_list=[], depth = 0, path=[]):
    if as_string(state) in closed_list or depth > max_depth:
        return None
    
    if debug:
        print('depth', depth)
        print('closed_list', closed_list)
        print('path', path)
        print('state', as_string(state))
        
    if is_robot_win(state):
        return path
    
    closed_list = closed_list + [as_string(state)]
    
    for move, next_state in expand_robot(state):
        new_path = path + [move]
        res = depth_first_search(next_state, max_depth, debug, closed_list, depth + 1, new_path)
        if res:
            return res

### This quite obviously is not the shortest path, but who cares, as long as your robot wins

In [104]:
depth_first_search(terrain)

[(0, 2), (0, 3), (1, 3), (2, 3), (3, 3), (3, 2)]

## This is not good enough, because now we have an adversary

In [144]:
# https://en.wikipedia.org/wiki/Minimax
# robot is maximizer, human is minimizer

def mini_max(state, is_robot_move=True, max_depth=10, debug=False, depth = 0):
    if debug:
        print('-----')
        print('inspected state')
        print(as_string(state))
        print('is_robot_move', is_robot_move)
        print('depth', depth)
        
    if depth > max_depth:
        estimated_value = estimate_state(state)
        if debug:
            print('estimation at edge {}'.format(estimated_value))
        return (estimated_value, None, state, depth)
    
    if is_robot_win(state):
        if debug:
            print('robot win detected')
        return (float('inf'), None, state, depth)
    
    if is_human_win(state):
        if debug:
            print('human win detected')
        return (float('-inf'), None, state, depth)
    
    if debug:
        print('*** what moves are possible from here?')
    if is_robot_move:
        best_value = float('-inf')
        best_move = None
        best_terminal_state = None
        best_terminal_depth = None
        for move, next_state in expand_robot(state):
            value_for_move, _, terminal_state, terminal_depth =\
                mini_max(next_state, is_robot_move=False, max_depth=max_depth, debug=debug, depth = depth + 1)
            if value_for_move > best_value:
                best_value = value_for_move
                best_move = next_state
                best_terminal_state = terminal_state
                best_terminal_depth = terminal_depth
        if debug:
            print('*** completed looking at moves on level {}'.format(depth))
            print('best robot score', best_value)
            print('best robot move', as_string(best_move))
        return (best_value, best_move, best_terminal_state, best_terminal_depth)
    else:
        best_value = float('inf')
        best_move = None
        best_terminal_state = None
        best_terminal_depth = None
        for move, next_state in expand_human(state):
            value_for_move, _, terminal_state, terminal_depth =\
                mini_max(next_state, is_robot_move=True, max_depth=max_depth, debug=debug, depth = depth + 1)
            if value_for_move < best_value:
                best_value = value_for_move
                best_move = next_state
                best_terminal_state = terminal_state
                best_terminal_depth = terminal_depth
        if debug:
            print('*** completed looking at moves on level {}'.format(depth))
            print('best human score', best_value)
            print('best human move', as_string(best_move))
        return (best_value, best_move, best_terminal_state, best_terminal_depth)

In [145]:
terrain

[['_', 'R', '_', '_'],
 ['H', '_', 'B', '_'],
 ['_', '_', 'B', '_'],
 ['B', '_', 'G', '_']]

### It seems like who ever starts wins

In [146]:
mini_max(terrain)

(inf,
 [['_', '_', '_', '_'],
  ['H', 'R', 'B', '_'],
  ['_', '_', 'B', '_'],
  ['B', '_', 'G', '_']],
 None,
 None)

In [148]:
mini_max(terrain, is_robot_move=False)

(-inf,
 [['_', 'R', '_', '_'],
  ['_', 'H', 'B', '_'],
  ['_', '_', 'B', '_'],
  ['B', '_', 'G', '_']],
 None,
 None)

In [133]:
terrain

[['_', 'R', '_', '_'],
 ['H', '_', 'B', '_'],
 ['_', '_', 'B', '_'],
 ['B', '_', 'G', '_']]

In [134]:
mini_max(terrain, max_depth = 1, debug=True)

-----
inspected state
_ R _ _ 
H _ B _ 
_ _ B _ 
B _ G _ 

is_robot_move True
depth 0
*** what moves are possible from here?
-----
inspected state
R _ _ _ 
H _ B _ 
_ _ B _ 
B _ G _ 

is_robot_move False
depth 1
*** what moves are possible from here?
-----
inspected state
R _ _ _ 
_ H B _ 
_ _ B _ 
B _ G _ 

is_robot_move True
depth 2
estimation at edge -1.3694832979641993
-----
inspected state
R _ _ _ 
_ _ B _ 
H _ B _ 
B _ G _ 

is_robot_move True
depth 2
estimation at edge -1.3694832979641993
*** completed looking at moves on level 1
best human score -1.3694832979641993
best human move R _ _ _ 
_ H B _ 
_ _ B _ 
B _ G _ 

-----
inspected state
_ _ R _ 
H _ B _ 
_ _ B _ 
B _ G _ 

is_robot_move False
depth 1
*** what moves are possible from here?
-----
inspected state
_ _ R _ 
_ H B _ 
_ _ B _ 
B _ G _ 

is_robot_move True
depth 2
estimation at edge -0.7639320225002102
-----
inspected state
_ _ R _ 
_ _ B _ 
H _ B _ 
B _ G _ 

is_robot_move True
depth 2
estimation at edge -0.76393202

(0.0,
 [['_', '_', '_', '_'],
  ['H', 'R', 'B', '_'],
  ['_', '_', 'B', '_'],
  ['B', '_', 'G', '_']])

## We are checking on a lot of obviously stupid moves
* if we did not we could look at more promising moves instead
* this of course would only pay off in larger mazes

In [None]:
# https://en.wikipedia.org/wiki/Alpha%E2%80%93beta_pruning
# function alphabeta(state, depth, phase, alpha = - Number.MAX_VALUE, beta = Number.MAX_VALUE, maxPlayer=true) {