##### Silkroad Online Navigation System based on Model-free ML

In [1]:
import sys
import random
from time import sleep
# Include custom modules for additional functionality
sys.path.append("./modules")
from functions import *

# --- Initial Configuration ---

# Every action starts out equally likely (uniform prior over the four moves).
action_probabilities = dict.fromkeys(('UP', 'DOWN', 'LEFT', 'RIGHT'), 0.25)

# Tunable parameters
obstacle_penalty = 0.03   # penalty value associated with hitting an obstacle
proximity_threshold = 20  # the target counts as reached at or below this distance
max_stuck_actions = 4     # consecutive non-moving actions tolerated before flagging an obstacle

# State persisted by earlier runs (pickle files in the working directory)
action_history = load_action_history('action_history.pkl')
detected_obstacles = load_obstacle_points('obstacle_points.pkl')
successful_routes = load_successful_routes('successful_routes.pkl')

# Start and goal coordinates as (x, y) tuples
current_position = (0, 0)
target_position = (6435, 882)

# Per-run progress bookkeeping
initial_distance_to_target = 0
current_route = list()
steps_taken = stuck_counter = 0

# --- Main Navigation Loop ---
#
# Each iteration picks an action, sends it to the game, measures the new
# distance to the target, and then updates obstacle knowledge, the action
# probabilities and the recorded history. The loop ends when the character is
# within `proximity_threshold` of the target or the user presses Ctrl+C.

try:
    while True:
        sleep(0.1)  # Short pause for each step

        # Increment step count and choose an action based on the history
        steps_taken += 1
        action = select_action_based_on_history(action_probabilities, action_history, current_position, target_position)
        x_coord, y_coord = send_command(action)  # Send command and receive new position
        new_position = (x_coord, y_coord)

        # Track the route as it is walked. Without this the route stays empty,
        # so every saved "successful route" is [] and current_step never
        # advances past 0.
        # NOTE(review): assumes routes are sequences of actions (the analysis
        # helper yields "action patterns") - confirm against
        # analyze_successful_routes.
        current_route.append(action)

        # Calculate distance to target
        distance_to_target = ((target_position[0] - x_coord)**2 + (target_position[1] - y_coord)**2)**0.5
        if steps_taken == 1:
            # First measured distance of the run; reported in the final summary.
            # The real position is only known once the first command returns.
            initial_distance_to_target = distance_to_target
        print(f'Distance to target: {distance_to_target}')

        # Check if the character has arrived at the target
        if distance_to_target <= proximity_threshold:
            print(f"Character has arrived at the destination: {new_position}")
            successful_routes.append(list(current_route))  # Record a copy of the successful route
            break

        # Check for and handle obstacles. The flag is captured BEFORE the
        # counter is reset; testing `stuck_counter >= max_stuck_actions`
        # afterwards can never be true, so the reward function would never be
        # told about an obstacle.
        hit_obstacle = False
        if new_position == current_position:
            stuck_counter += 1
            if stuck_counter >= max_stuck_actions:
                detected_obstacles.add(current_position)  # Record new obstacle
                hit_obstacle = True
                stuck_counter = 0  # Reset counter after detecting obstacle
        else:
            stuck_counter = 0  # Reset counter if position changes

        # Adjust action probabilities for the upcoming step
        current_step = len(current_route)
        action_patterns = analyze_successful_routes(successful_routes)
        action_probabilities = adjust_probabilities_based_on_success(action_probabilities, action_patterns, current_step)
        action_probabilities = avoid_obstacles(action_probabilities, current_position, detected_obstacles)

        # Calculate and update rewards
        reward = calculate_reward(current_position, target_position, new_position, hit_obstacle, action_history)
        action_probabilities = update_probabilities(action_probabilities, action, reward)

        # Record action and update position
        record_action_outcome(action_history, current_position, target_position, action, reward)
        print(f"Action: {action}, Reward: {reward}, New Position: {new_position}, Probabilities: {action_probabilities}")
        last_action = action  # Track the last action for penalty adjustment
        current_position = new_position  # Update current position

except KeyboardInterrupt:
    # Ctrl+C is the normal way to stop a run; fall through so state is saved.
    print("Navigation stopped by the user.")

# --- Post-Execution Actions ---

# Summarise the run.
print(f'Initial distance: {initial_distance_to_target}, Steps taken: {steps_taken}')

# Persist everything gathered during the run, in a fixed order:
# action history, then obstacle points, then successful routes.
for _save, _payload, _filename in (
    (save_action_history, action_history, 'action_history.pkl'),
    (save_obstacle_points, detected_obstacles, 'obstacle_points.pkl'),
    (save_successful_routes, successful_routes, 'successful_routes.pkl'),
):
    _save(_payload, _filename)

Distance to target: 127.4401820463232
Action: UP, Reward: 5, New Position: (6535, 803), Probabilities: {'UP': 0.4999999999999999, 'DOWN': 0.16666666666666666, 'LEFT': 0.16666666666666666, 'RIGHT': 0.16666666666666666}
Distance to target: 127.283148923964
Action: UP, Reward: 5, New Position: (6534, 802), Probabilities: {'UP': 0.7499999999999999, 'DOWN': 0.08333333333333333, 'LEFT': 0.08333333333333333, 'RIGHT': 0.08333333333333333}
Distance to target: 127.283148923964
Action: UP, Reward: -0.05, New Position: (6534, 802), Probabilities: {'UP': 0.7474999999999999, 'DOWN': 0.08416666666666667, 'LEFT': 0.08416666666666667, 'RIGHT': 0.08416666666666667}
Distance to target: 128.5496013218244
Action: UP, Reward: -2, New Position: (6534, 800), Probabilities: {'UP': 0.6475000000000001, 'DOWN': 0.11750000000000002, 'LEFT': 0.11750000000000002, 'RIGHT': 0.11750000000000002}
Distance to target: 127.91403363196706
Action: LEFT, Reward: 5, New Position: (6534, 801), Probabilities: {'UP': 0.5641666666