In [1]:
import cv2
import numpy as np
import random
import vizdoom as vzd
import matplotlib.pyplot as plt
import os


In [5]:
# Discrete action set: 16 action vectors with 5 components each.
# NOTE(review): what each slot controls is determined by the button order in
# datagen.cfg, which is not visible here; the labels below are the author's.
possible_actions = [
    [1, 0, 0, 0, 0],      # 0:  Move forward
    [0, 1, 0, 0, 0],      # 1:  Move backward
    [0, 0, -30, 0, 0],    # 2:  Turn left large
    [0, 0, 30, 0, 0],     # 3:  Turn right large
    [0, 0, -15, 0, 0],    # 4:  Turn left medium
    [0, 0, 15, 0, 0],     # 5:  Turn right medium
    [0, 0, -5, 0, 0],     # 6:  Turn left small
    [0, 0, 5, 0, 0],      # 7:  Turn right small
    [0, 0, 0, 10, 0],     # 8:  Move forward delta large
    [0, 0, 0, -10, 0],    # 9:  Move backward delta large
    [0, 0, 0, 5, 0],      # 10: Move forward delta medium
    [0, 0, 0, -5, 0],     # 11: Move backward delta medium
    [0, 0, 0, 2, 0],      # 12: Move forward delta small
    [0, 0, 0, -2, 0],     # 13: Move backward delta small
    [0, 0, 0, 0, 5],      # 14: Move right delta
    [0, 0, 0, 0, -5],     # 15: Move left delta
]

# Tabular Q-function: one row per discretized state, one column per action.
STATE_SIZE = 10000
q_table = np.zeros(shape=(STATE_SIZE, len(possible_actions)))

# First set of hyperparameters. The same five names are assigned again
# further down in this cell, right before the training loop runs.
learning_rate, discount_factor = 0.1, 0.95
exploration_rate, exploration_decay = 0.5, 0.99
min_exploration_rate = 0.01

# Configure and start the ViZDoom instance on a randomly drawn map.
game = vzd.DoomGame()
game.load_config('../data/resources/datagen.cfg')
game.set_doom_game_path("../data/resources/doom2.wad")
game.set_doom_scenario_path('../data/maps_manykeys_aug/30x30.wad')
# randint's upper bound is exclusive, so map_index is in 1..98 and the map
# names used are map02..map99 (zero-padded to two digits).
map_index = np.random.randint(1, 99)
game.set_objects_info_enabled(True)
game.set_doom_map(f'map{map_index + 1:02d}')
game.set_window_visible(True)
game.init()
game.new_episode()

# Video sink for the frames written by the training loop: 10 fps, 320x240, color.
fourcc = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')
video = cv2.VideoWriter('debug0.avi', fourcc, 10, (320, 240), isColor=True)

def get_state_features(state):
    """Extract the raw (continuous) features used to index the Q-table.

    Args:
        state: An object exposing ``game_variables`` (a sequence of exactly
            four values unpacked as x, y, z, angle) and ``depth_buffer``
            (a 2-D array indexable up to row 120 / column 219, or ``None``),
            or ``None`` when no state is available.

    Returns:
        A 5-tuple ``(pos_x, pos_y, angle, center_depth, wall_distance)``.
        ``center_depth`` is the mean depth of a 2x2 patch around column 160;
        ``wall_distance`` is the average of that patch and two patches around
        columns 101 and 219, all taken on rows 119-120. All zeros are returned
        when ``state`` is ``None``; both depth features are 0 when the depth
        buffer is missing.
    """
    if state is None:
        return (0, 0, 0, 0, 0)

    # pos_z is unpacked only to validate the 4-variable layout; it is unused.
    pos_x, pos_y, _pos_z, angle = state.game_variables

    depth_buffer = state.depth_buffer
    if depth_buffer is None:
        # Previously center_depth was left unassigned on this path, so the
        # return statement raised UnboundLocalError.
        return (pos_x, pos_y, angle, 0, 0)

    # Sample three small patches on the same two rows of the depth image.
    rows = slice(119, 121)
    center_depth = depth_buffer[rows, 159:161].mean()
    left_depth = depth_buffer[rows, 100:102].mean()
    right_depth = depth_buffer[rows, 218:220].mean()
    wall_distance = (left_depth + center_depth + right_depth) / 3

    return (pos_x, pos_y, angle, center_depth, wall_distance)

def get_state_index(features, state_size=None):
    """Hash continuous state features into a discrete Q-table row index.

    Args:
        features: Indexable with at least five numeric entries, in the order
            produced by ``get_state_features``:
            ``(pos_x, pos_y, angle, center_depth, wall_distance)``.
        state_size: Number of rows in the Q-table. Defaults to the
            module-level ``STATE_SIZE`` (looked up at call time), which keeps
            existing single-argument calls working unchanged.

    Returns:
        An int in ``[0, state_size)``.

    Note:
        This is a lossy hash, not a unique encoding: the depth and wall bins
        are simply added together, the weighted terms can overlap, and the
        final modulo wraps large values, so different states may share a row.
    """
    if state_size is None:
        state_size = STATE_SIZE

    # int() truncates toward zero, so negative coordinates give negative bins;
    # the final modulo maps the result back into range.
    x_bin = int(features[0] / 30)
    y_bin = int(features[1] / 30)
    angle_bin = int((features[2] + 180) / 30)  # 30-degree wide bins
    depth_bin = int(features[3] / 15)
    wall_bin = int(features[4] / 20)

    combined = x_bin * 1000 + y_bin * 100 + angle_bin * 10 + depth_bin + wall_bin
    return combined % state_size

def calculate_reward(state, key_positions):
    """Score a state by its distance to the nearest key and its wall proximity.

    Args:
        state: Object exposing ``game_variables`` (four values; the first two
            are used as x and y) and ``depth_buffer`` (2-D array or ``None``),
            or ``None`` when no state is available.
        key_positions: Iterable of dicts with numeric ``'x'`` and ``'y'``
            entries. May be empty.

    Returns:
        ``-1.0`` when ``state`` is ``None``; otherwise the distance reward
        plus the wall penalty. With no key positions there is no distance
        term and only the wall penalty is returned (previously this case
        raised ``ValueError`` from ``min()`` on an empty list).
    """
    if state is None:
        return -1.0

    pos_x, pos_y, _, _ = state.game_variables

    # Wall penalty from the smallest of three 2x2 depth patches (left,
    # center, right) sampled on rows 119-120 of the depth image.
    wall_penalty = 0.0
    depth_buffer = state.depth_buffer
    if depth_buffer is not None:
        left_depth = depth_buffer[119:121, 100:102].mean()
        center_depth = depth_buffer[119:121, 159:161].mean()
        right_depth = depth_buffer[119:121, 218:220].mean()

        nearest_wall = min(left_depth, center_depth, right_depth)
        if nearest_wall < 30:      # very close to a wall
            wall_penalty = -5.0
        elif nearest_wall < 50:    # moderately close to a wall
            wall_penalty = -2.0

    # Without any key there is nothing to measure distance against.
    if not key_positions:
        return wall_penalty

    # Planar Euclidean distance to the closest key.
    min_distance = min(
        np.sqrt((pos_x - kp['x']) ** 2 + (pos_y - kp['y']) ** 2)
        for kp in key_positions
    )

    # NOTE(review): the tiers are not monotonic at their boundaries (e.g. just
    # under 50 yields ~0.16 while exactly 50 yields 1.5), so stepping closer
    # can lower the reward. Left unchanged here; confirm whether intended.
    if min_distance < 20:          # very close to a key
        distance_reward = 15.0
    elif min_distance < 50:        # moderately close
        distance_reward = 8.0 * (1 - min_distance / 50)
    elif min_distance < 100:       # within range
        distance_reward = 3.0 * (1 - min_distance / 100)
    else:                          # far from every key
        distance_reward = -0.5 * (min_distance / 100)

    return distance_reward + wall_penalty

# Hyperparameters actually used by the loop below (these assignments replace
# any values the same names were given earlier in the cell).
learning_rate = 0.2  # Slightly increased to learn faster
discount_factor = 0.99
exploration_rate = 0.3  # Much lower initial exploration
exploration_decay = 0.995  # Moderate decay, applied once per step
min_exploration_rate = 0.01  # Floor for the exploration rate

# Run a single episode of epsilon-greedy tabular Q-learning, writing each
# observed frame to the video file. The game and the video writer are always
# released, even if the loop raises.
try:
    episode_reward = 0
    steps = 0
    max_steps = 2000  # Prevent infinite episodes

    state = game.get_state()
    if state is None:
        raise ValueError("Failed to initialize game state")

    # Key positions are read once from the initial state and reused for the
    # whole episode.
    temp_objects = state.objects
    key_positions = [
        {'x': obj.position_x, 'y': obj.position_y, 'z': obj.position_z, 'angle': obj.angle}
        for obj in temp_objects
        if obj.name == 'RedCard'
    ]

    if not key_positions:
        raise ValueError("No keys found in the environment")

    while not game.is_episode_finished() and steps < max_steps:
        state = game.get_state()
        # Check for a missing state BEFORE touching its attributes; the
        # original printed state.game_variables first, which would raise
        # AttributeError on None and made this guard unreachable.
        if state is None:
            break
        print(state.game_variables)

        # Reorder axes (1, 2, 0) for the video writer.
        # NOTE(review): presumably channel-first -> height x width x channel;
        # the channel order expected by cv2 (BGR) vs. the configured screen
        # format is not visible here -- confirm against datagen.cfg.
        st = state.screen_buffer.transpose(1, 2, 0)

        # Discretize the current state into a Q-table row.
        state_features = get_state_features(state)
        current_state_index = get_state_index(state_features)

        # Epsilon-greedy action selection.
        if random.uniform(0, 1) < exploration_rate:
            action_index = random.choice(range(len(possible_actions)))
        else:
            action_index = np.argmax(q_table[current_state_index])

        # Execute the action and score the resulting state.
        game.make_action(possible_actions[action_index])
        next_state = game.get_state()
        reward = calculate_reward(next_state, key_positions)
        episode_reward += reward

        # Q-learning update.
        # NOTE(review): when next_state is None the transition is skipped
        # entirely, so the reward for that step is never learned -- confirm
        # whether a terminal update (no bootstrap term) is wanted.
        if next_state is not None:
            next_features = get_state_features(next_state)
            next_state_index = get_state_index(next_features)

            best_next_action_value = np.max(q_table[next_state_index])
            current_q = q_table[current_state_index][action_index]

            # Basic Q-learning update without exploration bonus
            q_table[current_state_index][action_index] = current_q + learning_rate * (
                reward +
                discount_factor * best_next_action_value -
                current_q
            )

        video.write(st)

        # Decay exploration rate, never below the configured floor.
        exploration_rate = max(min_exploration_rate,
                               exploration_rate * exploration_decay)

        steps += 1

    print(f"Episode finished with total reward: {episode_reward} in {steps} steps")

finally:
    game.close()
    video.release()

[2032.34155273 1889.87902832    0.          237.09594732]
[2032.37547302 1889.93148804    0.          237.09594732]
[2032.44012451 1890.03147888    0.          237.09594732]
[2032.07467651 1889.46624756    0.          237.09594732]
[2031.77740479 1889.00645447    0.          237.09594732]
[2031.54191589 1888.64221191    0.          237.09594732]
[2031.3624115  1888.36456299    0.          237.09594732]
[2031.23364258 1888.16539001    0.          237.09594732]
[2031.15086365 1888.03733826    0.          237.09594732]
[2031.10975647 1887.97373962    0.          237.09594732]
[2031.10641479 1887.96855164    0.          237.09594732]
[2030.67935181 1887.30799866    0.          237.09594732]
[2030.25837708 1886.65689087    0.          237.09594732]
[2029.91078186 1886.11927795    0.          237.09594732]
[2029.62968445 1885.68452454    0.          237.09594732]
[2029.37493896 1885.29052734    0.          232.09716802]
[2029.18244934 1884.98274231    0.          232.09716802]
[2029.04637146

ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.

In [3]:
import math
import sys
sys.path.append('..')
from PathPlanning import utils
# Start a fresh game on a random map and drive the agent to the nearest
# RedCard using the project's path-planning helper.
game = vzd.DoomGame()
game.load_config('../data/resources/datagen.cfg')
game.set_doom_game_path("../data/resources/doom2.wad")
game.set_doom_scenario_path('../data/maps_manykeys_aug/30x30.wad')
# randint's upper bound is exclusive: map_index is in 1..98, maps map02..map99.
map_index = np.random.randint(1,99)
game.set_objects_info_enabled(True)
game.set_doom_map(f'map0{map_index+1}' if map_index<9 else f'map{map_index+1}')
game.set_window_visible(True)
game.init()

# Everything after init() runs under try/finally so the game instance is
# closed even if path planning raises or the cell is interrupted (previously
# game.close() was skipped in that case).
try:
    game.new_episode()
    state = game.get_state()
    if state is None:
        # Same explicit failure as the training cell, instead of an
        # AttributeError on state.objects below.
        raise ValueError("Failed to initialize game state")

    x1 = game.get_game_variable(vzd.GameVariable.POSITION_X)
    y1 = game.get_game_variable(vzd.GameVariable.POSITION_Y)

    # Get all objects from game state and keep only the RedCard ones.
    objects = state.objects
    redcard_objects = [obj for obj in objects if obj.name == 'RedCard']

    if redcard_objects:
        # Pair every RedCard with its planar distance from the agent.
        distances = [(obj, math.hypot(obj.position_x - x1, obj.position_y - y1))
                     for obj in redcard_objects]
        # Pick the closest one as the target.
        closest_redcard = min(distances, key=lambda pair: pair[1])[0]
        x2, y2 = closest_redcard.position_x, closest_redcard.position_y
    else:
        print("No Redcard found in the environment")
        x2, y2 = x1, y1  # Stay in place if no Redcard found

    # NOTE(review): utils.move_to is defined in PathPlanning/utils and is not
    # visible here; the third argument (0) is passed through unchanged.
    utils.move_to(x2,y2,0, game)
finally:
    game.close()




KeyboardInterrupt: 

In [13]:
# Discrete action set: 16 action vectors with 5 components each.
# NOTE(review): slot meaning depends on the button order in datagen.cfg,
# which is not visible here; labels are the author's.
possible_actions = np.zeros((16,5),dtype=int).tolist()
possible_actions[0] = [1,0,0,0,0]     # Move forward
possible_actions[1] = [0,1,0,0,0]     # Move backward
possible_actions[2] = [0,0,-30,0,0]   # Turn left large
possible_actions[3] = [0,0,30,0,0]    # Turn right large
possible_actions[4] = [0,0,-15,0,0]   # Turn left medium
possible_actions[5] = [0,0,15,0,0]    # Turn right medium
possible_actions[6] = [0,0,-5,0,0]    # Turn left small
possible_actions[7] = [0,0,5,0,0]     # Turn right small
possible_actions[8] = [0,0,0,10,0]    # Move forward delta large
possible_actions[9] = [0,0,0,-10,0]   # Move backward delta large
possible_actions[10] = [0,0,0,5,0]    # Move forward delta medium
possible_actions[11] = [0,0,0,-5,0]   # Move backward delta medium
possible_actions[12] = [0,0,0,2,0]    # Move forward delta small
possible_actions[13] = [0,0,0,-2,0]   # Move backward delta small
possible_actions[14] = [0,0,0,0,5]    # Move right delta
possible_actions[15] = [0,0,0,0,-5]   # Move left delta

# Initialize Q-table and parameters (not used by the probe loop below, but
# kept so the notebook namespace ends up the same as before).
STATE_SIZE = 10000
q_table = np.zeros((STATE_SIZE, len(possible_actions)))
learning_rate = 0.1
discount_factor = 0.95
exploration_rate = 0.5  # Reduced initial exploration rate
exploration_decay = 0.99  # Slower decay
min_exploration_rate = 0.01

# Initialize game on a random map.
game = vzd.DoomGame()
game.load_config('../data/resources/datagen.cfg')
game.set_doom_game_path("../data/resources/doom2.wad")
game.set_doom_scenario_path('../data/maps_manykeys_aug/30x30.wad')
# randint's upper bound is exclusive: map_index is in 1..98, maps map02..map99.
map_index = np.random.randint(1,99)
game.set_objects_info_enabled(True)
game.set_doom_map(f'map0{map_index+1}' if map_index<9 else f'map{map_index+1}')
game.set_window_visible(True)
game.init()

# Probe: apply the "turn left large" action 12 times and print the game
# variables after each step to observe how the angle changes. The game is now
# closed afterwards (it was previously left running), matching the other cells.
try:
    game.new_episode()
    for i in range(12):
        game.make_action(possible_actions[2])
        state = game.get_state()
        if state is None:
            # No state is available (e.g. the episode ended); stop probing
            # instead of failing on state.game_variables.
            break
        print(state.game_variables)
finally:
    game.close()


[ 897.69238281 2491.54850769    0.          195.83679204]
[ 897.69238281 2491.54850769    0.          225.84045415]
[ 897.69238281 2491.54850769    0.          255.84411627]
[ 897.69238281 2491.54850769    0.          285.84777839]
[ 897.69238281 2491.54850769    0.          315.8514405 ]
[ 897.69238281 2491.54850769    0.          345.85510262]
[ 897.69238281 2491.54850769    0.           15.85876465]
[ 897.69238281 2491.54850769    0.           45.86242677]
[ 897.69238281 2491.54850769    0.           75.86608888]
[ 897.69238281 2491.54850769    0.          105.869751  ]
[ 897.69238281 2491.54850769    0.          135.87341312]
[ 897.69238281 2491.54850769    0.          165.87707523]
