In [5]:
import cv2
import numpy as np
import random
import vizdoom as vzd
import matplotlib.pyplot as plt
import os


In [12]:

possible_actions = np.zeros((8,5),dtype=int).tolist()
possible_actions[0] = [1,0,0,0,0]    # Move forward
possible_actions[1] = [0,1,0,0,0]    # Move backward 
possible_actions[2] = [0,0,-30,0,0]  # Turn left
possible_actions[3] = [0,0,30,0,0]   # Turn right
possible_actions[4] = [0,0,0,10,0]   # Move forward (delta)
possible_actions[5] = [0,0,0,-10,0]  # Move backward (delta)
possible_actions[6] = [0,0,0,0,5]    # Move right (delta)
possible_actions[7] = [0,0,0,0,-5]   # Move left (delta)

# Initialize Q-table and parameters
q_table = np.zeros((10000, len(possible_actions)))  # Example state size
learning_rate = 0.1
discount_factor = 0.95
exploration_rate = 1.0
exploration_decay = 0.995
min_exploration_rate = 0.01

# Initialize game
game = vzd.DoomGame()
game.load_config('../data/resources/datagen.cfg')
game.set_doom_game_path("D:/gzdoom/doom2.wad")  # Specify the path to your desired WAD file
game.set_doom_scenario_path('../data/maps_1key_noaug/30x30.wad')
map_index = np.random.randint(1,99)
game.set_doom_map(f'map0{map_index+1}' if map_index<9 else f'map{map_index+1}')
game.set_window_visible(True)
game.init()
game.new_episode()

# Initialize video recording
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video = cv2.VideoWriter('debug0.avi', fourcc, 10, (320,240), isColor=True)

def get_state_features(state):
    """Extract relevant features from the state"""
    # Get position and angle
    game_vars = state.game_variables
    pos_x, pos_y, pos_z, angle = game_vars
    
    # Get depth information from center of view
    depth_buffer = state.depth_buffer
    center_depth = depth_buffer[119:121, 159:161].mean() if depth_buffer is not None else 0
    
    return (pos_x, pos_y, angle, center_depth)

def get_state_index(features):
    """Convert continuous state features to discrete index"""
    # Discretize each feature
    x_bin = int(features[0] / 50)  # Position binning
    y_bin = int(features[1] / 50)
    angle_bin = int((features[2] + 180) / 45)  # Angle binning into 8 directions
    depth_bin = int(features[3] / 20)  # Depth binning
    
    # Combine discretized features into single index
    return (x_bin * 1000 + y_bin * 100 + angle_bin * 10 + depth_bin) % 10000

# Initialize parameters
learning_rate = 0.1
discount_factor = 0.99
exploration_rate = 1.0
exploration_decay = 0.997
min_exploration_rate = 0.05

# Add curiosity-driven exploration
curiosity_bonus = np.zeros((10000,))  # Track state visits
curiosity_weight = 0.1

try:
    episode_reward = 0
    while not game.is_episode_finished():
        state = game.get_state()
        if state is None:
            break
            
        st = state.screen_buffer.transpose(1,2,0)
        
        # Get state features and index
        state_features = get_state_features(state)
        current_state_index = get_state_index(state_features)
        
        # Add curiosity bonus to exploration
        exploration_bonus = curiosity_weight / (curiosity_bonus[current_state_index] + 1)
        
        # Action selection with curiosity-driven exploration
        if random.uniform(0, 1) < exploration_rate + exploration_bonus:
            # Smarter random action selection based on depth
            if state_features[3] > 30:  # If far from walls
                action_index = random.choice([0, 4, 5]) # Forward and lateral movement
            else:
                action_index = random.choice([1, 2, 3]) # Backward and turning
        else:
            action_index = np.argmax(q_table[current_state_index])

        # Execute action
        reward = game.make_action(possible_actions[action_index])
        episode_reward += reward
        print(f'diff: {reward},total: {episode_reward}')
        # Update curiosity tracking
        curiosity_bonus[current_state_index] += 1
            
        # Get next state and update Q-table
        next_state = game.get_state()
        if next_state is not None:
            next_features = get_state_features(next_state)
            next_state_index = get_state_index(next_features)
            
            # Q-learning update with curiosity bonus
            best_next_action_value = np.max(q_table[next_state_index])
            current_q = q_table[current_state_index][action_index]
            q_table[current_state_index][action_index] = current_q + learning_rate * (
                reward + exploration_bonus + discount_factor * best_next_action_value - current_q
            )
        
        video.write(st)
        
        # Decay exploration rate
        exploration_rate *= exploration_decay
        exploration_rate = max(min_exploration_rate, exploration_rate)

    print(f"Episode finished with total reward: {episode_reward}")

finally:
    game.close()
    video.release()

diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,total: 0.0
diff: 0.0,tot

ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.