In [95]:
import math
import time
import os
import cv2 
import keyboard
import pickle
import numpy as np
from PIL import Image, ImageOps, ImageGrab
from matplotlib import pyplot as plt
from skimage import io, color, measure, morphology
from scipy import ndimage
from scipy.ndimage import rotate
import copy

from gtts import gTTS
import uuid
from playsound import playsound
import pyautogui
import pydirectinput
import vgamepad as vg

%matplotlib inline

# Notes
I have made this code specific to run on windowed fullscreen 1920x1080 resolution in-game. I have hard-coded a lot of pixel values based on this assumption.

# Item detection
Scrap items appear to all appear at roughly the same shape, size, and colour. This suggests template matching as a solution. However, upon examining the training images, it was noted that each scrap item had the exact same colour albeit for a small difference observed in one case. This suggests that precise colour thresholding will be very effective at isolating the scrap items from the rest of the image. The steps for the item detection are as follows:

- RGB colour threeshold applied to the image to identify scrap items.
- Dilate segments of scrap items. This helps prevent map elements that sometimes break up the appearance of scrap as appearing as more than one scrap item when it is only one.

In [77]:
monitor = cv2.imread(r'training\9.png')[75:835, 395:1426]

# Colour threshold for scrap items
scrap_lower_bound = (1, 254, 172)
scrap_upper_bound = (2, 255, 177)

# Colour threshold the image
color_mask = cv2.inRange(monitor, scrap_lower_bound, scrap_upper_bound)

# Dilate segments
kernel_size = (5, 5)
kernel = np.ones(kernel_size, np.uint8)
eroded_mask = cv2.erode(color_mask, kernel, iterations=1)
item_mask = cv2.dilate(eroded_mask, kernel, iterations=4)

cv2.imshow('Original Image', monitor)
cv2.imshow('Binary Mask', item_mask)

cv2.waitKey(0)
cv2.destroyAllWindows()

# Enemy detection
Similar to scrap items, enemies all have the same shape and colour, but have differences in size. There are also issues with other objects appearing the same colour as the enemies on the monitor. This necessitates a requirement for more sophisticated methods of detection. Circular Hough Transforms were tried, but too difficult to implement. A simple solution was found by using morphological opening to filter out non-circular shaped objects that proved better than the circular hough transform. This will be used to improve the enemy detection method.

- RGB colour threshold applied to the image to filter out noise and any objects not red.
- Morphological opening (erode, then dilate) to remove non-circular segments. 

In [89]:
monitor = cv2.imread(r'training\17.png')[75:835, 395:1426]
# 5, 8

# Colour threshold for enemies
enemy_lower_bound = (1, 1, 85)
enemy_upper_bound = (25, 25, 255)

# Colour threshold the image
color_mask = cv2.inRange(monitor, enemy_lower_bound, enemy_upper_bound)

# Morphological Opening
kernel_size = (4, 4)
kernel = np.ones(kernel_size, np.uint8)
dilated_mask = cv2.erode(color_mask, kernel, iterations=2)
enemy_mask = cv2.dilate(dilated_mask, kernel, iterations=3)

cv2.imshow('Original Image', monitor)
cv2.imshow('Binary Mask', enemy_mask)

cv2.waitKey(0)
cv2.destroyAllWindows()

# Player Angle Detection

Simplify the method used to detect what direction the player is looking in, and return one of four simple directions: (front, behind, left, right). There were somoe difficulties here as the colour thresholding is not as effective. However, the cases where this become an issue are quite limited. The process for detecting the angle the player is looking in is as follows:

- Crop the image to just the player.
- Blue colour thresholding on the image to isolate the player.
- Average a centroid for the segmented areas and calculate the angle based on the centre of the image to that point.

In [6]:
def calculate_angle(binary_mask):
    # Calculate the centroid of the white pixels in the binary mask
    Y, X = np.where(binary_mask == 255)
    centroid_x = np.mean(X)
    centroid_y = np.mean(Y)

    # Determine the center point of the image
    image_center_x, image_center_y = binary_mask.shape[1] / 2, binary_mask.shape[0] / 2

    # Calculate the angle to the centroid of the largest segment
    delta_x = centroid_x - image_center_x
    delta_y = image_center_y - centroid_y  # Image coordinates are top-left, so we invert the y-axis
    angle_rad = math.atan2(delta_y, delta_x)  # Calculate angle in radians
    # Adjust the angle so 0 degrees is 'up' and positive angles are clockwise
    angle_deg = (90 - math.degrees(angle_rad)) % 360

    return angle_deg

monitor = cv2.imread(r'training\16.png')[75:835, 395:1426]
player = monitor[307:473, 453:619]

# Colour thresholds for the player
player_lower_bound = (90, 1, 1)
player_upper_bound = (255, 200, 10)

# Colour threshold the image
color_mask = cv2.inRange(player, player_lower_bound, player_upper_bound)

# Dilate to fill in any noise
kernel_size = (5, 5)
kernel = np.ones(kernel_size, np.uint8)
player_mask = cv2.dilate(color_mask, kernel, iterations=3)

angle = calculate_angle(player_mask)
print(angle)

cv2.imshow('Monitor', player_mask)
cv2.imshow('Player', player)

cv2.waitKey(0)
cv2.destroyAllWindows()

45.14058270723698


# Supporting Functions
Support functions such as converting a degree angle to a direction, getting centroids of detected objects (segments), and calculating relative angle of detected objects from player angle.

In [7]:
def get_centroids(image_mask):
    contours, _ = cv2.findContours(image_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    centroids = []

    # Loop through each contour
    for cnt in contours:
        # Calculate the moments of the contour
        M = cv2.moments(cnt)
        if M["m00"] != 0:
            # Calculate centroid coordinates
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
            centroids.append((cX, cY))
            # You can also draw the centroid on the image if you wish
            cv2.circle(monitor, (cX, cY), 5, (255, 0, 0), -1)

    return centroids


In [67]:
def get_relative_direction(player_angle, object_centroid):
    player_centroid = (536, 390)
    
    # Calculate angle from player to object
    dy = object_centroid[1] - player_centroid[1]
    dx = object_centroid[0] - player_centroid[0]
    angle_to_object = math.atan2(dy, dx) * (180 / math.pi)
    
    # Adjust the game's coordinate system to a standard coordinate system with 0 degrees as up
    angle_to_object = (angle_to_object + 90) % 360
    
    # Calculate relative angle to player's facing direction
    relative_angle = (angle_to_object - player_angle) % 360
    if relative_angle > 180:
        relative_angle -= 360  # Adjust to have a range from -179 to 180
    
    # Determine direction based on the relative angle
    if -45 < relative_angle <= 45:
        return "front"
    elif 45 < relative_angle <= 135:
        return "right"
    elif -135 <= relative_angle < -45:
        return "left"
    else:
        return "behind"

In [19]:
def calculate_angle(binary_mask):
    # Calculate the centroid of the white pixels in the binary mask for the player based on their own mask
    Y, X = np.where(binary_mask == 255)
    centroid_x = np.mean(X)
    centroid_y = np.mean(Y)

    # Determine the center point of the image
    image_center_x, image_center_y = binary_mask.shape[1] / 2, binary_mask.shape[0] / 2

    # Calculate the angle to the centroid of the largest segment
    delta_x = centroid_x - image_center_x
    delta_y = image_center_y - centroid_y  # Image coordinates are top-left, so we invert the y-axis
    angle_rad = math.atan2(delta_y, delta_x)  # Calculate angle in radians
    # Adjust the angle so 0 degrees is 'up' and positive angles are clockwise
    angle_deg = (90 - math.degrees(angle_rad)) % 360

    return angle_deg

In [94]:
def text_to_speech(text):
    # Create a gTTS object
    tts = gTTS(text=text, lang='en', tld='us')
    filename = "output_{}.mp3".format(str(uuid.uuid1()))

    # Save the speech to an MP3 file
    tts.save(filename)

    pyautogui.mouseDown()
    # Play the audio file (this will play through the default audio device)
    playsound(filename)
    pyautogui.mouseUp()
    
    # Clean up
    os.remove(filename)

# Detection Functions

In [80]:
def detect_items(monitor):
    # Colour threshold for scrap items
    scrap_lower_bound = (1, 254, 172)
    scrap_upper_bound = (2, 255, 177)

    # Colour threshold the image
    color_mask = cv2.inRange(monitor, scrap_lower_bound, scrap_upper_bound)

    # Dilate segments
    kernel_size = (5, 5)
    kernel = np.ones(kernel_size, np.uint8)
    eroded_mask = cv2.erode(color_mask, kernel, iterations=1)
    item_mask = cv2.dilate(eroded_mask, kernel, iterations=4)

    # Remove any items the player is holding or on top of
    item_mask[307:473, 453:619] = 0

    return get_centroids(item_mask)

In [18]:
def detect_enemies(monitor):
    # Colour threshold for enemies
    enemy_lower_bound = (1, 1, 85)
    enemy_upper_bound = (25, 25, 255)

    # Colour threshold the image
    color_mask = cv2.inRange(monitor, enemy_lower_bound, enemy_upper_bound)

    # Morphological Opening
    kernel_size = (4, 4)
    kernel = np.ones(kernel_size, np.uint8)
    dilated_mask = cv2.erode(color_mask, kernel, iterations=2)
    enemy_mask = cv2.dilate(dilated_mask, kernel, iterations=3)

    return get_centroids(enemy_mask)

In [22]:
def get_player_angle(monitor):
    player = monitor[307:473, 453:619]

    # Colour thresholds for the player
    player_lower_bound = (90, 1, 1)
    player_upper_bound = (255, 200, 10)

    # Colour threshold the image
    color_mask = cv2.inRange(player, player_lower_bound, player_upper_bound)

    # Dilate to fill in any noise
    kernel_size = (5, 5)
    kernel = np.ones(kernel_size, np.uint8)
    player_mask = cv2.dilate(color_mask, kernel, iterations=3)

    return calculate_angle(player_mask)

In [97]:
def detections(monitor):
    # Make detections of items, enemies, and player angle
    items = detect_items(monitor)
    enemies = detect_enemies(monitor)
    player_angle = get_player_angle(monitor)

    # Initialize direction counters
    directions = {
        'enemy': {'front': 0, 'right': 0, 'behind': 0, 'left': 0},
        'item': {'front': 0, 'right': 0, 'behind': 0, 'left': 0}
    }

    # Update counts for enemies
    for centroid in enemies:
        direction = get_relative_direction(player_angle, centroid)
        directions['enemy'][direction] += 1

    # Update counts for items
    for centroid in items:
        direction = get_relative_direction(player_angle, centroid)
        directions['item'][direction] += 1

    # Format the result string
    results = []
    for object_type in ['enemy', 'item']:
        for direction in ['front', 'right', 'behind', 'left']:
            count = directions[object_type][direction]
            if count > 0:
                results.append(f"{count} {object_type} {direction}")

    # Join the results into a single string
    detections = ', '.join(results)

    return detections
    

# Testing
Now we want to get the centroids of all the items and the centroids of all the enemies and provide a detailed description of the count of enemies and items in each direction. This works acccurately in most cases but there are still a few times where objects are not stated in the correct direction to the player.

In [114]:
monitor = cv2.imread(r'training\29.png')[75:835, 395:1426]
print(detections(monitor))
print(get_player_angle(monitor))
print(detect_items(monitor))

cv2.imshow('Test', monitor)

cv2.waitKey(0)
cv2.destroyAllWindows()

1 enemy front
264.3057210023013
[]


# AI4 Directions

In [102]:
time.sleep(10)
import pyperclip

while True:
    time.sleep(1)
    # Enter the terminal
    pydirectinput.keyDown('e')
    time.sleep(0.5)
    pydirectinput.keyUp('e')
    time.sleep(1)

    # Enter random nonsense to clear the screen (assuming we already have monitor up)
    pyperclip.copy("aa")
    pyautogui.hotkey('ctrl', 'v')
    time.sleep(0.1)
    pyautogui.press('enter')
    time.sleep(1)

    image = ImageGrab.grab(bbox=(75, 395, 835, 1426))
    image_rgb = image.convert('RGB')
    numpy_image = np.array(image_rgb)
    monitor = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR)
    speech = detections(monitor)
    time.sleep(0.1)

    # Exit the terminal
    pydirectinput.keyDown('esc')
    time.sleep(0.1)
    pydirectinput.keyUp('esc')

    # Use radio to tell any info
    if speech:
        pydirectinput.keyDown('q')
        time.sleep(0.1)
        pydirectinput.keyUp('q')
        time.sleep(0.1)
        pydirectinput.mouseDown(button='left')
        time.sleep(0.2)
        text_to_speech(speech)
        time.sleep(0.1)
        pydirectinput.mouseUp(button='left')
        time.sleep(0.1)
        pydirectinput.keyDown('q')
        time.sleep(0.1)
        pydirectinput.keyUp('q')
        time.sleep(0.1)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


KeyboardInterrupt: 