In [29]:
import pyautogui
import numpy as np
import os
import uuid
import cv2
import time
import torch

In [None]:
!git clone https://github.com/ultralytics/yolov5

In [None]:
!cd yolov5 & pip install -r requirements.txt --upgrade

In [None]:
pip install ipywidgets

In [None]:
# Load the "yolov5s" entry point from the local yolov5/ checkout.
# NOTE(review): per the torch.hub docs force_reload only concerns GitHub
# downloads, so it presumably has no effect with source="local" - confirm.
model = torch.hub.load(
    "yolov5",
    "yolov5s",
    source="local",
    force_reload=True,
)

In [None]:
# Folder that receives the collected screenshots.
IMAGE_FOLDER_PATH = os.path.join("data", "images_folder")
# os.makedirs behaves the same on every platform, so no os.name branching is
# needed (the old branches were identical and skipped any other os.name).
os.makedirs(IMAGE_FOLDER_PATH, exist_ok=True)

In [None]:
# Run this cell to collect training screenshots of the game.
no_of_images = 250
region = (150, 130, 650, 800)  # (left, top, width, height); adjust this according to your game screen size

# Save into the folder that the labeling step and dataset.yaml point at, and
# make sure it exists: cv2.imwrite returns False instead of raising when the
# target directory is missing, so images would otherwise be silently dropped.
IMAGE_FOLDER_PATH = os.path.join("data", "images_folder")
os.makedirs(IMAGE_FOLDER_PATH, exist_ok=True)

time.sleep(10)  # grace period before capturing starts (e.g. to switch to the game window)
print("Image Collection started!")
try:
    for no in range(no_of_images):
        screen = pyautogui.screenshot(region=region)
        # pyautogui returns an RGB image; OpenCV writes and displays BGR.
        screen_bgr = cv2.cvtColor(np.array(screen), cv2.COLOR_RGB2BGR)
        IMAGE_PATH = os.path.join(IMAGE_FOLDER_PATH, str(uuid.uuid1()) + ".jpg")
        if cv2.imwrite(IMAGE_PATH, screen_bgr):
            print("Image no {} collected".format(no + 1))
        else:
            print("Image no {} could not be written to {}".format(no + 1, IMAGE_PATH))
        cv2.imshow("yolo", screen_bgr)
        # Press q in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Close the preview window even if the loop is interrupted.
    cv2.destroyAllWindows()


In [None]:
#Now start labeling the images in data/images_folder
#Use labelImg 
!pip install PyQt5
!pip install lxml
!git clone https://github.com/HumanSignal/labelImg.git
!cd labelImg
!pyrcc5 -o libs/resources.py resources.qrc
!python labelImg.py #This will open up a application, using that label the images, save the labeled images in the same data/images directory
#Remember to save the files in YOLO format

In [None]:
#Before we start to train, we need to include two new files, 
# 1. Create a dataset.yaml file -> yolov5/dataset.yaml
#Example dataset.yaml
# path: C:\Users\Abisheik Raj\OneDrive\Desktop\Image Detection Projects\timberman_automation_bot\data
# train: images_folder
# val: images_folder
# test:

# nc: 19

# names: [
#     "dog", #0
#     "person", #1
#     "cat", #2
#     "tv", #3
#     "car", #4
#     "meatballs", #5
#     "marinara sauce", #6
#     "tomato soup", #7
#     "chicken noodle soup", #8
#     "french onion soup", #9
#     "chicken breast", #10
#     "ribs", #11
#     "pulled pork", #12
#     "hamburger", #13
#     "cavity", #14
#     "left_obs", #15
#     "timberman", #16
#     "right_obs", #17
#     "lefet", #18
#   ]
#for the names just copy paste the classes you have in data/images_folder/classes.txt

# 2. Create a hyp.scratch.yaml and set fliplr: 0.0 so that training never mirrors images left-right; a horizontal flip would swap left_obs and right_obs while their labels stay the same.
# Example: Create a hyp.scratch.yaml -> yolov5/hyp.scratch.yaml
# lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
# lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
# momentum: 0.937 # SGD momentum/Adam beta1
# weight_decay: 0.0005 # optimizer weight decay 5e-4
# warmup_epochs: 3.0 # warmup epochs (fractions ok)
# warmup_momentum: 0.8 # warmup initial momentum
# warmup_bias_lr: 0.1 # warmup initial bias lr
# box: 0.05 # box loss gain
# cls: 0.5 # cls loss gain
# cls_pw: 1.0 # cls BCELoss positive_weight
# obj: 1.0 # obj loss gain (scale with pixels)
# obj_pw: 1.0 # obj BCELoss positive_weight
# iou_t: 0.20 # IoU training threshold
# anchor_t: 4.0 # anchor-multiple threshold
# # anchors: 3  # anchors per output layer (0 to ignore)
# fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
# hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
# hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
# hsv_v: 0.4 # image HSV-Value augmentation (fraction)
# degrees: 0.0 # image rotation (+/- deg)
# translate: 0.1 # image translation (+/- fraction)
# scale: 0.5 # image scale (+/- gain)
# shear: 0.0 # image shear (+/- deg)
# perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
# flipud: 0.0 # image flip up-down (probability)
# fliplr: 0.0 # image flip left-right (probability)
# mosaic: 1.0 # image mosaic (probability)
# mixup: 0.0 # image mixup (probability)
# copy_paste: 0.0 # segment copy-paste (probability)


In [None]:
# Train the custom model. The command runs from inside yolov5/, so dataset.yaml
# and hyp.scratch.yaml are looked up there. It starts from the yolov5s.pt weights
# with --img 320, --batch 16, --epochs 2000 and --workers 2.
# Training takes a long time on a CPU-only machine; a GPU is recommended.
!cd yolov5 && python train.py --img 320 --batch 16 --epochs 2000 --data dataset.yaml --hyp hyp.scratch.yaml --weights yolov5s.pt --workers 2

In [None]:
# Load the fine-tuned weights through the local yolov5/ checkout.
# NOTE(review): "exp" is presumably the folder of the first training run;
# verify the folder name under yolov5/runs/train if you trained more than once.
model = torch.hub.load(
    "yolov5",
    "custom",
    path="yolov5/runs/train/exp/weights/best.pt",
    source="local",
)

In [None]:
# Run this cell to preview the trained model's detections live.
# Press q in the preview window to stop.
region = (150, 500, 650, 500)  # Change it according to your game screen size
try:
    while True:
        screen = pyautogui.screenshot(region=region)
        # The YOLOv5 hub wrapper expects RGB arrays, so feed it the screenshot
        # as captured rather than an OpenCV-style BGR copy.
        screen_rgb = np.array(screen)
        result = model(screen_rgb)
        # render() returns the annotated frame in the input's channel order;
        # convert only for display because cv2.imshow expects BGR.
        annotated_bgr = cv2.cvtColor(np.squeeze(result.render()), cv2.COLOR_RGB2BGR)
        cv2.imshow("yolo", annotated_bgr)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Close the preview window even if the loop is interrupted.
    cv2.destroyAllWindows()

In [None]:
# Play the game: detect obstacles with the fine-tuned model and steer with
# pyautogui key presses. Press q in the preview window to stop.
CONFIDENCE_THRESHOLD = 0.3
# Class ids as listed in the example dataset.yaml (15: left_obs, 17: right_obs).
LEFT_OBS_CLASS = 15
RIGHT_OBS_CLASS = 17
region = (150, 500, 650, 500)
curr_position = "left"
try:
    while True:
        screen = pyautogui.screenshot(region=region)
        # The YOLOv5 hub wrapper expects RGB arrays, so feed it the screenshot
        # as captured; convert to BGR only for the OpenCV preview window.
        screen_rgb = np.array(screen)
        result = model(screen_rgb)
        cv2.imshow("yolo", cv2.cvtColor(np.squeeze(result.render()), cv2.COLOR_RGB2BGR))

        # Each detection row is unpacked as x1, y1, x2, y2, confidence, class.
        detections = result.xyxy[0].cpu().numpy().tolist()
        seen_classes = {
            int(cls)
            for *_box, confidence, cls in detections
            if confidence >= CONFIDENCE_THRESHOLD
        }
        left_present = LEFT_OBS_CLASS in seen_classes
        right_present = RIGHT_OBS_CLASS in seen_classes

        if left_present:
            # Obstacle on the left: move to the right side.
            pyautogui.press("right")
            curr_position = "right"
        elif right_present:
            # Obstacle on the right: move to the left side.
            pyautogui.press("left")
            curr_position = "left"
        else:
            # No obstacle detected: press the key opposite to the recorded side.
            # NOTE(review): curr_position is not updated here, so the same key
            # is pressed until an obstacle is seen - confirm this is intended.
            if curr_position == "right":
                pyautogui.press("left")
            else:
                pyautogui.press("right")

        time.sleep(0.1)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Close the preview window even if the loop is interrupted.
    cv2.destroyAllWindows()

In [None]:
#NOTE: The dataset used here comes from the online version of the game: https://html5games.com/Game/Timberman/a25e37b9-1550-49c9-b383-92ad51b4ecc2
#So this model works well on the online version. If you want to try it on the Steam version, collect images from the Steam game, label them and retrain the model.