In [1]:
from mss import mss
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.backends.cudnn as cudnn
from PIL import Image
from sklearn.neighbors import NearestNeighbors as KNN
import pyautogui
import pytesseract
from time import sleep
import winsound
from re import sub
import math

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Let cuDNN benchmark its convolution algorithms.
cudnn.benchmark = True

In [None]:
# Memory efficient loop

In [32]:
# Memory efficient loop

# Grab one 800x640 region of the screen and turn it into a torch tensor.
with mss() as sct:
    monitor = {"top": 40, "left": 0, "width": 800, "height": 640} # Memory efficiency.
    data = sct.grab(monitor)
    # Decode the raw BGRA buffer as RGB, then go PIL image -> numpy array -> tensor.
    data = torch.from_numpy(
        np.array(Image.frombytes("RGB", data.size, data.bgra, 'raw', 'BGRX'))
    )

In [20]:
# NOTE(review): the saved output below shows 4 channels, but the grab cell converts to "RGB";
# the output looks stale - re-run to confirm.
print(data.shape) # (Height, Width, Channels)

(640, 800, 4)


In [None]:
# We have our frame grabber. We just need to make sure it'll run in real time
# After that, we'll have to create a dataset which will serve as our memory. It can be used for Hakisa to know more or less how she should handle each situation
# The inputs in the dataset(X), will be the images. The outputs(labels), will be the keys that Hakisa must use, the commands(PyAutoGUI commands)
# Those commands can be float-encoded, just like we do in NLP. In order to decode them, we can use K-Nearest Neighbors.

# Image(game state) ------> Hakisa ------> Output(ex: 1.543) -------> Decoder(KNN + input dictionary) ------> Game Input(Keyboard/Mouse command)

# Exploration Mode: Each image will compose a standardized dataset(Is this really necessary?). Each output will compose a memory dataset(labels dataset)

# Studying Mode: After generating a certain amount of input images and output commands, Hakisa will iterate through them and learn what she should do in each situation.
# She'll be trying to extract patterns from each situation and associate them with the output that yields the lowest loss (which will be determined by the game scores)

# Training Mode: After learning some patterns, Hakisa will play on her own. There'll be no datasets here, it's just her and the game.

# Eval mode: I don't really think she'll ever reach a peak of skill... If this happens, it's because her architecture must be upgraded.

In [66]:
class Dataset(torch.utils.data.Dataset):
    """
    Screen grabber, command encoder/decoder and replay memory used by Hakisa.

    The class does four things:
        * grabs frames of a screen region and returns them as (1, C, H, W) float tensors;
        * maps each command string (ex: 'keyDown_z') to a float in [-1, 1] and decodes a
          model output back to the nearest command with a 1-nearest-neighbour search;
        * reads small screen regions with Tesseract OCR (scores, lives...);
        * keeps a bounded memory of (frame, key, key_value, reward) tuples that can be turned
          into (inputs, labels) tensors for the studying (supervised) phase.

    Args:
        input_mapping: iterable of command strings in the form 'command_action'. Required.
        explore_train_steps: number of steps used by the exploration/studying loops.
        memory_size: maximum number of entries kept in the memory.
        top, left, width, height: screen region captured by the frame grabber.
        resize: optional (Height, Width) tuple used to shrink each captured frame.
    """

    def __init__(self, input_mapping=None, explore_train_steps=1000, memory_size=100, top=0, left=0, width=1920, height=1080, resize=None):

        # Window resolutions for the screen grabber
        self.top = top
        self.left = left
        self.width = width
        self.height = height

        self.resize = resize # For reducing the images. Must be a tuple (Height, Width)

        # Hakisa's Mode - Explore, Study, Train.
        # A `mode` attribute is intentionally not stored for now: Hakisa is quite dependent on the Dataset class.

        self.steps = explore_train_steps

        self.data = None # This will be created during training. However, we could make it possible for us to load a ready-made data for studying.
        self.input_mapping = self._create_commands_dictionary(input_maps=input_mapping)

        self.knn = None # Creating variable so we don't have to fit KNN at every step.
        self._fit_knn(self.input_mapping)

        self.keys_list = list(self.input_mapping.keys()) # Also for efficiency in each step

        self.labels = None # Used for studying

        self.memory_size = memory_size
        self.memory = []


    # Pytorch's Dataset functions will only be used in Studying mode
    def __getitem__(self, idx):
        """Returns the (input frame, label) pair stored at position `idx`."""

        inputs = self.data[idx]
        labels = self.labels[idx]

        return inputs, labels


    def __len__(self):
        """Number of study samples; 0 while no study data has been created or loaded."""

        if self.data is None:
            return 0

        return len(self.data)


    def _grab_frame(self):
        """
        Captures the configured screen region.

        Returns a float32 tensor with shape (1, Channels, Height, Width) placed on `device`.
        """
        # Unfortunately, this whole operation takes about 0.6 seconds, so we'll probably have to deal with a single frame each 1~3 seconds.
        with mss() as sct:
            shot = sct.grab({"top": self.top, "left": self.left, "width": self.width, "height": self.height})
            # mss hands back BGRA bytes. The 'raw'/'BGRX' decoder reorders them into RGB and
            # skips the 4th byte; without it the 4-byte pixels are read as 3-byte RGB triplets.
            frame = Image.frombytes("RGB", shot.size, shot.bgra, 'raw', 'BGRX')

        if self.resize:
            # self.resize is (Height, Width), but PIL expects (width, height).
            frame = frame.resize((self.resize[1], self.resize[0]))

        frame = torch.from_numpy(np.array(frame, dtype=np.float32)) # (Height, Width, Channels)

        # permute() actually moves the channel axis; view() would only reinterpret the memory
        # layout and scramble the pixels.
        frame = frame.permute(2, 0, 1).unsqueeze(0).contiguous().to(device) # (Batch, Channels, Height, Width)

        return frame


    def _create_commands_dictionary(self, input_maps):
        """
        Builds the {command: value} dictionary.

        Each distinct command gets an index following its first appearance, and the indices
        are then linearly scaled to the range [-1, 1]. With a single command the range
        collapses, so that command is mapped to 0.0.

        Raises:
            ValueError: if `input_maps` is None or empty.
        """
        keys = list(input_maps) if input_maps is not None else []

        if not keys:
            raise ValueError("input_mapping must contain at least one command string")

        key2idx = {}

        for key in keys:
            if key not in key2idx:
                key2idx[key] = len(key2idx)

        maximum = len(key2idx) - 1

        if maximum == 0:
            # Only one command: avoid dividing by zero.
            return {key: 0.0 for key in key2idx}

        return {key: value * 2.0 / maximum - 1.0 for key, value in key2idx.items()}

    def get_mouse_coordinates(self, mouse_commands, x0, xf, y0, yf):
        '''
        Generates a list of mouse commands and its coordinates, ready to generate an input dictionary
        Example:
            mouse_commands = ['click', 'move']
            x0 = 0, xf=10, y0=0, yf=10

            dataset.get_mouse_coordinates(mouse_commands, 0, 10, 0, 10)

            returns: commands = ['click_(0,0)', 'move_(0,0)', 'click_(0,1)', 'move_(0,1)'...]

        xf and yf are exclusive. Returns an empty list when there are no commands or the
        ranges are empty.
        '''
        # We need to get the window coordinates so we can use mouse commands

        return [
            cmd + "_" + "(" + str(x) + "," + str(y) + ")"
            for x in range(x0, xf)
            for y in range(y0, yf)
            for cmd in mouse_commands
        ]

    def _fit_knn(self, dictionary):
        """Fits the 1-nearest-neighbour decoder on the values of `dictionary` and stores it in self.knn."""

        values = np.array(list(dictionary.values())).reshape(-1, 1)

        self.knn = KNN(n_neighbors=1, algorithm='kd_tree').fit(values)

        print("KNN fitted and ready to go!")


    def get_command(self, action_value):
        '''
        Each dictionary key = command_action ---> (keyDown_z) or (click_(100,60))
        Each dictionary value = value for that action ---> within range [-1, 1]

        action_value may be a torch tensor (it is detached and moved to the CPU here), a numpy
        array or a plain number. When several values are given, the command for the last one
        is returned.

        Must return a tuple (command, action) --> ('keyDown', 'z') or ('click', '(100,60)')
        '''

        if torch.is_tensor(action_value):
            action_value = action_value.detach().cpu().numpy()

        action_value = np.asarray(action_value, dtype=np.float64).reshape(-1, 1)

        _, index = self.knn.kneighbors(action_value)

        command = self.keys_list[index[-1][-1]]

        # Command: keyDown_z -----> (keyDown, z) ; keyDown_shift_keyDown_z ----> (keyDown, shift, keyDown, z)
        return tuple(command.split('_'))

    def get_consequences(self, top, left, width, height, tesseract_config='--psm 8'):
        '''
        Used after Hakisa performed an input, in order to get its consequences(ex: score change, bombs, kills, deaths...).
        Grabs the given screen region and returns a string according to Tesseract's OCR,
        keeping only letters, digits, '/' and '.'.
        '''

        with mss() as sct:
            shot = sct.grab({"top": top, "left": left, "width": width, "height": height})
            # Same BGRA -> RGB decoding as in _grab_frame.
            consequence = Image.frombytes("RGB", shot.size, shot.bgra, 'raw', 'BGRX')

        consequence = pytesseract.image_to_string(consequence, config=tesseract_config)

        # OCR adds some strange characters(even with the whitelist function). Let's remove them.
        # Raw string: '\/' and '\.' are invalid escape sequences in a normal string literal.

        consequence = sub(r'[^A-Za-z0-9/.]', '', consequence) # Attention: 0, 1 and 8 can be seen as O, l and B.

        return consequence

    def create_memory(self, beta, frame, key, reward):
        '''
        Saves data in the memory list.
        Memory is saved in the format (frame, key, key_value, reward).

        Frame will be used as input during studying. key_value, as label.
        Key is saved for visualization, and reward works as weights(helps discarding bad decisions and saving good ones)

        Memory will only be changed once it reaches its full size: from then on, the stored
        entry with the lowest reward is dropped to make room for the new one.

        Tensor frames are stored detached on the CPU so the memory doesn't pin GPU memory.
        '''

        reward = reward * beta # Beta can be a constant value, like 1e-5

        key = '_'.join(key) # (command, action) ----> (command_action), like it's in the dictionary

        if torch.is_tensor(frame):
            frame = frame.detach().cpu()

        memory = (frame, key, self.input_mapping.get(key), reward) # A tuple makes each item in the list iterable.

        if len(self.memory) >= self.memory_size:
            self.memory.sort(key=lambda entry: entry[3]) # Sorting list according to rewards values.
            self.memory.pop(0) # Removing the item with lowest reward value

        self.memory.append(memory)

    def create_data_for_study(self):
        """
        Turns the memory into study tensors.

        self.data receives the stored frames concatenated on the batch axis and self.labels
        receives the stored key values as a float32 tensor of shape (N, 1). Both are moved
        to `device`.
        """

        inputs = torch.cat([entry[0].cpu() for entry in self.memory], 0)

        # The stored key values are plain Python floats, so they can't go through torch.cat.
        labels = torch.tensor([entry[2] for entry in self.memory], dtype=torch.float32).unsqueeze(-1)

        self.data = inputs.to(device)
        self.labels = labels.to(device)

    def use_readymade_data(self, data, labels):
        """Uses externally prepared `data` and `labels` as the study dataset."""
        # We aren't using data in time_steps mode, like we do for gifs, time series and forecasting in general.
        # I thought it might be a good idea to also train Hakisa with that.
        # This might also be the best way to train her in frames forecasting, as the process is probably too slow to be made while playing.

        self.data = data
        self.labels = labels


In [None]:
# Stand-alone demo: build every '<command>_(x,y)' string for a 10x10 grid of coordinates.
mouse_commands = ['click', 'move']
Xvalues = list(range(0, 10))
Yvalues = list(range(0, 10))

commands = []

for x in Xvalues:
    for y in Yvalues:
        # One entry per mouse command, in the order given by mouse_commands.
        commands.extend(f"{cmd}_({x},{y})" for cmd in mouse_commands)

print(commands)

In [13]:
def conv2out(input, kernel, stride, padding, dilation=1):
    """
    Output size, along one dimension, of a convolution or pooling layer.

    Implements floor((input + 2*padding - dilation*(kernel - 1) - 1) / stride) + 1.
    The division is floored, so the result is exact even when the stride doesn't evenly
    divide the padded input.

    Args:
        input: input size along the dimension (height or width).
        kernel: kernel size along that dimension.
        stride: stride along that dimension.
        padding: padding added to each side.
        dilation: spacing between kernel elements. Defaults to 1.

    Returns:
        The output size (an int when every argument is an int).
    """
    padded = input + 2 * padding
    kernel_extent = dilation * (kernel - 1)

    return (padded - kernel_extent - 1) // stride + 1

In [None]:
# Output size for an input of 15 with kernel 2, stride 1 and no padding.
print(conv2out(15, 2, 1, 0))

In [None]:
# Output size for an input of 160 with kernel 2, stride 2 and no padding.
print(conv2out(160, 2, 2, 0))

In [67]:
class Hakisa(torch.nn.Module):
    """
    Convolutional network that maps a game frame to a single command value.

    The layer sizes assume 1080x1920 RGB inputs, (Batch, 3, 1080, 1920); the final linear
    layer expects 25*4*8 features. The output always has shape (Batch, 1).

    Args:
        mode: "Forecast" makes forward() expect a sequence of frames
            (Batch, frames_sequence, channels, height, width). Any other value expects a
            single frame per sample (Batch, channels, height, width).
    """

    def __init__(self, mode='Default'):

        super(Hakisa, self).__init__()

        self.mode = mode

        # This structure must be changed with the input size...unless you'd like to use adaptive pooling

        # Let's begin supposing that we're gonna use 1080x1920 RGB images ---> (3, 1080, 1920)

        # Output: always (Batch, 1)

        self.conv1 = torch.nn.Conv2d(3, 10, kernel_size=(3, 3), stride=(3,3), bias=False) # 360x640
        self.batchnorm1 = torch.nn.BatchNorm2d(10)
        self.conv2 = torch.nn.Conv2d(10, 25, kernel_size=(3, 4), stride=(3,4), bias=False) # 120x160
        self.batchnorm2 = torch.nn.BatchNorm2d(25)
        self.pool1 = torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2,2)) # 60x80
        self.conv3 = torch.nn.Conv2d(25, 50, kernel_size=(2, 2), stride=(2,2), bias=False) # 30x40
        self.batchnorm3 = torch.nn.BatchNorm2d(50)
        self.conv4 = torch.nn.Conv2d(50, 75, kernel_size=(2, 2), stride=(2,2), bias=False) # 15x20
        self.batchnorm4 = torch.nn.BatchNorm2d(75)
        self.pool2 = torch.nn.MaxPool2d(kernel_size=(4, 5), stride=(1,1)) # 12x16
        self.conv5 = torch.nn.Conv2d(75, 100, kernel_size=(3, 3), stride=(1,1), bias=False) # 10x14
        self.batchnorm5 = torch.nn.BatchNorm2d(100)
        self.conv6 = torch.nn.Conv2d(100, 75, kernel_size=(3, 3), stride=(1,1), bias=False) # 8x12
        self.batchnorm6 = torch.nn.BatchNorm2d(75)
        self.conv7 = torch.nn.Conv2d(75, 50, kernel_size=(3, 3), stride=(1,1), bias=False) # 6x10
        self.batchnorm7 = torch.nn.BatchNorm2d(50)
        self.conv8 = torch.nn.Conv2d(50, 25, kernel_size=(3, 3), stride=(1,1), bias=False) # 4x8
        self.neuron = torch.nn.Linear(25*4*8, 1, bias=False)

        # A single PReLU module (one learnable slope) is reused after every layer.
        self.PRelu = torch.nn.PReLU(1)
        #self.tanh = torch.nn.Tanh() # Reinforcement Learning algorithms tend to use softmax. But that would result in a quite big output size.


    def _forward_frame(self, frame):
        """Runs one batch of frames (Batch, channels, height, width) through the whole stack. Returns (Batch, 1)."""

        x = self.PRelu(self.batchnorm1(self.conv1(frame)))
        x = self.PRelu(self.batchnorm2(self.conv2(x)))

        x = self.pool1(x)

        x = self.PRelu(self.batchnorm3(self.conv3(x)))
        x = self.PRelu(self.batchnorm4(self.conv4(x)))

        x = self.pool2(x)

        x = self.PRelu(self.batchnorm5(self.conv5(x)))
        x = self.PRelu(self.batchnorm6(self.conv6(x)))
        x = self.PRelu(self.batchnorm7(self.conv7(x)))

        # conv8 has no batchnorm after it.
        x = self.PRelu(self.conv8(x))

        x = x.view(x.size(0), -1)

        return self.neuron(x)


    def forward(self, input):
        """
        Default mode: input is (Batch, channels, height, width); returns (Batch, 1).

        Forecast mode: input is (Batch, frames_sequence, channels, height, width). Every
        frame step is run through the network and the output of the last step, (Batch, 1),
        is returned. From the second step on, the batch is made of the previous frame of the
        second half of the batch followed by the current frame of the first half.
        Returns None if the sequence has no frames.
        """

        if self.mode == "Forecast": # input: (Batch, frames_sequence, channels, height, width)

            batch_size = input.size(0)

            output = None

            for frame_step in range(input.size(1)):

                if frame_step > 0:

                    previous_frame = input[batch_size//2:, frame_step-1]

                    input_frame = input[:batch_size//2, frame_step]

                    input_frame = torch.cat((previous_frame, input_frame), 0)

                else:
                    input_frame = input[:, frame_step]

                output = self._forward_frame(input_frame)

            # Returning only after the loop: a return inside it would stop at the first frame.
            return output

        # input: (Batch, channels, height, width)
        return self._forward_frame(input)


    def execute_command(self, command):
        '''
        Command must be a tuple (keyboard command|mouse command , key|coordinate)

        Longer tuples are read as consecutive (command, target) pairs, ex:
        ('keyDown', 'shift', 'keyDown', 'z'). A trailing unpaired item is ignored.

        Keyboard commands: any command containing "key". "Up" releases the key, "Down" holds
        it and anything else presses it once.
        Mouse commands: "move" moves the cursor to the coordinate, "rightClick" right-clicks
        on it and anything else left-clicks on it.

        We could, however, make things more interesting by creating the possibility of having 2 different commands:
        one to select which command it will be(key down, key up, move mouse) and another to decide which key(Z, A, Q...)

        Make sure that all keys are lowered
        '''

        for action, target in zip(command[0::2], command[1::2]):

            if "key" in action:

                if "Up" in action:
                    pyautogui.keyUp(target)

                elif "Down" in action:
                    pyautogui.keyDown(target)

                else:
                    pyautogui.press(target)

            else:
                coordinates = target.replace('(', '').replace(')', '').split(',')
                x, y = int(coordinates[0]), int(coordinates[1])

                if "move" in action:
                    # moveTo is absolute, like click/rightClick below. pyautogui.move() is
                    # relative to the current cursor position, which doesn't match the
                    # coordinate grid shared with the click commands.
                    pyautogui.moveTo(x, y)

                elif "rightClick" in action:
                    pyautogui.rightClick(x, y)

                else:
                    pyautogui.click(x, y)

In [68]:
# Lists the key names PyAutoGUI knows, for reference when writing the input mapping.
print(pyautogui.KEY_NAMES)

['\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 'accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace', 'browserback', 'browserfavorites', 'browserforward', 'browserhome', 'browserrefresh', 'browsersearch', 'browserstop', 'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete', 'divide', 'down', 'end', 'enter', 'esc', 'escape', 'execute', 'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f2', 'f20', 'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert', 'junja', 'kana', 'kanji', 'launchapp1', 'launchapp2', 'launchmai

In [20]:
# Commands available to Hakisa, in the 'command_key' format expected by Dataset.
# Every held key gets a keyDown/keyUp pair; 'x' is a single key press.
input_mapping = [
    f'{action}_{key}'
    for key in ('up', 'down', 'left', 'right', 'z', 'shift')
    for action in ('keyDown', 'keyUp')
] + ['key_x']

In [71]:
# Builds the dataset with 100 exploration/study steps and a memory of 100 entries.
# The screen region and resize keep their defaults (full 1920x1080 capture, no resize).
dataset = Dataset(input_mapping, explore_train_steps=100, memory_size=100)

KNN fitted and ready to go!


In [22]:
# Shows the float value assigned to each command (values are spread over [-1, 1]).
print(dataset.input_mapping)

{'keyDown_up': -1.0, 'keyUp_up': -0.8333333333333334, 'keyDown_down': -0.6666666666666667, 'keyUp_down': -0.5, 'keyDown_left': -0.33333333333333337, 'keyUp_left': -0.16666666666666663, 'keyDown_right': 0.0, 'keyUp_right': 0.16666666666666674, 'keyDown_z': 0.33333333333333326, 'keyUp_z': 0.5, 'keyDown_shift': 0.6666666666666667, 'keyUp_shift': 0.8333333333333333, 'key_x': 1.0}


In [69]:
# Instantiates the model in its default (single-frame) mode and moves it to `device`.
hakisa = Hakisa().to(device)

In [70]:
# Prints the layer-by-layer output shapes and parameter counts for a (3, 1080, 1920) input.
# NOTE(review): the saved output below lists LeakyReLU layers, while the Hakisa class in this
# notebook uses PReLU - the output looks like it came from an earlier version; re-run to confirm.
from torchsummary import summary

summary(hakisa, (3, 1080, 1920))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 10, 360, 640]             270
       BatchNorm2d-2         [-1, 10, 360, 640]              20
         LeakyReLU-3         [-1, 10, 360, 640]               0
            Conv2d-4         [-1, 25, 120, 160]           3,000
       BatchNorm2d-5         [-1, 25, 120, 160]              50
         LeakyReLU-6         [-1, 25, 120, 160]               0
         MaxPool2d-7           [-1, 25, 60, 80]               0
            Conv2d-8           [-1, 50, 30, 40]           5,000
       BatchNorm2d-9           [-1, 50, 30, 40]             100
        LeakyReLU-10           [-1, 50, 30, 40]               0
           Conv2d-11           [-1, 75, 15, 20]          15,000
      BatchNorm2d-12           [-1, 75, 15, 20]             150
        LeakyReLU-13           [-1, 75, 15, 20]               0
        MaxPool2d-14           [-1, 75,

In [10]:
def preprocess_Jigoku(score):
    """
    Converts an OCR reading into a float.
    For the game Jigoku Kisetsukan: Sense of the Seasons

    Letters that the OCR tends to return in place of digits are swapped for digits and
    dots are dropped. If what is left still can't be parsed as a float, 1.0 is returned.
    """

    ocr_fixes = str.maketrans({
        'S': '5', 's': '8', 'e': '2', 'O': '0', 'B': '8',
        'o': '4', 'b': '4', 'I': '1', 'l': '1',
        '.': None, # dots are removed
    })

    cleaned = score.translate(ocr_fixes)

    try:
        return float(cleaned)

    except ValueError:
        return 1.0

In [24]:
# Exploration loop
# Hakisa plays for dataset.steps steps. Each step: grab a frame, pick a command, execute it,
# read the game state through OCR, compute a reward and store everything in the memory.

from time import time

start = time()

sleep(5) # Waits before starting the loop

winsound.PlaySound('D:/Python/Audio/English/chiara_hacking_1_en.wav', winsound.SND_FILENAME) # Just to know if everything's ok

for step in range(dataset.steps):

    frame = dataset._grab_frame()

    # Nothing is backpropagated here, so there's no need to build the autograd graph.
    with torch.no_grad():
        command = hakisa(frame)

    command = dataset.get_command(command.detach().cpu().numpy())

    hakisa.execute_command(command)

    # If Hakisa acts too fast, the consequence for her action might not appear right now. Not that I wouldn't want her to be fast and efficient...

    # get_consequences arguments: top, left, width, height of each region on the screen.

    score = dataset.get_consequences(1008, 1429, 1723-1429, 1046-1008, tesseract_config='--psm 8')

    score = preprocess_Jigoku(score)

    mult_score = dataset.get_consequences(933, 1536, 1723-1536, 978-933, tesseract_config='--psm 8')

    mult_score = preprocess_Jigoku(mult_score)

    life = dataset.get_consequences(849, 400, 498-400, 904-849, tesseract_config='--psm 8')

    life = preprocess_Jigoku(life)

    power = dataset.get_consequences(923, 405, 503-405, 978-923, tesseract_config='--psm 8')

    power = preprocess_Jigoku(power)

    # Width is the horizontal extent (503-400) and height the vertical one (1045-1001),
    # like in the other regions. They were swapped here before.
    aura = dataset.get_consequences(1001, 400, 503-400, 1045-1001, tesseract_config='--psm 8')

    aura = preprocess_Jigoku(aura)
    aura = aura/100

    if life == 0:

        # No lives left: negative reward, smaller in magnitude the higher the score.
        try:
            reward = -(100/(score * mult_score))

        except ZeroDivisionError:
            reward = -10

    else:

        reward = (score * mult_score) + (power * aura)


    #consequence = 0.0 # for testing

    dataset.create_memory(1e-3, frame, command, reward)

    if step == (dataset.steps - 1):

        winsound.PlaySound(f'D:/Python/Audio/English/chiara_craftEpic_1_en.wav', winsound.SND_FILENAME) # Because yes

end = time()
print(f"Loop complete!\nTime spent: {end-start} seconds")

del frame, command, score, mult_score, life, power, aura, reward, step, start, end

winsound.PlaySound(f'D:/Python/Audio/English/chiara_rest_1_en.wav', winsound.SND_FILENAME)

Loop complete!
Time spent: 105.62340354919434 seconds


In [None]:
# Unfortunately, this is where we got the CUDA RuntimeError. Try using resize in Dataset creator and adjust Hakisa accordingly.

# Turns the exploration memory into (inputs, labels) tensors and wraps them in a DataLoader.
dataset.create_data_for_study()
dataloader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True)

# Bookkeeping for the studying loop.
costs = []          # not referenced by the cells visible in this notebook
learning_rate = []  # not referenced by the cells visible in this notebook
grads = []          # receives the mean gradient of the last linear layer at each step
grad_clip = None    # gradient clipping value; None disables clipping
save_path = None    # checkpoint directory; the studying loop falls back to "Hakisa" when None
start_epoch = 0     # overwritten by the checkpoint-loading cell when resuming

In [None]:
# In case you want to continue the studying phase.

# The studying loop saves to "Hakisa" when save_path is None, so look there by default too.
checkpoint_dir = save_path if save_path is not None else "Hakisa"

# map_location lets a checkpoint saved on the GPU be loaded on a CPU-only machine (and vice versa).
params = torch.load(f'{checkpoint_dir}/Hakisa_checkpoint.tar', map_location=device)
start_epoch = params['Epoch'] + 1
hakisa.load_state_dict(params['Hakisa_params'])
# NOTE(review): lr is only read here; the studying cell builds its optimizer with its own
# learning rate, so this value isn't applied anywhere in the cells visible in this notebook.
lr = params['Hakisa_LR']

del params, checkpoint_dir

In [None]:
# Studying loop - Classic supervised learning. Will help Hakisa try to create certain patterns for situations and her reactions.
# Inputs are the frames kept in memory and labels are the values of the commands used on them.

import os

# NOTE(review): lr=1 is unusually high for Adam - confirm this is intended.
optimizer = torch.optim.Adam(hakisa.parameters(), lr=1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 50, gamma=0.1)

study_loss = torch.nn.MSELoss()

# Tracked across the whole run. Resetting best_loss at every batch would make every batch
# count as the "best" one.
best_loss = float('inf')
best_params = None

# Starts from start_epoch so a loaded checkpoint actually resumes where it stopped.
for epoch in range(start_epoch, dataset.steps):

    for i, (input_frame, labels) in enumerate(dataloader):

        hakisa.zero_grad()

        output_command = hakisa(input_frame)

        # Same shape and dtype as the output, so MSELoss doesn't broadcast.
        labels = labels.float().view_as(output_command)

        loss = study_loss(output_command, labels)

        loss.backward()

        # Clipping must happen after backward() and before the optimizer step. Registering a
        # hook here would only affect later backward passes and would pile up a new hook at
        # every iteration.
        if grad_clip is not None:
            torch.nn.utils.clip_grad_value_(hakisa.parameters(), grad_clip)

        for n, p in hakisa.named_parameters():

            if 'neuron.weight' in n:
                grads.append(torch.mean(p.grad).item())

        optimizer.step()

        if loss.item() < best_loss:

            best_loss = loss.item()
            # state_dict() returns references to the live weights; clone them so the snapshot
            # doesn't keep changing with the following optimizer steps.
            best_params = {name: tensor.detach().clone() for name, tensor in hakisa.state_dict().items()}

    # Report and save every 5 epochs (the batch index `i` has the same value at the end of
    # every epoch, so it can't be used for this).
    if epoch % 5 == 0:
        last_grad = grads[-1] if grads else float('nan')

        print(f"{epoch}/{dataset.steps}")
        print(f"Best Discriminator Loss: {best_loss}\tCurrent LR: {scheduler.get_last_lr()[0]}\tGradients Average: {last_grad}")

        if save_path is None:
            save_path = "Hakisa"

        os.makedirs(save_path, exist_ok=True)

        torch.save({
            'Epoch': epoch,
            'Hakisa_params': best_params if best_params is not None else hakisa.state_dict(),
            'Hakisa_LR': scheduler.get_last_lr()[0]
        }, f"{save_path}/Hakisa_checkpoint.tar")

        print("Model saved!")

    scheduler.step()

In [None]:
class GameplayLoss(torch.autograd.Function):
    """
    Gameplay Loss function. Aims to allow backpropagation through scores and, thus, allow
    optimization aiming to achieve best gameplay performance.

    Have yet to be tested. The idea is to be used during normal playthrough, after study mode.

    Args:

        model_output: the output generated by the model. Necessary for backpropagation.
        reward: the reward obtained. Simply as that. A tensor with the same size as
            model_output (try .unsqueeze(-1)), or a number/tensor that can be expanded to it.

    Forward returns log(reward), with the shape of model_output. The value doesn't depend on
    model_output; the gradient sent back to model_output is defined by hand as
    grad_output * (1 / reward). Rewards equal to 0 are replaced by 1e-10 before the log.

    In most games, the lowest possible score one can achieve is 0, while the best score possible is infinite.
    So we could simply define a function whose range is [0, inf[ and go on with that.

    In order to avoid great numbers and possible likelihood of absurd exploding gradients, we'll use a simple log function and its derivative.
    This can make the function bad for optimization when the performance is too good, but might give it some help when it's too low.

    Afterall, it's easier to get better when you're bad than when you're a pro.

    Of course, there are many possible ways to achieve certain score.
    In Jigoku Kisetsukan, to achieve a score of 10,000, you can kill enemies, or simply scrape through bullets without killing anyone.

    However, I'm still interested in giving it a chance.

    PS: Remember that, when reward < 1, log(reward) will be negative. When reward < 0, it will be NaN.
    """

    @staticmethod
    def forward(ctx, model_output, reward):

        # Accept numbers as well as tensors, and give the reward the dtype, device and shape
        # of the model output, so the gradient returned by backward() matches it.
        reward = torch.as_tensor(reward, dtype=model_output.dtype, device=model_output.device)
        reward = reward.expand_as(model_output)

        # Element-wise replacement keeps the reward a tensor (save_for_backward and torch.log
        # don't accept a Python float) and also works with more than one element.
        reward = torch.where(reward == 0, torch.full_like(reward, 1e-10), reward)

        ctx.save_for_backward(reward)

        return torch.log(reward)

    @staticmethod
    def backward(ctx, grad_output):

        reward, = ctx.saved_tensors

        # d/dx log(x) = 1/x. The reward has no gradient of its own, hence the None.
        return grad_output * (1 / reward), None

In [None]:
# Playing loop - She learns as she plays...if our GameplayLoss function actually works.
# Uses the `optimizer` and `scheduler` created in the studying cell. Runs until Esc is pressed.

import os

import keyboard

winsound.PlaySound(f'D:/Python/Audio/English/chiara_learnweaponskill_Rapier_2_en.wav', winsound.SND_FILENAME)

rewards = []
learning_rate = []
grads = []
grad_clip = None # Gradient clipping value. None disables clipping
save_path = None # Checkpoint directory. Falls back to "Hakisa" when None
steps = 0
save_point = 0 # Save a checkpoint every `save_point` steps. 0 disables checkpoints

# autograd Functions are used through .apply, they aren't meant to be instantiated.
gameplay_loss = GameplayLoss.apply

# Tracked across the whole session (resetting them at every step would make every positive
# loss count as the best one).
best_loss = 0.0
best_params = None

while keyboard.is_pressed('esc') == False: # Exit loop when Esc is pressed

    #hakisa.eval()
    hakisa.zero_grad()

    frame = dataset._grab_frame()

    command_value = hakisa(frame)

    # The KNN decoder works on numpy arrays: hand it a detached CPU copy, like in the
    # exploration loop. command_value itself keeps its graph for the backward pass.
    command = dataset.get_command(command_value.detach().cpu().numpy())

    hakisa.execute_command(command)

    del command

    # get_consequences arguments: top, left, width, height of each region on the screen.

    score = dataset.get_consequences(1008, 1429, 1723-1429, 1046-1008, tesseract_config='--psm 8')

    score = preprocess_Jigoku(score)

    mult_score = dataset.get_consequences(933, 1536, 1723-1536, 978-933, tesseract_config='--psm 8')

    mult_score = preprocess_Jigoku(mult_score)

    life = dataset.get_consequences(849, 400, 498-400, 904-849, tesseract_config='--psm 8')

    life = preprocess_Jigoku(life)

    power = dataset.get_consequences(923, 405, 503-405, 978-923, tesseract_config='--psm 8')

    power = preprocess_Jigoku(power)

    # Width is the horizontal extent (503-400) and height the vertical one (1045-1001),
    # like in the other regions. They were swapped here before.
    aura = dataset.get_consequences(1001, 400, 503-400, 1045-1001, tesseract_config='--psm 8')

    aura = preprocess_Jigoku(aura)
    aura = aura/100

    if life == 0:

        try:
            reward = -(100/(score * mult_score))

        except ZeroDivisionError:
            reward = -10

    else:

        reward = (score * mult_score) + (power * aura)

    del score, mult_score, power, aura, life

    # reward is a Python number here, so it has no .unsqueeze(). Build a tensor with the
    # same size, dtype and device as command_value instead.
    reward = torch.full_like(command_value, reward)

    loss = gameplay_loss(command_value, reward)

    # A negative reward makes log(reward) NaN. Stepping with NaN gradients would ruin the
    # weights, so those steps are skipped.
    if torch.isfinite(loss).all():

        loss.backward()

        # Clipping must happen after backward() and before the optimizer step.
        if grad_clip is not None:
            torch.nn.utils.clip_grad_value_(hakisa.parameters(), grad_clip)

        for n, p in hakisa.named_parameters():

            if 'neuron.weight' in n:
                grads.append(torch.mean(p.grad).item())

        optimizer.step()

        if loss.item() > best_loss:

            best_loss = loss.item()
            # Clone the weights: state_dict() returns references that keep changing.
            best_params = {name: tensor.detach().clone() for name, tensor in hakisa.state_dict().items()}

    steps += 1

    if save_point and steps % save_point == 0:

        if save_path is None:
            save_path = "Hakisa"

        os.makedirs(save_path, exist_ok=True)

        torch.save({
            'Steps': steps,
            'Hakisa_params': best_params if best_params is not None else hakisa.state_dict(),
            'Hakisa_LR': scheduler.get_last_lr()[0]
        }, f"{save_path}/Hakisa_checkpoint.tar")


        # The file name was missing its .wav extension.
        winsound.PlaySound(f'D:/Python/Audio/English/chiara_craftEpic_1_en.wav', winsound.SND_FILENAME)