In [None]:
from mss import mss
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.backends.cudnn as cudnn
from tensorflow.keras.utils import to_categorical
from PIL import Image
from time import sleep
import winsound
import pickle
#import pyautogui
import keyboard
import mouse
import pytesseract
from re import sub

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Enable cuDNN's benchmark mode.
cudnn.benchmark = True

Here, Hakisa will be composed of 3 parts:

1) Feature extraction -> Extracts features from the input
2) Pseudo-labeling -> generates a pseudo-label according to the features extracted. Those labels will try to define the best command for that state(input)
3) Action selection -> Generates a command (command_type, action1, action2).

What's the difference? There's no Exploration Mode, only recorded gameplay.

In Study Mode, recorded gameplay is passed as input to Hakisa. There's no human-labeled data. Pseudo-labels are generated with one class per entry in each action-argument list,
and they are used as the targets for the loss function.
The input to the loss function is the command generated at the end.

So, Study Mode will be:
Input --------------> Feature Extraction

Feature Extraction ------------> Pseudo-labels
Feature Extraction ------------> Command

Loss = CrossEntropy(Command, Pseudo-labels)

To consider: using vectors for the pseudo-labels instead of classes.

In Play Mode, optimization will still be coordinated by predicted reward and actual (cumulative) reward (TD-Learning)

In [None]:
class Dataset(torch.utils.data.Dataset):
    '''
    Screen grabber, OCR reader and frame container for Hakisa.

    Remember: command_types = list of strings, actions1 and 2 = list of strings(keyboard), X coordinates or None(mouse)

    The PyTorch Dataset protocol (__getitem__/__len__) serves whatever was stored
    through create_data; the remaining methods capture the screen directly.
    '''

    def __init__(
        self,
        command_types = None,
        actions1 = None,
        actions2 = None,
        top=0,
        left=0,
        width=1920,
        height=1080,
        resize=None
    ):

        # Screen region handed to the screen grabber
        self.top = top
        self.left = left
        self.width = width
        self.height = height

        # Optional size passed straight to PIL's Image.resize, which expects (width, height).
        self.resize = resize

        self.data = None # Filled by create_data (e.g. with frames loaded from disk).

        # Lists indexed by the argmax of Hakisa's outputs (see get_command).
        self.command_type = command_types
        self.actions1 = actions1
        self.actions2 = actions2


    # Pytorch's Dataset functions will only be used in Studying mode
    def __getitem__(self, idx):

        return self.data[idx]


    def __len__(self):

        return len(self.data)


    def _monitor(self):
        # Region description in the format mss.grab expects.
        return {"top": self.top, "left": self.left, "width": self.width, "height": self.height}


    def grab_frame(self):
        '''
        Captures the configured screen region and returns it as a float32 tensor
        of shape (1, Channels, Height, Width) on `device`.
        '''
        # Unfortunately, this whole operation takes about 0.6 seconds, so we'll probably have to deal with a single frame each 1~3 seconds.
        with mss() as sct:
            shot = sct.grab(monitor=self._monitor())
            frame = Image.frombytes("RGB", shot.size, shot.bgra, 'raw', 'BGRX')

        if self.resize:
            frame = frame.resize(self.resize)

        frame = torch.from_numpy(np.array(frame, dtype=np.float32)) # (Height, Width, Channels)

        # permute() moves the channel axis; view() would only reinterpret the same
        # memory with a new shape and scramble the pixels across channels.
        frame = frame.permute(2, 0, 1).unsqueeze(0).contiguous().to(device) # (Batch, Channels, Height, Width)

        return frame


    def get_command(self, cmd_type, action1, action2):
        '''
        Hakisa's output: (command_type, action1, action2) ----> (key, Down, z) or (click, 100, 60)

        cmd_type, action1 and action2 are integer indices (the argmax of each
        log-softmax output) into the lists given at construction time.
        Returns the tuple (command_type, action1, action2) of the selected entries.
        '''

        return (self.command_type[cmd_type], self.actions1[action1], self.actions2[action2])

    def get_consequences(self, top, left, width, height, togray=False, threshold=False, thresh_gauss=171, thresh_C=13, tesseract_config='--psm 8'):
        '''
        Used after Hakisa performed an input, in order to get its consequences(ex: score change, bombs, kills, deaths...).

        Grabs the given screen region, optionally converts it to grayscale
        (togray) and, only when togray is also set, applies OpenCV's adaptive
        Gaussian threshold (threshold, thresh_gauss, thresh_C).
        Returns Tesseract's OCR string with every character outside
        A-Z, a-z, 0-9, '/' and '.' removed.
        '''

        with mss() as sct:
            shot = sct.grab(monitor={"top": top, "left": left, "width": width, "height": height})

            consequence = Image.frombytes("RGB", shot.size, shot.bgra, 'raw', 'BGRX')

        if togray:

            # "L" is PIL's 8-bit grayscale mode ("P" would yield palette indices, not intensities).
            consequence = consequence.convert("L") # Sometimes, simply converting to grayscale is enough

            if threshold:
                # Imported lazily so OpenCV is only required when thresholding is requested.
                from cv2 import adaptiveThreshold, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY

                consequence = adaptiveThreshold(np.array(consequence),255,ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY,thresh_gauss,thresh_C)
                consequence = Image.fromarray(consequence)

        consequence = pytesseract.image_to_string(consequence, config=tesseract_config)

        # OCR adds some strange characters(even with the whitelist function). Let's remove them.

        consequence = sub('[^A-Za-z0-9/.]', '', consequence) # Attention: 0, 1 and 8 can be seen as O, l and B.

        return consequence

    def create_data(self, data, commands=None):
        '''
        Stores `data` as the samples served by __getitem__/__len__.

        `commands` is accepted for compatibility and currently ignored: Study Mode
        uses pseudo-labels rather than recorded commands.
        '''

        self.data = data


    def record_gameplay(self, number_of_screenshots, screenshot_delay, grayscale=False, resize=False, path=None, start_index=2000):
        '''
        Saves `number_of_screenshots` captures of the configured region as PNG files.

        Files are written to `path` (current directory when None) and named
        "<start_index + i>.png". `screenshot_delay` is the pause, in seconds,
        after each capture. `grayscale` converts frames to mode "L"; `resize`
        resizes them to the `resize` tuple given at construction time.

        Raises ValueError when `resize` is requested but no size was configured.
        '''

        # Resizing and grayscaling isn't really necessary here, but can save you some time later.
        # Both saving you from writing more code and from making your hardware having to process more and more data at once.

        if resize and not self.resize:
            raise ValueError("resize=True requires the Dataset to be created with a resize tuple")

        if path is None:
            path = "."

        print(f"Ok. Screenshot capture will begin in 5 seconds")

        sleep(5)

        winsound.PlaySound('D:/Python/Audio/English/chiara_hacking_1_en.wav', winsound.SND_FILENAME) # Just to know if everything's ok

        monitor = self._monitor()

        # One grabber for the whole session instead of one per screenshot.
        with mss() as sct:

            for i in range(number_of_screenshots):

                shot = sct.grab(monitor=monitor)
                frame = Image.frombytes("RGB", shot.size, shot.bgra, 'raw', 'BGRX')

                if grayscale:

                    frame = frame.convert('L')

                if resize:

                    frame = frame.resize(self.resize)

                frame.save(f"{path}/{i+start_index}.png")

                sleep(screenshot_delay)

        print("Screenshot capture finished!")

        winsound.PlaySound('D:/Python/Audio/English/chiara_hacking_1_en.wav', winsound.SND_FILENAME)

In [None]:
# Jigoku Kisetsukan
# Option lists for the three parts of a command; all of them are passed to Dataset and Hakisa.

command_type = [
    'key',
]

actions1 = [
    'Down',
    'Up',
]

actions2 = [
    'up',
    'down',
    'left',
    'right',
    'z',
    'x',
    'shift',
]

In [None]:
# Screen grabber / frame container configured with the game's command lists; frames are resized to 200x200.
dataset = Dataset(
    command_types=command_type,
    actions1=actions1,
    actions2=actions2,
    resize=(200, 200),
)

In [None]:
class FeatureExtractor(torch.nn.Module):
    '''
    Convolutional encoder: five stages of (conv -> batchnorm -> PReLU) x 2, each
    followed by 2D dropout and a 2x2 max-pool, then a linear layer with
    layer normalization and PReLU.

    The spatial sizes in the comments below assume a 200x200 input, which is what
    the final linear layer (400*5*5 inputs) requires. Output: (batch, 200*2*2).
    '''

    def __init__(self):

        super().__init__()

        # (in_channels, out_channels, kernel_size, padding) for conv1 ... conv10.
        # Every convolution uses stride 1 and no bias; spatial size after each layer in the comment.
        conv_specs = (
            (3, 100, 3, 1),     # 200x200
            (100, 100, 3, 1),   # 200x200 -> pool -> 100x100
            (100, 200, 3, 1),   # 100x100
            (200, 200, 3, 1),   # 100x100 -> pool -> 50x50
            (200, 400, 3, 1),   # 50x50
            (400, 400, 3, 1),   # 50x50 -> pool -> 25x25
            (400, 800, 4, 0),   # 22x22
            (800, 1000, 3, 0),  # 20x20 -> pool -> 10x10
            (1000, 800, 3, 1),  # 10x10
            (800, 400, 3, 1),   # 10x10 -> pool -> 5x5
        )

        # Registered as conv<k> / batchnorm<k>, alternating, so parameter names and order stay stable.
        for number, (channels_in, channels_out, kernel, pad) in enumerate(conv_specs, start=1):
            setattr(
                self,
                f"conv{number}",
                torch.nn.Conv2d(channels_in, channels_out, kernel_size=kernel, stride=1, padding=pad, bias=False),
            )
            setattr(self, f"batchnorm{number}", torch.nn.BatchNorm2d(channels_out))

        flattened_size = 400*5*5
        feature_size = 200*2*2
        self.neuron1 = torch.nn.Linear(flattened_size, feature_size, bias=False)
        self.layer_norm1 = torch.nn.LayerNorm(feature_size)

        # A single PReLU, max-pool and dropout instance is reused by every stage.
        self.PRelu = torch.nn.PReLU()
        self.pool2x2 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = torch.nn.Dropout2d(0.25)

    def forward(self, input):

        x = input

        # Stages start at conv1, conv3, conv5, conv7 and conv9; each holds two conv blocks.
        for first in range(1, 11, 2):

            for number in (first, first + 1):
                x = getattr(self, f"conv{number}")(x)
                x = getattr(self, f"batchnorm{number}")(x)
                x = self.PRelu(x)

            x = self.dropout(x)
            x = self.pool2x2(x)

        flat = x.view(x.size(0), -1) # (batch, 400*5*5)

        projected = self.layer_norm1(self.neuron1(flat)) # (batch, 200*2*2)

        return self.PRelu(projected)

In [None]:
class Teacher(torch.nn.Module):
    '''
    Pseudo-label generator: three bias-free linear heads over the extracted
    features, each followed by a log-softmax over its own classes.
    '''

    def __init__(self, command_types, actions1, actions2):

        super().__init__()

        # Only the number of options in each list matters here.
        self.command_types, self.actions1, self.actions2 = (
            len(options) for options in (command_types, actions1, actions2)
        )

        # Remember: The features have been properly extracted into a tensor with size (Batch, 200*2*2)
        feature_size = 200*2*2

        self.neuron_type = torch.nn.Linear(feature_size, self.command_types, bias=False)
        self.neuron_action1 = torch.nn.Linear(feature_size, self.actions1, bias=False)
        self.neuron_action2 = torch.nn.Linear(feature_size, self.actions2, bias=False)

        # pool2x2 and PRelu are not used by forward.
        self.pool2x2 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.PRelu = torch.nn.PReLU()
        self.softmax = torch.nn.LogSoftmax(-1)

    def forward(self, features):

        heads = (self.neuron_type, self.neuron_action1, self.neuron_action2)

        # (command_type, action1, action2), each holding log-probabilities
        return tuple(self.softmax(head(features)) for head in heads)

In [None]:
class Hakisa(torch.nn.Module):
    '''
    Agent network: a FeatureExtractor, a Teacher that produces pseudo-labels
    and heads that output a command (command_type, action1, action2).

    mode == 'Study': forward returns (command, labelsA, labelsB), where the labels
    are two Teacher outputs computed from the same features.
    Any other mode (Play): forward also consumes the previous action and previous
    reward and returns (command, command_quality, predicted_reward).

    Every element of `command` holds log-probabilities (LogSoftmax), except the
    command type when only one command type exists: it is then a zero tensor of
    shape (Batch, 1).
    '''

    def __init__(self, command_types, actions1, action1_dims, actions2, action2_dims, mode='Study'):

        super(Hakisa, self).__init__()

        self.mode = mode

        # Only the number of options is stored; the lists themselves live in the Dataset.
        self.command_types = len(command_types)
        self.actions1 = len(actions1)
        self.action1_dims = action1_dims # How many dimensions the embedding matrix will have
        self.actions2 = len(actions2)
        self.action2_dims = action2_dims

        self.feature_extractor = FeatureExtractor()
        self.teacher = Teacher(command_types, actions1, actions2)

        # Features extracted.

        if self.command_types > 1:

            self.embed_command_type = torch.nn.Embedding(self.command_types, self.command_types)

            self.neuron_command_study = torch.nn.Linear(200*2*2, self.command_types, bias=False)
            # Play mode input = features (800) + previous action (100) + previous reward (100) = 1000,
            # the same width layer_normcat normalizes.
            self.neuron_command_play = torch.nn.Linear(1000, self.command_types, bias=False)

            # Considering the command_type that has been predicted, what should be the action1 and action2?

            self.neuron2 = torch.nn.Linear(self.command_types, 100, bias=False)

        else:

            self.neuron2_study = torch.nn.Linear(200*2*2, 100, bias=False)
            self.neuron2_play = torch.nn.Linear(1000, 100, bias=False)

        self.neuron_quality = torch.nn.Linear(1, 100, bias=False)

        self.layer_norm2 = torch.nn.LayerNorm(100)

        self.embed_action1 = torch.nn.Embedding(self.actions1, self.action1_dims)
        self.neuron_action1A = torch.nn.Linear(self.actions1*self.action1_dims, 100, bias=False) # To be concatenated with neuron 2
        self.neuron_action1B = torch.nn.Linear(100*2, self.actions1, bias=False) # Apply softmax

        self.embed_action2 = torch.nn.Embedding(self.actions2, self.action2_dims)
        self.neuron_action2A = torch.nn.Linear(self.actions2*self.action2_dims, 100, bias=False) # To be concatenated with neuron 2
        self.neuron_action2B = torch.nn.Linear(100*2, self.actions2, bias=False)

        self.neuron_reward = torch.nn.Linear(1, 100, bias=False)
        self.layer_normcat = torch.nn.LayerNorm(1000)
        self.neuron_predquality = torch.nn.Linear(self.command_types+self.actions1+self.actions2, 3, bias=False)
        self.neuron_predreward1 = torch.nn.Linear(self.command_types+self.actions1+self.actions2, 100, bias=False)
        self.neuron_predreward2 = torch.nn.Linear(100, 1, bias=False)

        self.PRelu = torch.nn.PReLU(1)

        self.softmax = torch.nn.LogSoftmax(-1) # Since we're using softmax here, use NLLLoss during study and play mode.


    @staticmethod
    def _flat_embedding(embedding, n_entries, batch_size):
        # Embeds every index, flattens the table into one row and repeats that row for each sample.
        table = embedding(torch.arange(n_entries, device=embedding.weight.device)) # (n_entries, dims)
        return table.reshape(1, -1).expand(batch_size, -1) # (Batch, n_entries*dims)


    @staticmethod
    def _encode_previous_action(previous_action):
        # Collapses (command_type, action1, action2) into a single (Batch, 1) float tensor.
        # 2-D tensors with more than one column are treated as per-class scores and
        # replaced by their argmax index; anything else is taken as already being an index.
        total = None
        for component in previous_action:
            if component.dim() == 2 and component.size(1) > 1:
                component = component.argmax(-1)
            component = component.float().reshape(-1, 1)
            total = component if total is None else total + component
        return total


    def _command_head(self, x, study):
        # Returns (command_type, hidden): the command-type output and the (Batch, 100)
        # representation both action heads are conditioned on.
        if self.command_types > 1:

            layer = self.neuron_command_study if study else self.neuron_command_play

            table = self.embed_command_type(torch.arange(self.command_types, device=x.device)) # (n_commands, n_commands)

            # NOTE(review): the table is averaged over its rows so it can be added to the
            # (Batch, n_commands) logits; the previous reshape could not be broadcast. Confirm this is the intended use.
            command_type = layer(x) + table.mean(0, keepdim=True)

            command_type = self.softmax(command_type) # (Batch, n_commands)

            hidden = self.neuron2(command_type) # (Batch, 100)

        else:
            # A single command type leaves nothing to predict.
            command_type = torch.zeros((x.size(0), 1), device=x.device)

            layer = self.neuron2_study if study else self.neuron2_play

            hidden = layer(x) # (Batch, 100)

        hidden = self.layer_norm2(hidden)
        hidden = self.PRelu(hidden)

        return command_type, hidden


    def _action_heads(self, x):
        # Predicts action1 and action2 log-probabilities from the shared (Batch, 100) representation.
        batch_size = x.size(0)

        vector = self._flat_embedding(self.embed_action1, self.actions1, batch_size)
        vector = self.neuron_action1A(vector) # (Batch, 100)
        vector = torch.cat((x, vector), -1) # (Batch, 100*2)
        action1 = self.softmax(self.neuron_action1B(vector)) # (Batch, len(actions1))

        vector = self._flat_embedding(self.embed_action2, self.actions2, batch_size)
        vector = self.neuron_action2A(vector) # (Batch, 100)
        vector = torch.cat((x, vector), -1) # (Batch, 100*2)
        action2 = self.softmax(self.neuron_action2B(vector)) # (Batch, len(actions2))

        return action1, action2


    def forward(self, input_frame, previous_action=None, previous_reward=None):
        '''
        input_frame: batch of frames passed to the FeatureExtractor.
        previous_action: Play mode only. Tuple (command_type, action1, action2) from the
            previous step, either as indices or as per-class scores. None on the first step.
        previous_reward: Play mode only. (Batch, 1) tensor. None on the first step.
        '''

        features = self.feature_extractor(input_frame) # (Batch, 200*2*2)

        if self.mode == "Study":

            labelsA = self.teacher(features)
            labelsB = self.teacher(features) # To avoid having to iterate through everything 2 times in a single batch.

            command_type, x = self._command_head(features, study=True)
            action1, action2 = self._action_heads(x)

            return (command_type, action1, action2), labelsA, labelsB

        batch_size = features.size(0)

        if previous_action is None: # For first iteration
            previous_action = features.new_zeros((batch_size, 1))
        else:
            previous_action = self._encode_previous_action(previous_action).to(features.device)

        if previous_reward is None: # For first iteration
            previous_reward = features.new_zeros((batch_size, 1))

        previous_action = self.neuron_quality(previous_action) # (batch, 100)
        previous_reward = self.neuron_reward(previous_reward) # (batch, 100)

        x = torch.cat((features, previous_action, previous_reward), 1) # (batch, 1000)

        x = self.layer_normcat(x)
        x = self.PRelu(x)

        command_type, x = self._command_head(x, study=False)

        # Since each action here have size (Batch, len(actions)), access each action with actions[torch.argmax(action, -1)]
        action1, action2 = self._action_heads(x)

        x = torch.cat((command_type, action1, action2), 1) # (Batch, len(command_type) + len(action1) + len(action2))

        # Attention: .detach() here causes all the previous layers to be excluded from backpropagation.
        # This happens because the backpropagation is based on predicted_reward only.

        command_quality = self.neuron_predquality(x) # (Batch, 3)
        command_quality = self.softmax(command_quality)

        x = self.neuron_predreward1(x)
        predicted_reward = self.neuron_predreward2(x) # (Batch, 1)

        return (command_type, action1, action2), command_quality, predicted_reward


    def execute_command(self, command):
        '''
        Command must be a tuple(command_type, action1, action2), where:

            command_type: key(keyboard) or move, rightClick, click(mouse)
            action1: Up, Down, press(keyboard), X coordinate(mouse) or None(no mouse movement)
            action2: 'a', 'z', 'shift'...(keyboard), Y coordinate(mouse) or None(no mouse movement)

        Make sure all key actions(action2) are lowered.

        Have in mind that Hakisa might output command_type 'key' and action1 that is equivalent to a mouse action.
        Such mismatched commands are ignored on purpose.

        The command type is matched case-insensitively. Raises ValueError when it is not recognized.
        '''

        command_type, action1, action2 = command

        if not isinstance(command_type, str):
            raise ValueError(f"Unknown command type: {command_type!r}") # It was probably you who made a mistake.

        kind = command_type.lower()

        if "key" in kind:

            # If Hakisa predicts a keyboard command, but outputs a mouse action, she won't do anything.
            if not isinstance(action1, str) or not isinstance(action2, str):
                return

            try:
                if "Up" in action1:
                    keyboard.release(action2)

                elif "Down" in action1:
                    keyboard.press(action2)

                elif "press" in action1:
                    keyboard.send(action2) # Some games won't work with pyautogui.press(), so use keyboard module, since we'll import it for Play Mode.

            except Exception:
                pass # Best effort: a key the keyboard module rejects is skipped.

            return

        if "move" in kind:
            click = None

        elif "rightclick" in kind:
            click = "right"

        elif "click" in kind:
            click = "left"

        else:
            raise ValueError(f"Unknown command type: {command_type!r}") # It was probably you who made a mistake.

        try:
            # None coordinates mean "no mouse movement": the click still happens where the cursor is.
            if action1 is not None and action2 is not None:
                mouse.move(action1, action2, duration=0.1)

            if click == "right":
                mouse.right_click()

            elif click == "left":
                mouse.click() # Same case as press. Use mouse module.

        except Exception:
            pass # If Hakisa predict a mouse command, but outputs a keyboard action, she won't do anything.

In [None]:
# Build the agent in Study mode with 1-dimensional action embeddings and move it to the selected device.
hakisa = Hakisa(
    command_types=command_type,
    actions1=actions1,
    action1_dims=1,
    actions2=actions2,
    action2_dims=1,
    mode='Study',
)
hakisa = hakisa.to(device)

In [None]:
# Show the option lists the dataset will use to translate Hakisa's outputs into commands.
for option_list in (dataset.command_type, dataset.actions1, dataset.actions2):
    print(option_list)

In [None]:
import os

# Folder with the recorded frames, saved as "<number>.png".
gameplay_dir = "D:/Python/Projects/Hakisa/Hakisa/JK_gameplay/"

# Keep only numerically named PNG files and sort them as integers:
# compared as strings, "1000" comes before "2" (lexicographic order).
images_by_order = sorted(
    int(stem)
    for stem, extension in map(os.path.splitext, os.listdir(gameplay_dir))
    if extension.lower() == ".png" and stem.isdigit()
)

if not images_by_order:
    raise FileNotFoundError(f"No numbered .png frames found in {gameplay_dir}")

images_data = []

for number in images_by_order[0:100]:

    with Image.open(os.path.join(gameplay_dir, f"{number}.png")) as image:
        # Force 3 channels (frames may have been saved in grayscale or with alpha): the first convolution takes 3 input channels.
        resized = image.convert("RGB").resize((200, 200))
        images_data.append(np.array(resized, dtype=np.float32))

images_data = torch.from_numpy(np.stack(images_data, 0)) # (N, Height, Width, Channels)

# permute() moves the channel axis; view() would only relabel the shape and scramble the pixels.
images_data = images_data.permute(0, 3, 1, 2).contiguous() # (N, Channels, Height, Width)

In [None]:
# create_data is declared as (data, commands); Study Mode has no recorded commands, so pass None explicitly.
dataset.create_data(images_data, None)

In [None]:
# Study Mode hyperparameters and training state.
epochs = 100

# Adam over every parameter; the learning rate is multiplied by 0.1 every 1000 scheduler steps.
optimizer = torch.optim.Adam(hakisa.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

# Hakisa and the Teacher both output log-probabilities, hence NLLLoss.
loss = torch.nn.NLLLoss()

best_loss = float('inf')
grads = []

# One frame per step, in random order.
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=1, shuffle=True)

In [None]:
# Studying loop - Self-Learning

# Can be applied to any game

import os

# Use a save_path defined earlier in the session if there is one; default to "Hakisa".
save_path = globals().get("save_path") or "Hakisa"
os.makedirs(save_path, exist_ok=True)

best_loss = float('inf')
best_params = None

# Report/checkpoint interval: the dataset's memory_size when it has one, otherwise once per epoch.
report_every = max(1, getattr(dataset, "memory_size", len(dataloader)))

for epoch in range(epochs):

    # Teacher loss influence must grow over time. 0.99 decays faster than 0.999.
    teacher_weight = 1 - (0.99**epoch)

    for i, input_frame in enumerate(dataloader):

        input_frame = input_frame.to(device)

        hakisa.zero_grad()

        cmds, pseudo_pred, pseudo_labels = hakisa(input_frame)

        command_type, action1, action2 = cmds
        command_type_pred, action1_pred, action2_pred = pseudo_pred

        # Class index per sample, shape (Batch,): NLLLoss needs one target per row.
        command_type_target = pseudo_labels[0].detach().argmax(-1)
        action1_target = pseudo_labels[1].detach().argmax(-1)
        action2_target = pseudo_labels[2].detach().argmax(-1)

        # Used to promote consistency in the teacher network ---> Consider using an output EMA, or even Hakisa's gradients EMA.

        teacher_type_loss = loss(command_type_pred, command_type_target)
        teacher_action1_loss = loss(action1_pred, action1_target)
        teacher_action2_loss = loss(action2_pred, action2_target)

        command_type_loss = loss(command_type, command_type_target)
        action1_loss = loss(action1, action1_target)
        action2_loss = loss(action2, action2_target)

        command_type_cost = (teacher_type_loss * teacher_weight) + command_type_loss
        action1_cost = (teacher_action1_loss * teacher_weight) + action1_loss
        action2_cost = (teacher_action2_loss * teacher_weight) + action2_loss

        study_loss = command_type_cost + action1_cost + action2_cost

        study_loss.backward()

        optimizer.step()

        for n, p in hakisa.named_parameters():

            if 'neuron1.weight' in n:
                grads.append(torch.mean(p.grad).item()) # Plain float: no tensor is kept alive in the list

        if epoch > 0 and study_loss.item() < best_loss:

            best_loss = study_loss.item()
            # state_dict() returns references to the live weights; clone them so later updates don't overwrite the snapshot.
            best_params = {name: tensor.detach().clone() for name, tensor in hakisa.state_dict().items()}

        if i % report_every == 0:
            print(f"{epoch}/{epochs}")
            print(f"Best Loss: {best_loss}\tCurrent LR: {scheduler.get_last_lr()[0]}\tGradients Average: {grads[-1]}")
            print(f"Teacher Type Loss: {teacher_type_loss.item()}\tAction1 Loss: {teacher_action1_loss.item()}\tAction2 Loss: {teacher_action2_loss.item()}")
            print(f"Hakisa Type Loss: {command_type_loss.item()}\tAction1 Loss: {action1_loss.item()}\tAction2 Loss: {action2_loss.item()}")
            print(f"Command Type Cost: {command_type_cost.item()}\tAction1 Cost: {action1_cost.item()}\tAction2 Cost: {action2_cost.item()}\nStudy Loss: {study_loss.item()}")

            torch.save({
                'Epoch': epoch,
                # No best snapshot exists during the first epoch: save the current weights instead.
                'Hakisa_params': best_params if best_params is not None else hakisa.state_dict(),
                'Hakisa_LR': scheduler.get_last_lr()[0]
            }, f"{save_path}/Hakisa_checkpoint.tar")

            print("Model saved!")

    scheduler.step()

In [None]:
def preprocess_Jigoku(score):
    """Turn an OCR string into a float, undoing common letter-for-digit misreads.

    For the game Jigoku Kisetsukan: Sense of the Seasons.
    Dots are removed before parsing. Returns 1.0 when the cleaned text is not a valid float.
    """

    # Letters the OCR tends to produce in place of digits; '.' is dropped entirely.
    ocr_fixes = str.maketrans({
        'S': '5',
        's': '8',
        'e': '2',
        'O': '0',
        'B': '8',
        'o': '4',
        'b': '4',
        'I': '1',
        'l': '1',
        '.': None,
    })

    cleaned = score.translate(ocr_fixes)

    try:
        return float(cleaned)

    except ValueError:
        return 1.0

In [None]:
# Playing loop - She learns as she plays
# Jigoku Kisetsukan

import os
import warnings

warnings.simplefilter('ignore')

sleep(5)

winsound.PlaySound(f'D:/Python/Audio/English/chiara_learnweaponskill_Rapier_2_en.wav', winsound.SND_FILENAME)

hakisa.mode = 'Play'

reward = 0. # Cumulative reward (kept as a Python float; a tensor target is built from it at every step)
learning_rate = []
grads = []
grad_clip = None # Set to a positive number to clamp every gradient value to [-grad_clip, grad_clip]
save_path = 'Hakisa'
steps = 0
save_point = 10 # Also optimization point
uncertainty_factor = 0.9 # Also known as gamma or discount factor
best_loss = float('inf')
best_params = None

os.makedirs(save_path, exist_ok=True)

# NOTE(review): lr=1 is unusually high for Adam - confirm this is intended.
optimizer = torch.optim.Adam(hakisa.parameters(), lr=1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 50, gamma=0.1)

action_quality_loss = torch.nn.NLLLoss()
reward_loss = torch.nn.MSELoss()

previous_action = None
previous_command_quality = None

while not keyboard.is_pressed('esc'): # Exit loop when Esc is pressed

    frame = dataset.grab_frame()

    if steps == 0: # First iteration

        cmds, command_quality, predicted_reward = hakisa(frame)

    else:

        cmds, command_quality, predicted_reward = hakisa(frame, previous_action=previous_action, previous_reward=predicted_reward.detach())

    # Index of the most likely option in each output: the dataset lists are indexed by it.
    action_indices = [output.detach().argmax(-1) for output in cmds]

    command = dataset.get_command(*(int(index.item()) for index in action_indices))

    # Fed back to Hakisa on the next step as three same-shaped float tensors.
    previous_action = tuple(index.float() for index in action_indices)

    hakisa.execute_command(command)

    # get_consequences(top, left, width, height)
    score = dataset.get_consequences(1008, 1429, 1723-1429, 1046-1008, tesseract_config='--psm 8')

    score = preprocess_Jigoku(score)

    mult_score = dataset.get_consequences(933, 1536, 1723-1536, 978-933, tesseract_config='--psm 8')

    mult_score = preprocess_Jigoku(mult_score)

    life = dataset.get_consequences(849, 400, 498-400, 904-849, tesseract_config='--psm 8')

    life = preprocess_Jigoku(life)

    power = dataset.get_consequences(923, 405, 503-405, 978-923, tesseract_config='--psm 8')

    power = preprocess_Jigoku(power)

    # Width comes from the left/right difference and height from the top/bottom difference, as in the regions above.
    aura = dataset.get_consequences(1001, 400, 503-400, 1045-1001, tesseract_config='--psm 8')

    aura = preprocess_Jigoku(aura)
    aura = aura/100

    if life == 0:

        try:
            reward += -(100./(score * mult_score))

        except ZeroDivisionError:
            reward += -10.

    else:

        reward += ((score * mult_score) + (power * aura))*1e-6

    if previous_command_quality is None: # First iteration

        previous_command_quality = command_quality.detach()

    action_quality_cost = action_quality_loss(previous_command_quality, command_quality.argmax(1).detach()) # Input = (1, Classes), Target = (1)

    predicted_reward = predicted_reward * uncertainty_factor

    # Same shape, dtype and device as the prediction, so MSELoss does not have to broadcast.
    reward_target = torch.full_like(predicted_reward, reward)

    reward_cost = reward_loss(predicted_reward, reward_target)

    gameplay_loss = action_quality_cost + reward_cost

    gameplay_loss.backward() # Gradients accumulate until the next optimization point

    previous_command_quality = command_quality.detach()

    for n, p in hakisa.named_parameters(): # Checking how the grads and backpropagation are going

        if 'neuron1.weight' in n:
            grads.append(torch.mean(p.grad).item())

    if gameplay_loss.item() < best_loss:

        best_loss = gameplay_loss.item()
        # state_dict() returns references to the live weights; clone them so later updates don't overwrite the snapshot.
        best_params = {name: tensor.detach().clone() for name, tensor in hakisa.state_dict().items()}

    steps += 1

    if steps % save_point == 0:

        if grad_clip is not None:
            # Clamp the accumulated gradients once, right before they are applied.
            torch.nn.utils.clip_grad_value_(hakisa.parameters(), grad_clip)

        optimizer.step()
        scheduler.step()

        hakisa.zero_grad()

        print(f"Current step: {steps}")
        print(f"Best Loss: {best_loss}\tCurrent LR: {scheduler.get_last_lr()[0]}\tGradients Average: {grads[-1]}")
        print(f"Predicted Reward: {predicted_reward.item()}\tCurrent Reward: {reward}")

        torch.save({
            'Steps': steps,
            'Hakisa_params': best_params,
            'Hakisa_LR': scheduler.get_last_lr()[0]
        }, f"{save_path}/Hakisa_checkpoint.tar")


        winsound.PlaySound(f'D:/Python/Audio/English/chiara_craftEpic_1_en', winsound.SND_FILENAME)