In [1]:
from mss import mss
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.backends.cudnn as cudnn
from PIL import Image
from sklearn.neighbors import NearestNeighbors as KNN
#import pyautogui # If keyboard/mouse don't work, use pyautogui
import keyboard
import mouse
import pytesseract
from time import sleep
import winsound
from re import sub

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Let cuDNN benchmark convolution algorithms and pick one for the shapes it sees.
cudnn.benchmark = True

In [None]:
# Memory efficient loop
# Helper to find specific screen regions. Use Spritex, then confirm with this.
# Captures one fixed screen region, displays it, and prints Tesseract's raw and cleaned reading.

sleep(3) # Leaves time to bring the target window to the foreground before capturing.

with mss() as sct:
    monitor = {"top": 180, "left": 1, "width": 249-1, "height": 213-180} # Memory efficiency.
    data = sct.grab(monitor)
    data = Image.frombytes("RGB", data.size, data.bgra, 'raw', 'BGRX')
    # NOTE(review): "P" is Pillow's palette mode, not grayscale ("L"). Confirm which one is intended.
    data = data.convert("P")
    data = np.array(data)

# Audible cue that the capture is done.
winsound.PlaySound('D:/Python/Audio/English/chiara_hacking_1_en.wav', winsound.SND_FILENAME)

plt.imshow(data)
plt.show()

consequence = pytesseract.image_to_string(data, config='--psm 6')
print(consequence)

# Keep only letters, digits, '/' and '.'. Raw string: the old '\/' and '\.' were invalid string escapes.
consequence = sub(r'[^A-Za-z0-9/.]', '', consequence)

print(consequence)

In [None]:
from cv2 import adaptiveThreshold, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY

# Adaptive Gaussian threshold over the region captured in the previous cell (`data`).
# blockSize=11 and C=5 are the values being tried here.
datathresh = adaptiveThreshold(data,255,ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY,11,5) # 11, 5

consequence = pytesseract.image_to_string(datathresh, config='--psm 8')
print(consequence)

# Keep only letters, digits, '/' and '.'. Raw string: the old '\/' and '\.' were invalid string escapes.
consequence = sub(r'[^A-Za-z0-9/.]', '', consequence)

print(consequence)

plt.imshow(datathresh)
plt.show()

In [23]:
class Dataset(torch.utils.data.Dataset):
    '''
    Generates input maps and commands for Hakisa.

    Grabs screen frames, maps Hakisa's float outputs back to concrete commands through
    nearest-neighbour lookup, reads on-screen text with Tesseract, and keeps a bounded
    memory of (frame, keys, values, reward) tuples that can be turned into a study dataset.

    Remember: command_types = list of strings, actions1 and 2 = list of strings(keyboard), X coordinates or None(mouse)
    '''

    def __init__(
        self,
        command_types = None,
        actions1 = None,
        actions2 = None,
        explore_train_steps=1000,
        memory_size=100,
        top=0,
        left=0,
        width=1920,
        height=1080,
        resize=None
    ):
        '''
        command_types: list of command type names, indexed by Hakisa's command_type output.
        actions1, actions2: lists of possible first/second actions (hashable items).
        explore_train_steps: stored as self.steps.
        memory_size: maximum number of entries kept by create_memory.
        top, left, width, height: screen region captured by _grab_frame.
        resize: optional size passed to PIL's Image.resize, which expects (width, height).
        '''

        # Window resolutions for the screen grabber
        self.top = top
        self.left = left
        self.width = width
        self.height = height

        # For reducing the images. Passed straight to PIL's Image.resize, i.e. a (Width, Height) tuple.
        self.resize = resize

        self.steps = explore_train_steps

        self.data = None # This will be created during training. However, it's possible to load a ready-made data for training.

        self.command_type = command_types
        self.actions1 = self._create_commands_dictionary(input_maps=actions1)
        self.actions2 = self._create_commands_dictionary(input_maps=actions2)

        self.knn = None # Kept for backward compatibility; not used by the methods of this class.

        self.knn_actions1 = self._fit_knn(self.actions1)
        print("KNN fitted in actions 1")
        self.knn_actions2 = self._fit_knn(self.actions2)
        print("KNN fitted in actions 2\nAll action maps have been properly fitted by their respective KNN algorithm")

        # The KNN models are fitted on the dictionary values, so the index they return is a
        # position in the de-duplicated key order. Use that same order for the reverse lookup,
        # otherwise duplicated entries in the input lists would shift every later index.
        self.key_actions1 = list(self.actions1)
        self.key_actions2 = list(self.actions2)

        self.labels = None # Used for studying
        self.rewards = None # Also used for studying.

        self.memory_size = memory_size
        self.memory = []


    # Pytorch's Dataset functions will only be used in Studying mode
    def __getitem__(self, idx):
        '''Returns (input, label, reward) at position idx of the study data.'''

        return self.data[idx], self.labels[idx], self.rewards[idx]


    def __len__(self):
        '''Number of items in the study data.'''

        return len(self.data)


    def _grab_frame(self):
        '''
        Captures the configured screen region and returns it as a float32 tensor of shape
        (1, Channels, Height, Width) on `device`.
        '''
        # Unfortunately, this whole operation takes about 0.6 seconds, so we'll probably have to deal with a single frame each 1~3 seconds.
        with mss() as sct:
            frame = sct.grab(monitor={"top": self.top, "left": self.left, "width": self.width, "height": self.height})
            frame = Image.frombytes("RGB", frame.size, frame.bgra, 'raw', 'BGRX')

        if self.resize:
            frame = frame.resize(self.resize)

        frame = torch.from_numpy(np.array(frame, dtype=np.float32)) # (Height, Width, Channels)

        # permute reorders the axes. A plain .view() would only reinterpret the same memory
        # and interleave pixels from different channels and rows.
        frame = frame.permute(2, 0, 1).unsqueeze(0).contiguous().to(device) # (Batch, Channels, Height, Width)

        return frame


    def _create_commands_dictionary(self, input_maps):
        '''
        Maps each distinct item of input_maps (in first-seen order) to a float spread
        symmetrically around zero: positions are scaled to [-1, 1] and then multiplied by
        (number_of_keys + 1) // 2. A single item maps to 0.0.
        '''
        key2idx = {}

        for key in input_maps:
            if key not in key2idx:
                key2idx[key] = len(key2idx)

        maximum = max(key2idx.values())
        spread = (len(key2idx) + 1) // 2 # Using a wider range scaling to make things easier for her and for KNN.

        for key, value in key2idx.items():

            if maximum == 0:
                # Only one key: nothing to spread, and the scaling below would divide by zero.
                scaled_value = 0.0
            else:
                scaled_value = (value-0)*2.0 / (maximum - 0)-1.0

            key2idx[key] = scaled_value * spread

        return key2idx

    def _fit_knn(self, dictionary):
        '''Fits a 1-nearest-neighbour model on the dictionary's values, as a column vector.'''

        values = np.array(list(dictionary.values())).reshape(-1,1)

        return KNN(n_neighbors=1, algorithm='kd_tree').fit(values)


    def get_command(self, cmd_type, action1, action2):
        '''
        Hakisa's output: (command_type, action1, action2) ----> (key, Down, z) or (click, 100, 60)
        command_type is the argmax output from a logsoftmax function and will be used as index for its respective list.
        action1 and action 2 are both floats and will be passed through KNN in order to get the proper command.

        cmd_type is expected to be a numpy array: either a single index, or a 2-D array of scores
        from which the argmax along axis 1 is taken. action1 and action2 are passed as-is to
        kneighbors. Returns the tuple (command_type, action1, action2) of looked-up entries.
        '''

        if cmd_type.ndim > 1: # Sometimes, cmd_type isn't a vector, so we must extract a vector from it in order to use it as index.
            cmd_type = np.argmax(cmd_type, 1).item()
        else:
            cmd_type = cmd_type.item()

        cmd_type = self.command_type[cmd_type] # Here, cmd_type must be a plain integer index. Remember this if you get an error here.

        # kneighbors returns (distances, indices); the last index of the last row is used,
        # which is what the previous nested loops ended up with.
        _, index = self.knn_actions1.kneighbors(action1)
        action1 = self.key_actions1[int(index[-1][-1])]

        _, index = self.knn_actions2.kneighbors(action2)
        action2 = self.key_actions2[int(index[-1][-1])]

        return (cmd_type, action1, action2)

    def get_consequences(self, top, left, width, height, togray=False, threshold=False, thresh_gauss=171, thresh_C=13, tesseract_config='--psm 8'):
        '''
        Used after Hakisa performed an input, in order to get its consequences(ex: score change, bombs, kills, deaths...).
        Returns a string according to Tesseract's OCR.

        top, left, width, height: screen region to capture.
        togray: convert the capture to a single-channel image before OCR.
        threshold: only used when togray is set; applies OpenCV adaptive Gaussian thresholding
            with block size thresh_gauss and constant thresh_C.
        tesseract_config: config string forwarded to pytesseract.
        '''

        with mss() as sct:
            consequence = sct.grab(monitor={"top": top, "left": left, "width": width, "height": height})

            consequence = Image.frombytes("RGB", consequence.size, consequence.bgra, 'raw', 'BGRX')

        if togray:

            # NOTE(review): "P" is Pillow's palette mode, not grayscale ("L"). Confirm which one is intended.
            consequence = consequence.convert("P") # Sometimes, simply converting to grayscale is enough.

            if threshold: # Thresholding can only be applied to single-channel images.
                from cv2 import adaptiveThreshold, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY

                # Threshold the image captured here (not the notebook-level `data`), as a numpy
                # array, since OpenCV does not take PIL images.
                consequence = adaptiveThreshold(np.array(consequence),255,ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY,thresh_gauss,thresh_C)

        consequence = pytesseract.image_to_string(consequence, config=tesseract_config)

        # OCR adds some strange characters(even with the whitelist function). Let's remove them.
        # Raw string: the old '\/' and '\.' were invalid string escapes.

        consequence = sub(r'[^A-Za-z0-9/.]', '', consequence) # Attention: 0, 1 and 8 can be seen as O, l and B.

        return consequence

    def create_memory(self, frame, keys, values, reward):
        '''
        Saves data in the memory list.
        Memory is saved in the format (frame, (command_type, action1_key, action2_key), (command_index, action1_value, action2_value), reward)

        During study mode, frame will be used as input during training. The tuple of values and reward, as labels.
        The tuple of keys is used for visualization, and reward also works as weights(helps discarding bad decisions and saving good ones).

        Use cumulative rewards, but also try avoiding numbers that are too big (like above 1000). Multiply by a number between 0 and 1 if necessary.

        Memory will only be changed once it reaches its full size: from then on, the entry with
        the lowest reward is dropped before the new one is appended.
        '''

        memory = (frame, keys, values, reward) # A tuple makes each item in the list iterable...and its easier to visualize than lists of lists.

        if len(self.memory) >= self.memory_size:
            self.memory = sorted(self.memory, key=lambda x: x[3]) # Sorting list according to rewards values.
            self.memory.pop(0) # Removing the item with lowest reward value

        self.memory.append(memory)

    def create_data_for_study(self):
        '''
        Builds self.data, self.labels and self.rewards from the memory list.
        Frames are concatenated on the CPU; labels and rewards are moved to `device`.
        '''

        inputs = [i[0].cpu() for i in self.memory] # game frames. Using cpu to avoid CudaMemory errors.
        labels = [i[2] for i in self.memory] # (command_type index, action1 value, action2 value)
        rewards = [i[3] for i in self.memory] # Reward got in that step.

        # NOTE(review): np.stack needs every label tuple to hold same-shaped entries - confirm
        # this against what the exploration loops store as `values`.
        labels = np.stack(labels, 0).astype(np.float32) # Now converting to float here to avoid numpy.dtype == object
        rewards = np.stack(rewards, 0).astype(np.float32)

        self.data = torch.cat(inputs, 0)
        self.labels = torch.from_numpy(labels).to(device)
        self.rewards = torch.from_numpy(rewards).to(device)

    def use_readymade_data(self, data, labels, rewards):
        '''
        In case you already have some data prepared with you.
        Making a proper record function is quite difficult due to data overflow. keyboard and mouse modules has .record() functions.
        However, in mouse's case, the slightest mouse movement will generate a list with dozens of items.

        Consider using SerpentAI for this.

        Also, since we're following the TD learn idea, timeseries aren't really necessary, as Hakisa
        must be able to predict how the next state will be based on the current one, not based on a time series.
        '''

        self.data = data
        self.labels = labels
        self.rewards = rewards

    def save_memory(self, memory_name):
        '''Pickles the memory list to Hakisa_memory_<memory_name>.pkl in the working directory.'''

        import pickle

        # The with-block closes the file, so no explicit close() is needed afterwards.
        with open(f'Hakisa_memory_{memory_name}.pkl', 'wb') as f:
            pickle.dump(self.memory, f)

        print(f"Memory saved! You can load it again with")
        print(f"open('Hakisa_memory_{memory_name}.pkl', 'rb') as f:\n\tdataset.memory = pickle.load(f)")
        print("Don't forget to close the file!")


In [None]:
def conv2out(input, kernel, stride, padding, dilation=1):
    '''
    Output size, along one spatial dimension, of a convolution or pooling layer.

    Uses floor((input + 2*padding - dilation*(kernel-1) - 1) / stride) + 1.

    input: size of the input along that dimension.
    kernel, stride, padding: the layer's settings along that dimension.
    dilation: spacing between kernel elements (1 = ordinary convolution).

    Floor division is used because a partial window does not produce an output element;
    true division would report fractional sizes such as 3.5.
    '''
    padded = input + 2 * padding
    kernel_extent = dilation * (kernel - 1)

    return (padded - kernel_extent - 1) // stride + 1

In [None]:
# Sanity check: a 3x3 kernel with stride 1 and padding 1 applied to a 200-wide input.
print(conv2out(input=200, kernel=3, stride=1, padding=1))

New structure, new techniques!

Now, Hakisa will try to predict the reward for her action and for that step during Study and Play Mode.
She'll also classify her action as bad (0), neutral (1) or good (2) based on her command output, and will also use her previous actions as input.
Each of those predictions will also be used for conditioning the next output, and for backpropagation.

Rewards are now always cumulative and, in the Play Mode, will be multiplied by a discount factor.

Exploration Mode:

    No inputs ---------> Random command output

Study Mode:

    Frame Input ----------> Command output, Predicted Reward

    The Predicted Reward will be passed to a MSE Loss having the actual Reward as target.
    In the command output:
        command_type will be passed to a NLLLoss
        action1 and action 2 will, each one, be passed to a MSE Loss

    The study loss will be the sum of the reward loss, command_type loss, action1 and action2 losses.

    The idea is still the same: using semi-supervised learning in order to make Hakisa associate patterns in the images (state)
    in order to choose the best output and also try to predict the result of such output.

    Again, we'll be using a dataset based on the memory list, so set explore_train_steps >> memory_size in order to correctly store
    a proper set of state + good output + high reward. Otherwise, she'll simply learn how to make random moves.

Play Mode:

    Frame Input + Previous action + Previous Reward ---------> Command Output, Action quality, Predicted Reward

    The previous step action will condition the next action through a concatenation in the linear layers.
    The action quality predicted for that step will be used as label for the action quality predicted in the previous step.
    The Previous reward will condition the output, but the predicted reward will still be compared to the actual reward.

    The custom Gameplay Loss function will be discarded.

    New GameplayLoss = Cross-Entropy(previous_action_quality) + MSE(predicted_reward, actual_reward)

    In order to avoid instability, backpropagation will occur after a certain amount of steps.

In [9]:
class Hakisa(torch.nn.Module):
    '''
    Hakisa itself.

    She has 3 ways of acting, according to her current mode:

        if mode = 'Explore', her inputs can be None, and will generate random outputs.

        if mode = 'Study', she'll receive game frames as inputs, extract the most relevant features and,
        in the end, will generate 2 outputs:

            output 1: a tuple of commands (command_type, action1, action2).
            output 2: prediction of the reward to be obtained through that action. Size (Batch, 1)

        any other mode is treated as 'Play': she'll receive game frames, the previous action and the
        previous reward, and generate the commands, the command quality and the predicted reward,
        conditioned by the inputs.

    Explore mode reads the module-level `dataset` for the spread of the random actions, and every
    mode reads the module-level `device`.

    Hakisa also has an .execute_command() function, which uses keyboard and mouse modules.
    If those don't work, uncomment the pyautogui equivalents.
    '''

    def __init__(self, n_command_types, mode='Explore'):
        '''
        n_command_types: number of entries in the command type list.
        mode: 'Explore', 'Study', or anything else for Play.
        '''

        super(Hakisa, self).__init__()

        self.mode = mode

        self.n_command_types = n_command_types

        # This structure must be changed with the input size...unless you'd like to use adaptive pooling

        # Let's begin supposing that we're gonna use 200x200 RGB images ---> (3, 200, 200)

        # 200x200

        self.conv1 = torch.nn.Conv2d(3, 100, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm1 = torch.nn.BatchNorm2d(100)
        self.conv2 = torch.nn.Conv2d(100, 100, kernel_size=3, stride=1, padding=1, bias=False) # 200x200
        self.batchnorm2 = torch.nn.BatchNorm2d(100)
        self.pool2x2 = torch.nn.MaxPool2d(kernel_size=2, stride=2) # 100x100
        self.conv3 = torch.nn.Conv2d(100, 200, kernel_size=3, stride=1, padding=1, bias=False) # 100x100
        self.batchnorm3 = torch.nn.BatchNorm2d(200)
        self.conv4 = torch.nn.Conv2d(200, 400, kernel_size=3, stride=1, padding=1, bias=False) # 100x100
        self.batchnorm4 = torch.nn.BatchNorm2d(400)
        # Add pool 2x2 ---> 50x50
        self.conv5 = torch.nn.Conv2d(400, 800, kernel_size=3, stride=1, padding=1, bias=False) # 50x50
        self.batchnorm5 = torch.nn.BatchNorm2d(800)
        self.conv6 = torch.nn.Conv2d(800, 1000, kernel_size=3, stride=1, padding=1, bias=False) # 50x50
        self.batchnorm6 = torch.nn.BatchNorm2d(1000)
        # Add pool 2x2 ---> 25x25
        self.conv7 = torch.nn.Conv2d(1000, 1000, kernel_size=4, stride=1, bias=False) # 22x22
        self.batchnorm7 = torch.nn.BatchNorm2d(1000)
        self.conv8 = torch.nn.Conv2d(1000, 1000, kernel_size=3, stride=1, bias=False) # 20x20
        self.batchnorm8 = torch.nn.BatchNorm2d(1000)
        # Add pool 2x2 ---> 10x10
        self.conv9 = torch.nn.Conv2d(1000, 800, kernel_size=3, stride=1, padding=1, bias=False) # 10x10
        self.batchnorm9 = torch.nn.BatchNorm2d(800)
        self.conv10 = torch.nn.Conv2d(800, 400, kernel_size=3, stride=1, padding=1, bias=False) # 10x10
        self.batchnorm10 = torch.nn.BatchNorm2d(400)
        # Add pool 2x2 ---> 5x5
        self.neuron1 = torch.nn.Linear(400*5*5, 200*2*2, bias=False)
        self.layer_norm1 = torch.nn.LayerNorm(200*2*2)

        if self.n_command_types > 1:
            # If n_command_types = 1, the Exploration Mode will always output the same actions due to this bottleneck.

            self.neuron_command_study = torch.nn.Linear(200*2*2, self.n_command_types, bias=False)
            self.neuron_command_play = torch.nn.Linear(1200, self.n_command_types, bias=False)

            # Considering the command_type that has been predicted, what should be the action1 and action2?

            self.neuron2 = torch.nn.Linear(self.n_command_types, 100*2*2, bias=False)

        else:

            self.neuron2_study = torch.nn.Linear(200*2*2, 100*2*2, bias=False)
            self.neuron2_play = torch.nn.Linear(1200, 100*2*2, bias=False)

        self.neuron_quality = torch.nn.Linear(3, 200, bias=False)

        self.layer_norm2 = torch.nn.LayerNorm(100*2*2)
        self.neuron_action1 = torch.nn.Linear(100*2*2, 1, bias=False)
        self.neuron_action2 = torch.nn.Linear(100*2*2, 1, bias=False)

        self.neuron_reward = torch.nn.Linear(1, 200, bias=False)
        self.layer_normcat = torch.nn.LayerNorm(1200)
        self.neuron_predquality = torch.nn.Linear(2+self.n_command_types, 3, bias=False)
        self.neuron_predreward1 = torch.nn.Linear(2+self.n_command_types, 1000, bias=False)
        self.neuron_predreward2 = torch.nn.Linear(1000, 1, bias=False)

        self.PRelu = torch.nn.PReLU(1)

        self.softmax = torch.nn.LogSoftmax(-1) # Since we're using softmax here, use NLLLoss during study and play mode.


    def _extract_features(self, input):
        '''Runs the shared convolutional trunk and the first linear layer: frames -> (batch, 200*2*2).'''

        # Five stages of conv -> batchnorm -> PReLU, twice, followed by a 2x2 max pool.
        stages = (
            (self.conv1, self.batchnorm1, self.conv2, self.batchnorm2),
            (self.conv3, self.batchnorm3, self.conv4, self.batchnorm4),
            (self.conv5, self.batchnorm5, self.conv6, self.batchnorm6),
            (self.conv7, self.batchnorm7, self.conv8, self.batchnorm8),
            (self.conv9, self.batchnorm9, self.conv10, self.batchnorm10),
        )

        x = input

        for conv_a, norm_a, conv_b, norm_b in stages:
            x = self.PRelu(norm_a(conv_a(x)))
            x = self.PRelu(norm_b(conv_b(x)))
            x = self.pool2x2(x)

        x = x.view(x.size(0), -1) # (batch, 400*5*5)

        return self.neuron1(x) # (batch, 200*2*2)


    def forward(self, input=None, frame_sequence=None, previous_action=None, previous_reward=None):
        '''
        input: batch of frames, (Batch, 3, Height, Width). Not used in Explore mode.
        frame_sequence: accepted but not used.
        previous_action: Play mode only. Tuple of three tensors that are summed into a (Batch, 3)
            tensor; zeros are used when it is None.
        previous_reward: Play mode only. (Batch, 1) tensor; zeros are used when it is None.

        Returns:
            Explore: (command_type, action1, action2)
            Study:   (command_type, action1, action2), predicted_reward
            Play:    (command_type, action1, action2), command_quality, predicted_reward
        '''

        if self.mode == "Explore":
            # Reinventing the wheel didn't work. Now, in exploration mode, Hakisa will simply generate random numbers.

            if self.n_command_types > 1:
                command_type = torch.randint(0, self.n_command_types, size=(1,), device=device) # Outputting a vector.
            else:
                # Index 0 of the single command type. torch.ones((0)) produced an EMPTY tensor,
                # which cannot be turned into an index with .item().
                command_type = torch.zeros((1,), dtype=torch.long, device=device)

            # Standard deviation = largest mapped action value, so samples cover the action range.
            action1 = torch.normal(0, max(dataset.actions1.values()), size=(1, 1), device=device)
            action2 = torch.normal(0, max(dataset.actions2.values()), size=(1, 1), device=device)

            return (command_type, action1, action2)


        elif self.mode == 'Study':

            x = self._extract_features(input)
            x = self.layer_norm1(x)
            x = self.PRelu(x)

            if self.n_command_types > 1:

                command_type = self.neuron_command_study(x)
                command_type = self.softmax(command_type) # (Batch, n_commands)

                x = self.neuron2(command_type)

            else:
                command_type = torch.ones((input.size(0), 1), device=device)

                x = self.neuron2_study(x)

            x = self.layer_norm2(x)
            x = self.PRelu(x)

            action1 = self.neuron_action1(x) # (Batch, 1)
            action2 = self.neuron_action2(x) # (Batch, 1)

            # Detached: the reward head does not backpropagate into the command heads here.
            x = torch.cat((command_type.detach(), action1.detach(), action2.detach()), 1) # (Batch, 1+1+n_commands)

            x = self.neuron_predreward1(x)
            predicted_reward = self.neuron_predreward2(x)

            return (command_type, action1, action2), predicted_reward

        else:

            # Unlike Study mode, no layer norm / PReLU is applied right after neuron1 here;
            # normalisation happens after the concatenation below.
            x = self._extract_features(input)

            batch_size = input.size(0)

            # For first iteration. Each missing input is filled on its own, and sized by the
            # actual batch so the concatenation below also works for batches larger than 1.
            if previous_action is None:
                previous_action = (
                    torch.zeros((batch_size, 3), device=device),
                    torch.zeros((batch_size, 1), device=device),
                    torch.zeros((batch_size, 1), device=device),
                )

            if previous_reward is None:
                previous_reward = torch.zeros((batch_size, 1), device=device)

            a, b, c = previous_action
            previous_action = a + b + c

            previous_action = self.neuron_quality(previous_action) # (batch, 200)
            previous_reward = self.neuron_reward(previous_reward) # (batch, 200)

            x = torch.cat((x, previous_action, previous_reward), 1) # (batch, 1200)

            x = self.layer_normcat(x)
            x = self.PRelu(x)

            if self.n_command_types > 1:

                command_type = self.neuron_command_play(x)
                command_type = self.softmax(command_type) # (Batch, n_commands)

                x = self.neuron2(command_type)

            else:
                command_type = torch.ones((input.size(0), 1), device=device)

                x = self.neuron2_play(x) # (Batch, 100*2*2)

            x = self.layer_norm2(x)
            x = self.PRelu(x)

            action1 = self.neuron_action1(x) # (Batch, 1)
            action2 = self.neuron_action2(x) # (Batch, 1)

            x = torch.cat((command_type, action1, action2), 1) # (Batch, 1+1+n_commands)

            command_quality = self.neuron_predquality(x) # (Batch, 3)
            command_quality = self.softmax(command_quality)

            x = self.neuron_predreward1(x)
            predicted_reward = self.neuron_predreward2(x)

            return (command_type, action1, action2), command_quality, predicted_reward


    def execute_command(self, command):
        '''
        Command must be a tuple(command_type, action1, action2), where:

            command_type: key(keyboard) or move, rightclick, click(mouse). Matched case-insensitively.
            action1: Up, Down, press(keyboard), X coordinate(mouse) or None(no mouse movement, when using PyAutoGUI)
            action2: 'a', 'z', 'shift'...(keyboard), Y coordinate(mouse) or None(no mouse movement, when using PyAutoGUI)

            PS: Using None when using mouse module will throw an error.

        Make sure all key actions(action2) are lowered.

        Have in mind that Hakisa might output command_type 'key' and action1 that is equivalent to a mouse action.
        In that case (or any other failure while sending the input) nothing is done.

        Raises ValueError when command_type is none of the known types.
        '''

        # Lower-cased so that e.g. 'rightClick' is recognised instead of falling through to ValueError.
        command_type = command[0].lower()

        if "key" in command_type:

            # Exception (not a bare except) keeps the best-effort behaviour without also
            # swallowing KeyboardInterrupt / SystemExit.
            try:

                if "Up" in command[1]:
                    #pyautogui.keyUp(command[2])
                    keyboard.release(command[2])

                elif "Down" in command[1]:
                    #pyautogui.keyDown(command[2])
                    keyboard.press(command[2])

                elif "press" in command[1]:
                    keyboard.send(command[2]) # Some games won't work with pyautogui.press(), so use keyboard module, since we'll import it for Play Mode.

            except Exception:

                pass # If Hakisa predicts a keyboard command, but outputs a mouse action, she won't do anything.

        elif "move" in command_type:

            try:
                #pyautogui.moveTo(command[1], command[2], duration=0.19) # Duration = 0.19 seconds to be more realistic
                mouse.move(command[1], command[2], duration=0.1)

            except Exception:
                pass # If Hakisa predict a mouse command, but outputs a keyboard action, she won't do anything.

        elif "rightclick" in command_type: # Must be tested before "click", which it contains.

            try:
                mouse.move(command[1], command[2], duration=0.1)
                mouse.right_click()
            except Exception:
                pass

        elif "click" in command_type:

            try:
                #pyautogui.moveTo(command[1], command[2], duration=0.19)
                mouse.move(command[1], command[2], duration=0.1)
                mouse.click() # Same case as press. Use mouse module.
            except Exception:
                pass

        else:

            raise ValueError(f"Unknown command type: {command[0]!r}") # It was probably you who made a mistake.

In [5]:
# Jigoku Kisetsukan: keyboard-only game, so there is a single command type.

command_type = ['key']

# First action: whether the key goes down or up.
actions1 = 'Down Up'.split()

# Second action: which key is affected.
actions2 = 'up down left right z x shift'.split()

In [6]:
# Dataset for Jigoku Kisetsukan: short exploration run, small memory, frames resized to 200x200.
dataset = Dataset(
    command_types=command_type,
    actions1=actions1,
    actions2=actions2,
    explore_train_steps=100,
    memory_size=10,
    resize=(200, 200),
)

KNN fitted in actions 1
KNN fitted in actions 2
All action maps have been properly fitted by their respective KNN algorithm


In [7]:
# Show the command types and the scaled action maps, one per line.
print(dataset.command_type, dataset.actions1, dataset.actions2, sep="\n")

['key']
{'Down': -1.0, 'Up': 1.0}
{'up': -4.0, 'down': -2.666666666666667, 'left': -1.3333333333333335, 'right': 0.0, 'z': 1.333333333333333, 'x': 2.666666666666667, 'shift': 4.0}


In [24]:
# Bullet Heaven

# Spelled 'rightclick' (all lowercase) because Hakisa.execute_command looks for the
# substrings "rightclick" and "click"; 'rightClick' matched neither and raised ValueError.
command_types = ['move', 'click', 'rightclick']

actions1 = list(range(1, 1919)) # X coordinates. Avoiding using the extremes so we don't have to shut down PyAutoGUI safety lock.

actions2 = list(range(1, 1079)) # Y coordinates, same reasoning.

dataset = Dataset(command_types, actions1, actions2, explore_train_steps=100, memory_size=10, resize=(200,200))

KNN fitted in actions 1
KNN fitted in actions 2
All action maps have been properly fitted by their respective KNN algorithm


In [10]:
# Three command types (move, click, right click), starting in exploration mode.
hakisa = Hakisa(n_command_types=3, mode='Explore')
hakisa = hakisa.to(device)

In [None]:
def preprocess_Jigoku(score):
    '''
    Cleans an OCR reading for the game Jigoku Kisetsukan: Sense of the Seasons.

    Letters that Tesseract tends to confuse with digits are swapped for those digits and
    dots are dropped; the result is parsed as a float. Returns 1.0 when it cannot be parsed.
    '''
    ocr_fixes = str.maketrans({
        'S': '5', 's': '8', 'e': '2', 'O': '0', 'B': '8', 'o': '4', 'b': '4',
        'I': '1', 'l': '1',
        '.': None,
    })

    cleaned = score.translate(ocr_fixes)

    try:
        return float(cleaned)
    except ValueError:
        return 1.0

In [7]:
def preprocess_BH2(score):
    '''
    Cleans an OCR reading for the game Bullet Heaven 2.

    Letters that Tesseract tends to confuse with digits are swapped for those digits, every
    remaining non-digit character is removed, and the result is parsed as a float.
    Returns 1.0 when nothing parseable is left.
    '''
    ocr_fixes = str.maketrans({
        'S': '5', 's': '8', 'e': '2', 'O': '0', 'B': '8', 'o': '0',
        '.': None,
    })

    digits_only = sub('[^0-9]', '', score.translate(ocr_fixes))

    try:
        return float(digits_only)
    except ValueError:
        return 1.0

In [None]:
# Exploration loop - Jigoku Kisetsukan
# Each step: grab a frame, let Hakisa output a random command, execute it, read the HUD with OCR,
# update the cumulative reward and store everything in the dataset memory.

from win32gui import GetWindowText, GetForegroundWindow # To make sure she's playing your game
from time import time

start = time()

sleep(5) # Time to bring the game window to the foreground.

game_window = GetWindowText(GetForegroundWindow())

winsound.PlaySound('D:/Python/Audio/English/chiara_hacking_1_en.wav', winsound.SND_FILENAME) # Just to know if everything's ok

reward = 0
steps_done = 0 # Number of fully completed steps (stays 0 if the loop stops right away).

for step in range(dataset.steps):

    # Stop as soon as another window gets the focus, so inputs are not sent elsewhere.
    if GetWindowText(GetForegroundWindow()) != game_window:
        print("I don't want to play anymore!")
        break

    start_step = time()

    frame = dataset._grab_frame() # Getting environment state

    cmds = hakisa()

    command = dataset.get_command(cmds[0].cpu().numpy(), cmds[1].cpu().numpy(), cmds[2].cpu().numpy())

    hakisa.execute_command(command)

    # If Hakisa acts too fast, the consequence for her action might not appear right now. Not that I wouldn't want her to be fast and efficient...

    # Arguments are (top, left, width, height) of each HUD region.
    score = dataset.get_consequences(1008, 1429, 1723-1429, 1046-1008, tesseract_config='--psm 8')

    score = preprocess_Jigoku(score)

    mult_score = dataset.get_consequences(933, 1536, 1723-1536, 978-933, tesseract_config='--psm 8')

    mult_score = preprocess_Jigoku(mult_score)
    mult_score = mult_score/100

    life = dataset.get_consequences(849, 400, 498-400, 904-849, tesseract_config='--psm 8')

    life = preprocess_Jigoku(life)
    life = life/100

    power = dataset.get_consequences(923, 405, 503-405, 978-923, tesseract_config='--psm 8')

    power = preprocess_Jigoku(power)
    power = power/100

    # Width comes from the horizontal span (400..503) and height from the vertical one
    # (1001..1045), like the other regions; they were previously passed swapped.
    aura = dataset.get_consequences(1001, 400, 503-400, 1045-1001, tesseract_config='--psm 8')

    aura = preprocess_Jigoku(aura)
    aura = aura/100

    if life == 0:

        # Penalty for having no lives left, smaller when the score is high.
        try:
            reward += -(100/(score * mult_score))

        except ZeroDivisionError:
            reward += -10

    else:

        reward += ((score * mult_score) + (power * aura))*1e-6 # Jigoku Kisetsukan deals with score numbers around hundreds of thousands.

    dataset.create_memory(frame, keys=command, values=(cmds[0].cpu(), cmds[1].cpu(), cmds[2].cpu()), reward=reward)

    end_step = time()
    steps_done = step + 1

    print(f"Step complete! {end_step-start_step}")

    if step == (dataset.steps - 1):

        winsound.PlaySound(f'D:/Python/Audio/English/chiara_craftEpic_1_en.wav', winsound.SND_FILENAME) # Because yes

end = time()
print(f"Loop complete!\nTime spent: {end-start} seconds\nNumber of steps: {steps_done}")

# Free the per-run variables. pop() instead of del: after an early break some of them were
# never created, and del would raise NameError.
for _name in ("frame", "command", "score", "mult_score", "life", "power", "aura", "reward", "step", "start", "end"):
    globals().pop(_name, None)
del _name

winsound.PlaySound(f'D:/Python/Audio/English/chiara_rest_1_en.wav', winsound.SND_FILENAME)

In [8]:
# Exploration loop - Bullet Heaven 2
# Each step: grab a frame, let Hakisa output a random command, execute it, read the score with OCR,
# add log10(score) to the cumulative reward and store everything in the dataset memory.

from win32gui import GetWindowText, GetForegroundWindow # To make sure she's playing your game
from time import time
import math

start = time()

sleep(5) # Time to bring the game window to the foreground.

game_window = GetWindowText(GetForegroundWindow())

winsound.PlaySound('D:/Python/Audio/English/chiara_hacking_1_en.wav', winsound.SND_FILENAME) # Just to know if everything's ok

reward = 0
steps_done = 0 # Number of fully completed steps (stays 0 if the loop stops right away).

for step in range(dataset.steps):

    # Stop as soon as another window gets the focus, so inputs are not sent elsewhere.
    if GetWindowText(GetForegroundWindow()) != game_window:
        print("I don't want to play anymore!")
        break

    start_step = time()

    frame = dataset._grab_frame() # Getting environment state

    cmds = hakisa()

    command = dataset.get_command(cmds[0].cpu().numpy(), cmds[1].cpu().numpy(), cmds[2].cpu().numpy())

    hakisa.execute_command(command)

    # If Hakisa acts too fast, the consequence for her action might not appear right now. Not that I wouldn't want her to be fast and efficient...

    # Arguments are (top, left, width, height) of the score region.
    score = dataset.get_consequences(180, 1, 249-1, 213-180, tesseract_config='--psm 8')

    score = preprocess_BH2(score)

    try:

        reward += math.log10(score)

    except ValueError:

        # log10 is undefined for a score of 0 (or below): that step adds nothing.
        reward += 0.0

    dataset.create_memory(frame, keys=command, values=(cmds[0].cpu(), cmds[1].cpu(), cmds[2].cpu()), reward=reward)

    end_step = time()
    steps_done = step + 1

    print(f"Step complete! {end_step-start_step}")

    if step == (dataset.steps - 1):

        winsound.PlaySound(f'D:/Python/Audio/English/chiara_craftEpic_1_en.wav', winsound.SND_FILENAME) # Because yes

end = time()
print(f"Loop complete!\nTime spent: {end-start} seconds\nNumber of steps: {steps_done}")

# Free the per-run variables. pop() instead of del: after an early break some of them were
# never created, and del would raise NameError.
for _name in ("frame", "command", "score", "reward", "step", "start", "end"):
    globals().pop(_name, None)
del _name

winsound.PlaySound(f'D:/Python/Audio/English/chiara_rest_1_en.wav', winsound.SND_FILENAME)

Step complete! 2.2599964141845703
Step complete! 0.8769965171813965
Step complete! 0.846994161605835
Step complete! 0.7700028419494629
Step complete! 0.7539963722229004
Step complete! 0.7410037517547607
Step complete! 0.7169966697692871
Step complete! 0.7269859313964844
Step complete! 0.7450008392333984
Step complete! 0.7675182819366455
Step complete! 0.7320003509521484
Step complete! 0.7339982986450195
Step complete! 0.7585504055023193
Step complete! 0.8449945449829102
Step complete! 0.7750005722045898
Step complete! 0.7490017414093018
Step complete! 0.7495441436767578
Step complete! 0.7270002365112305
Step complete! 0.7509994506835938
Step complete! 0.7410016059875488
Step complete! 0.7530055046081543
Step complete! 0.7521631717681885
Step complete! 0.7340030670166016
Step complete! 0.7449967861175537
Step complete! 0.7280025482177734
Step complete! 0.7369980812072754
Step complete! 0.7351484298706055
Step complete! 0.7169976234436035
Step complete! 0.747002363204956
Step complete! 0

In [11]:
# Unfortunately, this is where we got the CUDA RuntimeError. Try using resize in Dataset creator and adjust Hakisa accordingly.

# Build the study data from the collected memory and wrap the dataset in a shuffled loader.
dataset.create_data_for_study()
dataloader = torch.utils.data.DataLoader(dataset=dataset, shuffle=True, batch_size=2)

# Switch Hakisa to study mode.
hakisa.mode = 'Study'

# Bookkeeping containers and hyperparameters for the studying loop.
costs, learning_rate, grads = [], [], []
grad_clip = save_path = None # None = no gradient clipping / default checkpoint folder
start_epoch, epochs = 0, 10000

In [None]:
# In case you want to continue from a checkpoint.

# map_location lets a checkpoint saved on CUDA be restored on a CPU-only machine (and vice versa).
params = torch.load(f'Hakisa/Hakisa_checkpoint.tar', map_location=device)

# Checkpoints written by the playing loops store 'Steps' instead of 'Epoch';
# in that case studying simply starts from epoch 0.
start_epoch = params.get('Epoch', -1) + 1
hakisa.load_state_dict(params['Hakisa_params'])
lr = params['Hakisa_LR']

del params

In [None]:
# Studying loop - Classic supervised learning. Will help Hakisa try to create certain patterns for situations and her reactions.
#
# Every batch yields (input_frame, label, reward). The total loss is the sum of:
#   - NLL on the command type (skipped when the dataset has a single command type),
#   - MSE on action1 and action2 against label columns 1 and 2,
#   - MSE on the predicted reward.
# A checkpoint holding the best parameters seen so far is written every `dataset.memory_size` batches.

# Can be applied to any game

import os


def _snapshot_state(model):
    '''Returns a CPU copy of model.state_dict() that is not affected by later optimizer steps.'''

    snapshot = model.state_dict() # Fresh mapping, but its tensors still alias the live parameters

    for key in list(snapshot):
        snapshot[key] = snapshot[key].detach().to('cpu', copy=True)

    return snapshot


optimizer = torch.optim.Adam(hakisa.parameters(), lr=1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 100, gamma=0.1)

command_type_loss = torch.nn.NLLLoss()
mse_loss = torch.nn.MSELoss()

# Resolve and create the checkpoint folder once, up front.
if save_path is None:
    save_path = "Hakisa"

os.makedirs(save_path, exist_ok=True)

# Must live outside the loops: resetting it per batch makes every batch the "best" one.
best_loss = float('inf')
best_params = None

for epoch in range(start_epoch, epochs):

    for i, (input_frame, label, reward) in enumerate(dataloader):

        label, reward = label.to(device), reward.to(device)

        hakisa.zero_grad()

        cmds, predicted_reward = hakisa(input_frame.to(device))

        del input_frame

        if len(dataset.command_type) != 1:

            command_type_cost = command_type_loss(cmds[0], label[:, 0].long())

        else:
            command_type_cost = 0.

        action1_loss = mse_loss(cmds[1].view(-1), label[:, 1])
        action2_loss = mse_loss(cmds[2].view(-1), label[:, 2])

        reward_loss = mse_loss(predicted_reward.view(-1), reward)

        study_loss = command_type_cost + action1_loss + action2_loss + reward_loss

        study_loss.backward()

        if grad_clip is not None:
            # Clamp the gradients once, before the update. Registering a new hook on every
            # parameter in every iteration would pile up hooks without bound.
            torch.nn.utils.clip_grad_value_(hakisa.parameters(), grad_clip)

        optimizer.step()

        for n, p in hakisa.named_parameters(): # Checking how the grads and backpropagation are going

            if 'neuron1.weight' in n and p.grad is not None:
                grads.append(torch.mean(p.grad))

        current_loss = study_loss.item()

        if current_loss < best_loss:

            best_loss = current_loss
            best_params = _snapshot_state(hakisa)

        if i % dataset.memory_size == 0:
            last_grad = grads[-1] if grads else float('nan') # No matching parameter recorded yet

            print(f"{epoch}/{epochs}")
            print(f"Best Loss: {best_loss}\tCurrent LR: {scheduler.get_last_lr()[0]}\tGradients Average: {last_grad}")
            print(f"Predicted Reward: {predicted_reward[0].item()}\tActual Reward: {reward[0].item()}")
            print(f"Reward loss: {reward_loss}")
            print(f"command_type loss: {command_type_cost}\taction1_loss: {action1_loss}\taction2_loss: {action2_loss}")

            torch.save({
                'Epoch': epoch,
                # Falls back to the current parameters if no finite loss has been seen yet (e.g. NaN loss)
                'Hakisa_params': best_params if best_params is not None else hakisa.state_dict(),
                'Hakisa_LR': scheduler.get_last_lr()[0]
            }, f"{save_path}/Hakisa_checkpoint.tar")

            print("Model saved!")

    scheduler.step()

In [None]:
# Persist the dataset's collected memory under the name 'jigoku_kisetsukan'.
# NOTE(review): the loading cell reads 'Hakisa_memory_jigoku_kisetsukan.pkl', presumably the file written here - confirm against Dataset.save_memory.
dataset.save_memory(memory_name='jigoku_kisetsukan')

In [9]:
import pickle

# Restore a previously saved memory into the dataset.
# WARNING: pickle.load can execute arbitrary code. Only load memory files you created yourself.
with open('Hakisa_memory_jigoku_kisetsukan.pkl', 'rb') as f:

    dataset.memory = pickle.load(f)

# No explicit f.close() needed: the `with` block already closed the file.

In [None]:
# Playing loop - She learns as she plays
# Jigoku Kisetsukan
#
# Runs until Esc is pressed. Each step: grab a frame, let Hakisa choose a command, execute it,
# read score / multiplier / life / power / aura with OCR, update the cumulative reward and
# backpropagate the gameplay loss. Gradients accumulate between optimizer updates; every
# `save_point` steps the optimizer and scheduler step and a checkpoint is written.

import os
import warnings


def _snapshot_state(model):
    '''Returns a CPU copy of model.state_dict() that is not affected by later optimizer steps.'''

    snapshot = model.state_dict() # Fresh mapping, but its tensors still alias the live parameters

    for key in list(snapshot):
        snapshot[key] = snapshot[key].detach().to('cpu', copy=True)

    return snapshot


warnings.simplefilter('ignore')

sleep(5)

winsound.PlaySound(f'D:/Python/Audio/English/chiara_learnweaponskill_Rapier_2_en.wav', winsound.SND_FILENAME)

hakisa.mode = 'Play'

reward = 0. # Cumulative reward
learning_rate = []
grads = []
grad_clip = None
save_path = 'Hakisa'
steps = 0
save_point = 10 # Also optimization point
uncertainty_factor = 0.9 # Also known as gamma or discount factor

os.makedirs(save_path, exist_ok=True) # torch.save does not create missing folders

optimizer = torch.optim.Adam(hakisa.parameters(), lr=1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 50, gamma=0.1)

action_quality_loss = torch.nn.NLLLoss()
reward_loss = torch.nn.MSELoss()

# Must live outside the loop: resetting it per step makes every step the "best" one.
best_loss = float('inf')
best_params = None

while not keyboard.is_pressed('esc'): # Exit loop when Esc is pressed

    frame = dataset._grab_frame()

    if steps == 0: # First iteration

        cmds, command_quality, predicted_reward = hakisa(frame)

    else:

        cmds, command_quality, predicted_reward = hakisa(frame, previous_action=(cmds[0].detach(), cmds[1].detach(), cmds[2].detach()), previous_reward=predicted_reward.detach())

    del frame

    command = dataset.get_command(cmds[0].detach().cpu().numpy(), cmds[1].detach().cpu().numpy(), cmds[2].detach().cpu().numpy())

    hakisa.execute_command(command)

    # NOTE(review): the Jigoku exploration loop also divides mult_score, life and power by 100;
    # here only aura is rescaled. Confirm which scaling is intended so that rewards seen while
    # playing match the rewards stored in the study memory.

    score = dataset.get_consequences(1008, 1429, 1723-1429, 1046-1008, tesseract_config='--psm 8')

    score = preprocess_Jigoku(score)

    mult_score = dataset.get_consequences(933, 1536, 1723-1536, 978-933, tesseract_config='--psm 8')

    mult_score = preprocess_Jigoku(mult_score)

    life = dataset.get_consequences(849, 400, 498-400, 904-849, tesseract_config='--psm 8')

    life = preprocess_Jigoku(life)

    power = dataset.get_consequences(923, 405, 503-405, 978-923, tesseract_config='--psm 8')

    power = preprocess_Jigoku(power)

    aura = dataset.get_consequences(1001, 400, 1045-1001, 503-400, tesseract_config='--psm 8')

    aura = preprocess_Jigoku(aura)
    aura = aura/100

    if life == 0:

        # Penalty when life reads 0; it shrinks as the achieved score grows.
        try:
            reward += -(100./(score * mult_score))

        except ZeroDivisionError:
            reward += -10.

    else:

        reward += ((score * mult_score) + (power * aura))*1e-6

    del score, mult_score, power, aura, life

    reward = torch.tensor(reward, device=device)

    if steps == 0:

        previous_command_quality = command_quality.detach()

    action_quality_cost = action_quality_loss(previous_command_quality, command_quality.argmax(1).detach()) # Input = (1, Classes), Target = (1)

    predicted_reward = predicted_reward * uncertainty_factor

    reward_cost = reward_loss(predicted_reward, reward)

    gameplay_loss = action_quality_cost + reward_cost

    gameplay_loss.backward() # Gradients accumulate until the next optimizer.step()

    previous_command_quality = command_quality.detach()

    del command_quality

    for n, p in hakisa.named_parameters(): # Checking how the grads and backpropagation are going

        if 'neuron1.weight' in n and p.grad is not None:
            grads.append(torch.mean(p.grad))

    current_loss = gameplay_loss.item()

    if current_loss < best_loss:

        best_loss = current_loss
        best_params = _snapshot_state(hakisa)

    steps += 1

    if steps % save_point == 0:

        if grad_clip is not None:
            # Clamp the accumulated gradients once, right before the update. Registering a new
            # hook on every parameter in every step would pile up hooks without bound.
            torch.nn.utils.clip_grad_value_(hakisa.parameters(), grad_clip)

        optimizer.step()
        scheduler.step()

        hakisa.zero_grad()

        last_grad = grads[-1] if grads else float('nan') # No matching parameter recorded yet

        print(f"Current step: {steps}")
        print(f"Best Loss: {best_loss}\tCurrent LR: {scheduler.get_last_lr()[0]}\tGradients Average: {last_grad}")
        print(f"Predicted Reward: {predicted_reward.item()}\tCurrent Reward: {reward}")

        torch.save({
            'Steps': steps,
            # Falls back to the current parameters if no finite loss has been seen yet (e.g. NaN loss)
            'Hakisa_params': best_params if best_params is not None else hakisa.state_dict(),
            'Hakisa_LR': scheduler.get_last_lr()[0]
        }, f"{save_path}/Hakisa_checkpoint.tar")

        # The '.wav' extension was missing, so the intended file was never found.
        winsound.PlaySound(f'D:/Python/Audio/English/chiara_craftEpic_1_en.wav', winsound.SND_FILENAME)

In [None]:
# Playing loop - She learns as she plays
# Bullet Heaven 2
#
# Runs until Esc is pressed. Each step: grab a frame, let Hakisa choose a command, execute it,
# read the score with OCR, add log10(score) to the cumulative reward and backpropagate the
# gameplay loss. Gradients accumulate between optimizer updates; every `save_point` steps the
# optimizer and scheduler step and a checkpoint is written.

import math # Imported here so this cell does not depend on the exploration cell having run
import os
import warnings


def _snapshot_state(model):
    '''Returns a CPU copy of model.state_dict() that is not affected by later optimizer steps.'''

    snapshot = model.state_dict() # Fresh mapping, but its tensors still alias the live parameters

    for key in list(snapshot):
        snapshot[key] = snapshot[key].detach().to('cpu', copy=True)

    return snapshot


warnings.simplefilter('ignore')

sleep(5)

winsound.PlaySound(f'D:/Python/Audio/English/chiara_learnweaponskill_Rapier_2_en.wav', winsound.SND_FILENAME)

hakisa.mode = 'Play'

reward = 0. # Cumulative reward
learning_rate = []
grads = []
grad_clip = None
save_path = 'Hakisa'
steps = 0
save_point = 5 # Also optimization point
uncertainty_factor = 0.9 # Also known as gamma or discount factor

os.makedirs(save_path, exist_ok=True) # torch.save does not create missing folders

optimizer = torch.optim.Adam(hakisa.parameters(), lr=1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 50, gamma=0.1)

action_quality_loss = torch.nn.NLLLoss()
reward_loss = torch.nn.MSELoss()

# Must live outside the loop: resetting it per step makes every step the "best" one.
best_loss = float('inf')
best_params = None

while not keyboard.is_pressed('esc'): # Exit loop when Esc is pressed

    frame = dataset._grab_frame()

    if steps == 0: # First iteration

        cmds, command_quality, predicted_reward = hakisa(frame)

    else:

        cmds, command_quality, predicted_reward = hakisa(frame, previous_action=(cmds[0].detach(), cmds[1].detach(), cmds[2].detach()), previous_reward=predicted_reward.detach())

    del frame

    command = dataset.get_command(cmds[0].detach().cpu().numpy(), cmds[1].detach().cpu().numpy(), cmds[2].detach().cpu().numpy())

    hakisa.execute_command(command)

    score = dataset.get_consequences(180, 1, 249-1, 213-180, tesseract_config='--psm 8')

    score = preprocess_BH2(score)

    try:

        reward += math.log10(score)

    except (ValueError, TypeError):

        # log10 is undefined for a score <= 0 (ValueError) or a non-numeric OCR result (TypeError).
        # A bare except here would also hide real errors such as a missing `math` import.
        reward += 0.0

    del score

    reward = torch.tensor(reward, device=device)

    if steps == 0:

        previous_command_quality = command_quality.detach()

    action_quality_cost = action_quality_loss(previous_command_quality, command_quality.argmax(1).detach()) # Input = (1, Classes), Target = (1)

    predicted_reward = predicted_reward * uncertainty_factor

    reward_cost = reward_loss(predicted_reward, reward)

    gameplay_loss = action_quality_cost + reward_cost

    gameplay_loss.backward() # Gradients accumulate until the next optimizer.step()

    previous_command_quality = command_quality.detach()

    del command_quality

    for n, p in hakisa.named_parameters(): # Checking how the grads and backpropagation are going

        if 'neuron1.weight' in n and p.grad is not None:
            grads.append(torch.mean(p.grad))

    current_loss = gameplay_loss.item()

    if current_loss < best_loss:

        best_loss = current_loss
        best_params = _snapshot_state(hakisa)

    steps += 1

    if steps % save_point == 0:

        if grad_clip is not None:
            # Clamp the accumulated gradients once, right before the update. Registering a new
            # hook on every parameter in every step would pile up hooks without bound.
            torch.nn.utils.clip_grad_value_(hakisa.parameters(), grad_clip)

        optimizer.step()
        scheduler.step()

        hakisa.zero_grad()

        last_grad = grads[-1] if grads else float('nan') # No matching parameter recorded yet

        print(f"Current step: {steps}")
        print(f"Best Loss: {best_loss}\tCurrent LR: {scheduler.get_last_lr()[0]}\tGradients Average: {last_grad}")
        print(f"Predicted Reward: {predicted_reward.item()}\tCurrent Reward: {reward}")
        print(command)

        torch.save({
            'Steps': steps,
            # Falls back to the current parameters if no finite loss has been seen yet (e.g. NaN loss)
            'Hakisa_params': best_params if best_params is not None else hakisa.state_dict(),
            'Hakisa_LR': scheduler.get_last_lr()[0]
        }, f"{save_path}/Hakisa_checkpoint.tar")

        # The '.wav' extension was missing, so the intended file was never found.
        winsound.PlaySound(f'D:/Python/Audio/English/chiara_craftEpic_1_en.wav', winsound.SND_FILENAME)