# In [402]:
import itertools
import time
import numpy as np
import cv2
import matplotlib.pyplot as plt
import socket
from PIL import Image
from scipy.spatial import distance
import glob




class Agent:
    """Placeholder agent that always answers with the same action code."""

    def __init__(self):
        """Create the agent; it keeps no state."""

    def step(self, end, reward, state):
        """Return the action code for the next step; all inputs are ignored.

        Action codes as listed by the original author:
            0 -> nothing, 1 -> left, 2 -> right, 3 -> random
        """
        chosen_action = 3
        return chosen_action
    
    
    
class Environment:
    """TCP client for the simulation server.

    Every request is a two-byte message; every reply is ``2 + 3 * size ** 2``
    bytes: an end flag, a reward, and the raw bytes of a ``size x size``
    three-channel frame.

    The instance can be used as a context manager so that the socket is
    closed when the block exits.
    """

    def __init__(self, ip = "127.0.0.1", port = 13000, size = 200, timescale = 3):
        """Open the connection to the server.

        ip, port:  address of the server.
        size:      side length, in pixels, of the square frame the server sends.
        timescale: stored on the instance; not used by this class itself.

        Raises OSError if the connection cannot be established.
        """
        self.client     = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.ip         = ip
        self.port       = port
        self.size       = size
        self.timescale  = timescale

        try:
            self.client.connect((ip, port))
        except OSError:
            # Do not leak the file descriptor when the server is unreachable.
            self.client.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying socket."""
        self.client.close()

    def reset(self):
        """Send the reset message (bytes ``1, 0``) and return the reply."""
        self._send(1, 0)
        return self._receive()

    def step(self, action):
        """Send ``action`` (bytes ``2, action``) and return the reply."""
        self._send(2, action)
        return self._receive()

    def state2image(self, state):
        """Turn a flat state list into a ``size x size x 3`` PIL image."""
        pixels = np.array(state, "uint8").reshape(self.size, self.size, 3)
        return Image.fromarray(pixels)

    def _receive(self):
        """Read one complete reply and return ``(end, reward, state)``.

        ``end`` and ``reward`` are the first two bytes as ints; ``state`` is a
        list with the remaining bytes as ints.

        Raises ConnectionError if the peer closes the connection before the
        whole reply has arrived.
        """
        expected = 2 + 3 * self.size ** 2
        data = bytearray()

        # Kudos to Jan for the socket.MSG_WAITALL fix!
        # MSG_WAITALL normally delivers everything in one call; the loop only
        # matters when the call returns early (e.g. interrupted by a signal).
        while len(data) < expected:
            chunk = self.client.recv(expected - len(data), socket.MSG_WAITALL)
            if not chunk:
                raise ConnectionError(
                    "connection closed after %d of %d expected bytes"
                    % (len(data), expected)
                )
            data += chunk

        end    = data[0]
        reward = data[1]
        state  = list(data[2:])

        return end, reward, state

    def _send(self, action, command):
        """Send the two given byte values, in this order, as one message."""
        # sendall() retries until every byte is written; send() may stop early.
        self.client.sendall(bytes([action, command]))


        
        
        
        
        
        
        
        
class EnvironmentWrapper:
    """Wrapper around an environment object.

    It locates the agents in every frame by background subtraction, keeps a
    per-episode history of what was seen, and replaces the reward of the
    final frame with one of three configurable values (victory, suicide,
    killed).

    Per-episode records (all cleared by ``reset``):
        history       list of agent positions, one entry per processed frame
        n_objects     0 when two or more blobs were seen, 1 when exactly one;
                      nothing is recorded for frames without any blob
        size_objects  number of non-zero red pixels when a single blob was seen
        distance      distance between the first two positions of each frame
        corner        1 when that distance is <= 1.0, otherwise 0; exactly one
                      entry per processed frame
    """

    def __init__(self, env, background = np.array([[0]]), verbose = False,
                 reward_victory = 1, reward_suicide = -10, reward_killed = -1,
                 n_frames = 5):
        """Reset ``env`` and set up the background used for segmentation.

        env:            object offering ``reset()``, ``step(action)``,
                        ``state2image(state)`` and a ``size`` attribute.
        background:     the already loaded background image (not a path).
                        ``None`` or an all-zero array means "not provided":
                        the file ``background_<size>.png`` is used when it
                        exists and can be read, otherwise the background is
                        estimated from the environment.
        verbose:        print debugging information while locating agents.
        reward_victory, reward_suicide, reward_killed:
                        rewards reported on the final frame of an episode.
        n_frames:       number of final frames inspected to tell a suicide
                        apart; a single frame could just be a ricochet.
        """
        self.env = env
        self.verbose = verbose
        self.reward_victory = reward_victory
        self.reward_suicide = reward_suicide
        self.reward_killed = reward_killed
        self.n_frames = n_frames

        # When the wrapper is created, the environment resets
        self.env.reset()

        self._clear_records()

        if background is None or np.all(background == np.array([[0]])):
            path = self._background_path()
            loaded = cv2.imread(path) if glob.glob(path) else None
            if loaded is None:
                # No cached file, or it could not be decoded.
                loaded = self.automatic_background_segmentation()
            self.background = loaded
        else:
            self.background = background

    def _clear_records(self):
        """Start empty per-episode records."""
        self.history = []
        self.n_objects = []
        self.size_objects = []
        self.distance = []
        self.corner = []

    def _background_path(self):
        """File name of the cached background for this environment size."""
        return 'background_' + str(self.env.size) + '.png'

    def step(self, action, get_image = False):
        """Advance the environment one step and locate the agents.

        Returns ``(end, reward, pos, agents)`` when ``get_image`` is true,
        where ``agents`` is the frame with the background masked out and the
        reward is the raw one from the environment. Otherwise returns
        ``(end, reward, state)`` with ``state`` the frame as an array; on the
        final frame (``end == 1``) the reward is replaced by the configured
        victory / suicide / killed value.
        """
        end, reward, state = self.env.step(action)

        # The frame as an array
        state = np.array(self.env.state2image(state))

        # Background removed, then the agents are located (this also updates
        # the per-episode records).
        agents = self.agent_enhacement(state)
        pos = self.positions(agents)

        if get_image:
            return end, reward, pos, agents

        if end == 1:
            return end, self._terminal_reward(reward), state

        return end, reward, state

    def _terminal_reward(self, reward):
        """Pick the reward reported for the final frame of an episode."""
        if reward == 10:  # The agent wins
            return self.reward_victory

        # Two or more separate blobs in every one of the last n_frames
        # recorded frames: the agents were apart, so it is counted as suicide.
        recent = self.n_objects[-self.n_frames:]
        if recent and all(flag == 0 for flag in recent):
            return self.reward_suicide

        # Positions coincided in the last frame (one agent out of sight):
        # also counted as suicide.
        if self.corner and self.corner[-1] == 1:
            return self.reward_suicide

        # Otherwise the agents were interacting: assumed killed.
        return self.reward_killed

    def reset(self, get_image = False):
        """Reset the environment, clear the records and return its reply.

        ``get_image`` is accepted but currently has no effect.
        """
        end, reward, state = self.env.reset()
        self._clear_records()
        return end, reward, state

    def automatic_background_segmentation(self, n_steps = 100, sample_every = 5, action = 3):
        """Estimate the static background and cache it on disk.

        The environment is stepped up to ``n_steps`` times with ``action``,
        keeping one frame out of every ``sample_every``. Each output value is
        the most common value seen at that pixel and channel (the agents move,
        the background does not). The result is written to
        ``background_<size>.png`` and returned as an integer array. The
        environment is reset before and after sampling.
        """
        self.env.reset()

        # Keep some images, not too many: the agents only need to move a little
        frames = []
        for i in range(n_steps):
            result = self.env.step(action)

            if i % sample_every == 0:
                frames.append(np.array(self.env.state2image(result[-1])))

            # If the game has ended, stop sampling
            if result[0] == 1:
                break
        self.env.reset()

        canvas_baseline = self._pixelwise_mode(frames)

        cv2.imwrite(self._background_path(), canvas_baseline)

        return canvas_baseline.astype(int)

    @staticmethod
    def _pixelwise_mode(frames):
        """Most frequent value per element across equally shaped arrays.

        Ties are resolved in favour of the smallest value.
        """
        # Sorting along the frame axis makes argmax (first maximum) select
        # the smallest value among equally frequent ones.
        stack = np.sort(np.stack([np.asarray(frame) for frame in frames]), axis=0)
        counts = np.stack([(stack == layer).sum(axis=0) for layer in stack])
        winner = counts.argmax(axis=0)
        return np.take_along_axis(stack, winner[np.newaxis], axis=0)[0]

    def agent_enhacement(self, img_list):
        """Mask out the background, leaving only the pixels that differ from it.

        ``img_list`` is one image or a list of images. A single processed
        image is returned when exactly one image was given (also when it was
        given inside a list); otherwise a list of processed images.
        """
        if not isinstance(img_list, list):
            img_list = [img_list]

        # Signed arithmetic: subtracting uint8 arrays would wrap around.
        background = np.asarray(self.background).astype(np.int32)

        modified_imgs = []
        for img in img_list:
            frame = np.asarray(img)

            # Absolute difference with the background
            base = np.abs(background - frame.astype(np.int32))

            # Grey version of the difference
            gray = cv2.cvtColor(np.uint8(base), cv2.COLOR_BGR2GRAY)

            # Mask of everything that differs noticeably from the background
            _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

            # Keep only the masked part of the original frame
            res = cv2.bitwise_and(np.uint8(frame), np.uint8(frame), mask = np.uint8(thresh))

            modified_imgs.append(res)

        if len(modified_imgs) == 1:
            return modified_imgs[0]

        return modified_imgs

    def positions(self, image):
        """Locate the agents in a background-free frame and update the records.

        Returns a list of ``(x, y)`` positions. With several blobs the list is
        ordered by decreasing sum of channel 0 (the original comments call it
        the red channel) inside each bounding box. With one blob it is
        ``[red_centre, blue_centre]``, computed from channels 0 and 2. With
        no blob it is ``[(0, 0), (0, 0)]``.
        """
        image_original = np.copy(image)

        # Binary mask of everything that is not black
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

        # Closing (dilate then erode) so each agent becomes a single blob
        kernel = np.ones((5, 5), np.uint8)
        binary = cv2.dilate(binary, kernel, iterations = 3)
        binary = cv2.erode(binary, kernel, iterations = 3)

        # findContours returns 3 values in OpenCV 3 and 2 values in OpenCV 2
        # and 4; the contours are the second-to-last item in every version.
        contours = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

        if len(contours) > 1:
            pos = self._separate_positions(image_original, contours)
            self.n_objects.append(0)

        elif len(contours) == 1:
            # Only one blob: use the centre of the brightest pixels of each colour
            self.size_objects.append(int(np.count_nonzero(image_original[:, :, 0])))

            red_agent = self._bright_centroid(image_original[:, :, 0])
            blue_agent = self._bright_centroid(image_original[:, :, 2])
            pos = [red_agent, blue_agent]

            self.n_objects.append(1)

            if self.verbose: print('---------  INTERSECTION -----------')

        else:
            pos = [(0,0),(0,0)]

        self.history.append(pos)
        if self.verbose: print('positions object,   ', pos)

        separation = distance.euclidean(pos[0], pos[1])
        self.distance.append(separation)

        # Coinciding positions are taken as a sign that one agent is out of
        # sight. Exactly one entry per frame keeps ``corner`` aligned with
        # ``history`` and ``distance``.
        self.corner.append(1 if separation <= 1.0 else 0)

        if self.verbose: print(separation)

        return pos

    @staticmethod
    def _separate_positions(image_original, contours):
        """Bounding-box centres of the blobs, highest channel-0 sum first."""
        ranked = []
        for contour in contours:
            (x, y, w, h) = cv2.boundingRect(contour)
            red_sum = int(image_original[y:y+h, x:x+w, 0].sum())
            ranked.append((red_sum, (int(x+w/2), int(y+h/2))))

        # A list (not a dict keyed by the sum) keeps blobs with equal sums.
        ranked.sort(key=lambda item: item[0], reverse=True)
        return [center for _, center in ranked]

    @staticmethod
    def _bright_centroid(channel):
        """Centre ``(x, y)`` of the pixels of ``channel`` above a cut-off level."""
        channel = np.ascontiguousarray(channel)
        values = channel[channel != 0]   # non-zero pixels, in row-major order

        # NOTE(review): the cut-off is the element one tenth from the end of
        # the values in scan order; they are not sorted, so this is not a
        # true "brightest 10 %" percentile. Kept as in the original.
        level = float(values[-(len(values) // 10)]) if len(values) else 0.0

        _, bright = cv2.threshold(channel, level, 255, cv2.THRESH_TOZERO)

        M = cv2.moments(bright)
        # 0.0001 are there to avoid 0/0 divisions
        cX = int((M["m10"]+0.0001) / (M["m00"]+0.0001))
        cY = int((M["m01"]+0.0001) / (M["m00"]+0.0001))
        return (cX, cY)

# In [None]:
# Drive the wrapped environment with action 3 for a fixed number of steps.
# Whenever a step reports a non-zero end flag, print the (end, reward) pair
# and reset the wrapper before continuing.
env = Environment()
env_n = EnvironmentWrapper(env, reward_suicide=-100)

for i in range(10000):
    r = env_n.step(3)

    if r[0] == 0:
        continue

    print(r[:2])
    env_n.reset()