In [20]:
# !pip install pygame
import pygame

In [21]:
from Objects import Walls, Floors, CBoards, Fryers
from Foods import Resources, Fish, Potato, Plate
from Player import Players, Player1, Player2, player1_controls, player2_controls
from Foods import Menu
from random import random

In [22]:
import numpy as np

def get_state():
    '''
    Returns the current contents of the screen as a normalized pixel array.

    The array read from the display surface is reordered to
    (height, width, channels), scaled from the 0-255 RGB range to 0-1 and
    given a leading batch dimension, so the result has shape
    (1, height, width, channels).

    NOTE(review): a later cell of this notebook defines get_state() again;
    once the whole notebook has run, that later definition is the one in effect.
    '''
    pixel_array = pygame.surfarray.array3d(screen)
    # Transpose to (height, width, channels)
    pixel_array = np.transpose(pixel_array, (1, 0, 2))

    # Normalize so that pixel values are from 0 to 1, not 0 to 255 as in RGB
    pixel_array = pixel_array / 255.0
    # Add the batch dimension in front
    pixel_array = np.expand_dims(pixel_array, axis=0)

    # The original fell off the end here and therefore always returned None.
    return pixel_array

In [23]:
import os
# NOTE(review): absolute path that only exists on one machine. It also runs after the
# project-local imports in the earlier cells (Objects, Foods, Player), so on a fresh
# kernel those imports are resolved before the working directory changes — confirm
# whether this should move to the top of the notebook and become a configurable path.
os.chdir('/Users/michal.maslowski/Documents/GitHub/OvercookedAI-coop')

# from training_algorithm import Misha

In [24]:
def get_state():
    '''
    Snapshot of the game, packed as a two-element list [visual_data, numerical_data]:
    - visual_data - the screen pixels scaled to the 0-1 range, laid out as
      (batch, height, width, color channel)
    - numerical_data - one row holding, in this order:
        - the state of the Menu (see: Menu.get_state())
        - the state of Player 1's hands, then Player 2's (see: Player.get_state())
    '''
    # The surface array comes as (width, height, channel); swap the first two axes
    # and scale the pixel values in one go.
    frame = pygame.surfarray.array3d(screen)
    frame = frame.transpose(1, 0, 2) / 255.
    visual_data = frame[np.newaxis, ...] #Leading axis is the batch_size dimension

    parts = (Menu.get_state(), Player1.get_state(), Player2.get_state())
    numerical_data = np.concatenate(parts)[np.newaxis, ...]

    return [visual_data, numerical_data]




In [25]:
def get_rewards():
    '''
    Computes and returns the total amount of rewards for the current game state.

    Starts from Menu.game_score and adds intermediate rewards for resources
    that are useful for the dishes currently queued on the menu:
    Raw food needed to finish the menu is worth 5 points,
    Cut food on the menu - 15 points
    Fried food on the menu - 25 points
    Plate - 50 points (at most one plate per queued dish)

    For each food type only as many items as the menu asks for are rewarded,
    and the most processed items (fried, then chopped, then raw) are counted first.

    Not implemented here yet (see the TO-DO at the end of the function):
    Preparing a dish from the menu - half the points of giving the order
    (Fish - 500 points, Fish and Chips - 1000 points).
    '''
    raw_coeff = 5
    chopped_coeff = 15
    fried_coeff = 25
    plate_coeff = 50

    rewards = Menu.game_score

    food_categories = (("Fish", Fish), ("Potato", Potato))
    for name, cls in food_categories:
        # How many items of this food the queued dishes still call for.
        needed = sum(dish.ingredients_dict[name] for dish in Menu.queue)

        items = [food for food in Resources if isinstance(food, cls)]
        fried_food = sum(1 for food in items if food.fried)
        chopped_food = sum(1 for food in items if food.chopped and not food.fried)
        raw_food = sum(1 for food in items if not food.chopped and not food.fried)

        # Hand out the demand to the most valuable stage first; whatever demand
        # is left over trickles down to the less processed stages.
        stages = ((fried_coeff, fried_food), (chopped_coeff, chopped_food), (raw_coeff, raw_food))
        for coeff, available in stages:
            used = min(available, needed)
            rewards += coeff * used
            needed -= used

    plates = sum(1 for food in Resources if isinstance(food, Plate))
    rewards += plate_coeff * min(plates, len(Menu.queue))

    #TO-DO HERE: ADD INTERMEDIATE REWARDS FOR HAVING A NEAR-READY DISH
    #Get the dict of plate and the dish and see if any match

    # The original computed the total but never returned it, so callers got None.
    return rewards

In [26]:
def draw():
    '''
    Repaints one frame: clears the screen to white, then draws every group
    on top of it, back to front (floors, walls, resources, players).
    '''
    screen.fill((255, 255, 255))
    for layer in (Floors, Walls, Resources, Players):
        layer.draw(screen)

In [27]:
def _pick_action(values, player):
    '''
    Returns the index of the largest q-value among one player's five moves.
    If the winner is index 4 (the "action" move) and the player reports that an
    action is not possible, the best of the remaining moves 0-3 is returned instead.
    '''
    idx = values.argmax()
    if idx == 4 and not player.action_possible():
        idx = values[0:4].argmax()
    return idx


def q2idx(eps = 0.1):
    '''
    Epsilon-greedy choice of one move per player.

    With probability eps the ten q-values are drawn at random, which is
    equivalent to picking random moves; otherwise they come from
    Misha.predict(get_state()). Entries 0-4 belong to Player 1 and
    entries 5-9 to Player 2.

    Returns a numpy array [p1_idx, p2_idx] with each index in 0..4.

    NOTE(review): the recorded output of this notebook shows
    "'Player' object has no attribute 'action_possible'"; that method has to
    exist on the Player class for the index-4 check to work.
    '''
    is_random = random() < eps
    if is_random:
        #Randomizes q-values for each move. Equivalent to picking random moves
        q_values = np.random.rand(10)

    else:
        q_values = Misha.predict(get_state())
        q_values = q_values[0] #Get rid of the batch_size dimension: goes from (1, 10) to (10,)

    p1_idx = _pick_action(q_values[0:5], Player1)
    # Player 2 owns the second half; the original re-read q_values[0:5] here,
    # so both players always followed Player 1's q-values.
    p2_idx = _pick_action(q_values[5:10], Player2)

    # Both indices must go in ONE sequence: np.array(p1_idx, p2_idx) would
    # interpret p2_idx as the dtype argument.
    return np.array([p1_idx, p2_idx])
    

def idx2key(idxs):
    '''
    Turns the pair of move indices [p1_idx, p2_idx] into a key-state mapping.

    The result maps every pygame K_* key constant to False, except the key
    bound to each player's chosen move, which is set to True. It can be
    indexed by key constant in the same way as pygame.key.get_pressed().

    Assumes the values of player1_controls / player2_controls are pygame key
    constants listed in the same order as the move indices — TODO confirm
    against the Player module.
    '''
    p1_idx = idxs[0]
    p2_idx = idxs[1]

    # Start with every known key released.
    keys = [key for key in dir(pygame) if key.startswith('K_')]
    keys_dict = {getattr(pygame, key): False for key in keys}

    # Press exactly the one key each player's move is bound to.
    keys_dict[list(player1_controls.values())[p1_idx]] = True
    keys_dict[list(player2_controls.values())[p2_idx]] = True

    # The original built the mapping but never returned it.
    return keys_dict
    

    

In [29]:
pygame.init()


misha_playing = True #Bot vs human play
running = True
clock = pygame.time.Clock()
tick = 0
game_score = 0

learning = True #Specifies whether the experience is added to the replay buffer

#if not learning:
if learning:
    # Training mode: use the live model together with its replay-buffer helpers.
    from training_algorithm import add_memory, train_Misha, Misha

else:
    # Play-only mode: load the saved model from disk.
    from tensorflow.keras.models import load_model
    Misha = load_model('Misha.h5')

screen = pygame.display.set_mode((800, 600))

#Defining global variables for use later
actions = None
former_state = None
rewards = None
former_rewards = None

if learning:
    # While learning the game practically never times out.
    time_per_game = 99999
else:
    # Honour the advertised default: an empty answer used to crash int() with a ValueError.
    typed_length = input("Please enter the length of the game, in seconds (default: 180)").strip()
    time_per_game = int(typed_length) if typed_length else 180
start_time = pygame.time.get_ticks()


from constants import START_X, END_X, START_Y, END_Y
def update(actions):
    '''
    Advances the game by one frame while the clock is still running;
    once time_per_game seconds have passed it only paints the end screen.

    actions is handed to Players.update() unchanged.
    '''
    elapsed_time = (pygame.time.get_ticks() - start_time) / 1000

    if elapsed_time >= time_per_game:
        #Time is up: overlay a big red "GAME OVER" in the middle of the frozen frame
        centre = ((START_X+END_X)//2, (START_Y + END_Y)//2)
        banner = pygame.font.SysFont("comicsansms", 100).render("GAME OVER", True, (255, 0, 0))
        screen.blit(banner, banner.get_rect(center=centre))
        return

    Players.update(actions)
    CBoards.update()
    Fryers.update()


# Moves chosen at the previous decision step. They are what led from former_state
# to the next observed state, so they are what gets stored with that transition.
former_action_idxs = None

while running:
    # Drain the event queue; closing the window ends the game.
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
            break

    if(not running):
        break

    tick += 1
    if misha_playing:
        # The bot decides once every 60 ticks; its last decision stays in
        # `actions` in between.
        # NOTE(review): holding the chosen key for all 60 ticks is an assumption —
        # confirm whether it should be released after a single frame instead.
        if tick % 60 == 0:
            state = get_state()
            action_idxs = q2idx()
            actions = idx2key(action_idxs)

            rewards = get_rewards()
            if rewards is None:
                # Keep the reward delta numeric even if no reward was produced.
                rewards = 0

            # A transition needs a previous step: on the very first decision
            # former_state / former_rewards are still None.
            if learning and former_state is not None:
                add_memory(former_state, state, former_action_idxs, rewards - former_rewards)

            former_state, former_rewards, former_action_idxs = state, rewards, action_idxs

    else:
        # Human play: read the keyboard every frame.
        actions =  pygame.key.get_pressed()

    if actions is None:
        # Nothing decided yet (first ticks of a bot game): use the real keyboard
        # state so update() always receives something indexable by key.
        actions = pygame.key.get_pressed()

    # Training is independent of where the actions come from. In the original the
    # keyboard read hung off this condition's `else`, which overwrote the bot's
    # actions on almost every tick.
    if learning and tick % 1000 == 0: #PLAYER ACTION_POSSIBLE TO BE CODED WHY TF IS IT NOT DRAWING?.
        train_Misha()
        start_time = pygame.time.get_ticks() #Reset the clock

    # if not learning:
    draw()

    update(actions)

    pygame.display.flip()

    clock.tick(60)


pygame.display.quit()
pygame.quit()
exit()



AttributeError: 'Player' object has no attribute 'action_possible'

: 