In [1]:
# !pip install pygame
import pygame

pygame 2.6.0 (SDL 2.28.4, Python 3.9.6)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
from Objects import Walls, Floors, CBoards, Fryers
from Foods import Resources, Fish, Potato, Plate
from Player import Players, Player1, Player2, player1_controls, player2_controls
from Foods import Menu
from random import random
import numpy as np
import os
# NOTE(review): hard-coded, machine-specific working directory. Relative paths used later in
# the notebook ("Misha.keras", the saved .npy files) are resolved against it.
# It is executed after the Objects / Foods / Player imports above.
os.chdir('/Users/michal.maslowski/Documents/GitHub/OvercookedAI-coop')

# from training_algorithm import Misha



In [3]:
from constants import START_X, END_X, START_Y, END_Y

def get_state():
    '''
    Build the observation of the current frame.

    Returns a two-element list:
    - visual_data - the cropped screen pixels divided by 255, with a leading axis of size 1
    - numerical_data - Menu.get_state(), Player1.get_state() and Player2.get_state()
      concatenated in that order, cast to float, with a leading axis of size 1
    '''
    #surfarray gives (width, height, channel); swap the first two axes to get (height, width, channel)
    frame = np.transpose(pygame.surfarray.array3d(screen), (1, 0, 2))

    #Keep rows from START_Y - Menu.height up to END_Y and columns from START_X up to END_X
    top_row = START_Y - Menu.height
    cropped = frame[top_row:END_Y, START_X:END_X]

    #Scale pixels to the 0..1 range for easier training, then add the leading axis
    visual_data = (cropped / 255.)[np.newaxis, ...]

    state_parts = (Menu.get_state(), Player1.get_state(), Player2.get_state())
    numerical_data = np.concatenate(state_parts).astype(float)[np.newaxis, ...]

    return [visual_data, numerical_data]




In [4]:
def get_rewards():
    '''
    Score the current game state as a single float.

    The total is the sum of:
    - Menu.game_score * 10
    - for each of Fish and Potato: the items of that kind in Resources are matched
      against the amount of that ingredient required by the dishes in Menu.queue,
      most-prepared first. Each matched item is worth
        250 if it is fried,
        150 if it is chopped but not fried,
         50 if it is neither chopped nor fried.
      Items beyond what the menu requires earn nothing.
    - 500 per Plate in Resources, counting at most one plate per dish in Menu.queue
    '''

    tier_coeffs = {"fried": 250, "chopped": 150, "raw": 50}
    plate_coeff = 500

    total = Menu.game_score*10

    for name, cls in (("Fish", Fish), ("Potato", Potato)):
        #How much of this ingredient the queued dishes still ask for
        demand = sum(dish.ingredients_dict[name] for dish in Menu.queue)

        #Count the available items of this kind per preparation tier
        counts = {
            "fried": sum(1 for food in Resources if (isinstance(food, cls) and food.fried)),
            "chopped": sum(1 for food in Resources if (isinstance(food, cls) and food.chopped and not food.fried)),
            "raw": sum(1 for food in Resources if (isinstance(food, cls) and not food.chopped and not food.fried)),
        }

        #Spend the demand on the most valuable tier first
        for tier in ("fried", "chopped", "raw"):
            matched = min(counts[tier], demand)
            total += tier_coeffs[tier] * matched
            demand -= matched

    plate_count = sum(1 for food in Resources if isinstance(food, Plate))
    total += plate_coeff * min(plate_count, len(Menu.queue))

    return float(total)

    #TO-DO HERE: ADD INTERMEDIATE REWARDS FOR HAVING A NEAR-READY DISH
    #Get the dict of plate and the dish and see if any match

In [5]:
def draw():
    '''Clear the global screen to white and repaint every layer onto it.'''
    screen.fill((255, 255, 255))
    #Drawn in this order, so later layers are painted over earlier ones
    for layer in (Floors, Walls, Resources, Players, Menu):
        layer.draw(screen)

In [6]:
def get_q_values(tick, learning):
    '''
    Return the array of 10 q-values used to pick both players' moves.

    With probability eps(tick) = 0.1 + 0.9 * exp(-1e-6 * tick), and only when
    `learning` is true, the values are random; otherwise they come from Misha.predict
    on the current state.
    '''
    #Exploration rate: equals 1.0 at tick 0 and decays exponentially towards 0.1
    epsilon = 0.1 + 0.9 * np.exp(- 1e-6 * tick)
    explore = random() < epsilon

    if not (explore and learning):
        #Greedy choice: drop the leading batch axis of the prediction (expected (1, 10) -> (10,))
        return Misha.predict(get_state())[0]

    #Random q-values are equivalent to random moves, except that slots 4 and 9
    #(the "action" of player 1 and of player 2) are forced to be the largest in their half
    q_values = np.random.rand(10)
    for action_slot in (4, 9):
        q_values[action_slot] = q_values.max() + 1

    return q_values
        
    

def q2idx(tick, learning):
    '''
    Pick one move index (0-4) per player from the current q-values.

    q-values 0-4 belong to player 1 and 5-9 to player 2. Each player gets the index of
    their largest q-value. Index 4 is the "action" move: if it wins but the player's
    action_possible()["action"] is None, the best of the remaining four moves is used.

    Returns np.array([p1_idx, p2_idx]).
    '''
    def best_move(values, player):
        #Best move index for one player, skipping the "action" move when it is unavailable
        idx = values.argmax()
        action_available = player.action_possible()["action"] is not None
        if idx == 4 and not action_available:
            idx = values[0:4].argmax() #2nd best choice: best of the four non-action moves
        return idx

    q_values = get_q_values(tick, learning)

    p1_idx = best_move(q_values[0:5], Player1)
    p2_idx = best_move(q_values[5:10], Player2)

    return np.array([p1_idx, p2_idx])
    

def idx2key(idxs):
    '''
    Turn the two chosen move indices into a key-state mapping.

    Returns a dict keyed by the value of every pygame attribute whose name starts with
    'K_'. Every entry is False except the key found at position idxs[0] of
    player1_controls.values() and the key at position idxs[1] of player2_controls.values().
    '''
    p1_idx, p2_idx = idxs[0], idxs[1]

    #Start with every known key released
    keys_dict = {}
    for attr_name in dir(pygame):
        if attr_name.startswith('K_'):
            keys_dict[getattr(pygame, attr_name)] = False

    #Press exactly one key per player
    for controls, move_idx in ((player1_controls, p1_idx), (player2_controls, p2_idx)):
        pressed_key = list(controls.values())[move_idx]
        keys_dict[pressed_key] = True

    return keys_dict
    

    

In [8]:
import time

#Game setup: loop flags, the model (when the bot plays), the pygame window and the game clock
running = True
clock = pygame.time.Clock()
tick = 0
game_score = 0

misha_playing = True #Bot vs human play
learning = False #Specifies whether the experience is added to the replay buffer

if learning:
    from training_algorithm import add_memory, train_Misha, Misha
    #Per-training-round measurements; the main loop appends to them and saves them as .npy files
    time_training = np.array([])
    time_actions = np.array([])
    total_time = np.array([])
    losses = np.array([])

elif misha_playing:
    from tensorflow import keras
    #NOTE(review): safe_mode=False lets the saved model deserialize arbitrary Python (e.g. lambda layers),
    #so only load a "Misha.keras" file that comes from a trusted source
    Misha = keras.saving.load_model("Misha.keras", safe_mode=False)

pygame.init()
screen = pygame.display.set_mode((800, 600))

#Defining global variables for use later
actions = None
former_state = None
rewards = None
former_rewards = 0.

#Length of one game in seconds. While learning the game is effectively endless;
#otherwise ask the user, and fall back to the advertised default when nothing is typed
DEFAULT_TIME_PER_GAME = 180
if learning:
    time_per_game = 99999999
else:
    time_answer = input("Please enter the length of the game, in seconds (default: 180)").strip()
    time_per_game = int(time_answer) if time_answer else DEFAULT_TIME_PER_GAME

#Game clock origin in pygame ticks (milliseconds); update() measures elapsed time against it
start_time = pygame.time.get_ticks()


def update(actions):
    '''
    Advance the game by one tick, or blit the end-of-game overlay once time is up.

    While fewer than time_per_game seconds have passed since start_time, Players is updated
    with `actions`, followed by CBoards, Fryers and Menu. After that nothing is updated any
    more; instead "GAME OVER" and the final Menu.game_score are blitted onto the screen.
    '''
    seconds_played = (pygame.time.get_ticks() - start_time) / 1000

    if seconds_played < time_per_game:
        Players.update(actions)
        for group in (CBoards, Fryers, Menu):
            group.update()
        return

    #Game finished; place a large "GAME OVER" sign in the middle of the play area,
    #with the final score 100 pixels below it
    centre_x = (START_X + END_X) // 2
    centre_y = (START_Y + END_Y) // 2
    font = pygame.font.SysFont("comicsansms", 100)

    banner = font.render("GAME OVER", True, (255, 0, 0))
    banner_rect = banner.get_rect(center=(centre_x, centre_y))

    final_score = font.render(f"Final Score: {Menu.game_score}", True, (0, 0, 0))
    final_score_rect = final_score.get_rect(center=(centre_x, centre_y + 100))

    screen.blit(banner, banner_rect)
    screen.blit(final_score, final_score_rect)


#Main loop: one iteration per game tick, until the window is closed
times_per_action = np.array([])
while running:
    total_start_time = time.time()
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
            break

    if not running:
        break

    #The bot picks a new pair of moves every 30 ticks; the same moves are reused in between
    if misha_playing and (tick % 30 == 0):
        #Separate name on purpose: `start_time` is the game clock (pygame ticks) read by update()
        #and must not be overwritten with a wall-clock timestamp
        action_start_time = time.time()
        state = get_state()
        action_idxs = q2idx(tick, learning)
        actions = idx2key(action_idxs)
        if learning and (tick != 0):
            rewards = get_rewards()

            #NOTE(review): action_idxs are the moves just chosen for `state`, not the moves that led
            #from former_state to state - confirm this is what add_memory expects
            add_memory(state[0], state[1], former_state[0], former_state[1], action_idxs, rewards - former_rewards)

            former_rewards = rewards
            draw()

        former_state = state
        #np.append returns a new array, so the result has to be stored
        times_per_action = np.append(times_per_action, time.time() - action_start_time)

    #Train once every 30*64*128 ticks and persist the timing / loss measurements
    if learning and ((tick + 1) % (30*64*128) == 0):
        start_training_time = time.time()
        losses_epoch, time_saving = train_Misha()
        losses = np.append(losses, losses_epoch)
        time_training = np.append(time_training, time.time()-start_training_time)
        time_actions = np.append(time_actions, times_per_action.sum())
        total_time = np.append(total_time, time.time() - total_start_time)
        np.save('time_training.npy', time_training)
        np.save('time_actions.npy', time_actions)
        np.save('total_time.npy', total_time)
        np.save('time_saving.npy', time_saving)
        np.save('losses.npy', losses)

        start_time = pygame.time.get_ticks() #Reset the clock

    if not misha_playing:
        actions = pygame.key.get_pressed()

    #Repaint BEFORE update(): draw() starts by clearing the screen, so calling it after update()
    #would erase the "GAME OVER" overlay that update() blits once the time is up.
    #The displayed frame is therefore one tick behind the game state.
    if not learning:
        draw()

    update(actions)

    tick += 1
    if not learning:
        pygame.display.flip()
        clock.tick(60)


pygame.display.quit()
pygame.quit()
exit()



: 