In [6]:
from random import random
import numpy as np
from agents import *
import agents
from copy import copy

In [7]:
queens = 8
k = 8

In [8]:
class QueensEnvironment(agents.Environment):
    
    def __init__(self):
        super().__init__()
        self.random_state()
        self.fail_times, self.win_times = [], []
        
    def random_state(self):
        self.state = np.random.randint(0, queens-1, [k, queens])
    
    def percept(self, agent):
        return self.state
    
    def execute_action(self, agent, action):
        
        mes, state = action
        if mes == "Success":
            self.win_times.append(agent.performance)
            agent.performance = 0
            self.random_state()
        
        elif agent.performance >= int(0.5*queens):
            self.fail_times.append(agent.performance)
            self.random_state()
            agent.performance = 0
            print(agent.performance)
        else:
            agent.performance += 1
            self.state = state

    def print_stats(self):
        print("Win ratio:", len(self.win_times)/ (len(self.fail_times) + len(self.win_times)))
        print("Average fail time:", sum(self.fail_times)/len(self.fail_times))
        print("Average win time:", sum(self.win_times)/len(self.win_times))              

In [9]:
def QueensHillClimbAgent():
        
    dt = np.dtype([('count', int), ('state', int, (queens,))] )
    def count_collisions(board):
        count = 0
        for i in range(queens):
            for j in range(i+1, queens):
                if board[i] == board[j] or abs(board[i] - board[j]) == j - i:
                    count += 1
        return count
    
    def print_board(state):
        print("--"*(queens + 2))
        for row in state:
            print(str(row) + " " + "  " * row + "x" )
        print("--"* (queens + 2))

    def check_actions(board, how_many):
        d = np.empty([queens, queens], dtype = dt)
        run = np.array(board)
        for q in range(queens):
            for i in range(queens):
                run[q] = i
                d[q, i] = (count_collisions(run), np.array(run))
            run[q] = board[q]
            
        ret = np.sort(d, axis=None, order='count')
        return ret[:how_many]
    
    def program(state):
        top = check_actions(state[0], k)
        for board in state[1:]:
            new = np.unique(np.concatenate((top, check_actions(board, k))))
            top = np.sort(new, order='count')[:k]
        print(top['count'])
        
        if(0 in top['count']):
            print("Success", top['state'][0])
            return "Success", top['state']
        return "", top['state']
        
        
    program.plateau_count = 0    
    return program

In [10]:
q = QueensEnvironment()
q.add_thing(Agent(QueensHillClimbAgent()))
q.failures, q.wins = 0,0

In [11]:
q.run(1000)
q.print_stats()

[4 5 5 5 5 5 5 5]
[3 3 3 3 3 3 3 3]
[2 2 2 2 2 2 2 2]
[1 1 1 1 1 1 2 2]
[0 1 1 1 1 1 1 1]
Success [4 1 5 0 6 3 7 2]
[2 2 2 3 3 3 3 3]
[0 1 1 1 2 2 2 2]
Success [6 0 2 7 5 3 1 4]
[3 3 4 4 4 4 4 4]
[2 2 2 2 2 2 2 2]
[0 1 1 1 1 1 1 2]
Success [7 3 0 2 5 1 6 4]
[2 2 2 3 3 3 3 3]
[1 1 1 1 2 2 2 2]
[0 1 1 1 1 1 1 1]
Success [3 1 6 2 5 7 4 0]
[3 3 3 4 4 4 4 4]
[2 2 2 2 2 2 2 2]
[0 1 1 1 1 1 1 1]
Success [7 1 4 2 0 6 3 5]
[2 2 2 2 3 3 3 3]
[1 1 1 2 2 2 2 2]
[1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1]
0
[3 3 3 3 4 4 4 4]
[2 2 2 2 2 2 2 2]
[1 1 1 1 1 1 1 2]
[0 1 1 1 1 1 1 1]
Success [0 5 7 2 6 3 1 4]
[0 1 1 2 2 3 3 3]
Success [5 1 6 0 3 7 4 2]
[4 4 4 4 5 5 5 5]
[2 2 3 3 3 3 3 3]
[1 1 2 2 2 2 2 2]
[1 1 1 1 1 1 2 2]
[0 1 1 1 1 1 1 1]
Success [2 4 1 7 0 6 3 5]
[3 3 4 4 4 4 4 4]
[2 2 2 2 2 2 2 2]
[0 1 1 1 1 1 1 2]
Success [4 6 3 0 2 7 5 1]
[4 4 5 5 5 5 5 5]
[2 2 2 3 3 3 3 3]
[1 1 1 1 1 1 2 2]
[1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1]
0
[3 3 4 4 4 4 4 4]
[1 2 2 2 2 2 2 2]
[1 1 1 1 2 2 2 2]
[1 1 1

[1 1 1 1 1 2 2 2]
[0 1 1 1 1 1 1 1]
Success [6 4 2 0 5 7 1 3]
[2 3 3 3 4 4 4 4]
[1 2 2 2 2 2 2 2]
[0 1 1 1 1 1 1 2]
Success [4 2 0 5 7 1 3 6]
[3 3 4 4 5 5 5 5]
[2 2 2 3 3 3 3 3]
[1 1 1 2 2 2 2 2]
[1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1]
0
[4 4 4 4 4 4 4 5]
[2 2 2 3 3 3 3 3]
[1 1 1 1 2 2 2 2]
[0 0 1 1 1 1 1 1]
Success [0 6 3 5 7 1 4 2]
[2 2 3 3 3 3 4 4]
[1 1 1 1 1 2 2 2]
[1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1]
[0 1 1 1 1 1 1 1]
Success [5 2 6 3 0 7 1 4]
[3 4 4 4 4 4 4 4]
[1 2 2 2 2 3 3 3]
[1 1 1 1 1 1 1 1]
[0 1 1 1 1 1 1 1]
Success [1 5 7 2 0 3 6 4]
[3 4 4 4 4 4 4 4]
[2 2 2 2 3 3 3 3]
[1 1 1 2 2 2 2 2]
[0 0 1 1 1 1 1 1]
Success [0 4 7 5 2 6 1 3]
[3 3 3 4 4 4 4 4]
[2 2 3 3 3 3 3 3]
[2 2 2 2 2 2 2 2]
[1 1 1 1 1 2 2 2]
[0 1 1 1 1 1 1 1]
Success [0 4 7 5 2 6 1 3]
[3 4 4 4 4 4 4 4]
[1 1 2 2 2 2 2 2]
[1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1]
0
[4 4 4 5 5 5 5 5]
[2 2 3 3 3 3 3 3]
[1 1 2 2 2 2 2 2]
[1 1 1 1 1 1 1 1]
[0 0 1 1 1 1 1 1]
Success [0 6 4 7 1 3 5 2]
[3 3 3 3 3 3 4 4]
[1 1 2 2 2 2 2

[0 1 1 1 1 1 1 1]
Success [3 1 6 2 5 7 0 4]
[4 4 4 4 4 5 5 5]
[2 2 2 3 3 3 3 3]
[1 1 1 1 2 2 2 2]
[0 1 1 1 1 1 2 2]
Success [6 1 5 2 0 3 7 4]
[3 4 4 4 4 4 4 4]
[2 2 2 2 2 2 3 3]
[1 1 1 1 1 1 1 1]
[0 0 1 1 1 1 1 1]
Success [5 3 0 4 7 1 6 2]
[3 4 4 4 4 4 4 5]
[2 2 2 2 2 2 3 3]
[0 1 1 1 1 1 1 2]
Success [1 3 5 7 2 0 6 4]
[2 2 3 3 3 3 3 3]
[1 1 1 1 2 2 2 2]
[1 1 1 1 1 1 1 2]
[1 1 1 1 1 1 1 1]
[0 1 1 1 1 1 1 1]
Success [4 6 1 5 2 0 7 3]
[3 4 4 4 4 4 4 4]
[2 2 2 2 2 2 3 3]
[0 1 1 1 1 1 1 2]
Success [6 4 2 0 5 7 1 3]
[4 5 5 5 5 5 5 5]
[2 2 3 3 3 3 3 3]
[1 1 1 1 1 1 1 2]
[1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1]
0
[4 5 5 5 5 5 5 5]
[3 3 3 3 3 3 3 3]
[1 2 2 2 2 2 2 2]
[1 1 1 1 1 1 1 1]
[0 1 1 1 1 1 1 1]
Success [5 2 6 1 3 7 0 4]
[2 2 3 3 3 3 3 3]
[1 1 1 1 2 2 2 2]
[0 1 1 1 1 1 1 1]
Success [3 1 4 7 5 0 2 6]
[3 3 3 3 4 4 4 4]
[1 1 2 2 2 2 2 2]
[1 1 1 1 1 2 2 2]
[1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1]
0
[1 1 3 3 3 3 3 3]
[1 1 1 1 1 1 1 1]
[0 1 1 1 1 1 1 1]
Success [4 1 5 0 6 3 7 2]
[3 3 4 4 4 4 4 4]
[2 2 2

Win ratio: 0.8016194331983806
Average fail time: 4.0
Average win time: 2.792929292929293
