### Objective: 
In this notebook, I am exploring the performance of different agents to see how they perform
against built in agents. 

#### Import modules

In [1]:
import numpy as np
import pandas as pd
from os import getcwd,listdir
from os.path import split, join
import sys
import random
from collections import namedtuple, OrderedDict
from tqdm import tqdm_notebook as pbar
# set the base path
base_path = split(getcwd())[0]
sys.path.append(base_path)


from isolation import Board
from sample_players import (RandomPlayer, open_move_score,
                            improved_score, center_score)
from game_agent import (MinimaxPlayer, AlphaBetaPlayer, custom_score,
                        custom_score_2, custom_score_3)

from tournament import *
from custom_builders import (exponent_score_builder, 
                             improved_weighted_center_score_builder,
                             improved_weighted_score_builder)

In [2]:
NUM_MATCHES = 5 # number of matches against each opponent
TIME_LIMIT = 10  # number of milliseconds before timeout
Agent = namedtuple("Agent", ["player", "name"])

# Define a collection of agents to compete against the test agents
cpu_agents = [
    Agent(RandomPlayer(), "Random"),
    Agent(MinimaxPlayer(score_fn=open_move_score), "MM_Open"),
    Agent(MinimaxPlayer(score_fn=center_score), "MM_Center"),
    Agent(MinimaxPlayer(score_fn=improved_score), "MM_Improved"),
    Agent(AlphaBetaPlayer(score_fn=open_move_score), "AB_Open"),
    Agent(AlphaBetaPlayer(score_fn=center_score), "AB_Center"),
    Agent(AlphaBetaPlayer(score_fn=improved_score), "AB_Improved")
]


def play_game_set(test_agent, cpu_agent, win_records,time_limit):
    """plays a single game set between player_agent and cpu_agent"""
    
    for _ in range(2):
        games = [Board(test_agent.player, cpu_agent.player), Board(cpu_agent.player,test_agent.player)]

    # play a rando move for the first player
    move = random.choice(games[0].get_legal_moves())
    for game in games: 
        game.apply_move(move)

    for game in games: 
        winner, _, termination = game.play(time_limit=time_limit)
        if winner==test_agent.player: 
            win_records[test_agent.name][cpu_agent.name]+=1

def create_win_records(test_agents, cpu_agents):
    """ creates a dict for the holding records"""
    return {agent.name: {agent.name:0 for agent in cpu_agents} for agent in test_agents}


def play_matches(test_agents,cpu_agents, num_matches, time_limit=150):
    """Plays a series of matches between the test agents and cpu agent
    """
    win_records = create_win_records(test_agents, cpu_agents)
    
    for ta in pbar(test_agents):
        for ca in pbar(cpu_agents):
            for _ in pbar(range(num_matches)):
                play_game_set(ta,ca,win_records,time_limit) 
    return pd.DataFrame(win_records).T



def random_improved_weighted_agents(n, min_w=0,max_w=10 ):
    """generates a list of random weighted agents"""
    ws = np.random.uniform(min_w,max_w, n)
    agents = [Agent(AlphaBetaPlayer(score_fn=improved_weighted_score_builder(w)), 
                  "AB_WeightedImproved (w:%f)"%w  ) for w in ws]
    description = pd.DataFrame({'Weight':ws})
    description['Agent Type']= 'Improved Weighted'
    description['Lambda']=1
    description['K']=1
    
    return agents, description
    
    
def random_improved_weighted_center_agents(n, min_w=0,max_w=10 ):
    """generates a list of random weighted agents"""
    ws = np.random.uniform(min_w,max_w, n)
    ls = np.random.uniform(0,1,n)
    agents = [Agent(AlphaBetaPlayer(score_fn=improved_weighted_center_score_builder(ws[i],ls[i])), 
                    "AB_WeightedCenter (w:%f,lamba:%f)"%(ws[i],ls[i])) for i in range(n)]
    
    
    description = pd.DataFrame({'Weight':ws, 'Lambda':ls})
    description['Agent Type']= 'Improved Weighted Centered'
    description['K']=1
    
    return agents, description


def random_exponential_agents(n, min_k=1.1,max_k=3 ):
    """generates a list of random weighted agents"""
    ks = np.random.uniform(min_k,max_k, n,)

    agents = [Agent(AlphaBetaPlayer(score_fn=exponent_score_builder(k)),
                    "AB_Exp (k:%f)"%(k)) for k in ks]
    
    
    description = pd.DataFrame({'K':ks})
    description['Agent Type']= 'Exponential'
    description['Weight']=1
    description['Lambda']=1
                               
    return agents, description
    
    
def create_random_test_agent_list(n=10,seed = 123):
    """Creates a random test agent list to play games
    Args:
        n(int): the number of agents per custom type
    Returns:
        test_agents(list): a list of the agents
        test_agent_description(dataframe): a data frame of the players
    """
    np.random.seed(seed)
    
    agents=[Agent(AlphaBetaPlayer(score_fn=improved_score), "AB_Improved")]
    desc=[pd.DataFrame({'Agent Type':"AB_Improved", 'K':1, 'Lambda':1, 'Weight':1},index =[1])]
    
    for f in [random_improved_weighted_agents,random_improved_weighted_center_agents]:# , random_exponential_agents]:
        
    
        a, d =  f(n)
        agents.extend(a)
        desc.append(d)
    return agents, pd.concat(desc).reset_index(drop=True)
    

In [3]:
def main(n=10, seed=123):
    # Define a collection of agents to compete against the test agents
    cpu_agents = [
        Agent(RandomPlayer(), "Random"),
        Agent(MinimaxPlayer(score_fn=open_move_score), "MM_Open"),
        Agent(MinimaxPlayer(score_fn=center_score), "MM_Center"),
        Agent(MinimaxPlayer(score_fn=improved_score), "MM_Improved"),
        Agent(AlphaBetaPlayer(score_fn=open_move_score), "AB_Open"),
        Agent(AlphaBetaPlayer(score_fn=center_score), "AB_Center"),
        Agent(AlphaBetaPlayer(score_fn=improved_score), "AB_Improved")
    ]
    
    test_agents, descriptions= create_random_test_agent_list(n,seed)
    print('Total games per test agent is %d.'%(2*len(cpu_agents)*10))
    print('The total number of games that will be played is %d'%(2*len(cpu_agents)*len(test_agents)*10))

    
    win_record=play_matches(test_agents,cpu_agents, num_matches=10, time_limit=150)
    
    return win_record,descriptions

``` Python
win_record, descriptions = main()
win_record['total wins'] = win_record.iloc[:,:-2].sum(axis=1)
win_record['Fraction Won'] = win_record['total wins']/(2*7*10)
win_record = win_record.sort_values('Fraction Won', ascending = False)
win_record['relative strength'] = win_record['Fraction Won']/win_record.loc['AB_Improved']['Fraction Won']
win_record.to_csv('GameSimulationResults.csv')

```

In [25]:
win_record

Unnamed: 0,AB_Center,AB_Improved,AB_Open,MM_Center,MM_Improved,MM_Open,Random,total wins,Fraction Won,relative strength
AB_WeightedImproved (w:2.268515),15,15,11,17,16,16,18,216,1.542857,1.136842
"AB_WeightedCenter (w:1.754518,lamba:0.228263)",13,12,12,18,17,15,20,214,1.528571,1.126316
"AB_WeightedCenter (w:1.824917,lamba:0.361789)",8,14,14,17,16,18,19,212,1.514286,1.115789
"AB_WeightedCenter (w:3.980443,lamba:0.722443)",11,9,7,19,15,19,19,198,1.414286,1.042105
AB_WeightedImproved (w:3.921175),10,12,7,17,14,19,19,196,1.4,1.031579
"AB_WeightedCenter (w:7.379954,lamba:0.322959)",11,12,5,20,12,16,19,190,1.357143,1.0
AB_Improved,15,9,9,17,15,11,19,190,1.357143,1.0
"AB_WeightedCenter (w:5.315514,lamba:0.293714)",10,9,10,17,13,18,18,190,1.357143,1.0
"AB_WeightedCenter (w:0.596779,lamba:0.611024)",10,10,7,18,15,15,19,188,1.342857,0.989474
"AB_WeightedCenter (w:4.385722,lamba:0.724455)",7,9,7,17,16,17,20,186,1.328571,0.978947


In [30]:
descriptions.to_csv('player_descriptions.csv')

#### Simulate more agents.

In [4]:
win_record2, descriptions2 = main(20,513)

Total games per test agent is 140.
The total number of games that will be played is 5740





In [7]:

win_record2['total wins'] = win_record2.sum(axis=1)
win_record2['Fraction Won'] = win_record2['total wins']/(2*7*10)
win_record2 = win_record2.sort_values('Fraction Won', ascending = False)
win_record2['relative strength'] = win_record2['Fraction Won']/win_record2.loc['AB_Improved']['Fraction Won']

In [9]:
win_record2.head()

Unnamed: 0,AB_Center,AB_Improved,AB_Open,MM_Center,MM_Improved,MM_Open,Random,total wins,Fraction Won,relative strength
"AB_WeightedCenter (w:8.249055,lamba:0.163261)",12,16,12,19,16,18,19,112,0.8,1.217391
AB_WeightedImproved (w:0.989725),12,13,12,19,15,17,20,108,0.771429,1.173913
"AB_WeightedCenter (w:0.728707,lamba:0.078828)",12,13,8,17,15,19,19,103,0.735714,1.119565
"AB_WeightedCenter (w:6.759281,lamba:0.237363)",11,13,12,17,15,17,18,103,0.735714,1.119565
"AB_WeightedCenter (w:4.553225,lamba:0.265657)",14,10,8,20,15,16,19,102,0.728571,1.108696


In [10]:
# save
win_record2.to_csv('GameSimulationResults2.csv')

### Conclusions: 

We randomly generated a series of scoring functions and evaluated them againt players. 