In [None]:
import torch
import numpy as np

from offline_generator import OfflineDataGenerator
from AstroCraft.PettingZoo_MA.env.CaptureTheFlagMA import CTFENVMA

from train import *
from itertools import combinations_with_replacement

In [None]:
# Seed the RNGs this notebook calls directly (np.random for bot parameters,
# torch for tensors) so a Restart & Run All starts from the same random state.
# NOTE(review): the environment and models may use additional RNGs that are
# not visible from this notebook -- confirm if bit-exact reproducibility matters.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Offline dataset used to construct the two policy networks.
dataset = AstroCraftData("./offline_data/0")

# Two independent model instances so that two checkpoints can play each other.
model1 = CQLA2C(dataset)
model2 = CQLA2C(dataset)

# Shared environment used by every *_v_* helper in this notebook.
env = CTFENVMA(1, 1, 0)

In [None]:
def model_v_bot(model, bot):
    """Play one episode with ``model`` as player0 and ``bot`` as player1.

    Relies on the notebook-level ``env`` and ``device`` globals.

    Args:
        model: object whose ``select_action(state, rew, term, trunc, info, hn, cn)``
            returns ``(action, (hn, cn))``.
        bot: object whose ``select_action(state, rew, term, trunc, info)``
            returns an action.

    Returns:
        player0's reward from the final environment step.
    """
    # Reset environment
    state, info = env.reset()
    rew = {'player0': 0, 'player1': 0}
    term = {'player0': False, 'player1': False}
    trunc = {'player0': False, 'player1': False}

    # (hn, cn) is handed to and returned by select_action on every step;
    # presumably the policy's recurrent hidden/cell state -- starts at zero.
    hn = torch.zeros(1, 128).to(device)
    cn = torch.zeros(1, 128).to(device)

    # Continue playing until the game is over
    while True:
        action0, (hn, cn) = model.select_action(
            state['player0'], rew['player0'], term['player0'],
            trunc['player0'], info['player0'], hn, cn)
        action1 = bot.select_action(
            state['player1'], rew['player1'], term['player1'],
            trunc['player1'], info['player1'])

        state, rew, term, trunc, info = env.step(
            {'player0': action0, 'player1': action1})

        # Over when either player terminates or both are truncated.
        if term['player0'] or term['player1'] or (trunc['player0'] and trunc['player1']):
            return rew['player0']
        
def bot_v_model(model, bot):
    """Play one episode with ``bot`` as player0 and ``model`` as player1.

    Relies on the notebook-level ``env`` and ``device`` globals.

    Args:
        model: object whose ``select_action(state, rew, term, trunc, info, hn, cn)``
            returns ``(action, (hn, cn))``.
        bot: object whose ``select_action(state, rew, term, trunc, info)``
            returns an action.

    Returns:
        player1's (the model's) reward from the final environment step.
    """
    # Reset environment
    state, info = env.reset()
    rew = {'player0': 0, 'player1': 0}
    term = {'player0': False, 'player1': False}
    trunc = {'player0': False, 'player1': False}

    # (hn, cn) is handed to and returned by select_action on every step;
    # presumably the policy's recurrent hidden/cell state -- starts at zero.
    hn = torch.zeros(1, 128).to(device)
    cn = torch.zeros(1, 128).to(device)

    # Continue playing until the game is over
    while True:
        # Same call order as before: the model acts first, then the bot.
        action1, (hn, cn) = model.select_action(
            state['player1'], rew['player1'], term['player1'],
            trunc['player1'], info['player1'], hn, cn)
        action0 = bot.select_action(
            state['player0'], rew['player0'], term['player0'],
            trunc['player0'], info['player0'])

        state, rew, term, trunc, info = env.step(
            {'player0': action0, 'player1': action1})

        # Over when either player terminates or both are truncated.
        if term['player0'] or term['player1'] or (trunc['player0'] and trunc['player1']):
            return rew['player1']
        
def model_v_model(model0, model1):
    """Play one episode with ``model0`` as player0 and ``model1`` as player1.

    Relies on the notebook-level ``env`` and ``device`` globals.

    Args:
        model0, model1: objects whose
            ``select_action(state, rew, term, trunc, info, hn, cn)`` returns
            ``(action, (hn, cn))``.

    Returns:
        ``(rew0, rew1)``: both players' rewards from the final environment step.
    """
    # Reset environment
    state, info = env.reset()
    rew = {'player0': 0, 'player1': 0}
    term = {'player0': False, 'player1': False}
    trunc = {'player0': False, 'player1': False}

    # Each model gets its own zero-initialised (hn, cn) pair, threaded through
    # its select_action calls; presumably the recurrent hidden/cell state.
    hn0 = torch.zeros(1, 128).to(device)
    cn0 = torch.zeros(1, 128).to(device)
    hn1 = torch.zeros(1, 128).to(device)
    cn1 = torch.zeros(1, 128).to(device)

    # Continue playing until the game is over
    while True:
        action0, (hn0, cn0) = model0.select_action(
            state['player0'], rew['player0'], term['player0'],
            trunc['player0'], info['player0'], hn0, cn0)
        action1, (hn1, cn1) = model1.select_action(
            state['player1'], rew['player1'], term['player1'],
            trunc['player1'], info['player1'], hn1, cn1)

        state, rew, term, trunc, info = env.step(
            {'player0': action0, 'player1': action1})

        # Over when either player terminates or both are truncated.
        if term['player0'] or term['player1'] or (trunc['player0'] and trunc['player1']):
            return rew['player0'], rew['player1']
        
def bot_v_bot(bot1, bot2):
    """Play one episode with ``bot1`` as player0 and ``bot2`` as player1.

    Relies on the notebook-level ``env`` global.

    Args:
        bot1, bot2: objects whose ``select_action(state, rew, term, trunc, info)``
            returns an action.

    Returns:
        player1's (``bot2``'s) reward from the final environment step.
    """
    # Reset environment
    state, info = env.reset()
    rew = {'player0': 0, 'player1': 0}
    term = {'player0': False, 'player1': False}
    trunc = {'player0': False, 'player1': False}

    # Continue playing until the game is over
    while True:
        action0 = bot1.select_action(
            state['player0'], rew['player0'], term['player0'],
            trunc['player0'], info['player0'])
        action1 = bot2.select_action(
            state['player1'], rew['player1'], term['player1'],
            trunc['player1'], info['player1'])

        state, rew, term, trunc, info = env.step(
            {'player0': action0, 'player1': action1})

        # Over when either player terminates or both are truncated.
        if term['player0'] or term['player1'] or (trunc['player0'] and trunc['player1']):
            return rew['player1']

# Heatmap
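Play 100 games for each pairing of model checkpoints, and for each checkpoint against randomly parameterised bots. Record the win and draw rates of the first agent in each pairing.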

In [None]:
weights = ['0','4','8']
outcomes = {}   # (player0 checkpoint, player1 checkpoint) -> player0's win/draw rates

N_GAMES = 100

# Every unordered checkpoint pairing, including a checkpoint against itself.
for w1, w2 in combinations_with_replacement(weights, r=2):
    # Load the two checkpoints into the two model instances
    model1.load_weights(f"./weights/{w1}.pth")
    model2.load_weights(f"./weights/{w2}.pth")
    m1, m2 = [], []

    for game in range(N_GAMES):
        # '\r' keeps the progress readout on a single line
        print("({},{})\tgame:{}".format(w1,w2,game), end='\r')

        s1, s2 = model_v_model(model1, model2)
        m1.append(s1)
        m2.append(s2)

    # A final reward of 1 counts as a win, 0 as a draw (from player0's side)
    outcomes[(w1,w2)] = {"W": m1.count(1) / N_GAMES, "D": m1.count(0) / N_GAMES}
        

In [None]:
weights = ['0','4','8']

N_GAMES = 100
# (low, high) uniform sampling range for each of the seven orbital weights
ORBITAL_RANGES = [(.1,.2), (.17,.2), (.12,.15), (0,.12), (0,.12), (.18,.27), (0,.17)]

for w1 in weights:
    # Load the checkpoint under evaluation
    model1.load_weights(f"./weights/{w1}.pth")
    m1 = []

    for game in range(N_GAMES):

        # Sample the behaviour parameters of a fresh opponent bot
        p_capture_slow = np.random.uniform(0,.5)
        p_return_slow = np.random.uniform(.5,.7)
        p_capture_fast = np.random.uniform(.337,1)
        p_return_fast = np.random.uniform(.45,1)
        p_intercept_slow = np.random.uniform(.5,1)
        p_intercept_fast = np.random.uniform(.62,1)

        # Redraw the orbital weights until their sum is within 0.1 of 1 ...
        while True:
            orbitals = [np.random.uniform(lo, hi) for lo, hi in ORBITAL_RANGES]
            orb_norm = sum(orbitals)
            if not abs(orb_norm - 1) > .1:
                break

        # ... then rescale them so they sum to exactly 1
        orbitals = [x/orb_norm for x in orbitals]

        p_dodge = np.random.uniform(.48,1)
        p_random_traj_change = 0

        bot = OfflineDataGenerator(
            1,
            p_capture_slow, p_return_slow,
            p_capture_fast, p_return_fast,
            p_intercept_slow, p_intercept_fast,
            orbitals, p_dodge, p_random_traj_change,
        )
        print("({})\tgame:{}".format(w1,game), end='\r')

        s1 = model_v_bot(model1, bot)
        m1.append(s1)

    # A final reward of 1 counts as a win, 0 as a draw (from the model's side)
    outcomes[(w1,'bot')] = {"W": m1.count(1) / N_GAMES, "D": m1.count(0) / N_GAMES}

In [None]:
# Display the win ("W") and draw ("D") rates recorded so far, keyed by pairing.
outcomes

In [None]:
def _sample_bot():
    """Build an OfflineDataGenerator with freshly sampled behaviour parameters.

    The draws from ``np.random`` happen in a fixed order (capture/return/
    intercept probabilities, then the orbital weights, then the dodge
    probability), so two consecutive calls consume the global RNG exactly as
    the two inlined copies of this code used to.
    """
    p_capture_slow = np.random.uniform(0,.5)
    p_return_slow = np.random.uniform(.5,.7)
    p_capture_fast = np.random.uniform(.337,1)
    p_return_fast = np.random.uniform(.45,1)
    p_intercept_slow = np.random.uniform(.5,1)
    p_intercept_fast = np.random.uniform(.62,1)

    # Redraw the seven orbital weights until their sum is within 0.1 of 1,
    # then rescale so they sum to exactly 1.
    orb_norm = 0
    while abs(orb_norm - 1) > .1:
        orbitals = [
            np.random.uniform(.1,.2),
            np.random.uniform(.17,.2),
            np.random.uniform(.12,.15),
            np.random.uniform(0,.12),
            np.random.uniform(0,.12),
            np.random.uniform(.18,.27),
            np.random.uniform(0,.17),
        ]
        orb_norm = sum(orbitals)
    orbitals = [x/orb_norm for x in orbitals]

    p_dodge = np.random.uniform(.48,1)
    p_random_traj_change = 0

    return OfflineDataGenerator(1, p_capture_slow, p_return_slow, p_capture_fast, p_return_fast, p_intercept_slow, p_intercept_fast, orbitals, p_dodge, p_random_traj_change)


# Final-step rewards of the second bot (player1), one entry per game:
# bot_v_bot returns player1's reward.
m1 = []
for game in range(100):
    # Two independently sampled bots play each other
    bot1 = _sample_bot()
    bot2 = _sample_bot()
    print("game:{}".format(game), end='\r')

    s1 = bot_v_bot(bot1, bot2)
    m1.append(s1)

# ELO and Line Chart Data
Play 100 games between each model checkpoint and randomly parameterised bots, and between every pair of distinct model checkpoints, recording the per-game outcomes to show progression over training.

In [None]:
weights = list(map(str, range(9)))
outcomes2 = {}   # pairing -> list of the first agent's per-game final rewards

N_GAMES = 100
# (low, high) uniform sampling range for each of the seven orbital weights
ORBITAL_RANGES = [(.1,.2), (.17,.2), (.12,.15), (0,.12), (0,.12), (.18,.27), (0,.17)]

for w1 in weights:
    # Load the checkpoint under evaluation
    model1.load_weights(f"./weights/{w1}.pth")
    m1 = []

    for game in range(N_GAMES):

        # Sample the behaviour parameters of a fresh opponent bot
        p_capture_slow = np.random.uniform(0,.5)
        p_return_slow = np.random.uniform(.5,.7)
        p_capture_fast = np.random.uniform(.337,1)
        p_return_fast = np.random.uniform(.45,1)
        p_intercept_slow = np.random.uniform(.5,1)
        p_intercept_fast = np.random.uniform(.62,1)

        # Redraw the orbital weights until their sum is within 0.1 of 1 ...
        while True:
            orbitals = [np.random.uniform(lo, hi) for lo, hi in ORBITAL_RANGES]
            orb_norm = sum(orbitals)
            if not abs(orb_norm - 1) > .1:
                break

        # ... then rescale them so they sum to exactly 1
        orbitals = [x/orb_norm for x in orbitals]

        p_dodge = np.random.uniform(.48,1)
        p_random_traj_change = 0

        bot = OfflineDataGenerator(
            1,
            p_capture_slow, p_return_slow,
            p_capture_fast, p_return_fast,
            p_intercept_slow, p_intercept_fast,
            orbitals, p_dodge, p_random_traj_change,
        )
        print("({})\tgame:{}".format(w1,game), end='\r')

        s1 = model_v_bot(model1, bot)
        m1.append(s1)

    # Keep the raw per-game rewards (not rates) for this checkpoint
    outcomes2[(w1,'bot')] = m1

In [None]:
weights = list(map(str, range(9)))

N_GAMES = 100

# Only pairings of two different checkpoints are played here.
distinct_pairs = [
    (a, b)
    for a, b in combinations_with_replacement(weights, r=2)
    if a != b
]

for w1, w2 in distinct_pairs:
    # Load the two checkpoints into the two model instances
    model1.load_weights(f"./weights/{w1}.pth")
    model2.load_weights(f"./weights/{w2}.pth")
    m1 = []

    for game in range(N_GAMES):
        print("({},{})\tgame:{}".format(w1,w2,game), end='\r')

        s1, s2 = model_v_model(model1, model2)
        m1.append(s1)

    # Keep player0's raw per-game rewards (not rates) for this pairing
    outcomes2[(w1,w2)] = m1
        

# Radar
Play games between selected model checkpoints and a bot. Record behavioural statistics for each checkpoint (e.g. fuel use, episode length, action entropy and draw rate).

In [None]:
# A slightly modified model_v_bot records the right data for this section

def model_v_bot(model, bot):
    """Play one model-vs-bot episode and collect behaviour statistics.

    ``model`` plays player0 and ``bot`` plays player1. Relies on the
    notebook-level ``env`` and ``device`` globals.

    NOTE: this definition replaces the earlier ``model_v_bot`` in this notebook
    and returns a tuple instead of a single reward, so cells written against
    the earlier version must not be re-run after this cell.

    Args:
        model: object whose ``select_action(state, rew, term, trunc, info, hn, cn)``
            returns ``(action, (hn, cn))``; the action must be usable as an
            index into a length-14 array.
        bot: object whose ``select_action(state, rew, term, trunc, info)``
            returns an action.

    Returns:
        A 6-tuple ``(rew0, method, max_orbitals, fuel, timesteps, actions0)``:
            rew0: player0's reward from the final step.
            method: one of ``'Tag'``, ``"Fuel"``, ``'Flag'``, ``"Time"``.
            max_orbitals: number of steps on which the model chose action 1 or 7.
            fuel: ``1000 - state0['observation'][1,3]`` from the final
                observation (the caller accumulates this as fuel use).
            timesteps: number of environment steps played.
            actions0: length-14 array counting how often the model chose each
                action.
    """
    # Reset environment
    state, info = env.reset()
    rew = {'player0': 0, 'player1': 0}
    term = {'player0': False, 'player1': False}
    trunc = {'player0': False, 'player1': False}

    max_orbitals = 0
    timesteps = 0
    actions0 = np.zeros(14)
    last_nonzero_0 = None
    last_nonzero_1 = None

    # (hn, cn) is handed to and returned by select_action on every step;
    # presumably the policy's recurrent hidden/cell state -- starts at zero.
    hn = torch.zeros(1, 128).to(device)
    cn = torch.zeros(1, 128).to(device)

    # Continue playing until the game is over
    while True:
        timesteps += 1
        action0, (hn, cn) = model.select_action(
            state['player0'], rew['player0'], term['player0'],
            trunc['player0'], info['player0'], hn, cn)
        action1 = bot.select_action(
            state['player1'], rew['player1'], term['player1'],
            trunc['player1'], info['player1'])
        actions0[action0] += 1

        # Remember each player's most recent non-zero action.
        if action0 != 0:
            last_nonzero_0 = action0

        if action1 != 0:
            last_nonzero_1 = action1

        if action0 == 1 or action0 == 7:
            max_orbitals += 1

        state, rew, term, trunc, info = env.step(
            {'player0': action0, 'player1': action1})
        state0 = state['player0']
        state1 = state['player1']
        rew0 = rew['player0']

        if term['player0'] or term['player1'] or (trunc['player0'] and trunc['player1']):

            # Check if game ended by intercept: a player's last non-zero action
            # was 12 or 13 AND the opponent's observation[1,2] is 0.
            # The membership test is evaluated before the `and`; the previous
            # `x == 12 or x == 13 and cond` parsed as `x == 12 or (x == 13 and
            # cond)`, so action 12 reported 'Tag' without the observation check.
            # NOTE(review): observation[1,2] is assumed to be a per-player
            # status value that is 0 once that player is out -- confirm.
            tagged_by_model = last_nonzero_0 in (12, 13) and state1['observation'][1,2] == 0
            tagged_by_bot = last_nonzero_1 in (12, 13) and state0['observation'][1,2] == 0

            if tagged_by_model or tagged_by_bot:
                method = 'Tag'

            # Check if game ended by running out of fuel
            elif state1['observation'][1,2] == 0:
                method = "Fuel"

            # Check if game ended by flag return
            elif last_nonzero_0 in (8, 9):
                method = 'Flag'

            # Otherwise the episode is attributed to the time limit
            else:
                method = "Time"

            # Reward, method, max_orbitals, fuel, timesteps, action counts
            return rew0, method, max_orbitals, 1000-state0['observation'][1,3], timesteps, actions0

In [None]:
weights = ['0', '4', '8']
outcomes3 = {}   # checkpoint -> behaviour statistics for the Radar section

N_GAMES = 100

for w in weights:

    # Load weights
    model1.load_weights("./weights/"+w+".pth")
    outcomes3[w] = {
        "Max Orbital": 0,
        "Aggression": 0,
        "Fuel Use": 0,
        "Episode Length": 0,
        "Entropy": 0,
        "Draw Rate": 0
        }
    # Action counts summed over all games of this checkpoint
    tot_act = np.zeros(14)

    for game in range(N_GAMES):
        print("({})\tgame:{}".format(w,game), end='\r')
        # NOTE(review): `bot` is not built in this cell -- it is whatever bot an
        # earlier cell left in the notebook namespace, so every game here faces
        # the same opponent. Confirm that this is intended.
        # Returns: reward, method, orbital-action count, fuel, timesteps, action counts
        r, method, maxorb, fuel, T, actions0 = model_v_bot(model1, bot)

        # NOTE(review): 1000 and 672 look like the fuel budget and the maximum
        # episode length used for scaling to [0, 1] -- confirm.
        outcomes3[w]['Fuel Use'] += fuel/1000
        outcomes3[w]['Episode Length'] += T/672
        # Counts games in which an orbital action (1 or 7) was used at least once
        outcomes3[w]['Max Orbital'] += (maxorb > 0)
        tot_act += actions0

        if r == 1 and method == 'Tag':
            outcomes3[w]['Aggression'] += 1

        elif r == 0:
            outcomes3[w]['Draw Rate'] += 1

    # Turn the accumulated sums into per-game averages. This must walk this
    # checkpoint's own statistics: `outcomes` is keyed by pairing tuples, so
    # the previous `outcomes[w]` lookup raised KeyError. 'Entropy' is still 0
    # at this point and is overwritten below.
    for k in outcomes3[w]:
        outcomes3[w][k] /= N_GAMES

    # Shannon entropy (in nats) of the empirical action distribution; the clip
    # avoids log(0) for actions that were never chosen.
    rates = tot_act / np.sum(tot_act)
    outcomes3[w]['Entropy'] = -np.dot(rates, np.log(np.clip(rates, 1e-12, None)))