In [None]:
# Run this cell if output errors occur

import locale


def _utf8_preferred_encoding(do_setlocale=True):
    """Replacement for ``locale.getpreferredencoding`` that always reports UTF-8.

    The parameter mirrors the standard-library signature so that callers
    passing ``False`` (positionally or by keyword) keep working; its value is
    ignored.
    """
    return "UTF-8"


# Monkeypatch: everything that asks for the preferred encoding from now on
# (in this kernel) is told "UTF-8".
locale.getpreferredencoding = _utf8_preferred_encoding

In [None]:
!pip3 install -U PySuperTuxKart

In [None]:
%load_ext autoreload
%autoreload 2

# Create pkl training/validation files for imitation learning

# We were given some agents to compete with. We have them play 500 training games (plus 50 validation games) against each other and record their actions. At the beginning of each game, we randomly set the puck's starting location and velocity.

In [None]:
from itertools import permutations
import os
import subprocess
import random
from tqdm import tqdm

folders_arr = ['agent_1', 'agent_2', 'agent_3', 'agent_4', 'agent_5']
# Every ordered pairing of two different agents (first agent, second agent).
pairs_arr = list(permutations(folders_arr, 2))


def record_random_games(out_dir, n_games, pairs):
  """Play `n_games` matches between randomly chosen agent pairs and save each one.

  For every game a pair is drawn from `pairs`, the ball location is drawn from
  [-30, 30] and the ball velocity from [-20, 20] (integers, per component), and
  `python3 -m tournament.runner` is launched with those values. The recording
  is written to `<out_dir>/<agent_a>_<agent_b>_<px>_<py>_<vx>_<vy>.pkl`.

  Args:
    out_dir: directory receiving the .pkl recordings; created if missing.
    n_games: number of games to play.
    pairs: sequence of (first_agent, second_agent) module names.
  """
  # The recording path is handed to the runner via "-s"; make sure its
  # directory exists before the first game.
  os.makedirs(out_dir, exist_ok=True)

  for _ in tqdm(range(n_games)):
    item = random.choice(pairs)
    pos_x = random.randint(-30, 30)
    pos_y = random.randint(-30, 30)
    vel_x = random.randint(-20, 20)
    vel_y = random.randint(-20, 20)
    data_f_save_name = f"{out_dir}/{item[0]}_{item[1]}_{pos_x}_{pos_y}_{vel_x}_{vel_y}.pkl"
    print(data_f_save_name)
    result = subprocess.run(["python3", "-m", "tournament.runner", item[0], item[1], "-s", data_f_save_name,
                             "-f", "600", "-m", "2", "--ball_location", str(pos_x), str(pos_y),
                             "--ball_velocity", str(vel_x), str(vel_y)])
    # Keep going on failure (one bad game should not abort the whole run),
    # but make the failure visible instead of silently ignoring it.
    if result.returncode != 0:
      print(f"WARNING: tournament.runner exited with code {result.returncode} for {data_f_save_name}")


record_random_games("train_files", 500, pairs_arr)
record_random_games("val_files", 50, pairs_arr)

In [None]:
from tqdm import tqdm
import numpy as np
import torch.utils.tensorboard as tb
import os
from torch.utils.data import Dataset, DataLoader
import torch
import pickle

# Code for dataset creation

In [None]:
def limit_period(angle):
    """Wrap a tensor of angles, expressed in units of pi, into [-1, 1).

    The period is 2 (a full turn once the angle has been divided by pi), so
    e.g. 1.5 maps to -0.5 and -1.5 maps to 0.5.
    """
    # Count of whole periods to remove so the result lands in [-1, 1).
    whole_periods = torch.floor(angle / 2 + 0.5)
    return angle - whole_periods * 2


def extract_features(pstate, soccer_state, opponent_state, team_id):
    """Turn one kart's view of the game into the 25-value model input.

    Every 3-D position is reduced to two coordinates by keeping components 0
    and 2. Headings are ``atan2`` angles in radians; each ``*_angle_difference``
    is ``(kart_angle - other_angle) / pi`` wrapped by ``limit_period``.

    Args:
        pstate: state dict of the controlled kart; reads ``['kart']['front']``,
            ``['kart']['location']`` and ``['kart']['velocity']``.
        soccer_state: dict providing ``['ball']['location']`` and
            ``['goal_line']``, the latter indexed by team id with each entry a
            list of 3-D points (their mean is used as the goal centre).
        opponent_state: sequence whose items 0 and 1 are the opposing karts'
            state dicts (``['kart']['location']`` / ``['kart']['velocity']``).
        team_id: 0 or 1. ``goal_line[team_id]`` is used for the goal-related
            angle/position features, ``goal_line[|team_id - 1|]`` only for the
            last distance feature.
            NOTE(review): which of the two is the goal to attack is not
            visible from this function - confirm against the runner.

    Returns:
        1-D float32 tensor with 25 entries, in this order:
        kart x, kart y, kart angle, kart->puck angle,
        opponent0 x, opponent0 y, opponent1 x, opponent1 y,
        kart->opponent0 angle, kart->opponent1 angle,
        goal centre x, goal centre y, puck->goal angle,
        kart/puck angle difference, kart/opponent0 angle difference,
        kart/opponent1 angle difference, kart/(puck->goal) angle difference,
        kart speed, kart-puck distance, puck x, puck y,
        opponent0 speed, opponent1 speed,
        puck-goal distance, puck-other-goal distance.

    Note:
        Directions are normalised by their length, so two coincident points
        (zero length) yield NaN angles.
    """

    def planar(point):
        # Keep components 0 and 2 of a 3-D point.
        return torch.tensor(point, dtype=torch.float32)[[0, 2]]

    def heading(origin, target):
        # Angle of the unit vector pointing from `origin` to `target`.
        direction = (target - origin) / torch.norm(target - origin)
        return torch.atan2(direction[1], direction[0])

    def goal_center(team):
        # Mean of the goal-line points of `team`, reduced to two coordinates.
        return torch.tensor(soccer_state['goal_line'][team], dtype=torch.float32)[:, [0, 2]].mean(dim=0)

    def angle_difference_to(other_angle):
        # Kart heading relative to `other_angle`, in units of pi, wrapped.
        return limit_period((kart_angle - other_angle) / np.pi)

    # features of ego-vehicle
    kart_front = planar(pstate['kart']['front'])
    kart_center = planar(pstate['kart']['location'])
    kart_angle = heading(kart_center, kart_front)
    kart_velocity = np.linalg.norm(pstate['kart']['velocity'])

    # features of soccer
    puck_center = planar(soccer_state['ball']['location'])
    kart_to_puck_angle = heading(kart_center, puck_center)
    kart_to_puck_angle_difference = angle_difference_to(kart_to_puck_angle)
    distance_to_puck = np.linalg.norm(kart_center - puck_center)

    # features of opponents
    opponent_center0 = planar(opponent_state[0]['kart']['location'])
    opponent_center1 = planar(opponent_state[1]['kart']['location'])

    opponent_velocity0 = np.linalg.norm(opponent_state[0]['kart']['velocity'])
    opponent_velocity1 = np.linalg.norm(opponent_state[1]['kart']['velocity'])

    kart_to_opponent0_angle = heading(kart_center, opponent_center0)
    kart_to_opponent1_angle = heading(kart_center, opponent_center1)

    kart_to_opponent0_angle_difference = angle_difference_to(kart_to_opponent0_angle)
    kart_to_opponent1_angle_difference = angle_difference_to(kart_to_opponent1_angle)

    # features of score-line
    goal_line_center = goal_center(team_id)
    opponent_goal_line_center = goal_center(np.abs(team_id - 1))

    puck_to_goal_line_angle = heading(puck_center, goal_line_center)
    # Despite the name this compares the kart heading with the puck->goal direction.
    kart_to_goal_line_angle_difference = angle_difference_to(puck_to_goal_line_angle)

    puck_distance_to_the_goal_line = np.linalg.norm(puck_center - goal_line_center)
    puck_opponent_distance_to_goal = np.linalg.norm(puck_center - opponent_goal_line_center)

    # The order below is the model's input layout; do not reorder.
    features = torch.tensor([
        kart_center[0], kart_center[1], kart_angle, kart_to_puck_angle,
        opponent_center0[0], opponent_center0[1], opponent_center1[0], opponent_center1[1],
        kart_to_opponent0_angle, kart_to_opponent1_angle,
        goal_line_center[0], goal_line_center[1],
        puck_to_goal_line_angle, kart_to_puck_angle_difference,
        kart_to_opponent0_angle_difference, kart_to_opponent1_angle_difference,
        kart_to_goal_line_angle_difference,
        kart_velocity, distance_to_puck, puck_center[0], puck_center[1],
        opponent_velocity0, opponent_velocity1,
        puck_distance_to_the_goal_line, puck_opponent_distance_to_goal,
    ], dtype=torch.float32)

    return features


class Final_Project_Dataset(Dataset):
  """Imitation-learning dataset built from recorded game pickles.

  Every recorded frame yields four samples, one per kart: the two karts of
  team 1 (team id 0) followed by the two karts of team 2 (team id 1). Each
  sample is ``(features, [acceleration, steer, brake])`` where ``features``
  comes from ``extract_features`` and the action values are taken unchanged
  from the recording. All features are computed eagerly in ``__init__``.
  """

  def __init__(self, dataset_path):
    """Load every recording under `dataset_path` and precompute all features.

    Args:
      dataset_path: directory containing pickle files; each file may hold
        several consecutively pickled frame dicts with the keys
        'team1_state', 'team2_state', 'soccer_state' and 'actions'.
    """
    self.features = []
    self.actions_data = []

    # (player state, soccer state, opponent states, team id) per sample
    samples = []
    for entry in self._load_records(dataset_path):
      team1 = entry['team1_state']
      team2 = entry['team2_state']
      soccer = entry['soccer_state']
      actions = entry['actions']

      # Team 1's karts are paired with actions 0 and 2, team 2's karts with
      # actions 1 and 3. Each kart sees the other team as its opponents.
      per_kart = (
          (team1[0], team2, 0, 0),
          (team1[1], team2, 0, 2),
          (team2[0], team1, 1, 1),
          (team2[1], team1, 1, 3),
      )
      for player, opponents, team, action_index in per_kart:
        samples.append((player, soccer, opponents, team))
        action = actions[action_index]
        self.actions_data.append([action['acceleration'], action['steer'], action['brake']])

    for player, soccer, opponents, team in tqdm(samples):
      self.features.append(extract_features(player, soccer, opponents, team))

  @staticmethod
  def _load_records(dataset_path):
    """Return every object pickled in the regular files of `dataset_path`."""
    records = []
    # Sorted so the sample order does not depend on the filesystem's listing order.
    for file_name in sorted(os.listdir(dataset_path)):
      file_path = os.path.join(dataset_path, file_name)
      # Skip sub-directories such as Jupyter's ".ipynb_checkpoints";
      # open() on a directory would raise.
      if not os.path.isfile(file_path):
        continue
      with open(file_path, "rb") as f:
        # A file holds back-to-back pickles; read until the stream is exhausted.
        while True:
          try:
            # SECURITY: pickle.load can execute arbitrary code - only point
            # this at recordings you generated yourself.
            records.append(pickle.load(f))
          except EOFError:
            break
    return records

  def __len__(self):
    """Number of (features, action) samples."""
    return len(self.features)

  def __getitem__(self, idx):
    """Return ``(features, [acceleration, steer, brake])`` for sample `idx`."""
    return self.features[idx], self.actions_data[idx]

def load_data(dataset_path, num_workers=0, batch_size=128, shuffle=True, drop_last=True):
    """Build a DataLoader over the recordings stored in `dataset_path`.

    Args:
        dataset_path: directory handed to ``Final_Project_Dataset``.
        num_workers: DataLoader worker processes.
        batch_size: samples per batch.
        shuffle: whether to reshuffle the samples every epoch.
        drop_last: drop the final, smaller batch (default keeps the previous
            behaviour). Pass ``False`` for evaluation loaders so no sample is
            silently left out of the metrics.

    Returns:
        torch.utils.data.DataLoader yielding ``(features, actions)`` batches.
    """
    dataset = Final_Project_Dataset(dataset_path)
    return DataLoader(
        dataset,
        num_workers=num_workers,
        batch_size=batch_size,
        shuffle=shuffle,
        drop_last=drop_last,
    )


# Define model

In [None]:
class State_Based_Model(torch.nn.Module):
  """MLP policy mapping a state feature vector to (acceleration, steer, brake).

  A shared trunk (Linear -> PReLU -> Dropout(0.1) per hidden width, then a
  final Linear) feeds three bias-free linear heads: one acceleration value,
  three steering logits and one brake value.
  """

  def __init__(self, input_size=25, sizes=(64, 32, 32, 10)):
    """
    Args:
      input_size: number of input features.
      sizes: layer widths of the trunk; every entry but the last is a hidden
        layer, the last is the width of the trunk output fed to the heads.
        Any sequence (list or tuple) works.
    """
    super().__init__()
    layers = []
    in_features = input_size
    for width in sizes[:-1]:
      layers.append(torch.nn.Linear(in_features, width))
      layers.append(torch.nn.PReLU())
      layers.append(torch.nn.Dropout(0.1))
      in_features = width

    layers.append(torch.nn.Linear(in_features, sizes[-1]))
    self.net = torch.nn.Sequential(*layers)
    self.acceleration_net = torch.nn.Linear(sizes[-1], 1, bias=False)
    self.steer_net = torch.nn.Linear(sizes[-1], 3, bias=False)
    self.brake_net = torch.nn.Linear(sizes[-1], 1, bias=False)

  def forward(self, x):
    """Return ``(acceleration, steer, brake)`` for a batch of feature vectors.

    acceleration and brake are clamped to [0, 1]; steer is returned as three
    raw logits. Acceleration is forced to 0 wherever brake exceeds 0.5.
    """
    net_output = self.net(x)

    # NOTE: clamp has zero gradient outside [0, 1], so heads that overshoot
    # receive no learning signal from those samples.
    acceleration = torch.clamp(self.acceleration_net(net_output), 0, 1)
    steer = self.steer_net(net_output)
    brake = torch.clamp(self.brake_net(net_output), 0, 1)

    # Set acceleration = 0 if we are braking
    not_braking = 1. - (brake > 0.5).float()
    acceleration = acceleration * not_braking

    return acceleration, steer, brake

def save_model(model, model_path="models/model.th"):
    """Write the model's ``state_dict`` to `model_path`.

    Args:
        model: a ``torch.nn.Module``.
        model_path: destination file; defaults to the path ``load_model``
            reads by default. Missing parent directories are created.
    """
    from torch import save
    from os import makedirs, path

    target_dir = path.dirname(model_path)
    if target_dir:
        # torch.save does not create directories; a fresh checkout has no "models/".
        makedirs(target_dir, exist_ok=True)
    save(model.state_dict(), model_path)


def load_model(model_path="models/model.th"):
    """Create a default ``State_Based_Model`` and restore its weights.

    Args:
        model_path: file holding a ``state_dict`` as written by ``save_model``.

    Returns:
        The model with the stored weights, tensors mapped to the CPU. The
        model is returned as constructed (no ``eval()`` call is made here).
    """
    from torch import load

    state = load(model_path, map_location='cpu')
    restored = State_Based_Model()
    restored.load_state_dict(state)
    return restored

In [None]:
def get_mean_std(loader):
  """Per-feature mean and (population) standard deviation over a loader.

  Args:
    loader: iterable of ``(data, target)`` pairs where ``data`` is a tensor
      whose first dimension indexes samples; ``target`` is ignored.

  Returns:
    ``(mean, std)`` tensors shaped like one sample, in the dtype of the data
    (or the default float dtype for integer data).

  Raises:
    ZeroDivisionError: if the loader yields no batches.
  """
  feature_sum = 0
  square_sum = 0
  total_samples = 0
  result_dtype = None

  # Iterate over the DataLoader
  for data, _ in loader:
      if result_dtype is None:
          result_dtype = data.dtype if data.is_floating_point() else torch.get_default_dtype()
      # Accumulate in float64: E[x^2] - E[x]^2 cancels badly in float32 once
      # the sums run over many samples.
      data64 = data.to(torch.float64)
      feature_sum = feature_sum + data64.sum(dim=0)
      square_sum = square_sum + (data64 ** 2).sum(dim=0)
      total_samples += data.size(0)

  # Calculate the mean and std dev
  mean = feature_sum / total_samples
  # Rounding can push the variance of a (near-)constant feature slightly
  # below zero, which would make the square root NaN.
  variance = torch.clamp(square_sum / total_samples - mean ** 2, min=0)
  std = variance ** 0.5

  return mean.to(result_dtype), std.to(result_dtype)

# Train our model with imitation learning

In [None]:
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Both calls build the full dataset eagerly (every feature vector is computed
# inside Final_Project_Dataset.__init__), so this cell is slow.
# NOTE: load_data builds its DataLoader with drop_last=True, so the last
# partial batch of each loader - including the validation one - is skipped.
train_loader = load_data("train_files", batch_size=1024, shuffle=True)
val_loader = load_data("val_files", batch_size=2048, shuffle=False)

# Loss functions shared with the training cell.
mse_loss_fn = torch.nn.MSELoss()
# NOTE(review): bce_loss_fn is not referenced by the training cell in this
# notebook section - confirm whether it is still needed.
bce_loss_fn = torch.nn.BCEWithLogitsLoss()
ce_loss_fn = torch.nn.CrossEntropyLoss()

100%|██████████| 2005392/2005392 [23:12<00:00, 1440.46it/s]
100%|██████████| 208240/208240 [02:30<00:00, 1380.29it/s]


In [None]:
train_logger = tb.SummaryWriter("logs/train")
val_logger = tb.SummaryWriter("logs/val")

# A checkpoint is only written once the validation loss drops below this value.
best_val_loss = 2
best_train_loss = 2  # not used by this cell; kept so the notebook's globals are unchanged

global_step = 0

model = State_Based_Model(25, [64, 32, 32, 10])

optimizer = torch.optim.Adam(model.parameters(), lr=3e-3)
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.5)

model.to(device)

# Scalars written to TensorBoard, in logging order.
LOSS_TAGS = ('acceleration_loss', 'steer_loss', 'brake_loss', 'loss')


def _batch_losses(model, device, x_batch, y_batch):
  """Run `model` on one batch; return (losses dict, steer hits, brake hits).

  `y_batch` is indexed as [acceleration, steer, brake].
  NOTE(review): squeeze(dim=1) and the element-wise comparisons assume each
  action component is collated to shape (batch, 1) - confirm against the
  recorded action format.
  Uses the notebook-level `mse_loss_fn` and `ce_loss_fn`.
  """
  x_batch = x_batch.to(device)

  acceleration_true = y_batch[0].to(device).to(torch.float)
  # Our steering actions are encoded as -1, 0, 1. Convert them to 0, 1, 2
  steer_true = y_batch[1].to(device).to(torch.long).squeeze(dim=1) + 1
  brake_true = y_batch[2].to(device).to(torch.float)

  acceleration_pred, steer_pred, brake_pred = model(x_batch)

  losses = {
      'acceleration_loss': mse_loss_fn(acceleration_pred, acceleration_true),
      'steer_loss': ce_loss_fn(steer_pred, steer_true),
      'brake_loss': mse_loss_fn(brake_pred, brake_true),
  }
  losses['loss'] = losses['acceleration_loss'] + losses['steer_loss'] + losses['brake_loss']

  # argmax over the logits picks the same class as argmax over their softmax.
  steer_hits = (steer_pred.argmax(dim=1) == steer_true).cpu()
  brake_hits = ((brake_pred > 0.5).float() == brake_true).cpu()
  return losses, steer_hits, brake_hits


def _mean_or_nan(tensors, combine):
  """Mean over all elements of `tensors` (joined with `combine`); NaN if empty."""
  if not tensors:
    return float('nan')
  return combine(tensors).double().mean().item()


def _epoch_summary(loss_history, steer_hits, brake_hits):
  """Average the per-batch losses and the per-sample accuracy masks of one epoch."""
  summary = {tag: _mean_or_nan(values, torch.stack) for tag, values in loss_history.items()}
  summary['steer_acc'] = _mean_or_nan(steer_hits, torch.cat)
  summary['brake_acc'] = _mean_or_nan(brake_hits, torch.cat)
  return summary


def _print_summary(phase, summary):
  """Print one epoch's averages; `phase` is "Train" or "Val"."""
  print(f"{phase} loss = {summary['loss']}")
  print(f"Acceleration loss = {summary['acceleration_loss']}")
  print(f"Steer loss = {summary['steer_loss']}")
  print(f"Brake loss = {summary['brake_loss']}")
  print(f"Steer acc = {summary['steer_acc']}")
  print(f"Brake acc = {summary['brake_acc']}")


for epoch in range(1, 30):
  print(f"Epoch: {epoch}")

  # ---- training pass ----
  model.train()

  loss_history = {tag: [] for tag in LOSS_TAGS}
  steer_hits_arr = []
  brake_hits_arr = []

  for x_batch, y_batch in tqdm(train_loader):
    losses, steer_hits, brake_hits = _batch_losses(model, device, x_batch, y_batch)

    # log to tensorboard (one point per optimisation step)
    for tag in LOSS_TAGS:
      train_logger.add_scalar(tag, losses[tag], global_step=global_step)

    # Backward step
    optimizer.zero_grad()
    losses['loss'].backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.01)
    optimizer.step()
    global_step += 1

    for tag in LOSS_TAGS:
      loss_history[tag].append(losses[tag].detach().cpu())
    steer_hits_arr.append(steer_hits)
    brake_hits_arr.append(brake_hits)

  train_stats = _epoch_summary(loss_history, steer_hits_arr, brake_hits_arr)
  train_loss = train_stats['loss']
  _print_summary("Train", train_stats)

  # ---- validation pass ----
  model.eval()

  loss_history = {tag: [] for tag in LOSS_TAGS}
  steer_hits_arr = []
  brake_hits_arr = []

  # No gradients are needed for validation; skipping autograd saves memory and time.
  with torch.no_grad():
    for x_batch, y_batch in tqdm(val_loader):
      losses, steer_hits, brake_hits = _batch_losses(model, device, x_batch, y_batch)

      for tag in LOSS_TAGS:
        loss_history[tag].append(losses[tag].cpu())
      steer_hits_arr.append(steer_hits)
      brake_hits_arr.append(brake_hits)

  val_stats = _epoch_summary(loss_history, steer_hits_arr, brake_hits_arr)
  val_loss = val_stats['loss']
  _print_summary("Val", val_stats)

  # global_step does not advance during validation, so log the epoch averages
  # once instead of stacking every validation batch on the same step.
  for tag in LOSS_TAGS:
    val_logger.add_scalar(tag, val_stats[tag], global_step=global_step)

  # Save Model if the chosen metric is better
  if val_loss < best_val_loss:
    save_model(model)
    print("Model saved")
    best_val_loss = val_loss

  # LR scheduler step (halve the learning rate every third epoch)
  if epoch % 3 == 0:
    print("LR_Scheduler step")
    lr_scheduler.step()
    print(lr_scheduler.get_last_lr())

train_logger.flush()
val_logger.flush()


Epoch: 1


100%|██████████| 1958/1958 [00:40<00:00, 48.70it/s]


Train loss = 0.7116305232048035
Acceleration loss = 0.056832075119018555
Steer loss = 0.5694422721862793
Brake loss = 0.08535612374544144
Steer acc = 0.7576389332226762
Brake acc = 0.8789376715717568


100%|██████████| 101/101 [00:01<00:00, 86.58it/s]


Val loss = 0.5171760320663452
Acceleration loss = 0.018362050876021385
Steer loss = 0.4603717625141144
Brake loss = 0.03844226896762848
Steer acc = 0.8110835009282178
Brake acc = 0.9470964186262376
Model saved
Epoch: 2


100%|██████████| 1958/1958 [00:53<00:00, 36.59it/s]


Train loss = 0.546146810054779
Acceleration loss = 0.02601715549826622
Steer loss = 0.467780739068985
Brake loss = 0.052348971366882324
Steer acc = 0.8082531002617467
Brake acc = 0.9295039581205311


100%|██████████| 101/101 [00:01<00:00, 79.01it/s]


Val loss = 0.5088879466056824
Acceleration loss = 0.01778518781065941
Steer loss = 0.4548942744731903
Brake loss = 0.036208491772413254
Steer acc = 0.8104936958539604
Brake acc = 0.951239557549505
Model saved
Epoch: 3


100%|██████████| 1958/1958 [00:43<00:00, 44.83it/s]


Train loss = 0.514247715473175
Acceleration loss = 0.023728499189019203
Steer loss = 0.4412396252155304
Brake loss = 0.04927952215075493
Steer acc = 0.8200027730783963
Brake acc = 0.9339503599016854


100%|██████████| 101/101 [00:01<00:00, 77.38it/s]


Val loss = 0.4726211130619049
Acceleration loss = 0.01732630655169487
Steer loss = 0.4194371700286865
Brake loss = 0.03585762903094292
Steer acc = 0.8321472772277227
Brake acc = 0.9501614712252475
Model saved
LR_Scheduler step
[0.0015]
Epoch: 4


100%|██████████| 1958/1958 [00:42<00:00, 46.02it/s]


Train loss = 0.47032925486564636
Acceleration loss = 0.02146211639046669
Steer loss = 0.40303948521614075
Brake loss = 0.045827653259038925
Steer acc = 0.8397998595505618
Brake acc = 0.9388705790347294


100%|██████████| 101/101 [00:01<00:00, 76.17it/s]


Val loss = 0.43447086215019226
Acceleration loss = 0.01648823358118534
Steer loss = 0.38167160749435425
Brake loss = 0.03631108999252319
Steer acc = 0.8485747988861386
Brake acc = 0.9493734529702971
Model saved
Epoch: 5


100%|██████████| 1958/1958 [00:43<00:00, 44.94it/s]


Train loss = 0.4605969786643982
Acceleration loss = 0.021269474178552628
Steer loss = 0.39416274428367615
Brake loss = 0.04516473785042763
Steer acc = 0.8454148445480082
Brake acc = 0.9399468925561798


100%|██████████| 101/101 [00:01<00:00, 78.25it/s]


Val loss = 0.42545026540756226
Acceleration loss = 0.016419492661952972
Steer loss = 0.37145909667015076
Brake loss = 0.03757169097661972
Steer acc = 0.8545356977103961
Brake acc = 0.9484355662128713
Model saved
Epoch: 6


100%|██████████| 1958/1958 [00:44<00:00, 44.18it/s]


Train loss = 0.45418041944503784
Acceleration loss = 0.021220456808805466
Steer loss = 0.3877814710140228
Brake loss = 0.04517851397395134
Steer acc = 0.8491355576481103
Brake acc = 0.9400825539453525


100%|██████████| 101/101 [00:01<00:00, 77.04it/s]


Val loss = 0.4268920421600342
Acceleration loss = 0.01693766936659813
Steer loss = 0.3727426826953888
Brake loss = 0.03721175342798233
Steer acc = 0.8516350170173267
Brake acc = 0.9485709313118812
LR_Scheduler step
[0.00075]
Epoch: 7


100%|██████████| 1958/1958 [00:42<00:00, 45.65it/s]


Train loss = 0.43026506900787354
Acceleration loss = 0.02029387839138508
Steer loss = 0.3662623167037964
Brake loss = 0.04370887950062752
Steer acc = 0.86110667773238
Brake acc = 0.9420735843335035


100%|██████████| 101/101 [00:01<00:00, 78.48it/s]


Val loss = 0.3981814384460449
Acceleration loss = 0.016107501462101936
Steer loss = 0.34539365768432617
Brake loss = 0.03668032959103584
Steer acc = 0.8703540764232673
Brake acc = 0.9482518564356436
Model saved
Epoch: 8


100%|██████████| 1958/1958 [00:43<00:00, 45.09it/s]


Train loss = 0.42605188488960266
Acceleration loss = 0.020160671323537827
Steer loss = 0.36255648732185364
Brake loss = 0.04333476349711418
Steer acc = 0.8630977081205311
Brake acc = 0.9427947842185904


100%|██████████| 101/101 [00:01<00:00, 75.92it/s]


Val loss = 0.3942980170249939
Acceleration loss = 0.01522624772042036
Steer loss = 0.3441507816314697
Brake loss = 0.03492104262113571
Steer acc = 0.8700301670792079
Brake acc = 0.9517036664603961
Model saved
Epoch: 9


100%|██████████| 1958/1958 [00:43<00:00, 44.94it/s]


Train loss = 0.4228508770465851
Acceleration loss = 0.019992010667920113
Steer loss = 0.35978373885154724
Brake loss = 0.04307512938976288
Steer acc = 0.8641650440500511
Brake acc = 0.943096531058478


100%|██████████| 101/101 [00:01<00:00, 77.94it/s]


Val loss = 0.3876701593399048
Acceleration loss = 0.015867799520492554
Steer loss = 0.33634018898010254
Brake loss = 0.03546212613582611
Steer acc = 0.8735593285891089
Brake acc = 0.9509833307549505
Model saved
LR_Scheduler step
[0.000375]
Epoch: 10


100%|██████████| 1958/1958 [00:43<00:00, 45.20it/s]


Train loss = 0.4091288447380066
Acceleration loss = 0.019600050523877144
Steer loss = 0.34708741307258606
Brake loss = 0.042441368103027344
Steer acc = 0.8705401318309499
Brake acc = 0.9441583806818182


100%|██████████| 101/101 [00:01<00:00, 77.63it/s]


Val loss = 0.38121509552001953
Acceleration loss = 0.01603551395237446
Steer loss = 0.3289576470851898
Brake loss = 0.03622190281748772
Steer acc = 0.8758460318688119
Brake acc = 0.9496296797648515
Model saved
Epoch: 11


100%|██████████| 1958/1958 [00:55<00:00, 35.53it/s]


Train loss = 0.406605988740921
Acceleration loss = 0.019532212987542152
Steer loss = 0.34484606981277466
Brake loss = 0.04222770035266876
Steer acc = 0.8713620802477018
Brake acc = 0.944337932520429


100%|██████████| 101/101 [00:01<00:00, 77.97it/s]


Val loss = 0.3760831356048584
Acceleration loss = 0.015471115708351135
Steer loss = 0.32458364963531494
Brake loss = 0.036028407514095306
Steer acc = 0.8790996287128713
Brake acc = 0.9494943146658416
Model saved
Epoch: 12


100%|██████████| 1958/1958 [00:44<00:00, 44.04it/s]


Train loss = 0.4056597054004669
Acceleration loss = 0.019496222957968712
Steer loss = 0.3440786898136139
Brake loss = 0.04208480566740036
Steer acc = 0.8715615822906027
Brake acc = 0.9446356893194586


100%|██████████| 101/101 [00:01<00:00, 76.87it/s]


Val loss = 0.3843347132205963
Acceleration loss = 0.015676068142056465
Steer loss = 0.3327934443950653
Brake loss = 0.03586520627140999
Steer acc = 0.8735641630569307
Brake acc = 0.9503790222772277
LR_Scheduler step
[0.0001875]
Epoch: 13


100%|██████████| 1958/1958 [00:44<00:00, 43.87it/s]


Train loss = 0.39709174633026123
Acceleration loss = 0.01925511285662651
Steer loss = 0.33610761165618896
Brake loss = 0.04172902926802635
Steer acc = 0.8755216978421859
Brake acc = 0.9452341954481613


100%|██████████| 101/101 [00:01<00:00, 73.06it/s]


Val loss = 0.3723260760307312
Acceleration loss = 0.015123332850635052
Steer loss = 0.3217964470386505
Brake loss = 0.035406388342380524
Steer acc = 0.8781037283415841
Brake acc = 0.9503161741955446
Model saved
Epoch: 14


100%|██████████| 1958/1958 [00:44<00:00, 44.44it/s]


Train loss = 0.39536887407302856
Acceleration loss = 0.019181907176971436
Steer loss = 0.3345939815044403
Brake loss = 0.041592977941036224
Steer acc = 0.8762044935840143
Brake acc = 0.9453139962653218


100%|██████████| 101/101 [00:01<00:00, 76.13it/s]


Val loss = 0.3702290654182434
Acceleration loss = 0.015595570206642151
Steer loss = 0.31862586736679077
Brake loss = 0.036007631570100784
Steer acc = 0.8806128171410891
Brake acc = 0.9497650448638614
Model saved
Epoch: 15


100%|██████████| 1958/1958 [00:44<00:00, 43.74it/s]


Train loss = 0.39525479078292847
Acceleration loss = 0.01925850473344326
Steer loss = 0.3343307673931122
Brake loss = 0.04166553542017937
Steer acc = 0.8761850421348315
Brake acc = 0.9451932975293666


100%|██████████| 101/101 [00:01<00:00, 76.61it/s]


Val loss = 0.36958760023117065
Acceleration loss = 0.015586362220346928
Steer loss = 0.31813886761665344
Brake loss = 0.03586233779788017
Steer acc = 0.8806756652227723
Brake acc = 0.9499922648514851
Model saved
LR_Scheduler step
[9.375e-05]
Epoch: 16


100%|██████████| 1958/1958 [00:45<00:00, 42.60it/s]


Train loss = 0.3904668986797333
Acceleration loss = 0.019159633666276932
Steer loss = 0.3298189043998718
Brake loss = 0.04148836061358452
Steer acc = 0.8785710865679265
Brake acc = 0.9455134983082226


100%|██████████| 101/101 [00:01<00:00, 77.59it/s]


Val loss = 0.3670901656150818
Acceleration loss = 0.015277968719601631
Steer loss = 0.3166359066963196
Brake loss = 0.03517627716064453
Steer acc = 0.8804774520420792
Brake acc = 0.9508286277846535
Model saved
Epoch: 17


100%|██████████| 1958/1958 [00:43<00:00, 44.90it/s]


Train loss = 0.38999849557876587
Acceleration loss = 0.019101401790976524
Steer loss = 0.3295292556285858
Brake loss = 0.04136785492300987
Steer acc = 0.8785456500574566
Brake acc = 0.945733449310521


100%|██████████| 101/101 [00:01<00:00, 78.82it/s]


Val loss = 0.36784523725509644
Acceleration loss = 0.015361749567091465
Steer loss = 0.31746071577072144
Brake loss = 0.03502276539802551
Steer acc = 0.8796072478341584
Brake acc = 0.9513604192450495
Epoch: 18


100%|██████████| 1958/1958 [00:43<00:00, 44.53it/s]


Train loss = 0.389305979013443
Acceleration loss = 0.019071538001298904
Steer loss = 0.32889458537101746
Brake loss = 0.04133987799286842
Steer acc = 0.8788813122446374
Brake acc = 0.9459174899450971


100%|██████████| 101/101 [00:01<00:00, 79.00it/s]


Val loss = 0.36446747183799744
Acceleration loss = 0.01526151318103075
Steer loss = 0.31414681673049927
Brake loss = 0.03505909815430641
Steer acc = 0.8827689897896039
Brake acc = 0.9515441290222773
Model saved
LR_Scheduler step
[4.6875e-05]
Epoch: 19


100%|██████████| 1958/1958 [00:56<00:00, 34.44it/s]


Train loss = 0.3866676986217499
Acceleration loss = 0.019074546173214912
Steer loss = 0.3262971341609955
Brake loss = 0.04129597917199135
Steer acc = 0.8800523892364658
Brake acc = 0.9457733497191011


100%|██████████| 101/101 [00:01<00:00, 76.10it/s]


Val loss = 0.366819828748703
Acceleration loss = 0.015606509521603584
Steer loss = 0.31547367572784424
Brake loss = 0.03573963791131973
Steer acc = 0.8800955290841584
Brake acc = 0.9499825959158416
Epoch: 20


100%|██████████| 1958/1958 [00:46<00:00, 42.44it/s]


Train loss = 0.3861130177974701
Acceleration loss = 0.01904960535466671
Steer loss = 0.3258484899997711
Brake loss = 0.041214894503355026
Steer acc = 0.8801885493807456
Brake acc = 0.9460017795582226


100%|██████████| 101/101 [00:01<00:00, 74.87it/s]


Val loss = 0.3629797101020813
Acceleration loss = 0.015363489277660847
Steer loss = 0.31248369812965393
Brake loss = 0.0351325087249279
Steer acc = 0.8819713025990099
Brake acc = 0.951104192450495
Model saved
Epoch: 21


100%|██████████| 1958/1958 [00:45<00:00, 42.60it/s]


Train loss = 0.38611117005348206
Acceleration loss = 0.01903950423002243
Steer loss = 0.32585614919662476
Brake loss = 0.04121548682451248
Steer acc = 0.8804124904239019
Brake acc = 0.946038188681052


100%|██████████| 101/101 [00:01<00:00, 75.71it/s]


Val loss = 0.3622697591781616
Acceleration loss = 0.015469126403331757
Steer loss = 0.31125038862228394
Brake loss = 0.035550229251384735
Steer acc = 0.8833056157178217
Brake acc = 0.950461208230198
Model saved
LR_Scheduler step
[2.34375e-05]
Epoch: 22


100%|██████████| 1958/1958 [00:45<00:00, 42.78it/s]


Train loss = 0.38452160358428955
Acceleration loss = 0.01898668333888054
Steer loss = 0.3243725597858429
Brake loss = 0.04116235300898552
Steer acc = 0.8810858098186926
Brake acc = 0.9460671164772727


100%|██████████| 101/101 [00:01<00:00, 76.13it/s]


Val loss = 0.36164724826812744
Acceleration loss = 0.015453183092176914
Steer loss = 0.31109610199928284
Brake loss = 0.03509791940450668
Steer acc = 0.8833539603960396
Brake acc = 0.9512202196782178
Model saved
Epoch: 23


100%|██████████| 1958/1958 [00:45<00:00, 42.83it/s]


Train loss = 0.3845539689064026
Acceleration loss = 0.018969785422086716
Steer loss = 0.3244667649269104
Brake loss = 0.04111744463443756
Steer acc = 0.8808932903472931
Brake acc = 0.9460765928243106


100%|██████████| 101/101 [00:01<00:00, 75.42it/s]


Val loss = 0.3614243268966675
Acceleration loss = 0.015567595139145851
Steer loss = 0.31079769134521484
Brake loss = 0.03505907580256462
Steer acc = 0.8836730352722773
Brake acc = 0.9510993579826733
Model saved
Epoch: 24


100%|██████████| 1958/1958 [00:45<00:00, 42.73it/s]


Train loss = 0.3842177987098694
Acceleration loss = 0.01898401416838169
Steer loss = 0.32407107949256897
Brake loss = 0.04116269573569298
Steer acc = 0.8810897998595506
Brake acc = 0.9460621289262002


100%|██████████| 101/101 [00:01<00:00, 73.51it/s]


Val loss = 0.3622977137565613
Acceleration loss = 0.015696588903665543
Steer loss = 0.31098437309265137
Brake loss = 0.03561679273843765
Steer acc = 0.8826674659653465
Brake acc = 0.950306505259901
LR_Scheduler step
[1.171875e-05]
Epoch: 25


100%|██████████| 1958/1958 [00:46<00:00, 41.69it/s]


Train loss = 0.38367760181427
Acceleration loss = 0.01897849701344967
Steer loss = 0.32359057664871216
Brake loss = 0.04110855609178543
Steer acc = 0.8812773317798774
Brake acc = 0.946118987008427


100%|██████████| 101/101 [00:01<00:00, 74.55it/s]


Val loss = 0.36138907074928284
Acceleration loss = 0.015542794950306416
Steer loss = 0.31050512194633484
Brake loss = 0.035341180860996246
Steer acc = 0.8832911123143564
Brake acc = 0.9505240563118812
Model saved
Epoch: 26


100%|██████████| 1958/1958 [00:47<00:00, 41.61it/s]


Train loss = 0.38338208198547363
Acceleration loss = 0.018972555175423622
Steer loss = 0.32334157824516296
Brake loss = 0.04106791689991951
Steer acc = 0.8815102504149642
Brake acc = 0.9463040251532175


100%|██████████| 101/101 [00:01<00:00, 75.06it/s]


Val loss = 0.36089983582496643
Acceleration loss = 0.015535969287157059
Steer loss = 0.3100981116294861
Brake loss = 0.03526570275425911
Steer acc = 0.8841081373762376
Brake acc = 0.9506545869430693
Model saved
Epoch: 27


100%|██████████| 1958/1958 [00:46<00:00, 42.35it/s]


Train loss = 0.3831985890865326
Acceleration loss = 0.018923811614513397
Steer loss = 0.3231857717037201
Brake loss = 0.041089046746492386
Steer acc = 0.8814823201289581
Brake acc = 0.9462282143769152


100%|██████████| 101/101 [00:01<00:00, 74.83it/s]


Val loss = 0.3609634339809418
Acceleration loss = 0.015459821559488773
Steer loss = 0.3102588653564453
Brake loss = 0.035244762897491455
Steer acc = 0.8832717744430693
Brake acc = 0.9506545869430693
LR_Scheduler step
[5.859375e-06]
Epoch: 28


100%|██████████| 1958/1958 [00:57<00:00, 33.94it/s]


Train loss = 0.38301658630371094
Acceleration loss = 0.018939392641186714
Steer loss = 0.3230056166648865
Brake loss = 0.04107155650854111
Steer acc = 0.8815306993743616
Brake acc = 0.9462292118871297


100%|██████████| 101/101 [00:01<00:00, 75.71it/s]


Val loss = 0.36163246631622314
Acceleration loss = 0.015489479526877403
Steer loss = 0.3108453154563904
Brake loss = 0.035297635942697525
Steer acc = 0.8827738242574258
Brake acc = 0.9507754486386139
Epoch: 29


100%|██████████| 1958/1958 [00:46<00:00, 42.27it/s]


Train loss = 0.38312065601348877
Acceleration loss = 0.01894860900938511
Steer loss = 0.32310035824775696
Brake loss = 0.04107171297073364
Steer acc = 0.8815371831907559
Brake acc = 0.9461773413559755


100%|██████████| 101/101 [00:01<00:00, 74.61it/s]

Val loss = 0.3603377342224121
Acceleration loss = 0.015444085001945496
Steer loss = 0.30967944860458374
Brake loss = 0.03521424159407616
Steer acc = 0.8842289990717822
Brake acc = 0.9510268409653465
Model saved





In [None]:
# !rm -r training_files/.ipynb_checkpoints/

rm: cannot remove 'training_files/.ipynb_checkpoints/': No such file or directory


In [None]:
# !rm -r logs

In [None]:
%load_ext tensorboard

In [None]:
import torch.utils.tensorboard as tb

%tensorboard --logdir {'logs'}

# Use REINFORCE to improve our model

In [None]:
import copy
import subprocess
import random
from tqdm import tqdm
import pickle
from torch.distributions import Bernoulli, Categorical

In [None]:
# Copy scripts/model.pt to state_agent/model.pt
# (presumably the file the "state_agent" player loads — TODO confirm against the agent code)
!cp scripts/model.pt state_agent/model.pt

In [None]:
# Load the model that the REINFORCE loop below fine-tunes.
# NOTE(review): load_model is defined outside this cell, and '/models/model.th' is an
# absolute path from the filesystem root — confirm this is the intended checkpoint.
model = load_model('/models/model.th')

In [None]:
# TensorBoard writer for the 'average_return' scalar; events go to logs/average_return
average_return_logger = tb.SummaryWriter("logs/average_return")

In [None]:
# REINFORCE fine-tuning loop.
#
# Every epoch:
#   1. play 40 games of "state_agent" against a randomly chosen opponent and
#      record them as pickle files in reinforce_files/,
#   2. turn the recorded frames into (features, action, reward) samples, where
#      the reward is the per-frame decrease of feature [-2] (used here as the
#      puck-to-goal distance),
#   3. run policy-gradient updates on random mini-batches,
#   4. trace the updated model and save it as "model.pt".
#
# Relies on names defined in earlier cells: model, extract_features,
# average_return_logger, torch, np, os.

folders_arr = ['agent_1', 'agent_2', 'agent_3', 'agent_4', 'agent_5']

batch_size = 512

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Counts optimisation steps across all epochs; used as the TensorBoard x-axis.
global_step = 0

for epoch in tqdm(range(20)):
  print(f"Epoch = {epoch}")

  # Clear the directory of any files left over from previous runs
  subprocess.run(["rm", "-r", "reinforce_files/"])
  subprocess.run(['mkdir', 'reinforce_files'])

  # NOTE(review): the games are played by the "state_agent" package, while the
  # updated policy is written to "model.pt" at the end of each epoch. Confirm
  # that state_agent loads that file; otherwise every epoch samples games from
  # the same, un-updated policy.

  # Have our agent play 40 random games
  for i in range(40):
    player_1 = "state_agent"
    player_2 = random.choice(folders_arr)

    # This variable defines the side of our player in the game (red or blue)
    player_1_pos = random.randint(0, 1)

    pos_x = random.randint(-30, 30)
    pos_y = random.randint(-30, 30)
    vel_x = random.randint(-20, 20)
    vel_y = random.randint(-20, 20)

    # Only the order in which the two teams are handed to the runner depends
    # on the side our agent plays on.
    if player_1_pos == 0:
      first_team, second_team = player_1, player_2
    else:
      first_team, second_team = player_2, player_1

    data_f_save_name = (f"reinforce_files/{first_team}_{second_team}_"
                        f"{pos_x}_{pos_y}_{vel_x}_{vel_y}.pkl")
    subprocess.run(["python3", "-m", "tournament.runner", first_team, second_team,
                    "-s", data_f_save_name, "-f", "600", "-m", "2",
                    "--ball_location", str(pos_x), str(pos_y),
                    "--ball_velocity", str(vel_x), str(vel_y)])

  # Read the recordings back from the same directory the games were saved to.
  # Each file holds the frames of one game as consecutive pickles, so the file
  # is read until EOF. episode_start marks the first frame of every game.
  # pickle is acceptable here only because the files were just produced locally.
  data_dir = "reinforce_files"
  data = []
  episode_start = []
  for file in os.listdir(data_dir):
    # Skip anything that is not a recording (e.g. .ipynb_checkpoints)
    if not file.endswith('.pkl'):
      continue
    with open(os.path.join(data_dir, file), "rb") as f:
      is_first_frame = True
      while True:
        try:
          data.append(pickle.load(f))
        except EOFError:
          break
        episode_start.append(is_first_frame)
        is_first_frame = False

  features = []
  returns = []
  actions = []

  for entry, is_first_frame in zip(data, episode_start):
    # Work out which team is ours from the keys of the first recorded action
    if 'drift' in entry['actions'][0].keys():
      player_pos = 0
    else:
      player_pos = 1

    # Add player/opponent data depending on the position of our agents in the game
    if player_pos == 0:
      player_state, opponent_state = entry['team1_state'], entry['team2_state']
    else:
      player_state, opponent_state = entry['team2_state'], entry['team1_state']

    soccer_state = entry['soccer_state']
    frame_actions = entry['actions']

    curr_features_1 = extract_features(player_state[0],
                                       soccer_state, opponent_state, player_pos)
    curr_features_2 = extract_features(player_state[1],
                                       soccer_state, opponent_state, player_pos)

    curr_puck_goal_distance_1 = curr_features_1[-2]
    curr_puck_player_distance_1 = curr_features_1[-7]

    # The second kart must read its own feature vector
    curr_puck_goal_distance_2 = curr_features_2[-2]
    curr_puck_player_distance_2 = curr_features_2[-7]

    # The first frame of a game has no predecessor within that game, so it only
    # seeds the "previous" values; comparing it with the last frame of another
    # game would produce a meaningless reward.
    if not is_first_frame:
      # Experiment with the rewards given to our agents
      reward_1 = (prev_puck_goal_distance_1 - curr_puck_goal_distance_1) # + (prev_puck_player_distance_1 - curr_puck_player_distance_1)
      reward_2 = (prev_puck_goal_distance_2 - curr_puck_goal_distance_2) # + (prev_puck_player_distance_2 - curr_puck_player_distance_2)

      # NOTE(review): indices player_pos and player_pos + 2 assume the recorded
      # actions alternate between the two teams — confirm against the runner.
      # Actions are encoded as: acceleration/brake -> {0., 1.}, steer -> steer + 1.
      if reward_1 != 0:
        returns.append(reward_1)
        features.append(curr_features_1)
        actions.append(((frame_actions[player_pos]['acceleration'] > 0.5).float(),
                        (frame_actions[player_pos]['steer'] + 1.),
                        (frame_actions[player_pos]['brake'] > 0.5).float()))

      if reward_2 != 0:
        returns.append(reward_2)
        features.append(curr_features_2)
        actions.append(((frame_actions[player_pos + 2]['acceleration'] > 0.5).float(),
                        (frame_actions[player_pos + 2]['steer'] + 1.),
                        (frame_actions[player_pos + 2]['brake'] > 0.5).float()))

    prev_puck_goal_distance_1 = curr_puck_goal_distance_1
    prev_puck_player_distance_1 = curr_puck_player_distance_1
    prev_puck_goal_distance_2 = curr_puck_goal_distance_2
    prev_puck_player_distance_2 = curr_puck_player_distance_2

  number_of_iterations_to_train = len(features) // batch_size
  print(f"We are going to train for {number_of_iterations_to_train} iterations")

  # Without at least one full batch there is nothing to optimise: normalising
  # the returns could yield NaN and no example batch would exist for tracing.
  if number_of_iterations_to_train == 0:
    print("Not enough samples for a full batch, skipping this epoch")
    continue

  returns = torch.as_tensor(returns, dtype=torch.float32).cuda()
  actions = torch.as_tensor(actions).cuda()
  features = torch.stack(features).cuda()

  # Standardise the returns so the gradient scale does not depend on reward scale
  returns = (returns - returns.mean()) / returns.std()
  # returns = returns / returns.std()

  model.train().cuda()
  avg_expected_log_return = []
  for i in range(number_of_iterations_to_train):
    # Sample a mini-batch with replacement
    batch_ids = torch.randint(0, len(returns), (batch_size, )).cuda()

    batch_returns = returns[batch_ids]
    batch_actions = actions[batch_ids]
    batch_features = features[batch_ids]

    acceleration, steer, brake = model(batch_features)

    pi_acceleration = Bernoulli(logits=acceleration[:, 0])
    pi_steer = Categorical(logits=steer)
    pi_brake = Bernoulli(logits=brake[:, 0])

    # REINFORCE objective: return-weighted log-likelihood of the taken actions,
    # averaged over the three action heads
    expected_log_return = ((pi_acceleration.log_prob(batch_actions[:, 0]) * batch_returns).mean() +
                          (pi_steer.log_prob(batch_actions[:, 1]) * batch_returns).mean() +
                          (pi_brake.log_prob(batch_actions[:, 2]) * batch_returns).mean()) / 3

    optimizer.zero_grad()
    # Gradient ascent on the objective == descent on its negative
    (-expected_log_return).backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.01)
    optimizer.step()
    avg_expected_log_return.append(float(expected_log_return))

    average_return_logger.add_scalar('average_return', float(expected_log_return), global_step=global_step)
    global_step += 1

  print(f"Average expected log return {np.mean(avg_expected_log_return)}")

  # Trace the updated model on CPU, using the last mini-batch as example input
  model.eval()
  model.cpu()
  batch_features = batch_features.to('cpu')

  traced_script_module = torch.jit.trace(model, batch_features)
  traced_script_module.save("model.pt")

# Get torch jit script

In [None]:
# Reload the model, trace it on CPU with an example batch and save the traced module as "model.pt".
# NOTE(review): x_batch is not defined in this cell — it has to exist already from an earlier cell.
model = load_model()
model.eval()
x_batch = x_batch.to('cpu')

traced_script_module = torch.jit.trace(model, x_batch)
traced_script_module.save("model.pt")

In [None]:
# Copy scripts/model.pt into state_agent/
!cp scripts/model.pt state_agent/

# Test new agent

In [None]:
# Display my_video.mp4 inline in the notebook output
import moviepy.editor
moviepy.editor.ipython_display("my_video.mp4")