In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions.categorical import Categorical
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import ExponentialLR
import numpy as np
import wandb
import os
import random
from time import time

from lib.data import EpisodeDataset
from lib.neural_networks import FullyConnected, FullyConnected3, ConvNet

In [2]:
# Shared sizes used by the dataset and model constructors in this notebook.
WIDTH = 10  # passed as `width=` to EpisodeDataset and as `obs_width=` to the model
OBS_SIZE = 46  # passed as `obs_size=` to the model
HIDDEN_SIZE = 512  # passed as `hidden_size=` to the model

In [3]:
# Directories holding the two training episode sets.
# NOTE(review): absolute paths on one machine — they will not resolve elsewhere;
# consider deriving them from a configurable data root.
TRAIN_PATH_1 = '/home/bdemoss/Desktop/RL IDM/Episode Data/Random Data/30 TPS/3v3 Train'
TRAIN_PATH_2 = '/home/bdemoss/Desktop/RL IDM/Episode Data/Nexto Data/30 TPS/3v3 Train'

# One dataset built over both directories, using the shared WIDTH setting.
train_dataset = EpisodeDataset([TRAIN_PATH_1, TRAIN_PATH_2], width=WIDTH, include_change=True, corrupt=True)

# Number of items in the combined training dataset.
print(len(train_dataset))

Loading Episode Data
Finished Loading Episode Data
311700


In [4]:
# Shuffled training loader. batch_size=1: the training loop indexes every batch
# with [0], so each dataset item presumably already holds a whole group of
# observations (e.g. one episode) — TODO confirm against EpisodeDataset.
Train_Data_Loader = DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=16)

In [3]:
# NOTE(review): the first assignment is immediately overwritten by the second,
# so only the 'Human Replays' directory is actually used. Absolute paths on one
# machine — they will not resolve elsewhere.
TEST_PATH = '/home/bdemoss/Desktop/RL IDM/Episode Data/Human Example Data/30 TPS'
TEST_PATH = '/home/bdemoss/Desktop/RL IDM/Episode Data/Human Replays'

# Evaluation dataset, built with the same options as the training dataset.
test_dataset = EpisodeDataset([TEST_PATH], width=WIDTH, include_change=True, corrupt=True)
# Number of items in the evaluation dataset.
print(len(test_dataset))

Loading Episode Data
Finished Loading Episode Data
1


In [4]:
# Evaluation loader: fixed order (shuffle=False), one dataset item per batch;
# consumers index each batch with [0].
Test_Data_Loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=16)

In [10]:
# Alternative architecture, currently disabled:
#model = ConvNet(obs_size=OBS_SIZE, obs_width=WIDTH, conv_number=10, hidden_size=HIDDEN_SIZE)
model = FullyConnected3(obs_size=OBS_SIZE, obs_width=WIDTH, hidden_size=HIDDEN_SIZE)

# nn.Module.cuda() moves the parameters in place and returns the module itself,
# so `gpumodel` and `model` refer to the same object.
gpumodel = model.cuda()

# The same cross-entropy criterion is applied to each of the model's 9 outputs
# by the training and evaluation code.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(gpumodel.parameters(), lr=0.0001)
# Learning rate is multiplied by 0.63 on every scheduler.step() call
# (the training loop calls it once per epoch).
scheduler = ExponentialLR(optimizer, gamma = 0.63)

In [11]:
# Restore weights from a saved checkpoint. The same path is written by the
# torch.save cell in this notebook, so running this cell resumes from the last
# saved state.
gpumodel.load_state_dict(torch.load('trained_networks/mild-wood-119.pt'))

<All keys matched successfully>

In [8]:
def test_model(gpumodel, Test_Data_Loader):
    gpumodel.eval()

    total_loss = 0
    obs_count = 0
    total_correct = torch.zeros(9).cuda()

    for _obs, _act, _add_data in Test_Data_Loader:
        # EVALUATION
        obs = _obs[0].cuda()
        act = torch.cat((_act[0], _add_data[0][:,0:1]), dim=1).long().cuda()
        add_data = _add_data[0].long()

        if obs.shape[0] == 0:
            continue

        with torch.no_grad():
            y_pred = model(obs)

        y_pred = model(obs)
        pred_act = torch.zeros(obs.shape[0], 9).cuda()

        loss = 0
        for i in range(9):
            loss += criterion(y_pred[i], act[:, i])

            pred_act[:, i] = torch.argmax(y_pred[i], dim=1)


        # LOGGING
        obs_count += len(obs)
        total_loss += loss.item() * len(obs)

        correct = act == pred_act
        total_correct += torch.sum(correct, dim=0)

    return total_loss / obs_count, total_correct / obs_count

In [14]:
# Start the Weights & Biases run and record the hyperparameters with it.
# The config is passed to wandb.init() so it is stored on the run itself;
# rebinding the module attribute `wandb.config` after init only replaces that
# attribute with a plain dict.
# NOTE(review): "batch_size": 10 does not match the DataLoaders in this
# notebook (batch_size=1) — confirm which value should be recorded.
wandb.init(
    project="Inverse Dynamics Model",
    entity="harrymead",
    config={
        "learning_rate": 0.0001,
        "epochs": 10,
        "batch_size": 10,
    },
)

0,1
accuracy/boost,▁▄▆▆▇▇▇▆▇▇▇▇▇▇▇▇▇▇█▇▇█▇▇▇▇▇███▇█▇▇█▇▇▇▇▇
accuracy/handbrake,▁▃▅▄▄▆▆▅▆▆▆▆▆▆▇▆▆▇▆▆█▇▇▆▇█▇▆█▇▆▇▇▇▇▆▆▆▇▆
accuracy/jump,▁▃▄▆▆▆▇▆▇▆▇▇▇▇▇▇▇▇▇▇█▇▇▇███▇▇█▆█▇▇███▇██
accuracy/on_ground,▁▃▅▆▇▇▇▇█▇▇▇▇▇██▇▇██▇██▇▇▇▇█▇█▇▇██▇██▇▇▇
accuracy/pitch,▁▄▅▆▆▆▇▇▇█▇▇██▇█▇███▇███████▇███▇███████
accuracy/roll,▁▃▄▆▆▆▇▇▇▇▇█████▇███▇███████████████████
accuracy/steer,▁▄▆▆▇▇▇▇▇███████████████████████████████
accuracy/throttle,▁▃▅▅▆▇▇▆▇▇▇▇▇▇▇▇▇█▇▇███▇▇▇▇▇█▇▇▇▇▇█▇▇▇▇▇
accuracy/yaw,▁▄▆▆▇▇▇▇▇███████████████████████████████
loss,█▅▄▃▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/boost,0.98829
accuracy/handbrake,0.98094
accuracy/jump,0.97044
accuracy/on_ground,0.99767
accuracy/pitch,0.9497
accuracy/roll,0.95769
accuracy/steer,0.94683
accuracy/throttle,0.97344
accuracy/yaw,0.94675
loss,0.69872


In [15]:
# Names of the 9 prediction heads, in the column order of `act`
# (8 action columns followed by the first column of the additional data).
ACTION_NAMES = ("throttle", "steer", "pitch", "yaw", "roll",
                "jump", "boost", "handbrake", "on_ground")

gpumodel.train()

for epoch in range(10):
    # Running statistics; reset after every 1000 processed batches and at the
    # start of each epoch.
    batch = 0
    total_loss = 0
    obs_count = 0
    total_correct = torch.zeros(9).cuda()

    for _obs, _act, _add_data in Train_Data_Loader:
        # TRAINING
        obs = _obs[0].cuda()
        act = torch.cat((_act[0], _add_data[0][:, 0:1]), dim=1).long().cuda()

        if obs.shape[0] == 0:
            # Empty item: nothing to learn from, and it does not count as a batch.
            continue

        optimizer.zero_grad()

        y_pred = gpumodel(obs)
        pred_act = torch.zeros(obs.shape[0], 9).cuda()

        # Total loss is the sum of the per-head cross-entropy losses.
        loss = 0
        for i in range(9):
            loss += criterion(y_pred[i], act[:, i])
            pred_act[:, i] = torch.argmax(y_pred[i], dim=1)

        loss.backward()
        optimizer.step()

        # LOGGING
        obs_count += len(obs)
        total_loss += loss.item() * len(obs)
        total_correct += torch.sum(act == pred_act, dim=0)

        batch += 1
        if batch % 1000 == 0:
            idv_accuracy = total_correct / obs_count

            metrics = {"loss": total_loss / obs_count}
            metrics.update({f"accuracy/{name}": idv_accuracy[i]
                            for i, name in enumerate(ACTION_NAMES)})

            # Evaluate at batches 1000, 6000, 11000, ... and log test metrics
            # only on those steps, so stale values are never re-logged as new.
            if (batch - 1000) % 5000 == 0:
                test_loss, test_accuracy = test_model(gpumodel, Test_Data_Loader)
                gpumodel.train()  # test_model leaves the model in eval mode

                metrics["test_loss"] = test_loss
                metrics.update({f"test_accuracy/{name}": test_accuracy[i]
                                for i, name in enumerate(ACTION_NAMES)})

            print(f'Epoch {epoch + 1} - Loss: {total_loss / obs_count}, Correct: {idv_accuracy}')
            wandb.log(metrics)

            total_loss = 0
            obs_count = 0
            total_correct = torch.zeros(9).cuda()

    # Decay the learning rate once per epoch.
    scheduler.step()

Epoch 1 - Loss: 3.941805749245361, Correct: tensor([0.7412, 0.6831, 0.8237, 0.6781, 0.8413, 0.9300, 0.8700, 0.9685, 0.9076],
       device='cuda:0')
Epoch 1 - Loss: 2.9466688583493332, Correct: tensor([0.8535, 0.7422, 0.8551, 0.7422, 0.8718, 0.9359, 0.9436, 0.9726, 0.9741],
       device='cuda:0')
Epoch 1 - Loss: 2.668187505163903, Correct: tensor([0.8874, 0.7515, 0.8692, 0.7511, 0.8808, 0.9378, 0.9584, 0.9711, 0.9791],
       device='cuda:0')
Epoch 1 - Loss: 2.4805048432300043, Correct: tensor([0.9027, 0.7610, 0.8779, 0.7618, 0.8881, 0.9431, 0.9602, 0.9720, 0.9831],
       device='cuda:0')
Epoch 1 - Loss: 2.358450890618419, Correct: tensor([0.9147, 0.7668, 0.8851, 0.7667, 0.8906, 0.9415, 0.9636, 0.9718, 0.9844],
       device='cuda:0')
Epoch 1 - Loss: 2.257677316010359, Correct: tensor([0.9203, 0.7779, 0.8917, 0.7784, 0.8938, 0.9400, 0.9618, 0.9720, 0.9868],
       device='cuda:0')
Epoch 1 - Loss: 2.1880549822969724, Correct: tensor([0.9241, 0.7809, 0.8960, 0.7812, 0.8976, 0.9451, 0.9

In [16]:
# Persist the current weights. This is the same path the load_state_dict cell
# reads, so any existing checkpoint there is replaced.
torch.save(gpumodel.state_dict(), "trained_networks/mild-wood-119.pt")

In [12]:
# Per-head confusion matrices for the 8 action heads, indexed
# [head, true class, predicted class]. The 3x3 shape assumes every head has at
# most 3 classes.
conf_mtx = torch.zeros((8,3,3)).cuda()
CONF_MAT = True

gpumodel.eval()

total_loss = 0
obs_count = 0
total_correct = torch.zeros(9).cuda()

# Accuracy restricted to observations the model *predicts* as on-ground.
correct_on_ground = torch.zeros(9).cuda()
total_on_ground = 0

# One forward pass per batch, entirely without autograd bookkeeping.
with torch.no_grad():
    for _obs, _act, _add_data in Test_Data_Loader:
        # EVALUATION
        obs = _obs[0].cuda()
        act = torch.cat((_act[0], _add_data[0][:, 0:1]), dim=1).long().cuda()

        if obs.shape[0] == 0:
            continue

        y_pred = gpumodel(obs)
        pred_act = torch.zeros(obs.shape[0], 9).cuda()

        loss = 0
        for i in range(9):
            loss += criterion(y_pred[i], act[:, i])
            pred_act[:, i] = torch.argmax(y_pred[i], dim=1)

        # LOGGING
        obs_count += len(obs)
        total_loss += loss.item() * len(obs)

        correct = act == pred_act
        total_correct += torch.sum(correct, dim=0)

        # The on-ground filter uses the model's prediction (column 8 of
        # pred_act), not the label in `act`.
        on_ground = pred_act[:, 8]
        ground_mask = on_ground == 1

        total_on_ground += torch.sum(ground_mask).item()
        correct_on_ground += torch.sum(correct[ground_mask], dim=0)

        if CONF_MAT:
            # Vectorised equivalent of incrementing
            # conf_mtx[j, act[i, j], pred_act[i, j]] for every predicted-on-ground
            # row i and head j; accumulate=True handles repeated indices.
            true_cls = act[ground_mask, :8]
            pred_cls = pred_act[ground_mask, :8].long()
            head_idx = torch.arange(8, device=act.device).expand_as(true_cls)
            conf_mtx.index_put_(
                (head_idx.reshape(-1), true_cls.reshape(-1), pred_cls.reshape(-1)),
                torch.ones(true_cls.numel(), device=conf_mtx.device),
                accumulate=True,
            )

print(f'Loss: {total_loss / obs_count}, Correct: {total_correct / obs_count}, Correct On Ground: {correct_on_ground / total_on_ground}, On Ground: {total_on_ground / obs_count}')

np.set_printoptions(precision=4, suppress=True)
print((conf_mtx).cpu().numpy())

Loss: 26.654266357421875, Correct: tensor([0.9120, 0.8223, 0.8248, 0.4793, 0.8872, 0.9554, 0.9545, 0.9802, 0.2843],
       device='cuda:0'), Correct On Ground: tensor([0.9169, 0.8199, 0.9977, 0.4163, 1.0000, 0.9867, 0.9555, 0.9723, 0.0000],
       device='cuda:0'), On Ground: 0.715702479338843
[[[ 183.    6.    6.]
  [  30.  359.   33.]
  [   5.   64. 1046.]]

 [[ 297.   45.    3.]
  [  67.  608.  118.]
  [  10.   69.  515.]]

 [[   0.    0.    0.]
  [   1. 1728.    3.]
  [   0.    0.    0.]]

 [[   0.    0.    0.]
  [ 368.  721.  643.]
  [   0.    0.    0.]]

 [[   0.    0.    0.]
  [   0. 1732.    0.]
  [   0.    0.    0.]]

 [[1709.   23.    0.]
  [   0.    0.    0.]
  [   0.    0.    0.]]

 [[1380.   50.    0.]
  [  27.  275.    0.]
  [   0.    0.    0.]]

 [[1684.   48.    0.]
  [   0.    0.    0.]
  [   0.    0.    0.]]]
