In [2]:
# Load the Drive helper and mount
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
%cd /content/drive/MyDrive/Documents/Fulbright\ Application\ 2020-2021/Courses/'Spring Semester 2024'/'Deep Decision and Reinforcement Learning'/project
%ls demos

Mounted at /content/drive
/content/drive/MyDrive/Documents/Fulbright Application 2020-2021/Courses/Spring Semester 2024/Deep Decision and Reinforcement Learning/project
circle_clock.json         never_seen.json  recover_3.json  recover_6.json
circle_counterclock.json  recover_1.json   recover_4.json  snake_2.json
eight.json                recover_2.json   recover_5.json  snake.json


In [3]:
import os
import json
from enum import Enum

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

from src.agent import BehaviorCloningModel, Constants
from src.dataset import Dataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

obs_list = torch.tensor([])
action_list = torch.tensor([])

for file in os.listdir("./demos"):
    if file.startswith("*") or file.startswith("."):
        continue
    with open(f"./demos/{file}", "r") as f:
        data = json.load(f)
        for episode in data:
            min_length = min(len(episode[0]), len(episode[1]))
            obs = episode[0][:min_length]
            action = episode[1][:min_length]

            if len(obs) == 0 or len(action) == 0:
                continue

            obs = torch.tensor(obs, dtype=torch.float32)
            action = torch.tensor(action, dtype=torch.float32)
            obs_list = torch.cat([obs_list, obs])
            action_list = torch.cat([action_list, action])

dataset = Dataset(Constants.INPUT_SIZE.value, obs_list, action_list, Constants.NUM_HISTORY.value)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Instantiate model, loss function, and optimizer
# model = MultiHistoryNetwork(
#     Constants.INPUT_SIZE.value,
#     Constants.HIDDEN_SIZE.value,
#     Constants.OUTPUT_SIZE.value,
#     Constants.NUM_HISTORY.value).to(device)

model = BehaviorCloningModel(
    Constants.NUM_HISTORY.value,
    Constants.INPUT_SIZE.value,
    Constants.OUTPUT_SIZE.value
).to(device)

# create a loss function
loss_fn = nn.MSELoss().to(device)

# create an optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=Constants.lr.value)
scheduler = ReduceLROnPlateau(optimizer, "min", patience=2)
# train the model
iterator = tqdm(range(1, Constants.EPOCHS.value + 1), total=Constants.EPOCHS.value, desc="Training")

for epoch in iterator:
    model.train()
    iterator.set_description("Training")
    for obs, action in train_dataloader:
        optimizer.zero_grad()
        obs = obs.to(device)
        action = action.to(device)
        pred = model(obs)
        loss = loss_fn(pred, action)
        loss.backward()
        optimizer.step()

    # evaluate the model
    iterator.set_description("Evaluating")
    model.eval()
    with torch.no_grad():
        test_loss = 0
        for obs, action in test_dataloader:
            obs = obs.to(device)
            action = action.to(device)
            pred = model(obs)
            test_loss += loss_fn(pred, action).item()
        test_loss /= len(test_dataloader)
    # iterator.set_postfix(epoch=epoch, loss=test_loss)
    print('epoch: ', epoch, 'loss: ', test_loss)
    scheduler.step(test_loss)

# save the model
torch.save(model.state_dict(), f"model_dict_{device}.pt")

cuda


Training:   5%|▌         | 1/20 [00:08<02:42,  8.53s/it]  

epoch:  1 loss:  0.04150487778252104


Training:  10%|█         | 2/20 [00:16<02:25,  8.06s/it]  

epoch:  2 loss:  0.028288926599195664


Training:  15%|█▌        | 3/20 [00:23<02:09,  7.59s/it]  

epoch:  3 loss:  0.027191614969690078


Training:  20%|██        | 4/20 [00:31<02:02,  7.65s/it]  

epoch:  4 loss:  0.02491277508375891


Training:  25%|██▌       | 5/20 [00:38<01:54,  7.64s/it]  

epoch:  5 loss:  0.03158836452201337


Training:  30%|███       | 6/20 [00:46<01:46,  7.59s/it]  

epoch:  6 loss:  0.022665414172564048


Training:  35%|███▌      | 7/20 [00:53<01:39,  7.64s/it]  

epoch:  7 loss:  0.02157506607458757


Training:  40%|████      | 8/20 [01:00<01:28,  7.40s/it]  

epoch:  8 loss:  0.02002579943563974


Training:  45%|████▌     | 9/20 [01:09<01:25,  7.73s/it]  

epoch:  9 loss:  0.018999475675049664


Training:  50%|█████     | 10/20 [01:16<01:15,  7.56s/it]  

epoch:  10 loss:  0.0191699347811325


Training:  55%|█████▌    | 11/20 [01:24<01:08,  7.58s/it]  

epoch:  11 loss:  0.017167888500362206


Training:  60%|██████    | 12/20 [01:31<01:00,  7.62s/it]  

epoch:  12 loss:  0.017384326655506287


Training:  65%|██████▌   | 13/20 [01:38<00:52,  7.47s/it]  

epoch:  13 loss:  0.015635616905437705


Training:  70%|███████   | 14/20 [01:46<00:45,  7.51s/it]  

epoch:  14 loss:  0.017688248371849166


Training:  75%|███████▌  | 15/20 [01:53<00:36,  7.33s/it]  

epoch:  15 loss:  0.01536129208987198


Training:  80%|████████  | 16/20 [02:01<00:29,  7.44s/it]  

epoch:  16 loss:  0.016085053853000617


Training:  85%|████████▌ | 17/20 [02:08<00:21,  7.30s/it]  

epoch:  17 loss:  0.015848596083852914


Training:  90%|█████████ | 18/20 [02:15<00:14,  7.41s/it]  

epoch:  18 loss:  0.014420623716020198


Training:  95%|█████████▌| 19/20 [02:23<00:07,  7.43s/it]  

epoch:  19 loss:  0.014330649523331731


Evaluating: 100%|██████████| 20/20 [02:30<00:00,  7.53s/it]

epoch:  20 loss:  0.015365794038568314



