In [2]:
# Load the Drive helper and mount
from google.colab import drive
# Mount Google Drive at /content/drive; force_remount=True requests a fresh mount when re-running this cell.
drive.mount('/content/drive', force_remount=True)
# Make the project folder the working directory; later cells use paths relative to it (e.g. ./demos).
# NOTE(review): this is a hard-coded, user-specific Drive path — adjust it before running from another account.
%cd /content/drive/MyDrive/Documents/Fulbright\ Application\ 2020-2021/Courses/'Spring Semester 2024'/'Deep Decision and Reinforcement Learning'/project
# List the demonstration files found in the demos/ folder.
%ls demos

Mounted at /content/drive
/content/drive/MyDrive/Documents/Fulbright Application 2020-2021/Courses/Spring Semester 2024/Deep Decision and Reinforcement Learning/project
circle_clock.json         never_seen.json  recover_3.json  recover_6.json
circle_counterclock.json  recover_1.json   recover_4.json  snake_2.json
eight.json                recover_2.json   recover_5.json  snake.json


In [3]:
import os
import json
from enum import Enum

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

class BehaviorCloningModel(nn.Module):
    """1-D convolutional network that maps a window of observations to an action vector.

    The input is expected as ``(batch, num_history, num_features)``: the history
    steps are used as Conv1d channels and the convolution slides along the
    feature axis. The output is ``(batch, output_size)`` squashed to ``[-1, 1]``
    by a final ``Tanh``.

    Args:
        num_history: Number of history steps (input channels of the first Conv1d).
        num_features: Width of each observation; must be at least 4 so that
            something is left after the two stride-2 poolings.
        output_size: Number of output values.
        dropout: Drop probability of the single Dropout layer in the
            classifier head. Defaults to 0.5, the ``nn.Dropout()`` default.

    Raises:
        ValueError: If ``num_features`` is smaller than 4.
    """

    def __init__(self, num_history, num_features, output_size, dropout=0.5):
        super().__init__()
        # The padded convolutions keep the length; each MaxPool1d(2, 2) floors it
        # in half, so the final length is num_features // 4 with 64 channels.
        self.flattened_size = 64 * (num_features // 4)
        if self.flattened_size == 0:
            raise ValueError(
                f"num_features must be >= 4 to survive two poolings, got {num_features}"
            )
        self.policy = nn.Sequential(
            nn.Conv1d(in_channels=num_history, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.BatchNorm1d(32),

            nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.BatchNorm1d(64)
        )
        # Layer order (and therefore the state_dict keys) is kept unchanged so
        # previously saved weights still load.
        self.classifier = nn.Sequential(
            nn.Linear(self.flattened_size, self.flattened_size),
            nn.ReLU(),
            nn.Linear(self.flattened_size, self.flattened_size),
            nn.ReLU(),
            nn.Linear(self.flattened_size, 128),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(128, output_size),
            nn.Tanh()
        )

    def forward(self, x):
        """Return the predicted action for each window in the batch ``x``."""
        x = self.policy(x)
        # Flatten per sample while keeping the batch dimension fixed. Using
        # view(-1, flattened_size) would silently fold extra features into the
        # batch dimension when the input width differs from num_features;
        # this way the first Linear layer raises a shape error instead.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

class Constants(Enum):
    """Hyperparameters for the dataset, model and training loop.

    Values are read through ``.value`` (e.g. ``Constants.EPOCHS.value``).
    Because this is an ``Enum``, giving two names the same value would make
    the second an alias of the first rather than a separate member.
    """
    INPUT_SIZE = 25  # Number of features in one observation (width of each history row)
    HIDDEN_SIZE = 128  # Number of units in hidden layer; NOTE(review): not read by the visible model or training code
    NUM_HISTORY = 12  # Number of history steps in each input window
    OUTPUT_SIZE = 2  # Size of the model's output vector (number of action values)
    DROPOUT = 0.25  # Dropout rate; NOTE(review): not passed to the model in the visible code — confirm intent
    lr = 1e-3  # Learning rate handed to the Adam optimizer
    EPOCHS = 20  # Number of epochs to train

    NUM_LAYERS = 4  # Number of LSTM layers; NOTE(review): no LSTM appears in the visible code, so this looks unused

class Dataset(torch.utils.data.Dataset):
    """Sliding-window dataset pairing each action with the observations before it.

    Item ``idx`` is ``(history, action)`` where ``history`` stacks the
    observations at positions ``idx - num_history`` .. ``idx - 1`` (zero-padded
    at the front when fewer are available) and ``action`` is ``action_list[idx]``.

    NOTE(review): the window ends *before* ``idx``, so the observation stored
    alongside the action is not part of the input — confirm this is intended
    and that inference builds its window the same way.
    NOTE(review): windows are sliced from the flat tensor; if ``obs_list``
    holds several episodes back to back, windows near an episode start will
    include the tail of the previous episode.

    Args:
        obs_list: Tensor of observations, one row per time step.
        action_list: Tensor of actions aligned with ``obs_list``.
        num_history: Number of rows in every returned history window.
    """

    def __init__(self, obs_list, action_list, num_history):
        self.obs_list = obs_list
        self.action_list = action_list
        self.num_history = num_history

    def __len__(self):
        return len(self.obs_list)

    def __getitem__(self, idx):
        """Return ``(history, action)`` for position ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        length = len(self)
        # Support Python-style negative indices; without this the slice below
        # would return a window of the wrong length.
        if idx < 0:
            idx += length
        if not 0 <= idx < length:
            raise IndexError(f"index {idx} is out of range for dataset of length {length}")

        action = self.action_list[idx]

        # Retrieve history observations (up to num_history rows preceding idx)
        start_idx = max(0, idx - self.num_history)
        history_obs = self.obs_list[start_idx:idx]

        # Pad at the front so every window has exactly num_history rows. The
        # padding mirrors the data's own feature shape, dtype and device rather
        # than relying on a global constant.
        pad_width = self.num_history - len(history_obs)
        if pad_width > 0:
            padding = torch.zeros(
                (pad_width, *history_obs.shape[1:]),
                dtype=history_obs.dtype,
                device=history_obs.device,
            )
            history_obs = torch.cat([padding, history_obs])

        return history_obs, action

# Gather every demonstration episode into two flat, row-aligned tensors.
# Per-episode tensors are collected in lists and concatenated once at the end;
# calling torch.cat inside the loop re-copies everything loaded so far each time.
obs_chunks = []
action_chunks = []

# sorted() fixes the file order: os.listdir() returns entries in arbitrary
# order, which would otherwise make the sample order vary between runs.
for file in sorted(os.listdir("./demos")):
    if file.startswith("*") or file.startswith("."):
        continue
    with open(f"./demos/{file}", "r") as f:
        data = json.load(f)

    # Each episode is indexed as [observations, actions]; the two lists are
    # truncated to the shorter one so rows stay aligned.
    for episode in data:
        min_length = min(len(episode[0]), len(episode[1]))
        if min_length == 0:
            continue

        obs = torch.tensor(episode[0][:min_length], dtype=torch.float32)
        action = torch.tensor(episode[1][:min_length], dtype=torch.float32)
        obs_chunks.append(obs)
        action_chunks.append(action)

# Fall back to empty tensors when nothing was loaded, as before.
obs_list = torch.cat(obs_chunks) if obs_chunks else torch.tensor([])
action_list = torch.cat(action_chunks) if action_chunks else torch.tensor([])

# Wrap the flat tensors in the sliding-window dataset and hold out 20% for evaluation.
dataset = Dataset(obs_list, action_list, Constants.NUM_HISTORY.value)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split so the same samples land in train/test on every run; without
# a generator the partition (and the reported test loss) changes per run.
# NOTE(review): neighbouring windows overlap, so a random per-sample split puts
# near-duplicate inputs in both sets — consider splitting by episode instead.
split_generator = torch.Generator().manual_seed(0)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Build the convolutional behavior-cloning policy and place it on the selected device.
model = BehaviorCloningModel(
    num_history=Constants.NUM_HISTORY.value,
    num_features=Constants.INPUT_SIZE.value,
    output_size=Constants.OUTPUT_SIZE.value,
)
model = model.to(device)

# Mean-squared error between predicted and demonstrated actions.
loss_fn = nn.MSELoss()
loss_fn = loss_fn.to(device)

# Adam optimizer; the scheduler lowers the learning rate once the monitored
# loss has stopped decreasing for more than `patience` epochs.
learning_rate = Constants.lr.value
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=2)
# train the model
iterator = tqdm(range(1, Constants.EPOCHS.value + 1), total=Constants.EPOCHS.value, desc="Training")

for epoch in iterator:
    # --- one optimisation pass over the training split ---
    model.train()
    iterator.set_description("Training")
    for obs, action in train_dataloader:
        optimizer.zero_grad()
        obs = obs.to(device)
        action = action.to(device)
        pred = model(obs)
        loss = loss_fn(pred, action)
        loss.backward()
        optimizer.step()

    # --- evaluate on the held-out split ---
    iterator.set_description("Evaluating")
    model.eval()
    total_loss = 0.0
    num_samples = 0
    with torch.no_grad():
        for obs, action in test_dataloader:
            obs = obs.to(device)
            action = action.to(device)
            pred = model(obs)
            # loss_fn returns the batch mean; weight it by the batch size so a
            # short final batch does not count as much as a full one.
            batch_size = action.size(0)
            total_loss += loss_fn(pred, action).item() * batch_size
            num_samples += batch_size
    if num_samples == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise RuntimeError("test_dataloader yielded no samples; cannot compute the test loss")
    test_loss = total_loss / num_samples

    print('epoch: ', epoch, 'loss: ', test_loss)
    # The plateau scheduler monitors the held-out loss.
    scheduler.step(test_loss)

# save the model
torch.save(model.state_dict(), "model_dict.pt")

cpu


Training:   5%|▌         | 1/20 [00:26<08:16, 26.13s/it]  

epoch:  1 loss:  0.031370244686480475


Training:  10%|█         | 2/20 [00:53<08:03, 26.87s/it]  

epoch:  2 loss:  0.02829996175330645


Training:  15%|█▌        | 3/20 [01:22<07:50, 27.68s/it]  

epoch:  3 loss:  0.0236359830148733


Training:  20%|██        | 4/20 [01:50<07:26, 27.88s/it]  

epoch:  4 loss:  0.02417366156662788


Training:  25%|██▌       | 5/20 [02:18<07:00, 28.01s/it]  

epoch:  5 loss:  0.022788035235818294


Training:  30%|███       | 6/20 [02:46<06:33, 28.12s/it]  

epoch:  6 loss:  0.021704312111698806


Training:  35%|███▌      | 7/20 [03:16<06:12, 28.67s/it]  

epoch:  7 loss:  0.02051477834593169


Training:  40%|████      | 8/20 [03:45<05:43, 28.64s/it]  

epoch:  8 loss:  0.020970944850375643


Training:  45%|████▌     | 9/20 [04:13<05:15, 28.66s/it]  

epoch:  9 loss:  0.021274267733078223


Training:  50%|█████     | 10/20 [04:42<04:47, 28.72s/it]  

epoch:  10 loss:  0.0219746627804219


Training:  55%|█████▌    | 11/20 [05:12<04:20, 28.96s/it]  

epoch:  11 loss:  0.014420931835327512


Training:  60%|██████    | 12/20 [05:42<03:55, 29.41s/it]  

epoch:  12 loss:  0.013837602496232464


Training:  65%|██████▌   | 13/20 [06:11<03:24, 29.19s/it]  

epoch:  13 loss:  0.01318635954109194


Training:  70%|███████   | 14/20 [06:40<02:54, 29.02s/it]  

epoch:  14 loss:  0.012540544877934028


Training:  75%|███████▌  | 15/20 [07:08<02:24, 28.87s/it]  

epoch:  15 loss:  0.01227319124808699


Training:  80%|████████  | 16/20 [07:37<01:55, 28.79s/it]  

epoch:  16 loss:  0.012005609801290782


Training:  85%|████████▌ | 17/20 [08:07<01:27, 29.13s/it]  

epoch:  17 loss:  0.011912529677685694


Training:  90%|█████████ | 18/20 [08:35<00:57, 28.97s/it]  

epoch:  18 loss:  0.01157502455961832


Training:  95%|█████████▌| 19/20 [09:04<00:28, 28.84s/it]  

epoch:  19 loss:  0.011254866635530568


Evaluating: 100%|██████████| 20/20 [09:32<00:00, 28.64s/it]

epoch:  20 loss:  0.010946448775497783



