In [2]:
# Load the Drive helper and mount
# Google Drive is mounted at /content/drive; force_remount=True is passed so
# the mount call is issued again even when a previous mount exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# Change the working directory to the project folder. The rest of the notebook
# uses paths relative to this folder ("./demos" for input, "model.pt" for
# output), so this cell has to run successfully before the training cell.
%cd /content/drive/MyDrive/Documents/Fulbright\ Application\ 2020-2021/Courses/'Spring Semester 2024'/'Deep Decision and Reinforcement Learning'/project

Mounted at /content/drive
/content/drive/MyDrive/Documents/Fulbright Application 2020-2021/Courses/Spring Semester 2024/Deep Decision and Reinforcement Learning/project


In [None]:
import os
import json
from enum import Enum

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(device)

class MultiHistoryNetwork(nn.Module):
    """Single-layer LSTM followed by an MLP head over all per-step outputs.

    The LSTM output for every time step is kept, flattened into one vector
    per sample and passed through four linear layers (ReLU between them,
    Tanh at the end).

    Args:
        num_features: size of each per-step input vector given to the LSTM.
        hidden_size: hidden size of the LSTM.
        output_size: width of the last linear layer, i.e. of the returned tensor.
        num_history: number of time steps per input window. The first linear
            layer expects ``hidden_size * num_history`` inputs, so windows
            must contain exactly this many steps.
    """

    def __init__(self, num_features, hidden_size, output_size, num_history):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        # Width of the flattened LSTM output: one hidden vector per time step.
        self.output_features = hidden_size * num_history
        flat_width = self.output_features
        head_layers = [
            nn.Linear(flat_width, flat_width),
            nn.ReLU(),
            nn.Linear(flat_width, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, output_size),
            nn.Tanh(),
        ]
        self.linear = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a window of shape (batch_size, num_history, num_features) to
        a tensor of shape (batch_size, output_size)."""
        # Only the per-step outputs are used; the final (h, c) state is dropped.
        per_step_out, _ = self.lstm(x)
        # Merge the sequence and hidden dimensions into a single feature axis.
        batch_size = per_step_out.size(0)
        flattened = per_step_out.reshape(batch_size, -1)
        return self.linear(flattened)

class Constants(Enum):
    """Sizes and hyperparameters for this notebook, read as ``Constants.NAME.value``.

    NOTE: ``Enum`` treats members with equal values as aliases of the first
    such member, so every value here must stay distinct.
    """
    INPUT_SIZE = 25  # Number of features in observation (LSTM input size; also the zero-padding width in Dataset)
    HIDDEN_SIZE = 256  # Number of units in hidden layer (passed as the LSTM hidden_size)
    NUM_HISTORY = 5  # Number of history steps to use (window length per sample)
    OUTPUT_SIZE = 2  # Number of actions (passed as the model's output_size)
    DROPOUT = 0.25  # Dropout rate -- NOTE(review): not read by any live code visible in this notebook
    lr = 1e-3  # Learning rate (lower-case unlike the other members; read as Constants.lr.value)
    EPOCHS = 20  # Number of epochs to train

    # NOTE(review): not read by the visible code -- MultiHistoryNetwork passes
    # num_layers=1 to nn.LSTM directly, so this value currently has no effect.
    NUM_LAYERS = 4  # Number of LSTM layers

class Dataset(torch.utils.data.Dataset):
    """Sliding-window dataset: each action paired with the observations before it.

    Item ``idx`` is ``(history, action_list[idx])`` where ``history`` is
    ``obs_list[idx - num_history:idx]``, left-padded with zero rows when fewer
    than ``num_history`` earlier observations exist. ``history`` therefore
    always has ``num_history`` rows.

    NOTE(review): the window stops *before* ``idx``, so ``obs_list[idx]`` is
    not part of the input for ``action_list[idx]`` -- confirm this is intended
    and matches how windows are built at inference time.
    NOTE(review): windows are cut from the flat ``obs_list``; if it is a
    concatenation of several sequences, a window near the start of one
    sequence contains the tail of the previous one.

    Args:
        obs_list: tensor of observations, one row per step.
        action_list: tensor of actions, indexed in step with ``obs_list``.
        num_history: number of rows in every returned history window.
    """

    def __init__(self, obs_list, action_list, num_history):
        self.obs_list = obs_list
        self.action_list = action_list
        self.num_history = num_history

    def __len__(self):
        """Number of samples, one per observation row."""
        return len(self.obs_list)

    def __getitem__(self, idx):
        """Return ``(history, action)`` for sample ``idx``.

        Negative indices count from the end, as with sequences.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            # Without this, a negative idx would slice obs_list[0:idx] and
            # return a window of the wrong length.
            idx += size
        if not 0 <= idx < size:
            raise IndexError(f"index {idx} is out of range for dataset of size {size}")

        action = self.action_list[idx]

        # Retrieve the (up to) num_history observations preceding idx.
        start_idx = max(0, idx - self.num_history)
        history_obs = self.obs_list[start_idx:idx]

        # Left-pad with zeros so every window has num_history rows. new_zeros
        # takes feature shape, dtype and device from the data itself instead
        # of assuming a fixed feature count.
        pad_width = self.num_history - len(history_obs)
        if pad_width > 0:
            padding = history_obs.new_zeros((pad_width, *history_obs.shape[1:]))
            history_obs = torch.cat([padding, history_obs])

        return history_obs, action

# Load every demonstration in ./demos into two flat tensors, obs_list and
# action_list, with one row per recorded step.
#
# Each file is read as JSON and iterated as a sequence of episodes, where
# episode[0] holds the observations and episode[1] the actions.
#
# Per-episode tensors are collected in lists and concatenated once at the
# end; calling torch.cat inside the loop would copy everything loaded so far
# again for every episode.
obs_chunks = []
action_chunks = []

# sorted() fixes the load order, which os.listdir leaves unspecified.
for file in sorted(os.listdir("./demos")):
    # Skip entries whose name starts with "*" or "." (hidden files/folders).
    if file.startswith("*") or file.startswith("."):
        continue
    with open(f"./demos/{file}", "r") as f:
        data = json.load(f)

    for episode in data:
        # Truncate both sequences to the shorter one so they stay aligned.
        min_length = min(len(episode[0]), len(episode[1]))
        if min_length == 0:
            continue

        obs_chunks.append(torch.tensor(episode[0][:min_length], dtype=torch.float32))
        action_chunks.append(torch.tensor(episode[1][:min_length], dtype=torch.float32))

if not obs_chunks:
    # Fail here with a clear message rather than later inside the DataLoader.
    raise ValueError('No demonstration steps were loaded from "./demos"')

obs_list = torch.cat(obs_chunks)
action_list = torch.cat(action_chunks)

# Build the windowed dataset and split it 80/20 into train and test subsets.
dataset = Dataset(obs_list, action_list, Constants.NUM_HISTORY.value)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split so the same samples land in train/test on every run, which
# keeps the reported test loss comparable between runs.
# NOTE(review): neighbouring samples share most of their history window, so a
# random per-sample split puts near-duplicate windows on both sides; consider
# splitting by episode if the test loss should measure generalisation.
SPLIT_SEED = 0
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Training batches are reshuffled every epoch; evaluation order is fixed.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Build the network from the sizes in Constants and move it to the device.
model = MultiHistoryNetwork(
    num_features=Constants.INPUT_SIZE.value,
    hidden_size=Constants.HIDDEN_SIZE.value,
    output_size=Constants.OUTPUT_SIZE.value,
    num_history=Constants.NUM_HISTORY.value,
)
model = model.to(device)

# Mean-squared error between predicted and demonstrated actions.
loss_fn = nn.MSELoss().to(device)

# Adam optimiser; the scheduler lowers the learning rate once the monitored
# value (the test loss passed to scheduler.step) has stopped decreasing for
# more than `patience` epochs.
optimizer = torch.optim.Adam(params=model.parameters(), lr=Constants.lr.value)
scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=2)

# Progress bar over epochs, consumed by the training loop.
num_epochs = Constants.EPOCHS.value
iterator = tqdm(range(num_epochs), total=num_epochs, desc="Training")

# One pass over the training set, then one evaluation pass, per epoch.
for epoch in iterator:
    # ---- training ----
    model.train()
    iterator.set_description("Training")
    for obs, action in train_dataloader:
        obs = obs.to(device)
        action = action.to(device)
        optimizer.zero_grad()
        pred = model(obs)
        loss = loss_fn(pred, action)
        loss.backward()
        optimizer.step()

    # ---- evaluation ----
    iterator.set_description("Evaluating")
    model.eval()
    loss_sum = 0.0
    sample_count = 0
    with torch.no_grad():
        for obs, action in test_dataloader:
            obs = obs.to(device)
            action = action.to(device)
            pred = model(obs)
            # loss_fn returns the batch mean; weight it by the batch size so
            # a smaller final batch does not count as much as a full one.
            batch_size = obs.size(0)
            loss_sum += loss_fn(pred, action).item() * batch_size
            sample_count += batch_size
    # Mean loss per test sample over the whole test set.
    test_loss = loss_sum / sample_count
    print('epoch: ', epoch, 'loss: ', test_loss)
    # The scheduler monitors the test loss to decide when to reduce the LR.
    scheduler.step(test_loss)

# save the model
# NOTE: passing the module itself (rather than model.state_dict()) pickles the
# whole object, so loading "model.pt" requires the MultiHistoryNetwork class to
# be defined/importable under the same name in the loading process.
# The file is written relative to the current working directory.
torch.save(model, "model.pt")

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



cuda
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-3-18c047ad4123>", line 77, in <cell line: 77>
    for file in os.listdir("./demos"):
OSError: [Errno 107] Transport endpoint is not connected: './demos'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2099, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'OSError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/usr/local