# Train Agents


In [1]:
import os
import torch
from tqdm import tqdm
import numpy as np
from stable_baselines3 import DQN
import warnings
import pandas as pd
import gc  # Import garbage collector module

warnings.filterwarnings("ignore")

In [2]:
# ensure the module is re-imported after changes
import importlib

import datasets.dataset_utils
importlib.reload(datasets.dataset_utils)

from datasets.dataset_utils import set_all_seeds, create_environment, load_dataset, preprocess_and_split, create_dataloaders, inspect_dataset_sample

In [3]:
# ensure the module is re-imported after changes
import importlib

import agent_methods.behavioral_cloning_bc.bc_utils
importlib.reload(agent_methods.behavioral_cloning_bc.bc_utils)

from agent_methods.behavioral_cloning_bc.bc_utils import train_and_evaluate_BC

import agent_methods.implicit_q_learning_iql.iql_utils
importlib.reload(agent_methods.implicit_q_learning_iql.iql_utils)

from agent_methods.implicit_q_learning_iql.iql_utils import train_and_evaluate_IQL

import agent_methods.behavior_value_estimation_bve.bve_utils
importlib.reload(agent_methods.behavior_value_estimation_bve.bve_utils)

from agent_methods.behavior_value_estimation_bve.bve_utils import train_and_evaluate_BVE

In [4]:
# Base random seed; passed to set_all_seeds, create_environment, the data
# split/dataloader helpers and every training call below.
SEED = 12345
# Environment id; passed to create_environment and as `env_id=` to the training calls.
ENV_ID = 'SeaquestNoFrameskip-v4'
# Passed as `epochs=` to every train_and_evaluate_* call.
EPOCHS = 10
# Passed as `seeds=` to the train_and_evaluate_* calls of the 0% Perturbation
# section; their recorded output shows one training run per seed ("Seed 1/3").
SEEDS = 3

In [5]:
# set seed for reproducibility
set_all_seeds(SEED)

# use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

# initialize environment
# NOTE(review): `env` is not referenced by any later cell visible in this
# notebook (the training calls receive `env_id` instead) — confirm it is needed.
env = create_environment(env_id=ENV_ID, seed=SEED)

Device: cuda


# Training all agents on: Beginner Dataset

### 0% Perturbation

In [6]:
# Beginner dataset with 0% perturbation
dataset_path = 'datasets/beginner_logs/seaquest_beginner_perturb0.pkl'

# Registry of dataloaders, keyed by dataset name; reset on every run of this cell
dataloaders = {}

# Dataset name = file name without directory and extension
dataset_name = os.path.splitext(os.path.basename(dataset_path))[0]

# Step 1: load the dataset stored at dataset_path
print(f"Loading {dataset_name} dataset...")
data = load_dataset(dataset_path)

# Step 2: preprocess and split into train / test / tune parts
print(f"Data preprocessing for {dataset_name} dataset...")
train_data, test_data, tune_data = preprocess_and_split(
    data=data,
    seed=SEED,
    test_size=0.2,
    tune_size=0.1,
)

# Step 3: build one dataloader per split
print(f"Creating dataloaders for {dataset_name} dataset...")
train_loader, test_loader, tune_loader = create_dataloaders(
    train_data,
    test_data,
    tune_data,
    batch_size=64,
    seed=SEED,
)

# Register the loaders under the dataset name used by the training cells
dataloaders[dataset_name] = dict(
    train=train_loader,
    test=test_loader,
    tuning=tune_loader,
)

# Drop the raw data and splits, then collect, to free memory
del data, train_data, test_data, tune_data
gc.collect()

print(dataloaders.keys())

Loading seaquest_beginner_perturb0 dataset...
Data preprocessing for seaquest_beginner_perturb0 dataset...
Creating dataloaders for seaquest_beginner_perturb0 dataset...
dict_keys(['seaquest_beginner_perturb0'])


## BC

In [None]:
%%time

# train and evaluate the BC model on the Beginner dataset with 0% perturbation
# (SEEDS seeds, EPOCHS epochs each, using the dataloaders registered under
# 'seaquest_beginner_perturb0')
train_and_evaluate_BC(
    dataloaders=dataloaders,
    device=device,
    seeds=SEEDS,
    epochs=EPOCHS,
    dataset='seaquest_beginner_perturb0',
    env_id=ENV_ID,
    seed=SEED
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: BC - Beginner | Perturbation 0% -----")

Training BC on seaquest_beginner_perturb0
-- Starting Seed 1/3 --


Epochs: 100%|██████████| 10/10 [57:38<00:00, 345.80s/it]


Finished Training on seaquest_beginner_perturb0
    ➤ Avg Train Loss: -1.68118
    ➤ Avg Test Loss: -1.71870
    ➤ Avg Reward: 262.00
Model saved to agent_methods/behavioral_cloning_bc/bc_logs/seaquest_beginner/perturb0/bc_model_perturb0.pth
-- Starting Seed 2/3 --


Epochs: 100%|██████████| 10/10 [57:16<00:00, 343.67s/it]


Finished Training on seaquest_beginner_perturb0
    ➤ Avg Train Loss: -1.67643
    ➤ Avg Test Loss: -1.89362
    ➤ Avg Reward: 248.00
-- Starting Seed 3/3 --


Epochs: 100%|██████████| 10/10 [57:05<00:00, 342.55s/it]


Finished Training on seaquest_beginner_perturb0
    ➤ Avg Train Loss: -1.69835
    ➤ Avg Test Loss: -1.67948
    ➤ Avg Reward: 260.00
Return Stats saved to agent_methods/behavioral_cloning_bc/bc_logs/seaquest_beginner/perturb0/stats_perturb0.pkl
----- Execution time: BC - Beginner | Perturbation 0% -----
CPU times: total: 3h 31min 37s
Wall time: 2h 52min 8s


## IQL

In [None]:
%%time

# train and evaluate the IQL model on the Beginner dataset with 0% perturbation
# NOTE(review): the output saved with this cell stops at 0/10 epochs of the
# first seed, so no IQL results are recorded here — re-run to completion.
train_and_evaluate_IQL(
    dataloaders=dataloaders,
    device=device,
    seeds=SEEDS,
    epochs=EPOCHS,
    dataset='seaquest_beginner_perturb0',
    env_id=ENV_ID,
    seed=SEED
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: IQL - Beginner | Perturbation 0% -----")

Training IQL on seaquest_beginner_perturb0
-- Starting Seed 1/3 --


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

## BVE

In [7]:
%%time

# train and evaluate the BVE model on the Beginner dataset with 0% perturbation
# (SEEDS seeds, EPOCHS epochs each, using the dataloaders registered under
# 'seaquest_beginner_perturb0')
train_and_evaluate_BVE(
    dataloaders=dataloaders,
    device=device,
    seeds=SEEDS,
    epochs=EPOCHS,
    dataset='seaquest_beginner_perturb0',
    env_id=ENV_ID,
    seed=SEED
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: BVE - Beginner | Perturbation 0% -----")

Training BVE on seaquest_beginner_perturb0
-- Starting Seed 1/3 --


Epochs: 100%|██████████| 10/10 [1:06:34<00:00, 399.44s/it]


Finished Training on seaquest_beginner_perturb0
    ➤ Avg Train Loss: -1.56911
    ➤ Avg Test Loss: -1.46587
    ➤ Avg Reward: 20.00
Saved model to agent_methods/behavior_value_estimation/bve_logs/seaquest_beginner/perturb0/bve_model_perturb0.pth
-- Starting Seed 2/3 --


Epochs: 100%|██████████| 10/10 [1:07:12<00:00, 403.22s/it]


Finished Training on seaquest_beginner_perturb0
    ➤ Avg Train Loss: -1.61088
    ➤ Avg Test Loss: -1.50807
    ➤ Avg Reward: 0.00
-- Starting Seed 3/3 --


Epochs: 100%|██████████| 10/10 [1:06:48<00:00, 400.89s/it]


Finished Training on seaquest_beginner_perturb0
    ➤ Avg Train Loss: -1.56141
    ➤ Avg Test Loss: -1.48893
    ➤ Avg Reward: 44.00
Saved stats to agent_methods/behavior_value_estimation/bve_logs/seaquest_beginner/perturb0/stats_perturb0.pkl
----- Execution time: BVE - Beginner | Perturbation 0% -----
CPU times: total: 3h 56min 33s
Wall time: 3h 20min 45s


-----------------------------

### 5% Perturbation (Expert dataset)

In [7]:
# Paths of the datasets to load in this cell.
# NOTE(review): this is an Expert dataset, although the enclosing top-level
# section is titled "Beginner Dataset" — confirm which dataset is intended.
dataset_paths = [
    'datasets/expert/expert_logs/expert_dataset_perturbation_5.pkl',
]

dataloaders = {}  # dataset name -> {'train', 'test', 'tuning'} dataloaders

for path in dataset_paths:
    # Dataset name = file name without directory and extension. os.path is used
    # (as in the 0% Perturbation loading cell) so the result does not depend on
    # the path separator or on extra dots in the file name.
    dataset_name = os.path.splitext(os.path.basename(path))[0]

    print(f"Loading {dataset_name} dataset...")

    # Load dataset
    data = load_dataset(path)

    print(f"Data preprocessing for {dataset_name} dataset...")

    # Preprocess and split the data into train / test / tune parts
    train_data, test_data, tune_data = preprocess_and_split(
        data=data, seed=SEED, test_size=0.2, tune_size=0.1
    )

    print(f"Creating dataloaders for {dataset_name} dataset...")

    # Build one dataloader per split
    train_loader, test_loader, tune_loader = create_dataloaders(
        train_data, test_data, tune_data, batch_size=64, seed=SEED
    )

    # Register the loaders under the dataset name used by the training cells
    dataloaders[dataset_name] = {
        'train': train_loader,
        'test': test_loader,
        'tuning': tune_loader
    }

    # Drop the raw data and splits, then collect, to free memory
    del data, train_data, test_data, tune_data
    gc.collect()

print(dataloaders.keys())

Loading expert_dataset_perturbation_5 dataset...
Data preprocessing for expert_dataset_perturbation_5 dataset...
Creating dataloaders for expert_dataset_perturbation_5 dataset...
dict_keys(['expert_dataset_perturbation_5'])


In [8]:
# Name under which the dataloaders to inspect are registered
dataset_name_to_inspect = 'expert_dataset_perturbation_5'

# (split key, banner) pairs in display order; only the first banner ends with a colon
split_banners = (
    ('train', f"Inspecting {dataset_name_to_inspect} Training Set:"),
    ('test', f"Inspecting {dataset_name_to_inspect} Testing Set"),
    ('tuning', f"Inspecting {dataset_name_to_inspect} Tuning Set"),
)

for position, (split_key, banner) in enumerate(split_banners):
    # separator between consecutive splits; none before the first one
    if position > 0:
        print("\n")
    print(banner)
    inspect_dataset_sample(dataloaders[dataset_name_to_inspect][split_key])

Inspecting expert_dataset_perturbation_5 Training Set:
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type: torch.float32
Actions Batch Data Type: torch.int64
Rewards Batch Data Type: torch.float32
Next States Batch Data Type: torch.float32
Dones Batch Data Type: torch.float32

First Sample Details:
First State Shape: torch.Size([3, 210, 160])
First State Min/Max: 0.0000/0.8392
First Action: 8
First Reward: 0.0000
First Next State Shape: torch.Size([3, 210, 160])
First Done: 0.0


Inspecting expert_dataset_perturbation_5 Testing Set
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type

## BC

In [9]:
%%time

# train and evaluate the BC model on the Expert dataset with 5% perturbation
# NOTE(review): this call passes `trials=`, while the 0% Perturbation section
# calls train_and_evaluate_BC with `seeds=` — confirm the keyword against the
# current signature.
train_and_evaluate_BC(
    dataloaders=dataloaders,
    device=device,
    trials=1,
    epochs=EPOCHS,
    dataset='expert_dataset_perturbation_5',
    env_id=ENV_ID,
    seed=SEED
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: BC - Expert | Perturbation 5% -----")

Training on expert_dataset_perturbation_5
-- Starting Trial 1/1 --


Epochs: 100%|██████████| 10/10 [1:07:02<00:00, 402.22s/it]


Initial learning rate: 0.001000
Adjusted learning rate in epoch 10: 0.000900
Finished Training on expert_dataset_perturbation_5 - Training Loss: 0.25136
                                                   - Tuning Loss: 1.60741
                                                   - Test Loss: 1.55508
                                                   - Reward: 260.00
Model saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_5/bc_model_5.pth
Return Stats saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_5/stats_5.pkl
----- Execution time: BC - Expert | Perturbation 5% -----
CPU times: total: 1h 20min 30s
Wall time: 1h 7min 4s


#### continue training the agent

In [9]:
# continue training the BC model for 10 more epochs on the Expert dataset
# with 5% perturbation
# NOTE(review): continue_training_BC is not imported by the import cells at the
# top of this notebook — confirm it is defined before running on a fresh kernel.
continue_training_BC(
    dataloaders=dataloaders,
    further_epochs=10,
    dataset="expert_dataset_perturbation_5",
    env_id=ENV_ID,
    seed=SEED,
    device=device
)

Continuing BC Training on expert_dataset_perturbation_5
Loading existing model from behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_5/bc_model_5.pth
Loading existing stats from behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_5/stats_5.pkl


Continued Training Epochs: 100%|██████████| 10/10 [1:11:00<00:00, 426.07s/it]

Finished additional Training on expert_dataset_perturbation_5 - Training Loss: 0.11232
                                                              - Tuning Loss: 2.42555
                                                              - Test Loss: 2.36914
                                                              - Reward: 600.00
Updated DQN model saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_5/bc_model_5_continued.pth
Updated stats saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_5/stats_5_continued.pkl





## IQL

In [9]:
%%time

# train and evaluate the IQL model on the Expert dataset with 5% perturbation
# NOTE(review): this call passes `trials=`, while the 0% Perturbation section
# calls train_and_evaluate_IQL with `seeds=` — confirm the keyword against the
# current signature.
train_and_evaluate_IQL(
    dataloaders=dataloaders,
    device=device,
    trials=1,
    epochs=EPOCHS,
    dataset='expert_dataset_perturbation_5',
    env_id=ENV_ID,
    seed=SEED
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: IQL - Expert | Perturbation 5% -----")

Training IQL on expert_dataset_perturbation_5
-- Starting Trial 1/1 --


Epochs: 100%|██████████| 10/10 [1:13:05<00:00, 438.53s/it]


Finished Training on expert_dataset_perturbation_5 - Actor Loss: 5.36442
                                                   - Critic 1 Loss: 0.22395
                                                   - Critic 2 Loss: 0.17995
                                                   - Value Loss: 0.34945
                                                   - Tuning Loss: 1.06684
                                                   - Test Loss: 1.05855
                                                   - Reward: 344.00000
Model saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_5/iql_model_5.pth
Return Stats saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_5/stats_5.pkl
----- Execution time: IQL - Expert | Perturbation 5% -----
CPU times: total: 1h 27min 16s
Wall time: 1h 13min 6s


#### continue training the agent

In [13]:
# continue training the IQL model for 10 more epochs on the Expert dataset
# with 5% perturbation
# NOTE(review): continue_training_IQL is not imported by the import cells at the
# top of this notebook — confirm it is defined before running on a fresh kernel.
continue_training_IQL(
    dataloaders=dataloaders,
    further_epochs=10,
    dataset="expert_dataset_perturbation_5",
    env_id=ENV_ID,
    seed=SEED,
    device=device
)

Continuing IQL Training on expert_dataset_perturbation_5
Loading existing model from implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_5/iql_model_5.pth
Loading existing stats from implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_5/stats_5.pkl


Continued Training Epochs: 100%|██████████| 10/10 [1:18:49<00:00, 472.94s/it]


Finished additional Training on expert_dataset_perturbation_5 - Actor Loss: 2.89492
                                                              - Critic 1 Loss: 0.18770
                                                              - Critic 2 Loss: 0.18581
                                                              - Value Loss: 0.01366
                                                              - Tuning Loss: 1.25915
                                                              - Test Loss: 1.22187
                                                              - Reward: 319.00
Updated DQN model saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_5/iql_model_5_continued.pth
Updated stats saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_5/stats_5_continued.pkl


## DQN

In [9]:
%%time

# train DQN agent on Expert dataset with 5% perturbation
# NOTE(review): train_and_evaluate_DQN is not imported by the import cells at
# the top of this notebook (only the BC, IQL and BVE helpers are) — confirm it
# is defined before running on a fresh kernel.
# NOTE(review): this call passes `trials=`, while the train_and_evaluate_* calls
# of the 0% Perturbation section pass `seeds=` — confirm the keyword.
train_and_evaluate_DQN(
    dataloaders=dataloaders,
    device=device,
    trials=1,
    epochs=EPOCHS, 
    dataset='expert_dataset_perturbation_5',
    env_id=ENV_ID,
    seed=SEED 
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: DQN - Expert | Perturbation 5% -----") 

Training on expert_dataset_perturbation_5
Filling replay buffer from dataset...
Replay buffer filled with 108439 samples.
-- Starting Trial 1/1 --


Epochs: 100%|██████████| 10/10 [54:23<00:00, 326.33s/it]


Initial learning rate: 0.000500
Adjusted learning rate in epoch 8: 0.000450
Finished Training on expert_dataset_perturbation_5 - Training Loss: 0.13587
                                                   - Tuning Loss: 0.15067
                                                   - Test Loss: 0.14560
                                                   - Reward: 0.00
Model saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_5/dqn_model_5.pth
Return Stats saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_5/stats_5.pkl
----- Execution time: DQN - Expert | Perturbation 5% -----
CPU times: total: 1h 5min 31s
Wall time: 56min 36s


### continue training the agent

In [9]:
# Continue training the DQN agent for 10 more epochs on the Expert dataset
# with 5% perturbation
# NOTE(review): continue_training_DQN is not imported by the import cells at the
# top of this notebook — confirm it is defined before running on a fresh kernel.
continue_training_DQN(
    dataloaders=dataloaders,
    further_epochs=10,
    dataset="expert_dataset_perturbation_5",
    env_id=ENV_ID,
    seed=SEED,
    device=device
)

Continuing DQN Training on expert_dataset_perturbation_5
Loading existing model from deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_5/dqn_model_5.pth
Loading existing stats from deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_5/stats_5.pkl
Filling replay buffer...
Replay buffer filled with 108439 samples.


Continued DQN Epochs: 100%|██████████| 10/10 [55:54<00:00, 335.49s/it]


Finished additional Training on expert_dataset_perturbation_5 - Training Loss: 0.14865
                                                              - Tuning Loss: 0.15051
                                                              - Test Loss: 0.14544
                                                              - Reward: 0.00
Updated DQN model saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_5/dqn_model_5_continued.pth
Updated stats saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_5/stats_5_continued.pkl


-----------------

### 10% Perturbation (Expert dataset)

In [7]:
# Paths of the datasets to load in this cell.
# NOTE(review): this is an Expert dataset, although the enclosing top-level
# section is titled "Beginner Dataset" — confirm which dataset is intended.
dataset_paths = [
    'datasets/expert/expert_logs/expert_dataset_perturbation_10.pkl',
]

dataloaders = {}  # dataset name -> {'train', 'test', 'tuning'} dataloaders

for path in dataset_paths:
    # Dataset name = file name without directory and extension. os.path is used
    # (as in the 0% Perturbation loading cell) so the result does not depend on
    # the path separator or on extra dots in the file name.
    dataset_name = os.path.splitext(os.path.basename(path))[0]

    print(f"Loading {dataset_name} dataset...")

    # Load dataset
    data = load_dataset(path)

    print(f"Data preprocessing for {dataset_name} dataset...")

    # Preprocess and split the data into train / test / tune parts
    train_data, test_data, tune_data = preprocess_and_split(
        data=data, seed=SEED, test_size=0.2, tune_size=0.1
    )

    print(f"Creating dataloaders for {dataset_name} dataset...")

    # Build one dataloader per split
    train_loader, test_loader, tune_loader = create_dataloaders(
        train_data, test_data, tune_data, batch_size=64, seed=SEED
    )

    # Register the loaders under the dataset name used by the training cells
    dataloaders[dataset_name] = {
        'train': train_loader,
        'test': test_loader,
        'tuning': tune_loader
    }

    # Drop the raw data and splits, then collect, to free memory
    del data, train_data, test_data, tune_data
    gc.collect()

print(dataloaders.keys())

Loading expert_dataset_perturbation_10 dataset...
Data preprocessing for expert_dataset_perturbation_10 dataset...
Creating dataloaders for expert_dataset_perturbation_10 dataset...
dict_keys(['expert_dataset_perturbation_10'])


In [8]:
# Name under which the dataloaders to inspect are registered
dataset_name_to_inspect = 'expert_dataset_perturbation_10'

# (split key, banner) pairs in display order; only the first banner ends with a colon
split_banners = (
    ('train', f"Inspecting {dataset_name_to_inspect} Training Set:"),
    ('test', f"Inspecting {dataset_name_to_inspect} Testing Set"),
    ('tuning', f"Inspecting {dataset_name_to_inspect} Tuning Set"),
)

for position, (split_key, banner) in enumerate(split_banners):
    # separator between consecutive splits; none before the first one
    if position > 0:
        print("\n")
    print(banner)
    inspect_dataset_sample(dataloaders[dataset_name_to_inspect][split_key])

Inspecting expert_dataset_perturbation_10 Training Set:
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type: torch.float32
Actions Batch Data Type: torch.int64
Rewards Batch Data Type: torch.float32
Next States Batch Data Type: torch.float32
Dones Batch Data Type: torch.float32

First Sample Details:
First State Shape: torch.Size([3, 210, 160])
First State Min/Max: 0.0000/0.8392
First Action: 9
First Reward: 0.0000
First Next State Shape: torch.Size([3, 210, 160])
First Done: 0.0


Inspecting expert_dataset_perturbation_10 Testing Set
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Ty

## BC

In [9]:
%%time

# train and evaluate the BC model on the Expert dataset with 10% perturbation
# NOTE(review): this call passes `trials=`, while the 0% Perturbation section
# calls train_and_evaluate_BC with `seeds=` — confirm the keyword against the
# current signature.
train_and_evaluate_BC(
    dataloaders=dataloaders,
    device=device,
    trials=1,
    epochs=EPOCHS,
    dataset='expert_dataset_perturbation_10',
    env_id=ENV_ID,
    seed=SEED
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: BC - Expert | Perturbation 10% -----")

Training on expert_dataset_perturbation_10
-- Starting Trial 1/1 --


Epochs: 100%|██████████| 10/10 [1:06:48<00:00, 400.87s/it]


Initial learning rate: 0.001000
Adjusted learning rate in epoch 10: 0.000900
Finished Training on expert_dataset_perturbation_10 - Training Loss: 0.34025
                                                   - Tuning Loss: 2.17573
                                                   - Test Loss: 2.12246
                                                   - Reward: 400.00
Model saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_10/bc_model_10.pth
Return Stats saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_10/stats_10.pkl
----- Execution time: BC - Expert | Perturbation 10% -----
CPU times: total: 1h 19min 56s
Wall time: 1h 6min 50s


### continue training the agent

In [9]:
# continue training the BC model for 10 more epochs on the Expert dataset
# with 10% perturbation
# NOTE(review): continue_training_BC is not imported by the import cells at the
# top of this notebook — confirm it is defined before running on a fresh kernel.
continue_training_BC(
    dataloaders=dataloaders,
    further_epochs=10,
    dataset="expert_dataset_perturbation_10",
    env_id=ENV_ID,
    seed=SEED,
    device=device
)

Continuing BC Training on expert_dataset_perturbation_10
Loading existing model from behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_10/bc_model_10.pth
Loading existing stats from behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_10/stats_10.pkl


Continued Training Epochs: 100%|██████████| 10/10 [1:11:24<00:00, 428.41s/it]

Finished additional Training on expert_dataset_perturbation_10 - Training Loss: 0.17036
                                                              - Tuning Loss: 3.00531
                                                              - Test Loss: 2.93343
                                                              - Reward: 240.00
Updated DQN model saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_10/bc_model_10_continued.pth
Updated stats saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_10/stats_10_continued.pkl





## IQL

In [9]:
%%time

# train and evaluate the IQL model on the Expert dataset with 10% perturbation
# NOTE(review): this call passes `trials=`, while the 0% Perturbation section
# calls train_and_evaluate_IQL with `seeds=` — confirm the keyword against the
# current signature.
train_and_evaluate_IQL(
    dataloaders=dataloaders,
    device=device,
    trials=1,
    epochs=EPOCHS,
    dataset='expert_dataset_perturbation_10',
    env_id=ENV_ID,
    seed=SEED
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: IQL - Expert | Perturbation 10% -----")

Training IQL on expert_dataset_perturbation_10
-- Starting Trial 1/1 --


Epochs: 100%|██████████| 10/10 [1:15:46<00:00, 454.62s/it]

Finished Training on expert_dataset_perturbation_10 - Actor Loss: 7.07386
                                                   - Critic 1 Loss: 0.50477
                                                   - Critic 2 Loss: 0.63776
                                                   - Value Loss: 1.58096
                                                   - Tuning Loss: 1.31335
                                                   - Test Loss: 1.30949
                                                   - Reward: 358.00000
Model saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_10/iql_model_10.pth
Return Stats saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_10/stats_10.pkl
----- Execution time: IQL - Expert | Perturbation 10% -----
CPU times: total: 1h 28min 34s
Wall time: 1h 15min 47s





### continue training the agent

In [9]:
# continue training the IQL model for 10 more epochs on the Expert dataset
# with 10% perturbation
# NOTE(review): continue_training_IQL is not imported by the import cells at the
# top of this notebook — confirm it is defined before running on a fresh kernel.
continue_training_IQL(
    dataloaders=dataloaders,
    further_epochs=10,
    dataset="expert_dataset_perturbation_10",
    env_id=ENV_ID,
    seed=SEED,
    device=device
)

Continuing IQL Training on expert_dataset_perturbation_10
Loading existing model from implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_10/iql_model_10.pth
Loading existing stats from implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_10/stats_10.pkl


Continued Training Epochs: 100%|██████████| 10/10 [1:17:26<00:00, 464.62s/it]

Finished additional Training on expert_dataset_perturbation_10 - Actor Loss: 3.95128
                                                              - Critic 1 Loss: 0.41487
                                                              - Critic 2 Loss: 0.35210
                                                              - Value Loss: 0.19291
                                                              - Tuning Loss: 1.54360
                                                              - Test Loss: 1.52630
                                                              - Reward: 280.00
Updated DQN model saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_10/iql_model_10_continued.pth
Updated stats saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_10/stats_10_continued.pkl





## DQN

In [9]:
%%time

# train DQN agent on Expert dataset with 10% perturbation
# NOTE(review): train_and_evaluate_DQN is not imported by the import cells at
# the top of this notebook (only the BC, IQL and BVE helpers are) — confirm it
# is defined before running on a fresh kernel.
# NOTE(review): this call passes `trials=`, while the train_and_evaluate_* calls
# of the 0% Perturbation section pass `seeds=` — confirm the keyword.
train_and_evaluate_DQN(
    dataloaders=dataloaders,
    device=device,
    trials=1,
    epochs=EPOCHS, 
    dataset='expert_dataset_perturbation_10',
    env_id=ENV_ID,
    seed=SEED 
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: DQN - Expert | Perturbation 10% -----") 

Training on expert_dataset_perturbation_10
Filling replay buffer from dataset...
Replay buffer filled with 108220 samples.
-- Starting Trial 1/1 --


Epochs: 100%|██████████| 10/10 [37:36<00:00, 225.61s/it]


Initial learning rate: 0.000500
Adjusted learning rate in epoch 10: 0.000450
Finished Training on expert_dataset_perturbation_10 - Training Loss: 0.14010
                                                   - Tuning Loss: 0.13340
                                                   - Test Loss: 0.14226
                                                   - Reward: 40.00
Model saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_10/dqn_model_10.pth
Return Stats saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_10/stats_10.pkl
----- Execution time: DQN - Expert | Perturbation 10% -----
CPU times: total: 48min 39s
Wall time: 39min 42s


### continue training the agent

In [9]:
# Continue training the DQN agent for 10 more epochs on the Expert dataset
# with 10% perturbation
# NOTE(review): continue_training_DQN is not imported by the import cells at the
# top of this notebook — confirm it is defined before running on a fresh kernel.
continue_training_DQN(
    dataloaders=dataloaders,
    further_epochs=10,
    dataset="expert_dataset_perturbation_10",
    env_id=ENV_ID,
    seed=SEED,
    device=device
)

Continuing DQN Training on expert_dataset_perturbation_10
Loading existing model from deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_10/dqn_model_10.pth
Loading existing stats from deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_10/stats_10.pkl
Filling replay buffer...
Replay buffer filled with 108220 samples.


Continued DQN Epochs: 100%|██████████| 10/10 [35:00<00:00, 210.04s/it]


Finished additional Training on expert_dataset_perturbation_10 - Training Loss: 0.14137
                                                              - Tuning Loss: 0.13286
                                                              - Test Loss: 0.14172
                                                              - Reward: 40.00
Updated DQN model saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_10/dqn_model_10_continued.pth
Updated stats saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_10/stats_10_continued.pkl


-----------------

### 20% Perturbation (Expert dataset)

In [7]:
# Paths of the datasets to load in this cell.
# NOTE(review): this is an Expert dataset, although the enclosing top-level
# section is titled "Beginner Dataset" — confirm which dataset is intended.
dataset_paths = [
    'datasets/expert/expert_logs/expert_dataset_perturbation_20.pkl',
]

dataloaders = {}  # dataset name -> {'train', 'test', 'tuning'} dataloaders

for path in dataset_paths:
    # Dataset name = file name without directory and extension. os.path is used
    # (as in the 0% Perturbation loading cell) so the result does not depend on
    # the path separator or on extra dots in the file name.
    dataset_name = os.path.splitext(os.path.basename(path))[0]

    print(f"Loading {dataset_name} dataset...")

    # Load dataset
    data = load_dataset(path)

    print(f"Data preprocessing for {dataset_name} dataset...")

    # Preprocess and split the data into train / test / tune parts
    train_data, test_data, tune_data = preprocess_and_split(
        data=data, seed=SEED, test_size=0.2, tune_size=0.1
    )

    print(f"Creating dataloaders for {dataset_name} dataset...")

    # Build one dataloader per split
    train_loader, test_loader, tune_loader = create_dataloaders(
        train_data, test_data, tune_data, batch_size=64, seed=SEED
    )

    # Register the loaders under the dataset name used by the training cells
    dataloaders[dataset_name] = {
        'train': train_loader,
        'test': test_loader,
        'tuning': tune_loader
    }

    # Drop the raw data and splits, then collect, to free memory
    del data, train_data, test_data, tune_data
    gc.collect()

print(dataloaders.keys())

Loading expert_dataset_perturbation_20 dataset...
Data preprocessing for expert_dataset_perturbation_20 dataset...
Creating dataloaders for expert_dataset_perturbation_20 dataset...
dict_keys(['expert_dataset_perturbation_20'])


In [8]:
# Name under which the dataloaders to inspect are registered
dataset_name_to_inspect = 'expert_dataset_perturbation_20'

# (split key, banner) pairs in display order; only the first banner ends with a colon
split_banners = (
    ('train', f"Inspecting {dataset_name_to_inspect} Training Set:"),
    ('test', f"Inspecting {dataset_name_to_inspect} Testing Set"),
    ('tuning', f"Inspecting {dataset_name_to_inspect} Tuning Set"),
)

for position, (split_key, banner) in enumerate(split_banners):
    # separator between consecutive splits; none before the first one
    if position > 0:
        print("\n")
    print(banner)
    inspect_dataset_sample(dataloaders[dataset_name_to_inspect][split_key])

Inspecting expert_dataset_perturbation_20 Training Set:
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type: torch.float32
Actions Batch Data Type: torch.int64
Rewards Batch Data Type: torch.float32
Next States Batch Data Type: torch.float32
Dones Batch Data Type: torch.float32

First Sample Details:
First State Shape: torch.Size([3, 210, 160])
First State Min/Max: 0.0000/0.9255
First Action: 15
First Reward: 0.0000
First Next State Shape: torch.Size([3, 210, 160])
First Done: 0.0


Inspecting expert_dataset_perturbation_20 Testing Set
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data T

## BC

In [9]:
%%time

# train and evaluate the BC model on the Expert dataset with 20% perturbation
# NOTE(review): this call passes `trials=`, while the 0% Perturbation section
# calls train_and_evaluate_BC with `seeds=` — confirm the keyword against the
# current signature.
train_and_evaluate_BC(
    dataloaders=dataloaders,
    device=device,
    trials=1,
    epochs=EPOCHS,
    dataset='expert_dataset_perturbation_20',
    env_id=ENV_ID,
    seed=SEED
)

# label for the CPU/wall time report that %%time prints after the cell finishes
print("----- Execution time: BC - Expert | Perturbation 20% -----")

Training on expert_dataset_perturbation_20
-- Starting Trial 1/1 --


Epochs: 100%|██████████| 10/10 [1:04:07<00:00, 384.74s/it]


Initial learning rate: 0.001000
Adjusted learning rate in epoch 9: 0.000900
Finished Training on expert_dataset_perturbation_20 - Training Loss: 0.52623
                                                   - Tuning Loss: 2.80368
                                                   - Test Loss: 2.79225
                                                   - Reward: 500.00
Model saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_20/bc_model_20.pth
Return Stats saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_20/stats_20.pkl
----- Execution time: BC - Expert | Perturbation 20% -----
CPU times: total: 1h 17min 31s
Wall time: 1h 4min 9s


### continue training the agent

In [9]:
# continue training the BC model for 10 more epochs on the Expert dataset
# with 20% perturbation
# NOTE(review): continue_training_BC is not imported by the import cells at the
# top of this notebook — confirm it is defined before running on a fresh kernel.
continue_training_BC(
    dataloaders=dataloaders,
    further_epochs=10,
    dataset="expert_dataset_perturbation_20",
    env_id=ENV_ID,
    seed=SEED,
    device=device
)

Continuing BC Training on expert_dataset_perturbation_20
Loading existing model from behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_20/bc_model_20.pth
Loading existing stats from behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_20/stats_20.pkl


Continued Training Epochs: 100%|██████████| 10/10 [1:08:49<00:00, 412.98s/it]

Finished additional Training on expert_dataset_perturbation_20 - Training Loss: 0.30311
                                                              - Tuning Loss: 3.89714
                                                              - Test Loss: 3.85889
                                                              - Reward: 340.00
Updated DQN model saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_20/bc_model_20_continued.pth
Updated stats saved to behavioral_cloning_bc/bc_logs/expert_dataset/perturbation_20/stats_20_continued.pkl





## IQL

In [9]:
%%time

# Train and evaluate the IQL model on the Expert dataset with 20% perturbation.
# One trial of EPOCHS epochs is run; `dataset` names which entry of
# `dataloaders` is used (presumably a key of that dict -- confirm in iql_utils).
train_and_evaluate_IQL(
    dataloaders=dataloaders,
    device=device,
    trials=1,
    epochs=EPOCHS,
    dataset='expert_dataset_perturbation_20',
    env_id=ENV_ID,
    seed=SEED
)

# Banner only: the actual CPU/wall timings are printed by the %%time magic at
# the top of this cell, directly after this line in the output.
print("----- Execution time: IQL - Expert | Perturbation 20% -----")

Training IQL on expert_dataset_perturbation_20
-- Starting Trial 1/1 --


Epochs: 100%|██████████| 10/10 [1:11:04<00:00, 426.47s/it]


Finished Training on expert_dataset_perturbation_20 - Actor Loss: 9.05648
                                                   - Critic 1 Loss: 0.25150
                                                   - Critic 2 Loss: 0.33957
                                                   - Value Loss: 1.78412
                                                   - Tuning Loss: 1.69753
                                                   - Test Loss: 1.69888
                                                   - Reward: 302.00000
Model saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_20/iql_model_20.pth
Return Stats saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_20/stats_20.pkl
----- Execution time: IQL - Expert | Perturbation 20% -----
CPU times: total: 1h 23min 20s
Wall time: 1h 11min 6s


### continue training the agent

In [9]:
# Continue training the IQL agent for 10 more epochs on the Expert dataset
# with 20% perturbation.
# NOTE(review): continue_training_IQL is not among the names imported in the
# import cells at the top of the notebook -- confirm it is imported before this
# cell runs on a fresh kernel.
continue_training_IQL(
    dataloaders=dataloaders,
    further_epochs=10,
    dataset="expert_dataset_perturbation_20",
    env_id=ENV_ID,
    seed=SEED,
    device=device
)

Continuing IQL Training on expert_dataset_perturbation_20
Loading existing model from implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_20/iql_model_20.pth
Loading existing stats from implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_20/stats_20.pkl


Continued Training Epochs: 100%|██████████| 10/10 [1:14:06<00:00, 444.60s/it]

Finished additional Training on expert_dataset_perturbation_20 - Actor Loss: 5.71745
                                                              - Critic 1 Loss: 0.17306
                                                              - Critic 2 Loss: 0.17535
                                                              - Value Loss: 0.01386
                                                              - Tuning Loss: 2.06185
                                                              - Test Loss: 2.05134
                                                              - Reward: 305.00
Updated DQN model saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_20/iql_model_20_continued.pth
Updated stats saved to implicit_q_learning_iql/iql_logs/expert_dataset/perturbation_20/stats_20_continued.pkl





## DQN

In [9]:
%%time

# Train and evaluate the DQN agent on the Expert dataset with 20% perturbation.
# One trial of EPOCHS epochs is run.
# NOTE(review): train_and_evaluate_DQN is not among the names imported in the
# import cells at the top of the notebook -- confirm it is imported before this
# cell runs on a fresh kernel.
train_and_evaluate_DQN(
    dataloaders=dataloaders,
    device=device,
    trials=1,
    epochs=EPOCHS, 
    dataset='expert_dataset_perturbation_20',
    env_id=ENV_ID,
    seed=SEED 
)

# Banner only: the actual CPU/wall timings are printed by the %%time magic at
# the top of this cell, directly after this line in the output.
print("----- Execution time: DQN - Expert | Perturbation 20% -----") 

Training on expert_dataset_perturbation_20
Filling replay buffer from dataset...
Replay buffer filled with 105804 samples.
-- Starting Trial 1/1 --


Epochs: 100%|██████████| 10/10 [52:32<00:00, 315.22s/it]


Initial learning rate: 0.000500
Adjusted learning rate in epoch 7: 0.000450
Finished Training on expert_dataset_perturbation_20 - Training Loss: 0.13098
                                                   - Tuning Loss: 0.14870
                                                   - Test Loss: 0.12809
                                                   - Reward: 0.00
Model saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_20/dqn_model_20.pth
Return Stats saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_20/stats_20.pkl
----- Execution time: DQN - Expert | Perturbation 20% -----
CPU times: total: 1h 3min 10s
Wall time: 54min 38s


### continue training the agent

In [9]:
# Continue training the DQN agent for 10 more epochs on the Expert dataset
# with 20% perturbation.
# NOTE(review): continue_training_DQN is not among the names imported in the
# import cells at the top of the notebook -- confirm it is imported before this
# cell runs on a fresh kernel.
continue_training_DQN(
    dataloaders=dataloaders,
    further_epochs=10,
    dataset="expert_dataset_perturbation_20",
    env_id=ENV_ID,
    seed=SEED,
    device=device
)

Continuing DQN Training on expert_dataset_perturbation_20
Loading existing model from deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_20/dqn_model_20.pth
Loading existing stats from deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_20/stats_20.pkl
Filling replay buffer...
Replay buffer filled with 105804 samples.


Continued DQN Epochs: 100%|██████████| 10/10 [51:34<00:00, 309.48s/it]


Finished additional Training on expert_dataset_perturbation_20 - Training Loss: 0.13528
                                                              - Tuning Loss: 0.14885
                                                              - Test Loss: 0.12823
                                                              - Reward: 0.00
Updated DQN model saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_20/dqn_model_20_continued.pth
Updated stats saved to deep_q_network_dqn/dqn_logs/expert_dataset/perturbation_20/stats_20_continued.pkl


------------------

# Training the Mixed Agents for BC, IQL and DQN

### 0% Perturbation

In [7]:
# Dataset files to load for this section; each file yields one entry in `dataloaders`
dataset_paths = [
    'datasets/mixed/mixed_logs/mixed_dataset_perturbation_0.pkl',
]

dataloaders = {}  # Maps dataset name -> {'train', 'test', 'tuning'} loaders

for path in dataset_paths:
    # Dataset name = file name without its extension. os.path is used (the same
    # idiom as the notebook's first dataset-loading cell) so that the name is
    # independent of the path separator and only the final extension is removed.
    dataset_name = os.path.splitext(os.path.basename(path))[0]

    print(f"Loading {dataset_name} dataset...")

    # Load dataset
    data = load_dataset(path)

    print(f"Data preprocessing for {dataset_name} dataset...")

    # Preprocess and split the data
    train_data, test_data, tune_data = preprocess_and_split(
        data=data, seed=SEED, test_size=0.2, tune_size=0.1
    )

    print(f"Creating dataloaders for {dataset_name} dataset...")

    # Create dataloaders using the adjusted function
    train_loader, test_loader, tune_loader = create_dataloaders(
        train_data, test_data, tune_data, batch_size=64, seed=SEED
    )

    # Store dataloaders under the dataset name used by the later cells
    dataloaders[dataset_name] = {
        'train': train_loader,
        'test': test_loader,
        'tuning': tune_loader
    }

    # Drop the raw/split data now that the loaders exist, to free up memory
    del data, train_data, test_data, tune_data
    gc.collect()

print(dataloaders.keys())

Loading mixed_dataset_perturbation_0 dataset...
Data preprocessing for mixed_dataset_perturbation_0 dataset...
Creating dataloaders for mixed_dataset_perturbation_0 dataset...
dict_keys(['mixed_dataset_perturbation_0'])


In [8]:
# Dataset whose loaders are sampled below
dataset_name_to_inspect = 'mixed_dataset_perturbation_0'

# Training and testing splits: banner, sample dump, then a blank separator
for split_key, banner in (
    ('train', f"Inspecting {dataset_name_to_inspect} Training Set:"),
    ('test', f"Inspecting {dataset_name_to_inspect} Testing Set"),
):
    print(banner)
    inspect_dataset_sample(dataloaders[dataset_name_to_inspect][split_key])
    print("\n")

# Tuning split comes last and is not followed by a separator
print(f"Inspecting {dataset_name_to_inspect} Tuning Set")
inspect_dataset_sample(dataloaders[dataset_name_to_inspect]['tuning'])

Inspecting mixed_dataset_perturbation_0 Training Set:
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type: torch.float32
Actions Batch Data Type: torch.int64
Rewards Batch Data Type: torch.float32
Next States Batch Data Type: torch.float32
Dones Batch Data Type: torch.float32

First Sample Details:
First State Shape: torch.Size([3, 210, 160])
First State Min/Max: 0.0000/0.8392
First Action: 13
First Reward: 0.0000
First Next State Shape: torch.Size([3, 210, 160])
First Done: 0.0


Inspecting mixed_dataset_perturbation_0 Testing Set
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type:

## BC

### continue training the agent

## IQL

### continue training the agent

## DQN

### continue training the agent

-----------------------------

### 5% Perturbation

In [7]:
# Dataset files to load for this section; each file yields one entry in `dataloaders`
dataset_paths = [
    'datasets/mixed/mixed_logs/mixed_dataset_perturbation_5.pkl',
]

dataloaders = {}  # Maps dataset name -> {'train', 'test', 'tuning'} loaders

for path in dataset_paths:
    # Dataset name = file name without its extension. os.path is used (the same
    # idiom as the notebook's first dataset-loading cell) so that the name is
    # independent of the path separator and only the final extension is removed.
    dataset_name = os.path.splitext(os.path.basename(path))[0]

    print(f"Loading {dataset_name} dataset...")

    # Load dataset
    data = load_dataset(path)

    print(f"Data preprocessing for {dataset_name} dataset...")

    # Preprocess and split the data
    train_data, test_data, tune_data = preprocess_and_split(
        data=data, seed=SEED, test_size=0.2, tune_size=0.1
    )

    print(f"Creating dataloaders for {dataset_name} dataset...")

    # Create dataloaders using the adjusted function
    train_loader, test_loader, tune_loader = create_dataloaders(
        train_data, test_data, tune_data, batch_size=64, seed=SEED
    )

    # Store dataloaders under the dataset name used by the later cells
    dataloaders[dataset_name] = {
        'train': train_loader,
        'test': test_loader,
        'tuning': tune_loader
    }

    # Drop the raw/split data now that the loaders exist, to free up memory
    del data, train_data, test_data, tune_data
    gc.collect()

print(dataloaders.keys())

Loading mixed_dataset_perturbation_5 dataset...
Data preprocessing for mixed_dataset_perturbation_5 dataset...
Creating dataloaders for mixed_dataset_perturbation_5 dataset...
dict_keys(['mixed_dataset_perturbation_5'])


In [8]:
# Dataset whose loaders are sampled below
dataset_name_to_inspect = 'mixed_dataset_perturbation_5'

# Training and testing splits: banner, sample dump, then a blank separator
for split_key, banner in (
    ('train', f"Inspecting {dataset_name_to_inspect} Training Set:"),
    ('test', f"Inspecting {dataset_name_to_inspect} Testing Set"),
):
    print(banner)
    inspect_dataset_sample(dataloaders[dataset_name_to_inspect][split_key])
    print("\n")

# Tuning split comes last and is not followed by a separator
print(f"Inspecting {dataset_name_to_inspect} Tuning Set")
inspect_dataset_sample(dataloaders[dataset_name_to_inspect]['tuning'])

Inspecting mixed_dataset_perturbation_5 Training Set:
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type: torch.float32
Actions Batch Data Type: torch.int64
Rewards Batch Data Type: torch.float32
Next States Batch Data Type: torch.float32
Dones Batch Data Type: torch.float32

First Sample Details:
First State Shape: torch.Size([3, 210, 160])
First State Min/Max: 0.0000/0.8392
First Action: 8
First Reward: 0.0000
First Next State Shape: torch.Size([3, 210, 160])
First Done: 0.0


Inspecting mixed_dataset_perturbation_5 Testing Set
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type: 

## BC

### continue training the agent

## IQL

### continue training the agent

## DQN

### continue training the agent

-----------------

### 10% Perturbation

In [7]:
# Dataset files to load for this section; each file yields one entry in `dataloaders`
dataset_paths = [
    'datasets/mixed/mixed_logs/mixed_dataset_perturbation_10.pkl',
]

dataloaders = {}  # Maps dataset name -> {'train', 'test', 'tuning'} loaders

for path in dataset_paths:
    # Dataset name = file name without its extension. os.path is used (the same
    # idiom as the notebook's first dataset-loading cell) so that the name is
    # independent of the path separator and only the final extension is removed.
    dataset_name = os.path.splitext(os.path.basename(path))[0]

    print(f"Loading {dataset_name} dataset...")

    # Load dataset
    data = load_dataset(path)

    print(f"Data preprocessing for {dataset_name} dataset...")

    # Preprocess and split the data
    train_data, test_data, tune_data = preprocess_and_split(
        data=data, seed=SEED, test_size=0.2, tune_size=0.1
    )

    print(f"Creating dataloaders for {dataset_name} dataset...")

    # Create dataloaders using the adjusted function
    train_loader, test_loader, tune_loader = create_dataloaders(
        train_data, test_data, tune_data, batch_size=64, seed=SEED
    )

    # Store dataloaders under the dataset name used by the later cells
    dataloaders[dataset_name] = {
        'train': train_loader,
        'test': test_loader,
        'tuning': tune_loader
    }

    # Drop the raw/split data now that the loaders exist, to free up memory
    del data, train_data, test_data, tune_data
    gc.collect()

print(dataloaders.keys())

Loading mixed_dataset_perturbation_10 dataset...
Data preprocessing for mixed_dataset_perturbation_10 dataset...
Creating dataloaders for mixed_dataset_perturbation_10 dataset...
dict_keys(['mixed_dataset_perturbation_10'])


In [8]:
# Dataset whose loaders are sampled below
dataset_name_to_inspect = 'mixed_dataset_perturbation_10'

# Training and testing splits: banner, sample dump, then a blank separator
for split_key, banner in (
    ('train', f"Inspecting {dataset_name_to_inspect} Training Set:"),
    ('test', f"Inspecting {dataset_name_to_inspect} Testing Set"),
):
    print(banner)
    inspect_dataset_sample(dataloaders[dataset_name_to_inspect][split_key])
    print("\n")

# Tuning split comes last and is not followed by a separator
print(f"Inspecting {dataset_name_to_inspect} Tuning Set")
inspect_dataset_sample(dataloaders[dataset_name_to_inspect]['tuning'])

Inspecting mixed_dataset_perturbation_10 Training Set:
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type: torch.float32
Actions Batch Data Type: torch.int64
Rewards Batch Data Type: torch.float32
Next States Batch Data Type: torch.float32
Dones Batch Data Type: torch.float32

First Sample Details:
First State Shape: torch.Size([3, 210, 160])
First State Min/Max: 0.0000/0.8392
First Action: 15
First Reward: 0.0000
First Next State Shape: torch.Size([3, 210, 160])
First Done: 0.0


Inspecting mixed_dataset_perturbation_10 Testing Set
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Typ

----------------

# Training the Replay Agents for BC, IQL and DQN

### 0% Perturbation

In [7]:
# Dataset files to load for this section; each file yields one entry in `dataloaders`
dataset_paths = [
    'datasets/replay/replay_logs/replay_dataset_perturbation_0.pkl',
]

dataloaders = {}  # Maps dataset name -> {'train', 'test', 'tuning'} loaders

for path in dataset_paths:
    # Dataset name = file name without its extension. os.path is used (the same
    # idiom as the notebook's first dataset-loading cell) so that the name is
    # independent of the path separator and only the final extension is removed.
    dataset_name = os.path.splitext(os.path.basename(path))[0]

    print(f"Loading {dataset_name} dataset...")

    # Load dataset
    data = load_dataset(path)

    print(f"Data preprocessing for {dataset_name} dataset...")

    # Preprocess and split the data
    train_data, test_data, tune_data = preprocess_and_split(
        data=data, seed=SEED, test_size=0.2, tune_size=0.1
    )

    print(f"Creating dataloaders for {dataset_name} dataset...")

    # Create dataloaders using the adjusted function
    train_loader, test_loader, tune_loader = create_dataloaders(
        train_data, test_data, tune_data, batch_size=64, seed=SEED
    )

    # Store dataloaders under the dataset name used by the later cells
    dataloaders[dataset_name] = {
        'train': train_loader,
        'test': test_loader,
        'tuning': tune_loader
    }

    # Drop the raw/split data now that the loaders exist, to free up memory
    del data, train_data, test_data, tune_data
    gc.collect()

print(dataloaders.keys())

Loading replay_dataset_perturbation_0 dataset...
Data preprocessing for replay_dataset_perturbation_0 dataset...
Creating dataloaders for replay_dataset_perturbation_0 dataset...
dict_keys(['replay_dataset_perturbation_0'])


In [8]:
# Dataset whose loaders are sampled below
dataset_name_to_inspect = 'replay_dataset_perturbation_0'

# Training and testing splits: banner, sample dump, then a blank separator
for split_key, banner in (
    ('train', f"Inspecting {dataset_name_to_inspect} Training Set:"),
    ('test', f"Inspecting {dataset_name_to_inspect} Testing Set"),
):
    print(banner)
    inspect_dataset_sample(dataloaders[dataset_name_to_inspect][split_key])
    print("\n")

# Tuning split comes last and is not followed by a separator
print(f"Inspecting {dataset_name_to_inspect} Tuning Set")
inspect_dataset_sample(dataloaders[dataset_name_to_inspect]['tuning'])

Inspecting replay_dataset_perturbation_0 Training Set:
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type: torch.float32
Actions Batch Data Type: torch.int64
Rewards Batch Data Type: torch.float32
Next States Batch Data Type: torch.float32
Dones Batch Data Type: torch.float32

First Sample Details:
First State Shape: torch.Size([3, 210, 160])
First State Min/Max: 0.0000/0.8392
First Action: 9
First Reward: 0.0000
First Next State Shape: torch.Size([3, 210, 160])
First Done: 0.0


Inspecting replay_dataset_perturbation_0 Testing Set
--- Sample 1 ---
States Batch Shape: torch.Size([64, 3, 210, 160])
Actions Batch Shape: torch.Size([64])
Rewards Batch Shape: torch.Size([64])
Next States Batch Shape: torch.Size([64, 3, 210, 160])
Dones Batch Shape: torch.Size([64])
States Batch Data Type