In [7]:
import os 
import sys 
# Put the parent of the current working directory at the front of sys.path
# (only if it is not already there). Presumably this is what lets the
# project-local `data`, `models` and `utils` imports below resolve when the
# notebook is run from a sub-directory of the repository -- confirm the layout.
project_root = os.path.abspath(os.path.join(os.getcwd(), "..")) 
if project_root not in sys.path: 
    sys.path.insert(0, project_root)

from data.Sampler import Sampler 
from data.TrajectorySet import TrajectorySet
from models.cl_model import mlpCL

from utils.sampling_states import sample_states
from utils.tensor_utils import split_data

import torch

import numpy as np

import minari

In [None]:
class RandomSamplingDataset(torch.utils.data.Dataset):
    """
    Map-style dataset of latent representations ``z = cl_model(x)``.

    The states ``x`` either come from a user-supplied array or are collected
    from every episode of a minari dataset. They are encoded once, eagerly, in
    ``__init__``; indexing the dataset returns rows of the pre-computed ``z``.
    """

    def __init__(self, cl_model, minari_dataset, iterate_thru_dataset: bool = False, data=None):
        """
        Builds the dataset and pre-computes the latent representation of every state.

        Args:
            cl_model: The contrastive learning model that maps states x to z representations.
                It is called once on the full batch of states.
            minari_dataset: The minari dataset to use. Only read when ``data`` is None and
                ``iterate_thru_dataset`` is True.
            iterate_thru_dataset: If true (and ``data`` is None), gets all of the states
                (observations) from the specified minari dataset.
            data: If there is existing data, convert it to this dataset to use with the
                PyTorch dataloader. Anything ``torch.as_tensor`` accepts works (e.g. a numpy
                array). Takes precedence over ``iterate_thru_dataset``.

        Raises:
            ValueError: If ``data`` is None and ``iterate_thru_dataset`` is False, since
                there would be no states to encode.
        """
        self.cl_model = cl_model
        self.minari_dataset = minari_dataset

        if data is not None:
            states = data
        elif iterate_thru_dataset:
            states = self.__get_all_states()
        else:
            # Previously this fell through and crashed later with an opaque
            # AttributeError on self.states; fail with a clear message instead.
            raise ValueError(
                "RandomSamplingDataset needs a source of states: pass `data` "
                "or set `iterate_thru_dataset=True`."
            )

        self.states = torch.as_tensor(states, dtype=torch.float32)

        # Encoding is inference only, so skip building the autograd graph.
        # NOTE(review): the model's train/eval mode is left untouched here; if it
        # contains dropout/batch-norm the caller should call cl_model.eval() first.
        with torch.no_grad():
            self.z = cl_model(self.states)

    def __len__(self):
        """Returns the number of encoded states."""
        return len(self.z)

    def __getitem__(self, index):
        """Returns the latent representation(s) stored at ``index``."""
        return self.z[index]

    def __get_all_states(self):
        """
        Gets all of the states from the minari dataset, stacked into one array.

        Returns:
            A numpy array with one row per state. Per the original author's note the
            shape is [N, 4], where N is the total number of states.

        Raises:
            ValueError: If the minari dataset reports no episodes.
        """
        total_eps = self.minari_dataset.total_episodes
        if total_eps < 1:
            raise ValueError("minari_dataset contains no episodes to collect states from.")

        eps = self.minari_dataset.sample_episodes(n_episodes=total_eps)

        # Stack once instead of growing the array inside a loop: repeated
        # np.vstack re-copies every accumulated row on each iteration.
        return np.vstack([ep.observations["observation"] for ep in eps])



In [3]:
model_name = "best_model.ckpt"
# Join the path component-wise so the separator is consistent on every OS
# (string-concatenating "/saved_models" produced mixed separators on Windows).
pretrained_model_file = os.path.join(project_root, "saved_models", model_name)

# Fail fast: without this guard a missing checkpoint left `cl_model` undefined
# and the error only surfaced later as a NameError in a different cell.
if not os.path.isfile(pretrained_model_file):
    raise FileNotFoundError(f"No pretrained model found at {pretrained_model_file}")

print(f"Found pretrained model at {pretrained_model_file}, loading...")
# Load the weights onto the CPU regardless of the device they were saved from.
cl_model = mlpCL.load_from_checkpoint(pretrained_model_file, map_location=torch.device("cpu"))

Found pretrained model at c:\Users\ray\Documents\2025 RA\contrastive-learning-RL/saved_models\best_model.ckpt, loading...


In [4]:
# Load the minari dataset identified by "D4RL/pointmaze/large-v2"; later cells
# sample states from it and pass it to RandomSamplingDataset.
md = minari.load_dataset("D4RL/pointmaze/large-v2")

In [9]:
# Draw a fixed number of states from the dataset, then split them 70/30.
num_states = 10_000
data = sample_states(dataset=md, num_states=num_states)
train, val = split_data(data=data, split_val=0.7)

# Show the shape of each split, one per line.
print(train.shape, val.shape, sep="\n")

(7000, 4)
(3000, 4)


In [13]:
# Wrap each split in a RandomSamplingDataset (train first, then val); both share
# the same encoder and minari dataset.
train_ds, val_ds = (
    RandomSamplingDataset(cl_model=cl_model, minari_dataset=md, data=split)
    for split in (train, val)
)



In [14]:
# Peek at the first encoded sample of the training set.
print(train_ds[0])

tensor([ 26.2221, -26.3859,   0.4013, -15.0183,   8.4208,  26.8904,  13.9768,
          1.8588,   4.6670,   7.5652,  18.0703,   0.2683,  -0.4450,  10.5678,
         -0.2255,  22.4735, -18.6578,  -3.4553,   4.8568, -25.5034,  -9.7440,
        -19.9790,  -0.6149, -15.1940, -22.8600,  13.4516,  27.5970,  -5.1186,
         10.4619,   7.4546,   2.3401,   6.5062], grad_fn=<SelectBackward0>)


In [68]:
# Scratch check: load the same dataset id again (duplicates the earlier load)
# and request as many episodes as the dataset reports.
# NOTE(review): sample_episodes presumably draws episodes at random, so the
# order of `eps` may differ between runs -- confirm if ordering matters.
md = minari.load_dataset("D4RL/pointmaze/large-v2")
eps = md.sample_episodes(n_episodes=md.total_episodes)

In [69]:
total_eps = md.total_episodes

# Stack every episode's observations in a single call. Growing `ep_t` with
# np.vstack inside a loop re-copies all accumulated rows on each iteration,
# which is quadratic in the total number of states.
ep_t = np.vstack([eps[i].observations["observation"] for i in range(total_eps)])

print("vstack: ", len(ep_t))

    

vstack:  1003360
