# Imports


In [2]:
import sys 
import os

In [3]:

# Make the parent of the current working directory importable so that the
# project-local packages imported below (`data`, `utils`) can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    # Prepend so the project packages take precedence over same-named installs.
    sys.path.insert(0, project_root)

# Personal 
from data.TrajectorySet import TrajectorySet
from data.Sampler import Sampler 
from data.DatasetCL import DatasetCL 
from utils.tensor_utils import convert_batch_to_tensor

# Misc
import minari 
import numpy as np
import wandb
import os

# Torch 
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim
import torch.utils.data as data 

# PyTorch Lightning 
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger



In [4]:
# Load the "D4RL/pointmaze/large-v2" dataset through Minari; every
# TrajectorySet built later in this notebook wraps this object.
minari_dataset = minari.load_dataset("D4RL/pointmaze/large-v2")

In [5]:
# Authenticate with Weights & Biases before any WandbLogger is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mray-s[0m ([33mray-s-university-of-alberta[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Model Architecture

In [6]:
class mlpCL(pl.LightningModule):
    """MLP encoder trained with an InfoNCE contrastive objective.

    Args:
        lr: Initial learning rate for AdamW.
        weight_decay: Weight decay for AdamW.
        temperature: Divisor applied to the cosine-similarity logits
            (smaller values sharpen the softmax).
        max_epochs: Used as ``T_max`` of the cosine-annealing LR schedule.
        h1, h2, h3: Widths of the hidden layers.
        h4: Dimensionality of the output representation ``z``.
        input_dim: Size of each input vector. Defaults to 4, the value that
            was previously hard-coded in the first linear layer.
    """

    def __init__(self, lr, weight_decay, temperature=30, max_epochs=1000,
                 h1=256, h2=128, h3=64, h4=32, input_dim=4):
        super().__init__()  # inherit from LightningModule and nn.Module
        self.save_hyperparameters()  # exposes the constructor args as self.hparams

        self.mlp = nn.Sequential(
            nn.Linear(input_dim, h1),
            nn.ReLU(inplace=True),

            nn.Linear(h1, h2),
            nn.ReLU(inplace=True),

            nn.Linear(h2, h3),
            nn.ReLU(inplace=True),

            nn.Linear(h3, h4),  # representation z
        )

    def configure_optimizers(self):
        """Return AdamW plus a cosine schedule that decays to ``lr / 50``."""
        optimizer = optim.AdamW(params=self.parameters(),
                                lr=self.hparams.lr,
                                weight_decay=self.hparams.weight_decay)
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer,
                                                            T_max=self.hparams.max_epochs,
                                                            eta_min=self.hparams.lr / 50)
        return ([optimizer], [lr_scheduler])

    def info_nce_loss(self, batch, mode="train"):
        """Compute the InfoNCE loss for a batch of positive pairs and log metrics.

        Args:
            batch: Sequence of two tensors ``(first_views, second_views)``,
                each of shape ``[N, D]``; row ``i`` of one tensor is the
                positive of row ``i`` of the other.
            mode: Prefix for the logged metric names (``"train"`` / ``"val"``).

        Returns:
            Scalar cross-entropy loss over the ``[2N, 2N]`` similarity matrix.
        """
        x = torch.cat(batch, dim=0)  # shape: [2N, D]

        z = F.normalize(self.mlp(x), dim=1)  # [2N, h4], unit-norm rows
        N = z.size(0) // 2

        sim = torch.matmul(z, z.T) / self.hparams.temperature  # scaled cosine sim [2N, 2N]

        # Mask out self-similarities. Use the most negative finite value of the
        # logits' dtype rather than a fixed -9e15, which is not representable
        # in half precision; out-of-place so the division output is not mutated.
        mask = torch.eye(2 * N, device=sim.device).bool()
        sim = sim.masked_fill(mask, torch.finfo(sim.dtype).min)

        # positives: i-th sample matches (i + N) mod 2N
        labels = (torch.arange(2 * N, device=sim.device) + N) % (2 * N)

        loss = F.cross_entropy(sim, labels)

        # metrics
        preds = sim.argmax(dim=1)
        top1 = (preds == labels).float().mean()  # positive is the most similar sample
        # topk raises if k exceeds the row length, so clamp k for tiny batches.
        top_k = min(5, sim.size(1))
        top5 = (sim.topk(top_k, dim=1).indices == labels.unsqueeze(1)).any(dim=1).float().mean()

        self.log(f"{mode}/nll_loss", loss, on_epoch=True, prog_bar=True)
        self.log(f"{mode}/top1", top1, on_epoch=True, prog_bar=True)
        self.log(f"{mode}/top5", top5, on_epoch=True, prog_bar=True)

        return loss

    def training_step(self, batch):
        """Return the InfoNCE loss, logged under the ``train/`` prefix."""
        return self.info_nce_loss(batch, mode='train')

    def validation_step(self, batch):
        """Log InfoNCE loss and retrieval metrics under the ``val/`` prefix."""
        self.info_nce_loss(batch, mode='val')

In [7]:
"""
TESTING CELL! 

Testing InfoNCE loss 
"""

# Small sampling pipeline: trajectories -> sampler (dist="l") -> pair dataset.
T = TrajectorySet(dataset=minari_dataset)
S = Sampler(T, dist="l")

batch_size = 8
ds = DatasetCL(S, batch_size=batch_size, k=2)

# Show one raw batch of pairs straight from the dataset.
print(ds.get_batch())

train_loader = data.DataLoader(ds, batch_size=batch_size)

# Untrained model; lr / weight_decay are placeholders since nothing is optimised here.
model = mlpCL(lr=1, weight_decay=1)

# Run a single collated batch through the loss; the cell displays the result.
batch = next(iter(train_loader))
model.info_nce_loss(batch)



[(tensor([-3.9405,  2.9318, -5.2263,  0.5707]), tensor([-4.0870,  2.9410, -4.9626,  0.3101])), (tensor([ 4.3472, -0.0486, -0.0449, -3.1708]), tensor([ 4.3492, -0.0826,  0.1934, -3.4014])), (tensor([-4.4851,  0.2375, -0.5339, -3.8977]), tensor([-4.4915,  0.2010, -0.6426, -3.6502])), (tensor([ 0.4186, -0.4259,  0.3872,  4.5854]), tensor([ 0.4091, -0.5618,  0.4748,  4.3091])), (tensor([-0.3652,  0.9566, -3.8294, -0.8938]), tensor([-0.7253,  0.8599, -3.9969, -0.9770])), (tensor([ 2.3891,  2.1317, -0.3102, -4.5257]), tensor([ 2.4952,  0.9403,  0.2857, -2.8877])), (tensor([-4.5235, -0.0639, -0.1180, -3.7606]), tensor([-4.5197,  0.0090, -0.3670, -3.3005])), (tensor([ 3.3356,  3.0493,  3.3813, -0.0890]), tensor([ 3.3958,  3.0481,  2.8895, -0.0762]))]
pred: tensor([ 8,  9, 10, 11, 12, 13,  2, 15,  0,  1, 14,  3,  4,  5, 10,  7])
top1: tensor(0.8125)
top5 tensor(1.)


/Users/ray/Documents/Research Assistancy UofA 2025/Reproduce CL/contrastive-learning-RL/CL_RL/lib/python3.9/site-packages/pytorch_lightning/core/module.py:441: You are trying to `self.log()` but the `self.trainer` reference is not registered on the model yet. This is most likely because the model hasn't been passed to the `Trainer`


tensor(2.6999, grad_fn=<NllLossBackward0>)

# Trainer


In [7]:
# Directory (relative to the working directory) that receives model checkpoints.
CHECKPOINT_PATH = "../saved_models"

# Keep the three weight-only checkpoints with the highest "val/top5" metric.
checkpoint_callback = ModelCheckpoint(
    monitor="val/top5",
    mode="max",
    save_top_k=3,
    save_weights_only=True,
    filename="best_model",
    dirpath=CHECKPOINT_PATH,
)

def train_cl(train_ds, val_ds, batch_size, logger, max_epochs=1000, **kwargs):
    """Train an ``mlpCL`` encoder and return the best checkpointed model.

    Args:
        train_ds: Dataset used for training (shuffled, last partial batch dropped).
        val_ds: Dataset used for validation (not shuffled, nothing dropped).
        batch_size: Batch size for both dataloaders.
        logger: Lightning logger passed to the ``Trainer``.
        max_epochs: Number of training epochs; also forwarded to ``mlpCL``.
        **kwargs: Remaining ``mlpCL`` constructor arguments (e.g. ``lr``,
            ``weight_decay``, ``temperature``).

    Returns:
        The ``mlpCL`` restored from the checkpoint with the highest
        ``val/top5``; if no checkpoint was written, the in-memory model
        as it is after training.
    """
    # Build a fresh checkpoint callback for every call. The callback object
    # stores its best-score / top-k bookkeeping on itself, so sharing one
    # instance across calls in the same kernel lets an earlier run's
    # checkpoints compete with (and be returned instead of) this run's.
    # Settings mirror the notebook-level `checkpoint_callback`.
    run_checkpoint = ModelCheckpoint(dirpath=CHECKPOINT_PATH,
                                     filename="best_model",
                                     save_top_k=3,
                                     save_weights_only=True,
                                     mode="max",
                                     monitor="val/top5")

    trainer = pl.Trainer(
        default_root_dir=CHECKPOINT_PATH,
        logger=logger,
        accelerator="mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu",
        devices=1,
        max_epochs=max_epochs,
        callbacks=[run_checkpoint,  # saves a checkpoint when val/top5 enters the top 3
                   LearningRateMonitor("epoch")])
    train_loader = data.DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = data.DataLoader(dataset=val_ds, batch_size=batch_size, shuffle=False, drop_last=False)

    # Seed before the model is created so weight initialisation is reproducible.
    pl.seed_everything(10)
    model = mlpCL(max_epochs=max_epochs, **kwargs)
    trainer.fit(model, train_loader, val_loader)

    best_path = run_checkpoint.best_model_path
    print("Best model path:", best_path)
    if not best_path:
        # No checkpoint was saved (e.g. validation never ran), so there is
        # nothing to load; hand back the trained in-memory model instead.
        return model

    return mlpCL.load_from_checkpoint(best_path)

In [8]:
# Hyperparameters for this run; the same mapping is recorded by the W&B logger.
config = dict(
    distribution="g",
    batch_size=256,
    k=2,
    lr=5e-4,
    weight_decay=1e-4,
    temperature=0.08,
    max_epochs=10,
)

wandb_logger = WandbLogger(
    project="Contrastive Learning RL",
    name="test-run-new-infoNCE-loss",
    save_dir=project_root,
    log_model=True,
    config=config,
)

# Unpack the config into notebook-level names, in the same order as the keys above.
dist, batch_size, k, lr, weight_decay, temperature, max_epochs = (
    config[key]
    for key in ("distribution", "batch_size", "k", "lr",
                "weight_decay", "temperature", "max_epochs")
)

T = TrajectorySet(dataset=minari_dataset)
S = Sampler(T, dist=dist)
train_dataset = DatasetCL(S, batch_size=batch_size, k=k)

# NOTE(review): the validation set is built from the same Sampler as the
# training set, so it does not look like a held-out split; confirm this is intended.
val_dataset = DatasetCL(S, batch_size=batch_size, k=k)

model = train_cl(
    train_ds=train_dataset,
    val_ds=val_dataset,
    batch_size=batch_size,
    logger=wandb_logger,
    max_epochs=max_epochs,
    lr=lr,
    temperature=temperature,
    weight_decay=weight_decay,
)
    
    
    

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 10


/Users/ray/Documents/Research Assistancy UofA 2025/Reproduce CL/contrastive-learning-RL/CL_RL/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/ray/Documents/Research Assistancy UofA 2025/Reproduce CL/contrastive-learning-RL/saved_models exists and is not empty.

  | Name | Type       | Params | Mode 
--------------------------------------------
0 | mlp  | Sequential | 44.5 K | train
--------------------------------------------
44.5 K    Trainable params
0         Non-trainable params
44.5 K    Total params
0.178     Total estimated model params size (MB)
8         Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

/Users/ray/Documents/Research Assistancy UofA 2025/Reproduce CL/contrastive-learning-RL/CL_RL/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


                                                                           

/Users/ray/Documents/Research Assistancy UofA 2025/Reproduce CL/contrastive-learning-RL/CL_RL/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/ray/Documents/Research Assistancy UofA 2025/Reproduce CL/contrastive-learning-RL/CL_RL/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.66it/s, v_num=ll7m, train/nll_loss_step=3.570, train/top1_step=0.180, train/top5_step=0.463, val/nll_loss=3.740, val/top1=0.209, val/top5=0.496, train/nll_loss_epoch=3.570, train/top1_epoch=0.180, train/top5_epoch=0.463] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 21.25it/s, v_num=ll7m, train/nll_loss_step=3.570, train/top1_step=0.180, train/top5_step=0.463, val/nll_loss=3.740, val/top1=0.209, val/top5=0.496, train/nll_loss_epoch=3.570, train/top1_epoch=0.180, train/top5_epoch=0.463]
Best model path: /Users/ray/Documents/Research Assistancy UofA 2025/Reproduce CL/contrastive-learning-RL/saved_models/best_model-v13.ckpt
