In [1]:
import random
import os
import pandas as pd
import numpy as np
from stable_baselines3.common.vec_env import DummyVecEnv
import gymnasium as gym
import math

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

import pytorch_lightning as pl

2024-03-04 11:01:43.879613: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Gymnasium environment id and the render mode handed to gym.make.
env_id = 'CarRacing-v2'
render_mode = "rgb_array"


def _make_env():
    """Zero-argument factory DummyVecEnv calls to build the single environment."""
    return gym.make(env_id, render_mode=render_mode)


# Vectorised wrapper around exactly one environment instance.
env = DummyVecEnv([_make_env])

In [3]:
def calc_out_conv_layer(in_h, in_w, ker, pad = 0, dil = 1, stri = 1):
    """Return the ``(height, width)`` left after a square-kernel sliding window.

    Each spatial size is mapped through
    ``(size + 2*pad - dil*(ker - 1) - 1) // stri + 1``.

    Args:
        in_h: Input height.
        in_w: Input width.
        ker: Kernel size (the same for both axes).
        pad: Padding added on each side of both axes.
        dil: Dilation of the kernel.
        stri: Stride of the window.

    Returns:
        Tuple ``(out_h, out_w)``.
    """
    def _along_axis(size):
        # Same arithmetic for height and width.
        return (size + 2*pad - dil * (ker-1) - 1)//stri + 1

    return _along_axis(in_h), _along_axis(in_w)

In [4]:
class CustomResNet(nn.Module):
    """Convolutional feature extractor for channel-last image observation spaces.

    Architecture:
      1. Two pooled stages: Conv2d(k=3) -> LayerNorm -> GELU -> Dropout2d -> MaxPool2d(2).
      2. ``n_cnn_layers - 2`` shape-preserving blocks (Conv2d with ``padding="same"``
         -> LayerNorm -> GELU -> Dropout2d), each added back onto its input.
      3. Flatten, then ``Linear -> GELU`` projecting to ``features_dim``.

    Args:
        observation_space: Object exposing ``shape`` as (height, width, channels)
            and a ``sample()`` method; it is used only to size the layers.
        features_dim: Size of the returned feature vector.
        hidden_channels: Number of channels used by every convolution.
        n_cnn_layers: Total number of convolutions; the first two are the pooled
            stages, the rest are residual blocks.
        stride: Stride of every convolution. NOTE: the residual blocks use
            ``padding="same"``, which PyTorch only accepts together with stride 1.
        dropout: Drop probability of every ``Dropout2d`` layer.
    """

    # Kernel size shared by every convolution; also used for the shape bookkeeping.
    _KERNEL_SIZE = 3

    def __init__(self, observation_space, features_dim: int = 256, 
                 hidden_channels: int = 32, n_cnn_layers: int = 3, 
                 stride: int = 1, dropout : float = 0.2):
        super(CustomResNet, self).__init__()
        self.dropout = dropout
        kernel_size = self._KERNEL_SIZE

        # The observation space is channel-last (H, W, C). Tensors passed to
        # forward() must already be channel-first (N, C, H, W).
        image_h = observation_space.shape[0]
        image_w = observation_space.shape[1]
        n_input_channels = observation_space.shape[2]

        # Size seen by the first LayerNorm (right after the convolution). The
        # kernel size, not the channel count, determines it.
        conv1_h, conv1_w = calc_out_conv_layer(image_h, image_w, kernel_size, stri = stride)
        # MaxPool2d(2, 2) halves each axis, rounding down.
        pooled1_h = conv1_h // 2
        pooled1_w = conv1_w // 2

        self.first_cnn_layer = nn.Sequential(
            nn.Conv2d(n_input_channels, hidden_channels, kernel_size=kernel_size, stride=stride),
            nn.LayerNorm([hidden_channels, conv1_h, conv1_w]),
            nn.GELU(),
            nn.Dropout2d(p=self.dropout),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Same bookkeeping for the second pooled stage.
        conv2_h, conv2_w = calc_out_conv_layer(pooled1_h, pooled1_w, kernel_size, stri = stride)

        self.second_cnn_layer = nn.Sequential(
            nn.Conv2d(hidden_channels, hidden_channels, kernel_size=kernel_size, stride=stride),
            nn.LayerNorm([hidden_channels, conv2_h, conv2_w]),
            nn.GELU(),
            nn.Dropout2d(p=self.dropout),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # From here on the feature map stays hidden_channels x out_h x out_w.
        out_h = conv2_h // 2
        out_w = conv2_w // 2

        # Residual blocks: "same" padding keeps the spatial size so the block
        # output can be added to its input.
        self.cnn_layers = nn.ModuleList()
        for _ in range(n_cnn_layers - 2):
            self.cnn_layers.append(nn.Sequential(
                nn.Conv2d(hidden_channels, hidden_channels, kernel_size=kernel_size, stride=stride, padding="same"),
                nn.LayerNorm([hidden_channels, out_h, out_w]),
                nn.GELU(),
                # Honour the constructor argument instead of a hard-coded rate.
                nn.Dropout2d(p=self.dropout),
            ))

        self.flatten = nn.Flatten()

        # Measure the flattened size with one dummy pass over a sampled observation.
        with torch.no_grad():
            dummy_input = torch.as_tensor(observation_space.sample()[None]).permute(0, 3, 1, 2).float()
            n_flatten = self._forward_features(dummy_input).shape[1]

        self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.GELU())

        num_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print("Number of learnable parameters for the CNN:", num_params)

    def _forward_features(self, images: torch.Tensor) -> torch.Tensor:
        """Run the convolutional trunk and return the flattened feature map."""
        features = self.first_cnn_layer(images)
        features = self.second_cnn_layer(features)
        for block in self.cnn_layers:
            # Residual connection around every shape-preserving block.
            features = features + block(features)
        return self.flatten(features)

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        """Embed a batch of channel-first images.

        Args:
            observations: Float tensor laid out as (N, C, H, W), matching the
                observation space the module was built from.

        Returns:
            Tensor of shape (N, features_dim).
        """
        return self.linear(self._forward_features(observations))

In [5]:
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding with optional custom position indices.

    Args:
        d_model: Embedding size (even or odd).
        dropout: Drop probability applied after adding the encoding.
        max_len: Number of rows in the encoding table.
        position: Optional 1-D tensor with ``max_len`` position values. Repeated
            values give identical encodings to several sequence slots. Defaults
            to ``0 .. max_len - 1``.
    """
    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 512, position: torch.Tensor = None):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        if position is None:
            position = torch.arange(max_len)

        pe = torch.zeros(1, max_len, d_model)  # Shape: [1, max_len, d_model]

        position = position.float().unsqueeze(1)  # Shape: [max_len, 1]
        # One frequency per sin/cos pair. Shape: [1, ceil(d_model / 2)]
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)).unsqueeze(0)
        argument = position @ div_term  # Shape: [max_len, ceil(d_model / 2)]

        pe[:, :, 0::2] = torch.sin(argument)
        # With an odd d_model there is one cosine column fewer than sine columns.
        pe[:, :, 1::2] = torch.cos(argument[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size, seq_len, embedding_dim]``

        Returns:
            ``x`` plus the first ``seq_len`` rows of the table, after dropout.
        """
        # The table is sliced along the sequence dimension; its leading
        # dimension of size 1 broadcasts over the batch.
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

In [6]:
def repeat_integers(integers):
    """Repeat every element of ``integers`` three times, keeping the order.

    For example ``[0, 1]`` becomes ``[0, 0, 0, 1, 1, 1]``. The result is not
    trimmed, so it holds three times as many elements as the input.
    """
    return integers.repeat_interleave(3)

def set_elements_to_zero(row, index):
    """Zero the entries of ``row`` that come before ``index``.

    Entries before ``index`` are multiplied by 0 and the rest by 1. The product
    then goes through ``torch.nan_to_num`` with ``neginf`` kept at ``-inf``, so
    a NaN (for instance from ``-inf * 0``) becomes 0 while ``-inf`` entries
    from ``index`` onwards stay ``-inf``.

    Returns:
        A new tensor; ``row`` itself is not modified.
    """
    # 0 for positions before `index`, 1 from `index` onwards.
    keep = torch.ones_like(row)
    keep[:index] = 0
    return torch.nan_to_num(row * keep, neginf = -float('inf'))

def step_masking(step_len):
    """Build the additive attention mask for ``step_len`` steps of three tokens.

    The sequence holds ``step_len * 3`` tokens, three consecutive tokens per
    step. A query token may attend to every token whose step is not later than
    its own, including the other tokens of its own step.

    Args:
        step_len: Number of steps in the sequence.

    Returns:
        Float32 tensor of shape ``[step_len * 3, step_len * 3]`` indexed as
        ``[query, key]``: 0 where attention is allowed, ``-inf`` elsewhere.
    """
    seq_len = step_len * 3
    # Step index of every token: 0, 0, 0, 1, 1, 1, ...
    token_step = torch.arange(step_len).repeat_interleave(3)
    # allowed[q, k] is True when key k belongs to the same or an earlier step than query q.
    allowed = token_step.unsqueeze(0) <= token_step.unsqueeze(1)

    attention_mask = torch.full((seq_len, seq_len), -float('inf'), dtype=torch.float32)
    return attention_mask.masked_fill(allowed, 0.0)

In [7]:
class TransformerArchitecture(nn.Module):
    """Stack of post-norm self-attention layers over step-structured sequences.

    The input is read as consecutive triples of tokens (one triple per step).
    All three tokens of a step share one positional index, and the attention
    mask lets a token see only tokens from its own or earlier steps.

    Args:
        d_model: Embedding size.
        n_head: Number of attention heads.
        n_layer: Number of attention + feed-forward layers.
        d_ff: Hidden size of the feed-forward sub-layer.
        max_step_len: Maximum number of steps; the positional table holds
            ``max_step_len * 3`` rows.
        dropout: Drop probability used after the positional encoding, after
            attention and inside the feed-forward sub-layer.
        batch_first: Forwarded to ``nn.MultiheadAttention``. NOTE: ``forward``
            reads the sequence length from dimension 1, so it assumes
            batch-first input.
    """
    def __init__(self, d_model=256, n_head=8, n_layer=4, 
                 d_ff=1024, max_step_len=512, dropout = 0.1, batch_first = True):
        super(TransformerArchitecture, self).__init__()
        self.dropout = dropout
        self.activation = nn.GELU()

        # One positional index per step, shared by its three tokens.
        positions = repeat_integers(torch.arange(0, max_step_len))

        # Forward `dropout` so the positional encoding follows the same
        # hyper-parameter as the rest of the stack.
        self.positional_embedding = PositionalEncoding(d_model, dropout = dropout,
                                                       max_len = max_step_len*3, position = positions)
        self.attentions = nn.ModuleList([
            nn.MultiheadAttention(d_model, n_head, batch_first = batch_first)
            for _ in range(n_layer)
        ])
        self.linear1 = nn.ModuleList([
            nn.Linear(d_model, d_ff)
            for _ in range(n_layer)
        ])
        self.linear2 = nn.ModuleList([
            nn.Linear(d_ff, d_model)
            for _ in range(n_layer)
        ])

        self.layer_norms1 = nn.ModuleList([nn.LayerNorm(d_model) for _ in range(n_layer)])
        self.layer_norms2 = nn.ModuleList([nn.LayerNorm(d_model) for _ in range(n_layer)])

    def forward(self, x):
        """Apply positional encoding and every masked attention layer.

        Args:
            x: Tensor ``[batch, seq_len, d_model]`` where ``seq_len`` is three
                times the number of steps.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Each step is made of three tokens: reward, observation, action.
        step_len = x.size(1) // 3

        x = self.positional_embedding(x)

        # The mask depends only on the sequence length, so it is shared by all layers.
        attention_mask = step_masking(step_len).to(device=x.device, dtype=torch.float32)

        layers = zip(self.attentions, self.linear1, self.linear2,
                     self.layer_norms1, self.layer_norms2)
        for attention, expand, contract, norm_attention, norm_ff in layers:
            # Multi-head self-attention sub-layer (post-norm residual).
            residual = x
            x, _ = attention(x, x, x, attn_mask=attention_mask)
            x = F.dropout(x, p=self.dropout, training=self.training)
            x = norm_attention(residual + x)

            # Position-wise feed-forward sub-layer (post-norm residual).
            residual = x
            x = self.activation(expand(x))
            x = F.dropout(x, p=self.dropout, training=self.training)
            x = contract(x)
            x = norm_ff(residual + x)

        return x

In [8]:
def merge_embedding_sequence(reward, observation, action):
    """Interleave three embedding streams into one token sequence.

    Args:
        reward: Tensor ``[batch, seq_len, emb_dim]``.
        observation: Tensor with the same shape as ``reward``.
        action: Tensor with the same shape as ``reward``.

    Returns:
        Tensor ``[batch, seq_len * 3, emb_dim]`` ordered step by step as
        ``reward_0, observation_0, action_0, reward_1, observation_1, ...``.
    """
    batch_size, seq_len, emb_dim = reward.size()
    # Stack on a new axis placed AFTER the step axis: [batch, seq_len, 3, emb_dim].
    # Stacking on dim=1 and flattening would concatenate the three streams
    # (all rewards, then all observations, then all actions), not interleave them.
    combined_tensor = torch.stack((reward, observation, action), dim=2)
    return combined_tensor.reshape(batch_size, seq_len * 3, emb_dim)

In [9]:
class DecisionTransformers(pl.LightningModule):
    """Decision-transformer style policy trained to predict actions.

    Rewards, observations and actions are each embedded to ``d_model``, merged
    into a single token sequence, passed through ``TransformerArchitecture``,
    and the outputs at the observation tokens are mapped to action vectors.

    Args:
        d_model: Embedding size shared by the three token types.
        action_space_dim: Number of components of an action vector.
        observation_space: Image observation space used to size the CNN.
        batch_first: Forwarded to the transformer.
        max_seq_len: Maximum number of steps in an input sequence.
    """
    def __init__(self, d_model, action_space_dim, observation_space, batch_first = True, max_seq_len = 32):
        super(DecisionTransformers, self).__init__()

        # Reward, action and observation embeddings.
        self.embedding_reward = nn.Linear(1, d_model)
        self.embedding_action = nn.Linear(action_space_dim, d_model)
        self.embedding_observation = CustomResNet(observation_space, features_dim = d_model)

        # Transformer backbone.
        # NOTE: TransformerArchitecture multiplies max_step_len by 3 itself, so the
        # positional table is longer than needed. The value is kept because the
        # table is a registered buffer and its size is part of saved checkpoints.
        self.emb_dim = d_model
        self.transformer = TransformerArchitecture(d_model = d_model, max_step_len=max_seq_len*3, batch_first = batch_first)

        # Head mapping a token representation to an action vector.
        self.fc1 = nn.Sequential(
            nn.Linear(d_model, 2 * d_model),
            nn.GELU(),
        )
        self.output = nn.Linear(2 * d_model,  action_space_dim)

        self.huber_loss = nn.SmoothL1Loss()  # Huber loss

        # Print the number of learnable parameters
        num_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print("Number of learnable parameters for the entire architecture:", num_params)

    def forward(self, x):
        """Predict one action vector per step.

        Args:
            x: Dict with ``"rewards"`` ``[batch, seq, 1]``, ``"observations"``
                ``[batch, seq, C, H, W]`` and ``"actions"``
                ``[batch, seq, action_space_dim]``.

        Returns:
            Tensor ``[batch, seq, action_space_dim]``.
        """
        rewards = x["rewards"]
        observations = x["observations"]
        actions = x["actions"]

        rewards_emb = self.embedding_reward(rewards)
        actions_emb = self.embedding_action(actions)
        # Embed one sequence of images at a time and stack the results. The
        # embeddings stay on the model's device instead of being copied through
        # a CPU buffer.
        observations_emb = torch.stack(
            [self.embedding_observation(sequence_imgs) for sequence_imgs in observations],
            dim=0,
        )

        # Merge into a single token sequence of length seq * 3.
        sequence = merge_embedding_sequence(rewards_emb, observations_emb, actions_emb)

        output = self.transformer(sequence)

        # Keep one output token per step: the one at the observation slot.
        # This relies on merge_embedding_sequence emitting per-step
        # (reward, observation, action) triples, which the transformer's mask and
        # positional encoding assume as well.
        output = output[:, 1::3, :]

        # Fully connected head producing the action.
        output = self.fc1(output)

        return self.output(output)

    def _action_loss(self, y_hat, y):
        """Sum over action components of the mean Huber loss of each component."""
        # Labels may arrive in a different float precision than the predictions.
        y = y.to(dtype=y_hat.dtype)
        per_element = F.huber_loss(y_hat, y, reduction="none")
        # Average over every axis except the last (action) one, then add the components.
        return per_element.reshape(-1, per_element.shape[-1]).mean(dim=0).sum()

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss for one batch of ``(inputs, labels)``."""
        x, y = batch
        total_loss = self._action_loss(self(x), y)

        self.log('train_loss', total_loss)
        return total_loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss for one batch of ``(inputs, labels)``."""
        x, y = batch
        total_loss = self._action_loss(self(x), y)

        self.log('val_loss', total_loss)
        return total_loss

    def configure_optimizers(self):
        """Return the Adam optimizer (learning rate 1e-3) over all parameters."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

In [10]:
class SequenceExtractor(Dataset):
    """Dataset of fixed-length windows cut from recorded episodes.

    Episodes are read from ``<env id>/<model subfolder>/<parquet file>``. Each
    file must provide the columns ``"rewards"``, ``"observation"`` (flattened
    arrays) and ``"action"``.

    Args:
        env: Vectorised environment; supplies the observation shape and the
            environment id used as the root data folder.
        seq_len: Number of consecutive steps per sample.
        dataset_len: Nominal number of samples reported by ``len()``.
    """
    def __init__(self, env, seq_len = 32, dataset_len = 16384):
        self.seq_len = seq_len
        self.dataset_len = dataset_len
        self.env = env
        self.env_name = env.unwrapped.envs[0].spec.id

    def __len__(self):
        return self.dataset_len

    # The index is used as the random seed.
    def __getitem__(self, seed):
        """Draw one window; the sample index doubles as the random seed.

        Returns:
            ``(X, Y)`` where ``X`` is a dict of float tensors
            ``"reward"`` ``[seq_len, 1]``, ``"observation"``
            ``[seq_len, C, H, W]`` and ``"action"`` ``[seq_len, action_dim]``,
            and ``Y`` is a NumPy array holding the actions shifted one step
            forward.
        """
        # A private generator keeps the draw reproducible per index without
        # reseeding the process-wide `random` state.
        rng = random.Random(seed) if seed is not None else random

        # os.scandir yields entries in arbitrary order; sort so a given seed
        # always selects the same folder and file.
        models_subfolder = sorted(f.path for f in os.scandir(self.env_name) if f.is_dir())
        random_model_subfolder = rng.choice(models_subfolder)

        files = sorted(f.path for f in os.scandir(random_model_subfolder) if f.is_file())
        assert len(files) > 0, f"The number of file in the folder {random_model_subfolder} should be greater 0)"
        random_file = rng.choice(files)

        # Read the Parquet file into a DataFrame
        df = pd.read_parquet(random_file)

        # The episode must be long enough to also provide the shifted targets.
        assert len(df) > self.seq_len + 1, (
            f"The length of the experience sequence ({len(df)}) should be greater "
            f"than sequence length + 1 ({self.seq_len + 1})"
        )
        starting_row = rng.randint(0, len(df) - (self.seq_len) - 1)
        ending_row = starting_row + self.seq_len

        window = df.iloc[starting_row:ending_row]
        # Only the selected rows are reshaped back to image shape.
        obs_shape = self.env.observation_space.shape

        reward = torch.Tensor(np.stack(list(window["rewards"]))).unsqueeze(1)
        observation = torch.Tensor(
            np.stack([obs.reshape(obs_shape) for obs in window["observation"]])
        ).permute(0, 3, 1, 2)  # channel-last -> channel-first
        input_action = torch.Tensor(np.stack(list(window["action"])))

        X = {
            "reward":reward, 
            "observation":observation, 
            "action":input_action,
            }
        # Targets: the action taken one step after each input step.
        Y = np.stack(list(df["action"].iloc[starting_row + 1:ending_row + 1]))

        return X, Y

In [11]:
# Dataset of recorded sequences for `env`, using the default window and dataset lengths.
sequenceExtractor = SequenceExtractor(env=env)

In [12]:
def collate_fn(data):
    """Batch a list of ``(X, Y)`` samples.

    Args:
        data: Sequence of ``(X, Y)`` pairs where ``X`` is a dict with tensor
            values under ``"reward"``, ``"observation"`` and ``"action"``, and
            ``Y`` is an array of labels.

    Returns:
        ``(batch, labels)``: ``batch`` is a dict with keys ``"rewards"``,
        ``"observations"`` and ``"actions"``, each a tensor with a new leading
        batch dimension; ``labels`` is the tensor built from the stacked ``Y``
        arrays.
    """
    inputs, targets = zip(*data)
    n_samples = len(data)

    def _batched(key):
        # Allocate from the first sample's shape, then copy every sample in.
        out = torch.zeros((n_samples,) + inputs[0][key].shape)
        for row, sample in enumerate(inputs):
            out[row] = sample[key]
        return out

    batch = {
             "rewards": _batched("reward"),
             "observations": _batched("observation"),
             "actions": _batched("action"),
            }
    return batch, torch.tensor(np.array(targets))

In [13]:
def seed_worker(worker_id):
    """DataLoader ``worker_init_fn`` that seeds NumPy and ``random`` from torch.

    The seed is ``torch.initial_seed()`` reduced modulo 2**32; ``worker_id``
    is not used.
    """
    seed_32bit = torch.initial_seed() % (1 << 32)
    for reseed in (np.random.seed, random.seed):
        reseed(seed_32bit)

# Generator passed to the DataLoader, seeded with 0.
g = torch.Generator()
g.manual_seed(0)

In [14]:
# Data loader over the sequence dataset: shuffled batches built by collate_fn,
# with every worker process seeded through seed_worker and the generator g.
batch_size = 128
dataloader = DataLoader(
    sequenceExtractor,
    batch_size=batch_size,
    shuffle=True,
    num_workers=7,
    collate_fn=collate_fn,
    worker_init_fn=seed_worker,
    generator=g,
)

In [16]:
# Fix the torch seed before the model is built so weight initialisation is repeatable.
torch.manual_seed(42)
model = DecisionTransformers(
    d_model=128,
    action_space_dim=env.action_space.shape[0],
    observation_space=env.observation_space,
    max_seq_len=sequenceExtractor.seq_len,
)
# Train for at most 10 epochs on the training loader only.
trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of learnable parameters for the CNN: 2728064
Number of learnable parameters for the entire architecture: 4082051


/data/cino/.local/lib/python3.10/site-packages/pytorch_lightning/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                  | Type                    | Params
------------------------------------------------------------------
0 | embedding_reward      | Linear                  | 256   
1 | embedding_action      | Linear                  | 512   
2 | embedding_observation | CustomResNet            | 2.7 M 
3 | transformer           | TransformerArchitecture | 1.3 M 
4 | fc1                   | Sequential              | 33.0 K
5 | output                | Linear                  | 771   
6 | huber_loss            | SmoothL1Loss            | 0     
------------------------------------------------------------------
4.1 M     Trainable params
0         Non-trainable params
4.1 M     Total params
16.328    Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

/data/cino/.local/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
