In [20]:
import numpy as np
import torch
import json

# Load the JSON data
with open('Final.json', 'r') as file:
    data = json.load(file)

# Convert data to a suitable format
trajectories = []
for i in range(7000):
    trajectory = []
    for j in range(199):
        trajectory.append(tuple(data[i][j]))
    trajectories.append(trajectory)

def preprocess_trajectories(trajectories):
    returns, states, actions, timesteps = [], [], [], []
    action_mapping = {}  # Dictionary to map actions to integers
    action_counter = 0
    
    for traj in trajectories:
        traj_returns = []
        traj_states = []
        traj_actions = []
        traj_timesteps = []
        for t, step in enumerate(traj):
            state_uav1, state_uav2, action, reward, next_state_uav1, next_state_uav2, done = step
            state = state_uav1 + state_uav2  # Concatenate the state tuples to form a single state list
            
            # Convert action list to tuple to use as dictionary key
            action = tuple(action)
            
            # Convert action tuple to an integer
            if action not in action_mapping:
                action_mapping[action] = action_counter
                action_counter += 1
            
            traj_returns.append(reward)
            traj_states.append(state)
            traj_actions.append(action_mapping[action])
            traj_timesteps.append(t)  # Generate timesteps as a sequence of integers
            
        returns.append(traj_returns)
        states.append(traj_states)
        actions.append(traj_actions)
        timesteps.append(traj_timesteps)
    
    return np.array(returns), np.array(states), np.array(actions), np.array(timesteps)

returns, states, actions, timesteps = preprocess_trajectories(trajectories)

# Convert to PyTorch tensors
returns_tensor = torch.tensor(returns, dtype=torch.float32)
states_tensor = torch.tensor(states, dtype=torch.float32)
actions_tensor = torch.tensor(actions, dtype=torch.long)  # Ensure actions are of type long for CrossEntropyLoss
timesteps_tensor = torch.tensor(timesteps, dtype=torch.float32)

print(returns_tensor.shape)  # Expected: (num_trajectories, num_steps)
print(states_tensor.shape)  # Expected: (num_trajectories, num_steps, state_dim)
print(actions_tensor.shape)  # Expected: (num_trajectories, num_steps)
print(timesteps_tensor.shape)  # Expected: (num_trajectories, num_steps)


torch.Size([7000, 199])
torch.Size([7000, 199, 4])
torch.Size([7000, 199])
torch.Size([7000, 199])


In [23]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from transformers import GPT2Model, GPT2Config
import matplotlib.pyplot as plt

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class DecisionTransformer(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_dim, max_length=199):
        super(DecisionTransformer, self).__init__()
        config = GPT2Config(vocab_size=1, n_positions=max_length * 3, n_embd=hidden_dim, n_layer=4, n_head=4)
        self.transformer = GPT2Model(config)
        
        # Embedding layers
        self.state_emb = nn.Linear(state_dim, hidden_dim)
        self.reward_emb = nn.Linear(1, hidden_dim)
        
        self.predict_action = nn.Linear(hidden_dim, action_dim)
        
        # Positional encoding
        self.positional_encoding = nn.Parameter(self._init_positional_encoding(max_length * 3, hidden_dim), requires_grad=False)
        
    def _init_positional_encoding(self, max_length, hidden_dim):
        position = torch.arange(max_length).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, hidden_dim, 2) * -(torch.log(torch.tensor(10000.0)) / hidden_dim))
        positional_encoding = torch.zeros(max_length, hidden_dim)
        positional_encoding[:, 0::2] = torch.sin(position * div_term)
        positional_encoding[:, 1::2] = torch.cos(position * div_term)
        return positional_encoding
    
    def forward(self, returns, states, timesteps):
        batch_size, seq_length, state_dim = states.size()
        pos_embedding = self.positional_encoding[:seq_length * 3, :].unsqueeze(0).expand(batch_size, -1, -1)
        
        s_embedding = self.state_emb(states.view(-1, state_dim).float()).view(batch_size, seq_length, -1)  # Ensure float32 type
        R_embedding = self.reward_emb(returns.unsqueeze(-1).float()).view(batch_size, seq_length, -1)  # Ensure float32 type
        
        # Concatenate reward and state embeddings
        input_embeds = torch.cat([R_embedding, s_embedding], dim=1).view(batch_size, -1, s_embedding.size(-1))
        
        # Add positional embeddings
        input_embeds += pos_embedding
        
        # Use transformer to get hidden states
        transformer_outputs = self.transformer(inputs_embeds=input_embeds)
        hidden_states = transformer_outputs.last_hidden_state
        
        # Select hidden states for action prediction tokens
        a_hidden = hidden_states[:, 2::3]  # Assuming the actions are at these positions

        # Predict action
        return self.predict_action(a_hidden)

# Define the model, optimizer, and loss function
model = DecisionTransformer(state_dim=8, action_dim=5, hidden_dim=128).to(device)  # state_dim=8 due to concatenation
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for classification

# Create a DataLoader for batching
dataset = TensorDataset(returns_tensor, states_tensor, actions_tensor)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=4)

# Training loop with accuracy and loss tracking
num_epochs = 100
best_loss = float('inf')
early_stop_count = 0
patience = 10  # Number of epochs to wait for improvement before stopping

# Enable mixed precision training
scaler = torch.cuda.amp.GradScaler()

losses = []
accuracies = []

for epoch in range(num_epochs):
    epoch_loss = 0.0
    epoch_correct = 0
    epoch_total = 0

    for R_batch, s_batch, a_batch in dataloader:
        R_batch, s_batch, a_batch = R_batch.to(device), s_batch.to(device), a_batch.to(device)
        a_batch = a_batch.view(-1).long()  # Flatten and convert to long for CrossEntropyLoss

        optimizer.zero_grad()
        
        with torch.cuda.amp.autocast():
            a_preds = model(R_batch, s_batch, timesteps_tensor)
            loss = loss_fn(a_preds.view(-1, 5).float(), a_batch)  # Ensure float32 type for predictions
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        epoch_loss += loss.item() * R_batch.size(0)

        # Accuracy calculation
        _, predicted = torch.max(a_preds, -1)
        correct = (predicted.view(-1) == a_batch).sum().item()
        epoch_correct += correct
        epoch_total += a_batch.size(0)
        
    avg_loss = epoch_loss / len(dataset)
    accuracy = epoch_correct / epoch_total

    losses.append(avg_loss)
    accuracies.append(accuracy)

    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')
    
    # Early stopping logic
    if avg_loss < best_loss:
        best_loss = avg_loss
        early_stop_count = 0
    else:
        early_stop_count += 1
        if early_stop_count >= patience:
            print(f'Early stopping at epoch {epoch + 1}')
            break

# Visualization of the training loss and accuracy
fig, ax1 = plt.subplots()

color = 'tab:blue'
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss', color=color)
ax1.plot(range(1, num_epochs + 1), losses, color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
color = 'tab:red'
ax2.set_ylabel('Accuracy', color=color)  # we already handled the x-label with ax1
ax2.plot(range(1, num_epochs + 1), accuracies, color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.title('Training Loss and Accuracy')
plt.show()


RuntimeError: mat1 and mat2 shapes cannot be multiplied (12736x4 and 8x128)