In [None]:
import numpy as np
import pandas as pd
import pickle
import torch

def compute_tcp_reward(features, reward_weights):
    """
    Score a batch of TCP measurements with a weighted log-utility reward.

    The value computed is
        w['throughput'] * log(throughput)
        - w['latency'] * log(rtt)
        - w['loss_rate'] * log(loss)
    averaged over every element of the resulting array.

    Args:
    - features (tuple of tensors): Indexed as (rtt, loss rate, throughput). Each
      entry must support ``.cpu().detach().numpy()``.
    - reward_weights (dict): Scaling factors stored under the keys 'throughput',
      'latency' and 'loss_rate'.

    Returns:
    - The mean reward, i.e. the result of ``ndarray.mean()`` (a numpy scalar).
    """
    # Bring every feature to host memory as a plain numpy array
    # (order: rtt, loss rate, throughput).
    rtt_np, loss_np, tput_np = (
        features[position].cpu().detach().numpy() for position in range(3)
    )

    # Floor each quantity at a tiny positive value so the logarithm stays finite
    # for zero or negative measurements.
    floor = 1e-6
    log_tput = np.log(np.clip(tput_np, floor, None))
    log_rtt = np.log(np.clip(rtt_np, floor, None))
    log_loss = np.log(np.clip(loss_np, floor, None))

    # Throughput is rewarded; latency and loss are penalised.
    gain = reward_weights['throughput'] * log_tput
    delay_penalty = reward_weights['latency'] * log_rtt
    loss_penalty = reward_weights['loss_rate'] * log_loss

    return (gain - delay_penalty - loss_penalty).mean()

class ExperiencePool:
    """
    Append-only store of transitions, kept as four parallel lists
    (states, actions, rewards, dones).
    """
    def __init__(self):
        # One list per transition field; add() appends exactly one entry to each.
        self.states, self.actions, self.rewards, self.dones = [], [], [], []

    def add(self, state, action, reward, done):
        """Append one transition, converting every field with np.array first."""
        for buffer, value in (
            (self.states, state),
            (self.actions, action),
            (self.rewards, reward),
            (self.dones, done),
        ):
            buffer.append(np.array(value))

    def __len__(self):
        """Return the number of stored transitions (length of the states list)."""
        return len(self.states)

# Define the CCA mapping
# NOTE(review): not referenced anywhere in this cell; presumably documents how the
# 'CCAs' column was encoded -- confirm against the dataset.
cca_mapping = {'Cubic': 0, 'BBR': 1, 'PCC': 2}

# Load your data
data_path = '/content/encoded_file.csv'  # Replace with your actual data file path
df = pd.read_csv(data_path)

# Create an instance of ExperiencePool
exp_pool = ExperiencePool()

# Initialize the global reward variable (not updated in this cell)
global_reward = 0

# The reward weights do not change between rows, so build the dict once
# instead of re-creating it on every iteration.
reward_weights = {'throughput': 0.5, 'latency': 0.25, 'loss_rate': 0.25}

# Iterate through each row and update the experience pool
for index, row in df.iterrows():
    # Extract state features
    current_rtt = row['Latency']  # RTT
    current_loss = row['LossRate']  # Loss rate
    throughput = row['Throughput']  # Throughput

    # State layout: [rtt, loss rate, throughput]
    state = np.array([current_rtt, current_loss, throughput])

    # Add batch and sequence axes (seq_len = 1 for a single time step)
    state_tensor = torch.tensor(state, dtype=torch.float).unsqueeze(0).unsqueeze(0)  # Shape (1, 1, 3)

    # Action is CCA selection
    action = np.array(row['CCAs'])  # Convert action to np.array

    # Compute the reward from the raw (rtt, loss, throughput) slices of the state
    reward = compute_tcp_reward(
        features=(state_tensor[:, :, 0:1], state_tensor[:, :, 1:2], state_tensor[:, :, 2:3]),
        reward_weights=reward_weights
    )

    # Convert reward to np.array
    reward = np.array(reward)

    # Assuming 'done' is an indicator of end of an episode, set it accordingly
    done = np.array(0)  # Placeholder, adjust according to your logic

    # Add to experience pool
    exp_pool.add(state=state, action=action, reward=reward, done=done)

# Save the experience pool. The context manager guarantees the file handle is
# flushed and closed even if pickling fails part-way through.
pickle_save_path = 'exp_pool.pkl'
with open(pickle_save_path, 'wb') as f:
    pickle.dump(exp_pool, f)
print("Done. Experience pool saved at:", pickle_save_path)


In [4]:
import torch
import torch.nn as nn

class EncoderNetwork(nn.Module):
    """
    The encoder network for encoding each piece of information of the state.
    This design is adapted from Pensieve/Genet for customized data.

    Every time step of the input carries three scalars, read by position along
    the last axis: index 0 = RTT, index 1 = loss rate, index 2 = throughput.
    RTT and loss rate are embedded independently per time step by a linear
    layer; throughput is embedded by a 1-D convolution over the time axis.
    The three embeddings are concatenated and projected back to ``embed_dim``.

    Args:
    - conv_size (int): Kernel size of the throughput convolution (must be >= 1).
    - embed_dim (int): Width of every intermediate embedding and of the output.
    """
    def __init__(self, conv_size=4, embed_dim=128):
        super().__init__()
        self.embed_dim = embed_dim
        self.conv_size = conv_size  # needed in forward() to size the left padding

        # Fully connected layers for specific features
        self.fc_rtt = nn.Sequential(
            nn.Linear(1, embed_dim),  # RTT
            nn.LeakyReLU()
        )
        self.fc_loss = nn.Sequential(
            nn.Linear(1, embed_dim),  # Loss rate
            nn.LeakyReLU()
        )

        # Convolutional layer for the throughput sequence.
        # No pooling/flatten here: the output must keep one embedding per time
        # step so it can be concatenated with the RTT and loss embeddings.
        self.conv_throughput = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=embed_dim, kernel_size=conv_size),
            nn.LeakyReLU()
        )

        # Fully connected layer to combine the three embed_dim-wide embeddings
        self.fc_combined = nn.Sequential(
            nn.Linear(embed_dim * 3, embed_dim),
            nn.LeakyReLU()
        )

    def forward(self, state):
        """
        Forward pass of the encoder network.

        Args:
        - state (torch.Tensor): Input tensor of shape (batch_size, seq_len, num_features),
          with num_features >= 3 (only the first three features are read).

        Returns:
        - torch.Tensor: Encoded features of shape (batch_size, seq_len, embed_dim).

        Raises:
        - ValueError: If ``state`` is not 3-dimensional or has fewer than 3 features.
        """
        if state.dim() != 3 or state.shape[-1] < 3:
            raise ValueError(
                "state must have shape (batch_size, seq_len, num_features>=3), "
                f"got {tuple(state.shape)}"
            )

        # Separate features. RTT and loss stay as (batch_size, seq_len, 1) because
        # nn.Linear acts on the last axis, which must have size 1 here.
        rtt = state[:, :, 0:1]
        loss = state[:, :, 1:2]
        # Conv1d expects (batch_size, channels, length).
        throughput = state[:, :, 2:3].transpose(1, 2)  # Shape: (batch_size, 1, seq_len)

        # Per-time-step embeddings: (batch_size, seq_len, embed_dim)
        features_rtt = self.fc_rtt(rtt)
        features_loss = self.fc_loss(loss)

        # Left-pad with conv_size - 1 zeros so the convolution yields exactly
        # seq_len outputs (works for seq_len = 1) and each output only sees the
        # current and earlier time steps.
        throughput = nn.functional.pad(throughput, (self.conv_size - 1, 0))
        features_throughput = self.conv_throughput(throughput).transpose(1, 2)  # (batch_size, seq_len, embed_dim)

        # Concatenate features: (batch_size, seq_len, 3 * embed_dim)
        combined_features = torch.cat([features_rtt, features_loss, features_throughput], dim=-1)

        # Pass through fully connected layer
        return self.fc_combined(combined_features)


In [3]:
class OfflineRLPolicy(nn.Module):
    """
    Policy head on top of EncoderNetwork: encodes the raw state, embeds the
    encoding through two parallel linear branches, and maps the concatenated
    embeddings to one output per action.

    Args:
    - state_feature_dim (int): Width of the encoded state. It is used both as the
      encoder's ``embed_dim`` and as the input width of the embedding layers, so
      the two always agree.
    - num_actions (int): Number of outputs produced by the policy layer.
    """
    def __init__(self, state_feature_dim, num_actions):
        super().__init__()
        # EncoderNetwork only accepts (conv_size, embed_dim); its output width has
        # to match the input width of the embedding layers below.
        self.state_encoder = EncoderNetwork(embed_dim=state_feature_dim)
        self.embed_state1 = nn.Linear(state_feature_dim, 64)
        self.embed_state2 = nn.Linear(state_feature_dim, 64)
        # Declared for callers that handle time embeddings themselves; forward()
        # below does not use it.
        self.embed_time = nn.Linear(state_feature_dim, 64)
        # forward() concatenates exactly two 64-wide embeddings.
        self.fc_policy = nn.Linear(64 * 2, num_actions)

    def forward(self, states):
        """
        Args:
        - states (torch.Tensor): Raw states, passed unchanged to the encoder.

        Returns:
        - torch.Tensor: Policy outputs with ``num_actions`` values along the last axis.
        """
        # Encode states using EncoderNetwork
        encoded_states = self.state_encoder(states)
        # Embedding states (nn.functional is used so no extra import is required)
        state_embedding1 = nn.functional.relu(self.embed_state1(encoded_states))
        state_embedding2 = nn.functional.relu(self.embed_state2(encoded_states))
        # Assume time embeddings are handled elsewhere
        combined_features = torch.cat([state_embedding1, state_embedding2], dim=-1)
        policy_output = self.fc_policy(combined_features)
        return policy_output


NameError: name 'nn' is not defined

Need to check the following experience pool.

In [2]:
import numpy as np
import pandas as pd
import pickle
import torch

def compute_tcp_reward(features, reward_weights):
    """
    Weighted log-utility reward for a batch of TCP measurements.

    Throughput contributes positively, RTT and loss rate negatively; each term
    is the logarithm of the measurement scaled by its weight, and the result is
    averaged over all elements.

    Args:
    - features (tuple of tensors): (rtt, loss rate, throughput), in that order.
      Each entry must support ``.cpu().detach().numpy()``.
    - reward_weights (dict): Weights under the keys 'throughput', 'latency'
      and 'loss_rate'.

    Returns:
    - The mean reward as returned by ``ndarray.mean()`` (a numpy scalar).
    """
    # Detach from the graph and copy to host memory as numpy arrays.
    rtt_arr = features[0].cpu().detach().numpy()
    loss_arr = features[1].cpu().detach().numpy()
    tput_arr = features[2].cpu().detach().numpy()

    # Values at or below zero are raised to eps so np.log never sees them.
    eps = 1e-6
    safe_tput, safe_rtt, safe_loss = (
        np.clip(arr, eps, None) for arr in (tput_arr, rtt_arr, loss_arr)
    )

    # Accumulate the three terms one at a time.
    reward = reward_weights['throughput'] * np.log(safe_tput)
    reward = reward - reward_weights['latency'] * np.log(safe_rtt)
    reward = reward - reward_weights['loss_rate'] * np.log(safe_loss)

    return reward.mean()  # mean over the whole batch

class ExperiencePool:
    """
    Container that accumulates transitions in four parallel lists:
    states, actions, rewards and dones.
    """
    def __init__(self):
        # Start with empty storage for each transition field.
        self.states, self.actions, self.rewards, self.dones = [], [], [], []

    def add(self, state, action, reward, done):
        """Store one transition; every field is wrapped with np.array before appending."""
        storages = (self.states, self.actions, self.rewards, self.dones)
        incoming = (state, action, reward, done)
        for storage, raw in zip(storages, incoming):
            storage.append(np.array(raw))

    def __len__(self):
        """Pool size, measured as the number of stored states."""
        return len(self.states)

# Define the CCA mapping
# NOTE(review): not referenced anywhere in this cell; presumably maps the integer
# values of the 'CCAs' column back to algorithm names -- confirm against the dataset.
cca_mapping = {0: 'Cubic', 1: 'BBR', 2: 'PCC'}  # Adjusted mapping for actions

# Load your data
data_path = '/home/sit-research/Desktop/NetLLM-master/tcp-llm_dataset/encoded_file.csv'  # Replace with your actual data file path
df = pd.read_csv(data_path)

# Create an instance of ExperiencePool
exp_pool = ExperiencePool()

# Initialize the global reward variable (not updated in this cell)
global_reward = 0

# The reward weights do not change between rows, so build the dict once
# instead of re-creating it on every iteration.
reward_weights = {'throughput': 0.5, 'latency': 0.25, 'loss_rate': 0.25}

# Iterate through each row and update the experience pool
for index, row in df.iterrows():
    # Extract state features
    current_rtt = row['Latency']  # RTT
    current_loss = row['LossRate']  # Loss rate
    throughput = row['Throughput']  # Throughput

    # State layout: [rtt, loss rate, throughput]
    state = np.array([current_rtt, current_loss, throughput])

    # Add batch and sequence axes (seq_len = 1 for a single time step)
    state_tensor = torch.tensor(state, dtype=torch.float).unsqueeze(0).unsqueeze(0)  # Shape (1, 1, 3)

    # Action is CCA selection
    action = np.array(row['CCAs'])  # Convert action to np.array

    # Compute the reward from the raw (rtt, loss, throughput) slices of the state
    reward = compute_tcp_reward(
        features=(state_tensor[:, :, 0:1], state_tensor[:, :, 1:2], state_tensor[:, :, 2:3]),
        reward_weights=reward_weights
    )

    # Convert reward to np.array
    reward = np.array(reward)

    # Assuming 'done' is an indicator of end of an episode, set it accordingly
    done = np.array(0)  # Placeholder, adjust according to your logic

    # Add to experience pool
    exp_pool.add(state=state, action=action, reward=reward, done=done)

# Save the experience pool
pickle_save_path = 'exp_pool_tllm.pkl'
with open(pickle_save_path, 'wb') as f:
    pickle.dump(exp_pool, f)

# Plain string: the message has no placeholders, so no f-string is needed.
print("Done. Experience pool saved at:", pickle_save_path)


Done. Experience pool saved at: exp_pool_tllm.pkl


New experience pool