In [1]:
from __future__ import annotations

import math

import torch
import torch.nn as nn
from torch import Tensor, BoolTensor
from torch.nn import functional as F

In [13]:
from model.transformer import TransformerBlock, CausalAttention

In [5]:
from model.time2vec import SineActivation, CosineActivation

VALID_T2V_ACTIVATION = ["sin", "cos"]

In [9]:
import torch

# Suppose we have a tensor of shape (B, S, C)
B, S, C = 4, 5, 1  # Example dimensions
tensor = torch.randn(B, S, C)  # Create a random tensor

# Extract the slice [:, :, 0] and retain the singleton dimension
slice_tensor = tensor[:, :, 1:]

# Check the shape of the resulting tensor
print("Original shape:", tensor.shape)  # (B, S, C)
print("Slice shape:", slice_tensor.shape)  # (B, S, 1)

Original shape: torch.Size([4, 5, 1])
Slice shape: torch.Size([4, 5, 0])


In [146]:
class QCCT(nn.Module):
    """QUIC Congestion Control Transformer."""

    def __init__(
        self,
        n_features: int,
        hidden_size: int,
        n_heads: int,
        n_layers: int,
        expand_size: int,
        context_size: int,
        t2v_act: str = "sin",
        act: nn.Module = nn.GELU,
        attention: nn.Module = CausalAttention,
        drop: float = 0.1,
        bias: bool = True,
    ):
        super().__init__()

        # 1. Features:
        # 1.1 timestamp
        if t2v_act == "sin":
            self.t2v = SineActivation(1, hidden_size)
        elif t2v_act == "cos":
            self.t2v = CosineActivation(1, hidden_size)
        else:
            raise Exception(f"Unsupported activation:{t2v_act} for time2vec")
        # 1.2 other features
        self.o2v = nn.ModuleList(
            [
                nn.Linear(n_features - 1, expand_size, bias=bias),
                act(),
                nn.Linear(expand_size, hidden_size, bias=bias),
                nn.Dropout(drop),
            ]
        )
        # 1.3 feature dropout
        self.f_drop = nn.Dropout(drop)

        # 2. transformer blocks
        # initialize num_layers of transformer layers
        self.tfm_blocks = nn.ModuleList(
            [
                TransformerBlock(
                    hidden_size=hidden_size,
                    num_heads=n_heads,
                    context_size=context_size,
                    expand_size=expand_size,
                    attention=attention,
                    act=act,
                    bias=bias,
                    attn_drop=drop,
                    out_drop=drop,
                    ffn_drop=drop,
                )
                for _ in range(n_layers)
            ]
        )

        # 3. output
        self.final = nn.Linear(context_size * hidden_size, 1, bias=bias)

        # 4. init parameters
        self.apply(self._init_weights)

    def forward(self, x: Tensor):
        # [Input]: (B, S, C)
        # B: batch_size, S: n_events, C: n_features
        B, S, C = x.shape
        # Step 1: (B, S, C) -> (B, S, D)
        # B: batch_size, S: n_events, D: hidden_size

        # Step 1.1: timestamp
        # (B, S, 1)
        timestamp = x[:, :, 0].unsqueeze(-1)
        # (B, S, D)
        f_ts = self.t2v(timestamp)

        # Step 1.2: other features
        # (B, S, C-1)
        f_others = x[:, :, 1:]
        # (B, S, D)
        for layer in self.o2v:
            f_others = layer(f_others)

        # Step 1.3: Addition
        f_all = self.f_drop(f_ts + f_others)
        B, S, D = f_all.shape

        # Step 2: transformer blocks
        for block in self.tfm_blocks:
            f_all = block(f_all)

        # (B, S, D) -> (B, S * D)
        flattened = f_all.view(B, S * D)

        # Step 3: next congestion control window
        return self.final(flattened)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            if module._get_name() == "fc2":
                # GPT-2 style FFN init
                torch.nn.init.normal_(
                    module.weight, mean=0.0, std=0.02 / math.sqrt(2 * self.num_layers)
                )
            else:
                torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)

In [124]:
n_features = 8
hidden_size = 64
n_heads = 4
n_layers = 4
expand_size = 128
context_size = 32

In [125]:
model = QCCT(
    n_features=n_features,
    hidden_size=hidden_size,
    n_heads=n_heads,
    n_layers=n_layers,
    expand_size=expand_size,
    context_size=context_size,
)
model

QCCT(
  (t2v): SineActivation()
  (o2v): ModuleList(
    (0): Linear(in_features=7, out_features=128, bias=True)
    (1): GELU(approximate='none')
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): Dropout(p=0.1, inplace=False)
  )
  (f_drop): Dropout(p=0.1, inplace=False)
  (tfm_blocks): ModuleList(
    (0-3): 4 x TransformerBlock(
      (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (attn): CausalAttention(
        (Wqkv): Linear(in_features=64, out_features=192, bias=True)
        (attn_drop): Dropout(p=0.1, inplace=False)
        (Wo): Linear(in_features=64, out_features=64, bias=True)
        (out_drop): Dropout(p=0.1, inplace=False)
      )
      (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (ffn): FeedForward(
        (fc1): Linear(in_features=64, out_features=128, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=128, out_features=64, bias=True)
        (drop): Dropout(p=0.1, inplac

In [127]:
B, S, C = 1, context_size, 8  # Example dimensions
tensor = torch.randn(B, S, C)  # Create a random tensor
tensor.shape

torch.Size([1, 32, 8])

In [129]:
model(tensor)

1 32 64


tensor([[-0.4377]], grad_fn=<AddmmBackward0>)

In [130]:
test_window = df[-2:].values

In [131]:
test_window[-1, -1] = 0
test_window

array([[28040354,        0,       53,       53,        1,        0,
               0,        0],
       [28040695,        0,       53,        0,        1,        0,
               0,        0]])

In [149]:
import torch
from torch.utils.data import Dataset
import pandas as pd
import numpy as np


class SlidingWindowDataset(Dataset):
    def __init__(self, df, window_size, label_column):
        self.df = df
        self.window_size = window_size
        self.label_column = label_column

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Determine the actual window size based on the index
        actual_window_size = min(idx + 1, self.window_size)

        # Slice the DataFrame to get the window
        start_idx = idx - actual_window_size + 1
        window = self.df.iloc[start_idx : idx + 1]

        # The label is taken from the last row of the window for the label_column
        label = window.iloc[-1][self.label_column]

        # Extract the features (all values except the last row's label_column, i.e., the label)
        features = window.values
        features[-1, -1] = 0

        # Pad the features with zeros if necessary
        if actual_window_size < self.window_size:
            padding = np.zeros(
                (self.window_size - actual_window_size, features.shape[1])
            )
            features = np.vstack((padding, features))

        return torch.tensor(features, dtype=torch.float32), torch.tensor(
            label, dtype=torch.float32
        ).unsqueeze(-1)

In [150]:
import os
from pathlib import Path

In [134]:
reports_dir_tmpl = "reports_seed_{}"
report_dir_tmpl = "delay_{}_drop_{}"

In [135]:
cwd = os.getcwd()

In [136]:
# Define the path to the reports directory
reports_dir = Path(cwd) / reports_dir_tmpl.format(42)
reports_dir

PosixPath('/Users/luzhaoyan/workspace/github/s2n-quic/quic/s2n-quic-sim/reports_seed_42')

In [137]:
# Define lists for delay and drop_rate values
delays = ["5ms", "50ms", "100ms", "200ms", "500ms"]
drop_rates = [0.01, 0.05, 0.1, 0.2, 0.3]

In [138]:
report_dir = reports_dir / report_dir_tmpl.format(delays[3], drop_rates[1])
report_dir

PosixPath('/Users/luzhaoyan/workspace/github/s2n-quic/quic/s2n-quic-sim/reports_seed_42/delay_200ms_drop_0.05')

In [139]:
dataset_path = report_dir / "formatted.csv"

In [140]:
df = pd.read_csv(dataset_path)

In [141]:
df

Unnamed: 0,timestamp,lost_bytes,bytes_acknowledged,bytes_in_filght,event_on_ack,event_on_packet_lost,event_on_packet_sent,congestion_window
0,200,0,0,191,0,0,1,12000
1,200,0,0,1200,0,0,1,12000
2,600,0,191,1009,1,0,0,12000
3,600,0,1009,0,1,0,0,12000
4,600,0,0,1472,0,0,1,12000
...,...,...,...,...,...,...,...,...
336353,28039944,0,53,106,1,0,0,10066
336354,28040147,0,53,53,1,0,0,10066
336355,28040295,0,0,106,0,0,1,10066
336356,28040354,0,53,53,1,0,0,10066


In [159]:
# Parameters
n_features = 8
hidden_size = 64
n_heads = 4
n_layers = 4
expand_size = 128
context_size = 32
window_size = context_size
batch_size = 128
label_column = "congestion_window"
num_epochs = 1

In [160]:
# Create the dataset and data loader
dataset = SlidingWindowDataset(df, window_size, label_column)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [161]:
import torch
import torch.optim as optim

# Assuming you have a model defined as 'model'
model = QCCT(
    n_features=n_features,
    hidden_size=hidden_size,
    n_heads=n_heads,
    n_layers=n_layers,
    expand_size=expand_size,
    context_size=context_size,
)

# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to GPU
model.to(device)

# Define the criterion (loss function)
criterion = nn.MSELoss()

# Define the optimizer (e.g., Adam)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
from tqdm import tqdm  # Import tqdm for progress visualization

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for features, label in tqdm(
        data_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch"
    ):
        features, label = features.to(device), label.to(device)
        # print(features.shape, label.shape)

        optimizer.zero_grad()
        outputs = model(features)
        # print(outputs.shape)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(data_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch 1/1: 100%|████████████████████████████████████████████████▊| 2615/2628 [05:38<00:01,  8.37batch/s]

In [None]:
model.eval()
example = torch.rand(1, context_size, 8)
traced_script_module = torch.jit.trace(model, example)
traced_script_module.save("model.pt")