In [2]:
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

In [37]:
df = pd.read_csv("preprocessed_dataset.csv", parse_dates=["timestamp"])
df.head()

Unnamed: 0,requests,memory,cpu,timestamp
0,6.0,0.105,0.003,2023-10-01 00:00:00
1,29.0,0.104,0.004,2023-10-01 00:01:00
2,12.0,0.105,0.003,2023-10-01 00:02:00
3,18.0,0.105,0.004,2023-10-01 00:03:00
4,4.0,0.105,0.003,2023-10-01 00:04:00


In [38]:
df = pd.read_csv("preprocessed_dataset.csv", parse_dates=["timestamp"])

# Ensure data is sorted by timestamp
df = df.sort_values(by="timestamp").reset_index(drop=True)

# Select relevant columns
target_col = "requests"
feature_cols = ["memory", "cpu"]

# Normalize features
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

df[feature_cols] = scaler_x.fit_transform(df[feature_cols])
df[target_col] = scaler_y.fit_transform(df[[target_col]])

# Convert DataFrame to NumPy array
data = df[["requests", "memory", "cpu"]].values


In [43]:
data.shape

(1440, 3)

In [39]:
class TimeSeriesDataset(Dataset):
    def __init__(self, data, input_len=12, pred_len=12):  # Increased input_len to avoid lag errors
        self.data = data
        self.input_len = input_len
        self.pred_len = pred_len

    def __len__(self):
        return len(self.data) - self.input_len - self.pred_len

    def __getitem__(self, index):
        past_values = self.data[index : index + self.input_len]
        future_values = self.data[index + self.input_len : index + self.input_len + self.pred_len]
        
        # Create a binary mask (1 for observed values)
        past_observed_mask = np.ones_like(past_values)

        # Extract time features (e.g., normalized time step indices)
        past_time_features = np.expand_dims(np.arange(self.input_len) / self.input_len, axis=1)

        return (
            torch.tensor(past_values, dtype=torch.float32),
            torch.tensor(past_time_features, dtype=torch.float32),
            torch.tensor(past_observed_mask, dtype=torch.float32),
            torch.tensor(future_values, dtype=torch.float32),
        )

In [40]:
# Define sequence lengths
input_len = 24  # Increased to avoid lag-related errors
pred_len = 12   # Forecasting 12 time steps ahead


# Create dataset
dataset = TimeSeriesDataset(data, input_len, pred_len)
# Create DataLoader
train_loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=0)

In [41]:
len(dataset)

1404

In [None]:
for batch in train_loader:
    print(f"Batch length: {len(batch)}")  # Should print 4
    print(f"past_values shape: {batch[0].shape}")  # (batch_size, input_len, features)
    print(f"past_time_features shape: {batch[1].shape}")  
    print(f"past_observed_mask shape: {batch[2].shape}")  
    print(f"future_values shape: {batch[3].shape}")  
    break  # Only print once


Batch length: 4
past_values shape: torch.Size([16, 112, 3])
past_time_features shape: torch.Size([16, 112, 1])
past_observed_mask shape: torch.Size([16, 112, 3])
future_values shape: torch.Size([16, 12, 3])


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
from transformers import AutoformerConfig, AutoformerModel

# Define model configuration
config = AutoformerConfig(
    prediction_length=pred_len,
    context_length=input_len,  
    input_size=len(feature_cols),  
    lags_sequence=[1, 2, 3, 4],  # Manually define lags (must be <= context_length)
)

# Initialize model
model = AutoformerModel(config).to(device)


In [None]:
model.parameters()

<generator object Module.parameters at 0x788f9c257ae0>

In [None]:
import torch.optim as optim

criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for past_values, past_time_features, past_observed_mask, future_values in train_loader:
        past_time_features, past_observed_mask, past_values = (
            past_time_features.to(device),
            past_observed_mask.to(device),
            past_values.to(device),
        )

        # print(past_time_features.shape)
        print("mask: ", past_observed_mask.shape)
        optimizer.zero_grad()
        
        # Forward pass (passing correct inputs)
        outputs = model(
            past_time_features=past_time_features,
            past_observed_mask=past_observed_mask,
            past_values=past_values
        ).last_hidden_state  # Extract predictions

        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}")


mask:  torch.Size([16, 112, 3])


ValueError: lags cannot go further than history length, found lag 4 while history length is only 112

### Demo

In [32]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from transformers import AutoformerConfig, AutoformerForPrediction, Trainer, TrainingArguments

In [33]:
df = pd.read_csv("preprocessed_dataset.csv")

df['timestamp'] = pd.to_datetime(df['timestamp'])

input_features = ['memory', 'cpu']
target_feature = 'requests'

In [34]:
scaler = MinMaxScaler()
df[input_features + [target_feature]] = scaler.fit_transform(df[input_features + [target_feature]])

input_length = 48  # Past 48 time steps for input
prediction_length = 24 

In [None]:
# Create dataset class
class TimeSeriesDataset(Dataset):
    def __init__(self, df, input_length, prediction_length):
        self.data = df
        self.input_length = input_length
        self.prediction_length = prediction_length
        
    def __len__(self):
        return len(self.data) - self.input_length - self.prediction_length + 1
    
    def __getitem__(self, idx):
        past_values = self.data.loc[idx : idx + self.input_length, target_feature].values
        future_values = self.data.loc[idx + self.input_length : idx + self.input_length + self.prediction_length, target_feature].values
        past_time_features = self.data.loc[idx : idx + self.input_length, input_features].values.reshape(self.input_length, -1)
        future_time_features = self.data.loc[idx + self.input_length : idx + self.input_length + self.prediction_length, input_features].values.reshape(self.prediction_length, -1)
        past_observed_mask = np.ones_like(past_values)  # Assume all values are observed
        future_observed_mask = np.ones_like(future_values)  # Ensure correct shape
        static_real_features = np.zeros((1, 1))  # Ensure shape is (1,)
        static_categorical_features = np.zeros((1, 1))  # Added static categorical features

        return {
            "past_values": torch.tensor(past_values, dtype=torch.float32).unsqueeze(-1),  # Shape (61, 1)
            "future_values": torch.tensor(future_values, dtype=torch.float32).unsqueeze(-1),  # Shape (24, 1)
            "past_time_features": self.data.loc[idx : idx + self.input_length, input_features].values.reshape(-1, 124),  # Example adjustment
            "future_time_features": torch.tensor(future_time_features, dtype=torch.float32).unsqueeze(1),  # Shape (24, 2)
            "past_observed_mask": torch.tensor(past_observed_mask, dtype=torch.float32).unsqueeze(-1),  # Shape (61, 1)
            "future_observed_mask": torch.tensor(future_observed_mask, dtype=torch.float32).unsqueeze(-1),  # Shape (24, 1)
            "static_real_features": torch.tensor(static_real_features, dtype=torch.float32).expand(1, 1),  # Shape (1, 1)
            "static_categorical_features": torch.tensor(static_categorical_features, dtype=torch.float32).expand(1, 1),  # Shape (1, 1)
        }

In [27]:
# Create dataset and dataloader
dataset = TimeSeriesDataset(df, input_length, prediction_length)
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)


In [28]:
sample = dataset[0]  # Fetch first sample
for key, value in sorted(sample.items()):
    print(f"{key}  {value.shape}")


ValueError: cannot reshape array of size 124 into shape (61,newaxis)

In [29]:
# Load Autoformer model
config = AutoformerConfig(
    context_length=input_length,
    prediction_length=prediction_length,
    input_size=len(input_features),
    target_size=1
)
model = AutoformerForPrediction(config)

In [30]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=10,
    per_device_train_batch_size=64,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)

In [31]:
trainer.train()


ValueError: cannot reshape array of size 124 into shape (61,newaxis)

### Try 4

In [2]:
from transformers import AutoformerConfig, AutoformerForPrediction

config = AutoformerConfig.from_pretrained("kashif/autoformer-traffic-hourly")
model = AutoformerForPrediction.from_pretrained("kashif/autoformer-traffic-hourly")


config.json:   0%|          | 0.00/1.41k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/116k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/92.7k [00:00<?, ?B/s]

In [16]:
from transformers import PretrainedConfig



def create_backtest_dataloader(
    config: PretrainedConfig,
    freq,
    data,
    batch_size: int,
    **kwargs,
):
    PREDICTION_INPUT_NAMES = [
        "past_time_features",
        "past_values",
        "past_observed_mask",
        "future_time_features",
    ]
    if config.num_static_categorical_features > 0:
        PREDICTION_INPUT_NAMES.append("static_categorical_features")

    if config.num_static_real_features > 0:
        PREDICTION_INPUT_NAMES.append("static_real_features")

    transformation = create_transformation(freq, config)
    transformed_data = transformation.apply(data)

    # we create a Validation Instance splitter which will sample the very last
    # context window seen during training only for the encoder.
    instance_sampler = create_instance_splitter(config, "validation")

    # we apply the transformations in train mode
    testing_instances = instance_sampler.apply(transformed_data, is_train=True)

    return as_stacked_batches(
        testing_instances,
        batch_size=batch_size,
        output_type=torch.tensor,
        field_names=PREDICTION_INPUT_NAMES,
    )

def create_test_dataloader(
    config: PretrainedConfig,
    freq,
    data,
    batch_size: int,
    **kwargs,
):
    PREDICTION_INPUT_NAMES = [
        "past_time_features",
        "past_values",
        "past_observed_mask",
        "future_time_features",
    ]
    if config.num_static_categorical_features > 0:
        PREDICTION_INPUT_NAMES.append("static_categorical_features")

    if config.num_static_real_features > 0:
        PREDICTION_INPUT_NAMES.append("static_real_features")

    transformation = create_transformation(freq, config)
    transformed_data = transformation.apply(data, is_train=False)

    # We create a test Instance splitter to sample the very last
    # context window from the dataset provided.
    instance_sampler = create_instance_splitter(config, "test")

    # We apply the transformations in test mode
    testing_instances = instance_sampler.apply(transformed_data, is_train=False)
    
    return as_stacked_batches(
        testing_instances,
        batch_size=batch_size,
        output_type=torch.tensor,
        field_names=PREDICTION_INPUT_NAMES,
    )

In [13]:
config

AutoformerConfig {
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "architectures": [
    "AutoformerForPrediction"
  ],
  "attention_dropout": 0.1,
  "autocorrelation_factor": 3,
  "cardinality": [
    0
  ],
  "context_length": 48,
  "d_model": 16,
  "decoder_attention_heads": 2,
  "decoder_ffn_dim": 32,
  "decoder_layerdrop": 0.1,
  "decoder_layers": 2,
  "distribution_output": "student_t",
  "dropout": 0.1,
  "embedding_dimension": [
    0
  ],
  "encoder_attention_heads": 2,
  "encoder_ffn_dim": 32,
  "encoder_layerdrop": 0.1,
  "encoder_layers": 2,
  "feature_size": 47,
  "init_std": 0.02,
  "input_size": 1,
  "is_encoder_decoder": true,
  "label_length": 10,
  "lags_sequence": [
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    23,
    24,
    25,
    47,
    48,
    49,
    71,
    72,
    73,
    95,
    96,
    97,
    119,
    120,
    121,
    143,
    144,
    145,
    167,
    168,
    169,
    335,
    336,
    337,
    503,
    504,
    505,
    671,


In [14]:
test_dataloader = create_backtest_dataloader(
    config=config,
    freq=freq,
    data=test_dataset,
    batch_size=64,
)

NameError: name 'create_backtest_dataloader' is not defined

### Try 5

In [60]:
from transformers import AutoformerConfig, AutoformerModel

# Initializing a default Autoformer configuration

config = AutoformerConfig(prediction_length=24, input_size=2)

model = AutoformerModel(config)
model.parameters

<bound method Module.parameters of AutoformerModel(
  (scaler): AutoformerMeanScaler()
  (encoder): AutoformerEncoder(
    (value_embedding): AutoformerValueEmbedding(
      (value_projection): Linear(in_features=18, out_features=64, bias=False)
    )
    (embed_positions): AutoformerSinusoidalPositionalEmbedding(48, 64)
    (layers): ModuleList(
      (0-1): 2 x AutoformerEncoderLayer(
        (self_attn): AutoformerAttention(
          (k_proj): Linear(in_features=64, out_features=64, bias=True)
          (v_proj): Linear(in_features=64, out_features=64, bias=True)
          (q_proj): Linear(in_features=64, out_features=64, bias=True)
          (out_proj): Linear(in_features=64, out_features=64, bias=True)
        )
        (self_attn_layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (activation_fn): GELUActivation()
        (fc1): Linear(in_features=64, out_features=32, bias=True)
        (fc2): Linear(in_features=32, out_features=64, bias=True)
        (fina

In [13]:
df = pd.read_csv("preprocessed_dataset.csv")

In [21]:
df['datetime'] = pd.to_datetime(df['timestamp'])
df['requests'] = df['requests'].fillna(0)
df['time_idx'] = df['datetime'].astype('int64') // 10**9 

In [22]:
df.head()

Unnamed: 0,requests,memory,cpu,timestamp,datetime,time_idx
0,6.0,0.105,0.003,2023-10-01 00:00:00,2023-10-01 00:00:00,1696118400
1,29.0,0.104,0.004,2023-10-01 00:01:00,2023-10-01 00:01:00,1696118460
2,12.0,0.105,0.003,2023-10-01 00:02:00,2023-10-01 00:02:00,1696118520
3,18.0,0.105,0.004,2023-10-01 00:03:00,2023-10-01 00:03:00,1696118580
4,4.0,0.105,0.003,2023-10-01 00:04:00,2023-10-01 00:04:00,1696118640


In [23]:
from pytorch_forecasting import TimeSeriesDataSet
dataset = TimeSeriesDataSet(data=df, time_idx='time_idx', target='requests', group_ids=['memory', 'cpu'])

KeyError: "Unknown category 'nan' encountered. Set `add_nan=True` to allow unknown categories"

In [11]:
import torch
from transformers import AutoformerConfig, AutoformerModel
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset
from pytorch_forecasting import TimeSeriesDataSet


# Define the model configuration
config = AutoformerConfig(
    prediction_length=24,  # Set your prediction length
    input_features=2
)

# Initialize the model with the configuration
model = AutoformerModel(config)

# Set up the dataset and dataloader
dataset = TimeSeriesDataSet(data=df, time_idx='timestamp', target='requests', group_ids=['memory', 'cpu'])


dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Define the loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error Loss for regression
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in dataloader:
        optimizer.zero_grad()  # Clear the previous gradients
        
        # Get the inputs and targets from the batch
        past_values = batch["past_values"].squeeze(-1)  # Remove extra dimensions
        future_values = batch["future_values"].squeeze(-1)
        
        # Forward pass
        outputs = model(past_values)  # Assuming the model takes past values and outputs predictions
        
        # Calculate the loss
        loss = criterion(outputs, future_values)
        loss.backward()  # Backpropagate the loss
        
        optimizer.step()  # Update the model parameters
        
        running_loss += loss.item()

    # Print the average loss for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader)}")

# Save the model
torch.save(model.state_dict(), 'autoformer_model.pth')


AssertionError: Timeseries index should be of type integer