In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from numpy import genfromtxt
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [9]:
# Torch device configuration
device = (
    "cuda" if torch.cuda.is_available() else "mps"
    if torch.backends.mps.is_available() else "cpu"
)
print(f"Using {device} device")
if torch.cuda.is_available():
    print(f"Current GPU device: {torch.cuda.get_device_name(device)}")

Using cuda device
Current GPU device: NVIDIA GeForce RTX 3080


In [2]:
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import time
import psutil
from pathlib import Path

def compute_metrics_base(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    with torch.no_grad():
        # Get the model's predictions
        outputs = model(x_test)
        _, predicted_labels = torch.max(outputs, 1)

        # Convert y_test to tensor if it's not already
        true_labels = torch.tensor(y_test) if not isinstance(y_test, torch.Tensor) else y_test
        true_labels = true_labels.squeeze()  # Remove unnecessary dimensions

    model_file = Path(model_path)

    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = accuracy_score(true_labels.numpy(), predicted_labels.numpy())
    print(f'Accuracy on the test set: {accuracy:.2%}')
    
def measure_cpu_utilization_and_run(func, *args, **kwargs):
    """
    Measure CPU utilization while running a function.

    Parameters:
        func (function): The function to be executed.
        *args: Arguments to be passed to func.
        **kwargs: Keyword arguments to be passed to func.

    Returns:
        float: CPU utilization percentage during the execution of func.
        float: The elapsed time during the execution of func.
        any: The result of func execution.
    """
    
    # Measure CPU utilization before execution
    cpu_percent_before = psutil.cpu_percent(interval=None)

    # Record the start time
    start_time = time.time()

    # Execute the function and store its result
    result = func(*args, **kwargs)

    # Record the end time
    end_time = time.time()

    # Measure CPU utilization after execution
    cpu_percent_after = psutil.cpu_percent(interval=None)

    # Calculate elapsed time and average CPU utilization
    elapsed_time = end_time - start_time
    average_cpu_utilization = (cpu_percent_before + cpu_percent_after) / 2

    return average_cpu_utilization, elapsed_time, result



In [3]:
# Load Data
x = genfromtxt('../../Data/WISDM_x.csv', delimiter=',')
y_df = pd.read_csv('../../Data/WISDM_y.csv')
y = y_df.values.flatten()  # Flatten if y is 2D

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Function to create time series dataset
def create_series(x, y, timestep, overlap):
    slide_step = int(timestep * (1 - overlap))
    data_num = int((len(x) / slide_step) - 1)
    dataset = np.ndarray(shape=(data_num, timestep, x.shape[1]))
    labels = []

    for i in range(data_num):
        labels.append(y[slide_step * (i + 1) - 1])
        for j in range(timestep):
            dataset[i, j, :] = x[slide_step * i + j, :]

    return dataset, np.array(labels)

# Create time series
timestep = 16  # Replace with your value
overlap = 0.5  # Replace with your value
X_series, y_series = create_series(x, y_encoded, timestep, overlap)

In [4]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_series, y_series, test_size=0.2, random_state=42)
print(f'X_train shape:{X_train.shape}, X_test shape:{X_test.shape}, y_train shape:{y_train.shape}, y_test shape:{y_test.shape}')

X_train shape:(104856, 16, 3), X_test shape:(26214, 16, 3), y_train shape:(104856,), y_test shape:(26214,)


In [23]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.int64)  # Assuming y_train is already encoded as class indexes
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.int64)

In [33]:
# Training setup
training_percentage = 0.9  # Set the desired percentage of training samples

# Calculate the number of training samples
n_total_samples = X_train_tensor.shape[0]
n_training_data = int(training_percentage * n_total_samples)

# Create a subset of the training data
subset_indices = torch.randperm(n_total_samples)[:n_training_data]
x_train_subset = X_train_tensor[subset_indices]
y_train_subset = y_train_tensor[subset_indices]

# Create a DataLoader for the subset
batch_size = n_training_data
train_dataset = TensorDataset(x_train_subset, y_train_subset)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Create a test Dataloader
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [47]:
# Define the Transformer model
import torch
import torch.nn as nn
import torch.nn.functional as F

class TransformerEncoderBlock(nn.Module):
    def __init__(self, input_dim, head_size, n_heads, ff_dim, dropout=0.0):
        super(TransformerEncoderBlock, self).__init__()
        self.norm1 = nn.LayerNorm(input_dim)
        self.attention = nn.MultiheadAttention(embed_dim=input_dim, num_heads=n_heads, dropout=dropout)
        self.dropout1 = nn.Dropout(dropout)
        self.norm2 = nn.LayerNorm(input_dim)
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=ff_dim, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=ff_dim, out_channels=input_dim, kernel_size=1)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, src):
        # LayerNorm and Multi-head Attention
        x = self.norm1(src)
        x, _ = self.attention(x, x, x)
        x = self.dropout1(x)
        x = x + src  # skip connection

        # Feed Forward
        x = self.norm2(x)
        x = x.permute(1, 2, 0)  # Conv1D expects (batch_size, channels, length)
        x = F.relu(self.conv1(x))
        x = self.dropout2(x)
        x = self.conv2(x)
        x = x.permute(2, 0, 1)  # back to (length, batch_size, channels)
        x = x + src  # skip connection
        return x

class TimeSeriesTransformer(nn.Module):
    def __init__(self, sequence_length, num_features, head_size, n_heads, ff_dim, n_trans_blocks, mlp_units, drop=0.0, mlp_drop=0.0):
        super(TimeSeriesTransformer, self).__init__()
        self.encoders = nn.ModuleList([TransformerEncoderBlock(num_features, head_size, n_heads, ff_dim, drop) for _ in range(n_trans_blocks)])
        self.global_avg_pooling = nn.AdaptiveAvgPool1d(1)
        mlp_layers = []
        current_dim = num_features
        for dim in mlp_units:
            mlp_layers.append(nn.Linear(current_dim, dim))
            mlp_layers.append(nn.ReLU())
            mlp_layers.append(nn.Dropout(mlp_drop))
            current_dim = dim  # Set input dim for the next layer
        self.mlp = nn.Sequential(*mlp_layers)
        self.final_layer = nn.Linear(mlp_units[-1], 6)

    def forward(self, src):
        src = src.permute(1, 0, 2)  # Transformer expects (seq_len, batch_size, features)
        for encoder in self.encoders:
            src = encoder(src)

        # Global average pooling
        src = src.permute(1, 2, 0)  # pooling expects (batch_size, channels, length)
        src = self.global_avg_pooling(src)
        src = torch.flatten(src, 1)  # Flatten the output for the MLP

        # MLP
        src = self.mlp(src)
        output = self.final_layer(src)
        return output

# Input parameters for your data
sequence_length = 16  # The length of the time series sequences in your data
num_features = 3     # The number of features in each time step of your data sequence

# Instantiate the model
# Instantiate the model with an adjusted number of heads and head size
# The head size must be a multiple of num_features.
model = TimeSeriesTransformer(
    sequence_length=16, 
    num_features=3, 
    head_size=3,  # Each head will now have an embed size of 1 (3 / 3)
    n_heads=1,  # Only one head since our embed_dim is 3
    ff_dim=64, 
    n_trans_blocks=4, 
    mlp_units=[128, 64], 
    drop=0.1, 
    mlp_drop=0.1
)

model.to(device)

TimeSeriesTransformer(
  (encoders): ModuleList(
    (0-3): 4 x TransformerEncoderBlock(
      (norm1): LayerNorm((3,), eps=1e-05, elementwise_affine=True)
      (attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=3, out_features=3, bias=True)
      )
      (dropout1): Dropout(p=0.1, inplace=False)
      (norm2): LayerNorm((3,), eps=1e-05, elementwise_affine=True)
      (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 3, kernel_size=(1,), stride=(1,))
      (dropout2): Dropout(p=0.1, inplace=False)
    )
  )
  (global_avg_pooling): AdaptiveAvgPool1d(output_size=1)
  (mlp): Sequential(
    (0): Linear(in_features=3, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.1, inplace=False)
  )
  (final_layer): Linear(in_features=64, out_features=6, bias=True)
)

In [37]:
import torch
from torchmin import Minimizer
from torch.nn import CrossEntropyLoss

# Define the optimizer and loss function
optimizer = Minimizer(model.parameters(),
                      method='cg',
                      tol=1e-3,
                      max_iter=2,
                      disp=0)

loss_fn = CrossEntropyLoss()

num_fine_tune_epochs = 100
best_val_accuracy = 0.0  # Variable to track the best validation accuracy
best_model_state = None  # Variable to store the state of the best model

for epoch in range(num_fine_tune_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for X_batch, y_batch in train_loader:
        # Move inputs and labels to the selected device
        inputs, labels = X_batch.to(device), y_batch.to(device)

        def closure():
            optimizer.zero_grad()
            output = model(inputs)
            loss = loss_fn(output, labels)
            # loss.backward()  <-- do not call backward!
            return loss

        loss = optimizer.step(closure)
        running_loss += loss.item()

    for X_batch, y_batch in test_loader:
        # Move inputs and labels to the selected device
        inputs, labels = X_batch.to(device), y_batch.to(device)
        output = model(inputs)
        _, predicted = torch.max(output.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_fine_tune_epochs}], Train Loss: {avg_loss:.4f}, Val Accuracy: {accuracy:.2f}%")

    # Save the model if the current validation accuracy is better than the best
    if accuracy > best_val_accuracy:
        best_val_accuracy = accuracy
        best_model_state = model.state_dict()

print('Finished Training')


Epoch [1/100], Train Loss: 1.4931, Val Accuracy: 42.89%
Epoch [2/100], Train Loss: 1.3841, Val Accuracy: 47.83%
Epoch [3/100], Train Loss: 1.3353, Val Accuracy: 50.66%
Epoch [4/100], Train Loss: 1.3010, Val Accuracy: 50.86%
Epoch [5/100], Train Loss: 1.2863, Val Accuracy: 51.16%
Epoch [6/100], Train Loss: 1.2778, Val Accuracy: 51.95%
Epoch [7/100], Train Loss: 1.2692, Val Accuracy: 51.79%
Epoch [8/100], Train Loss: 1.2625, Val Accuracy: 52.25%
Epoch [9/100], Train Loss: 1.2583, Val Accuracy: 52.38%
Epoch [10/100], Train Loss: 1.2525, Val Accuracy: 52.38%
Epoch [11/100], Train Loss: 1.2005, Val Accuracy: 55.73%
Epoch [12/100], Train Loss: 1.1871, Val Accuracy: 56.29%
Epoch [13/100], Train Loss: 1.1796, Val Accuracy: 56.61%
Epoch [14/100], Train Loss: 1.1720, Val Accuracy: 56.51%
Epoch [15/100], Train Loss: 1.1661, Val Accuracy: 57.08%
Epoch [16/100], Train Loss: 1.1608, Val Accuracy: 56.82%
Epoch [17/100], Train Loss: 1.1565, Val Accuracy: 57.34%
Epoch [18/100], Train Loss: 1.1522, Val 

In [38]:
# Save the best model
model_path = 'models/transformer_base.pt'
torch.save(best_model_state, model_path)

In [50]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import os

model_path = 'models/transformer_base.pt'

# Testing loop
model.eval()  # Set the model to evaluation mode

model.load_state_dict(best_model_state)

all_predictions = []
all_targets = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        
        # Collect all predictions and labels to compute overall metrics
        all_predictions.extend(predicted.cpu().numpy())
        all_targets.extend(labels.cpu().numpy())

# Convert collected predictions and labels to arrays
all_predictions = np.array(all_predictions)
all_targets = np.array(all_targets)

# Calculate metrics
accuracy = accuracy_score(all_targets, all_predictions)
precision, recall, f1_score, support = precision_recall_fscore_support(all_targets, all_predictions, average='weighted')

# Print metrics
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1_score:.4f}')

# Get the size of the saved model file
model_size = os.path.getsize(model_path)
print(f"The model size is {model_size/1024:.2f} KB")


Accuracy: 0.7281
Precision: 0.6814
Recall: 0.7281
F1 Score: 0.6699
The model size is 61.94 KB


In [52]:
model.to('cpu')

cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, X_test_tensor, y_test_tensor, model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 61.94 KB
Accuracy on the test set: 72.81%
CPU usage during inference: 6.40%
Inference time: 0.4594 seconds


In [53]:
import torch.quantization
# torch.backends.quantized.engine = 'qnnpack'

# Load the saved model's state dict
model.load_state_dict(torch.load(model_path))

# Make sure the model is in evaluation mode before quantization
model.eval()

# Perform dynamic quantization
quantized_model = torch.quantization.quantize_dynamic(
    model,  # the original model
    {torch.nn.Linear},  # specify which layer types to quantize
    dtype=torch.qint8  # the target data type for quantized weights
)

# Save the quantized model
quantized_model_path = "models/transformer_CG_quantized.pt"
torch.save(quantized_model.state_dict(), quantized_model_path)

# Now you can use quantized_model for inference
print("Quantization complete and model saved.")


Quantization complete and model saved.


In [57]:
# Testing loop
quantized_model.to('cpu')
quantized_model.eval()  # Set the model to evaluation mode
all_predictions = []
all_targets = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to('cpu'), labels.to('cpu')
        outputs = quantized_model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        
        # Collect all predictions and labels to compute overall metrics
        all_predictions.extend(predicted.cpu().numpy())
        all_targets.extend(labels.cpu().numpy())

# Convert collected predictions and labels to arrays
all_predictions = np.array(all_predictions)
all_targets = np.array(all_targets)

# Calculate metrics
accuracy = accuracy_score(all_targets, all_predictions)
precision, recall, f1_score, support = precision_recall_fscore_support(all_targets, all_predictions, average='weighted')

# Print metrics
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1_score:.4f}')


Accuracy: 0.7285
Precision: 0.6812
Recall: 0.7285
F1 Score: 0.6710


In [58]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, quantized_model, X_test_tensor, y_test_tensor, quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 37.72 KB
Accuracy on the test set: 72.84%
CPU usage during inference: 1.95%
Inference time: 0.2147 seconds
