In [None]:
%pip install torch
%pip install numpy
%pip install pandas
%pip install scikit-learn
%pip install coremltools

In [31]:
import os
import time
from numpy import genfromtxt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from pathlib import Path
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import psutil
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import coremltools as ct


In [32]:
# Set up the paths
HOME_PATH = os.path.expanduser('~')
MODELS_PATH = f'{HOME_PATH}/Developer/BU/research/models'
DATASET_PATH = f'../../../data/'
data_features = f'{DATASET_PATH}/WISDM_x.csv'
data_labels = f'{DATASET_PATH}/WISDM_y.csv'

In [33]:
def compute_metrics_base(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    with torch.no_grad():
        # Get the model's predictions
        outputs = model(x_test)
        _, predicted_labels = torch.max(outputs, 1)

        # Convert y_test to tensor if it's not already
        true_labels = torch.tensor(y_test) if not isinstance(y_test, torch.Tensor) else y_test
        true_labels = true_labels.squeeze()  # Remove unnecessary dimensions

    model_file = Path(model_path)

    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = accuracy_score(true_labels.numpy(), predicted_labels.numpy())
    print(f'Accuracy on the test set: {accuracy:.2%}')
def measure_cpu_utilization_and_run(func, *args, **kwargs):
    """
    Measure CPU utilization while running a function.

    Parameters:
        func (function): The function to be executed.
        *args: Arguments to be passed to func.
        **kwargs: Keyword arguments to be passed to func.

    Returns:
        float: CPU utilization percentage during the execution of func.
        float: The elapsed time during the execution of func.
        any: The result of func execution.
    """
    
    # Measure CPU utilization before execution
    cpu_percent_before = psutil.cpu_percent(interval=None)

    # Record the start time
    start_time = time.time()

    # Execute the function and store its result
    result = func(*args, **kwargs)

    # Record the end time
    end_time = time.time()

    # Measure CPU utilization after execution
    cpu_percent_after = psutil.cpu_percent(interval=None)

    # Calculate elapsed time and average CPU utilization
    elapsed_time = end_time - start_time
    average_cpu_utilization = (cpu_percent_before + cpu_percent_after) / 2

    return average_cpu_utilization, elapsed_time, result



In [34]:
# Load Data
x = genfromtxt(data_features, delimiter=',')
y_df = pd.read_csv(data_labels)
y = y_df.values.flatten()  # Flatten if y is 2D

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Function to create time series dataset
def create_series(x, y, timestep, overlap):
    slide_step = int(timestep * (1 - overlap))
    data_num = int((len(x) / slide_step) - 1)
    dataset = np.ndarray(shape=(data_num, timestep, x.shape[1]))
    labels = []

    for i in range(data_num):
        labels.append(y[slide_step * (i + 1) - 1])
        for j in range(timestep):
            dataset[i, j, :] = x[slide_step * i + j, :]

    return dataset, np.array(labels)

# Create time series
timestep = 16  # Replace with your value
overlap = 0.5  # Replace with your value
X_series, y_series = create_series(x, y_encoded, timestep, overlap)

In [35]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_series, y_series, test_size=0.2, random_state=42)
print(f'X_train shape:{X_train.shape}, X_test shape:{X_test.shape}, y_train shape:{y_train.shape}, y_test shape:{y_test.shape}')

X_train shape:(104856, 16, 3), X_test shape:(26214, 16, 3), y_train shape:(104856,), y_test shape:(26214,)


In [36]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.int64)  # Assuming y_train is already encoded as class indexes
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.int64)


In [37]:
# Create Dataset
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoader
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


In [38]:
# Define the Transformer model
class TransformerEncoderBlock(nn.Module):
    def __init__(self, input_dim, head_size, n_heads, ff_dim, dropout=0.0):
        super(TransformerEncoderBlock, self).__init__()
        self.norm1 = nn.LayerNorm(input_dim)
        self.attention = nn.MultiheadAttention(embed_dim=input_dim, num_heads=n_heads, dropout=dropout)
        self.dropout1 = nn.Dropout(dropout)
        self.norm2 = nn.LayerNorm(input_dim)
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=ff_dim, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=ff_dim, out_channels=input_dim, kernel_size=1)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, src):
        # LayerNorm and Multi-head Attention
        x = self.norm1(src)
        x, _ = self.attention(x, x, x)
        x = self.dropout1(x)
        x = x + src  # skip connection

        # Feed Forward
        x = self.norm2(x)
        x = x.permute(1, 2, 0)  # Conv1D expects (batch_size, channels, length)
        x = F.relu(self.conv1(x))
        x = self.dropout2(x)
        x = self.conv2(x)
        x = x.permute(2, 0, 1)  # back to (length, batch_size, channels)
        x = x + src  # skip connection
        return x

class TimeSeriesTransformer(nn.Module):
    def __init__(self, sequence_length, num_features, head_size, n_heads, ff_dim, n_trans_blocks, mlp_units, drop=0.0, mlp_drop=0.0):
        super(TimeSeriesTransformer, self).__init__()
        self.encoders = nn.ModuleList([TransformerEncoderBlock(num_features, head_size, n_heads, ff_dim, drop) for _ in range(n_trans_blocks)])
        self.global_avg_pooling = nn.AdaptiveAvgPool1d(1)
        mlp_layers = []
        current_dim = num_features
        for dim in mlp_units:
            mlp_layers.append(nn.Linear(current_dim, dim))
            mlp_layers.append(nn.ReLU())
            mlp_layers.append(nn.Dropout(mlp_drop))
            current_dim = dim  # Set input dim for the next layer
        self.mlp = nn.Sequential(*mlp_layers)
        self.final_layer = nn.Linear(mlp_units[-1], 6)

    def forward(self, src):
        src = src.permute(1, 0, 2)  # Transformer expects (seq_len, batch_size, features)
        for encoder in self.encoders:
            src = encoder(src)

        # Global average pooling
        src = src.permute(1, 2, 0)  # pooling expects (batch_size, channels, length)
        src = self.global_avg_pooling(src)
        src = torch.flatten(src, 1)  # Flatten the output for the MLP

        # MLP
        src = self.mlp(src)
        output = self.final_layer(src)
        return output

# Input parameters for your data
sequence_length = 16  # The length of the time series sequences in your data
num_features = 3     # The number of features in each time step of your data sequence

# Instantiate the model
# Instantiate the model with an adjusted number of heads and head size
# The head size must be a multiple of num_features.
model = TimeSeriesTransformer(
    sequence_length=16, 
    num_features=3, 
    head_size=3,  # Each head will now have an embed size of 1 (3 / 3)
    n_heads=1,  # Only one head since our embed_dim is 3
    ff_dim=64, 
    n_trans_blocks=4, 
    mlp_units=[128, 64], 
    drop=0.1, 
    mlp_drop=0.1
)




In [39]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

TimeSeriesTransformer(
  (encoders): ModuleList(
    (0-3): 4 x TransformerEncoderBlock(
      (norm1): LayerNorm((3,), eps=1e-05, elementwise_affine=True)
      (attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=3, out_features=3, bias=True)
      )
      (dropout1): Dropout(p=0.1, inplace=False)
      (norm2): LayerNorm((3,), eps=1e-05, elementwise_affine=True)
      (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 3, kernel_size=(1,), stride=(1,))
      (dropout2): Dropout(p=0.1, inplace=False)
    )
  )
  (global_avg_pooling): AdaptiveAvgPool1d(output_size=1)
  (mlp): Sequential(
    (0): Linear(in_features=3, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.1, inplace=False)
  )
  (final_layer): Linear(in_features=64, out_features=6, bias=True)
)

In [40]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct = 0
    total = 0
    
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], '
                  f'Loss: {running_loss / 100:.4f}, Accuracy: {100 * correct / total:.2f}%')
            running_loss = 0.0
            correct = 0
            total = 0

# After training, you may want to save your model
# torch.save(model.state_dict(), 'model.pth')

print('Finished Training')


Epoch [1/10], Step [100/3277], Loss: 1.3120, Accuracy: 50.72%
Epoch [1/10], Step [200/3277], Loss: 1.1600, Accuracy: 57.59%
Epoch [1/10], Step [300/3277], Loss: 1.0515, Accuracy: 62.16%
Epoch [1/10], Step [400/3277], Loss: 0.9524, Accuracy: 66.78%
Epoch [1/10], Step [500/3277], Loss: 0.8611, Accuracy: 70.25%
Epoch [1/10], Step [600/3277], Loss: 0.8112, Accuracy: 72.88%
Epoch [1/10], Step [700/3277], Loss: 0.8269, Accuracy: 71.94%
Epoch [1/10], Step [800/3277], Loss: 0.7590, Accuracy: 72.72%
Epoch [1/10], Step [900/3277], Loss: 0.7629, Accuracy: 74.47%
Epoch [1/10], Step [1000/3277], Loss: 0.7459, Accuracy: 74.84%
Epoch [1/10], Step [1100/3277], Loss: 0.7474, Accuracy: 73.66%
Epoch [1/10], Step [1200/3277], Loss: 0.7222, Accuracy: 74.94%
Epoch [1/10], Step [1300/3277], Loss: 0.7406, Accuracy: 74.41%
Epoch [1/10], Step [1400/3277], Loss: 0.6964, Accuracy: 75.81%
Epoch [1/10], Step [1500/3277], Loss: 0.7492, Accuracy: 73.72%
Epoch [1/10], Step [1600/3277], Loss: 0.7413, Accuracy: 73.56%
E

In [41]:
# Testing loop
model.eval()  # Set the model to evaluation mode
all_predictions = []
all_targets = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        
        # Collect all predictions and labels to compute overall metrics
        all_predictions.extend(predicted.cpu().numpy())
        all_targets.extend(labels.cpu().numpy())

# Convert collected predictions and labels to arrays
all_predictions = np.array(all_predictions)
all_targets = np.array(all_targets)

# Calculate metrics
accuracy = accuracy_score(all_targets, all_predictions)
precision, recall, f1_score, support = precision_recall_fscore_support(all_targets, all_predictions, average='weighted')

# Print metrics
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1_score:.4f}')


Accuracy: 0.8082
Precision: 0.7816
Recall: 0.8082
F1 Score: 0.7809


In [42]:
# Assume 'model' is the instance of TimeSeriesTransformer you have already defined and trained
model_path = f'{MODELS_PATH}/transformer_base.pth'
torch.save(model.state_dict(), model_path)

# Get the size of the saved model file
model_size = os.path.getsize(model_path)
print(f"The model size is {model_size/1024:.2f} KB")


The model size is 60.75 KB


In [43]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, X_test_tensor, y_test_tensor, model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.75 KB
Accuracy on the test set: 80.82%
CPU usage during inference: 37.60%
Inference time: 1.8497 seconds


### Coreml model

In [22]:
#model.load_state_dict(torch.load(f'{MODELS_PATH}/transformer_base.pth'))

<All keys matched successfully>

> **As April/2024: CoreML does not support nn.AdaptiveAvgPool1d conversion.**

> As a workaround copy the changes from coremltools/converters/mil/frontend/torch/ops.py and add them to your installed copy of that file.

> [Github](https://github.com/TobyRoseman/coremltools/blob/2982c6c6e3aa28ce282efdda65a25a70ffe93f76/coremltools/converters/mil/frontend/torch/ops.py)

In [44]:
import coremltools as ct
example_input = torch.rand(1,16, 3) 

model.eval()
traced_model = torch.jit.trace(model, example_input)
out = traced_model(example_input)

# Convert to Core ML program using the Unified Conversion API.
transformer_coreml_model = ct.convert(
    traced_model,
    convert_to="mlprogram",
    inputs=[ct.TensorType(shape=example_input.shape)]
 )

transformer_coreml_model.save(f'{MODELS_PATH}/transformer.mlpackage')

Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 396/397 [00:00<00:00, 9849.05 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 707.02 passes/s]
Running MIL default pipeline: 100%|██████████| 71/71 [00:00<00:00, 221.19 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 1455.81 passes/s]


In [45]:
transformer_coreml_model

input {
  name: "src_1"
  type {
    multiArrayType {
      shape: 1
      shape: 16
      shape: 3
      dataType: FLOAT32
    }
  }
}
output {
  name: "linear_10"
  type {
    multiArrayType {
      shape: 1
      shape: 6
      dataType: FLOAT32
    }
  }
}
metadata {
  userDefined {
    key: "com.github.apple.coremltools.source"
    value: "torch==2.0.0"
  }
  userDefined {
    key: "com.github.apple.coremltools.source_dialect"
    value: "TorchScript"
  }
  userDefined {
    key: "com.github.apple.coremltools.version"
    value: "7.1"
  }
}

In [46]:
from pathlib import Path

def coreml_metrics(model_name, X_test, y_test, model_path):
    predictions = []
    for id in range(len(X_test)):
        X_test_new = np.expand_dims(X_test[id], axis=0)
        output_dict = model_name.predict({'src_1': X_test_new})
        pred_class = np.argmax(output_dict['linear_10'])
        predictions.append(pred_class)
    
    accuracy = np.sum(predictions == y_test) / len(predictions)
    print("Accuracy:", accuracy)
    
    model_file = Path(model_path)
    
    # Size in bytes
    model_size_bytes = model_file.stat().st_size
    
    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

In [26]:
import coremltools as ct
import coremltools.optimize.coreml as cto

# gru_coreml_model = ct.models.MLModel(f'{MODELS_PATH}/transformer.mlpackage')

In [47]:
model_name = transformer_coreml_model
model_path = f'{MODELS_PATH}/transformer.mlpackage'

coreml_metrics(model_name, X_test, y_test, model_path)

Accuracy: 0.8082322423132677
Size of the model: 0.12 KB


## Post Training Optimization

In [51]:
from coremltools.optimize.coreml import (
    OpThresholdPrunerConfig,
    OpMagnitudePrunerConfig,
    OpPalettizerConfig,
    OpLinearQuantizerConfig,
    OptimizationConfig,
    prune_weights,
)

### Quantization

In [52]:
op_config = OpLinearQuantizerConfig(
    mode="linear_symmetric", weight_threshold=512
)
config = OptimizationConfig(global_config=op_config)

compressed_8_bit_model = cto.linear_quantize_weights(transformer_coreml_model, config=config)

Running compression pass linear_quantize_weights: 100%|██████████| 21/21 [00:00<00:00, 18423.00 ops/s]
Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]
Running MIL default pipeline: 100%|██████████| 69/69 [00:00<00:00, 468.50 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 1076.82 passes/s]


In [53]:
compressed_8_bit_model.save(f'{MODELS_PATH}/transformer_8bitQuantized_mlmodel.mlpackage')

In [54]:
model_name = compressed_8_bit_model
model_path = f'{MODELS_PATH}/transformer_8bitQuantized_mlmodel.mlpackage'

coreml_metrics(model_name, X_test, y_test, model_path)

Accuracy: 0.8081559472037843
Size of the model: 0.12 KB


### Pruning

#### a) OpMagnitudePrunerConfig: Prune the weights with a constant sparsity percentile

In [55]:
op_config = OpMagnitudePrunerConfig(
    target_sparsity=0.6,
    weight_threshold=1024,
)
config = OptimizationConfig(global_config=op_config)
transformer_magnitude_pruner = prune_weights(transformer_coreml_model, config=config)


Running compression pass prune_weights: 100%|██████████| 21/21 [00:00<00:00, 9186.52 ops/s]
Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]
Running MIL default pipeline: 100%|██████████| 69/69 [00:00<00:00, 471.94 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 1136.26 passes/s]


In [56]:
transformer_magnitude_pruner.save(f'{MODELS_PATH}/transformer_magnitude_pruner.mlpackage')

In [57]:
model_name = transformer_magnitude_pruner
model_path = f'{MODELS_PATH}/transformer_magnitude_pruner.mlpackage'

coreml_metrics(model_name, X_test, y_test, model_path)

Accuracy: 0.7955291065842679
Size of the model: 0.12 KB


#### b) OpThresholdPrunerConfig: Sets all weight values below a certain value.

In [58]:
op_config = OpThresholdPrunerConfig(
    threshold=0.001,
    minimum_sparsity_percentile=0.01,
    weight_threshold=1024,
)

config = OptimizationConfig(global_config=op_config)
transformer_threshold_pruner = prune_weights(transformer_coreml_model, config=config)

Running compression pass prune_weights:   0%|          | 0/21 [00:00<?, ? ops/s]weight value has sparsity of 0.005615234375 < minimum_sparsity_percentile 0.01. Skipped.
Running compression pass prune_weights: 100%|██████████| 21/21 [00:00<00:00, 22848.35 ops/s]
Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]
Running MIL default pipeline: 100%|██████████| 69/69 [00:00<00:00, 487.32 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 1194.25 passes/s]


In [61]:
transformer_threshold_pruner.save(f'{MODELS_PATH}/transformer_threshold_pruner.mlpackage')

In [62]:
model_name = transformer_threshold_pruner
model_path = f'{MODELS_PATH}/transformer_threshold_pruner.mlpackage'

coreml_metrics(model_name, X_test, y_test, model_path)

Accuracy: 0.8082322423132677
Size of the model: 0.12 KB


### Palletization

In [63]:
op_config = OpPalettizerConfig(
    mode="kmeans", 
    nbits=6
)

config = OptimizationConfig(global_config=op_config)
transformer_palettizer = cto.palettize_weights(transformer_coreml_model, config=config)

Running compression pass palettize_weights: 100%|██████████| 21/21 [00:00<00:00, 112.62 ops/s]
Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]
Running MIL default pipeline: 100%|██████████| 69/69 [00:00<00:00, 275.30 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 1120.77 passes/s]


In [64]:
transformer_palettizer.save(f'{MODELS_PATH}/transformer_palettizer.mlpackage')

In [65]:
model_name = transformer_palettizer
model_path = f'{MODELS_PATH}/transformer_palettizer.mlpackage'

coreml_metrics(model_name, X_test, y_test, model_path)

Accuracy: 0.8082703898680095
Size of the model: 0.12 KB
