# Conjugate Gradient Experiment
In this notebook, most of the process stayed the same for consistently comparison.
What changes are...
1.  I replace Adam optimizer with Conjugate Gradient in the training step.
2. The mini-batch training is switched to full-batch training to work with CG Optimizer

Results
1. A little accuracy improvement from the base and dynamic quanization model.
2. No significant model size changes.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from numpy import genfromtxt
from sklearn.preprocessing import LabelEncoder

In [2]:
# Torch device configuration
device = (
    "cuda" if torch.cuda.is_available() else "mps"
    if torch.backends.mps.is_available() else "cpu"
)
print(f"Using {device} device")
if torch.cuda.is_available():
    print(f"Current GPU device: {torch.cuda.get_device_name(device)}")

Using cuda device
Current GPU device: NVIDIA GeForce RTX 3080


In [3]:
# Load Data
x = genfromtxt('../../Data/WISDM_x.csv', delimiter=',')
y_df = pd.read_csv('../../Data/WISDM_y.csv')
y = y_df.values.flatten()  # Flatten if y is 2D

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Function to create time series dataset
def create_series(x, y, timestep, overlap):
    slide_step = int(timestep * (1 - overlap))
    data_num = int((len(x) / slide_step) - 1)
    dataset = np.ndarray(shape=(data_num, timestep, x.shape[1]))
    labels = []

    for i in range(data_num):
        labels.append(y[slide_step * (i + 1) - 1])
        for j in range(timestep):
            dataset[i, j, :] = x[slide_step * i + j, :]

    return dataset, np.array(labels)

# Create time series
timestep = 16  # Replace with your value
overlap = 0.5  # Replace with your value
X_series, y_series = create_series(x, y_encoded, timestep, overlap)

In [4]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_series, y_series, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
x_train_tensor = torch.tensor(X_train, dtype=torch.float32)
x_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# DataLoader
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [5]:
print(f'X_train.shape:{X_train.shape}, X_test.shape:{X_test.shape}, y_train.shape:{y_train.shape}, y_test.shape:{y_test.shape}')

X_train.shape:(104856, 16, 3), X_test.shape:(26214, 16, 3), y_train.shape:(104856,), y_test.shape:(26214,)


In [9]:
# Define the MLP model
class MyMLP(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(MyMLP, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Model Initialization
input_size = timestep * X_series.shape[2]  # Calculate input size
model = MyMLP(input_size)

In [10]:
from torchmin import Minimizer

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# Initialize the optimizer
optimizer = Minimizer(model.parameters(),
                      method='cg',
                      tol=1e-3,
                      max_iter=8000,
                      disp=2)

# Training
model.train()

def closure():
    optimizer.zero_grad()
    output = model(x_train_tensor)
    loss = criterion(output, y_train_tensor)
    # loss.backward()  <-- do not call backward!
    return loss

loss = optimizer.step(closure)
print(f'Final training loss: {loss}')

# Save the model
model_path = "models/MLPCG_base.pt"
torch.save(model.state_dict(), model_path)

initial fval: 2.2520
iter   1 - fval: 1.8870
iter   2 - fval: 1.5706
iter   3 - fval: 1.3054
iter   4 - fval: 1.2487
iter   5 - fval: 1.1773
iter   6 - fval: 1.0808
iter   7 - fval: 1.0247
iter   8 - fval: 0.9885
iter   9 - fval: 0.9486
iter  10 - fval: 0.9108
iter  11 - fval: 0.8908
iter  12 - fval: 0.8675
iter  13 - fval: 0.8226
iter  14 - fval: 0.8079
iter  15 - fval: 0.7812
iter  16 - fval: 0.7504
iter  17 - fval: 0.7401
iter  18 - fval: 0.7140
iter  19 - fval: 0.7080
iter  20 - fval: 0.6861
iter  21 - fval: 0.6773
iter  22 - fval: 0.6622
iter  23 - fval: 0.6516
iter  24 - fval: 0.6380
iter  25 - fval: 0.6302
iter  26 - fval: 0.6160
iter  27 - fval: 0.6110
iter  28 - fval: 0.6016
iter  29 - fval: 0.5949
iter  30 - fval: 0.5822
iter  31 - fval: 0.5776
iter  32 - fval: 0.5649
iter  33 - fval: 0.5609
iter  34 - fval: 0.5481
iter  35 - fval: 0.5396
iter  36 - fval: 0.5347
iter  37 - fval: 0.5315
iter  38 - fval: 0.5239
iter  39 - fval: 0.5190
iter  40 - fval: 0.5181
iter  41 - fval: 0.

In [11]:
def evaluate(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            loss = criterion(output, target)
            total_loss += loss.item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'Test set: Average loss: {total_loss / len(test_loader)}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.0f}%)')

# DataLoader for test set
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

# Evaluate the model
evaluate(model, test_loader, criterion)


Test set: Average loss: 0.37987394200229063, Accuracy: 23801/26214 (91%)


In [12]:
model

MyMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=48, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=6, bias=True)
)

In [13]:
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import time
import psutil
from pathlib import Path

def compute_metrics_base(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    with torch.no_grad():
        # Get the model's predictions
        outputs = model(x_test)
        _, predicted_labels = torch.max(outputs, 1)

        # Convert y_test to tensor if it's not already
        true_labels = torch.tensor(y_test) if not isinstance(y_test, torch.Tensor) else y_test
        true_labels = true_labels.squeeze()  # Remove unnecessary dimensions

    model_file = Path(model_path)

    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = accuracy_score(true_labels.numpy(), predicted_labels.numpy())
    print(f'Accuracy on the test set: {accuracy:.2%}')


In [14]:
def measure_cpu_utilization_and_run(func, *args, **kwargs):
    """
    Measure CPU utilization while running a function.

    Parameters:
        func (function): The function to be executed.
        *args: Arguments to be passed to func.
        **kwargs: Keyword arguments to be passed to func.

    Returns:
        float: CPU utilization percentage during the execution of func.
        float: The elapsed time during the execution of func.
        any: The result of func execution.
    """
    
    # Measure CPU utilization before execution
    cpu_percent_before = psutil.cpu_percent(interval=None)

    # Record the start time
    start_time = time.time()

    # Execute the function and store its result
    result = func(*args, **kwargs)

    # Record the end time
    end_time = time.time()

    # Measure CPU utilization after execution
    cpu_percent_after = psutil.cpu_percent(interval=None)

    # Calculate elapsed time and average CPU utilization
    elapsed_time = end_time - start_time
    average_cpu_utilization = (cpu_percent_before + cpu_percent_after) / 2

    return average_cpu_utilization, elapsed_time, result



In [15]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.85 KB
Accuracy on the test set: 90.79%
CPU usage during inference: 0.35%
Inference time: 0.0103 seconds


In [17]:
# torch.backends.quantized.engine = 'qnnpack'

quantized_model = torch.quantization.quantize_dynamic(
    model,  # the original model
    {nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8)  # the target dtype for quantized weights


In [18]:
quantized_model_path = "models/MLP_Quantized.pt"
torch.save(quantized_model.state_dict(), quantized_model_path)


In [20]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, quantized_model, x_test_tensor, y_test_tensor, quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 19.71 KB
Accuracy on the test set: 90.59%
CPU usage during inference: 0.50%
Inference time: 0.0010 seconds


In [21]:
def print_sample_predictions(model, x_test, y_test, num_samples=5):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # Predict on the test set
        outputs = model(x_test)
        _, predicted = torch.max(outputs, 1)

        print("Sample predictions:\n")
        for i in range(num_samples):
            print(f"x_test[{i}]: {x_test[i]}")
            print(f"Actual label (y_test[{i}]): {y_test[i]}")
            print(f"Predicted label: {predicted[i]}")
            print("\n")


### Static Quantization - Overall

In [22]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.quantization import QuantStub, DeQuantStub

class QuantizedMLP(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(QuantizedMLP, self).__init__()
        self.quant = QuantStub()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.dequant = DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.dequant(x)
        return x

# Instantiate the model
model_fp32 = MyMLP(input_size)
model_fp32.load_state_dict(torch.load(model_path))
model_fp32.eval()

# Define a quantization configuration
model_int8 = QuantizedMLP(input_size)
model_int8.eval()

# Specify the quantization configuration
model_int8.qconfig = torch.quantization.get_default_qconfig('qnnpack')

# Prepare the model for static quantization
torch.quantization.prepare(model_int8, inplace=True)

# Calibrate the model with representative data
# Assuming the train_loader is representative of the data distribution
for data, _ in train_loader:
    model_int8(data)

# Convert to a quantized model
torch.quantization.convert(model_int8, inplace=True)

# Evaluate the quantized model
evaluate(model_int8, test_loader, criterion)

Test set: Average loss: 2.036565427954604, Accuracy: 2516/26214 (10%)


In [23]:
# Define the path where you want to save the quantized model
static_quantized_model_path = "models/MLP_Static_Quantized.pt"

# Save the state dictionary of the quantized model
torch.save(model_int8.state_dict(), static_quantized_model_path)

print(f"Quantized model saved to {static_quantized_model_path}")


Quantized model saved to models/MLP_Static_Quantized.pt


In [24]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model_int8, x_test_tensor, y_test_tensor, static_quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 20.40 KB
Accuracy on the test set: 9.60%
CPU usage during inference: 0.70%
Inference time: 0.0156 seconds


### Static Quantization - Per Channel


In [25]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.quantization import QuantStub, DeQuantStub, default_per_channel_qconfig

class QuantizedMLP(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(QuantizedMLP, self).__init__()
        self.quant = QuantStub()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.dequant = DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.dequant(x)
        return x

# Instantiate the model
model_fp32 = MyMLP(input_size)
model_fp32.load_state_dict(torch.load(model_path))
model_fp32.eval()

# Define a quantization configuration
model_int8_pc = QuantizedMLP(input_size)
model_int8_pc.eval()

# Specify the quantization configuration to use per-channel weight quantization
model_int8_pc.qconfig = torch.quantization.get_default_qconfig('qnnpack')
# Set the model configuration to use per-channel quantization
model_int8_pc.fc1.qconfig = default_per_channel_qconfig
model_int8_pc.fc2.qconfig = default_per_channel_qconfig
# For the output layer, you might want to use per-tensor quantization
model_int8_pc.fc3.qconfig = torch.quantization.default_qconfig

# Prepare the model for static quantization
torch.quantization.prepare(model_int8_pc, inplace=True)

# Calibrate the model with representative data
# Assuming the train_loader is representative of the data distribution
for data, _ in train_loader:
    model_int8_pc(data)

# Convert to a quantized model
torch.quantization.convert(model_int8_pc, inplace=True)

# Save the quantized model
quantized_model_path = "models/MLP_Static_Quantized_perChannel.pt"
torch.save(model_int8_pc.state_dict(), quantized_model_path)

# Evaluate the quantized model
evaluate(model_int8_pc, test_loader, criterion)


Test set: Average loss: 1.825728035263899, Accuracy: 4388/26214 (17%)


In [26]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model_int8_pc, x_test_tensor, y_test_tensor, quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 24.67 KB
Accuracy on the test set: 16.74%
CPU usage during inference: 0.75%
Inference time: 0.0046 seconds


### Quantization aware training

In [32]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

# Define the model architecture for QAT
class MyMLPForQAT(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(MyMLPForQAT, self).__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.dequant(x)
        return x



In [33]:
# Assuming the correct input size and number of classes
input_size = 16 * 3  # 16 time steps with 3 features each
num_classes = 6  # Assuming 6 classes as per your data

# Instantiate and prepare the model for QAT
model_qat = MyMLPForQAT(input_size, num_classes)
model_qat.qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')
model_qat.train()
model_prepared = torch.quantization.prepare_qat(model_qat, inplace=True)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# Initialize the model and optimizer
optimizer = Minimizer(model_prepared.parameters(),
                      method='cg',
                      tol=1e-3,
                      max_iter=2000,
                      disp=2)

# Training
model_prepared.train()
inputs = x_train_tensor.view(x_train_tensor.size(0), -1)  # Flatten the input

def closure():
    optimizer.zero_grad()
    output = model_prepared(inputs)
    loss = criterion(output, y_train_tensor)
    # loss.backward()  <-- do not call backward!
    return loss

loss = optimizer.step(closure)
print(f'Final training loss: {loss}')

initial fval: 1.7665
iter   1 - fval: 1.4745
iter   2 - fval: 1.3784
iter   3 - fval: 1.2463
iter   4 - fval: 1.1826
iter   5 - fval: 1.1149
iter   6 - fval: 1.0668
iter   7 - fval: 1.0329
iter   8 - fval: 0.9888
iter   9 - fval: 0.9565
iter  10 - fval: 0.9462
iter  11 - fval: 0.8981
iter  12 - fval: 0.8903
iter  13 - fval: 0.8736
iter  14 - fval: 0.8443
iter  15 - fval: 0.8195
iter  16 - fval: 0.7980
iter  17 - fval: 0.7763
iter  18 - fval: 0.7631
iter  19 - fval: 0.7456
iter  20 - fval: 0.7364
iter  21 - fval: 0.7246
iter  22 - fval: 0.7164
iter  23 - fval: 0.7155
iter  24 - fval: 0.7034
iter  25 - fval: 0.6983
iter  26 - fval: 0.6926
iter  27 - fval: 0.6875
iter  28 - fval: 0.6801
iter  29 - fval: 0.6750
iter  30 - fval: 0.6689
iter  31 - fval: 0.6642
iter  32 - fval: 0.6573
iter  33 - fval: 0.6447
iter  34 - fval: 0.6419
iter  35 - fval: 0.6354
iter  36 - fval: 0.6336
iter  37 - fval: 0.6295
iter  38 - fval: 0.6267
iter  39 - fval: 0.6228
iter  40 - fval: 0.6208
iter  41 - fval: 0.

In [34]:
model_prepared.eval()
# Convert the QAT model to a fully quantized model
qat_model = torch.quantization.convert(model, inplace=False)

# Save the fine-tuned quantized model
qat_model_path = "models/MLP_QAT_v2.pt"
torch.save(qat_model.state_dict(), qat_model_path)

In [35]:
#Load the QAT model from the saved file
qat_model_saved = MyMLPForQAT(input_size, num_classes)
state_dict = torch.load(qat_model_path)
qat_model_saved.load_state_dict(state_dict)
qat_model_saved.to('cpu')

MyMLPForQAT(
  (quant): QuantStub()
  (fc1): Linear(in_features=48, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=6, bias=True)
  (dequant): DeQuantStub()
)

In [39]:
# Prepare the model for evaluation
qat_model_saved.eval()

# Define the test dataset and dataloader
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Evaluate the model on the test dataset
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(inputs.size(0), -1)  # Flatten the input
        outputs = qat_model_saved(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100.0 * correct / total
print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 90.79%


In [40]:
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import time
import psutil
from pathlib import Path

def compute_metrics_new(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    test_dataset = TensorDataset(x_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
    
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.view(inputs.size(0), -1)  # Flatten the input
            outputs = qat_model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        
    model_file = Path(model_path)
    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = correct / total
    print(f'Accuracy on the test set: {accuracy:.2%}')


In [41]:
model.to('cpu')

# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_new, qat_model_saved, x_test_tensor, y_test_tensor, qat_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.85 KB
Accuracy on the test set: 90.79%
CPU usage during inference: 0.95%
Inference time: 0.2031 seconds


### Torch Pruning

In [42]:
model

MyMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=48, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=6, bias=True)
)

In [43]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune


# Assuming timestep and X_series.shape[2] are defined
# input_size = timestep * X_series.shape[2]
pruned_model = MyMLP(input_size)

# Apply pruning to a layer by specifying the percentage of connections to prune
prune.l1_unstructured(pruned_model.fc1, 'weight', amount=0.2)
prune.l1_unstructured(pruned_model.fc2, 'weight', amount=0.2)

# To make the pruning permanent, you might want to remove the reparametrization
for module in [pruned_model.fc1, pruned_model.fc2]:
    prune.remove(module, 'weight')


pruned_model_path = "models/MLP_pruned.pt"
torch.save(model.state_dict(), pruned_model_path)


In [44]:
criterion = nn.CrossEntropyLoss()
evaluate(pruned_model, test_loader, criterion)

Test set: Average loss: 1.8419621281507539, Accuracy: 5632/26214 (21%)


In [45]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, pruned_model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.85 KB
Accuracy on the test set: 21.48%
CPU usage during inference: 0.60%
Inference time: 0.0010 seconds


In [46]:
# Initialize the model
model = MyMLP(input_size)

# Define the amount of pruning
pruning_amount = 0.2  # This will prune 20% of the neurons

# Apply structured pruning to the layers
prune.ln_structured(model.fc1, name="weight", amount=pruning_amount, n=1, dim=0)
prune.ln_structured(model.fc2, name="weight", amount=pruning_amount, n=1, dim=0)

# Optionally, make the pruning permanent
for module in [model.fc1, model.fc2]:
    prune.remove(module, 'weight')


In [47]:
pruned_model_path = "models/MLP_pruned_structured.pt"
torch.save(model.state_dict(), pruned_model_path)


In [48]:
criterion = nn.CrossEntropyLoss()
evaluate(model, test_loader, criterion)

Test set: Average loss: 2.129577203203992, Accuracy: 4016/26214 (15%)


In [49]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.96 KB
Accuracy on the test set: 15.32%
CPU usage during inference: 17.65%
Inference time: 0.0060 seconds


In [54]:
import torch.optim as optim


# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# Initialize the model and optimizer
optimizer = Minimizer(model.parameters(),
                      method='cg',
                      tol=1e-3,
                      max_iter=2000,
                      disp=2)

# Training
model.train()
# inputs = x_train_tensor.view(inputs.size(0), -1)  # Flatten the input

def closure():
    optimizer.zero_grad()
    output = model(x_train_tensor)
    loss = criterion(output, y_train_tensor)
    # loss.backward()  <-- do not call backward!
    return loss

loss = optimizer.step(closure)
print(f'Final training loss: {loss}')


initial fval: 1.9366
iter   1 - fval: 1.4805
iter   2 - fval: 1.4102
iter   3 - fval: 1.2358
iter   4 - fval: 1.1886
iter   5 - fval: 1.1464
iter   6 - fval: 1.0378
iter   7 - fval: 0.9896
iter   8 - fval: 0.9542
iter   9 - fval: 0.9380
iter  10 - fval: 0.9069
iter  11 - fval: 0.8674
iter  12 - fval: 0.8440
iter  13 - fval: 0.8299
iter  14 - fval: 0.8049
iter  15 - fval: 0.7894
iter  16 - fval: 0.7715
iter  17 - fval: 0.7462
iter  18 - fval: 0.7440
iter  19 - fval: 0.7334
iter  20 - fval: 0.7279
iter  21 - fval: 0.7216
iter  22 - fval: 0.7134
iter  23 - fval: 0.7067
iter  24 - fval: 0.7035
iter  25 - fval: 0.6893
iter  26 - fval: 0.6738
iter  27 - fval: 0.6674
iter  28 - fval: 0.6596
iter  29 - fval: 0.6564
iter  30 - fval: 0.6509
iter  31 - fval: 0.6467
iter  32 - fval: 0.6349
iter  33 - fval: 0.6216
iter  34 - fval: 0.6149
iter  35 - fval: 0.6115
iter  36 - fval: 0.6056
iter  37 - fval: 0.6015
iter  38 - fval: 0.5897
iter  39 - fval: 0.5841
iter  40 - fval: 0.5745
iter  41 - fval: 0.

In [55]:
pruned_model_path = "models/MLP_pruned_structured_finetuned.pt"
torch.save(model.state_dict(), pruned_model_path)

In [56]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 61.12 KB
Accuracy on the test set: 90.88%
CPU usage during inference: 1.00%
Inference time: 0.0045 seconds


In [57]:
class MyMLP(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(MyMLP, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        # Initialize a dictionary to hold the sum of activations
        self.activations = {'fc1': 0, 'fc2': 0}

    def forward(self, x):
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        # Sum the absolute values of the activations for fc1
        self.activations['fc1'] += x.abs().sum(dim=0)
        x = F.relu(self.fc2(x))
        # Sum the absolute values of the activations for fc2
        self.activations['fc2'] += x.abs().sum(dim=0)
        return self.fc3(x)


In [58]:
# Initialize the model
pruned_model = MyMLP(input_size)

# Pass training data through the model to record activations
pruned_model.eval()  # Set to eval mode if you don't need to track gradients
with torch.no_grad():
    for inputs, _ in train_loader:
        pruned_model(inputs)


In [59]:
def prune_activations(model, layer_name, amount):
    # Get the weight tensor
    weight = getattr(model, layer_name).weight.data

    # Compute the sum of absolute activations for each output neuron
    activation = model.activations[layer_name]

    # Calculate the threshold for pruning
    threshold = torch.quantile(activation, amount)

    # Generate a mask where entries are 1 if above the threshold and 0 otherwise
    mask = activation.ge(threshold).float()

    # Reshape the mask to match the dimensions of the weight tensor
    mask = mask.unsqueeze(1).expand_as(weight)

    # Apply the custom pruning mask
    prune.custom_from_mask(getattr(model, layer_name), name='weight', mask=mask)


In [60]:
# Apply activation-based pruning
prune_activations(pruned_model, 'fc1', amount=0.2)
prune_activations(pruned_model, 'fc2', amount=0.2)

# Make pruning permanent
for layer_name in ['fc1', 'fc2']:
    prune.remove(getattr(pruned_model, layer_name), 'weight')


In [61]:
pruned_model_path = "models/MLP_pruned_activation.pt"
torch.save(model.state_dict(), pruned_model_path)


In [62]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, pruned_model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.96 KB
Accuracy on the test set: 22.60%
CPU usage during inference: 0.40%
Inference time: 0.0072 seconds


In [63]:
model.eval()


MyMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=48, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=6, bias=True)
)

In [64]:
dummy_input = torch.randn(1, input_size)

In [65]:
onnx_model_path = "models/MLP_model.onnx"
torch.onnx.export(model, dummy_input, onnx_model_path, opset_version=11)