In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from numpy import genfromtxt
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load Data
x = genfromtxt('../Data/WISDM_x.csv', delimiter=',')
y_df = pd.read_csv('../Data/WISDM_y.csv')
y = y_df.values.flatten()  # Flatten if y is 2D

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Function to create time series dataset
def create_series(x, y, timestep, overlap):
    slide_step = int(timestep * (1 - overlap))
    data_num = int((len(x) / slide_step) - 1)
    dataset = np.ndarray(shape=(data_num, timestep, x.shape[1]))
    labels = []

    for i in range(data_num):
        labels.append(y[slide_step * (i + 1) - 1])
        for j in range(timestep):
            dataset[i, j, :] = x[slide_step * i + j, :]

    return dataset, np.array(labels)

# Create time series
timestep = 16  # Replace with your value
overlap = 0.5  # Replace with your value
X_series, y_series = create_series(x, y_encoded, timestep, overlap)

In [3]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_series, y_series, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
x_train_tensor = torch.tensor(X_train, dtype=torch.float32)
x_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)



In [4]:
print(f'X_train.shape:{X_train.shape}, X_test.shape:{X_test.shape}, y_train.shape:{y_train.shape}, y_test.shape:{y_test.shape}')

X_train.shape:(109820, 16, 3), X_test.shape:(27455, 16, 3), y_train.shape:(109820,), y_test.shape:(27455,)


In [5]:
# Define the MLP model
class MyMLP(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(MyMLP, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Model Initialization
input_size = timestep * X_series.shape[2]  # Calculate input size
model = MyMLP(input_size)

# DataLoader
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)



In [6]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

# Training function
def train(model, train_loader, criterion, optimizer, epochs=100):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for data, target in train_loader:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch}, Loss: {total_loss / len(train_loader)}')

# Train the model
train(model, train_loader, criterion, optimizer)

#
model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_base.pth"
torch.save(model.state_dict(), model_path)


Epoch 0, Loss: 0.5036703805875528
Epoch 1, Loss: 0.3736385640694753
Epoch 2, Loss: 0.3213703603264474
Epoch 3, Loss: 0.2899197906663854
Epoch 4, Loss: 0.26723053082954007
Epoch 5, Loss: 0.25139076860582493
Epoch 6, Loss: 0.23830647413599323
Epoch 7, Loss: 0.22708436165282847
Epoch 8, Loss: 0.21918042258322587
Epoch 9, Loss: 0.21003089617234427
Epoch 10, Loss: 0.20360947032795573
Epoch 11, Loss: 0.19722458829404244
Epoch 12, Loss: 0.19203124335361318
Epoch 13, Loss: 0.18720296161718197
Epoch 14, Loss: 0.18219791885925618
Epoch 15, Loss: 0.17841651994031626
Epoch 16, Loss: 0.17403507972622426
Epoch 17, Loss: 0.16986562935895957
Epoch 18, Loss: 0.16700676376095566
Epoch 19, Loss: 0.16328140604689648
Epoch 20, Loss: 0.1612280957696043
Epoch 21, Loss: 0.15745123630142085
Epoch 22, Loss: 0.1557973129609665
Epoch 23, Loss: 0.1525420074173117
Epoch 24, Loss: 0.15005502231106474
Epoch 25, Loss: 0.14734081920813813
Epoch 26, Loss: 0.14468327838284917
Epoch 27, Loss: 0.1431623719445927
Epoch 28, 

In [7]:
def evaluate(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            loss = criterion(output, target)
            total_loss += loss.item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'Test set: Average loss: {total_loss / len(test_loader)}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.0f}%)')

# DataLoader for test set
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

# Evaluate the model
evaluate(model, test_loader, criterion)


Test set: Average loss: 0.4956688749015609, Accuracy: 25021/27455 (91%)


In [8]:
model

MyMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=48, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=6, bias=True)
)

In [9]:
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import time
import psutil
from pathlib import Path

def compute_metrics_base(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    with torch.no_grad():
        # Get the model's predictions
        outputs = model(x_test)
        _, predicted_labels = torch.max(outputs, 1)

        # Convert y_test to tensor if it's not already
        true_labels = torch.tensor(y_test) if not isinstance(y_test, torch.Tensor) else y_test
        true_labels = true_labels.squeeze()  # Remove unnecessary dimensions

    model_file = Path(model_path)

    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = accuracy_score(true_labels.numpy(), predicted_labels.numpy())
    print(f'Accuracy on the test set: {accuracy:.2%}')


In [10]:
def measure_cpu_utilization_and_run(func, *args, **kwargs):
    """
    Measure CPU utilization while running a function.

    Parameters:
        func (function): The function to be executed.
        *args: Arguments to be passed to func.
        **kwargs: Keyword arguments to be passed to func.

    Returns:
        float: CPU utilization percentage during the execution of func.
        float: The elapsed time during the execution of func.
        any: The result of func execution.
    """
    
    # Measure CPU utilization before execution
    cpu_percent_before = psutil.cpu_percent(interval=None)

    # Record the start time
    start_time = time.time()

    # Execute the function and store its result
    result = func(*args, **kwargs)

    # Record the end time
    end_time = time.time()

    # Measure CPU utilization after execution
    cpu_percent_after = psutil.cpu_percent(interval=None)

    # Calculate elapsed time and average CPU utilization
    elapsed_time = end_time - start_time
    average_cpu_utilization = (cpu_percent_before + cpu_percent_after) / 2

    return average_cpu_utilization, elapsed_time, result



In [11]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.83 KB
Accuracy on the test set: 91.13%
CPU usage during inference: 32.50%
Inference time: 0.0084 seconds


In [12]:
torch.backends.quantized.engine = 'qnnpack'

quantized_model = torch.quantization.quantize_dynamic(
    model,  # the original model
    {nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8)  # the target dtype for quantized weights


In [13]:
quantized_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_Quantized.pth"
torch.save(quantized_model.state_dict(), quantized_model_path)


In [15]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, quantized_model, x_test_tensor, y_test_tensor, quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 19.58 KB
Accuracy on the test set: 91.08%
CPU usage during inference: 40.55%
Inference time: 0.0298 seconds


In [16]:
def print_sample_predictions(model, x_test, y_test, num_samples=5):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # Predict on the test set
        outputs = model(x_test)
        _, predicted = torch.max(outputs, 1)

        print("Sample predictions:\n")
        for i in range(num_samples):
            print(f"x_test[{i}]: {x_test[i]}")
            print(f"Actual label (y_test[{i}]): {y_test[i]}")
            print(f"Predicted label: {predicted[i]}")
            print("\n")


In [16]:
# Assuming you're using the first num_samples of x_test and y_test
#num_samples = 5
#print_sample_predictions(model, x_test_tensor[:num_samples], y_test_tensor[:num_samples], num_samples=5)


scikit-learn version 1.2.2 is not supported. Minimum required version: 0.17. Maximum required version: 1.1.2. Disabling scikit-learn conversion API.
XGBoost version 1.7.6 has not been tested with coremltools. You may run into unexpected errors. XGBoost 1.4.2 is the most recent version that has been tested.
TensorFlow version 2.15.0 has not been tested with coremltools. You may run into unexpected errors. TensorFlow 2.12.0 is the most recent version that has been tested.


ModuleNotFoundError: No module named 'coremltools.converters.pytorch'

### Static Quantization - Overall

In [17]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.quantization import QuantStub, DeQuantStub

class QuantizedMLP(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(QuantizedMLP, self).__init__()
        self.quant = QuantStub()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.dequant = DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.dequant(x)
        return x

# Instantiate the model
model_fp32 = MyMLP(input_size)
model_fp32.load_state_dict(torch.load(model_path))
model_fp32.eval()

# Define a quantization configuration
model_int8 = QuantizedMLP(input_size)
model_int8.eval()

# Specify the quantization configuration
model_int8.qconfig = torch.quantization.get_default_qconfig('qnnpack')

# Prepare the model for static quantization
torch.quantization.prepare(model_int8, inplace=True)

# Calibrate the model with representative data
# Assuming the train_loader is representative of the data distribution
for data, _ in train_loader:
    model_int8(data)

# Convert to a quantized model
torch.quantization.convert(model_int8, inplace=True)

# Evaluate the quantized model
evaluate(model_int8, test_loader, criterion)

Test set: Average loss: 2.326750990394112, Accuracy: 3509/27455 (13%)


In [18]:
# Define the path where you want to save the quantized model
static_quantized_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_Static_Quantized.pth"

# Save the state dictionary of the quantized model
torch.save(model_int8.state_dict(), static_quantized_model_path)

print(f"Quantized model saved to {static_quantized_model_path}")


Quantized model saved to /Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_Static_Quantized.pth


In [19]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model_int8, x_test_tensor, y_test_tensor, static_quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 20.27 KB
Accuracy on the test set: 12.78%
CPU usage during inference: 46.55%
Inference time: 0.0049 seconds


### Static Quantization - Per Channel


In [20]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.quantization import QuantStub, DeQuantStub, default_per_channel_qconfig

class QuantizedMLP(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(QuantizedMLP, self).__init__()
        self.quant = QuantStub()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.dequant = DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.dequant(x)
        return x

# Instantiate the model
model_fp32 = MyMLP(input_size)
model_fp32.load_state_dict(torch.load(model_path))
model_fp32.eval()

# Define a quantization configuration
model_int8_pc = QuantizedMLP(input_size)
model_int8_pc.eval()

# Specify the quantization configuration to use per-channel weight quantization
model_int8_pc.qconfig = torch.quantization.get_default_qconfig('qnnpack')
# Set the model configuration to use per-channel quantization
model_int8_pc.fc1.qconfig = default_per_channel_qconfig
model_int8_pc.fc2.qconfig = default_per_channel_qconfig
# For the output layer, you might want to use per-tensor quantization
model_int8_pc.fc3.qconfig = torch.quantization.default_qconfig

# Prepare the model for static quantization
torch.quantization.prepare(model_int8_pc, inplace=True)

# Calibrate the model with representative data
# Assuming the train_loader is representative of the data distribution
for data, _ in train_loader:
    model_int8_pc(data)

# Convert to a quantized model
torch.quantization.convert(model_int8_pc, inplace=True)

# Save the quantized model
quantized_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_Static_Quantized_perChannel.pth"
torch.save(model_int8_pc.state_dict(), quantized_model_path)

# Evaluate the quantized model
evaluate(model_int8_pc, test_loader, criterion)


Test set: Average loss: 1.8900043008488654, Accuracy: 10407/27455 (38%)


In [21]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model_int8_pc, x_test_tensor, y_test_tensor, quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 24.61 KB
Accuracy on the test set: 37.91%
CPU usage during inference: 36.15%
Inference time: 0.0051 seconds


### Quantization aware training

In [22]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

# Define the model architecture for QAT
class MyMLPForQAT(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(MyMLPForQAT, self).__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.dequant(x)
        return x



In [23]:
# Assuming the correct input size and number of classes
input_size = 16 * 3  # 16 time steps with 3 features each
num_classes = 6  # Assuming 6 classes as per your data

# Instantiate and prepare the model for QAT
model_qat = MyMLPForQAT(input_size, num_classes)
model_qat.qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')
model_qat.train()
model_prepared = torch.quantization.prepare_qat(model_qat, inplace=True)

# Define the optimizer and loss function
optimizer = optim.Adam(model_prepared.parameters(), lr=0.00001)
criterion = nn.CrossEntropyLoss()

# Fine-tuning loop for QAT
num_fine_tune_epochs = 10
model_prepared.train()
for epoch in range(num_fine_tune_epochs):
    for inputs, labels in train_loader:
        inputs = inputs.view(inputs.size(0), -1)  # Flatten the input
        optimizer.zero_grad()
        outputs = model_prepared(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch}: Loss {loss.item()}')

Epoch 0: Loss 1.0491827726364136
Epoch 1: Loss 0.8117403388023376
Epoch 2: Loss 0.9130408763885498
Epoch 3: Loss 0.7163098454475403
Epoch 4: Loss 0.6618160605430603
Epoch 5: Loss 0.6053312420845032
Epoch 6: Loss 0.42409414052963257
Epoch 7: Loss 0.5860022902488708
Epoch 8: Loss 0.6115912199020386
Epoch 9: Loss 0.6875667572021484


In [24]:
model_prepared.eval()
# Convert the QAT model to a fully quantized model
qat_model = torch.quantization.convert(model, inplace=False)

# Save the fine-tuned quantized model
qat_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_QAT_v2.pth"
torch.save(qat_model.state_dict(), qat_model_path)

In [25]:
#Load the QAT model from the saved file
qat_model_saved = MyMLPForQAT(input_size, num_classes)
state_dict = torch.load(model_path)
qat_model_saved.load_state_dict(state_dict)
qat_model_saved.to('cpu')

MyMLPForQAT(
  (quant): QuantStub()
  (fc1): Linear(in_features=48, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=6, bias=True)
  (dequant): DeQuantStub()
)

In [26]:
# Prepare the model for evaluation
qat_model_saved.eval()

# Define the test dataset and dataloader
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Evaluate the model on the test dataset
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(inputs.size(0), -1)  # Flatten the input
        outputs = qat_model_saved(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100.0 * correct / total
print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 90.92%


In [27]:
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import time
import psutil
from pathlib import Path

def compute_metrics_new(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    test_dataset = TensorDataset(x_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
    
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.view(inputs.size(0), -1)  # Flatten the input
            outputs = qat_model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        
    model_file = Path(model_path)
    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = correct / total
    print(f'Accuracy on the test set: {accuracy:.2%}')


In [28]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_new, qat_model_saved, x_test_tensor, y_test_tensor, qat_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.85 KB
Accuracy on the test set: 90.92%
CPU usage during inference: 21.85%
Inference time: 0.0879 seconds


### Torch Pruning

In [29]:
model

MyMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=48, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=6, bias=True)
)

In [30]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune


# Assuming timestep and X_series.shape[2] are defined
# input_size = timestep * X_series.shape[2]
pruned_model = MyMLP(input_size)

# Apply pruning to a layer by specifying the percentage of connections to prune
prune.l1_unstructured(pruned_model.fc1, 'weight', amount=0.2)
prune.l1_unstructured(pruned_model.fc2, 'weight', amount=0.2)

# To make the pruning permanent, you might want to remove the reparametrization
for module in [pruned_model.fc1, pruned_model.fc2]:
    prune.remove(module, 'weight')


pruned_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_pruned.pth"
torch.save(model.state_dict(), pruned_model_path)


In [31]:
criterion = nn.CrossEntropyLoss()
evaluate(pruned_model, test_loader, criterion)

Test set: Average loss: 2.0504767550177228, Accuracy: 2677/27455 (10%)


In [32]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, pruned_model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.85 KB
Accuracy on the test set: 9.75%
CPU usage during inference: 12.80%
Inference time: 0.0083 seconds


In [33]:
# Initialize the model
model = MyMLP(input_size)

# Define the amount of pruning
pruning_amount = 0.2  # This will prune 20% of the neurons

# Apply structured pruning to the layers
prune.ln_structured(model.fc1, name="weight", amount=pruning_amount, n=1, dim=0)
prune.ln_structured(model.fc2, name="weight", amount=pruning_amount, n=1, dim=0)

# Optionally, make the pruning permanent
for module in [model.fc1, model.fc2]:
    prune.remove(module, 'weight')


In [34]:
pruned_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_pruned_structured.pth"
torch.save(model.state_dict(), pruned_model_path)


In [35]:
criterion = nn.CrossEntropyLoss()
evaluate(model, test_loader, criterion)

Test set: Average loss: 2.0075885318653843, Accuracy: 2826/27455 (10%)


In [36]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.96 KB
Accuracy on the test set: 10.29%
CPU usage during inference: 0.00%
Inference time: 0.0064 seconds


In [37]:
import torch.optim as optim


# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
# Use a lower learning rate for fine-tuning
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Fine-tuning loop
num_epochs = 10  # Set the number of epochs
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")

    # Validation
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Accuracy of the model on the validation set: {100 * correct / total}%")


Epoch [1/10], Loss: 0.5167649119727679
Accuracy of the model on the validation set: 84.09032963030414%
Epoch [2/10], Loss: 0.3852498353543607
Accuracy of the model on the validation set: 87.40848661446003%
Epoch [3/10], Loss: 0.3363587231807289
Accuracy of the model on the validation set: 88.72336550719359%
Epoch [4/10], Loss: 0.30365206413231527
Accuracy of the model on the validation set: 89.12037880167547%
Epoch [5/10], Loss: 0.2822448457949437
Accuracy of the model on the validation set: 89.4773265343289%
Epoch [6/10], Loss: 0.26496579775061363
Accuracy of the model on the validation set: 89.63030413403752%
Epoch [7/10], Loss: 0.25294134031877025
Accuracy of the model on the validation set: 90.38790748497541%
Epoch [8/10], Loss: 0.2416109860047594
Accuracy of the model on the validation set: 90.2094336186487%
Epoch [9/10], Loss: 0.2334713713962819
Accuracy of the model on the validation set: 90.42433072300128%
Epoch [10/10], Loss: 0.22536255340884268
Accuracy of the model on the va

In [38]:
pruned_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_pruned_structured_finetuned.pth"
torch.save(model.state_dict(), pruned_model_path)


In [39]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 61.12 KB
Accuracy on the test set: 90.97%
CPU usage during inference: 12.35%
Inference time: 0.0073 seconds


In [40]:
class MyMLP(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(MyMLP, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        # Initialize a dictionary to hold the sum of activations
        self.activations = {'fc1': 0, 'fc2': 0}

    def forward(self, x):
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        # Sum the absolute values of the activations for fc1
        self.activations['fc1'] += x.abs().sum(dim=0)
        x = F.relu(self.fc2(x))
        # Sum the absolute values of the activations for fc2
        self.activations['fc2'] += x.abs().sum(dim=0)
        return self.fc3(x)


In [41]:
# Initialize the model
pruned_model = MyMLP(input_size)

# Pass training data through the model to record activations
pruned_model.eval()  # Set to eval mode if you don't need to track gradients
with torch.no_grad():
    for inputs, _ in train_loader:
        pruned_model(inputs)


In [42]:
def prune_activations(model, layer_name, amount):
    # Get the weight tensor
    weight = getattr(model, layer_name).weight.data

    # Compute the sum of absolute activations for each output neuron
    activation = model.activations[layer_name]

    # Calculate the threshold for pruning
    threshold = torch.quantile(activation, amount)

    # Generate a mask where entries are 1 if above the threshold and 0 otherwise
    mask = activation.ge(threshold).float()

    # Reshape the mask to match the dimensions of the weight tensor
    mask = mask.unsqueeze(1).expand_as(weight)

    # Apply the custom pruning mask
    prune.custom_from_mask(getattr(model, layer_name), name='weight', mask=mask)


In [43]:
# Apply activation-based pruning
prune_activations(pruned_model, 'fc1', amount=0.2)
prune_activations(pruned_model, 'fc2', amount=0.2)

# Make pruning permanent
for layer_name in ['fc1', 'fc2']:
    prune.remove(getattr(pruned_model, layer_name), 'weight')


In [44]:
pruned_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_pruned_activation.pth"
torch.save(model.state_dict(), pruned_model_path)


In [45]:
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, pruned_model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 60.96 KB
Accuracy on the test set: 13.09%
CPU usage during inference: 55.10%
Inference time: 0.0079 seconds


In [59]:
!pip3 install onnx

Collecting onnx
  Downloading onnx-1.15.0-cp311-cp311-macosx_10_12_universal2.whl (16.3 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.3/16.3 MB[0m [31m45.0 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
Installing collected packages: onnx
Successfully installed onnx-1.15.0


In [57]:
model.eval()


MyMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=48, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=6, bias=True)
)

In [48]:
dummy_input = torch.randn(1, input_size)

In [60]:
onnx_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/MLP_model.onnx"
torch.onnx.export(model, dummy_input, onnx_model_path, opset_version=11)
