### **Data Preprocessing**



In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from numpy import genfromtxt
from sklearn.preprocessing import LabelEncoder



In [2]:
# Load Data
x = genfromtxt('../Data/WISDM_x.csv', delimiter=',')
y_df = pd.read_csv('../Data/WISDM_y.csv')
y = y_df.values.flatten()  # Flatten if y is 2D

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Function to create time series dataset
def create_series(x, y, timestep, overlap):
    slide_step = int(timestep * (1 - overlap))
    data_num = int((len(x) / slide_step) - 1)
    dataset = np.ndarray(shape=(data_num, timestep, x.shape[1]))
    labels = []

    for i in range(data_num):
        labels.append(y[slide_step * (i + 1) - 1])
        for j in range(timestep):
            dataset[i, j, :] = x[slide_step * i + j, :]

    return dataset, np.array(labels)

# Create time series
timestep = 16  # Replace with your value
overlap = 0.5  # Replace with your value
X_series, y_series = create_series(x, y_encoded, timestep, overlap)

In [3]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_series, y_series, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
x_train_tensor = torch.tensor(X_train, dtype=torch.float32)
x_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)



In [4]:
class LSTMNet(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, n_steps):
        super(LSTMNet, self).__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.lstm1 = nn.LSTM(input_size, hidden_size1, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.lstm2 = nn.LSTM(hidden_size1, hidden_size2, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        # Initialize hidden state and cell state
        h0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        
        # First LSTM layer
        out, _ = self.lstm1(x, (h0, c0))
        out = self.dropout1(out)
        
        # Second LSTM layer
        h1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        c1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        out, _ = self.lstm2(out, (h1, c1))
        out = self.dropout2(out)

        # Dense layer
        out = self.fc(out[:, -1, :]) # Taking the last time step
        return out


In [5]:
# Parameters
input_size = 3  # Number of features
hidden_size1 = 50
hidden_size2 = 25
output_size = 6
n_steps = 16

# Create the model
model = LSTMNet(input_size, hidden_size1, hidden_size2, output_size, n_steps)

# Training setup (for demonstration)
# Define your dataset here
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.int64))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [6]:
def train(model, train_loader, loss_fn, optimizer, epochs=0):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(y_pred.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

        avg_loss = running_loss / len(train_loader)
        accuracy = 100 * correct / total
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

train(model, train_loader, loss_fn, optimizer, epochs=20)


Epoch [1/20], Loss: 0.5845, Accuracy: 79.90%
Epoch [2/20], Loss: 0.3519, Accuracy: 87.94%
Epoch [3/20], Loss: 0.2904, Accuracy: 90.28%
Epoch [4/20], Loss: 0.2530, Accuracy: 91.67%
Epoch [5/20], Loss: 0.2307, Accuracy: 92.40%
Epoch [6/20], Loss: 0.2124, Accuracy: 93.07%
Epoch [7/20], Loss: 0.1992, Accuracy: 93.51%
Epoch [8/20], Loss: 0.1887, Accuracy: 93.86%
Epoch [9/20], Loss: 0.1813, Accuracy: 94.13%
Epoch [10/20], Loss: 0.1718, Accuracy: 94.45%
Epoch [11/20], Loss: 0.1644, Accuracy: 94.70%
Epoch [12/20], Loss: 0.1601, Accuracy: 94.82%
Epoch [13/20], Loss: 0.1561, Accuracy: 95.04%
Epoch [14/20], Loss: 0.1497, Accuracy: 95.17%
Epoch [15/20], Loss: 0.1452, Accuracy: 95.31%
Epoch [16/20], Loss: 0.1414, Accuracy: 95.41%
Epoch [17/20], Loss: 0.1378, Accuracy: 95.55%
Epoch [18/20], Loss: 0.1337, Accuracy: 95.67%
Epoch [19/20], Loss: 0.1304, Accuracy: 95.78%
Epoch [20/20], Loss: 0.1272, Accuracy: 95.88%


In [7]:
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.int64))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
def evaluate(model, test_loader, loss_fn):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch)
            total_loss += loss_fn(y_pred, y_batch).item()
            _, predicted = torch.max(y_pred.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    avg_loss = total_loss / len(test_loader)
    accuracy = correct / total
    return avg_loss, accuracy

avg_loss, accuracy = evaluate(model, test_loader, loss_fn)
print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")



Test Loss: 0.1521, Test Accuracy: 0.9508


In [8]:
model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/lstm_base.pth"
torch.save(model.state_dict(), model_path)

In [9]:
def measure_cpu_utilization_and_run(func, *args, **kwargs):
    """
    Measure CPU utilization while running a function.

    Parameters:
        func (function): The function to be executed.
        *args: Arguments to be passed to func.
        **kwargs: Keyword arguments to be passed to func.

    Returns:
        float: CPU utilization percentage during the execution of func.
        float: The elapsed time during the execution of func.
        any: The result of func execution.
    """
    
    # Measure CPU utilization before execution
    cpu_percent_before = psutil.cpu_percent(interval=None)

    # Record the start time
    start_time = time.time()

    # Execute the function and store its result
    result = func(*args, **kwargs)

    # Record the end time
    end_time = time.time()

    # Measure CPU utilization after execution
    cpu_percent_after = psutil.cpu_percent(interval=None)

    # Calculate elapsed time and average CPU utilization
    elapsed_time = end_time - start_time
    average_cpu_utilization = (cpu_percent_before + cpu_percent_after) / 2

    return average_cpu_utilization, elapsed_time, result


In [10]:
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import time
import psutil
from pathlib import Path

def compute_metrics_base(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    with torch.no_grad():
        # Get the model's predictions
        outputs = model(x_test)
        _, predicted_labels = torch.max(outputs, 1)

        # Convert y_test to tensor if it's not already
        true_labels = torch.tensor(y_test) if not isinstance(y_test, torch.Tensor) else y_test
        true_labels = true_labels.squeeze()  # Remove unnecessary dimensions

    model_file = Path(model_path)

    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = accuracy_score(true_labels.numpy(), predicted_labels.numpy())
    print(f'Accuracy on the test set: {accuracy:.2%}')


In [11]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 77.08 KB
Accuracy on the test set: 95.08%
CPU usage during inference: 35.15%
Inference time: 0.4486 seconds


In [27]:
torch.backends.quantized.engine = 'qnnpack'

quantized_model = torch.quantization.quantize_dynamic(
    model,  # the original model
    {nn.Linear,nn.LSTM},  # a set of layers to dynamically quantize
    dtype=torch.qint8)  # the target dtype for quantized weights


In [28]:
quantized_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/lstm_Quantized.pth"
torch.save(quantized_model.state_dict(), quantized_model_path)


In [29]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, quantized_model, x_test_tensor, y_test_tensor, quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 28.36 KB
Accuracy on the test set: 95.05%
CPU usage during inference: 40.10%
Inference time: 0.3964 seconds


In [32]:
def print_sample_predictions(model, x_test, y_test, num_samples=5):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # Predict on the test set
        outputs = model(x_test)
        _, predicted = torch.max(outputs, 1)

        print("Sample predictions:\n")
        for i in range(num_samples):
            print(f"x_test[{i}]: {x_test[i]}")
            print(f"Actual label (y_test[{i}]): {y_test[i]}")
            print(f"Predicted label: {predicted[i]}")
            print("\n")


In [33]:
# Assuming you're using the first num_samples of x_test and y_test
#num_samples = 5
#print_sample_predictions(model, x_test_tensor[:num_samples], y_test_tensor[:num_samples], num_samples=5)

### Static Quantization

### Quantization aware training


In [17]:
import torch
from torch import nn
import torch.quantization

class LSTMNetQAT(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, n_steps):
        super(LSTMNetQAT, self).__init__()
        self.quant = torch.quantization.QuantStub()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.lstm1 = nn.LSTM(input_size, hidden_size1, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.lstm2 = nn.LSTM(hidden_size1, hidden_size2, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size2, output_size)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = x.to('cpu')
        x = self.quant(x)
        h0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        out, _ = self.lstm1(x, (h0, c0))
        out = self.dropout1(out)
        h1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        c1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        out, _ = self.lstm2(out, (h1, c1))
        out = self.dropout2(out)
        out = self.fc(out[:, -1, :])
        out = self.dequant(out)
        return out


In [18]:
import torch.optim as optim
from torch.nn import CrossEntropyLoss


# Parameters
input_size = 3  # Number of features
hidden_size1 = 50
hidden_size2 = 25
output_size = 6
n_steps = 16

# Create the model
model_qat = LSTMNetQAT(input_size, hidden_size1, hidden_size2, output_size, n_steps).to('cpu')
model_qat

LSTMNetQAT(
  (quant): QuantStub()
  (lstm1): LSTM(3, 50, batch_first=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (lstm2): LSTM(50, 25, batch_first=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=25, out_features=6, bias=True)
  (dequant): DeQuantStub()
)

In [19]:

model_qat.qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')
model_qat.train()
model_prepared = torch.quantization.prepare_qat(model_qat, inplace=False).to('cpu')


In [20]:
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.int64))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define the optimizer and loss function
optimizer = optim.Adam(model_prepared.parameters(), lr=0.0001)
criterion = CrossEntropyLoss()

# Fine-tuning loop for QAT
num_fine_tune_epochs = 1
model_prepared.train()
for epoch in range(num_fine_tune_epochs):
    for inputs, labels in train_loader:
        inputs, labels = inputs.to('cpu'), labels.to('cpu')
        optimizer.zero_grad()
        outputs = model_prepared(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch}: Loss {loss.item()}')


Epoch 0: Loss 0.6909666061401367


In [21]:
# Make sure to switch to eval mode
model_prepared.eval()
# Convert to a fully quantized model
quantized_model = torch.quantization.convert(model_prepared, inplace=False).to('cpu')
# Path to save the quantized model
qat_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/LSTM_QAT.pth"

# Save the model state dictionary
torch.save(quantized_model.state_dict(), qat_model_path)


In [22]:
quantized_model

LSTMNetQAT(
  (quant): Quantize(scale=tensor([0.1551]), zero_point=tensor([126]), dtype=torch.quint8)
  (lstm1): QuantizedLSTM(
    (layers): ModuleList(
      (0): _LSTMLayer(
        (layer_fw): _LSTMSingleLayer(
          (cell): QuantizableLSTMCell(
            (igates): QuantizedLinear(in_features=3, out_features=200, scale=0.04487486928701401, zero_point=119, qscheme=torch.per_tensor_affine)
            (hgates): QuantizedLinear(in_features=50, out_features=200, scale=0.01590183563530445, zero_point=110, qscheme=torch.per_tensor_affine)
            (gates): QFunctional(
              scale=0.04775191470980644, zero_point=107
              (activation_post_process): Identity()
            )
            (input_gate): Sigmoid()
            (forget_gate): Sigmoid()
            (cell_gate): Tanh()
            (output_gate): Sigmoid()
            (fgate_cx): QFunctional(
              scale=0.020394695922732353, zero_point=138
              (activation_post_process): Identity()
       

In [23]:
# Assuming test_loader is defined and loaded with your test data
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.int64))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to('cpu'), labels.to('cpu')
        outputs = quantized_model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total * 100
print(f'Test Accuracy: {accuracy:.2f}%')


NotImplementedError: Could not run 'quantized::linear' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::linear' is only available for these backends: [MPS, QuantizedCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

MPS: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:75 [backend fallback]
QuantizedCPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/quantized/cpu/qlinear.cpp:1137 [kernel]
BackendSelect: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:498 [backend fallback]
Functionalize: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/FunctionalizeFallbackKernel.cpp:290 [backend fallback]
Named: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/NegateFallback.cpp:19 [backend fallback]
ZeroTensor: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:86 [backend fallback]
AutogradOther: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:53 [backend fallback]
AutogradCPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:57 [backend fallback]
AutogradCUDA: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:65 [backend fallback]
AutogradXLA: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:69 [backend fallback]
AutogradMPS: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:77 [backend fallback]
AutogradXPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:61 [backend fallback]
AutogradHPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:90 [backend fallback]
AutogradLazy: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:73 [backend fallback]
AutogradMeta: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:81 [backend fallback]
Tracer: registered at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/TraceTypeManual.cpp:296 [backend fallback]
AutocastCPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:382 [backend fallback]
AutocastCUDA: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:249 [backend fallback]
FuncTorchBatched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:710 [backend fallback]
FuncTorchVmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/VmapModeRegistrations.cpp:28 [backend fallback]
Batched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/TensorWrapper.cpp:203 [backend fallback]
PythonTLSSnapshot: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:494 [backend fallback]
PreDispatch: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]
PythonDispatcher: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]


In [24]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_new, qat_model, x_test_tensor, y_test_tensor, qat_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


NameError: name 'compute_metrics_new' is not defined

In [25]:
from torch import nn
import torch.quantization

class LSTMNetQAT2(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(LSTMNetQAT2, self).__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        
        # Quantization stubs for QAT
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

        # LSTM layers
        self.lstm1 = nn.LSTM(input_size, hidden_size1, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.lstm2 = nn.LSTM(hidden_size1, hidden_size2, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        # Quantize the input
        x = self.quant(x)

        # Initialize hidden state and cell state for first LSTM
        h0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        out, _ = self.lstm1(x, (h0, c0))
        out = self.dropout1(out)

        # Initialize hidden state and cell state for second LSTM
        h1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        c1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        out, _ = self.lstm2(out, (h1, c1))
        out = self.dropout2(out)

        # Fully connected layer
        out = self.fc(out[:, -1, :])  # Taking the last time step output
        # Dequantize the output
        out = self.dequant(out)
        return out


In [26]:
torch.backends.quantized.engine = 'qnnpack'

# Assume the model instantiation and data loaders are already defined
# Parameters
input_size = 3  # Number of features
hidden_size1 = 50
hidden_size2 = 25
output_size = 6
n_steps = 16

# Create the model
model_qat = LSTMNetQAT2(input_size, hidden_size1, hidden_size2, output_size).to('cpu')
model_qat


# Specify the quantization configuration
model_qat.qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')

# Prepare the model for QAT
model_qat.train()
model_prepared = torch.quantization.prepare_qat(model_qat, inplace=False)

# Fine-tune the model
optimizer = torch.optim.Adam(model_prepared.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

num_epochs = 1
for epoch in range(num_epochs):
    total_loss = 0
    for inputs, labels in train_loader:  # Assume train_loader is defined
        optimizer.zero_grad()
        outputs = model_prepared(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch}, Loss: {total_loss / len(train_loader)}')

# Switch to evaluation mode
model_prepared.eval()

# Convert the model to a fully quantized version
quantized_model = torch.quantization.convert(model_prepared, inplace=False)


Epoch 0, Loss: 0.0


In [34]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune


In [38]:
# Instantiate the model
model = LSTMNet(input_size, hidden_size1, hidden_size2, output_size, n_steps)

# Collect names of the parameters in LSTM layers to be pruned
params_to_prune = []
for name, module in model.named_modules():
    if isinstance(module, nn.LSTM):
        for param_name, _ in module.named_parameters():
            if 'weight' in param_name:
                params_to_prune.append((module, param_name))

# Apply pruning
for module, param_name in params_to_prune:
    prune.l1_unstructured(module, param_name, amount=0.1)

# Apply pruning to the linear layer
prune.l1_unstructured(model.fc, 'weight', amount=0.1)

# Make the pruning permanent
for module, param_name in params_to_prune:
    prune.remove(module, param_name)
prune.remove(model.fc, 'weight')


Linear(in_features=25, out_features=6, bias=True)

In [39]:
pruned_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/lstm_pruned.pth"
torch.save(model.state_dict(), pruned_model_path)


In [40]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 77.11 KB
Accuracy on the test set: 31.19%
CPU usage during inference: 32.65%
Inference time: 0.4218 seconds
