### **Data Preprocessing**



In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from numpy import genfromtxt
from sklearn.preprocessing import LabelEncoder

In [2]:
# Torch device configuration
device = (
    "cuda" if torch.cuda.is_available() else "mps"
    if torch.backends.mps.is_available() else "cpu"
)
print(f"Using {device} device")
if torch.cuda.is_available():
    print(f"Current GPU device: {torch.cuda.get_device_name(device)}")

Using cuda device
Current GPU device: NVIDIA GeForce RTX 3080


In [3]:
# Load Data
x = genfromtxt('../../Data/WISDM_x.csv', delimiter=',')
y_df = pd.read_csv('../../Data/WISDM_y.csv')
y = y_df.values.flatten()  # Flatten if y is 2D

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Function to create time series dataset
def create_series(x, y, timestep, overlap):
    slide_step = int(timestep * (1 - overlap))
    data_num = int((len(x) / slide_step) - 1)
    dataset = np.ndarray(shape=(data_num, timestep, x.shape[1]))
    labels = []

    for i in range(data_num):
        labels.append(y[slide_step * (i + 1) - 1])
        for j in range(timestep):
            dataset[i, j, :] = x[slide_step * i + j, :]

    return dataset, np.array(labels)

# Create time series
timestep = 16  # Replace with your value
overlap = 0.5  # Replace with your value
X_series, y_series = create_series(x, y_encoded, timestep, overlap)

In [4]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_series, y_series, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
x_train_tensor = torch.tensor(X_train, dtype=torch.float32)
x_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [5]:
# Training setup
training_percentage = 0.9  # Set the desired percentage of training samples

# Calculate the number of training samples
n_total_samples = x_train_tensor.shape[0]
n_training_data = int(training_percentage * n_total_samples)

# Create a subset of the training data
subset_indices = torch.randperm(n_total_samples)[:n_training_data]
x_train_subset = x_train_tensor[subset_indices]
y_train_subset = y_train_tensor[subset_indices]

# Create a DataLoader for the subset
batch_size = n_training_data
train_dataset = TensorDataset(x_train_subset, y_train_subset)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Create a test Dataloader
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [6]:
class LSTMNet(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, n_steps):
        super(LSTMNet, self).__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.lstm1 = nn.LSTM(input_size, hidden_size1, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.lstm2 = nn.LSTM(hidden_size1, hidden_size2, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        # Initialize hidden state and cell state
        h0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        
        # First LSTM layer
        out, _ = self.lstm1(x, (h0, c0))
        out = self.dropout1(out)
        
        # Second LSTM layer
        h1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        c1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        out, _ = self.lstm2(out, (h1, c1))
        out = self.dropout2(out)

        # Dense layer
        out = self.fc(out[:, -1, :]) # Taking the last time step
        return out

# Parameters
input_size = 3  # Number of features
hidden_size1 = 50
hidden_size2 = 25
output_size = 6
n_steps = 16

# Create the model
model = LSTMNet(input_size, hidden_size1, hidden_size2, output_size, n_steps)

In [7]:
from torchmin import Minimizer

loss_fn = nn.CrossEntropyLoss()
optimizer = optimizer = Minimizer(model.parameters(),
                      method='cg',
                      tol=1e-5,
                      max_iter=200,
                      disp=0)

def train(model, train_loader, loss_fn, optimizer, epochs=1):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for X_batch, y_batch in train_loader:
            inputs, labels = X_batch.to(device), y_batch.to(device)

            def closure():
                optimizer.zero_grad()
                output = model(inputs)
                loss = loss_fn(output, labels)
                # loss.backward()  <-- do not call backward!
                return loss

            loss = optimizer.step(closure)
            running_loss += loss.item()

        for X_batch, y_batch in test_loader:
            inputs, labels = X_batch.to(device), y_batch.to(device)
            output = model(inputs)
            _, predicted = torch.max(output.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        avg_loss = running_loss / len(train_loader)
        accuracy = 100 * correct / total
        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {avg_loss:.4f}, Val Accuracy: {accuracy:.2f}%")

train(model, train_loader, loss_fn, optimizer, epochs=100)


Epoch [1/100], Train Loss: 0.6513, Val Accuracy: 77.44%
Epoch [2/100], Train Loss: 0.6504, Val Accuracy: 77.73%
Epoch [3/100], Train Loss: 0.6500, Val Accuracy: 77.62%
Epoch [4/100], Train Loss: 0.6253, Val Accuracy: 78.63%
Epoch [5/100], Train Loss: 0.6239, Val Accuracy: 78.62%
Epoch [6/100], Train Loss: 0.6222, Val Accuracy: 78.61%
Epoch [7/100], Train Loss: 0.6209, Val Accuracy: 78.52%
Epoch [8/100], Train Loss: 0.6212, Val Accuracy: 78.57%
Epoch [9/100], Train Loss: 0.6205, Val Accuracy: 78.66%
Epoch [10/100], Train Loss: 0.6213, Val Accuracy: 78.71%
Epoch [11/100], Train Loss: 0.6206, Val Accuracy: 78.84%
Epoch [12/100], Train Loss: 0.6198, Val Accuracy: 78.81%
Epoch [13/100], Train Loss: 0.6200, Val Accuracy: 78.61%
Epoch [14/100], Train Loss: 0.6207, Val Accuracy: 78.72%
Epoch [15/100], Train Loss: 0.6194, Val Accuracy: 78.62%
Epoch [16/100], Train Loss: 0.6192, Val Accuracy: 78.71%
Epoch [17/100], Train Loss: 0.6162, Val Accuracy: 78.80%
Epoch [18/100], Train Loss: 0.6178, Val 

In [8]:
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.int64))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
def evaluate(model, test_loader, loss_fn):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch)
            total_loss += loss_fn(y_pred, y_batch).item()
            _, predicted = torch.max(y_pred.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    avg_loss = total_loss / len(test_loader)
    accuracy = correct / total
    return avg_loss, accuracy

model.to('cpu')
avg_loss, accuracy = evaluate(model, test_loader, loss_fn)
print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")



Test Loss: 0.5517, Test Accuracy: 0.8098


In [9]:
model_path = "models/LSTM_CG_base.pt"
torch.save(model.state_dict(), model_path)

In [10]:
def measure_cpu_utilization_and_run(func, *args, **kwargs):
    """
    Measure CPU utilization while running a function.

    Parameters:
        func (function): The function to be executed.
        *args: Arguments to be passed to func.
        **kwargs: Keyword arguments to be passed to func.

    Returns:
        float: CPU utilization percentage during the execution of func.
        float: The elapsed time during the execution of func.
        any: The result of func execution.
    """
    
    # Measure CPU utilization before execution
    cpu_percent_before = psutil.cpu_percent(interval=None)

    # Record the start time
    start_time = time.time()

    # Execute the function and store its result
    result = func(*args, **kwargs)

    # Record the end time
    end_time = time.time()

    # Measure CPU utilization after execution
    cpu_percent_after = psutil.cpu_percent(interval=None)

    # Calculate elapsed time and average CPU utilization
    elapsed_time = end_time - start_time
    average_cpu_utilization = (cpu_percent_before + cpu_percent_after) / 2

    return average_cpu_utilization, elapsed_time, result


In [11]:
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import time
import psutil
from pathlib import Path

def compute_metrics_base(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    with torch.no_grad():
        # Get the model's predictions
        outputs = model(x_test)
        _, predicted_labels = torch.max(outputs, 1)

        # Convert y_test to tensor if it's not already
        true_labels = torch.tensor(y_test) if not isinstance(y_test, torch.Tensor) else y_test
        true_labels = true_labels.squeeze()  # Remove unnecessary dimensions

    model_file = Path(model_path)

    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = accuracy_score(true_labels.numpy(), predicted_labels.numpy())
    print(f'Accuracy on the test set: {accuracy:.2%}')


In [12]:
model.to('cpu')

# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 77.12 KB
Accuracy on the test set: 80.98%
CPU usage during inference: 5.70%
Inference time: 0.1223 seconds


In [13]:
# torch.backends.quantized.engine = 'qnnpack'

quantized_model = torch.quantization.quantize_dynamic(
    model,  # the original model
    {nn.Linear,nn.LSTM},  # a set of layers to dynamically quantize
    dtype=torch.qint8)  # the target dtype for quantized weights


In [14]:
quantized_model_path = "models/LSTM_CG__Quantized.pt"
torch.save(quantized_model.state_dict(), quantized_model_path)


In [15]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, quantized_model, x_test_tensor, y_test_tensor, quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 25.22 KB
Accuracy on the test set: 80.99%
CPU usage during inference: 1.75%
Inference time: 0.2302 seconds


In [16]:
def print_sample_predictions(model, x_test, y_test, num_samples=5):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # Predict on the test set
        outputs = model(x_test)
        _, predicted = torch.max(outputs, 1)

        print("Sample predictions:\n")
        for i in range(num_samples):
            print(f"x_test[{i}]: {x_test[i]}")
            print(f"Actual label (y_test[{i}]): {y_test[i]}")
            print(f"Predicted label: {predicted[i]}")
            print("\n")


In [17]:
# Assuming you're using the first num_samples of x_test and y_test
#num_samples = 5
#print_sample_predictions(model, x_test_tensor[:num_samples], y_test_tensor[:num_samples], num_samples=5)

### Static Quantization

### Quantization aware training


In [18]:
import torch
from torch import nn
import torch.quantization

class LSTMNetQAT(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, n_steps):
        super(LSTMNetQAT, self).__init__()
        self.quant = torch.quantization.QuantStub()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.lstm1 = nn.LSTM(input_size, hidden_size1, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.lstm2 = nn.LSTM(hidden_size1, hidden_size2, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size2, output_size)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        h0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        out, _ = self.lstm1(x, (h0, c0))
        out = self.dropout1(out)
        h1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        c1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        out, _ = self.lstm2(out, (h1, c1))
        out = self.dropout2(out)
        out = self.fc(out[:, -1, :])
        out = self.dequant(out)
        return out


In [19]:
import torch.optim as optim
from torch.nn import CrossEntropyLoss


# Parameters
input_size = 3  # Number of features
hidden_size1 = 50
hidden_size2 = 25
output_size = 6
n_steps = 16

# Create the model
model_qat = LSTMNetQAT(input_size, hidden_size1, hidden_size2, output_size, n_steps).to(device)
model_qat

LSTMNetQAT(
  (quant): QuantStub()
  (lstm1): LSTM(3, 50, batch_first=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (lstm2): LSTM(50, 25, batch_first=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=25, out_features=6, bias=True)
  (dequant): DeQuantStub()
)

In [20]:

model_qat.qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')
model_qat.train()
model_prepared = torch.quantization.prepare_qat(model_qat, inplace=False).to(device)


In [21]:
import torch
from torchmin import Minimizer
from torch.nn import CrossEntropyLoss

# Move the model to the selected device
model_prepared.to(device)

# Define the optimizer and loss function
optimizer = Minimizer(model_prepared.parameters(),
                      method='cg',
                      tol=1e-3,
                      max_iter=2,
                      disp=2)

loss_fn = CrossEntropyLoss()

num_fine_tune_epochs = 1
for epoch in range(num_fine_tune_epochs):
    model_prepared.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for X_batch, y_batch in train_loader:
        # Move inputs and labels to the selected device
        inputs, labels = X_batch.to(device), y_batch.to(device)

        def closure():
            optimizer.zero_grad()
            output = model_prepared(inputs)
            loss = loss_fn(output, labels)
            # loss.backward()  <-- do not call backward!
            return loss

        loss = optimizer.step(closure)
        running_loss += loss.item()

    for X_batch, y_batch in test_loader:
        # Move inputs and labels to the selected device
        inputs, labels = X_batch.to(device), y_batch.to(device)
        output = model_prepared(inputs)
        _, predicted = torch.max(output.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_fine_tune_epochs}], Train Loss: {avg_loss:.4f}, Val Accuracy: {accuracy:.2f}%")


  return torch.fused_moving_avg_obs_fake_quant(


initial fval: 1.7854
iter   1 - fval: 1.6764
iter   2 - fval: 1.6528
         Current function value: 1.652833
         Iterations: 2
         Function evaluations: 3
Epoch [1/1], Train Loss: 1.6528, Val Accuracy: 39.67%


In [22]:
# Make sure to switch to eval mode
model_prepared.eval()
model_prepared.to('cpu')
# Convert to a fully quantized model
quantized_model = torch.quantization.convert(model_prepared, inplace=False).to('cpu')
# Path to save the quantized model
qat_model_path = "models/LSTM_CG_QAT.pt"

# Save the model state dictionary
torch.save(quantized_model.state_dict(), qat_model_path)

In [23]:
quantized_model

LSTMNetQAT(
  (quant): Quantize(scale=tensor([0.1551]), zero_point=tensor([126]), dtype=torch.quint8)
  (lstm1): QuantizedLSTM(
    (layers): ModuleList(
      (0): _LSTMLayer(
        (layer_fw): _LSTMSingleLayer(
          (cell): QuantizableLSTMCell(
            (igates): QuantizedLinear(in_features=3, out_features=200, scale=0.04042680561542511, zero_point=125, qscheme=torch.per_tensor_affine)
            (hgates): QuantizedLinear(in_features=50, out_features=200, scale=0.0052698529325425625, zero_point=126, qscheme=torch.per_tensor_affine)
            (gates): QFunctional(
              scale=0.03954175487160683, zero_point=125
              (activation_post_process): Identity()
            )
            (input_gate): Sigmoid()
            (forget_gate): Sigmoid()
            (cell_gate): Tanh()
            (output_gate): Sigmoid()
            (fgate_cx): QFunctional(
              scale=0.01567537896335125, zero_point=96
              (activation_post_process): Identity()
       

In [29]:
from torch import nn
import torch.quantization

class LSTMNetQAT2(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(LSTMNetQAT2, self).__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        
        # Quantization stubs for QAT
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

        # LSTM layers
        self.lstm1 = nn.LSTM(input_size, hidden_size1, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.lstm2 = nn.LSTM(hidden_size1, hidden_size2, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        # Quantize the input
        x = self.quant(x)

        # Initialize hidden state and cell state for first LSTM
        h0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        out, _ = self.lstm1(x, (h0, c0))
        out = self.dropout1(out)

        # Initialize hidden state and cell state for second LSTM
        h1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        c1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        out, _ = self.lstm2(out, (h1, c1))
        out = self.dropout2(out)

        # Fully connected layer
        out = self.fc(out[:, -1, :])  # Taking the last time step output
        # Dequantize the output
        out = self.dequant(out)
        return out


In [42]:
# torch.backends.quantized.engine = 'qnnpack'

# Assume the model instantiation and data loaders are already defined
# Parameters
input_size = 3  # Number of features
hidden_size1 = 50
hidden_size2 = 25
output_size = 6
n_steps = 16

# Create the model
model_qat = LSTMNetQAT2(input_size, hidden_size1, hidden_size2, output_size).to('cpu')

# Specify the quantization configuration
model_qat.qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')

# Prepare the model for QAT
model_qat.train()
model_prepared = torch.quantization.prepare_qat(model_qat, inplace=False)
model_prepared

LSTMNetQAT2(
  (quant): QuantStub(
    (activation_post_process): FusedMovingAvgObsFakeQuantize(
      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=255, qscheme=torch.per_tensor_affine, reduce_range=False
      (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)
    )
  )
  (dequant): DeQuantStub()
  (lstm1): QuantizableLSTM(
    (layers): ModuleList(
      (0): _LSTMLayer(
        (layer_fw): _LSTMSingleLayer(
          (cell): QuantizableLSTMCell(
            (igates): Linear(
              in_features=3, out_features=200, bias=True
              (activation_post_process): FusedMovingAvgObsFakeQuantize(
                fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=255, qscheme=torch.per_tensor_affine, reduce_range

In [43]:
import torch
from torchmin import Minimizer
from torch.nn import CrossEntropyLoss

# Move the model to the selected device
model_prepared.to(device)

# Define the optimizer and loss function
optimizer = Minimizer(model_prepared.parameters(),
                      method='cg',
                      tol=1e-3,
                      max_iter=2,
                      disp=0)

loss_fn = CrossEntropyLoss()

num_fine_tune_epochs = 20
for epoch in range(num_fine_tune_epochs):
    model_prepared.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for X_batch, y_batch in train_loader:
        # Move inputs and labels to the selected device
        inputs, labels = X_batch.to(device), y_batch.to(device)

        def closure():
            optimizer.zero_grad()
            output = model_prepared(inputs)
            loss = loss_fn(output, labels)
            # loss.backward()  <-- do not call backward!
            return loss

        loss = optimizer.step(closure)
        running_loss += loss.item()

    for X_batch, y_batch in test_loader:
        # Move inputs and labels to the selected device
        inputs, labels = X_batch.to(device), y_batch.to(device)
        output = model_prepared(inputs)
        _, predicted = torch.max(output.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_fine_tune_epochs}], Train Loss: {avg_loss:.4f}, Val Accuracy: {accuracy:.2f}%")


Epoch [1/20], Train Loss: 1.6212, Val Accuracy: 39.61%
Epoch [2/20], Train Loss: 1.5015, Val Accuracy: 48.84%
Epoch [3/20], Train Loss: 1.3934, Val Accuracy: 52.85%
Epoch [4/20], Train Loss: 1.1410, Val Accuracy: 66.90%
Epoch [5/20], Train Loss: 1.3734, Val Accuracy: 51.51%
Epoch [6/20], Train Loss: 1.0254, Val Accuracy: 69.43%
Epoch [7/20], Train Loss: 0.8945, Val Accuracy: 70.97%
Epoch [8/20], Train Loss: 0.8656, Val Accuracy: 71.79%
Epoch [9/20], Train Loss: 0.8479, Val Accuracy: 72.05%
Epoch [10/20], Train Loss: 0.8338, Val Accuracy: 72.47%
Epoch [11/20], Train Loss: 0.8252, Val Accuracy: 72.48%
Epoch [12/20], Train Loss: 0.8102, Val Accuracy: 72.95%
Epoch [13/20], Train Loss: 0.8023, Val Accuracy: 72.69%
Epoch [14/20], Train Loss: 0.7892, Val Accuracy: 73.37%
Epoch [15/20], Train Loss: 0.7823, Val Accuracy: 73.45%
Epoch [16/20], Train Loss: 0.7750, Val Accuracy: 73.41%
Epoch [17/20], Train Loss: 0.7701, Val Accuracy: 73.69%
Epoch [18/20], Train Loss: 0.7662, Val Accuracy: 73.91%
E

In [45]:
model_prepared.to('cpu')

# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model_prepared, x_test_tensor, y_test_tensor, qat_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')

Size of the model: 36.45 KB
Accuracy on the test set: 74.77%
CPU usage during inference: 1.80%
Inference time: 1.3949 seconds


## Pruning

In [31]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune

In [32]:
# Instantiate the model
model = LSTMNet(input_size, hidden_size1, hidden_size2, output_size, n_steps)

# Collect names of the parameters in LSTM layers to be pruned
params_to_prune = []
for name, module in model.named_modules():
    if isinstance(module, nn.LSTM):
        for param_name, _ in module.named_parameters():
            if 'weight' in param_name:
                params_to_prune.append((module, param_name))

# Apply pruning
for module, param_name in params_to_prune:
    prune.l1_unstructured(module, param_name, amount=0.1)

# Apply pruning to the linear layer
prune.l1_unstructured(model.fc, 'weight', amount=0.1)

# Make the pruning permanent
for module, param_name in params_to_prune:
    prune.remove(module, param_name)
prune.remove(model.fc, 'weight')


Linear(in_features=25, out_features=6, bias=True)

In [33]:
pruned_model_path = "models/LSTM_CG_pruned.pt"
torch.save(model.state_dict(), pruned_model_path)


In [34]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 77.15 KB
Accuracy on the test set: 38.76%
CPU usage during inference: 2.35%
Inference time: 0.3316 seconds
