In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from numpy import genfromtxt
from sklearn.preprocessing import LabelEncoder

In [2]:
# Torch device configuration
device = (
    "cuda" if torch.cuda.is_available() else "mps"
    if torch.backends.mps.is_available() else "cpu"
)
print(f"Using {device} device")
if torch.cuda.is_available():
    print(f"Current GPU device: {torch.cuda.get_device_name(device)}")

Using cuda device
Current GPU device: NVIDIA GeForce RTX 3080


In [3]:
# Load Data
x = genfromtxt('../../Data/WISDM_x.csv', delimiter=',')
y_df = pd.read_csv('../../Data/WISDM_y.csv')
y = y_df.values.flatten()  # Flatten if y is 2D

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Function to create time series dataset
def create_series(x, y, timestep, overlap):
    slide_step = int(timestep * (1 - overlap))
    data_num = int((len(x) / slide_step) - 1)
    dataset = np.ndarray(shape=(data_num, timestep, x.shape[1]))
    labels = []

    for i in range(data_num):
        labels.append(y[slide_step * (i + 1) - 1])
        for j in range(timestep):
            dataset[i, j, :] = x[slide_step * i + j, :]

    return dataset, np.array(labels)

# Create time series
timestep = 16  # Replace with your value
overlap = 0.5  # Replace with your value
X_series, y_series = create_series(x, y_encoded, timestep, overlap)

In [4]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_series, y_series, test_size=0.2, random_state=42)
print(f'X_train shape:{X_train.shape}, X_test shape:{X_test.shape}, y_train shape:{y_train.shape}, y_test shape:{y_test.shape}')

X_train shape:(104856, 16, 3), X_test shape:(26214, 16, 3), y_train shape:(104856,), y_test shape:(26214,)


In [5]:
class MyModel(nn.Module):
    def __init__(self, n_input, n_steps):
        super(MyModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=n_input, out_channels=256, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=256, out_channels=128, kernel_size=3)
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=64, kernel_size=3)
        self.conv4 = nn.Conv1d(in_channels=64, out_channels=32, kernel_size=3)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(32 * 8, 6)  # Adjusted for the output from the last Conv1d layer
        self.dropout = nn.Dropout(0.2)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.dropout(torch.relu(self.conv1(x)))
        x = self.dropout(torch.relu(self.conv2(x)))
        x = self.dropout(torch.relu(self.conv3(x)))
        x = self.dropout(torch.relu(self.conv4(x)))
        x = self.flatten(x)
        x = self.fc(x)
        return self.softmax(x)

# Example instantiation of the model
model = MyModel(n_input=3, n_steps=16)


In [9]:
# Convert arrays to PyTorch Tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)  # Assuming y_train is class labels for classification
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Creating TensorDatasets
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Creating DataLoaders
batch_size = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [38]:
from torchmin import Minimizer

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()

# Initialize the optimizer
optimizer = Minimizer(model.parameters(),
                      method='cg',
                      tol=1e-5,
                      max_iter=2000,
                      disp=2)

# Training
model.train()

inputs, labels = X_train, y_train
inputs = inputs.view(-1, 3, 16)  # Reshape input to match model expectations

def closure():
    optimizer.zero_grad()
    output = model(inputs)
    loss = criterion(output, labels)
    # loss.backward()  <-- do not call backward!
    return loss

optimizer.step(closure)

# Save the model
model_path = "models/MLPCG_base.pt"
torch.save(model.state_dict(), model_path)


initial fval: 1.7970
iter   1 - fval: 1.6234
iter   2 - fval: 1.6167
iter   3 - fval: 1.5301
iter   4 - fval: 1.5231
iter   5 - fval: 1.5229
iter   6 - fval: 1.5217
iter   7 - fval: 1.5173
iter   8 - fval: 1.5117
iter   9 - fval: 1.4981
iter  10 - fval: 1.4760
iter  11 - fval: 1.4671
iter  12 - fval: 1.4564
iter  13 - fval: 1.4341
iter  14 - fval: 1.4096
iter  15 - fval: 1.4091
iter  16 - fval: 1.4077
iter  17 - fval: 1.4064
iter  18 - fval: 1.3986
iter  19 - fval: 1.3963
iter  20 - fval: 1.3944
iter  21 - fval: 1.3939
iter  22 - fval: 1.3878
iter  23 - fval: 1.3878
         Current function value: 1.387778
         Iterations: 24
         Function evaluations: 70


In [39]:
# Test phase - after training is complete
model.eval()  # Set the model to evaluation mode
test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(-1, 3, 16)

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * inputs.size(0)

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate average loss and accuracy
test_loss = test_loss / len(test_loader.dataset)
test_accuracy = 100 * correct / total

# Print test statistics
print(f'Test Loss: {test_loss:.4f} \tTest Accuracy: {test_accuracy:.2f}%')


Test Loss: 1.3822 	Test Accuracy: 66.00%


In [40]:
model_path = "models/CNN_CG_base.pt"
torch.save(model.state_dict(), model_path)

In [13]:
from pathlib import Path

def compute_metrics_base(model, test_loader, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param test_loader: DataLoader for the test dataset.
    :param model_path: Path to the PyTorch model file.
    :return: None
    """

    model.eval()
    correct = 0
    total = 0
    all_predicted = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            # Reshaping and predictions
            inputs = inputs.view(-1, 3, 16)  # Adjust the reshape as per your model input
            outputs = model(inputs)
            _, predicted_labels = torch.max(outputs, 1)

            # Aggregate labels and predictions
            all_predicted.extend(predicted_labels.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            # Calculating accuracy
            correct += (predicted_labels == labels).sum().item()
            total += labels.size(0)

    # Compute accuracy
    accuracy = correct / total
    print(f'Accuracy on the test set: {accuracy:.2%}')

    # Model size
    model_size_bytes = Path(model_path).stat().st_size
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")


In [14]:
import psutil
import time

def measure_cpu_utilization_and_run(func, *args, **kwargs):
    """
    Measure CPU utilization while running a function.

    Parameters:
        func (function): The function to be executed.
        *args: Arguments to be passed to func.
        **kwargs: Keyword arguments to be passed to func.

    Returns:
        float: CPU utilization percentage during the execution of func.
        float: The elapsed time during the execution of func.
        any: The result of func execution.
    """
    
    # Measure CPU utilization before execution
    cpu_percent_before = psutil.cpu_percent(interval=None)

    # Record the start time
    start_time = time.time()

    # Execute the function and store its result
    result = func(*args, **kwargs)

    # Record the end time
    end_time = time.time()

    # Measure CPU utilization after execution
    cpu_percent_after = psutil.cpu_percent(interval=None)

    # Calculate elapsed time and average CPU utilization
    elapsed_time = end_time - start_time
    average_cpu_utilization = (cpu_percent_before + cpu_percent_after) / 2

    return average_cpu_utilization, elapsed_time, result



In [43]:
# Move model to CPU
model.to('cpu')

# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, test_loader, model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Accuracy on the test set: 66.00%
Size of the model: 524.61 KB
CPU usage during inference: 10.00%
Inference time: 0.7496 seconds


### Dynamic Quantization

In [45]:
import torch.quantization

# Configure to qnnpack if available (ARM cpu)
# torch.backends.quantized.engine = 'qnnpack'

In [54]:
quantized_model = torch.quantization.quantize_dynamic(
    model,  # the original model
    {nn.Conv1d},  # specify which layer types to quantize
    dtype=torch.qint8  # the target data type for quantized weights
)

In [55]:
# Save the quantized model
torch.save(quantized_model.state_dict(), 'models/CNN_CG_Quantized.pt')

# Use it for inference
quantized_model.eval()
quantized_model.to('cpu')
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(-1, 3, 16)  # Reshape if needed, based on your model's requirement
        inputs, labels = inputs.to('cpu'), labels.to('cpu')
        outputs = quantized_model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the quantized model on the test dataset: {accuracy:.2f}%')


Accuracy of the quantized model on the test dataset: 66.00%


In [58]:
# Measure CPU usage and inference time
quantized_model_path = 'models/CNN_CG_Quantized.pt'
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, quantized_model, test_loader, quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Accuracy on the test set: 66.00%
Size of the model: 524.80 KB
CPU usage during inference: 1.60%
Inference time: 0.7699 seconds


### Static Quantization

In [6]:
class CNN_Model(nn.Module):
    def __init__(self, n_input, n_steps):
        super(CNN_Model, self).__init__()
        self.quant = torch.quantization.QuantStub()
        self.conv1 = nn.Conv1d(in_channels=n_input, out_channels=256, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=256, out_channels=128, kernel_size=3)
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=64, kernel_size=3)
        self.conv4 = nn.Conv1d(in_channels=64, out_channels=32, kernel_size=3)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(32 * 8, 6)  # Adjusted for the output from the last Conv1d layer
        self.dropout = nn.Dropout(0.2)
        self.softmax = nn.Softmax(dim=1)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.dropout(torch.relu(self.conv1(x)))
        x = self.dropout(torch.relu(self.conv2(x)))
        x = self.dropout(torch.relu(self.conv3(x)))
        x = self.dropout(torch.relu(self.conv4(x)))
        x = self.flatten(x)
        x = self.fc(x)
        x = self.dequant(x)
        return self.softmax(x)

In [10]:
model_path = "models/CNN_CG_base.pt"

# Instantiate the model
model_fp32 = CNN_Model(n_input=3, n_steps=16)
model_fp32.load_state_dict(torch.load(model_path))
model_fp32.eval()

# Define a quantization configuration
model_int8 = CNN_Model(n_input=3, n_steps=16)
model_int8.eval()

# Specify the quantization configuration
model_int8.qconfig = torch.quantization.get_default_qconfig('qnnpack')

# Prepare the model for static quantization
torch.quantization.prepare(model_int8, inplace=True)

# Calibrate the model with representative data
# Assuming the train_loader is representative of the data distribution
for data, _ in train_loader:
    data = data.view(-1, 3, 16)
    model_int8(data)

# Convert to a quantized model
torch.quantization.convert(model_int8, inplace=True)

CNN_Model(
  (quant): Quantize(scale=tensor([0.1556]), zero_point=tensor([126]), dtype=torch.quint8)
  (conv1): QuantizedConv1d(3, 256, kernel_size=(3,), stride=(1,), scale=0.15590274333953857, zero_point=129)
  (conv2): QuantizedConv1d(256, 128, kernel_size=(3,), stride=(1,), scale=0.06991789489984512, zero_point=124)
  (conv3): QuantizedConv1d(128, 64, kernel_size=(3,), stride=(1,), scale=0.0262898076325655, zero_point=130)
  (conv4): QuantizedConv1d(64, 32, kernel_size=(3,), stride=(1,), scale=0.01119869016110897, zero_point=128)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): QuantizedLinear(in_features=256, out_features=6, scale=0.0028389603830873966, zero_point=115, qscheme=torch.per_tensor_affine)
  (dropout): QuantizedDropout(p=0.2, inplace=False)
  (softmax): Softmax(dim=1)
  (dequant): DeQuantize()
)

In [11]:
# Define the path where you want to save the quantized model
static_quantized_model_path = "models/CNN_Static_Quantized.pt"

# Save the state dictionary of the quantized model
torch.save(model_int8.state_dict(), static_quantized_model_path)

print(f"Quantized model saved to {static_quantized_model_path}")


Quantized model saved to models/CNN_Static_Quantized.pt


In [15]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model_int8, test_loader, static_quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Accuracy on the test set: 39.92%
Size of the model: 138.97 KB
CPU usage during inference: 1.05%
Inference time: 0.6838 seconds


### Quantization Aware Training

In [27]:
from torchmin import Minimizer

# Assuming the correct input size and number of classes
input_size = 16 * 3  # 16 time steps with 3 features each
num_classes = 6  # Assuming 6 classes as per your data

# Instantiate and prepare the model for QAT
model_qat = CNN_Model(n_input=3, n_steps=16)
model_qat.qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')
model_qat.train()
model_prepared = torch.quantization.prepare_qat(model_qat, inplace=True)

# Define the optimizer and loss function
optimizer = Minimizer(model_prepared.parameters(),
                      method='cg',
                      tol=1e-3,
                      max_iter=2000,
                      disp=2)

criterion = nn.CrossEntropyLoss()

inputs, labels = X_train, y_train
inputs = inputs.view(-1, 3, 16)  # Reshape input to match model expectations

model_prepared.train()

def closure():
    optimizer.zero_grad()
    output = model(inputs)
    loss = criterion(output, labels)
    # loss.backward()  <-- do not call backward!
    return loss

loss = optimizer.step(closure)

initial fval: 1.7961
iter   1 - fval: 1.7961
Optimization terminated successfully.
         Current function value: 1.796093
         Iterations: 1
         Function evaluations: 2


In [47]:
model_prepared.eval()
# Convert the QAT model to a fully quantized model
qat_model = torch.quantization.convert(model, inplace=False)

# Save the fine-tuned quantized model
qat_model_path = "models/CNN_CG_QAT.pt"
torch.save(qat_model.state_dict(), qat_model_path)

In [48]:
#Load the QAT model from the saved file
qat_model_saved = CNN_Model(n_input=3, n_steps=16)
state_dict = torch.load(model_path)
qat_model_saved.load_state_dict(state_dict)
qat_model_saved.to('cpu')

CNN_Model(
  (quant): QuantStub()
  (conv1): Conv1d(3, 256, kernel_size=(3,), stride=(1,))
  (conv2): Conv1d(256, 128, kernel_size=(3,), stride=(1,))
  (conv3): Conv1d(128, 64, kernel_size=(3,), stride=(1,))
  (conv4): Conv1d(64, 32, kernel_size=(3,), stride=(1,))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Linear(in_features=256, out_features=6, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (softmax): Softmax(dim=1)
  (dequant): DeQuantStub()
)

In [49]:
# Prepare the model for evaluation
qat_model_saved.eval()

# Define the test dataset and dataloader
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Evaluate the model on the test dataset
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(-1, 3, 16)  
        outputs = qat_model_saved(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100.0 * correct / total
print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 66.00%


In [54]:
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import time
import psutil
from pathlib import Path

def compute_metrics_new(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    test_dataset = TensorDataset(x_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
    
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.view(-1, 3, 16)  # Flatten the input
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        
    model_file = Path(model_path)
    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = correct / total
    print(f'Accuracy on the test set: {accuracy:.2%}')


In [55]:
model.to('cpu')

# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_new, qat_model_saved, X_test, y_test, model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 524.61 KB
Accuracy on the test set: 66.00%
CPU usage during inference: 0.60%
Inference time: 0.7739 seconds


### Torch Pruning

In [33]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune


# Assuming timestep and X_series.shape[2] are defined
# input_size = timestep * X_series.shape[2]
pruned_model = CNN_Model(n_input=3, n_steps=16)

# Apply pruning to the convolutional layers
# Here, we are using L1Unstructured pruning as an example
prune.l1_unstructured(model.conv1, name="weight", amount=0.2)
prune.l1_unstructured(model.conv2, name="weight", amount=0.2)
prune.l1_unstructured(model.conv3, name="weight", amount=0.2)
prune.l1_unstructured(model.conv4, name="weight", amount=0.2)

# To make pruning permanent
for module in [model.conv1, model.conv2, model.conv3, model.conv4]:
    prune.remove(module, 'weight')

pruned_model_path = "models/QAT_CG_pruned.pt"
torch.save(model.state_dict(), pruned_model_path)


In [34]:
# Prepare the model for evaluation
pruned_model.eval()

# Define the test dataset and dataloader
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Evaluate the model on the test dataset
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(-1, 3, 16)  
        outputs = pruned_model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100.0 * correct / total
print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 17.14%


In [35]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_new, pruned_model, X_test, y_test, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 524.64 KB
Accuracy on the test set: 11.13%
CPU usage during inference: 1.45%
Inference time: 1.0296 seconds
