### **Data Preprocessing**



In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from numpy import genfromtxt
from sklearn.preprocessing import LabelEncoder



In [3]:
# Load Data
x = genfromtxt('../Data/WISDM_x.csv', delimiter=',')
y_df = pd.read_csv('../Data/WISDM_y.csv')
y = y_df.values.flatten()  # Flatten if y is 2D

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Function to create time series dataset
def create_series(x, y, timestep, overlap):
    slide_step = int(timestep * (1 - overlap))
    data_num = int((len(x) / slide_step) - 1)
    dataset = np.ndarray(shape=(data_num, timestep, x.shape[1]))
    labels = []

    for i in range(data_num):
        labels.append(y[slide_step * (i + 1) - 1])
        for j in range(timestep):
            dataset[i, j, :] = x[slide_step * i + j, :]

    return dataset, np.array(labels)

# Create time series
timestep = 16  # Replace with your value
overlap = 0.5  # Replace with your value
X_series, y_series = create_series(x, y_encoded, timestep, overlap)

In [4]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_series, y_series, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
x_train_tensor = torch.tensor(X_train, dtype=torch.float32)
x_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)



In [5]:
import torch
import torch.nn as nn

class GRUNet(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(GRUNet, self).__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2

        self.gru1 = nn.GRU(input_size, hidden_size1, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.gru2 = nn.GRU(hidden_size1, hidden_size2, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        
        # First GRU layer
        out, _ = self.gru1(x, h0)
        out = self.dropout1(out)
        
        # Second GRU layer
        h1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        out, _ = self.gru2(out, h1)
        out = self.dropout2(out)

        # Dense layer
        out = self.fc(out[:, -1, :])  # Taking the last time step
        return out


In [6]:
# Parameters
input_size = 3  # Number of features
hidden_size1 = 50
hidden_size2 = 25
output_size = 6
n_steps = 16

# Create the model
model = GRUNet(input_size, hidden_size1, hidden_size2, output_size)

# Training setup (for demonstration)
# Define your dataset here
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.int64))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [7]:
def train(model, train_loader, loss_fn, optimizer, epochs=0):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(y_pred.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

        avg_loss = running_loss / len(train_loader)
        accuracy = 100 * correct / total
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

train(model, train_loader, loss_fn, optimizer, epochs=20)


Epoch [1/20], Loss: 0.5702, Accuracy: 80.09%
Epoch [2/20], Loss: 0.3727, Accuracy: 86.88%
Epoch [3/20], Loss: 0.2988, Accuracy: 89.77%
Epoch [4/20], Loss: 0.2611, Accuracy: 91.19%
Epoch [5/20], Loss: 0.2395, Accuracy: 91.95%
Epoch [6/20], Loss: 0.2215, Accuracy: 92.57%
Epoch [7/20], Loss: 0.2092, Accuracy: 93.05%
Epoch [8/20], Loss: 0.1992, Accuracy: 93.40%
Epoch [9/20], Loss: 0.1891, Accuracy: 93.84%
Epoch [10/20], Loss: 0.1831, Accuracy: 93.96%
Epoch [11/20], Loss: 0.1744, Accuracy: 94.29%
Epoch [12/20], Loss: 0.1700, Accuracy: 94.45%
Epoch [13/20], Loss: 0.1641, Accuracy: 94.62%
Epoch [14/20], Loss: 0.1605, Accuracy: 94.78%
Epoch [15/20], Loss: 0.1581, Accuracy: 94.83%
Epoch [16/20], Loss: 0.1522, Accuracy: 94.98%
Epoch [17/20], Loss: 0.1507, Accuracy: 95.07%
Epoch [18/20], Loss: 0.1478, Accuracy: 95.19%
Epoch [19/20], Loss: 0.1430, Accuracy: 95.38%
Epoch [20/20], Loss: 0.1412, Accuracy: 95.37%


In [8]:
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.int64))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
def evaluate(model, test_loader, loss_fn):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch)
            total_loss += loss_fn(y_pred, y_batch).item()
            _, predicted = torch.max(y_pred.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    avg_loss = total_loss / len(test_loader)
    accuracy = correct / total
    return avg_loss, accuracy

avg_loss, accuracy = evaluate(model, test_loader, loss_fn)
print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")



Test Loss: 0.1549, Test Accuracy: 0.9513


In [9]:
model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/gru_base.pth"
torch.save(model.state_dict(), model_path)

In [10]:
def measure_cpu_utilization_and_run(func, *args, **kwargs):
    """
    Measure CPU utilization while running a function.

    Parameters:
        func (function): The function to be executed.
        *args: Arguments to be passed to func.
        **kwargs: Keyword arguments to be passed to func.

    Returns:
        float: CPU utilization percentage during the execution of func.
        float: The elapsed time during the execution of func.
        any: The result of func execution.
    """
    
    # Measure CPU utilization before execution
    cpu_percent_before = psutil.cpu_percent(interval=None)

    # Record the start time
    start_time = time.time()

    # Execute the function and store its result
    result = func(*args, **kwargs)

    # Record the end time
    end_time = time.time()

    # Measure CPU utilization after execution
    cpu_percent_after = psutil.cpu_percent(interval=None)

    # Calculate elapsed time and average CPU utilization
    elapsed_time = end_time - start_time
    average_cpu_utilization = (cpu_percent_before + cpu_percent_after) / 2

    return average_cpu_utilization, elapsed_time, result


In [11]:
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import time
import psutil
from pathlib import Path

def compute_metrics_base(model, x_test, y_test, model_path):
    """
    Compute the accuracy of the PyTorch model.

    :param model: PyTorch model.
    :param x_test: Test dataset features (as a PyTorch Tensor).
    :param y_test: Test dataset labels (as a NumPy array).
    :param model_dir: Directory where the PyTorch model files are stored.
    :return: None
    """

    model.eval()
    with torch.no_grad():
        # Get the model's predictions
        outputs = model(x_test)
        _, predicted_labels = torch.max(outputs, 1)

        # Convert y_test to tensor if it's not already
        true_labels = torch.tensor(y_test) if not isinstance(y_test, torch.Tensor) else y_test
        true_labels = true_labels.squeeze()  # Remove unnecessary dimensions

    model_file = Path(model_path)

    # Size in bytes
    model_size_bytes = model_file.stat().st_size

    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

    # Compute accuracy
    accuracy = accuracy_score(true_labels.numpy(), predicted_labels.numpy())
    print(f'Accuracy on the test set: {accuracy:.2%}')


In [12]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 58.76 KB
Accuracy on the test set: 95.13%
CPU usage during inference: 7.00%
Inference time: 0.3501 seconds


In [13]:
def model_size_in_memory(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.numel() * param.element_size()
    return param_size


In [14]:
size_in_bytes = model_size_in_memory(model)
size_in_megabytes = size_in_bytes / (1024 * 1024)
print(f"Model size in memory: {size_in_megabytes:.2f} MB")


Model size in memory: 0.05 MB


In [21]:
torch.backends.quantized.engine = 'qnnpack'

quantized_model = torch.quantization.quantize_dynamic(
    model,  # the original model
    {nn.Linear, nn.GRU},  # a set of layers to dynamically quantize
    dtype=torch.qint8)  # the target dtype for quantized weights


In [22]:
quantized_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/gru_Quantized.pth"
torch.save(quantized_model.state_dict(), quantized_model_path)


In [23]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, quantized_model, x_test_tensor, y_test_tensor, quantized_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 22.65 KB
Accuracy on the test set: 95.12%
CPU usage during inference: 39.45%
Inference time: 0.3380 seconds


In [24]:
size_in_bytes = model_size_in_memory(quantized_model)
size_in_megabytes = size_in_bytes / (1024 * 1024)
print(f"Model size in memory: {size_in_megabytes:.2f} MB")


Model size in memory: 0.00 MB


In [25]:
def print_sample_predictions(model, x_test, y_test, num_samples=5):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # Predict on the test set
        outputs = model(x_test)
        _, predicted = torch.max(outputs, 1)

        print("Sample predictions:\n")
        for i in range(num_samples):
            print(f"x_test[{i}]: {x_test[i]}")
            print(f"Actual label (y_test[{i}]): {y_test[i]}")
            print(f"Predicted label: {predicted[i]}")
            print("\n")


In [31]:
# Assuming you're using the first num_samples of x_test and y_test
#num_samples = 5
#print_sample_predictions(model, x_test_tensor[:num_samples], y_test_tensor[:num_samples], num_samples=5)

In [36]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune

# Instantiate the model
model = GRUNet(input_size, hidden_size1, hidden_size2, output_size)

# Collect names of the parameters in GRU layers to be pruned
params_to_prune = []
for name, module in model.named_modules():
    if isinstance(module, nn.GRU):
        for param_name, _ in module.named_parameters():
            if 'weight' in param_name:
                params_to_prune.append((module, param_name))

# Apply pruning
for module, param_name in params_to_prune:
    prune.l1_unstructured(module, param_name, amount=0.2)

# Apply pruning to the linear layer
prune.l1_unstructured(model.fc, 'weight', amount=0.2)

# Make the pruning permanent
for module, param_name in params_to_prune:
    prune.remove(module, param_name)
prune.remove(model.fc, 'weight')


Linear(in_features=25, out_features=6, bias=True)

In [37]:
pruned_model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/gru_pruned.pth"
torch.save(model.state_dict(), pruned_model_path)


In [38]:
# Measure CPU usage and inference time
cpu_usage, inference_time, _ = measure_cpu_utilization_and_run(compute_metrics_base, model, x_test_tensor, y_test_tensor, pruned_model_path)

print(f'CPU usage during inference: {cpu_usage:.2f}%')
print(f'Inference time: {inference_time:.4f} seconds')


Size of the model: 58.78 KB
Accuracy on the test set: 12.78%
CPU usage during inference: 30.45%
Inference time: 0.3272 seconds
