### **Data Preprocessing**



In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from numpy import genfromtxt
from sklearn.preprocessing import LabelEncoder



In [2]:
# Load Data
x = genfromtxt('../Data/WISDM_x.csv', delimiter=',')
y_df = pd.read_csv('../Data/WISDM_y.csv')
y = y_df.values.flatten()  # Flatten if y is 2D

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Function to create time series dataset
def create_series(x, y, timestep, overlap):
    slide_step = int(timestep * (1 - overlap))
    data_num = int((len(x) / slide_step) - 1)
    dataset = np.ndarray(shape=(data_num, timestep, x.shape[1]))
    labels = []

    for i in range(data_num):
        labels.append(y[slide_step * (i + 1) - 1])
        for j in range(timestep):
            dataset[i, j, :] = x[slide_step * i + j, :]

    return dataset, np.array(labels)

# Create time series
timestep = 16  # Replace with your value
overlap = 0.5  # Replace with your value
X_series, y_series = create_series(x, y_encoded, timestep, overlap)

In [3]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_series, y_series, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
x_train_tensor = torch.tensor(X_train, dtype=torch.float32)
x_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)



In [4]:
import torch
import torch.nn as nn

class GRUNet(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(GRUNet, self).__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2

        self.gru1 = nn.GRU(input_size, hidden_size1, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.gru2 = nn.GRU(hidden_size1, hidden_size2, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(1, x.size(0), self.hidden_size1).to(x.device)
        
        # First GRU layer
        out, _ = self.gru1(x, h0)
        out = self.dropout1(out)
        
        # Second GRU layer
        h1 = torch.zeros(1, x.size(0), self.hidden_size2).to(x.device)
        out, _ = self.gru2(out, h1)
        out = self.dropout2(out)

        # Dense layer
        out = self.fc(out[:, -1, :])  # Taking the last time step
        return out


In [5]:
# Parameters
input_size = 3  # Number of features
hidden_size1 = 50
hidden_size2 = 25
output_size = 6
n_steps = 16

# Create the model
model = GRUNet(input_size, hidden_size1, hidden_size2, output_size)

# Training setup (for demonstration)
# Define your dataset here
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.int64))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [6]:
def train(model, train_loader, loss_fn, optimizer, epochs=0):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(y_pred.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

        avg_loss = running_loss / len(train_loader)
        accuracy = 100 * correct / total
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

train(model, train_loader, loss_fn, optimizer, epochs=20)


Epoch [1/20], Loss: 0.5811, Accuracy: 79.95%
Epoch [2/20], Loss: 0.3674, Accuracy: 87.11%
Epoch [3/20], Loss: 0.2949, Accuracy: 90.00%
Epoch [4/20], Loss: 0.2590, Accuracy: 91.38%
Epoch [5/20], Loss: 0.2360, Accuracy: 92.17%
Epoch [6/20], Loss: 0.2190, Accuracy: 92.78%
Epoch [7/20], Loss: 0.2077, Accuracy: 93.22%
Epoch [8/20], Loss: 0.1976, Accuracy: 93.57%
Epoch [9/20], Loss: 0.1880, Accuracy: 93.77%
Epoch [10/20], Loss: 0.1801, Accuracy: 94.15%
Epoch [11/20], Loss: 0.1741, Accuracy: 94.35%
Epoch [12/20], Loss: 0.1681, Accuracy: 94.57%
Epoch [13/20], Loss: 0.1656, Accuracy: 94.65%
Epoch [14/20], Loss: 0.1598, Accuracy: 94.90%
Epoch [15/20], Loss: 0.1578, Accuracy: 94.93%
Epoch [16/20], Loss: 0.1533, Accuracy: 95.03%
Epoch [17/20], Loss: 0.1494, Accuracy: 95.22%
Epoch [18/20], Loss: 0.1465, Accuracy: 95.28%
Epoch [19/20], Loss: 0.1437, Accuracy: 95.42%
Epoch [20/20], Loss: 0.1418, Accuracy: 95.44%


In [7]:
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.int64))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
def evaluate(model, test_loader, loss_fn):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch)
            total_loss += loss_fn(y_pred, y_batch).item()
            _, predicted = torch.max(y_pred.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    avg_loss = total_loss / len(test_loader)
    accuracy = correct / total
    return avg_loss, accuracy

avg_loss, accuracy = evaluate(model, test_loader, loss_fn)
print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")



Test Loss: 0.1581, Test Accuracy: 0.9490


In [8]:
model_path = "/Users/sandeep/Desktop/BUCourses/Project/saved_models/Pytorch/gru_base.pth"
torch.save(model.state_dict(), model_path)

### Coreml metrics

In [10]:
import coremltools as ct
example_input = torch.rand(1,16, 3) 

model.eval()
traced_model = torch.jit.trace(model, example_input)
out = traced_model(example_input)

# Convert to Core ML program using the Unified Conversion API.
gru_coreml_model = ct.convert(
    traced_model,
    convert_to="mlprogram",
    inputs=[ct.TensorType(shape=example_input.shape)]
 )

# Save the converted model.
mlp_coreml_model.save("gru.mlpackage")

scikit-learn version 1.4.0 is not supported. Minimum required version: 0.17. Maximum required version: 1.1.2. Disabling scikit-learn conversion API.
XGBoost version 1.7.6 has not been tested with coremltools. You may run into unexpected errors. XGBoost 1.4.2 is the most recent version that has been tested.
TensorFlow version 2.15.0 has not been tested with coremltools. You may run into unexpected errors. TensorFlow 2.12.0 is the most recent version that has been tested.
Converting PyTorch Frontend ==> MIL Ops:  99%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 71/72 [00:00<00:00, 4141.86 ops/s]
Running MIL frontend_pytorch pipeline: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 2805.93 passes/s]
Running MIL default pipeline: 100%|████████████████████████████████████████████████████████████████████████████████

In [14]:
gru_coreml_model

input {
  name: "x"
  type {
    multiArrayType {
      shape: 1
      shape: 16
      shape: 3
      dataType: FLOAT32
    }
  }
}
output {
  name: "linear_24"
  type {
    multiArrayType {
      shape: 1
      shape: 6
      dataType: FLOAT32
    }
  }
}
metadata {
  userDefined {
    key: "com.github.apple.coremltools.source"
    value: "torch==2.1.0"
  }
  userDefined {
    key: "com.github.apple.coremltools.source_dialect"
    value: "TorchScript"
  }
  userDefined {
    key: "com.github.apple.coremltools.version"
    value: "7.1"
  }
}

In [11]:
from pathlib import Path

def coreml_metrics(model_name, X_test, y_test, model_path):
    predictions = []
    for id in range(len(X_test)):
        X_test_new = np.expand_dims(X_test[id], axis=0)
        output_dict = model_name.predict({'x': X_test_new})
        pred_class = np.argmax(output_dict['linear_2'])
        predictions.append(pred_class)
    
    accuracy = np.sum(predictions == y_test) / len(predictions)
    print("Accuracy:", accuracy)
    
    model_file = Path(model_path)
    
    # Size in bytes
    model_size_bytes = model_file.stat().st_size
    
    # Convert size to kilobytes (optional)
    model_size_kb = model_size_bytes / 1024
    print(f"Size of the model: {model_size_kb:.2f} KB")

In [26]:
import coremltools as ct
import coremltools.optimize.coreml as cto

gru_coreml_model = ct.models.MLModel("gru.mlpackage")

In [19]:
model_name = gru_coreml_model
model_path = "gru.mlpackage"

coreml_metrics(model_name, X_test, y_test, model_path)

Accuracy: 0.9490438900018212
Size of the model: 0.12 KB


### Dynamic 8-bit quantization

In [21]:
import coremltools.optimize.coreml as cto

op_config = cto.OpLinearQuantizerConfig(mode="linear_symmetric", weight_threshold=512)
config = cto.OptimizationConfig(global_config=op_config)

compressed_8_bit_model = cto.linear_quantize_weights(gru_coreml_model, config=config)


Running compression pass linear_quantize_weights: 0 ops [00:00, ? ops/s]
Running compression pass linear_quantize_weights: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 3817.92 ops/s]
Running compression pass linear_quantize_weights: 0 ops [00:00, ? ops/s]
Running compression pass linear_quantize_weights: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 3002.72 ops/s]
  quantized_data = np.round(original_data / scale)
  quantized_data = np.clip(quantized_data, q_val_min, q_val_max).astype(np_dtype)
Running compression pass linear_quantize_weights: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 4702.13 ops/s]
Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]
Running MIL default pipeline: 100%

In [24]:
compressed_8_bit_model.save("gru_8bitQuantized_mlmodel.mlpackage")

In [25]:
model_name = compressed_8_bit_model
model_path = "gru_8bitQuantized_mlmodel.mlpackage"

coreml_metrics(model_name, X_test, y_test, model_path)

Accuracy: 0.9490438900018212
Size of the model: 0.12 KB
