# Save and Upload a Model to S3

Save a trained model in ONNX format and upload the file to an S3 bucket.

### 1. Import the required libraries

In [1]:
import os
import boto3
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

from sklearn.datasets import make_blobs

### 2. Train a sample model

Generate a random data set of normally-distributed data points.

In [None]:
# Draw 100 two-feature points around a single centre; the fixed random_state
# makes make_blobs return the same points on every run.
points, _ = make_blobs(n_samples=100, n_features=2, centers=1, random_state=0)
# Convert to a float32 tensor so the data can be fed to the PyTorch model
X = torch.tensor(points, dtype=torch.float32)
print(f"Generated data has {X.shape[0]} samples and {X.shape[1]} features")

Preview the data: display the first ten samples.

In [None]:
# First ten rows, all feature columns
X[:10]

Define a simple autoencoder network for anomaly detection.

* Autoencoders are designed to reconstruct the input data (X). Their output has the same shape as the input.
* Autoencoders can filter anomalous data because their latent space forces the network to retain only the relevant information.
* You can detect anomalies by selecting the samples that the model fails to reconstruct.

In [4]:
class Model(nn.Module):
    """Small fully connected autoencoder.

    The encoder maps ``num_features`` inputs to a 2-unit latent vector through
    a 10-unit hidden layer; the decoder maps the latent vector back to
    ``num_features`` outputs through a 5-unit hidden layer.
    """

    def __init__(self, num_features: int):
        """Build the encoder and decoder stacks.

        Args:
            num_features: Number of input features, and therefore also the
                number of values in each reconstruction.
        """
        super().__init__()

        # Compress: num_features -> 10 -> 2
        encoder_layers = [
            nn.Linear(num_features, 10),
            nn.ReLU(),
            nn.Linear(10, 2),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Expand: 2 -> 5 -> num_features
        decoder_layers = [
            nn.Linear(2, 5),
            nn.ReLU(),
            nn.Linear(5, num_features),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` into the latent space and decode it again.

        Returns:
            The reconstructions produced by the decoder.
        """
        return self.decoder(self.encoder(x))


# Seed PyTorch's global random number generator before the layers are created,
# so the randomly initialised weights are the same on every Restart & Run All.
torch.manual_seed(0)

# Instantiate the model with one input (and one output) per feature column of X
model = Model(num_features=X.shape[1])

Train a model for anomaly detection by using the generated data.

In [5]:
# Objective: mean squared error between the reconstructions and the input,
# minimised with the Adam optimizer.
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

# Full-batch training: every epoch is a single forward/backward pass over X
epochs = 50
model.train()  # the mode does not change inside the loop, so set it once
for epoch in range(epochs):
    optimizer.zero_grad()  # clear the gradients accumulated by the previous step
    reconstructions = model(X)
    loss = loss_function(reconstructions, X)
    loss.backward()
    optimizer.step()

Run the model and compute the error between each sample and its reconstructions.
The samples with the largest reconstruction errors are anomalies.

In [6]:
# Put the model in evaluation mode and reconstruct every sample without
# tracking gradients.
model.eval()
with torch.no_grad():
    reconstructions = model(X)
    # Per-sample error: squared difference averaged over the feature axis
    errors = (reconstructions - X).pow(2).mean(dim=1)

# The 0.95 quantile of the errors is the cut-off, so about 5% of the samples
# end up above it.
threshold = torch.quantile(errors, 0.95).item()
# 1 marks a sample whose error exceeds the cut-off, 0 marks every other sample
anomalies = errors.gt(threshold).to(torch.int32).numpy()

Plot the results.
The diagram displays anomalies in red.

In [None]:
# Index 0 holds the colour for regular samples and index 1 the colour for
# anomalies, so the 0/1 flags in `anomalies` can index this array directly.
colors = np.array([
    "#00aa52",  # Ok
    "#ef3300",  # Anomaly
])
y_pred_colors = colors[anomalies]

# Use an explicit figure/axes pair and label the chart so that it can be
# understood without reading the surrounding cells.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], color=y_pred_colors)
ax.set(
    title="Anomalies detected by reconstruction error",
    xlabel="Feature 1",
    ylabel="Feature 2",
)
plt.show()

### 3. Export the trained model to ONNX

Use a single sample row so that the exporter can infer the input shape of the model.

In [None]:
# Slice instead of indexing so the sample stays two-dimensional (one row, all columns)
sample_row = X[:1, :]
sample_row

Convert the PyTorch model to ONNX.

In [9]:
# Name of the ONNX file written to the current working directory
onnx_file_name = "anomaly_detection.onnx"

# Run the exporter with the sample row as the model's only positional input
torch.onnx.export(
    model,
    (sample_row,),
    onnx_file_name,
)

### 4. Upload the file to the S3 bucket

Use the `boto3` library with the parameters of the data connection.

In [None]:
# Read the connection parameters from the environment
key_id = os.getenv("AWS_ACCESS_KEY_ID")
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
endpoint = os.getenv("AWS_S3_ENDPOINT")
bucket_name = os.getenv("AWS_S3_BUCKET")

# Fail early with a readable message instead of building "http://None" or
# passing an empty bucket name to boto3. The credentials are not checked here:
# when they are None, boto3 falls back to its own credential lookup.
missing = [
    name
    for name, value in (
        ("AWS_S3_ENDPOINT", endpoint),
        ("AWS_S3_BUCKET", bucket_name),
    )
    if not value
]
if missing:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(missing)}"
    )

# boto3 requires the protocol; add the default only when the endpoint does not
# already carry one, to avoid producing "http://https://...".
endpoint_url = endpoint if "://" in endpoint else f"http://{endpoint}"

s3_client = boto3.client(
    "s3",
    aws_access_key_id=key_id,
    aws_secret_access_key=secret_key,
    endpoint_url=endpoint_url,
)

# Store the object under the same name as the local file
s3_client.upload_file(onnx_file_name, bucket_name, Key=onnx_file_name)

print(f"File {onnx_file_name} uploaded to S3!")

Open https://minio-ui-minio.apps.ocp4.example.com/.

Authenticate with the `minio` access key and the `minio123` secret key.

Verify that the `saved-models` bucket contains the ONNX model file.