# Read and write NumPy arrays to and from Azure Data Lake and Azure Blob storage

## Install requirements

In [None]:
!pip install -r requirements.txt

## Initialize public Blob filesystem

In [None]:
# Connection settings for the public storage account: only the account name is
# supplied, no credential.
storage_options = {
    "account_name": "azuremlexamples",
}
# Container inside that account that is listed below.
container_name = "datasets"

In [None]:
from adlfs import AzureBlobFileSystem as abfs

# Build the filesystem client from the connection settings defined earlier.
fs = abfs(**storage_options)

# List everything under the container's "mnist" folder; the bare name on the
# last line makes the notebook display the listing.
mnist_dir = f"{container_name}/mnist"
files = fs.ls(mnist_dir)
files

## Define functions to read gzipped MNIST data

In [None]:
import numpy as np


def read_images(f, num_images, image_size=28):
    """Read ``num_images`` square images from an open binary stream.

    Args:
        f: Binary file-like object positioned at the start of the image file;
            only its ``read`` method is used.
        num_images: Number of images to read.
        image_size: Height and width of each image, in pixels.

    Returns:
        A ``float32`` array of shape
        ``(num_images, image_size, image_size, 1)``.

    Raises:
        ValueError: If the stream ends before all requested pixel bytes
            have been read.
    """
    f.read(16)  # skip the 16-byte header that precedes the pixel data

    # Pixels are decoded as uint8 below, i.e. one byte per pixel.
    expected = image_size * image_size * num_images
    buf = f.read(expected)
    if len(buf) != expected:
        # Fail here with a clear message instead of an opaque reshape error.
        raise ValueError(
            f"expected {expected} bytes of pixel data, got {len(buf)}"
        )

    images = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
    images = images.reshape(num_images, image_size, image_size, 1)

    return images


def read_labels(f, num_labels):
    """Read ``num_labels`` one-byte labels from an open binary stream.

    Args:
        f: Binary file-like object positioned at the start of the label file;
            only its ``read`` method is used.
        num_labels: Number of labels to read.

    Returns:
        A one-dimensional ``uint8`` array of length ``num_labels``.

    Raises:
        ValueError: If the stream ends before ``num_labels`` bytes have been
            read (previously this returned a silently shortened array).
    """
    f.read(8)  # skip the 8-byte header that precedes the label data

    buf = f.read(num_labels)
    if len(buf) != num_labels:
        raise ValueError(
            f"expected {num_labels} bytes of label data, got {len(buf)}"
        )
    labels = np.frombuffer(buf, dtype=np.uint8)

    return labels

## Read in gzipped MNIST data

In [None]:
import gzip

# Number of items requested from the training and test files respectively.
train_len = 60000
test_len = 10000

# Choose the reader and item count from each blob's name. The "train-" checks
# come first because the generic "images"/"labels" substrings also match the
# training files; whatever remains is treated as the test split.
# Both the blob handle and the gzip wrapper are opened in a `with` so they are
# closed once the array has been read (closing a GzipFile does not close the
# file object it wraps).
for path in files:
    if "train-images" in path:
        with fs.open(path) as raw, gzip.open(raw) as gz:
            X_train = read_images(gz, train_len)
    elif "train-labels" in path:
        with fs.open(path) as raw, gzip.open(raw) as gz:
            y_train = read_labels(gz, train_len)
    elif "images" in path:
        with fs.open(path) as raw, gzip.open(raw) as gz:
            X_test = read_images(gz, test_len)
    elif "labels" in path:
        with fs.open(path) as raw, gzip.open(raw) as gz:
            y_test = read_labels(gz, test_len)

## Verify expected results

In [None]:
from random import randint

# randint includes both endpoints, so the upper bound must be the last valid
# index; randint(0, train_len) could return train_len and raise IndexError.
i = randint(0, train_len - 1)
x = X_train[i]
y = y_train[i]

In [None]:
import matplotlib.pyplot as plt

# squeeze() drops the length-1 axes of the sample before it is drawn.
image = x.squeeze()
plt.imshow(image)
# Show the sample's label as the plot title.
plt.title(f"Label: {y}")

## Initialize private Blob filesystem

In [None]:
from azureml.core import Workspace

# Look up the datastore registered as "workspaceblobstore" in the workspace.
ws = Workspace.from_config()
ds = ws.datastores["workspaceblobstore"]

# Rebind the connection settings to that datastore's container and account;
# this time a key is supplied along with the account name.
container_name = ds.container_name
storage_options = {
    "account_name": ds.account_name,
    "account_key": ds.account_key,
}

In [None]:
# Rebind `fs` to a filesystem built from the current `storage_options`.
# The bare name on the last line makes the notebook display the object.
fs = abfs(**storage_options)
fs

In [None]:
# List the top level of the container named by `container_name`.
fs.ls(f"{container_name}")

## Write numpy arrays using `np.save`

In [None]:
# Write each array to <container_name>/example-data/mnist/<name>.npy.
# One loop replaces four copy-pasted stanzas; the upload order is unchanged.
for name, array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    with fs.open(f"{container_name}/example-data/mnist/{name}.npy", "wb") as f:
        np.save(f, array)

## Load numpy arrays using `np.load`

In [None]:
def _load_npy(name):
    """Return the array stored at ``<container_name>/example-data/mnist/<name>.npy``."""
    with fs.open(f"{container_name}/example-data/mnist/{name}.npy", "rb") as f:
        return np.load(f)


# Read the four arrays back in the same order the original stanzas did.
X_train = _load_npy("X_train")
y_train = _load_npy("y_train")
X_test = _load_npy("X_test")
y_test = _load_npy("y_test")

## Verify expected results

In [None]:
from random import randint

# randint includes both endpoints, so the upper bound must be the last valid
# index; randint(0, train_len) could return train_len and raise IndexError.
i = randint(0, train_len - 1)
x = X_train[i]
y = y_train[i]

In [None]:
import matplotlib.pyplot as plt

# squeeze() drops the length-1 axes of the sample before it is drawn.
image = x.squeeze()
plt.imshow(image)
# Show the sample's label as the plot title.
plt.title(f"Label: {y}")