In [1]:
import importlib.util, psycopg2
import grpc, time, os
from tqdm import tqdm
import numpy as np
import hydro_serving_grpc as hs

# Database

In [2]:
# Database connection settings. Each value can be overridden through an
# environment variable so real credentials never have to be written into the
# notebook; the fallbacks target a server on localhost:5432 with the user,
# password and database all set to "postgres".
USER = os.environ.get("POSTGRES_USER", "postgres")
PASS = os.environ.get("POSTGRES_PASSWORD", "postgres")
PORT = os.environ.get("POSTGRES_PORT", "5432")
ADDRESS = os.environ.get("POSTGRES_HOST", "localhost")
DATABASE = os.environ.get("POSTGRES_DB", "postgres")

In [3]:
# Build the connection URL from the settings defined in the previous cell.
url = f"postgresql://{USER}:{PASS}@{ADDRESS}:{PORT}/{DATABASE}"
# Log the target with the password masked so the secret does not end up in
# the notebook's saved output.
print(f"Connecting to postgresql://{USER}:***@{ADDRESS}:{PORT}/{DATABASE}")
conn = psycopg2.connect(url)
cur = conn.cursor()

Connecting to postgresql://postgres:postgres@localhost:5432/postgres


In [4]:
# Create the table that receives one row per prediction request; the
# IF NOT EXISTS clause makes re-running this cell harmless.
cur.execute('''
    CREATE TABLE IF NOT EXISTS
        requests (timestamp bigint, uid integer, ground_truth integer);
''')
# psycopg2 starts a transaction implicitly, so commit the DDL right away
# instead of leaving it pending until the first insert is committed.
conn.commit()

# Data

### Local setup

In [5]:
# Load ./01_download/download.py as a module named "download" straight from
# its file path (the directory name starts with a digit, so a regular
# `import` statement cannot reference it).
spec = importlib.util.spec_from_file_location(
    name="download",
    location="./01_download/download.py",
)
download = importlib.util.module_from_spec(spec)
# Run the module's top-level code so its functions become available.
spec.loader.exec_module(download)

In [6]:
# File name of the local .npz test-set file: the next cell downloads the data
# only when this file is missing, and generate_data() later loads it.
target_file = "t10k.npz"

In [7]:
# Fetch the raw MNIST test files and run download.process_images on them,
# but only when the target file is not already present in the working directory
archive_path = os.path.join(".", target_file)
if not os.path.exists(archive_path):
    base_url = "http://yann.lecun.com/exdb/mnist/"
    archive_names = ['t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']
    download.download_files(base_url, '.', archive_names)
    download.process_images('.', "t10k")

In [8]:
def generate_data(base_path, test_file, seed=None):
    """Load images and labels from an ``.npz`` file and shuffle them together.

    Parameters
    ----------
    base_path : str
        Directory that contains ``test_file``.
    test_file : str
        Name of the ``.npz`` file; it must provide the arrays ``imgs`` and
        ``labels``.
    seed : int, optional
        Seed for the shuffle. When omitted, NumPy's global random state is
        used, so the order differs between runs unless that state was seeded
        by the caller. Pass a value to get a reproducible order.

    Returns
    -------
    tuple
        ``(imgs, labels)`` reordered with one shared permutation, so every
        image stays paired with its own label.

    Raises
    ------
    AssertionError
        If the two arrays do not have the same length.
    """
    # Indexing the NpzFile reads each array into memory, so both remain
    # usable after the file handle is closed.
    with np.load(os.path.join(base_path, test_file)) as data:
        imgs, labels = data["imgs"], data["labels"]
    assert len(imgs) == len(labels)

    if seed is None:
        permute = np.random.permutation(len(imgs))
    else:
        # A private generator keeps a seeded call from touching global state.
        permute = np.random.RandomState(seed).permutation(len(imgs))
    return imgs[permute], labels[permute]

# Hydrosphere gRPC client

In [9]:
# SSL channel credentials and a secure channel to the cluster endpoint
creds = grpc.ssl_channel_credentials()
channel = grpc.secure_channel(target="dev.k8s.hydrosphere.io:443", credentials=creds)
# client stub through which prediction calls are issued
stub = hs.PredictionServiceStub(channel)

In [10]:
# Identifies the target of every request: this spec (name "mnist-app") is
# attached to each PredictRequest built later in the notebook.
model_spec = hs.ModelSpec(name="mnist-app")

In [11]:
# shape attached to every image tensor: dimensions of size 1, 28 and 28
tensor_shape = hs.TensorShapeProto(
    dim=[hs.TensorShapeProto.Dim(size=extent) for extent in (1, 28, 28)]
)

In [13]:
REQUEST_AMOUNT = 10000   # How many requests will be sent
REQUEST_DELAY = 2        # Delay between requests in seconds

In [15]:
MAX_ATTEMPTS = 5   # Tries per request before a transient gRPC failure is re-raised


def _predict_with_retry(request, attempts=MAX_ATTEMPTS, backoff=REQUEST_DELAY):
    """Call ``stub.Predict`` and retry while the server reports UNAVAILABLE.

    Parameters
    ----------
    request
        The ``PredictRequest`` to send.
    attempts : int
        Maximum number of tries (at least one is always made).
    backoff : float
        Base pause in seconds; the wait grows linearly with the attempt number.

    Returns
    -------
    The response returned by ``stub.Predict``.

    Raises
    ------
    grpc.RpcError
        Immediately for any status other than UNAVAILABLE, or once the
        attempts are exhausted.
    """
    attempts = max(1, attempts)
    for attempt in range(1, attempts + 1):
        try:
            return stub.Predict(request)
        except grpc.RpcError as error:
            # Only a dropped or unavailable connection is worth retrying;
            # every other status is surfaced to the caller unchanged.
            if error.code() != grpc.StatusCode.UNAVAILABLE or attempt == attempts:
                raise
            time.sleep(backoff * attempt)


images, labels = generate_data('.', target_file)
# Never promise the progress bar more requests than there are images.
request_count = min(REQUEST_AMOUNT, len(images))
for image, label in tqdm(zip(images[:request_count], labels[:request_count]),
                         total=request_count):
    # form a request
    tensor = hs.TensorProto(dtype=hs.DT_FLOAT, tensor_shape=tensor_shape,
        float_val=image.flatten().tolist())
    request = hs.PredictRequest(model_spec=model_spec, inputs={"imgs": tensor})

    # get prediction, riding out short connection drops
    result = _predict_with_retry(request)

    # store the trace timestamp, trace uid and ground-truth label of this request
    cur.execute("INSERT INTO requests VALUES (%s, %s, %s)",
        (result.trace_data.ts, result.trace_data.uid, int(label)))
    # commit per request so rows already written survive a later failure
    conn.commit()
    time.sleep(REQUEST_DELAY)

  5%|▍         | 466/10000 [21:22<7:17:11,  2.75s/it]

_Rendezvous: <_Rendezvous of RPC that terminated with:
	status = StatusCode.UNAVAILABLE
	details = "Socket closed"
	debug_error_string = "{"created":"@1555593146.225753000","description":"Error received from peer ipv4:18.184.72.114:443","file":"src/core/lib/surface/call.cc","file_line":1041,"grpc_message":"Socket closed","grpc_status":14}"
>

In [14]:
# Release the database connection opened at the top of the notebook.
conn.close()