Quickstart

In [1]:
import requests

from ray import serve

serve.start()


@serve.deployment
def hello(request):
    name = request.query_params["name"]
    return f"Hello {name}!"


hello.deploy()

# Query our endpoint over HTTP.
response = requests.get("http://127.0.0.1:8000/hello?name=serve").text
assert response == "Hello serve!"

2021-12-05 19:30:49,369	INFO services.py:1338 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
 pid=10520)[0m 2021-12-05 19:30:57,607	INFO checkpoint_path.py:16 -- Using RayInternalKVStore for controller checkpoint and recovery.
 pid=10520)[0m 2021-12-05 19:30:57,614	INFO http_state.py:98 -- Starting HTTP proxy with name 'SERVE_CONTROLLER_ACTOR:gxNELu:SERVE_PROXY_ACTOR-node:127.0.0.1-0' on node 'node:127.0.0.1-0' listening on '127.0.0.1:8000'
2021-12-05 19:30:58,352	INFO api.py:463 -- Started Serve instance in namespace 'serve'.
 pid=6660)[0m INFO:     Started server process [6660]
2021-12-05 19:30:58,470	INFO api.py:242 -- Updating deployment 'hello'. component=serve deployment=hello
 pid=10520)[0m 2021-12-05 19:30:58,487	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'hello'. component=serve deployment=hello
2021-12-05 19:30:59,297	INFO api.py:249 -- Deployment 'hello' is ready at `http://127.0.0.1:8000/hello`. component=serve deployment=hell

In [None]:
import requests

import ray
from ray import serve

serve.start()


@serve.deployment
class Counter:
    def __init__(self):
        self.count = 0

    def __call__(self, *args):
        self.count += 1
        return {"count": self.count}


# Deploy our class.
Counter.deploy()

# Query our endpoint in two different ways: from HTTP and from Python.
assert requests.get("http://127.0.0.1:8000/Counter").json() == {"count": 1}
assert ray.get(Counter.get_handle().remote()) == {"count": 2}

In [4]:
 ray.get(Counter.get_handle().remote())

{'count': 3}

End-to-End Tutorial

In [6]:
import ray
from ray import serve

ray.init(ignore_reinit_error=True)
serve.start()

2021-12-05 18:38:13,013	INFO worker.py:852 -- Calling ray.init() again after it has already been called.
2021-12-05 18:38:13,014	INFO api.py:414 -- Connecting to existing Serve instance in namespace 'serve'.


<ray.serve.api.Client at 0x15df2297c40>

In [7]:
@serve.deployment
class Counter:
    def __init__(self):
        self.count = 0

    def __call__(self, request):
        self.count += 1
        return {"count": self.count}
Counter.deploy()

2021-12-05 18:38:43,460	INFO api.py:242 -- Updating deployment 'Counter'. component=serve deployment=Counter
 pid=10144)[0m 2021-12-05 18:38:43,559	INFO deployment_state.py:874 -- Stopping 1 replicas of deployment 'Counter' with outdated versions. component=serve deployment=Counter
 pid=10144)[0m 2021-12-05 18:38:45,728	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'Counter'. component=serve deployment=Counter
2021-12-05 18:38:49,355	INFO api.py:249 -- Deployment 'Counter' is ready at `http://127.0.0.1:8000/Counter`. component=serve deployment=Counter


In [18]:
from fastapi import FastAPI

app = FastAPI()

@serve.deployment
@serve.ingress(app)
class Counter:
    def __init__(self):
        self.count = 0

    @app.get("/")
    def get(self):
        return {"count": self.count}

    @app.get("/incr")
    def incr(self):
        self.count += 1
        return {"count": self.count}

    @app.get("/decr")
    def decr(self):
        self.count -= 1
        return {"count": self.count}
Counter.deploy()

2021-12-05 18:41:14,070	INFO api.py:242 -- Updating deployment 'Counter'. component=serve deployment=Counter
 pid=10144)[0m 2021-12-05 18:41:14,187	INFO deployment_state.py:874 -- Stopping 1 replicas of deployment 'Counter' with outdated versions. component=serve deployment=Counter
 pid=10144)[0m 2021-12-05 18:41:16,404	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'Counter'. component=serve deployment=Counter
2021-12-05 18:41:19,243	INFO api.py:249 -- Deployment 'Counter' is ready at `http://127.0.0.1:8000/Counter`. component=serve deployment=Counter


In [20]:
! curl -X GET localhost:8000/Counter/

{"count":0}


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100    11  100    11    0     0     50      0 --:--:-- --:--:-- --:--:--    50


In [21]:
! curl -X GET localhost:8000/Counter/incr

{"count":1}


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0    11    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100    11  100    11    0     0     50      0 --:--:-- --:--:-- --:--:--    50


Deployment

In [23]:
@serve.deployment
class MyFirstDeployment:
  # Take the message to return as an argument to the constructor.
    def __init__(self, msg):
        self.msg = msg
        
    def __call__(self, request):
        return self.msg

    def other_method(self, arg):
        return self.msg

MyFirstDeployment.deploy("Hello world!")

2021-12-05 18:45:15,669	INFO api.py:242 -- Updating deployment 'MyFirstDeployment'. component=serve deployment=MyFirstDeployment
 pid=10144)[0m 2021-12-05 18:45:15,734	INFO deployment_state.py:874 -- Stopping 1 replicas of deployment 'MyFirstDeployment' with outdated versions. component=serve deployment=MyFirstDeployment
 pid=10144)[0m 2021-12-05 18:45:17,945	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'MyFirstDeployment'. component=serve deployment=MyFirstDeployment
2021-12-05 18:45:20,482	INFO api.py:249 -- Deployment 'MyFirstDeployment' is ready at `http://127.0.0.1:8000/MyFirstDeployment`. component=serve deployment=MyFirstDeployment


In [27]:
MyFirstDeployment.options(name="hello_service").deploy("Hello!")
MyFirstDeployment.options(name="hi_service").deploy("Hi!")

2021-12-05 18:45:53,411	INFO api.py:242 -- Updating deployment 'hello_service'. component=serve deployment=hello_service
 pid=10144)[0m 2021-12-05 18:45:53,512	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'hello_service'. component=serve deployment=hello_service
2021-12-05 18:45:56,369	INFO api.py:249 -- Deployment 'hello_service' is ready at `http://127.0.0.1:8000/hello_service`. component=serve deployment=hello_service
2021-12-05 18:45:56,379	INFO api.py:242 -- Updating deployment 'hi_service'. component=serve deployment=hi_service
 pid=10144)[0m 2021-12-05 18:45:56,478	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'hi_service'. component=serve deployment=hi_service


KeyboardInterrupt: 

In [29]:
serve.list_deployments()

In [30]:
deployment = serve.get_deployment("MyFirstDeployment")



In [33]:
@serve.deployment(name="http_deployment", route_prefix="/api")
class HTTPDeployment:
    def __call__(self, request):
        return "Hello world!"
import requests
HTTPDeployment.deploy("Hello world!")
print(requests.get("http://127.0.0.1:8000/api").text)

2021-12-05 18:47:31,436	INFO api.py:242 -- Updating deployment 'http_deployment'. component=serve deployment=http_deployment
 pid=10144)[0m 2021-12-05 18:47:31,483	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'http_deployment'. component=serve deployment=http_deployment


KeyboardInterrupt: 

In [None]:
# To get a handle from the same script, use the Deployment object directly:
handle = HTTPDeployment.get_handle()

# To get a handle from a different script, reference it by name:
handle = serve.get_deployment("http_deployment").get_handle()

print(ray.get(handle.remote()))



In [None]:
@serve.deployment(name="my_deployment", num_replicas=1)
class SimpleDeployment:
    pass

# Creates one initial replica.
SimpleDeployment.deploy()

# Re-deploys, creating an additional replica.
# This could be the SAME Python script, modified and re-run.
@serve.deployment(name="my_deployment", num_replicas=2)
class SimpleDeployment:
    pass

SimpleDeployment.deploy()

# You can also use Deployment.options() to change options without redefining
# the class. This is useful for programmatically updating deployments.
SimpleDeployment.options(num_replicas=2).deploy()

In [None]:
@serve.deployment(
    _autoscaling_config={
        "min_replicas": 1,
        "max_replicas": 5,
        "target_num_ongoing_requests_per_replica": 10,
    },
    version="v1")
def func(_):
    time.sleep(1)
    return ""

func.deploy() # The func deployment will now autoscale based on requests demand.

In [None]:
@serve.deployment(
    _autoscaling_config={
        "min_replicas": 1,
        "max_replicas": 5,
        "target_num_ongoing_requests_per_replica": 10,
    },
    version="v1")
def func(_):
    time.sleep(1)
    return ""

func.deploy() # The func deployment will now autoscale based on requests demand.

FastAPI

In [None]:
import ray

from fastapi import FastAPI
from ray import serve

app = FastAPI()
ray.init(address="auto", namespace="summarizer")
serve.start(detached=True)

@serve.deployment(route_prefix="/hello")
@serve.ingress(app)
class MyFastAPIDeployment:
    @app.get("/")
    def root(self):
        return "Hello, world!"

    @app.post("/{subpath}")
    def root(self, subpath: str):
        return f"Hello from {subpath}!"

MyFastAPIDeployment.deploy()

In [None]:
import ray

from fastapi import FastAPI
from ray import serve
from starlette.middleware import Middleware
from starlette.middleware.cors import CORSMiddleware

app = FastAPI()
ray.init(address="auto", namespace="summarizer")

client = serve.start(
    detached=True,
    http_options={"middlewares": [
        Middleware(
            CORSMiddleware, allow_origins=["*"], allow_methods=["*"])
    ]})

@app.get("/")
def f():
    return "Hello from the root!"

@serve.deployment(route_prefix="/api1")
@serve.ingress(app)
class FastAPIWrapper1:
    @app.get("/subpath")
    def method(self):
        return "Hello 1!"

@serve.deployment(route_prefix="/api2")
@serve.ingress(app)
class FastAPIWrapper2:
    @app.get("/subpath")
    def method(self):
        return "Hello 2!"

Deployment

In [None]:
import ray
from ray import serve
import time

# This will start Ray locally and start Serve on top of it.
serve.start()

@serve.deployment
def my_func(request):
    return "hello"

my_func.deploy()

# Serve will be shut down once the script exits, so keep it alive manually.
while True:
    time.sleep(5)
    print(serve.list_deployments())

In [None]:
import ray
from ray import serve

# This will connect to the running Ray cluster.
ray.init(address="auto", namespace="serve")

@serve.deployment
def my_func(request):
    return "hello"

my_func.deploy()

In [5]:
import logging

from ray import serve
import requests

#serve.start()

logger = logging.getLogger("ray")


@serve.deployment
def f(*_args):
    logger.info("Some info!")


f.deploy()

requests.get("http://127.0.0.1:8000/Counter")

2021-12-05 19:34:11,241	INFO api.py:242 -- Updating deployment 'f'. component=serve deployment=f
 pid=9644)[0m 2021-12-05 19:34:11,299	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'f'. component=serve deployment=f
2021-12-05 19:34:12,327	INFO api.py:249 -- Deployment 'f' is ready at `http://127.0.0.1:8000/f`. component=serve deployment=f


<Response [404]>

Serving model

In [6]:
@serve.deployment(route_prefix="/increment")
class BatchingExample:
    def __init__(self):
        self.count = 0

    @serve.batch
    async def handle_batch(self, requests):
        responses = []
        for request in requests:
            responses.append(request.json())

        return responses

    async def __call__(self, request):
        return await self.handle_batch(request)

BatchingExample.deploy()

2021-12-05 19:34:15,580	INFO api.py:242 -- Updating deployment 'BatchingExample'. component=serve deployment=BatchingExample
 pid=9644)[0m 2021-12-05 19:34:15,616	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'BatchingExample'. component=serve deployment=BatchingExample
2021-12-05 19:34:18,496	INFO api.py:249 -- Deployment 'BatchingExample' is ready at `http://127.0.0.1:8000/increment`. component=serve deployment=BatchingExample


In [None]:
from random import random
import requests
import ray
from ray import serve

#ray.init(num_cpus=8)
#serve.start()

# Our pipeline will be structured as follows:
# - Input comes in, the composed model sends it to model_one
# - model_one outputs a random number between 0 and 1, if the value is
#   greater than 0.5, then the data is sent to model_two
# - otherwise, the data is returned to the user.

# Let's define two models that just print out the data they received.


@serve.deployment
def model_one(data):
    print("Model 1 called with data ", data)
    return random()


model_one.deploy()


@serve.deployment
def model_two(data):
    print("Model 2 called with data ", data)
    return data


model_two.deploy()


# max_concurrent_queries is optional. By default, if you pass in an async
# function, Ray Serve sets the limit to a high number.
@serve.deployment(max_concurrent_queries=10, route_prefix="/composed")
class ComposedModel:
    def __init__(self):
        self.model_one = model_one.get_handle()
        self.model_two = model_two.get_handle()

    # This method can be called concurrently!
    async def __call__(self, starlette_request):
        data = await starlette_request.body()

        score = await self.model_one.remote(data=data)
        if score > 0.5:
            result = await self.model_two.remote(data=data)
            result = {"model_used": 2, "score": score}
        else:
            result = {"model_used": 1, "score": score}

        return result


ComposedModel.deploy()

for _ in range(5):
    resp = requests.get("http://127.0.0.1:8000/composed", data="hey!")
    print(resp.json())

2021-12-05 19:34:36,534	INFO api.py:242 -- Updating deployment 'model_one'. component=serve deployment=model_one
 pid=9644)[0m 2021-12-05 19:34:36,543	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'model_one'. component=serve deployment=model_one
2021-12-05 19:34:39,377	INFO api.py:249 -- Deployment 'model_one' is ready at `http://127.0.0.1:8000/model_one`. component=serve deployment=model_one
2021-12-05 19:34:39,391	INFO api.py:242 -- Updating deployment 'model_two'. component=serve deployment=model_two
 pid=9644)[0m 2021-12-05 19:34:39,493	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'model_two'. component=serve deployment=model_two


In [None]:
import pandas as pd
import mlflow.pyfunc

@serve.deployment
class MLflowDeployment:
    def __init__(self, model_uri):
        self.model = mlflow.pyfunc.load_model(model_uri=model_uri)

    async def __call__(self, request):
        csv_text = await request.body() # The body contains just raw csv text.
        df = pd.read_csv(csv_text)
        return self.model.predict(df)

model_uri = "model:/my_registered_model/Production"
MLflowDeployment.deploy(model_uri)

Sklearn

In [1]:
from ray import serve

import pickle
import json
import numpy as np
import requests
import os
import tempfile

from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import mean_squared_error

# Load data
iris_dataset = load_iris()
data, target, target_names = iris_dataset["data"], iris_dataset[
    "target"], iris_dataset["target_names"]

# Instantiate model
model = GradientBoostingClassifier()

# Training and validation split
np.random.shuffle(data), np.random.shuffle(target)
train_x, train_y = data[:100], target[:100]
val_x, val_y = data[100:], target[100:]

# Train and evaluate models
model.fit(train_x, train_y)
print("MSE:", mean_squared_error(model.predict(val_x), val_y))

# Save the model and label to file
MODEL_PATH = os.path.join(tempfile.gettempdir(),
                          "iris_model_logistic_regression.pkl")
LABEL_PATH = os.path.join(tempfile.gettempdir(), "iris_labels.json")

with open(MODEL_PATH, "wb") as f:
    pickle.dump(model, f)
with open(LABEL_PATH, "w") as f:
    json.dump(target_names.tolist(), f)

MSE: 1.46


In [3]:
@serve.deployment(route_prefix="/regressor")
class BoostingModel:
    def __init__(self):
        with open(MODEL_PATH, "rb") as f:
            self.model = pickle.load(f)
        with open(LABEL_PATH) as f:
            self.label_list = json.load(f)

    async def __call__(self, starlette_request):
        payload = await starlette_request.json()
        print("Worker: received starlette request with data", payload)

        input_vector = [
            payload["sepal length"],
            payload["sepal width"],
            payload["petal length"],
            payload["petal width"],
        ]
        prediction = self.model.predict([input_vector])[0]
        human_name = self.label_list[prediction]
        return {"result": human_name}

serve.start(detached=True)
BoostingModel.deploy()

 pid=9644)[0m 2021-12-05 19:33:33,733	INFO checkpoint_path.py:16 -- Using RayInternalKVStore for controller checkpoint and recovery.
 pid=9644)[0m 2021-12-05 19:33:33,742	INFO http_state.py:98 -- Starting HTTP proxy with name 'SERVE_CONTROLLER_ACTOR:SERVE_PROXY_ACTOR-node:127.0.0.1-0' on node 'node:127.0.0.1-0' listening on '127.0.0.1:8000'
2021-12-05 19:33:34,643	INFO api.py:463 -- Started detached Serve instance in namespace 'serve'.
2021-12-05 19:33:34,664	INFO api.py:242 -- Updating deployment 'BoostingModel'. component=serve deployment=BoostingModel
 pid=8392)[0m INFO:     Started server process [8392]
 pid=9644)[0m 2021-12-05 19:33:34,710	INFO deployment_state.py:912 -- Adding 1 replicas to deployment 'BoostingModel'. component=serve deployment=BoostingModel
2021-12-05 19:33:37,373	INFO api.py:249 -- Deployment 'BoostingModel' is ready at `http://127.0.0.1:8000/regressor`. component=serve deployment=BoostingModel


In [4]:
sample_request_input = {
    "sepal length": 1.2,
    "sepal width": 1.0,
    "petal length": 1.1,
    "petal width": 0.9,
}
response = requests.get(
    "http://localhost:8000/regressor", json=sample_request_input)
print(response.text)

{
  "result": "virginica"
}
 pid=3844)[0m Worker: received starlette request with data {'sepal length': 1.2, 'sepal width': 1.0, 'petal length': 1.1, 'petal width': 0.9}
