Skip to content
Permalink
Browse files

Split and Refactor the Device Plugin API

Changes include:
- New registration model
    Split the Registration part of the Device Plugin in its own API
    This is as part of the ongoing discussion around a better
    registration API.
    It allows using fsnotify rather than the current dual gRPC process.
    This will allow for a cleaner and simpler API with no downtime

- Replace the n allocate calls with a single AdmitPod call
- Add the Container initialize call
    AdmitPod is used for injecting in the pod sandbox.
    InitContainer is used for injecting in the containers.

    This allows for a cleaner separation of concerns
    And a more straightforward API.
  • Loading branch information...
RenaudWasTaken committed Dec 8, 2017
1 parent 355aff2 commit 23d735f02d39fd1c1464e67a355b64642cecfec6
@@ -13,41 +13,37 @@ option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true; option (gogoproto.unmarshaler_all) = true;
option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.goproto_unrecognized_all) = false;


// DevicePlugin is the service advertised by Device Plugins
service DevicePlugin {
// GetPluginInfo is called by the Kubelet after the device plugin placed
// its socket inside the kubelet's device plugin standard registration path
rpc GetPluginInfo(GetPluginInfoRequest) returns (GetPluginInfoResponse) {}
// ListAndWatch returns a stream of List of Devices
// Whenever a Device state changes or a Device disappears, ListAndWatch
// returns the new list
rpc ListAndWatch(ListAndWatchRequest) returns (stream ListAndWatchResponse) {}


// Registration is the service advertised by the Kubelet // AdmitPod is called during pod admission
// Only when Kubelet answers with a success code to a Register Request rpc AdmitPod(AdmitPodRequest) returns (AdmitPodResponse) {}
// may Device Plugins start their service
// Registration may fail when device plugin version is not supported by
// Kubelet or the registered resourceName is already taken by another
// active device plugin. Device plugin is expected to terminate upon registration failure
service Registration {
rpc Register(RegisterRequest) returns (Empty) {}
}


message RegisterRequest { // InitializeContainer is called during container creation
// Version of the API the Device Plugin was built against rpc InitContainer(InitContainerRequest) returns (InitContainerResponse) {}
string version = 1;
// Name of the unix socket the device plugin is listening on
// PATH = path.Join(DevicePluginPath, endpoint)
string endpoint = 2;
// Schedulable resource name. As of now it's expected to be a DNS Label
string resource_name = 3;
} }


message Empty { // A request sent by the Kubelet to the device plugin containing the kubelet version
message GetPluginInfoRequest {
} }


// DevicePlugin is the service advertised by Device Plugins // The response sent containing the name of the plugin as well as its expected InitializeContainer max timeout
service DevicePlugin { message GetPluginInfoResponse {
// ListAndWatch returns a stream of List of Devices // timeout in seconds for the Init call
// Whenever a Device state change or a Device disapears, ListAndWatch int64 init_timeout = 1;
// returns the new list // labels to advertise on the node
rpc ListAndWatch(Empty) returns (stream ListAndWatchResponse) {} map<string, string> labels = 2;
}


// Allocate is called during container creation so that the Device // The message sent when calling ListAndWatch
// Plugin can run device specific operations and instruct Kubelet message ListAndWatchRequest {
// of the steps to make the Device available in the container
rpc Allocate(AllocateRequest) returns (AllocateResponse) {}
} }


// ListAndWatch returns a stream of List of Devices // ListAndWatch returns a stream of List of Devices
@@ -57,10 +53,43 @@ message ListAndWatchResponse {
repeated Device devices = 1; repeated Device devices = 1;
} }


// AdmitPodRequest is sent by the kubelet to the device plugin while a pod
// is going through admission.
// TODO change this for CPU manager
message AdmitPodRequest {
  // Name of the pod being admitted.
  string pod_name = 1;

  // Init containers of the pod, each with the devices assigned to it.
  // NOTE(review): the map key is presumably the container name; confirm.
  map<string, Container> init_containers = 2;

  // Regular containers of the pod, each with the devices assigned to it.
  // NOTE(review): the map key is presumably the container name; confirm.
  map<string, Container> containers = 3;
}

// AdmitPodResponse is the reply to an AdmitPodRequest.
message AdmitPodResponse {
  // Pod-level spec that should be added to the pod.
  PodSpec pod = 1;
}

// InitContainerRequest is expected to be issued before a container starts.
// The call is synchronous.
message InitContainerRequest {
  // Container that is about to be started.
  Container container = 1;
}

// InitContainerResponse is the reply to an InitContainerRequest.
message InitContainerResponse {
  // Spec that should be injected into the container.
  ContainerSpec spec = 1;
}

/* E.g: /* E.g:
* struct Device { * struct Device {
* ID: "GPU-fef8089b-4820-abfc-e83e-94318197576e", * ID: "GPU-fef8089b-4820-abfc-e83e-94318197576e",
* State: "Healthy", * State: "Healthy",
* Attributes: {
* "memory": "8000",
* }
*} */ *} */
message Device { message Device {
// A unique ID assigned by the device plugin used // A unique ID assigned by the device plugin used
@@ -69,35 +98,35 @@ message Device {
string ID = 1; string ID = 1;
// Health of the device, can be healthy or unhealthy, see constants.go // Health of the device, can be healthy or unhealthy, see constants.go
string health = 2; string health = 2;
// Attributes of the device must start with the vendor name (e.g. "nvidia.com/gpu/memory")
map<string, string> Attributes = 3;
} }


// - Allocate is expected to be called during pod creation since allocation message Container {
// failures for any container would result in pod startup failure. // The name of the Container
// - Allocate allows kubelet to exposes additional artifacts in a pod's string name = 1;
// environment as directed by the plugin.
// - Allocate allows Device Plugin to run device specific operations on // The device assigned to that container
// the Devices requested repeated string devices = 2;
message AllocateRequest {
repeated string devicesIDs = 1;
} }


// AllocateResponse includes the artifacts that needs to be injected into message ContainerSpec {
// a container for accessing 'deviceIDs' that were mentioned as part of // List of environment variables to be set in the container to access one or more devices.
// 'AllocateRequest'.
// Failure Handling:
// if Kubelet sends an allocation request for dev1 and dev2.
// Allocation on dev1 succeeds but allocation on dev2 fails.
// The Device plugin should send a ListAndWatch update and fail the
// Allocation request
message AllocateResponse {
// List of environment variables to be set in the container to access one or more devices.
map<string, string> envs = 1; map<string, string> envs = 1;
// Mounts for the container. // Mounts for the container.
repeated Mount mounts = 2; repeated Mount mounts = 2;
// Devices for the container. // Devices for the container.
repeated DeviceSpec devices = 3; repeated DeviceSpec devices = 3;
// Annotations for the container
map<string, string> annotations = 4;
} }


// PodSpec carries pod-level additions; it is the payload of
// AdmitPodResponse.
message PodSpec {
  // Annotations to set on the pod.
  map<string, string> annotations = 1;
}


// Mount specifies a host volume to mount into a container. // Mount specifies a host volume to mount into a container.
// where device library or tools are installed on host and container // where device library or tools are installed on host and container
message Mount { message Mount {
@@ -24,10 +24,13 @@ const (


// Current version of the API supported by kubelet // Current version of the API supported by kubelet
Version = "v1alpha2" Version = "v1alpha2"

// DeviceManagerPath is the path to the device manager folder
// Plugins have an explicit folder so that we can store the checkpoint data
// in the manager path
DeviceManagerPath = "/var/lib/kubelet/device-plugin"
// DevicePluginPath is the folder the Device Plugin is expecting sockets to be on // DevicePluginPath is the folder the Device Plugin is expecting sockets to be on
// Only privileged pods have access to this path // Only privileged pods have access to this path
// Note: Placeholder until we find a "standard path" // Note: Placeholder until we find a "standard path"
DevicePluginPath = "/var/lib/kubelet/device-plugins/" DevicePluginsPath = DeviceManagerPath + "/plugins"
// KubeletSocket is the path of the Kubelet registry socket
KubeletSocket = DevicePluginPath + "kubelet.sock"
) )
@@ -0,0 +1,54 @@
// To regenerate api.pb.go run hack/update-device-plugin.sh
syntax = 'proto3';

package pluginregistration;

import "github.com/gogo/protobuf/gogoproto/gogo.proto";

// File-wide gogoproto code-generation options; the extensions themselves are
// declared in the imported gogo.proto.
// Drop the default golang/protobuf String() method and use the
// gogoproto-generated one instead.
option (gogoproto.goproto_stringer_all) = false;
option (gogoproto.stringer_all) = true;
// Keep generating the standard getter methods for every message.
option (gogoproto.goproto_getters_all) = true;
// Generate dedicated Marshal, Size and Unmarshal methods for every message.
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
// Do not emit the XXX_unrecognized field in generated structs.
option (gogoproto.goproto_unrecognized_all) = false;

// Identity exposes a plugin's supported API versions and its identity, and
// accepts the outcome of a registration attempt.
service Identity {
  // Reports which Device Plugin API versions are supported.
  rpc GetSupportedVersions(GetSupportedVersionsRequest) returns (GetSupportedVersionsResponse) {}

  // Reports the identity of the plugin.
  rpc GetPluginIdentity(GetPluginIdentityRequest) returns (GetPluginIdentityResponse) {}

  // Carries a RegistrationStatus (success flag plus error text).
  // NOTE(review): presumably invoked by the Kubelet to tell the plugin how
  // registration went; confirm against the caller.
  rpc PluginRegistrationStatus(RegistrationStatus) returns (Empty) {}
}

// Empty is a message without fields; it is the return type of
// Identity.PluginRegistrationStatus.
message Empty {}

// GetSupportedVersionsRequest is the (currently field-less) argument of
// Identity.GetSupportedVersions.
message GetSupportedVersionsRequest {}

// GetSupportedVersionsResponse is returned by Identity.GetSupportedVersions.
message GetSupportedVersionsResponse {
  // Every Device Plugin API version the plugin supports.
  repeated string supported_versions = 1;
}

// GetPluginIdentityRequest is the argument of Identity.GetPluginIdentity.
message GetPluginIdentityRequest {
  // Kubelet Device Plugin API version.
  string version = 1;
}

// GetPluginIdentityResponse is returned by Identity.GetPluginIdentity.
message GetPluginIdentityResponse {
  // Name of the plugin.
  string resource_name = 1;
}

// RegistrationStatus describes the outcome of a plugin registration; it is
// the argument of Identity.PluginRegistrationStatus.
message RegistrationStatus {
  // True when registration succeeded, false otherwise.
  bool success = 1;

  // Error encountered when registration did not succeed.
  // Left empty on successful registration.
  string error = 2;
}

0 comments on commit 23d735f

Please sign in to comment.
You can’t perform that action at this time.