Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CR2-22 CR2-49 CR2-48 add metrics list to sdk #328

Merged
merged 13 commits into from
Nov 19, 2020
Merged
21 changes: 21 additions & 0 deletions gradient/api_sdk/clients/deployment_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,27 @@ def get_metrics(self, deployment_id, start=None, end=None, interval="30s", built
)
return metrics


def list_metrics(self, deployment_id, start=None, end=None, interval="30s"):
"""List model deployment metrics

:param str deployment_id: ID of deployment
:param datetime.datetime|str start:
:param datetime.datetime|str end:
:param str interval:
:returns: Metrics of a model deployment job
:rtype: dict[str,dict[str,list[dict]]]
"""

repository = self.build_repository(repositories.ListDeploymentMetrics)
metrics = repository.get(
id=deployment_id,
start=start,
end=end,
interval=interval,
)
return metrics

def stream_metrics(self, deployment_id, interval="30s", built_in_metrics=None):
"""Stream live model deployment metrics

Expand Down
21 changes: 21 additions & 0 deletions gradient/api_sdk/clients/experiment_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,27 @@ def get_metrics(self, experiment_id, start=None, end=None, interval="30s", built
)
return metrics

def list_metrics(self, experiment_id, start=None, end=None, interval="30s"):
"""List experiment metrics

:param str experiment_id: ID of experiment
:param datetime.datetime|str start:
:param datetime.datetime|str end:
:param str interval:
:returns: Metrics of and experiment
:rtype: dict[str,dict[str,list[dict]]]
"""

repository = self.build_repository(repositories.ListExperimentMetrics)
metrics = repository.get(
id=experiment_id,
start=start,
end=end,
interval=interval,
)
return metrics


def stream_metrics(self, experiment_id, interval="30s", built_in_metrics=None):
"""Stream live experiment metrics

Expand Down
22 changes: 21 additions & 1 deletion gradient/api_sdk/clients/job_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .base_client import BaseClient, TagsSupportMixin
from ..models import Artifact, Job
from ..repositories.jobs import ListJobs, ListJobLogs, ListJobArtifacts, CreateJob, DeleteJob, StopJob, \
DeleteJobArtifacts, GetJobArtifacts, GetJobMetrics, StreamJobMetrics
DeleteJobArtifacts, GetJobArtifacts, GetJobMetrics, ListJobMetrics, StreamJobMetrics


class JobsClient(TagsSupportMixin, BaseClient):
Expand Down Expand Up @@ -393,6 +393,26 @@ def get_metrics(self, job_id, start=None, end=None, interval="30s", built_in_met
)
return metrics

def list_metrics(self, job_id, start=None, end=None, interval="30s"):
"""List job metrics

:param str job_id: ID of a job
:param datetime.datetime|str start:
:param datetime.datetime|str end:
:param str interval:
:returns: Metrics of a job
:rtype: dict[str,dict[str,list[dict]]]
"""

repository = self.build_repository(ListJobMetrics)
metrics = repository.get(
id=job_id,
start=start,
end=end,
interval=interval,
)
return metrics

def stream_metrics(self, job_id, interval="30s", built_in_metrics=None):
"""Stream live job metrics

Expand Down
20 changes: 20 additions & 0 deletions gradient/api_sdk/clients/notebook_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,26 @@ def get_metrics(self, notebook_id, start=None, end=None, interval="30s", built_i
)
return metrics

def list_metrics(self, notebook_id, start=None, end=None, interval="30s"):
"""List notebook metrics

:param str notebook_id: notebook ID
:param datetime.datetime|str start:
:param datetime.datetime|str end:
:param str interval:
:returns: Metrics of a notebook
:rtype: dict[str,dict[str,list[dict]]]
"""

repository = self.build_repository(repositories.ListNotebookMetrics)
metrics = repository.get(
id=notebook_id,
start=start,
end=end,
interval=interval,
)
return metrics

def stream_metrics(self, notebook_id, interval="30s", built_in_metrics=None):
"""Stream live notebook metrics

Expand Down
8 changes: 4 additions & 4 deletions gradient/api_sdk/repositories/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from .clusters import ListClusters
from .deployments import ListDeployments, CreateDeployment, StartDeployment, StopDeployment, DeleteDeployment, \
UpdateDeployment, GetDeployment, GetDeploymentMetrics, StreamDeploymentMetrics, ListDeploymentLogs
UpdateDeployment, GetDeployment, GetDeploymentMetrics, ListDeploymentMetrics, StreamDeploymentMetrics, ListDeploymentLogs
from .experiments import ListExperiments, GetExperiment, ListExperimentLogs, StartExperiment, StopExperiment, \
CreateSingleNodeExperiment, CreateMultiNodeExperiment, RunSingleNodeExperiment, RunMultiNodeExperiment, \
CreateMpiMultiNodeExperiment, RunMpiMultiNodeExperiment, DeleteExperiment, GetExperimentMetrics, \
CreateMpiMultiNodeExperiment, RunMpiMultiNodeExperiment, DeleteExperiment, GetExperimentMetrics, ListExperimentMetrics, \
StreamExperimentMetrics
from .hyperparameter import CreateHyperparameterJob, CreateAndStartHyperparameterJob, ListHyperparameterJobs, \
GetHyperparameterTuningJob, StartHyperparameterTuningJob
from .jobs import ListJobs, ListResources, ListJobArtifacts, ListJobLogs, GetJob, GetJobMetrics, StreamJobMetrics
from .jobs import ListJobs, ListResources, ListJobArtifacts, ListJobLogs, GetJob, GetJobMetrics, ListJobMetrics, StreamJobMetrics
from .machine_types import ListMachineTypes
from .machines import CheckMachineAvailability, CreateMachine, CreateResource, StartMachine, StopMachine, \
RestartMachine, GetMachine, UpdateMachine, GetMachineUtilization
from .models import DeleteModel, ListModels, UploadModel, GetModel, ListModelFiles
from .notebooks import CreateNotebook, DeleteNotebook, GetNotebook, ListNotebooks, GetNotebookMetrics, \
from .notebooks import CreateNotebook, DeleteNotebook, GetNotebook, ListNotebooks, GetNotebookMetrics, ListNotebookMetrics, \
StreamNotebookMetrics, StopNotebook, StartNotebook, ForkNotebook, ListNotebookArtifacts, ListNotebookLogs
from .projects import CreateProject, ListProjects, DeleteProject, GetProject
from .secrets import ListSecrets, SetSecret, DeleteSecret
Expand Down
79 changes: 78 additions & 1 deletion gradient/api_sdk/repositories/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,84 @@ def _format_datetime(self, some_datetime):
datetime_str = some_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
return datetime_str

class ListMetrics(GetResource):
OBJECT_TYPE = None

DEFAULT_INTERVAL = "30s"

@abc.abstractmethod
def _get_instance_by_id(self, instance_id, **kwargs):
pass

def _get_metrics_api_url(self, instance, protocol="https"):
if not instance.metrics_url:
raise GradientSdkError("Metrics API url not found")

metrics_api_url = concatenate_urls(protocol + "://", instance.metrics_url)
return metrics_api_url

def _get(self, **kwargs):
new_kwargs = self._get_kwargs(kwargs)
rv = super(ListMetrics, self)._get(**new_kwargs)
return rv

def _get_kwargs(self, kwargs):
instance_id = kwargs["id"]
instance = self._get_instance_by_id(instance_id)
started_date = self._get_start_date(instance, kwargs)
end = self._get_end_date(instance, kwargs)
interval = kwargs.get("interval") or self.DEFAULT_INTERVAL
metrics_api_url = self._get_metrics_api_url(instance)
new_kwargs = {
"start": started_date,
"interval": interval,
"objecttype": self.OBJECT_TYPE,
"handle": instance_id,
"metrics_api_url": metrics_api_url if config.CONFIG_HOST != "http://localhost:3102" else "% s:8080"% metrics_api_url,
}
if end:
new_kwargs["end"] = end

return new_kwargs

def get_request_url(self, **kwargs):
return "metrics/api/v1/list"

def _get_api_url(self, **kwargs):
api_url = kwargs["metrics_api_url"]
return api_url

def _get_start_date(self, instance, kwargs):
datetime_string = kwargs.get("start") or instance.dt_started or instance.dt_created
if not datetime_string:
return None

datetime_string = self._format_datetime(datetime_string)
return datetime_string

def _get_end_date(self, instance, kwargs):
datetime_string = kwargs.get("end")
if not datetime_string:
return None

datetime_string = self._format_datetime(datetime_string)
return datetime_string
colin-welch marked this conversation as resolved.
Show resolved Hide resolved

def _get_request_params(self, kwargs):
params = kwargs.copy()
params.pop("metrics_api_url", None)
return params

def _parse_object(self, instance_dict, **kwargs):
chart_names = instance_dict["chart_names"]
return chart_names

def _format_datetime(self, some_datetime):
if not isinstance(some_datetime, datetime.datetime):
some_datetime = dateutil.parser.parse(some_datetime)

datetime_str = some_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
return datetime_str

@six.add_metaclass(abc.ABCMeta)
class StreamMetrics(BaseRepository):
Expand Down Expand Up @@ -456,7 +534,6 @@ def _send_chart_descriptor(self, connection, kwargs):
def _get_stream_generator(self, connection):
return connection


class ListLogs(ListResources):
@abc.abstractmethod
def _get_request_params(self, kwargs):
Expand Down
16 changes: 15 additions & 1 deletion gradient/api_sdk/repositories/deployments.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .common import ListResources, CreateResource, StartResource, StopResource, DeleteResource, AlterResource, \
GetResource, GetMetrics, StreamMetrics, ListLogs
GetResource, GetMetrics, ListMetrics, StreamMetrics, ListLogs
from .. import serializers, config, sdk_exceptions
from ..sdk_exceptions import ResourceFetchingError, MalformedResponseError

Expand Down Expand Up @@ -175,6 +175,20 @@ def _get_start_date(self, instance, kwargs):

return rv

class ListDeploymentMetrics(ListMetrics):
OBJECT_TYPE = "modelDeployment"

def _get_instance_by_id(self, instance_id, **kwargs):
repository = GetDeployment(self.api_key, logger=self.logger, ps_client_name=self.ps_client_name)
instance = repository.get(deployment_id=instance_id)
return instance

def _get_start_date(self, instance, kwargs):
rv = super(ListDeploymentMetrics, self)._get_start_date(instance, kwargs)
if rv is None:
raise sdk_exceptions.GradientSdkError("Deployment job has not started yet")

return rv

class StreamDeploymentMetrics(StreamMetrics):
OBJECT_TYPE = "modelDeployment"
Expand Down
28 changes: 27 additions & 1 deletion gradient/api_sdk/repositories/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import six
import websocket

from .common import ListResources, CreateResource, StartResource, StopResource, DeleteResource, GetResource, GetMetrics, \
from .common import ListResources, CreateResource, StartResource, StopResource, DeleteResource, GetResource, GetMetrics, ListMetrics, \
StreamMetrics, ListLogs
from .. import config, serializers, sdk_exceptions
from ..repositories.jobs import ListJobs
Expand Down Expand Up @@ -208,6 +208,32 @@ def _get_instance(self, response, **kwargs):

return rv

class ListExperimentMetrics(GetExperimentMetricsApiUrlMixin, ListMetrics):
OBJECT_TYPE = "experiment"

def _get_instance_by_id(self, instance_id, **kwargs):
repository = GetExperiment(self.api_key, logger=self.logger, ps_client_name=self.ps_client_name)
instance = repository.get(experiment_id=instance_id)
return instance

def _get_start_date(self, instance, kwargs):
rv = super(ListExperimentMetrics, self)._get_start_date(instance, kwargs)
if rv is None:
raise sdk_exceptions.GradientSdkError("Experiment has not started yet")

return rv

def _get_instance(self, response, **kwargs):
try:
rv = super(ListExperimentMetrics, self)._get_instance(response, **kwargs)
except sdk_exceptions.ResourceFetchingError as e:
if '{"version":' in str(e):
# TODO: metrics are not working for v1 experiments at the moment
raise sdk_exceptions.GradientSdkError("Metrics are available for private clusters only")
robghchen marked this conversation as resolved.
Show resolved Hide resolved
else:
raise

return rv

class StreamExperimentMetrics(GetExperimentMetricsApiUrlMixin, StreamMetrics):
OBJECT_TYPE = "experiment"
Expand Down
16 changes: 15 additions & 1 deletion gradient/api_sdk/repositories/jobs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json

import gradient.api_sdk.config
from .common import ListResources, CreateResource, GetResource, DeleteResource, StopResource, GetMetrics, StreamMetrics, \
from .common import ListResources, CreateResource, GetResource, DeleteResource, StopResource, GetMetrics, ListMetrics, StreamMetrics, \
ListLogs
from .. import serializers, sdk_exceptions
from ..clients import http_client
Expand Down Expand Up @@ -206,6 +206,20 @@ def _get_start_date(self, instance, kwargs):

return rv

class ListJobMetrics(ListMetrics):
OBJECT_TYPE = "mljob"

def _get_instance_by_id(self, instance_id, **kwargs):
repository = GetJob(self.api_key, logger=self.logger, ps_client_name=self.ps_client_name)
instance = repository.get(job_id=instance_id)
return instance

def _get_start_date(self, instance, kwargs):
rv = super(ListJobMetrics, self)._get_start_date(instance, kwargs)
if rv is None:
raise sdk_exceptions.GradientSdkError("Job has not started yet")

return rv

class StreamJobMetrics(StreamMetrics):
OBJECT_TYPE = "mljob"
Expand Down
17 changes: 15 additions & 2 deletions gradient/api_sdk/repositories/notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ..clients import http_client
from ..sdk_exceptions import ResourceCreatingError
from .common import CreateResource, DeleteResource, ListResources, GetResource, \
StopResource, GetMetrics, StreamMetrics, BaseRepository, ListLogs
StopResource, GetMetrics, ListMetrics, StreamMetrics, BaseRepository, ListLogs
from .. import config
from .. import serializers, sdk_exceptions

Expand Down Expand Up @@ -174,6 +174,20 @@ def _get_start_date(self, instance, kwargs):

return rv

class ListNotebookMetrics(ListMetrics):
OBJECT_TYPE = "notebook"

def _get_instance_by_id(self, instance_id, **kwargs):
repository = GetNotebook(self.api_key, logger=self.logger, ps_client_name=self.ps_client_name)
instance = repository.get(id=instance_id)
return instance

def _get_start_date(self, instance, kwargs):
rv = super(ListNotebookMetrics, self)._get_start_date(instance, kwargs)
if rv is None:
raise sdk_exceptions.GradientSdkError("Notebook has not started yet")

return rv

class StreamNotebookMetrics(StreamMetrics):
OBJECT_TYPE = "notebook"
Expand Down Expand Up @@ -220,4 +234,3 @@ def _get_request_params(self, kwargs):
"limit": kwargs["limit"]
}
return params