Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix status update for failed deployments #474

Merged
merged 2 commits into from
Mar 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api-frontend/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ clean:
@rm -fv apife.json
@rm -fv apife.yaml
@rm -fv apife_istio.yaml
@rm -fr src/main/proto/*

cache_dependencies:
mvn -Dmaven.repo.local=./.m2 dependency:resolve
Expand Down
5 changes: 1 addition & 4 deletions api-frontend/Makefile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@ push_image_private_repo:

clean:
mvn clean -B
rm -fv src/main/proto/seldon_deployment.proto
rm -fv src/main/proto/prediction.proto
rm -fvr src/main/proto/k8s.io
rm -fv src/main/proto/v1.proto
rm -fr src/main/proto/*

download_protos_k8s:
cd ../proto/k8s ; make create_protos
Expand Down
1 change: 1 addition & 0 deletions cluster-manager/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ push_to_registry:
clean:
@mvn clean
@rm -fv cluster-manager.json
@rm -fr src/main/proto/*

cache_dependencies:
mvn -Dmaven.repo.local=./.m2 dependency:resolve
Expand Down
4 changes: 1 addition & 3 deletions cluster-manager/Makefile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ push_image_private_repo:

clean:
mvn clean -B
rm -fv src/main/proto/seldon_deployment.proto
rm -fvr src/main/proto/k8s.io
rm -fv src/main/proto/v1.proto
rm -fr src/main/proto/*
rm -rfv java_client

download_protos:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package io.seldon.clustermanager.k8s;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import io.kubernetes.client.ApiClient;
import io.kubernetes.client.ApiException;
import io.kubernetes.client.ProtoClient;
import io.kubernetes.client.ProtoClient.ObjectOrStatus;
import io.kubernetes.client.apis.CoreV1Api;
import io.kubernetes.client.models.ExtensionsV1beta1Deployment;
import io.kubernetes.client.models.ExtensionsV1beta1DeploymentList;
import io.kubernetes.client.models.V1DeleteOptions;
import io.kubernetes.client.models.V1Service;
import io.kubernetes.client.models.V1ServiceList;
import io.kubernetes.client.models.V1Status;
import io.kubernetes.client.proto.Meta.DeleteOptions;
import io.kubernetes.client.proto.V1.Service;
import io.kubernetes.client.proto.V1beta1Extensions.Deployment;
import io.seldon.clustermanager.pb.ProtoBufUtils;
import io.seldon.protos.DeploymentProtos.SeldonDeployment;

/**
 * Spring component that deletes Kubernetes Deployments and Services owned by a
 * SeldonDeployment when their names are not in a caller-supplied list.
 */
@Component
public class SeldonDeletionHandler {

    private static final Logger logger = LoggerFactory.getLogger(SeldonDeletionHandler.class);

    private final SeldonNameCreator seldonNameCreator = new SeldonNameCreator();
    private final KubeCRDHandler crdHandler;

    /**
     * @param crdHandler handler used to look up the Deployments and Services owned by a SeldonDeployment
     */
    @Autowired
    public SeldonDeletionHandler(KubeCRDHandler crdHandler) {
        this.crdHandler = crdHandler;
    }

    /** Collects the metadata names of the given services into a set. */
    private Set<String> getServiceNames(List<Service> services) {
        Set<String> names = new HashSet<>();
        for (Service s : services) {
            names.add(s.getMetadata().getName());
        }
        return names;
    }

    /** Collects the metadata names of the given deployments into a set. */
    private Set<String> getDeploymentNames(List<Deployment> deployments) {
        Set<String> names = new HashSet<>();
        for (Deployment d : deployments) {
            names.add(d.getMetadata().getName());
        }
        return names;
    }

    /**
     * Returns true when the deployment carries the service orchestrator label.
     * A deployment without any labels (null label map) is treated as not carrying it.
     */
    private boolean hasSvcOrchLabel(ExtensionsV1beta1Deployment d) {
        return d.getMetadata().getLabels() != null
                && d.getMetadata().getLabels().containsKey(Constants.LABEL_SELDON_SVCORCH);
    }

    /**
     * Delete deployments that are not in list. Allows 2 stage delete by only deleting service orchestrator or all. Gets owned
     * deployments and then removes ones not in the list provided.
     * <p>
     * A failed delete is logged and the loop carries on with the remaining deployments; no exception is raised for it.
     *
     * @param client ProtoClient
     * @param namespace Namespace to use
     * @param seldonDeployment The Seldon Deployment we are referring to
     * @param deployments The list of deployments from the Seldon Deployment
     * @param svcOrchOnly Whether to only delete the service orchestrator
     * @return Number of Deployments for which a delete was attempted (the count is incremented before the
     *         delete call, so it includes deletes that returned an error status)
     * @throws ApiException
     * @throws IOException
     * @throws SeldonDeploymentException
     */
    public int removeDeployments(ProtoClient client,String namespace,SeldonDeployment seldonDeployment,List<Deployment> deployments,boolean svcOrchOnly) throws ApiException, IOException, SeldonDeploymentException
    {
        int deleteCount = 0;
        Set<String> names = getDeploymentNames(deployments);
        ExtensionsV1beta1DeploymentList depList = crdHandler.getOwnedDeployments(seldonNameCreator.getSeldonId(seldonDeployment),namespace);
        for (ExtensionsV1beta1Deployment d : depList.getItems())
        {
            final String name = d.getMetadata().getName();
            // Labels are only consulted in the service-orchestrator-only stage.
            boolean okToDelete = !svcOrchOnly || hasSvcOrchLabel(d);
            if (okToDelete && !names.contains(name))
            {
                deleteCount++;
                // Built by plain concatenation; both path segments are escaped by the API client.
                final String deleteApiPath = "/apis/" + SeldonDeploymentControllerImpl.DEPLOYMENT_API_VERSION
                        + "/namespaces/" + client.getApiClient().escapeString(namespace)
                        + "/deployments/" + client.getApiClient().escapeString(name);
                DeleteOptions options = DeleteOptions.newBuilder().setPropagationPolicy("Foreground").build();
                ObjectOrStatus<Deployment> os = client.delete(Deployment.newBuilder(),deleteApiPath,options);
                if (os.status != null) {
                    // Deliberately not thrown: an error status is only logged so the remaining deployments are still processed.
                    logger.error("Error deleting deployment:{}", ProtoBufUtils.toJson(os.status));
                }
                else if (logger.isDebugEnabled()) {
                    // Guarded so the JSON serialisation is skipped when debug logging is off.
                    logger.debug("Deleted deployment:{}", ProtoBufUtils.toJson(os.object));
                }
            }
            else
            {
                logger.info("Skipping deletion of {} svcOrchOnly:{}",name,svcOrchOnly);
            }
        }
        return deleteCount;
    }

    /**
     * Delete owned services that are not in the list provided.
     *
     * @param client ApiClient used to build the CoreV1Api
     * @param namespace Namespace to use
     * @param seldonDeployment The Seldon Deployment we are referring to
     * @param services The list of services to keep; owned services with other names are deleted
     * @throws ApiException
     * @throws IOException
     * @throws SeldonDeploymentException if a delete returns no status or a status other than "Success"
     */
    public void removeServices(ApiClient client,String namespace,SeldonDeployment seldonDeployment,List<Service> services) throws ApiException, IOException, SeldonDeploymentException
    {
        Set<String> names = getServiceNames(services);
        V1ServiceList svcList = crdHandler.getOwnedServices(seldonNameCreator.getSeldonId(seldonDeployment),namespace);
        // One API wrapper is enough for the whole loop.
        CoreV1Api api = new CoreV1Api(client);
        for (V1Service s : svcList.getItems())
        {
            final String name = s.getMetadata().getName();
            if (names.contains(name))
            {
                continue;
            }
            V1DeleteOptions options = new V1DeleteOptions();
            V1Status status = api.deleteNamespacedService(name, namespace, options, null, null, null, null);
            // A missing status is reported as a failure rather than surfacing as a NullPointerException.
            if (status == null || !"Success".equals(status.getStatus()))
            {
                logger.error("Failed to delete service {}", name);
                throw new SeldonDeploymentException("Failed to delete service "+name);
            }
            logger.debug("Deleted service {}", name);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,21 +56,23 @@ public class SeldonDeploymentControllerImpl implements SeldonDeploymentControlle
private final KubeCRDHandler crdHandler;
private final SeldonDeploymentCache mlCache;
private final SeldonNameCreator seldonNameCreator = new SeldonNameCreator();
private final SeldonDeletionHandler deletionHandler;

private static final String DEPLOYMENT_API_VERSION = "extensions/v1beta1";
static final String DEPLOYMENT_API_VERSION = "extensions/v1beta1";

Cache<String, Boolean> deletedCache = CacheBuilder.newBuilder()
.maximumSize(1000)
.build();


@Autowired
public SeldonDeploymentControllerImpl(SeldonDeploymentOperator operator, K8sClientProvider clientProvider,KubeCRDHandler crdHandler,SeldonDeploymentCache mlCache) {
public SeldonDeploymentControllerImpl(SeldonDeploymentOperator operator, K8sClientProvider clientProvider,KubeCRDHandler crdHandler,SeldonDeploymentCache mlCache,SeldonDeletionHandler deletetionHandler) {
super();
this.operator = operator;
this.clientProvider = clientProvider;
this.crdHandler = crdHandler;
this.mlCache = mlCache;
this.deletionHandler = deletetionHandler;
}

private void createDeployments(ProtoClient client,String namespace,List<Deployment> deployments) throws ApiException, IOException, SeldonDeploymentException
Expand Down Expand Up @@ -116,74 +118,7 @@ private void createDeployments(ProtoClient client,String namespace,List<Deployme
}
}

/** Builds the set of metadata names for the supplied deployments. */
private Set<String> getDeploymentNames(List<Deployment> deployments)
{
    final Set<String> deploymentNames = new HashSet<>();
    for (final Deployment deployment : deployments)
    {
        final String deploymentName = deployment.getMetadata().getName();
        deploymentNames.add(deploymentName);
    }
    return deploymentNames;
}

/**
 * Deletes owned deployments whose names are not in the list provided. When svcOrchOnly is true,
 * only deployments carrying the service orchestrator label are considered for deletion.
 * A delete that returns an error status is logged and the loop carries on; no exception is raised for it.
 *
 * @param client ProtoClient
 * @param namespace Namespace to use
 * @param seldonDeployment The Seldon Deployment whose owned deployments are examined
 * @param deployments The deployments to keep
 * @param svcOrchOnly Whether to only delete the service orchestrator
 * @return Number of deployments for which a delete was attempted (incremented before the delete call,
 *         so it includes deletes that returned an error status)
 */
private int removeDeployments(ProtoClient client,String namespace,SeldonDeployment seldonDeployment,List<Deployment> deployments,boolean svcOrchOnly) throws ApiException, IOException, SeldonDeploymentException
{
    int deleteCount = 0;
    Set<String> names = getDeploymentNames(deployments);
    ExtensionsV1beta1DeploymentList depList = crdHandler.getOwnedDeployments(seldonNameCreator.getSeldonId(seldonDeployment),namespace);
    for (ExtensionsV1beta1Deployment d : depList.getItems())
    {
        final String name = d.getMetadata().getName();
        // A deployment with no labels (null map) is treated as not being the service orchestrator.
        boolean okToDelete = !svcOrchOnly
                || (d.getMetadata().getLabels() != null
                    && d.getMetadata().getLabels().containsKey(Constants.LABEL_SELDON_SVCORCH));
        if (okToDelete && !names.contains(name))
        {
            deleteCount++;
            // Built by plain concatenation; both path segments are escaped by the API client.
            final String deleteApiPath = "/apis/" + DEPLOYMENT_API_VERSION
                    + "/namespaces/" + client.getApiClient().escapeString(namespace)
                    + "/deployments/" + client.getApiClient().escapeString(name);
            DeleteOptions options = DeleteOptions.newBuilder().setPropagationPolicy("Foreground").build();
            ObjectOrStatus<Deployment> os = client.delete(Deployment.newBuilder(),deleteApiPath,options);
            if (os.status != null) {
                // Deliberately not thrown: an error status is only logged so the remaining deployments are still processed.
                logger.error("Error deleting deployment:{}", ProtoBufUtils.toJson(os.status));
            }
            else if (logger.isDebugEnabled()) {
                // Guarded so the JSON serialisation is skipped when debug logging is off.
                logger.debug("Deleted deployment:{}", ProtoBufUtils.toJson(os.object));
            }
        }
        else
        {
            logger.info("Skipping deletion of {} svcOrchOnly:{}",name,svcOrchOnly);
        }
    }
    return deleteCount;
}

/**
 * Deletes owned services whose names are not in the list provided.
 *
 * @param client ApiClient used to build the CoreV1Api
 * @param namespace Namespace to use
 * @param seldonDeployment The Seldon Deployment whose owned services are examined
 * @param services The services to keep
 * @throws SeldonDeploymentException if a delete returns no status or a status other than "Success"
 */
private void removeServices(ApiClient client,String namespace,SeldonDeployment seldonDeployment,List<Service> services) throws ApiException, IOException, SeldonDeploymentException
{
    Set<String> names = getServiceNames(services);
    V1ServiceList svcList = crdHandler.getOwnedServices(seldonNameCreator.getSeldonId(seldonDeployment),namespace);
    // One API wrapper is enough for the whole loop.
    CoreV1Api api = new CoreV1Api(client);
    for (V1Service s : svcList.getItems())
    {
        final String name = s.getMetadata().getName();
        if (names.contains(name))
        {
            continue;
        }
        io.kubernetes.client.models.V1DeleteOptions options = new V1DeleteOptions();
        V1Status status = api.deleteNamespacedService(name, namespace, options, null, null, null, null);
        // A missing status is reported as a failure rather than surfacing as a NullPointerException.
        if (status == null || !"Success".equals(status.getStatus()))
        {
            logger.error("Failed to delete service {}", name);
            throw new SeldonDeploymentException("Failed to delete service "+name);
        }
        logger.debug("Deleted service {}", name);
    }
}

/** Builds the set of metadata names for the supplied services. */
private Set<String> getServiceNames(List<Service> services)
{
    final Set<String> serviceNames = new HashSet<>();
    for (final Service service : services)
    {
        final String serviceName = service.getMetadata().getName();
        serviceNames.add(serviceName);
    }
    return serviceNames;
}

private void createServices(ProtoClient client,String namespace,List<Service> services) throws ApiException, IOException, SeldonDeploymentException
{
Expand Down Expand Up @@ -241,11 +176,11 @@ public void removeInitialUnusedResources(SeldonDeployment mlDep) {
logger.info("Deployment delete cache key {}",deploymentDeleteKey);
if (deletedCache.getIfPresent(deploymentDeleteKey) == null)
{
int deleteCount = removeDeployments(client, namespace, mlDep2, resources.deployments,true);
int deleteCount = deletionHandler.removeDeployments(client, namespace, mlDep2, resources.deployments,true);
if (deleteCount == 0)
{
logger.info("Failed to delete anything from first stage delete so will delete all unsed deployments for {}",mlDep.getSpec().getName());
removeDeployments(client, namespace, mlDep2, resources.deployments,false);
deletionHandler.removeDeployments(client, namespace, mlDep2, resources.deployments,false);
}
deletedCache.put(deploymentDeleteKey, true);
}
Expand All @@ -269,9 +204,9 @@ public void removeAllUnusedResources(SeldonDeployment mlDep) {
DeploymentResources resources = operator.createResources(mlDep2);
ProtoClient client = clientProvider.getProtoClient();
String namespace = SeldonDeploymentUtils.getNamespace(mlDep2);
removeDeployments(client, namespace, mlDep2, resources.deployments,false);
deletionHandler.removeDeployments(client, namespace, mlDep2, resources.deployments,false);
ApiClient client2 = clientProvider.getClient();
removeServices(client2,namespace, mlDep2, resources.services);
deletionHandler.removeServices(client2,namespace, mlDep2, resources.services);
} catch (SeldonDeploymentException e) {
logger.error("Failed to cleanup deployment ",e);
} catch (ApiException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,13 @@ private boolean isAvailable(SeldonDeployment.Builder mlBuilder,SeldonDeployment
return false;
for (PredictorStatus.Builder b : mlBuilder.getStatusBuilder().getPredictorStatusBuilderList())
{
if (b.getReplicas() != b.getReplicasAvailable())
return false;
names.remove(b.getName());
if (names.contains(b.getName()))
{
if (b.getReplicas() != b.getReplicasAvailable())
return false;
else
names.remove(b.getName());
}
}
if (names.isEmpty())
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ public void createMocks(String resourceFilename) throws Exception
crdHandler = new KubeCRDHandlerImpl(mockK8sApiProvider,mockK8sClientProvider,props);
mlCache = new SeldonDeploymentCacheImpl(props, crdHandler);
operator = new SeldonDeploymentOperatorImpl(props);
controller = new SeldonDeploymentControllerImpl(operator, mockK8sClientProvider, crdHandler, mlCache);
controller = new SeldonDeploymentControllerImpl(operator, mockK8sClientProvider, crdHandler, mlCache, new SeldonDeletionHandler(crdHandler));

}
}
Loading