Skip to content

Commit

Permalink
feat(proto): Define CommonReplicaSpec in common.proto
Browse files Browse the repository at this point in the history
Resolves: flyteorg#4408
Signed-off-by: Chi-Sheng Liu <chishengliu@chishengliu.com>
  • Loading branch information
MortalHappiness committed May 11, 2024
1 parent ee6037b commit fde3bc5
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 46 deletions.
27 changes: 27 additions & 0 deletions flyteidl/protos/flyteidl/plugins/common.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
syntax = "proto3";

package flyteidl.plugins;

option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins";

import "flyteidl/core/tasks.proto";

// RestartPolicy is the set of restart behaviours that can be requested for
// the pods of a replica group (see CommonReplicaSpec.restart_policy).
enum RestartPolicy {
  // Zero value: this is what an unset RestartPolicy field reads as in proto3.
  RESTART_POLICY_NEVER = 0;

  RESTART_POLICY_ON_FAILURE = 1;

  RESTART_POLICY_ALWAYS = 2;
}

// CommonReplicaSpec groups the replica-group settings (replica count, image,
// resources and restart policy) into a single message.
message CommonReplicaSpec {
  // Number of replicas in the group.
  int32 replicas = 1;

  // Image used by the replica group.
  string image = 2;

  // Resources required by the replica group.
  core.Resources resources = 3;

  // Determines whether pods will be restarted when they exit.
  RestartPolicy restart_policy = 4;
}
11 changes: 3 additions & 8 deletions flyteidl/protos/flyteidl/plugins/kubeflow/common.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,9 @@ syntax = "proto3";

package flyteidl.plugins.kubeflow;

option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins";
option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins/kubeflow";


// NOTE(review): an enum with the same name and values is also declared in
// flyteidl/plugins/common.proto, which this file pulls in via `import public`.
// Confirm that this package-local copy is meant to be dropped in favour of it.
enum RestartPolicy {
// Zero value: this is what an unset RestartPolicy field reads as in proto3.
RESTART_POLICY_NEVER = 0;
RESTART_POLICY_ON_FAILURE = 1;
RESTART_POLICY_ALWAYS = 2;
}
import public "flyteidl/plugins/common.proto";

enum CleanPodPolicy {
CLEANPOD_POLICY_NONE = 0;
Expand All @@ -30,4 +25,4 @@ message RunPolicy {

// Number of retries before marking this job failed.
int32 backoff_limit = 4;
}
}
20 changes: 6 additions & 14 deletions flyteidl/protos/flyteidl/plugins/kubeflow/mpi.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@ syntax = "proto3";

package flyteidl.plugins.kubeflow;

option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins";
option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins/kubeflow";

import "flyteidl/core/tasks.proto";
import "flyteidl/plugins/kubeflow/common.proto";

// Proto for plugin that enables distributed training using https://github.com/kubeflow/mpi-operator
Expand All @@ -26,18 +25,11 @@ message DistributedMPITrainingTask {

// Replica specification for distributed MPI training
message DistributedMPITrainingReplicaSpec {
  // Fields 1-4 are superseded by `common`. They stay declared and are marked
  // deprecated instead of being reserved: a field number cannot be both
  // declared and reserved, and payloads that still set the old fields must
  // keep decoding.

  // Number of replicas.
  // Deprecated: use common.replicas.
  int32 replicas = 1 [deprecated = true];

  // Image used for the replica group.
  // Deprecated: use common.image.
  string image = 2 [deprecated = true];

  // Resources required for the replica group.
  // Deprecated: use common.resources.
  // core.Resources relies on this file's direct import of
  // flyteidl/core/tasks.proto.
  core.Resources resources = 3 [deprecated = true];

  // Restart policy determines whether pods will be restarted when they exit.
  // Deprecated: use common.restart_policy.
  RestartPolicy restart_policy = 4 [deprecated = true];

  // MPI sometimes requires different command set for different replica groups
  repeated string command = 5;

  // The common replica spec
  CommonReplicaSpec common = 6;
}
16 changes: 4 additions & 12 deletions flyteidl/protos/flyteidl/plugins/kubeflow/pytorch.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@ syntax = "proto3";

package flyteidl.plugins.kubeflow;

option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins";
option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins/kubeflow";

import "flyteidl/core/tasks.proto";
import "flyteidl/plugins/kubeflow/common.proto";

// Custom proto for torch elastic config for distributed training using
Expand Down Expand Up @@ -35,15 +34,8 @@ message DistributedPyTorchTrainingTask {
}

// Replica specification for distributed PyTorch training.
message DistributedPyTorchTrainingReplicaSpec {
  // Fields 1-4 are superseded by `common`. They stay declared and are marked
  // deprecated instead of being reserved: a field number cannot be both
  // declared and reserved, and payloads that still set the old fields must
  // keep decoding.

  // Number of replicas.
  // Deprecated: use common.replicas.
  int32 replicas = 1 [deprecated = true];

  // Image used for the replica group.
  // Deprecated: use common.image.
  string image = 2 [deprecated = true];

  // Resources required for the replica group.
  // Deprecated: use common.resources.
  // core.Resources relies on this file's direct import of
  // flyteidl/core/tasks.proto.
  core.Resources resources = 3 [deprecated = true];

  // RestartPolicy determines whether pods will be restarted when they exit.
  // Deprecated: use common.restart_policy.
  RestartPolicy restart_policy = 4 [deprecated = true];

  // The common replica spec
  CommonReplicaSpec common = 5;
}
16 changes: 4 additions & 12 deletions flyteidl/protos/flyteidl/plugins/kubeflow/tensorflow.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@ syntax = "proto3";

package flyteidl.plugins.kubeflow;

option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins";
option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins/kubeflow";

import "flyteidl/core/tasks.proto";
import "flyteidl/plugins/kubeflow/common.proto";

// Proto for plugin that enables distributed training using https://github.com/kubeflow/tf-operator
Expand All @@ -28,15 +27,8 @@ message DistributedTensorflowTrainingTask {
}

// Replica specification for distributed TensorFlow training.
message DistributedTensorflowTrainingReplicaSpec {
  // Fields 1-4 are superseded by `common`. They stay declared and are marked
  // deprecated instead of being reserved: a field number cannot be both
  // declared and reserved, and payloads that still set the old fields must
  // keep decoding.

  // Number of replicas.
  // Deprecated: use common.replicas.
  int32 replicas = 1 [deprecated = true];

  // Image used for the replica group.
  // Deprecated: use common.image.
  string image = 2 [deprecated = true];

  // Resources required for the replica group.
  // Deprecated: use common.resources.
  // core.Resources relies on this file's direct import of
  // flyteidl/core/tasks.proto.
  core.Resources resources = 3 [deprecated = true];

  // RestartPolicy determines whether pods will be restarted when they exit.
  // Deprecated: use common.restart_policy.
  RestartPolicy restart_policy = 4 [deprecated = true];

  // The common replica spec
  CommonReplicaSpec common = 5;
}

0 comments on commit fde3bc5

Please sign in to comment.