Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions proto/snapchat/research/gbml/gigl_resource_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ message VertexAiTrainerConfig {
// Num workers for training job
uint32 num_replicas = 4;
}

// (deprecated)
// Configuration for KFP training resources
message KFPTrainerConfig {
Expand Down Expand Up @@ -86,11 +86,17 @@ message VertexAiResourceConfig {
uint32 gpu_limit = 3;
// Num workers for job
uint32 num_replicas = 4;
// Timeout in seconds for the job. If unset or zero, will use the default @ google.cloud.aiplatform.CustomJob, which is 7 days:
// Timeout in seconds for the job. If unset or zero, will use the default @ google.cloud.aiplatform.CustomJob, which is 7 days:
// https://github.com/googleapis/python-aiplatform/blob/58fbabdeeefd1ccf1a9d0c22eeb5606aeb9c2266/google/cloud/aiplatform/jobs.py#L2252-L2253
uint32 timeout = 5;
// Region override
// If provided, then the Vertex AI Job will be launched in the provided region.
// Otherwise, will launch jobs in the region specified at CommonComputeConfig.region
// ex: "us-west1"
// NOTE: If set, then there may be data egress costs from CommonComputeConfig.region -> gcp_region_override
string gcp_region_override = 6;
}

// Configuration for KFP job resources
message KFPResourceConfig {
// Num CPU requested for job (str) which can be a number or a number followed by "m", which means 1/1000
Expand Down Expand Up @@ -187,7 +193,7 @@ message GiglResourceConfig {
// (deprecated)
// Configuration for inferencer
DataflowResourceConfig inferencer_config = 16 [deprecated=true];

// Configuration for distributed trainer
TrainerResourceConfig trainer_resource_config = 17;
// Configuration for distributed inferencer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,42 @@ def region(self) -> str:
"""
return self.shared_resource_config.common_compute_config.region

@property
def vertex_ai_trainer_region(self) -> str:
    """
    Region in which the Vertex AI training job should be launched (e.g. us-central1).

    Resolves to ``gcp_region_override`` from the trainer's VertexAiResourceConfig when
    that field is set (non-empty); otherwise falls back to ``self.region``.

    Raises:
        ValueError: If ``self.trainer_config`` is not a VertexAiResourceConfig.
    """
    vertex_ai_config = self.trainer_config
    if isinstance(vertex_ai_config, VertexAiResourceConfig):
        # An unset/empty override is falsy, so the shared region is used instead.
        return vertex_ai_config.gcp_region_override or self.region
    raise ValueError(
        "Vertex AI trainer region is only supported for Vertex AI trainers."
    )

@property
def vertex_ai_inferencer_region(self) -> str:
    """
    Region in which the Vertex AI inference job should be launched (e.g. us-central1).

    Resolves to ``gcp_region_override`` from the inferencer's VertexAiResourceConfig when
    that field is set (non-empty); otherwise falls back to ``self.region``.

    Raises:
        ValueError: If ``self.inferencer_config`` is not a VertexAiResourceConfig.
    """
    vertex_ai_config = self.inferencer_config
    if isinstance(vertex_ai_config, VertexAiResourceConfig):
        # An unset/empty override is falsy, so the shared region is used instead.
        return vertex_ai_config.gcp_region_override or self.region
    raise ValueError(
        "Vertex AI inferencer region is only supported for Vertex AI inferencers."
    )

@property
def trainer_config(
self,
Expand Down
3 changes: 2 additions & 1 deletion python/gigl/src/inference/v2/glt_inferencer.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ def __execute_VAI_inference(
if inferencer_resource_config.timeout
else None,
)

vertex_ai_service = VertexAIService(
project=resource_config_wrapper.project,
location=resource_config_wrapper.region,
location=resource_config_wrapper.vertex_ai_inferencer_region,
service_account=resource_config_wrapper.service_account_email,
staging_bucket=resource_config_wrapper.temp_assets_regional_bucket_path.uri,
)
Expand Down
7 changes: 4 additions & 3 deletions python/gigl/src/training/v2/glt_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,10 @@ def __execute_VAI_training(
if trainer_resource_config.timeout
else None,
)

vertex_ai_service = VertexAIService(
project=resource_config.project,
location=resource_config.region,
location=resource_config.vertex_ai_trainer_region,
service_account=resource_config.service_account_email,
staging_bucket=resource_config.temp_assets_regional_bucket_path.uri,
)
Expand Down Expand Up @@ -205,8 +206,8 @@ def run(

initialize_metrics(task_config_uri=task_config_uri, service_name=args.job_name)

glt_inferencer = GLTTrainer()
glt_inferencer.run(
glt_trainer = GLTTrainer()
glt_trainer.run(
applied_task_identifier=applied_task_identifier,
task_config_uri=task_config_uri,
resource_config_uri=resource_config_uri,
Expand Down
2 changes: 1 addition & 1 deletion python/snapchat/research/gbml/gbml_config_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ class GbmlConfig(google.protobuf.message.Message):
GRAPH_DB_ARGS_FIELD_NUMBER: builtins.int
GRAPH_DB_SAMPLER_CONFIG_FIELD_NUMBER: builtins.int
graph_db_ingestion_cls_path: builtins.str
"""Python class path pointing to user-written
"""Python class path pointing to user-written
`BaseIngestion` class definition. e.g. `my.team.graph_db.BaseInjectionImpl`.
This class is currently, as an implementation detail, used for ingestion only.
We document this *purely* for information purposes and may change the implementation at any time.
Expand Down
46 changes: 23 additions & 23 deletions python/snapchat/research/gbml/gigl_resource_config_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading