### YamlMime:PythonClass
uid: azure.ai.ml.entities.SparkJob
name: SparkJob
fullName: azure.ai.ml.entities.SparkJob
module: azure.ai.ml.entities
inheritances:
- azure.ai.ml.entities._job.job.Job
- azure.ai.ml.entities._job.parameterized_spark.ParameterizedSpark
- azure.ai.ml.entities._job.job_io_mixin.JobIOMixin
- azure.ai.ml.entities._job.spark_job_entry_mixin.SparkJobEntryMixin
summary: A standalone Spark job.
constructor:
syntax: 'SparkJob(*, driver_cores: int | str | None = None, driver_memory: str |
None = None, executor_cores: int | str | None = None, executor_memory: str | None
= None, executor_instances: int | str | None = None, dynamic_allocation_enabled:
bool | str | None = None, dynamic_allocation_min_executors: int | str | None =
None, dynamic_allocation_max_executors: int | str | None = None, inputs: Dict[str,
Input | str | bool | int | float] | None = None, outputs: Dict[str, Output] |
None = None, compute: str | None = None, identity: Dict[str, str] | ManagedIdentityConfiguration
| AmlTokenConfiguration | UserIdentityConfiguration | None = None, resources:
Dict | SparkResourceConfiguration | None = None, **kwargs: Any)'
keywordOnlyParameters:
- name: driver_cores
description: The number of cores to use for the driver process, only in cluster
mode.
types:
- <xref:typing.Optional>[<xref:int>]
- name: driver_memory
description: 'The amount of memory to use for the driver process, formatted as
strings with a size unit
suffix ("k", "m", "g" or "t") (e.g. "512m", "2g").'
types:
- <xref:typing.Optional>[<xref:str>]
- name: executor_cores
description: The number of cores to use on each executor.
types:
- <xref:typing.Optional>[<xref:int>]
- name: executor_memory
description: 'The amount of memory to use per executor process, formatted as strings
with a size unit
suffix ("k", "m", "g" or "t") (e.g. "512m", "2g").'
types:
- <xref:typing.Optional>[<xref:str>]
- name: executor_instances
description: The initial number of executors.
types:
- <xref:typing.Optional>[<xref:int>]
- name: dynamic_allocation_enabled
description: 'Whether to use dynamic resource allocation, which scales the number
of
executors registered with this application up and down based on the workload.'
types:
- <xref:typing.Optional>[<xref:bool>]
- name: dynamic_allocation_min_executors
description: 'The lower bound for the number of executors if dynamic allocation
is
enabled.'
types:
- <xref:typing.Optional>[<xref:int>]
- name: dynamic_allocation_max_executors
description: 'The upper bound for the number of executors if dynamic allocation
is
enabled.'
types:
- <xref:typing.Optional>[<xref:int>]
- name: inputs
description: The mapping of input data bindings used in the job.
types:
- <xref:typing.Optional>[<xref:dict>[<xref:str>, <xref:azure.ai.ml.Input>]]
- name: outputs
description: The mapping of output data bindings used in the job.
types:
- <xref:typing.Optional>[<xref:dict>[<xref:str>, <xref:azure.ai.ml.Output>]]
- name: compute
description: The compute resource the job runs on.
types:
- <xref:typing.Optional>[<xref:str>]
- name: identity
description: The identity that the Spark job will use while running on compute.
types:
- <xref:typing.Optional>[<xref:typing.Union>[<xref:dict>[<xref:str>, <xref:str>],
<xref:azure.ai.ml.ManagedIdentityConfiguration>, <xref:azure.ai.ml.AmlTokenConfiguration>,
<xref:azure.ai.ml.UserIdentityConfiguration>]]
- name: resources
  description: The compute resource configuration for the job.
  types:
  - <xref:typing.Optional>[<xref:typing.Union>[<xref:dict>, <xref:azure.ai.ml.entities.SparkResourceConfiguration>]]
examples:
- "Configuring a SparkJob.<!--[!code-python[Main](les\\ml_samples_spark_configurations.py\
\ )]-->\n\n<!-- literal_block {\"ids\": [], \"classes\": [], \"names\": [], \"dupnames\"\
: [], \"backrefs\": [], \"source\": \"C:\\\\hostedtoolcache\\\\windows\\\\Python\\\
\\3.11.9\\\\x64\\\\Lib\\\\site-packages\\\\py2docfx\\\\dist_temp\\\\8\\\\azure-ai-ml-1.16.1\\\
\\samples\\\\ml_samples_spark_configurations.py\", \"xml:space\": \"preserve\",\
\ \"force\": false, \"language\": \"python\", \"highlight_args\": {\"linenostart\"\
: 1}, \"linenos\": false} -->\n\n````python\n\n from azure.ai.ml import Input,\
\ Output\n from azure.ai.ml.entities import SparkJob\n\n spark_job = SparkJob(\n\
\ code=\"./sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src\"\
,\n entry={\"file\": \"sampleword.py\"},\n conf={\n \"spark.driver.cores\"\
: 2,\n \"spark.driver.memory\": \"1g\",\n \"spark.executor.cores\"\
: 1,\n \"spark.executor.memory\": \"1g\",\n \"spark.executor.instances\"\
: 1,\n },\n environment=\"AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:33\"\
,\n inputs={\n \"input1\": Input(\n type=\"uri_file\"\
, path=\"azureml://datastores/workspaceblobstore/paths/python/data.csv\", mode=\"\
direct\"\n )\n },\n compute=\"synapsecompute\",\n outputs={\"\
component_out_path\": Output(type=\"uri_folder\")},\n args=\"--input1 ${{inputs.input1}}\
\ --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}\",\n \
\ )\n\n\n ````\n"
methods:
- uid: azure.ai.ml.entities.SparkJob.dump
name: dump
summary: Dumps the job content into a file in YAML format.
signature: 'dump(dest: str | PathLike | IO, **kwargs: Any) -> None'
parameters:
- name: dest
description: 'The local path or file stream to write the YAML content to.
If dest is a file path, a new file will be created.
If dest is an open file, the file will be written to directly.'
isRequired: true
types:
- <xref:typing.Union>[<xref:PathLike>, <xref:str>, <xref:typing.IO>[<xref:typing.AnyStr>]]
exceptions:
- type: FileExistsError
description: Raised if dest is a file path and the file already exists.
- type: IOError
description: Raised if dest is an open file and the file is not writable.
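  examples:
  - |-
    A minimal sketch (assumed usage, not from the upstream samples): writing the job
    definition from the class-level example to a local YAML file. The destination path
    is an illustrative placeholder.

    ````python
    # Hypothetical path; dump() raises FileExistsError if the file already exists.
    spark_job.dump("./spark_job_definition.yml")
    ````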
- uid: azure.ai.ml.entities.SparkJob.filter_conf_fields
name: filter_conf_fields
summary: 'Filters out the fields of the conf attribute that are not among the Spark
configuration fields
listed in ~azure.ai.ml._schema.job.parameterized_spark.CONF_KEY_MAP and returns
them in their own dictionary.'
signature: filter_conf_fields() -> Dict[str, str]
return:
description: A dictionary of the conf fields that are not Spark configuration
fields.
types:
- <xref:dict>[<xref:str>, <xref:str>]
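  examples:
  - |-
    A minimal sketch (assumed usage): separating custom ``conf`` entries from the
    recognized Spark configuration fields on the job from the class-level example.

    ````python
    # Assuming spark_job.conf mixes recognized Spark settings with custom keys,
    # the entries that are not Spark configuration fields come back in a new dict.
    non_spark_fields = spark_job.filter_conf_fields()
    ````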
attributes:
- uid: azure.ai.ml.entities.SparkJob.base_path
name: base_path
summary: The base path of the resource.
return:
description: The base path of the resource.
types:
- <xref:str>
- uid: azure.ai.ml.entities.SparkJob.creation_context
name: creation_context
summary: The creation context of the resource.
return:
description: The creation metadata for the resource.
types:
- <xref:typing.Optional>[<xref:azure.ai.ml.entities.SystemData>]
- uid: azure.ai.ml.entities.SparkJob.entry
name: entry
- uid: azure.ai.ml.entities.SparkJob.environment
name: environment
summary: The Azure ML environment to run the Spark component or job in.
return:
description: The Azure ML environment to run the Spark component or job in.
types:
- <xref:typing.Optional>[<xref:typing.Union>[<xref:str>, <xref:azure.ai.ml.entities.Environment>]]
- uid: azure.ai.ml.entities.SparkJob.id
name: id
summary: The resource ID.
return:
description: The global ID of the resource, an Azure Resource Manager (ARM) ID.
types:
- <xref:typing.Optional>[<xref:str>]
- uid: azure.ai.ml.entities.SparkJob.identity
name: identity
summary: The identity that the Spark job will use while running on compute.
return:
description: The identity that the Spark job will use while running on compute.
types:
- <xref:typing.Optional>[<xref:typing.Union>[<xref:azure.ai.ml.ManagedIdentityConfiguration>,
<xref:azure.ai.ml.AmlTokenConfiguration>, <xref:azure.ai.ml.UserIdentityConfiguration>]]
- uid: azure.ai.ml.entities.SparkJob.inputs
name: inputs
- uid: azure.ai.ml.entities.SparkJob.log_files
name: log_files
summary: Job output files.
return:
description: The dictionary of log names and URLs.
types:
- <xref:typing.Optional>[<xref:typing.Dict>[<xref:str>, <xref:str>]]
- uid: azure.ai.ml.entities.SparkJob.outputs
name: outputs
- uid: azure.ai.ml.entities.SparkJob.resources
name: resources
summary: The compute resource configuration for the job.
return:
description: The compute resource configuration for the job.
types:
- <xref:typing.Optional>[<xref:azure.ai.ml.entities.SparkResourceConfiguration>]
- uid: azure.ai.ml.entities.SparkJob.status
name: status
summary: "The status of the job.\n\nCommon values returned include \"Running\",\
\ \"Completed\", and \"Failed\". All possible values are:\n\n * NotStarted -\
\ This is a temporary state that client-side Run objects are in before cloud submission.\
\ \n\n * Starting - The Run has started being processed in the cloud. The caller\
\ has a run ID at this point. \n\n * Provisioning - On-demand compute is being\
\ created for a given job submission. \n\n * Preparing - The run environment\
\ is being prepared and is in one of two stages:\n\n * Docker image build\
\ \n\n * conda environment setup \n\n * Queued - The job is queued on\
\ the compute target. For example, in BatchAI, the job is in a queued state\n\n\
\ while waiting for all the requested nodes to be ready.\n\n * Running\
\ - The job has started to run on the compute target. \n\n * Finalizing - User\
\ code execution has completed, and the run is in post-processing stages. \n\n\
\ * CancelRequested - Cancellation has been requested for the job. \n\n *\
\ Completed - The run has completed successfully. This includes both the user\
\ code execution and run\n\n post-processing stages.\n\n * Failed - The\
\ run failed. Usually the Error property on a run will provide details as to why.\
\ \n\n * Canceled - Follows a cancellation request and indicates that the run\
\ is now successfully cancelled. \n\n * NotResponding - For runs that have Heartbeats\
\ enabled, no heartbeat has been recently sent."
return:
description: Status of the job.
types:
- <xref:typing.Optional>[<xref:str>]
- uid: azure.ai.ml.entities.SparkJob.studio_url
name: studio_url
summary: Azure ML studio endpoint.
return:
description: The URL to the job details page.
types:
- <xref:typing.Optional>[<xref:str>]
- uid: azure.ai.ml.entities.SparkJob.type
name: type
summary: The type of the job.
return:
description: The type of the job.
types:
- <xref:typing.Optional>[<xref:str>]
- uid: azure.ai.ml.entities.SparkJob.CODE_ID_RE_PATTERN
name: CODE_ID_RE_PATTERN
  signature: CODE_ID_RE_PATTERN = re.compile('\\/subscriptions\\/(?P<subscription>[\\w,-]+)\\/resourceGroups\\/(?P<resource_group>[\\w,-]+)\\/providers\\/Microsoft\\.MachineLearningServices\\/workspaces\\/(?P<workspace>[\\w,-]+)\\/codes\\/(?P<code_id>[\\w,-]+)')