-
Notifications
You must be signed in to change notification settings - Fork 232
/
azure.ai.ml.entities.SparkComponent.yml
246 lines (234 loc) · 10.1 KB
/
azure.ai.ml.entities.SparkComponent.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
### YamlMime:PythonClass
uid: azure.ai.ml.entities.SparkComponent
name: SparkComponent
fullName: azure.ai.ml.entities.SparkComponent
module: azure.ai.ml.entities
inheritances:
- azure.ai.ml.entities._component.component.Component
- azure.ai.ml.entities._job.parameterized_spark.ParameterizedSpark
- azure.ai.ml.entities._job.spark_job_entry_mixin.SparkJobEntryMixin
- azure.ai.ml.entities._component.code.ComponentCodeMixin
summary: Spark component version, used to define a Spark Component or Job.
constructor:
syntax: 'SparkComponent(*, code: PathLike | str | None = ''.'', entry: Dict[str,
str] | SparkJobEntry | None = None, py_files: List[str] | None = None, jars: List[str]
| None = None, files: List[str] | None = None, archives: List[str] | None = None,
driver_cores: int | str | None = None, driver_memory: str | None = None, executor_cores:
int | str | None = None, executor_memory: str | None = None, executor_instances:
int | str | None = None, dynamic_allocation_enabled: bool | str | None = None,
dynamic_allocation_min_executors: int | str | None = None, dynamic_allocation_max_executors:
int | str | None = None, conf: Dict[str, str] | None = None, environment: Environment
| str | None = None, inputs: Dict | None = None, outputs: Dict | None = None,
args: str | None = None, **kwargs: Any)'
keywordOnlyParameters:
- name: code
description: 'The source code to run the job. Can be a local path or "http:",
"https:", or "azureml:" URL pointing
to a remote location. Defaults to ".", indicating the current directory.'
defaultValue: .
- name: entry
description: The file or class entry point.
types:
- <xref:typing.Optional>[<xref:typing.Union>[<xref:dict>[<xref:str>, <xref:str>],
<xref:azure.ai.ml.entities.SparkJobEntry>]]
- name: py_files
description: The list of .zip, .egg or .py files to place on the PYTHONPATH for
Python apps. Defaults to None.
types:
- <xref:typing.Optional>[<xref:typing.List>[<xref:str>]]
- name: jars
description: The list of .JAR files to include on the driver and executor classpaths.
Defaults to None.
types:
- <xref:typing.Optional>[<xref:typing.List>[<xref:str>]]
- name: files
description: The list of files to be placed in the working directory of each executor.
Defaults to None.
types:
- <xref:typing.Optional>[<xref:typing.List>[<xref:str>]]
- name: archives
description: 'The list of archives to be extracted into the working directory
of each executor.
Defaults to None.'
types:
- <xref:typing.Optional>[<xref:typing.List>[<xref:str>]]
- name: driver_cores
description: The number of cores to use for the driver process, only in cluster
mode.
types:
- <xref:typing.Optional>[<xref:int>]
- name: driver_memory
description: 'The amount of memory to use for the driver process, formatted as
strings with a size unit
suffix ("k", "m", "g" or "t") (e.g. "512m", "2g").'
types:
- <xref:typing.Optional>[<xref:str>]
- name: executor_cores
description: The number of cores to use on each executor.
types:
- <xref:typing.Optional>[<xref:int>]
- name: executor_memory
description: 'The amount of memory to use per executor process, formatted as strings
with a size unit
suffix ("k", "m", "g" or "t") (e.g. "512m", "2g").'
types:
- <xref:typing.Optional>[<xref:str>]
- name: executor_instances
description: The initial number of executors.
types:
- <xref:typing.Optional>[<xref:int>]
- name: dynamic_allocation_enabled
description: 'Whether to use dynamic resource allocation, which scales the number
of
executors registered with this application up and down based on the workload.
Defaults to False.'
types:
- <xref:typing.Optional>[<xref:bool>]
- name: dynamic_allocation_min_executors
description: 'The lower bound for the number of executors if dynamic allocation
is
enabled.'
types:
- <xref:typing.Optional>[<xref:int>]
- name: dynamic_allocation_max_executors
description: 'The upper bound for the number of executors if dynamic allocation
is
enabled.'
types:
- <xref:typing.Optional>[<xref:int>]
- name: conf
description: A dictionary with pre-defined Spark configurations key and values.
Defaults to None.
types:
- <xref:typing.Optional>[<xref:dict>[<xref:str>, <xref:str>]]
- name: environment
description: The Azure ML environment to run the job in.
types:
- <xref:typing.Optional>[<xref:typing.Union>[<xref:str>, <xref:azure.ai.ml.entities.Environment>]]
- name: inputs
description: A mapping of input names to input data sources used in the job. Defaults
to None.
types:
- <xref:typing.Optional>[<xref:dict>[<xref:str>, <xref:typing.Union>[ <xref:azure.ai.ml.entities._job.pipeline._io.NodeOutput>,
<xref:azure.ai.ml.Input>, <xref:str>, <xref:bool>, <xref:int>, <xref:float>,
<xref:Enum>, ]]]
- name: outputs
description: A mapping of output names to output data sources used in the job.
Defaults to None.
types:
- <xref:typing.Optional>[<xref:dict>[<xref:str>, <xref:typing.Union>[<xref:str>,
<xref:azure.ai.ml.Output>]]]
- name: args
description: The arguments for the job. Defaults to None.
types:
- <xref:typing.Optional>[<xref:str>]
examples:
- "Creating SparkComponent.<!--[!code-python[Main](samples\\ml_samples_spark_configurations.py\
\ )]-->\n\n<!-- literal_block {\"ids\": [], \"classes\": [], \"names\": [], \"dupnames\"\
: [], \"backrefs\": [], \"source\": \"C:\\\\hostedtoolcache\\\\windows\\\\Python\\\
\\3.11.9\\\\x64\\\\Lib\\\\site-packages\\\\py2docfx\\\\dist_temp\\\\8\\\\azure-ai-ml-1.16.1\\\
\\samples\\\\ml_samples_spark_configurations.py\", \"xml:space\": \"preserve\",\
\ \"force\": false, \"language\": \"python\", \"highlight_args\": {\"linenostart\"\
: 1}, \"linenos\": false} -->\n\n````python\n\n from azure.ai.ml.entities import\
\ SparkComponent\n\n component = SparkComponent(\n name=\"add_greeting_column_spark_component\"\
,\n display_name=\"Aml Spark add greeting column test module\",\n description=\"\
Aml Spark add greeting column test module\",\n version=\"1\",\n inputs={\n\
\ \"file_input\": {\"type\": \"uri_file\", \"mode\": \"direct\"},\n \
\ },\n driver_cores=2,\n driver_memory=\"1g\",\n executor_cores=1,\n\
\ executor_memory=\"1g\",\n executor_instances=1,\n code=\"./src\"\
,\n entry={\"file\": \"add_greeting_column.py\"},\n py_files=[\"utils.zip\"\
],\n files=[\"my_files.txt\"],\n args=\"--file_input ${{inputs.file_input}}\"\
,\n base_path=\"./sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/spark_job_in_pipeline\"\
,\n )\n\n\n ````\n"
methods:
- uid: azure.ai.ml.entities.SparkComponent.dump
name: dump
summary: Dump the component content into a file in yaml format.
signature: 'dump(dest: str | PathLike | IO, **kwargs: Any) -> None'
parameters:
- name: dest
description: 'The destination to receive this component''s content.
Must be either a path to a local file, or an already-open file stream.
If dest is a file path, a new file will be created,
and an exception is raised if the file exists.
If dest is an open file, the file will be written to directly,
and an exception will be raised if the file is not writable.'
isRequired: true
types:
- <xref:typing.Union>[<xref:PathLike>, <xref:str>, <xref:typing.IO>[<xref:typing.AnyStr>]]
attributes:
- uid: azure.ai.ml.entities.SparkComponent.base_path
name: base_path
summary: The base path of the resource.
return:
description: The base path of the resource.
types:
- <xref:str>
- uid: azure.ai.ml.entities.SparkComponent.creation_context
name: creation_context
summary: The creation context of the resource.
return:
description: The creation metadata for the resource.
types:
- <xref:typing.Optional>[<xref:azure.ai.ml.entities.SystemData>]
- uid: azure.ai.ml.entities.SparkComponent.display_name
name: display_name
summary: Display name of the component.
return:
description: Display name of the component.
types:
- <xref:str>
- uid: azure.ai.ml.entities.SparkComponent.entry
name: entry
- uid: azure.ai.ml.entities.SparkComponent.environment
name: environment
summary: The Azure ML environment to run the Spark component or job in.
return:
description: The Azure ML environment to run the Spark component or job in.
types:
- <xref:typing.Optional>[<xref:typing.Union>[<xref:str>, <xref:azure.ai.ml.entities.Environment>]]
- uid: azure.ai.ml.entities.SparkComponent.id
name: id
summary: The resource ID.
return:
description: The global ID of the resource, an Azure Resource Manager (ARM) ID.
types:
- <xref:typing.Optional>[<xref:str>]
- uid: azure.ai.ml.entities.SparkComponent.inputs
name: inputs
summary: Inputs of the component.
return:
description: Inputs of the component.
types:
- <xref:dict>
- uid: azure.ai.ml.entities.SparkComponent.is_deterministic
name: is_deterministic
summary: Whether the component is deterministic.
return:
description: Whether the component is deterministic.
types:
- <xref:bool>
- uid: azure.ai.ml.entities.SparkComponent.outputs
name: outputs
summary: Outputs of the component.
return:
description: Outputs of the component.
types:
- <xref:dict>
- uid: azure.ai.ml.entities.SparkComponent.type
name: type
summary: Type of the component. For a SparkComponent this is 'spark' (the base class default is 'command').
return:
description: Type of the component.
types:
- <xref:str>
- uid: azure.ai.ml.entities.SparkComponent.version
name: version
summary: Version of the component.
return:
description: Version of the component.
types:
- <xref:str>
- uid: azure.ai.ml.entities.SparkComponent.CODE_ID_RE_PATTERN
name: CODE_ID_RE_PATTERN
signature: CODE_ID_RE_PATTERN = re.compile('\\/subscriptions\\/(?P<subscription>[\\w,-]+)\\/resourceGroups\\/(?P<resource_group>[\\w,-]+)\\/providers\\/Microsoft\\.MachineLearningServices\\/workspaces\\/(?P<workspace>[\\w,-]+)\\/codes\\/(?P<co)