-
Notifications
You must be signed in to change notification settings - Fork 12
/
emr-cluster-job-step-functions.json
139 lines (139 loc) · 4.29 KB
/
emr-cluster-job-step-functions.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
{
  "Comment": "Creates an EMR cluster, adds a script-runner step (bootstrap.sh) and a spark-submit step, then terminates the cluster. An error caught from either step still terminates the cluster, after which the execution ends in Job_Failed instead of reporting success.",
  "StartAt": "Launch_EMR_Cluster",
  "States": {
    "Launch_EMR_Cluster": {
      "Comment": "Calls the EMR createCluster.sync integration for cluster StepFn-EMR-Hudi (emr-6.4.0) with one MASTER and one CORE instance fleet, each targeting 1 on-demand m5.xlarge. Later states read ClusterId from this state's output.",
      "Type": "Task",
      "Resource": "arn:aws:states:::elasticmapreduce:createCluster.sync",
      "Parameters": {
        "Name": "StepFn-EMR-Hudi",
        "ServiceRole": "EMR_DefaultRole",
        "JobFlowRole": "EMR_EC2_DefaultRole",
        "EbsRootVolumeSize": 10,
        "ReleaseLabel": "emr-6.4.0",
        "Applications": [
          { "Name": "Hadoop" },
          { "Name": "Spark" },
          { "Name": "Hive" },
          { "Name": "Livy" }
        ],
        "LogUri": "s3://<bucket-name>/emr/logs",
        "ManagedScalingPolicy": {
          "ComputeLimits": {
            "MaximumCapacityUnits": 2,
            "MaximumCoreCapacityUnits": 2,
            "MaximumOnDemandCapacityUnits": 2,
            "MinimumCapacityUnits": 1,
            "UnitType": "InstanceFleetUnits"
          }
        },
        "VisibleToAllUsers": true,
        "Instances": {
          "KeepJobFlowAliveWhenNoSteps": true,
          "Ec2KeyName": "<key-pair-name>",
          "Ec2SubnetId": "<subnet-id>",
          "InstanceFleets": [
            {
              "InstanceFleetType": "MASTER",
              "Name": "Master",
              "TargetOnDemandCapacity": 1,
              "InstanceTypeConfigs": [
                { "InstanceType": "m5.xlarge" }
              ]
            },
            {
              "InstanceFleetType": "CORE",
              "TargetOnDemandCapacity": 1,
              "InstanceTypeConfigs": [
                { "InstanceType": "m5.xlarge" }
              ]
            }
          ]
        }
      },
      "Next": "Copy_Hudi_JARs"
    },
    "Copy_Hudi_JARs": {
      "Comment": "Adds an EMR step that runs bootstrap.sh through script-runner.jar. The step result is stored under $.Result so ClusterId stays in the state data; any error is recorded under $.error-info and routed to Terminate_EMR_Cluster. Note: the script-runner.jar path points at the us-east-1 bucket.",
      "Type": "Task",
      "ResultPath": "$.Result",
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "ResultPath": "$.error-info",
          "Next": "Terminate_EMR_Cluster"
        }
      ],
      "Resource": "arn:aws:states:::elasticmapreduce:addStep.sync",
      "Parameters": {
        "ClusterId.$": "$.ClusterId",
        "Step": {
          "Name": "Copy JAR files",
          "ActionOnFailure": "CONTINUE",
          "HadoopJarStep": {
            "Jar": "s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar",
            "Args": [
              "s3://<bucket-name>/<script-path>/bootstrap.sh"
            ]
          }
        }
      },
      "Next": "Trigger_Spark_Job"
    },
    "Trigger_Spark_Job": {
      "Comment": "Adds an EMR step that runs spark-submit (cluster deploy mode, Hudi Spark bundle on --jars, Kryo serializer) through command-runner.jar. Any error is recorded under $.error-info and routed to Terminate_EMR_Cluster.",
      "Type": "Task",
      "ResultPath": "$.Result",
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "ResultPath": "$.error-info",
          "Next": "Terminate_EMR_Cluster"
        }
      ],
      "Resource": "arn:aws:states:::elasticmapreduce:addStep.sync",
      "Parameters": {
        "ClusterId.$": "$.ClusterId",
        "Step": {
          "Name": "Spark Transform Step",
          "ActionOnFailure": "CONTINUE",
          "HadoopJarStep": {
            "Jar": "command-runner.jar",
            "Args": [
              "spark-submit",
              "--deploy-mode",
              "cluster",
              "--jars",
              "/usr/lib/hudi/hudi-spark-bundle.jar",
              "--conf",
              "spark.serializer=org.apache.spark.serializer.KryoSerializer",
              "s3://<bucket-name>/<script-path-name>.py"
            ]
          }
        }
      },
      "Next": "Terminate_EMR_Cluster"
    },
    "Terminate_EMR_Cluster": {
      "Comment": "Terminates the cluster on both the normal path and the error path. ResultPath is null so the state input (including any $.error-info written by a Catch) passes through unchanged to Check_For_Step_Error.",
      "Type": "Task",
      "Resource": "arn:aws:states:::elasticmapreduce:terminateCluster.sync",
      "Parameters": {
        "ClusterId.$": "$.ClusterId"
      },
      "ResultPath": null,
      "Next": "Check_For_Step_Error"
    },
    "Check_For_Step_Error": {
      "Comment": "Fails the execution when a Catch recorded error-info; otherwise the run succeeded.",
      "Type": "Choice",
      "Choices": [
        {
          "Variable": "$.error-info",
          "IsPresent": true,
          "Next": "Job_Failed"
        }
      ],
      "Default": "Job_Succeeded"
    },
    "Job_Failed": {
      "Type": "Fail",
      "Error": "EmrWorkflowFailed",
      "Cause": "A step state raised an error and the cluster was terminated; the caught error is recorded under error-info in the state data."
    },
    "Job_Succeeded": {
      "Type": "Succeed"
    }
  }
}