/
RegexOpDesc.scala
53 lines (46 loc) · 2.04 KB
/
RegexOpDesc.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
package edu.uci.ics.texera.workflow.operators.regex
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp
import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo
import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity}
import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort}
import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo}
import edu.uci.ics.texera.workflow.common.metadata.annotations.AutofillAttributeName
import edu.uci.ics.texera.workflow.common.operators.filter.FilterOpDesc
/**
  * Logical descriptor of the "Regular Expression" operator.
  *
  * Holds the user-supplied configuration (target column, pattern and the
  * case-insensitivity flag) and hands it to a [[RegexOpExec]] when the
  * physical operator is built.
  */
class RegexOpDesc extends FilterOpDesc {

  // The fields below are mutable `var`s with JSON annotations; presumably they are
  // populated by Jackson when the operator configuration is deserialized.

  /** Name of the column the regular expression is applied to. */
  @JsonProperty(value = "attribute", required = true)
  @JsonPropertyDescription("column to search regex on")
  @AutofillAttributeName
  var attribute: String = _

  /** The regular expression pattern supplied by the user. */
  @JsonProperty(value = "regex", required = true)
  @JsonPropertyDescription("regular expression")
  var regex: String = _

  /**
    * Optional flag, declared with a default of "false", forwarded to [[RegexOpExec]].
    *
    * The description text now agrees with the property title and the field name;
    * it previously read "case sensitive", the opposite of what the flag denotes.
    */
  @JsonProperty(required = false, defaultValue = "false")
  @JsonSchemaTitle("Case Insensitive")
  @JsonPropertyDescription("regex match is case insensitive")
  var caseInsensitive: Boolean = _

  /**
    * Builds the physical operator for this descriptor.
    *
    * @param workflowId  identity of the workflow this operator belongs to
    * @param executionId identity of the execution the operator is created for
    * @return a physical operator created through `PhysicalOp.oneToOnePhysicalOp`,
    *         wired with the input and output ports declared in [[operatorInfo]]
    */
  override def getPhysicalOp(
      workflowId: WorkflowIdentity,
      executionId: ExecutionIdentity
  ): PhysicalOp = {
    PhysicalOp
      .oneToOnePhysicalOp(
        workflowId,
        executionId,
        operatorIdentifier,
        // The two arguments passed to the init function are not needed here;
        // the executor only receives this descriptor's configuration.
        OpExecInitInfo((_, _) => new RegexOpExec(regex, caseInsensitive, attribute))
      )
      .withInputPorts(operatorInfo.inputPorts)
      .withOutputPorts(operatorInfo.outputPorts)
  }

  /**
    * Static metadata of the operator: display name, description, operator group,
    * one input port, one output port, and the reconfiguration-support flag.
    */
  override def operatorInfo: OperatorInfo =
    OperatorInfo(
      userFriendlyName = "Regular Expression",
      operatorDescription = "Search a regular expression in a string column",
      operatorGroupName = OperatorGroupConstants.SEARCH_GROUP,
      inputPorts = List(InputPort()),
      outputPorts = List(OutputPort()),
      supportReconfiguration = true
    )
}