Skip to content

Commit

Permalink
Fix: reconstruction of if-then-else constructs (#790)
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce committed May 11, 2024
2 parents 9385f2b + a7fc3f7 commit 2dd9378
Show file tree
Hide file tree
Showing 13 changed files with 159 additions and 89 deletions.
2 changes: 1 addition & 1 deletion src/dataflow/environments/built-in.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ registerBuiltInFunctions(['<<-'], processAssignment,
registerBuiltInFunctions(['->'], processAssignment, { swapSourceAndTarget: true, canBeReplacement: true }, )
registerBuiltInFunctions(['->>'], processAssignment, { superAssignment: true, swapSourceAndTarget: true, canBeReplacement: true } )
registerBuiltInFunctions(['&&', '||', '&', '|'], processSpecialBinOp, { lazy: true } )
registerBuiltInFunctions(['|>'], processPipe, {}, )
registerBuiltInFunctions(['|>', '%>%'], processPipe, {}, )
registerBuiltInFunctions(['function', '\\'], processFunctionDefinition, {}, )
registerBuiltInFunctions(['quote', 'substitute', 'bquote'], processQuote, { quoteArgumentsWithIndex: 0 }, )
registerBuiltInFunctions(['for'], processForLoop, {}, )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ export function normalizeExpression(data: NormalizerData, obj: XmlBasedJson): RN

const children = normalizeExpressions(childData, childrenSource)

const [delimiters, nodes] = partition(children, x => x.type === RType.Delimiter)
const [delimiters, nodes] = partition(children, x => x.type === RType.Delimiter || x.type === RType.Comment)

if(nodes.length === 1) {
const result = nodes[0] as RNode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export function normalizeRootObjToAst(
log.debug('no children found, assume empty input')
}

const [delimiters, nodes] = partition(parsedChildren, x => x.type === RType.Delimiter)
const [delimiters, nodes] = partition(parsedChildren, x => x.type === RType.Delimiter || x.type === RType.Comment)

return {
type: RType.ExpressionList,
Expand Down
34 changes: 25 additions & 9 deletions src/reconstruct/reconstruct.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,21 @@ function reconstructUnaryOp(leaf: RNodeWithParent, operand: Code, configuration:
}
}

function reconstructBinaryOp(n: RBinaryOp<ParentInformation> | RPipe<ParentInformation>, lhs: Code, rhs: Code): Code {
function reconstructBinaryOp(n: RBinaryOp<ParentInformation> | RPipe<ParentInformation>, lhs: Code, rhs: Code, config: ReconstructionConfiguration): Code {
if(lhs.length === 0 && rhs.length === 0) {
return []
}
if(lhs.length === 0) { // if we have no lhs, only return rhs
if(isSelected(config, n)) {
return plain(getLexeme(n))
} else {
return []
}
} else if(lhs.length === 0) { // if we have no lhs, only return rhs
return rhs
}
if(rhs.length === 0) { // if we have no rhs we have to keep everything to get the rhs
return plain(getLexeme(n))
} else if(rhs.length === 0) {
if(isSelected(config, n)) {
return plain(getLexeme(n))
} else {
return lhs
}
}

return reconstructRawBinaryOperator(lhs, n.type === RType.Pipe ? '|>' : n.operator, rhs)
Expand Down Expand Up @@ -205,7 +211,7 @@ function reconstructIfThenElse(ifThenElse: RIfThenElse<ParentInformation>, condi
return otherwise
}
} else {
const thenRemainder = indentBy(then.splice(1), 1)
const thenRemainder = indentBy(then.slice(1), 1)
if(thenRemainder.length > 0) {
if(!thenRemainder[thenRemainder.length - 1].line.trim().endsWith('else')) {
thenRemainder[thenRemainder.length - 1].line += ' else '
Expand Down Expand Up @@ -344,10 +350,20 @@ function reconstructSpecialInfixFunctionCall(args: (Code | typeof EmptyArgument)
}

function reconstructFunctionCall(call: RFunctionCall<ParentInformation>, functionName: Code, args: (Code | typeof EmptyArgument)[], configuration: ReconstructionConfiguration): Code {
const selected = isSelected(configuration, call)
if(!selected) {
const f = args.filter(a => a !== EmptyArgument && a.length !== 0) as Code[]
if(f.length === 0) {
return []
} else if(f.length === 1) {
return f[0]
}
}

if(call.infixSpecial === true) {
return reconstructSpecialInfixFunctionCall(args, call)
}
if(call.flavor === 'named' && isSelected(configuration, call)) {
if(call.flavor === 'named' && selected) {
return plain(getLexeme(call))
}
const filteredArgs = args.filter(a => a !== undefined && a.length > 0)
Expand Down
2 changes: 1 addition & 1 deletion src/slicing/criterion/filters/all-variables.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ const defaultAllVariablesCollectorFolds: FoldFunctions<ParentInformation, NodeId
if(c.flavor === 'named') {
return c.functionName.content === 'library' ? args.slice(1) : args
} else {
return [...a, ...args]
return [...a.filter(x => x !== EmptyArgument), ...args]
}
},
foldArgument: (_: unknown, _a: unknown, b: NodeId[] | undefined) => b ?? [],
Expand Down
2 changes: 1 addition & 1 deletion test/functionality/_helper/shell.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,8 @@ export function assertSliced(name: string | TestLabel, shell: RShell, input: str
`got: ${result.reconstruct.code}, vs. expected: ${expected}, for input ${input} (slice for ${JSON.stringify(criteria)}: ${printIdMapping(result.slice.decodedCriteria.map(({ id }) => id), result.normalize.idMap)}), url: ${graphToMermaidUrl(result.dataflow.graph, true, result.slice.result)}`
)
} catch(e) {
console.error(normalizedAstToMermaidUrl(result.normalize.ast))
console.error(`got:\n${result.reconstruct.code}\nvs. expected:\n${expected}`)
console.error(normalizedAstToMermaidUrl(result.normalize.ast))
throw e
}
})
Expand Down
8 changes: 4 additions & 4 deletions test/functionality/benchmark/slicer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ cat(d)`
assert.deepStrictEqual(stats.perSliceMeasurements.sliceSize, {
// only one entry
lines: { min: 2, max: 5, median: 3, mean: (2+3+5)/3, std: 1.247219128924647, total: 10 },
characters: { min: 17, max: 46, median: 24, mean: 29, std: 12.355835328567093, total: 87 },
nonWhitespaceCharacters: { min: 14, max: 32, median: 18, mean: 21.333333333333332, std: 7.71722460186015, total: 64 },
tokens: { min: 13, max: 40, median: 19, mean: 24, std: 11.575836902790225, total: 72 },
normalizedTokens: { min: 8, max: 22, median: 11, mean: (8+11+22)/3, std: 6.018490028422596, total: 41 },
characters: { min: 17, max: 41, median: 24, mean: 27.333333333333332, std: 10.077477638553981, total: 82 },
nonWhitespaceCharacters: { min: 14, max: 27, median: 18, mean: 19.666666666666668, std: 5.436502143433363, total: 59 },
tokens: { min: 13, max: 35, median: 19, mean: 22.333333333333332, std: 9.285592184789413, total: 67 },
normalizedTokens: { min: 8, max: 19, median: 11, mean: (8+11+19)/3, std: 4.642796092394707, total: 38 },
dataflowNodes: { min: 3, max: 14, median: 6, mean: (3+6+14)/3, std: 4.642796092394707, total: 23 },
autoSelected: { min: 1, max: 1, median: 1, mean: 1, std: 0, total: 3 } // always select one library statement
}, statInfo)
Expand Down
2 changes: 1 addition & 1 deletion test/functionality/r-bridge/lang/ast/parse-assignments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { RType } from '../../../../../src/r-bridge/lang-4.x/ast/model/type'

describe('Parse simple assignments',
withShell(shell => {
describe('Constant Assignments', () => {
describe('Constant assignments', () => {
for(const op of AssignmentOperators) {
const opOffset = op.length - 1
const data = OperatorDatabase[op]
Expand Down
29 changes: 16 additions & 13 deletions test/functionality/r-bridge/lang/ast/parse-operations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,23 +59,26 @@ describe('Parse simple operations', withShell(shell => {

describe('Intermixed with comments', () => {
assertAst(label('1 + # comment\n2', ['binary-operator', 'infix-calls', 'function-calls', 'numbers', 'comments', 'newlines', ...OperatorDatabase['+'].capabilities]),
shell, '1 + # comment\n2', exprList({ // hoist children
shell, '1 + # comment\n2', { // hoist children
type: RType.ExpressionList,
location: rangeFrom(1, 1, 2, 1),
location: undefined,
grouping: undefined,
info: {},
lexeme: '1 + # comment\n2',
lexeme: undefined,
children: [
{
type: RType.Comment,
content: ' comment',
lexeme: '# comment',
location: rangeFrom(1, 5, 1, 13),
info: {}
},
{
type: RType.BinaryOp,
info: {},
type: RType.BinaryOp,
info: {
additionalTokens: [
{
type: RType.Comment,
content: ' comment',
lexeme: '# comment',
location: rangeFrom(1, 5, 1, 13),
info: {}
}
]
},
lexeme: '+',
operator: '+',
location: rangeFrom(1, 3, 1, 3),
Expand All @@ -95,7 +98,7 @@ describe('Parse simple operations', withShell(shell => {
}
}
]
}), {
}, {
ignoreAdditionalTokens: false
}
)
Expand Down
21 changes: 14 additions & 7 deletions test/functionality/r-bridge/lang/ast/parse-values.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,20 @@ describe('Constant Parsing',
describe('comments', () => {
assertAst(label('simple line comment', ['comments']),
shell, '# Hello World',
exprList({
type: RType.Comment,
location: rangeFrom(1, 1, 1, 13),
lexeme: '# Hello World',
content: ' Hello World',
info: {}
})
{
...exprList(),
info: {
additionalTokens: [
{
type: RType.Comment,
location: rangeFrom(1, 1, 1, 13),
lexeme: '# Hello World',
content: ' Hello World',
info: {}
}
]
}
}
)
})
})
Expand Down
46 changes: 21 additions & 25 deletions test/functionality/slicing/reconstruct/simple-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ describe('Simple', withShell(shell => {
[2, '{ x <- 5 }', ['grouping', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities]],
[2, '{ x <- 5; y <- 9 }', ['grouping', 'name-normal', 'numbers', 'semicolons', ...OperatorDatabase['<-'].capabilities]],
] as [number, string, SupportedFlowrCapabilityId[]][]){
assertReconstructed(label(code, caps), shell, code, id, 'x <- 5')
assertReconstructed(label(code, caps), shell, code, id, 'x')
}
})
describe('Nested Assignments', () => {
for(const [code, id, expected, caps] of [
['12 + (supi <- 42)', 0, '12 + (supi <- 42)', ['grouping', 'name-normal', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['+'].capabilities]],
['y <- x <- 42', 1, 'x <- 42', ['name-normal', 'numbers', 'return-value-of-assignments', ...OperatorDatabase['<-'].capabilities, 'precedence'] ],
['y <- x <- 42', 0, 'y <- x <- 42', ['name-normal', 'numbers', 'return-value-of-assignments', ...OperatorDatabase['<-'].capabilities, 'precedence'] ],
['for (i in 1:20) { x <- 5 }', 6, 'x <- 5', ['for-loop', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities] ]
['12 + (supi <- 42)', 0, '12', ['grouping', 'name-normal', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['+'].capabilities, 'precedence']],
['y <- x <- 42', 1, 'x', ['name-normal', 'numbers', 'return-value-of-assignments', ...OperatorDatabase['<-'].capabilities, 'precedence'] ],
['y <- x <- 42', 0, 'y', ['name-normal', 'numbers', 'return-value-of-assignments', ...OperatorDatabase['<-'].capabilities, 'precedence'] ],
['for (i in 1:20) { x <- 5 }', 6, 'x', ['for-loop', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities] ]
] as [string, number, string, SupportedFlowrCapabilityId[]][]) {
assertReconstructed(label(code, caps), shell, code, id, expected)
}
Expand All @@ -40,7 +40,7 @@ describe('Simple', withShell(shell => {
describe('repeat', () => {
const pool: [string, NodeId | NodeId[], string, SupportedFlowrCapabilityId[]][] = [
['repeat { x }', 2, 'x', ['repeat-loop', 'name-normal']],
['repeat { x <- 5; y <- 9 }', 2, 'x <- 5', ['repeat-loop', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'semicolons', 'numbers']],
['repeat { x <- 5; y <- 9 }', 2, 'x', ['repeat-loop', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'semicolons', 'numbers']],
['repeat { x <- 5; y <- 9 }', [2, 4, 6], 'x <- 5\n9', ['repeat-loop', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'semicolons', 'numbers']]
]
for(const [code, id, expected, caps] of pool) {
Expand All @@ -52,14 +52,14 @@ describe('Simple', withShell(shell => {
const fiveNineCaps: SupportedFlowrCapabilityId[] = ['while-loop', 'logical', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons']
const pool: [string, NodeId | NodeId[], string, SupportedFlowrCapabilityId[]][] = [
['while(TRUE) { x }', 3, 'x', ['while-loop', 'logical', 'name-normal']],
['while(TRUE) { x <- 5 }', 3, 'x <- 5', ['while-loop', 'logical', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities]],
['while(TRUE) { x <- 5; y <- 9 }', 3, 'x <- 5', fiveNineCaps],
['while(TRUE) { x <- 5; y <- 9 }', [10, 3], 'while(TRUE) x <- 5', fiveNineCaps],
['while(TRUE) { x <- 5 }', 3, 'x', ['while-loop', 'logical', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities]],
['while(TRUE) { x <- 5; y <- 9 }', 3, 'x', fiveNineCaps],
['while(TRUE) { x <- 5; y <- 9 }', [10, 3], 'while(TRUE) x', fiveNineCaps],
['while(TRUE) { x <- 5; y <- 9 }', [10, 3, 5], 'while(TRUE) x <- 5', fiveNineCaps],
['while(TRUE) { x <- 5; y <- 9 }', [10, 6], 'while(TRUE) y <- 9', fiveNineCaps],
['while(TRUE) { x <- 5; y <- 9 }', [3, 4, 6], 'x <- 5\ny <- 9', fiveNineCaps],
['while(x + 2 > 3) { x <- 0 }', [7], 'x <- 0', ['while-loop', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers']],
['while(x + 2 > 3) { x <- 0 }', [0, 7], 'while(x + 2 > 3) x <- 0', ['while-loop', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers']]
['while(TRUE) { x <- 5; y <- 9 }', [10, 6], 'while(TRUE) y', fiveNineCaps],
['while(TRUE) { x <- 5; y <- 9 }', [3, 4, 6], 'x <- 5\ny', fiveNineCaps],
['while(x + 2 > 3) { x <- 0 }', [7], 'x', ['while-loop', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers']],
['while(x + 2 > 3) { x <- 0 }', [0, 7], 'while(x + 2 > 3) x', ['while-loop', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers']]
]
for(const [code, id, expected, caps] of pool) {
assertReconstructed(label(code, caps), shell, code, id, expected)
Expand All @@ -77,12 +77,12 @@ describe('Simple', withShell(shell => {
const caps: SupportedFlowrCapabilityId[] = ['for-loop', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['->'].capabilities, 'newlines']
const pool: [string, NodeId | NodeId[], string][] = [
[largeFor, 0, 'for(i in 1:20) {}'],
[largeFor, 6, 'y <- 9'],
[largeFor, [6, 16], 'for(i in 1:20) y <- 9'],
[largeFor, [6, 9], 'y <- 9\nx <- 5'],
[largeFor, 6, 'y'],
[largeFor, [6, 16], 'for(i in 1:20) y'],
[largeFor, [6, 9], 'y\nx'],
[largeFor, [6, 12, 16], `for(i in 1:20) {
y <- 9
12 -> x
y
12
}`],
]

Expand All @@ -97,18 +97,14 @@ a <- foo({
a <- b()
c <- 3
})`, 0, `a <- foo({
a <- b()
c <- 3
})`)
})`, 0, 'a')

const caps: SupportedFlowrCapabilityId[] = ['name-normal', ...OperatorDatabase['<-'].capabilities, 'double-bracket-access', 'numbers', 'infix-calls', 'binary-operator', 'call-normal', 'newlines', 'unnamed-arguments', 'precedence']
const caps: SupportedFlowrCapabilityId[] = ['name-normal', ...OperatorDatabase['<-'].capabilities, 'double-bracket-access', 'numbers', 'infix-calls', 'binary-operator', 'call-normal', 'newlines', 'unnamed-arguments', 'precedence', 'special-operator']
assertReconstructed(label('Reconstruct access in pipe (variable)', caps), shell, `
ls <- x[[1]] %>% st_cast()
class(ls)`, 2, 'x')
assertReconstructed(label('Reconstruct access in pipe (access)', caps), shell, `
ls <- x[[1]] %>% st_cast()
class(ls)`, 13, 'class(ls)')
class(ls)`, 13, 'ls')
})
}))

2 comments on commit 2dd9378

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: 2dd9378 Previous: 1e5ddeb Ratio
Retrieve AST from R code 237.93627359090908 ms 305.50355490909095 ms 0.78
Normalize R AST 31.25353440909091 ms 33.04192136363636 ms 0.95
Produce dataflow information 58.37447954545455 ms 66.58585145454545 ms 0.88
Total per-file 1294.066323590909 ms 1512.547620909091 ms 0.86
Static slicing 1.2210594568505793 ms (1.0623189895050085) 1.3817132831241319 ms (1.27303105869458) 0.88
Reconstruct code 0.40882656912935544 ms (0.22977179223364164) 0.45135141022522257 ms (0.25927254974160097) 0.91
Total per-slice 1.6472833759554493 ms (1.125043092013674) 1.8475447304133532 ms (1.3326721674469058) 0.89
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.797431685913541 # 0.7329390759026897 # 1.09
reduction (normalized tokens) 0.7740577588998524 # 0.7209834969577295 # 1.07

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: 2dd9378 Previous: 1e5ddeb Ratio
Retrieve AST from R code 239.7306396 ms 330.55613118 ms 0.73
Normalize R AST 32.28900336 ms 36.10837608 ms 0.89
Produce dataflow information 81.87014022 ms 188.6117503 ms 0.43
Total per-file 2696.1147549 ms 3858.78617088 ms 0.70
Static slicing 5.294570206736566 ms (10.111408245739772) 8.763488287471356 ms (15.142795018273485) 0.60
Reconstruct code 0.37513342773432123 ms (0.20476130374278045) 0.6295735167403027 ms (0.28826473087031307) 0.60
Total per-slice 5.67890957219614 ms (10.182301882257088) 9.403718790030862 ms (15.25727767695505) 0.60
failed to reconstruct/re-parse 2 # 7 # 0.29
times hit threshold 483 # 298 # 1.62
reduction (characters) 0.9264251300832852 # 0.8935817303062389 # 1.04
reduction (normalized tokens) 0.8956523254072724 # 0.8531248144961375 # 1.05

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.