Skip to content

Commit 083a15e

Browse files
authored
switch to js implementation of orchestrion (#6877)
1 parent acc6bef commit 083a15e

File tree

39 files changed

+916
-146
lines changed

39 files changed

+916
-146
lines changed

LICENSE-3rdparty.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"@datadog/openfeature-node-server","https://github.com/DataDog/openfeature-js-client","['Apache-2.0']","['DataDog']"
88
"@datadog/pprof","https://github.com/DataDog/pprof-nodejs","['Apache-2.0']","['Google Inc.']"
99
"@datadog/sketches-js","https://github.com/DataDog/sketches-js","['Apache-2.0']","['DataDog']"
10+
"@datadog/source-map","npm:@datadog/source-map","[]","[]"
1011
"@datadog/wasm-js-rewriter","https://github.com/DataDog/dd-wasm-js-rewriter","['Apache-2.0']","['Datadog Inc.']"
1112
"@isaacs/ttlcache","https://github.com/isaacs/ttlcache","['BlueOak-1.0.0']","['Isaac Z. Schlueter']"
1213
"@jsep-plugin/assignment","https://github.com/EricSmekens/jsep","['MIT']","['Shelly']"
@@ -31,13 +32,16 @@
3132
"acorn","https://github.com/acornjs/acorn","['MIT']","['acornjs']"
3233
"acorn-import-attributes","https://github.com/xtuc/acorn-import-attributes","['MIT']","['Sven Sauleau']"
3334
"argparse","https://github.com/nodeca/argparse","['Python-2.0']","['nodeca']"
35+
"astring","https://github.com/davidbonnet/astring","['MIT']","['David Bonnet']"
3436
"cjs-module-lexer","https://github.com/nodejs/cjs-module-lexer","['MIT']","['Guy Bedford']"
3537
"crypto-randomuuid","npm:crypto-randomuuid","['MIT']","['Stephen Belanger']"
3638
"dc-polyfill","https://github.com/DataDog/dc-polyfill","['MIT']","['Thomas Hunter II']"
3739
"dd-trace","https://github.com/DataDog/dd-trace-js","['(Apache-2.0 OR BSD-3-Clause)']","['Datadog Inc. <info@datadoghq.com>']"
3840
"delay","https://github.com/sindresorhus/delay","['MIT']","['Sindre Sorhus']"
3941
"detect-newline","https://github.com/sindresorhus/detect-newline","['MIT']","['Sindre Sorhus']"
4042
"escape-string-regexp","https://github.com/sindresorhus/escape-string-regexp","['MIT']","['Sindre Sorhus']"
43+
"esquery","https://github.com/estools/esquery","['BSD-3-Clause']","['Joel Feenstra']"
44+
"estraverse","https://github.com/estools/estraverse","['BSD-2-Clause']","['estools']"
4145
"fast-fifo","https://github.com/mafintosh/fast-fifo","['MIT']","['Mathias Buus']"
4246
"ignore","https://github.com/kaelzhang/node-ignore","['MIT']","['kael']"
4347
"import-in-the-middle","https://github.com/nodejs/import-in-the-middle","['Apache-2.0']","['Bryan English']"
@@ -50,6 +54,7 @@
5054
"lodash.sortby","https://github.com/lodash/lodash","['MIT']","['John-David Dalton']"
5155
"long","https://github.com/dcodeIO/long.js","['Apache-2.0']","['Daniel Wirtz']"
5256
"lru-cache","https://github.com/isaacs/node-lru-cache","['ISC']","['Isaac Z. Schlueter']"
57+
"meriyah","https://github.com/meriyah/meriyah","['ISC']","['Kenny F.']"
5358
"module-details-from-path","https://github.com/watson/module-details-from-path","['MIT']","['Thomas Watson']"
5459
"mutexify","https://github.com/mafintosh/mutexify","['MIT']","['Mathias Buus']"
5560
"node-addon-api","https://github.com/nodejs/node-addon-api","['MIT']","['nodejs']"

loader-hook.mjs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import regexpEscapeModule from './packages/node_modules/escape-string-regexp/ind
22
import * as iitm from 'import-in-the-middle/hook.mjs'
33
import hooks from './packages/datadog-instrumentations/src/helpers/hooks.js'
44
import configHelper from './packages/dd-trace/src/config-helper.js'
5+
import * as rewriterLoader from './packages/datadog-instrumentations/src/helpers/rewriter/loader.mjs'
56

67
const regexpEscape = regexpEscapeModule.default
78

@@ -19,6 +20,10 @@ function initialize (data = {}) {
1920
return iitm.initialize(data)
2021
}
2122

23+
/**
 * ESM `load` hook exported by this loader.
 *
 * Hands the request to the rewriter loader together with a continuation that
 * calls import-in-the-middle's own `load`, which receives the original
 * `nextLoad`.
 */
function load (url, context, nextLoad) {
  const continueWithIitm = (nextUrl, nextContext) => iitm.load(nextUrl, nextContext, nextLoad)

  return rewriterLoader.load(url, context, continueWithIitm)
}
26+
2227
function addInstrumentations (data) {
2328
const instrumentations = Object.keys(hooks)
2429

@@ -50,5 +55,5 @@ function addExclusions (data) {
5055
)
5156
}
5257

53-
export { initialize }
54-
export { load, getFormat, resolve, getSource } from 'import-in-the-middle/hook.mjs'
58+
export { initialize, load }
59+
export { getFormat, resolve, getSource } from 'import-in-the-middle/hook.mjs'

packages/datadog-instrumentations/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22

33
require('./src/helpers/bundler-register')
44
require('./src/helpers/register')
5+
require('./src/helpers/rewriter/loader')

packages/datadog-instrumentations/src/helpers/instrument.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
const dc = require('dc-polyfill')
44
const instrumentations = require('./instrumentations')
5+
const rewriterInstrumentations = require('./rewriter/instrumentations')
56
const { AsyncResource } = require('async_hooks')
67

78
const channelMap = {}
@@ -22,6 +23,15 @@ exports.tracingChannel = function (name) {
2223
return tc
2324
}
2425

26+
exports.getHooks = function getHooks (names) {
27+
names = [names].flat()
28+
29+
return rewriterInstrumentations
30+
.map(inst => inst.module)
31+
.filter(({ name }) => names.includes(name))
32+
.map(({ name, versionRange, filePath }) => ({ name, versions: [versionRange], file: filePath }))
33+
}
34+
2535
/**
2636
* @param {object} args
2737
* @param {string|string[]} args.name module name

packages/datadog-instrumentations/src/helpers/register.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ const checkRequireCache = require('./check-require-cache')
1010
const telemetry = require('../../../dd-trace/src/guardrails/telemetry')
1111
const { isInServerlessEnvironment } = require('../../../dd-trace/src/serverless')
1212
const { getEnvironmentVariables } = require('../../../dd-trace/src/config-helper')
13+
const rewriter = require('./rewriter')
1314

1415
const envs = getEnvironmentVariables()
1516

@@ -48,6 +49,10 @@ if (DD_TRACE_DEBUG && DD_TRACE_DEBUG.toLowerCase() !== 'false') {
4849
const seenCombo = new Set()
4950
const allInstrumentations = {}
5051

52+
for (const inst of disabledInstrumentations) {
53+
rewriter.disable(inst)
54+
}
55+
5156
// TODO: make this more efficient
5257
for (const packageName of names) {
5358
if (disabledInstrumentations.has(packageName)) continue
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
'use strict'
2+
3+
let meriyah
4+
let astring
5+
let esquery
6+
7+
module.exports = {
8+
parse: (...args) => {
9+
meriyah ??= require('meriyah')
10+
11+
return meriyah.parse(...args)
12+
},
13+
14+
generate: (...args) => {
15+
astring ??= require('astring')
16+
17+
return astring.generate(...args)
18+
},
19+
20+
traverse: (ast, query, visitor) => {
21+
esquery ??= require('esquery').default
22+
23+
const selector = esquery.parse(query)
24+
25+
return esquery.traverse(ast, selector, visitor)
26+
},
27+
}
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
'use strict'
2+
3+
/*
4+
This rewriter is basically a JavaScript version of Orchestrion-JS. The goal is
5+
not to replace Orchestrion-JS, but rather to make it easier and faster to write
6+
new integrations in the short-term, especially as many changes to the rewriter
7+
will be needed as all the patterns we need have not been identified yet. This
8+
will avoid the back and forth of having to make Rust changes to an external
9+
library for every integration change or addition that requires something new.
10+
11+
In the meantime, we'll work concurrently on a change to Orchestrion-JS that
12+
adds an "arbitrary transform" or "plugin" system that can be used from
13+
JavaScript, in order to enable quick iteration while still using Orchestrion-JS.
14+
Once that's done we'll use that, so that we can remove this JS approach and
15+
return to using Orchestrion-JS.
16+
17+
The long term goal is to backport any additional features we add to the JS
18+
rewriter (or using the plugin system in Orchestrion-JS once we're using that)
19+
to Orchestrion-JS once we're confident that the implementation is fairly
20+
complete and has all features we need.
21+
22+
Here is a list of the additions and changes in this rewriter compared to
23+
Orchestrion-JS that will need to be backported:
24+
25+
(NOTE: Please keep this list up-to-date whenever new features are added)
26+
27+
- Supports an `astQuery` field to filter AST nodes with an esquery query. This
28+
is mostly meant to be used when experimenting or if what needs to be queried
29+
is not a function. We'll see over time if something like this is needed to be
30+
backported or if it can be replaced by simpler queries.
31+
- Supports replacing methods of child class instances in the base constructor.
32+
*/
33+
34+
const { readFileSync } = require('fs')
35+
const { join } = require('path')
36+
const semifies = require('semifies')
37+
const transforms = require('./transforms')
38+
const { generate, parse, traverse } = require('./compiler')
39+
const log = require('../../../../dd-trace/src/log')
40+
const instrumentations = require('./instrumentations')
41+
const { getEnvironmentVariable } = require('../../../../dd-trace/src/config-helper')
42+
43+
// Raw `NODE_OPTIONS` value, read through the config helper.
const NODE_OPTIONS = getEnvironmentVariable('NODE_OPTIONS')

// Names of the instrumentations turned off through `disable()`.
const disabled = new Set()

// Cache of version check results filled in by `satisfies()`.
const supported = {}

const mentionsSourceMaps = value => value.includes('--enable-source-maps')

// TODO: Source maps without `--enable-source-maps`.
const enableSourceMaps = (NODE_OPTIONS ? mentionsSourceMaps(NODE_OPTIONS) : false) ||
  process.execArgv?.some(arg => mentionsSourceMaps(arg))

// Assigned on first use, when a source map actually has to be generated.
let SourceMapGenerator
53+
54+
/**
 * Converts the identifier of the module being loaded into a file system path.
 *
 * Plain paths are returned untouched. For `file://` URLs the scheme is removed
 * (only when it is an actual prefix) and the remainder is percent-decoded, so
 * that a path containing escaped characters such as `%20` can be read from
 * disk afterwards.
 *
 * @param {string} filename path or `file://` URL
 * @returns {string}
 */
function toFilePath (filename) {
  const scheme = 'file://'

  if (!filename.startsWith(scheme)) return filename

  const encodedPath = filename.slice(scheme.length)

  try {
    return decodeURIComponent(encodedPath)
  } catch {
    // Not valid percent-encoding: keep the raw value rather than giving up.
    return encodedPath
  }
}

/**
 * Applies every matching rewriter instrumentation to the source of a module.
 *
 * @param {string|Buffer} content module source (anything with a `toString()`
 *   that yields the code — presumably a string or a Buffer)
 * @param {string} filename path or `file://` URL of the module
 * @param {string} format module format; `'module'` makes the parser run in
 *   module mode
 * @returns {string|Buffer} the regenerated code (with an inline source map when
 *   `--enable-source-maps` was detected) if at least one instrumentation
 *   targets the file; otherwise `content` as received. Errors are logged and
 *   also result in `content` being returned unchanged.
 */
function rewrite (content, filename, format) {
  if (!content) return content

  try {
    const modulePath = toFilePath(filename)
    let ast

    for (const inst of instrumentations) {
      const { astQuery, functionQuery = {}, module: { name, versionRange, filePath } } = inst

      // Cheap checks first: most instrumentations do not target this file.
      if (disabled.has(name)) continue
      if (!modulePath.endsWith(`${name}/${filePath}`)) continue

      const { kind } = functionQuery
      const operator = kind === 'Async' ? 'tracePromise' : kind === 'Callback' ? 'traceCallback' : 'traceSync'
      const transform = transforms[operator]

      if (!transform) continue
      if (!satisfies(modulePath, filePath, versionRange)) continue

      // Parse lazily and only once, however many instrumentations match.
      ast ??= parse(content.toString(), { loc: true, ranges: true, module: format === 'module' })

      const query = astQuery || fromFunctionQuery(functionQuery)
      const state = { ...inst, format, functionQuery, operator }

      traverse(ast, query, (...args) => transform(state, ...args))
    }

    if (ast) {
      if (!enableSourceMaps) return generate(ast)

      // TODO: Can we use the same version of `source-map` that DI uses?
      SourceMapGenerator ??= require('@datadog/source-map').SourceMapGenerator

      const sourceMap = new SourceMapGenerator({ file: modulePath })
      const code = generate(ast, { sourceMap })
      const map = Buffer.from(sourceMap.toString()).toString('base64')

      return code + '\n' + `//# sourceMappingURL=data:application/json;base64,${map}`
    }
  } catch (e) {
    // Rewriting is best effort: the module is loaded unmodified on failure.
    log.error(e)
  }

  return content
}
99+
100+
/**
 * Excludes an instrumentation from rewriting.
 *
 * @param {string} instrumentation name that `rewrite()` compares against the
 *   module name of each instrumentation
 */
function disable (instrumentation) {
  disabled.add(instrumentation)
}
103+
104+
/**
 * Checks whether the package that owns `filename` has a version within range.
 *
 * @param {string} filename path of the file being loaded, expected to end with
 *   `filePath`
 * @param {string} filePath path of that file relative to its package root
 * @param {string} versions version range handed to `semifies`
 * @returns {boolean} result of the range check; `false` as well when the
 *   package.json next to the package root cannot be read or parsed
 */
function satisfies (filename, filePath, versions) {
  // The package root is whatever precedes the trailing `filePath`. Slicing the
  // suffix off (rather than splitting on the first occurrence) stays correct
  // when the same character sequence also shows up earlier in the path.
  const basename = filename.endsWith(filePath)
    ? filename.slice(0, filename.length - filePath.length)
    : filename.split(filePath)[0]

  // The answer depends on the range as well as on the package, so both are
  // part of the cache key. Keyed by package alone, a second instrumentation
  // with a different range would get the answer computed for the first one.
  const cacheKey = `${basename}\n${versions}`

  if (supported[cacheKey] === undefined) {
    try {
      const pkg = JSON.parse(readFileSync(
        join(basename, 'package.json'), 'utf8'
      ))

      supported[cacheKey] = semifies(pkg.version, versions)
    } catch {
      supported[cacheKey] = false
    }
  }

  return supported[cacheKey]
}
121+
122+
// TODO: Support index
123+
/**
 * Builds an esquery selector string from a function query.
 *
 * @param {object} functionQuery
 * @param {string} [functionQuery.className] name of the class to target
 * @param {string} [functionQuery.methodName] name of the method or property
 * @param {string} [functionQuery.functionName] name of a function declaration
 * @param {string} [functionQuery.expressionName] name of a function expression
 * @returns {string} comma-separated selectors, or an empty string when the
 *   query names nothing
 */
function fromFunctionQuery (functionQuery) {
  const { methodName, functionName, expressionName, className } = functionQuery
  const queries = []

  if (className) {
    queries.push(`[id.name="${className}"]`)

    // Method selectors only make sense when a method is named; without this
    // guard they would be built around the literal name "undefined".
    if (methodName) {
      queries.push(
        `[id.name="${className}"] > ClassBody > [key.name="${methodName}"] > [async]`,
        `[id.name="${className}"] > ClassExpression > ClassBody > [key.name="${methodName}"] > [async]`
      )
    }
  } else if (methodName) {
    queries.push(
      `ClassBody > [key.name="${methodName}"] > [async]`,
      `Property[key.name="${methodName}"] > [async]`
    )
  }

  if (functionName) {
    queries.push(`FunctionDeclaration[id.name="${functionName}"][async]`)
  } else if (expressionName) {
    queries.push(
      `FunctionExpression[id.name="${expressionName}"][async]`,
      `ArrowFunctionExpression[id.name="${expressionName}"][async]`
    )
  }

  return queries.join(', ')
}
151+
152+
module.exports = { rewrite, disable }
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
'use strict'
2+
3+
module.exports = [
4+
...require('./langchain')
5+
]

0 commit comments

Comments
 (0)