Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[backend] Improve bulk Stix loader to support 100K+ relations loading in one bulk (#6386) #6387

Merged
merged 1 commit into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 18 additions & 10 deletions opencti-platform/opencti-graphql/src/database/engine.js
Original file line number Diff line number Diff line change
Expand Up @@ -1605,17 +1605,25 @@
const parentKey = arrayKeys.at(0);
const { key: nestedKey, values: nestedValues, operator: nestedOperator = 'eq' } = nestedElement;
const nestedShould = [];
for (let i = 0; i < nestedValues.length; i += 1) {
const nestedFieldKey = `${parentKey}.${nestedKey}`;
const nestedSearchValues = nestedValues[i].toString();
if (nestedOperator === 'wildcard') {
nestedShould.push({ query_string: { query: `${nestedSearchValues}`, fields: [nestedFieldKey] } });
} else if (nestedOperator === 'not_eq') {
nestedMustNot.push({ match_phrase: { [nestedFieldKey]: nestedSearchValues } });
} else if (RANGE_OPERATORS.includes(nestedOperator)) {
nestedShould.push({ range: { [nestedFieldKey]: { [nestedOperator]: nestedSearchValues } } });
const nestedFieldKey = `${parentKey}.${nestedKey}`;
if (nestedKey === ID_INTERNAL) {
if (nestedOperator === 'not_eq') {
nestedMustNot.push({ terms: { [`${nestedFieldKey}.keyword`]: nestedValues } });
} else {
nestedShould.push({ match_phrase: { [nestedFieldKey]: nestedSearchValues } });
nestedShould.push({ terms: { [`${nestedFieldKey}.keyword`]: nestedValues } });
}
} else {
for (let i = 0; i < nestedValues.length; i += 1) {
const nestedSearchValues = nestedValues[i].toString();
if (nestedOperator === 'wildcard') {
nestedShould.push({ query_string: { query: `${nestedSearchValues}`, fields: [nestedFieldKey] } });
} else if (nestedOperator === 'not_eq') {
nestedMustNot.push({ match_phrase: { [nestedFieldKey]: nestedSearchValues } });

Check warning on line 1621 in opencti-platform/opencti-graphql/src/database/engine.js

View check run for this annotation

Codecov / codecov/patch

opencti-platform/opencti-graphql/src/database/engine.js#L1621

Added line #L1621 was not covered by tests
} else if (RANGE_OPERATORS.includes(nestedOperator)) {
nestedShould.push({ range: { [nestedFieldKey]: { [nestedOperator]: nestedSearchValues } } });

Check warning on line 1623 in opencti-platform/opencti-graphql/src/database/engine.js

View check run for this annotation

Codecov / codecov/patch

opencti-platform/opencti-graphql/src/database/engine.js#L1623

Added line #L1623 was not covered by tests
} else {
nestedShould.push({ match_phrase: { [nestedFieldKey]: nestedSearchValues } });
}
}
}
const should = {
Expand Down
39 changes: 29 additions & 10 deletions opencti-platform/opencti-graphql/src/database/middleware.js
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,13 @@
for (let i = 0; i < groupOfWorkingIds.length; i += 1) {
const fromIds = groupOfWorkingIds[i];
const relationFilter = { mode: FilterMode.And, filters: [{ key: ['fromId'], values: fromIds }], filterGroups: [] };
const refsListed = await listAllRelations(context, user, relTypes, { filters: relationFilter });
refsRelations.push(...refsListed);
// Callback that iteratively appends each batch of relations to the global refsRelations array.
// As listAllRelations can return 100K+ relations, the append must be split into batches
// due to the Node.js limit of 100K parameters per function call.
const allRelCallback = async (relations) => {
refsRelations.push(...relations);
};
await listAllRelations(context, user, relTypes, { baseData: true, filters: relationFilter, callback: allRelCallback });
}
const refsPerElements = R.groupBy((r) => r.fromId, refsRelations);
// Parallel resolutions
Expand All @@ -294,16 +299,25 @@
const entries = Object.entries(grouped);
for (let index = 0; index < entries.length; index += 1) {
const [key, values] = entries[index];
const inputKey = schemaRelationsRefDefinition.convertDatabaseNameToInputName(element.entity_type, key);
const resolvedElementsWithRelation = R.map((v) => {
const invalidRelations = [];
const resolvedElementsWithRelation = [];
let startProcessingTime = new Date().getTime();
for (let valueIndex = 0; valueIndex < values.length; valueIndex += 1) {
const v = values[valueIndex];
const resolvedElement = toResolvedElements[v.toId];
return resolvedElement ? { ...resolvedElement, i_relation: v } : {};
}, values).filter((d) => isNotEmptyField(d));
const metaRefKey = schemaRelationsRefDefinition.getRelationRef(element.entity_type, inputKey);
if (isEmptyField(metaRefKey)) {
throw UnsupportedError('Schema validation failure when loading dependencies', { key, inputKey, type: element.entity_type });
if (resolvedElement) {
resolvedElementsWithRelation.push({ ...resolvedElement, i_relation: v });
} else {
invalidRelations.push({ relation_id: v.id, target_id: v.toId });
}

Check warning on line 312 in opencti-platform/opencti-graphql/src/database/middleware.js

View check run for this annotation

Codecov / codecov/patch

opencti-platform/opencti-graphql/src/database/middleware.js#L311-L312

Added lines #L311 - L312 were not covered by tests
// Prevent locking the event loop for more than MAX_EVENT_LOOP_PROCESSING_TIME
if (new Date().getTime() - startProcessingTime > MAX_EVENT_LOOP_PROCESSING_TIME) {
startProcessingTime = new Date().getTime();
await new Promise((resolve) => {
setImmediate(resolve);
});
}

Check warning on line 319 in opencti-platform/opencti-graphql/src/database/middleware.js

View check run for this annotation

Codecov / codecov/patch

opencti-platform/opencti-graphql/src/database/middleware.js#L315-L319

Added lines #L315 - L319 were not covered by tests
}
const invalidRelations = values.filter((v) => toResolvedElements[v.toId] === undefined);
if (invalidRelations.length > 0) {
// Some targets may remain unresolved when there is an inconsistency between a relation and its target
// This kind of situation can happen if:
Expand All @@ -312,6 +326,11 @@
const relations = invalidRelations.map((v) => ({ relation_id: v.id, target_id: v.toId }));
logApp.warn('Targets of loadElementMetaDependencies not found', { relations });
}
const inputKey = schemaRelationsRefDefinition.convertDatabaseNameToInputName(element.entity_type, key);
const metaRefKey = schemaRelationsRefDefinition.getRelationRef(element.entity_type, inputKey);
if (isEmptyField(metaRefKey)) {
throw UnsupportedError('Schema validation failure when loading dependencies', { key, inputKey, type: element.entity_type });
}

Check warning on line 333 in opencti-platform/opencti-graphql/src/database/middleware.js

View check run for this annotation

Codecov / codecov/patch

opencti-platform/opencti-graphql/src/database/middleware.js#L332-L333

Added lines #L332 - L333 were not covered by tests
data[key] = !metaRefKey.multiple ? R.head(resolvedElementsWithRelation)?.internal_id : resolvedElementsWithRelation.map((r) => r.internal_id);
data[inputKey] = !metaRefKey.multiple ? R.head(resolvedElementsWithRelation) : resolvedElementsWithRelation;
}
Expand Down