diff --git a/HISTORY.md b/HISTORY.md index 63abbec38..2cf58f5aa 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,13 +6,17 @@ All notable changes to this project will be documented in this file. +* Fixed + * XML: properly handle `normalizedString` & `token` ([#1098] via [#1116]) * Build * Use _TypeScript_ `v5.5.3` now, was `v5.4.5` (via [#1108]) * Use _webpack_ `v5.92.1` now, was `v5.91.0` (via [#1091], [#1094]) [#1091]: https://github.com/CycloneDX/cyclonedx-javascript-library/pull/1091 [#1094]: https://github.com/CycloneDX/cyclonedx-javascript-library/pull/1094 +[#1098]: https://github.com/CycloneDX/cyclonedx-javascript-library/issues/1098 [#1108]: https://github.com/CycloneDX/cyclonedx-javascript-library/pull/1108 +[#1116]: https://github.com/CycloneDX/cyclonedx-javascript-library/pull/1116 ## 6.10.0 -- 2024-06-06 diff --git a/src/serialize/xml/_xsd.ts b/src/serialize/xml/_xsd.ts new file mode 100644 index 000000000..2025340ef --- /dev/null +++ b/src/serialize/xml/_xsd.ts @@ -0,0 +1,77 @@ +/*! +This file is part of CycloneDX JavaScript Library. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +SPDX-License-Identifier: Apache-2.0 +Copyright (c) OWASP Foundation. All Rights Reserved. +*/ + +// region normalizedString + +/** search-item for {@link normalizedString} */ +const _normalizeStringForbiddenSearch = /\r\n|\t|\n|\r/g +/** replace-item for {@link normalizedString} */ +const _normalizeStringForbiddenReplace = ' ' + +/** + * Make a 'normalizedString', adhering XML spec. + * + * @see {@link http://www.w3.org/TR/xmlschema-2/#normalizedString} + * + * @remarks + * + * quote from the XML schema spec: + * + * *normalizedString* represents white space normalized strings. + * The [·value space·](https://www.w3.org/TR/xmlschema-2/#dt-value-space) of normalizedString is the set of strings that do not contain the carriage return (#xD), line feed (#xA) nor tab (#x9) characters. + * The [·lexical space·](https://www.w3.org/TR/xmlschema-2/#dt-lexical-space) of normalizedString is the set of strings that do not contain the carriage return (#xD), line feed (#xA) nor tab (#x9) characters. + * The [·base type·](https://www.w3.org/TR/xmlschema-2/#dt-basetype) of normalizedString is [string](https://www.w3.org/TR/xmlschema-2/#string). + * + * @internal + */ +export function normalizedString(s: string): string { + return s.replace(_normalizeStringForbiddenSearch, _normalizeStringForbiddenReplace) +} + +// endregion + +// region token + +/** search-item for {@link token} */ +const _tokenMultispaceSearch = / {2,}/g +/** replace-item for {@link token} */ +const _tokenMultispaceReplace = ' ' + +/** + * Make a 'token', adhering XML spec. + * + * @see {@link http://www.w3.org/TR/xmlschema-2/#token} + * + * @remarks + * + * quote from the XML schema spec: + * + * *token* represents tokenized strings. 
+ * The [·value space·](https://www.w3.org/TR/xmlschema-2/#dt-value-space) of token is the set of strings that do not contain the carriage return (#xD), line feed (#xA) nor tab (#x9) characters, that have no leading or trailing spaces (#x20) and that have no internal sequences of two or more spaces. + * The [·lexical space·](https://www.w3.org/TR/xmlschema-2/#dt-lexical-space) of token is the set of strings that do not contain the carriage return (#xD), line feed (#xA) nor tab (#x9) characters, that have no leading or trailing spaces (#x20) and that have no internal sequences of two or more spaces. + * The [·base type·](https://www.w3.org/TR/xmlschema-2/#dt-basetype) of token is [normalizedString](https://www.w3.org/TR/xmlschema-2/#normalizedString). + * + * @internal + */ +export function token(s: string): string { + // according to spec, `token` inherits from `normalizedString` - so we utilize it here. + return normalizedString(s).trim().replace(_tokenMultispaceSearch, _tokenMultispaceReplace) +} + +// endregion diff --git a/src/serialize/xml/normalize.ts b/src/serialize/xml/normalize.ts index 33c3d0895..10ef64635 100644 --- a/src/serialize/xml/normalize.ts +++ b/src/serialize/xml/normalize.ts @@ -30,6 +30,7 @@ import { isSupportedSpdxId } from '../../spdx' import type { _SpecProtocol as Spec } from '../../spec/_protocol' import { Version as SpecVersion } from '../../spec/enums' import type { NormalizerOptions } from '../types' +import { normalizedString, token} from './_xsd' import type { SimpleXml } from './types' import { XmlSchema } from './types' @@ -295,7 +296,7 @@ export class LifecycleNormalizer extends BaseXmlNormalizer { type: 'element', name: elementName, children: [ - makeTextElement(data.name, 'name'), + makeTextElement(data.name, 'name', normalizedString), makeOptionalTextElement(data.description, 'description') ].filter(isNotUndefined) } @@ -338,9 +339,9 @@ export class ToolNormalizer extends BaseXmlNormalizer { type: 'element', name: elementName, children: [ - makeOptionalTextElement(data.vendor, 'vendor'), - makeOptionalTextElement(data.name, 'name'), - makeOptionalTextElement(data.version, 'version'), + makeOptionalTextElement(data.vendor, 'vendor', normalizedString), + makeOptionalTextElement(data.name, 'name', normalizedString), + makeOptionalTextElement(data.version, 'version', normalizedString), hashes, externalReferences ].filter(isNotUndefined) @@ -364,7 +365,7 @@ export class HashNormalizer extends BaseXmlNormalizer { type: 'element', name: elementName, attributes: { alg: algorithm }, - children: content + children: token(content) } : undefined } @@ -386,9 +387,9 @@ export class OrganizationalContactNormalizer extends BaseXmlNormalizer escapeUri(s.toString()) ), options, 'url' @@ -442,7 +443,8 @@ export class ComponentNormalizer extends BaseXmlNormalizer { : makeOptionalTextElement )( data.version ?? '', - 'version' + 'version', + normalizedString ) const hashes: SimpleXml.Element | undefined = data.hashes.size > 0 ? 
{ @@ -494,16 +496,16 @@ export class ComponentNormalizer extends BaseXmlNormalizer { }, children: [ supplier, - makeOptionalTextElement(data.author, 'author'), - makeOptionalTextElement(data.publisher, 'publisher'), - makeOptionalTextElement(data.group, 'group'), - makeTextElement(data.name, 'name'), + makeOptionalTextElement(data.author, 'author', normalizedString), + makeOptionalTextElement(data.publisher, 'publisher', normalizedString), + makeOptionalTextElement(data.group, 'group', normalizedString), + makeTextElement(data.name, 'name', normalizedString), version, - makeOptionalTextElement(data.description, 'description'), + makeOptionalTextElement(data.description, 'description', normalizedString), makeOptionalTextElement(data.scope, 'scope'), hashes, licenses, - makeOptionalTextElement(data.copyright, 'copyright'), + makeOptionalTextElement(data.copyright, 'copyright', normalizedString), makeOptionalTextElement(data.cpe, 'cpe'), makeOptionalTextElement(data.purl, 'purl'), swid, @@ -587,7 +589,7 @@ export class LicenseNormalizer extends BaseXmlNormalizer { : undefined }, children: [ - makeTextElement(data.name, 'name'), + makeTextElement(data.name, 'name', normalizedString), data.text === undefined ? undefined : this._factory.makeForAttachment().normalize(data.text, options, 'text'), @@ -621,7 +623,7 @@ export class LicenseNormalizer extends BaseXmlNormalizer { } #normalizeLicenseExpression (data: Models.LicenseExpression): SimpleXml.Element { - const elem = makeTextElement(data.expression, 'expression') + const elem = makeTextElement(data.expression, 'expression', normalizedString) elem.attributes = { acknowledgement: this._factory.spec.supportsLicenseAcknowledgement ? data.acknowledgement @@ -722,7 +724,9 @@ export class AttachmentNormalizer extends BaseXmlNormalizer { type: 'element', name: elementName, attributes: { - 'content-type': data.contentType || undefined, + 'content-type': data.contentType + ? normalizedString(data.contentType) + : undefined, encoding: data.encoding || undefined }, children: data.content.toString() @@ -738,7 +742,7 @@ export class PropertyNormalizer extends BaseXmlNormalizer { attributes: { name: data.name }, - children: data.value + children: normalizedString(data.value) } } @@ -875,7 +879,7 @@ export class VulnerabilityNormalizer extends BaseXmlNormalizer string +const noTEM: TextElementModifier = (s) => s + +function makeOptionalTextElement (data: null | undefined | Stringable, elementName: string, mod: TextElementModifier = noTEM): undefined | StrictTextElement { + const s = mod(data?.toString() ?? '') return s.length > 0 ? makeTextElement(s, elementName) : undefined } -function makeTextElement (data: Stringable, elementName: string): StrictTextElement { +function makeTextElement (data: Stringable, elementName: string, mod: TextElementModifier = noTEM): StrictTextElement { return { type: 'element', name: elementName, - children: data.toString() + children: mod(data.toString()) } } -function makeTextElementIter (data: Iterable, options: NormalizerOptions, elementName: string): StrictTextElement[] { - const r: StrictTextElement[] = Array.from(data, d => makeTextElement(d, elementName)) +function makeTextElementIter (data: Iterable, options: NormalizerOptions, elementName: string, mod: TextElementModifier = noTEM): StrictTextElement[] { + const r: StrictTextElement[] = Array.from(data, d => makeTextElement(d, elementName, mod)) if (options.sortLists ?? 
false) { r.sort(({ children: a }, { children: b }) => a.localeCompare(b)) } return r } -function makeOptionalDateTimeElement (data: null | undefined | Date, elementName: string): undefined | StrictTextElement { +function makeOptionalDateTimeElement (data: null | undefined | Date, elementName: string, mod: TextElementModifier = noTEM): undefined | StrictTextElement { const d = data?.toISOString() return d === undefined ? undefined - : makeTextElement(d, elementName) + : makeTextElement(d, elementName, mod) } diff --git a/tests/unit/Serialize.XML._xsd.spec.js b/tests/unit/Serialize.XML._xsd.spec.js new file mode 100644 index 000000000..66fe2f83f --- /dev/null +++ b/tests/unit/Serialize.XML._xsd.spec.js @@ -0,0 +1,68 @@ +/*! +This file is part of CycloneDX JavaScript Library. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +SPDX-License-Identifier: Apache-2.0 +Copyright (c) OWASP Foundation. All Rights Reserved. +*/ + +const assert = require('assert') +const { suite, test } = require('mocha') + +const { + normalizedString, + token +} = require('../../dist.node/serialize/xml/_xsd.js') + +suite('Serialize.XML._xsd', () => { + const normalizedStringCases = { + '': '', + '123': '123', + ' 0 1\r\n2\t3\n4\t': ' 0 1 2 3 4 ', + ' 0 1\r\n 2 \t3 \n 4 \t': ' 0 1 2 3 4 ', + } + + const tokenCases = { + '': '', + '123': '123', + ' 0 1 \r\n2\t 3 \n4\n ': '0 1 2 3 4', + ' 0 1\r\n 2 \t3 \n 4 \t ': '0 1 2 3 4', + } + + /** + * @param {string} s + * @return {string} + */ + function escapeTNR(s) { + return s + .replace(/\t/g, '\\t') + .replace(/\n/g, '\\n') + .replace(/\r/g, '\\r') + } + + suite('normalizedString()', () => { + for (const [input, expected] of Object.entries(normalizedStringCases)) { + test(`i: "${escapeTNR(input)}"`, () => { + assert.strictEqual(normalizedString(input), expected) + }) + } + }) + suite('token()', () => { + for (const [input, expected] of Object.entries(tokenCases)) { + test(`i: "${escapeTNR(input)}"`, () => { + assert.strictEqual(token(input), expected) + }) + } + }) +})
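
The behavioural core of this patch is the pair of `_xsd.ts` helpers plus the optional `TextElementModifier` parameter threaded through `makeTextElement` and its siblings in `normalize.ts`: text content can now be whitespace-sanitised before it becomes an element's `children`, while call sites that pass no modifier fall back to the identity function `noTEM` and keep their previous output unchanged. The sketch below is a minimal, self-contained approximation of that hook; the simplified `TextElement` shape and the inlined helper bodies are assumptions for illustration, not the library's exported API.

```typescript
// Minimal sketch of the TextElementModifier hook from normalize.ts.
// Names mirror the diff; the simplified shapes are assumptions for illustration.
type TextElementModifier = (s: string) => string

/** xsd:normalizedString: carriage return, line feed and tab each become a space. */
const normalizedString: TextElementModifier = (s) =>
  s.replace(/\r\n|\t|\n|\r/g, ' ')

/** xsd:token: normalizedString, then trim and collapse runs of spaces. */
const token: TextElementModifier = (s) =>
  normalizedString(s).trim().replace(/ {2,}/g, ' ')

interface TextElement {
  type: 'element'
  name: string
  children: string
}

/** Simplified stand-in for the private makeTextElement() helper. */
function makeTextElement (
  data: { toString: () => string },
  elementName: string,
  mod: TextElementModifier = (s) => s
): TextElement {
  return { type: 'element', name: elementName, children: mod(data.toString()) }
}

// A component name containing a tab and a newline is normalized to single spaces:
console.log(makeTextElement('acme\tlib\ncore', 'name', normalizedString).children)
// -> "acme lib core"

// Hash content goes through token(): leading/trailing whitespace is stripped,
// internal runs of whitespace collapse to one space:
console.log(token('  9f86d081\t 884c7d65 \n'))
// -> "9f86d081 884c7d65"

// Call sites that pass no modifier keep their text untouched:
console.log(makeTextElement(' raw\tvalue ', 'example').children)
// -> " raw\tvalue " (unchanged)
```

Note that `token(s)` is built on top of `normalizedString(s)`, mirroring the XSD type hierarchy in which `token` derives from `normalizedString`, and that the unit tests in `Serialize.XML._xsd.spec.js` exercise exactly these trim/collapse cases.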