Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
605 changes: 445 additions & 160 deletions package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"jasmine-core": "^5.7.1",
"jest": "^29",
"jest-environment-jsdom": "^29",
"jsdom": "^26.1.0",
"karma": "^6.4.4",
"karma-chrome-launcher": "^3.2.0",
"karma-coverage": "^2.2.1",
Expand Down
6 changes: 5 additions & 1 deletion packages/studio-web/src/app/b64.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,11 @@ export class B64Service {
}

xmlToB64(xml: Document) {
return this.utf8_to_b64(new XMLSerializer().serializeToString(xml));
return this.utf8_to_b64(
new XMLSerializer()
.serializeToString(xml)
.replace("?><read", "?>\n<read"),
);
}

blobToB64(blob: any) {
Expand Down
87 changes: 52 additions & 35 deletions packages/studio-web/src/app/editor/editor.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -287,37 +287,63 @@ export class EditorComponent implements OnDestroy, OnInit, AfterViewInit {
const parser = new DOMParser();
const readalong = parser.parseFromString(text, "text/html");
const element = readalong.querySelector("read-along");

if (element === undefined || element === null) {
return undefined;
}

// Store the element as parsed XML
// Create body element, which mysteriously gets removed from the text element.
let textNode = element.querySelector("text");
if (textNode && !textNode.querySelector("body")) {
const body = document.createElement("body");
body.id = "t0b0";
while (textNode.hasChildNodes()) {
// @ts-ignore
body.appendChild(textNode.firstChild);
// What is the appropriate source for the XML read along document? Either it was
// encoded in the element's href attribute, or included as a child element of the
// <read-along /> element.
//
// Prioritize the href implementation since it is more common.
const href = element.getAttribute("href");
if (href) {
const reply = await fetch(href);
if (reply.ok) {
// FIXME: potential zip-bombing?
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the potential zip-bombing issue here?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure, this was a comment left by Aidan which I've kept.

The previous implementation was recursive, that might have been the issue.

let xmlString = await reply.text();
if (!xmlString.startsWith("<?xml")) {
xmlString = `<?xml version='1.0' encoding='utf-8'?>\n` + xmlString;
}

this.editorService.rasControl$.setValue(
parser.parseFromString(xmlString, "application/xml"),
);
}
} else {
// Store the element as parsed XML
// Create body element, which gets removed from the text element. This occurs
// because the document was parsed with text/html mimetype which only allows
// a single <body /> element as a child of <html />.
let textNode = element.querySelector("text");
if (textNode && !textNode.querySelector("body")) {
const body = document.createElement("body");
body.id = "t0b0";
while (textNode.hasChildNodes()) {
// @ts-ignore
body.appendChild(textNode.firstChild);
}
textNode.appendChild(body);
}

// Similar issue, the document was parsed with a text/html mimetype, attributes
// in HTML are always lowercased.
const serializer = new XMLSerializer();
let xmlString = serializer
.serializeToString(element)
.replace(/arpabet=/g, "ARPABET=") // Our DTD says ARPABET is upper case
.replace(/xmlns="[\w\/\:\.]*"/g, ""); // Our DTD does not accept xmlns that the parser inserts
if (!xmlString.startsWith("<?xml")) {
xmlString = `<?xml version='1.0' encoding='utf-8'?>\n` + xmlString;
}
textNode.appendChild(body);

this.editorService.rasControl$.setValue(
parser.parseFromString(xmlString, "application/xml"),
); // re-parse as XML
}
const serializer = new XMLSerializer();
const xmlString = serializer
.serializeToString(element)
.replace(/arpabet=/g, "ARPABET=") // Our DTD says ARPABET is upper case
.replace(/xmlns="[\w\/\:\.]*"/g, ""); // Our DTD does not accept xmlns that the parser inserts
//console.log(xmlString);
this.editorService.rasControl$.setValue(
parser.parseFromString(xmlString, "text/xml"),
); // re-parse as XML
//console.log(this.editorService.rasControl$.value);

// Oh, there's an audio file, okay, try to load it
const audio = element.getAttribute("audio");

if (audio !== null) {
const reply = await fetch(audio);
// Did that work? Great!
Expand All @@ -328,20 +354,11 @@ export class EditorComponent implements OnDestroy, OnInit, AfterViewInit {
);
}
}
// Is read-along linked (including data URI) or embedded?
const href = element.getAttribute("href");
if (href === null) {
if (this.editorService.rasControl$.value) {
this.createSegments(this.editorService.rasControl$.value);
}
} else {
const reply = await fetch(href);
if (reply.ok) {
const text2 = await reply.text();
// FIXME: potential zip-bombing?
this.parseReadalong(text2);
}

if (this.editorService.rasControl$.value) {
this.createSegments(this.editorService.rasControl$.value);
}

return readalong.querySelector("body")?.innerHTML;
}

Expand Down
12 changes: 10 additions & 2 deletions packages/studio-web/src/app/ras.service.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { catchError, Observable, of, take } from "rxjs";
import { map, Observable } from "rxjs";

import { HttpClient } from "@angular/common/http";
import { Injectable } from "@angular/core";
Expand Down Expand Up @@ -72,7 +72,15 @@ export class RasService {
}

assembleReadalong$(body: ReadAlongRequest): Observable<ReadAlong> {
return this.http.post<ReadAlong>(this.baseURL + "/assemble", body);
return this.http.post<ReadAlong>(this.baseURL + "/assemble", body).pipe(
map((ras: ReadAlong) => {
if (!ras.processed_ras.startsWith("<?xml")) {
ras.processed_ras =
`<?xml version='1.0' encoding='utf-8'?>\n` + ras.processed_ras;
}
return ras;
}),
);
}
getLangs$(): Observable<Array<SupportedLanguage>> {
return this.http.get<Array<SupportedLanguage>>(this.baseURL + "/langs");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,9 @@ Please host all assets on your server, include the font and package imports defi
// - add .readalong file
await this.updateTranslations(rasXML, readalong);

const xmlString = this.xmlSerializer.serializeToString(
rasXML.documentElement,
);
const xmlString = this.xmlSerializer
.serializeToString(rasXML)
.replace("?><read", "?>\n<read");
const rasFile = new Blob([xmlString], { type: "application/xml" });
assetsFolder?.file(`${basename}.readalong`, rasFile);
// - add index.html file
Expand Down Expand Up @@ -448,7 +448,7 @@ Use the text editor to paste the snippet below in your WordPress page:
.convertRasFormat$(
{
dur: audio.duration,
ras: new XMLSerializer().serializeToString(rasXML.documentElement),
ras: new XMLSerializer().serializeToString(rasXML),
},
selectedOutputFormat,
)
Expand Down
12 changes: 10 additions & 2 deletions packages/studio-web/tests/editor/download-web-bundle.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ test("should Download web bundle (zip file format) from the Editor", async ({
const zipPath = await download1.path();
const zipBin = await fs.readFileSync(zipPath);
const zip = await JSZip.loadAsync(zipBin);
verifyWebBundle(zip);
await verifyWebBundle(zip);
});

// verify web-bundle contents used by tests in editor.
function verifyWebBundle(zip: JSZip) {
async function verifyWebBundle(zip: JSZip) {
expect(
zip.folder(/Offline-HTML/),
"should have Offline-HTML folder",
Expand Down Expand Up @@ -76,4 +76,12 @@ function verifyWebBundle(zip: JSZip) {
1,
);
expect(zip.file(/www\/index.html/), "should have index file").toHaveLength(1);

const xmlString = await zip
.file(/www\/assets\/sentence\-paragr\-[0-9]*\.readalong/)[0]
.async("text");
await expect(
xmlString,
"download file should contain XML declaration",
).toMatch(/^<\?xml/);
}
16 changes: 16 additions & 0 deletions packages/studio-web/tests/studio-web/download-html.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { test, expect } from "@playwright/test";
import { testMakeAReadAlong, defaultBeforeEach } from "../test-commands";
import { text } from "node:stream/consumers";
import { JSDOM } from "jsdom";

test("should Download default (single file format)", async ({
page,
Expand All @@ -17,4 +19,18 @@ test("should Download default (single file format)", async ({
download.suggestedFilename(),
"should have the expected filename",
).toMatch(/sentence\-paragr\-[0-9]*\.html/);

//
const reader = await download.createReadStream();
const contentText = await text(reader);
const dom = new JSDOM(contentText);
const xmlString = dom.window.document
.getElementsByTagName("read-along")[0]
.getAttribute("href") as string;

const resp = await fetch(xmlString);
await expect(
await resp.text(),
"download file should contain XML declaration",
).toMatch(/^<\?xml/);
});
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,12 @@ test("should Download web bundle (zip file format)", async ({
zip.file(/www\/index.html/),
"should have index file",
).toHaveLength(1); //www/index.html exists

const xmlString = await zip
.file(/www\/assets\/sentence\-paragr\-[0-9]*\.readalong/)[0]
.async("text");
await expect(
xmlString,
"download file should contain XML declaration",
).toMatch(/^<\?xml/);
});
Loading