From 6a775e29a4c96588822288188d8e34ea4c992713 Mon Sep 17 00:00:00 2001 From: enson-choy <88302346+enson-choy@users.noreply.github.com> Date: Fri, 10 Sep 2021 00:44:45 +0800 Subject: [PATCH] fix: Fix CC parsing of EPB and v1 TKHD boxes (#3610) When unboxing a v1 TKHD box, the reader read the trackId as an int64 instead of an int32. As a result, no matching timescale could be found when unboxing TFHD. Therefore, when parsing MDAT, the default timescale of 90000 was used, and all CC timestamps were incorrect. This also fixes the "Shaka Error MEDIA.VIDEO_ERROR (3,,PIPELINE_ERROR_DECODE: Failed to parse H.264 stream)" error when playing DASH MP4 H.264 streams with CEA-608 CC embedded. It's likely that the VDA bundled in Chromium-based browsers already includes EPB detection & prevention. If we let the player remove the byte in place, the VDA will complain about stream conformance. Closes #3502 --- lib/cea/sei_processor.js | 41 ++++++++++++++++++------------- lib/util/mp4_box_parsers.js | 2 +- test/cea/mp4_cea_parser_unit.js | 26 ++++++++++++++++++++ test/util/mp4_box_parsers_unit.js | 28 +++++++++++++++++++++ 4 files changed, 79 insertions(+), 18 deletions(-) diff --git a/lib/cea/sei_processor.js b/lib/cea/sei_processor.js index 6fc0e8e839..320264d298 100644 --- a/lib/cea/sei_processor.js +++ b/lib/cea/sei_processor.js @@ -17,31 +17,31 @@ shaka.cea.SeiProcessor = class { * @return {!Iterable.<!Uint8Array>} */ * process(naluData) { - const emuCount = this.removeEmu_(naluData); + const naluClone = this.removeEmu(naluData); // The following is an implementation of section 7.3.2.3.1 // in Rec. ITU-T H.264 (06/2019), the H.264 spec. 
let offset = 0; - while (offset + emuCount < naluData.length) { + while (offset < naluClone.length) { let payloadType = 0; // SEI payload type as defined by H.264 spec - while (naluData[offset] == 0xFF) { + while (naluClone[offset] == 0xFF) { payloadType += 255; offset++; } - payloadType += naluData[offset++]; + payloadType += naluClone[offset++]; let payloadSize = 0; // SEI payload size as defined by H.264 spec - while (naluData[offset] == 0xFF) { + while (naluClone[offset] == 0xFF) { payloadSize += 255; offset++; } - payloadSize += naluData[offset++]; + payloadSize += naluClone[offset++]; // Payload type 4 is user_data_registered_itu_t_t35, as per the H.264 // spec. This payload type contains caption data. if (payloadType == 0x04) { - yield naluData.subarray(offset, offset + payloadSize); + yield naluClone.subarray(offset, offset + payloadSize); } offset += payloadSize; } @@ -50,28 +50,35 @@ shaka.cea.SeiProcessor = class { /** * Removes H.264 emulation prevention bytes from the byte array. + * + * Note: Removing bytes by shifting in place causes Chromium (VDA) to complain + * about conformance. Creating a new array instead avoids this. + * * @param {!Uint8Array} naluData NALU from which EMUs should be removed. - * @return {number} The number of removed emulation prevention bytes. - * @private + * @return {!Uint8Array} The NALU with the emulation prevention bytes removed. 
*/ - removeEmu_(naluData) { + removeEmu(naluData) { + let naluClone = naluData; let zeroCount = 0; let src = 0; - let dst = 0; - while (src < naluData.length) { - if (zeroCount == 2 && naluData[src] == 0x03) { + while (src < naluClone.length) { + if (zeroCount == 2 && naluClone[src] == 0x03) { + // 0x00, 0x00, 0x03 pattern detected zeroCount = 0; + + // Splice the array and recreate a new one, instead of shifting bytes + const newArr = [...naluClone]; + newArr.splice(src, 1); + naluClone = new Uint8Array(newArr); } else { - if (naluData[src] == 0x00) { + if (naluClone[src] == 0x00) { zeroCount++; } else { zeroCount = 0; } - naluData[dst] = naluData[src]; - dst++; } src++; } - return (src - dst); + return naluClone; } }; diff --git a/lib/util/mp4_box_parsers.js b/lib/util/mp4_box_parsers.js index 7f0b328588..6479ba01f1 100644 --- a/lib/util/mp4_box_parsers.js +++ b/lib/util/mp4_box_parsers.js @@ -176,7 +176,7 @@ shaka.util.Mp4BoxParsers = class { if (version == 1) { reader.skip(8); // Skip "creation_time" reader.skip(8); // Skip "modification_time" - trackId = reader.readUint64(); + trackId = reader.readUint32(); } else { reader.skip(4); // Skip "creation_time" reader.skip(4); // Skip "modification_time" diff --git a/test/cea/mp4_cea_parser_unit.js b/test/cea/mp4_cea_parser_unit.js index 11023cc7fc..aa7c8d45d0 100644 --- a/test/cea/mp4_cea_parser_unit.js +++ b/test/cea/mp4_cea_parser_unit.js @@ -27,6 +27,32 @@ describe('Mp4CeaParser', () => { ceaSegment = responses[1]; }); + /** + * Test only the functionality of removing EPB + * Expect removeEmu() to return the NALU with correct length + * + * Chromium VDA has a strict standard on NALU length + * It will complain about conformance if the array is malformed + * + * If EPB is removed by shifting bytes, it will return the original NALU + * length, which will fail this test + * + * Note that the CEA-608 packet in this test is incomplete + */ + it('parses CEA-608 SEI data from MP4 H.264 stream', () => { + const 
seiProcessor = new shaka.cea.SeiProcessor(); + + const cea608Packet = new Uint8Array([ + 0x00, 0x00, 0x03, // Emulation prevention byte + ]); + + const naluData = seiProcessor.removeEmu(cea608Packet); + expect(naluData).toBeDefined(); + + // EPB should be removed by returning new array, not by shifting bytes + expect(naluData.length).toBe(2); + }); + it('parses cea data from mp4 stream', () => { const cea708Parser = new shaka.cea.Mp4CeaParser(); diff --git a/test/util/mp4_box_parsers_unit.js b/test/util/mp4_box_parsers_unit.js index e49ce948f5..45da1b1e6d 100644 --- a/test/util/mp4_box_parsers_unit.js +++ b/test/util/mp4_box_parsers_unit.js @@ -149,4 +149,32 @@ describe('Mp4BoxParsers', () => { expect(defaultSampleDuration).toBe(expectedDefaultSampleDuration); expect(baseMediaDecodeTime).toBe(expectedBaseMediaDecodeTime); }); + + /** + * Test on parsing an incomplete TKHD V1 box, since the parser doesn't + * parse the other fields + * + * Explanation on the Uint8Array: + * [ + * <creation time, 8 bytes>, + * <modification time, 8 bytes>, + * <track ID, 4 bytes> + * ] + * + * In a V1 box, each time is a 64-bit integer expressed in seconds since Jan 1, 1904, 00:00 UTC + * + */ + it('parses TKHD v1 box', () => { + const tkhdBox = new Uint8Array([ + 0x00, 0x00, 0x00, 0x00, 0xDC, 0xBF, 0x0F, 0xD7, // Creation time + 0x00, 0x00, 0x00, 0x00, 0xDC, 0xBF, 0x0F, 0xD7, // Modification time + 0x00, 0x00, 0x00, 0x01, // Track ID + // Remaining fields are not processed in parseTKHD() + ]); + const reader = new shaka.util.DataViewReader( + tkhdBox, shaka.util.DataViewReader.Endianness.BIG_ENDIAN); + const parsedTkhd = shaka.util.Mp4BoxParsers + .parseTKHD(reader, /* version= */ 1); + expect(parsedTkhd.trackId).toBe(1); + }); });