diff --git a/.changeset/warm-geese-dance.md b/.changeset/warm-geese-dance.md new file mode 100644 index 00000000000..5deb7a3e6eb --- /dev/null +++ b/.changeset/warm-geese-dance.md @@ -0,0 +1,5 @@ +--- +'openzeppelin-solidity': patch +--- + +`Base64`: Fix issue where dirty memory located just after the input buffer is affecting the result. diff --git a/contracts/mocks/Base64Dirty.sol b/contracts/mocks/Base64Dirty.sol new file mode 100644 index 00000000000..238bd26a369 --- /dev/null +++ b/contracts/mocks/Base64Dirty.sol @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: MIT + +pragma solidity ^0.8.20; + +import {Base64} from "../utils/Base64.sol"; + +contract Base64Dirty { + struct A { + uint256 value; + } + + function encode(bytes memory input) public pure returns (string memory) { + A memory unused = A({value: type(uint256).max}); + // To silence warning + unused; + + return Base64.encode(input); + } +} diff --git a/contracts/utils/Base64.sol b/contracts/utils/Base64.sol index 3f2d7c134b3..b7c74866721 100644 --- a/contracts/utils/Base64.sol +++ b/contracts/utils/Base64.sol @@ -58,12 +58,19 @@ library Base64 { let tablePtr := add(table, 1) // Prepare result pointer, jump over length - let resultPtr := add(result, 32) + let resultPtr := add(result, 0x20) + let dataPtr := data + let endPtr := add(data, mload(data)) + + // In some cases, the last iteration will read bytes after the end of the data. We cache the value, and + // set it to zero to make sure no dirty bytes are read in that section. + let afterPtr := add(endPtr, 0x20) + let afterCache := mload(afterPtr) + mstore(afterPtr, 0x00) // Run over the input, 3 bytes at a time for { - let dataPtr := data - let endPtr := add(data, mload(data)) + } lt(dataPtr, endPtr) { } { @@ -71,13 +78,12 @@ library Base64 { dataPtr := add(dataPtr, 3) let input := mload(dataPtr) - // To write each character, shift the 3 bytes (18 bits) chunk + // To write each character, shift the 3 byte (24 bits) chunk // 4 times in blocks of 6 bits for each character (18, 12, 6, 0) - // and apply logical AND with 0x3F which is the number of - // the previous character in the ASCII table prior to the Base64 Table - // The result is then added to the table to get the character to write, - // and finally write it in the result pointer but with a left shift - // of 256 (1 byte) - 8 (1 ASCII char) = 248 bits + // and apply logical AND with 0x3F to bitmask the least significant 6 bits. + // Use this as an index into the lookup table, mload an entire word + // so the desired character is in the least significant byte, and + // mstore8 this least significant byte into the result and continue. mstore8(resultPtr, mload(add(tablePtr, and(shr(18, input), 0x3F)))) resultPtr := add(resultPtr, 1) // Advance @@ -92,6 +98,9 @@ library Base64 { resultPtr := add(resultPtr, 1) // Advance } + // Reset the value that was cached + mstore(afterPtr, afterCache) + if withPadding { // When data `bytes` is not exactly 3 bytes long // it is padded with `=` characters at the end diff --git a/test/utils/Base64.test.js b/test/utils/Base64.test.js index 3f3e9fc8beb..5c427466671 100644 --- a/test/utils/Base64.test.js +++ b/test/utils/Base64.test.js @@ -47,4 +47,13 @@ describe('Strings', function () { expect(await this.mock.$encodeURL(buffer)).to.equal(expected); }); }); + + it('Encode reads beyond the input buffer into dirty memory', async function () { + const mock = await ethers.deployContract('Base64Dirty'); + const buffer32 = ethers.id('example'); + const buffer31 = buffer32.slice(0, -2); + + expect(await mock.encode(buffer31)).to.equal(ethers.encodeBase64(buffer31)); + expect(await mock.encode(buffer32)).to.equal(ethers.encodeBase64(buffer32)); + }); });