Skip to content

Commit

Permalink
parseMail: fix support for UTF-8 strings in input, accept Uint8Array for other encodings
Browse files Browse the repository at this point in the history

The input data was treated as a 'binary string' internally, meaning that if the data was a native
string that included non-ASCII chars, these might fail to be decoded.

This commit changes the `parseMail` interface so that:
- if a string input is passed, it is treated as UTF-8 (not a 'binary string')
- Uint8Array inputs are now accepted, and should be used for other encodings
  • Loading branch information
larabr committed Jul 3, 2023
1 parent 2d9638e commit a904720
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 15 deletions.
2 changes: 1 addition & 1 deletion index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ export interface ParsedMessage {
'reply-to'?: Address
}

export function parseMail(message: string): ParsedMessage;
/**
 * Parse a MIME message.
 *
 * @param message - MIME message to parse. A string is treated as UTF-8;
 *   pass a Uint8Array for data in any other encoding.
 * @returns the parsed message content
 */
export function parseMail(message: string | Uint8Array): ParsedMessage;
2 changes: 1 addition & 1 deletion index.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export { parseMail } from './lib/mailParser';
export { parseMail } from './lib/mailParser';
10 changes: 6 additions & 4 deletions lib/mailParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ export const MimeParser = {
* The input is a string that is immediately parsed, calling all functions on
* the emitter before this function returns.
*
* @param input A string or input stream of text to parse.
* @param {BinaryString} input A string or input stream of text to parse.
* @param emitter The emitter to receive callbacks on.
* @param opts A set of options for the parser.
*/
Expand Down Expand Up @@ -401,7 +401,7 @@ export const MimeParser = {
*
* The input is any type of input that would be accepted by parseSync.
*
* @param input A string of text to parse.
* @param {BinaryString} input A string of text to parse.
*/
extractMimeMsg(input, options = {}) {
var emitter = Object.create(ExtractMimeMsgEmitter);
Expand Down Expand Up @@ -548,11 +548,13 @@ export const MimeParser = {

/**
* Parse MIME message
* @param {String} data - MIME message to parse
* @param {String|Uint8Array} data - MIME message to parse; a string is treated as UTF-8, use a Uint8Array for other encodings
* @returns {Object} parsed content (see TS definitions for more details)
*/
export function parseMail(data) {
const { headers, allAttachments, bodyParts } = MimeParser.extractMimeMsg(data);
const encoded = (typeof data === 'string') ? new TextEncoder().encode(data) : data;

const { headers, allAttachments, bodyParts } = MimeParser.extractMimeMsg(uint8ArrayToString(encoded));
// these fields can only contain a single value
const singleKeys = new Set([
'message-id',
Expand Down
36 changes: 27 additions & 9 deletions test/test_mail_parser.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { expect } from "chai";
import { read_file } from "./utils";
import { read_file_raw } from "./utils";
import { parseMail } from "../";
import { uint8ArrayToString } from "../lib/utils";

// Convert the bytes to a string with uint8ArrayToString, then base64-encode that string with btoa.
const toBase64 = uInt8Array => {
  const asString = uint8ArrayToString(uInt8Array);
  return btoa(asString);
};

describe('mail parser', () => {
it('correctly parses multipart message with both HTML and plain text data', async () => {
const eml = await read_file("multipart-complex1");
const eml = await read_file_raw("multipart-complex1");
const { body, attachments } = parseMail(eml);

expect(body.html).to.equal('<html><head>This part should be returned.</head></html>\n');
Expand All @@ -22,11 +22,29 @@ describe('mail parser', () => {
expect(attachments[1].fileName).to.equal('');
});

it('correctly parses UTF-8 string input', async () => {
const eml = `Content-Type: multipart/mixed;
boundary="------------cJMvmFk1NneB7MT4jwYHY7ap"
This is a multi-part message in MIME format.
--------------cJMvmFk1NneB7MT4jwYHY7ap
Content-Type: text/plain; charset=UTF-8;
Content-Transfer-Encoding: 8bit
Import HTML cöntäct//Subjεέςτ//
--------------cJMvmFk1NneB7MT4jwYHY7ap--`;
const { body } = parseMail(eml);

expect(body.text).to.equal('Import HTML cöntäct//Subjεέςτ//\n');
});

it('correctly parses SHIFT-JIS body with png attachment', async () => {
const expectedText = 'Portable Network Graphics(ポータブル・ネットワーク・グラフィックス、PNG)はコンピュータでビットマップ画像を扱うファイルフォーマットである。圧縮アルゴリズムとしてDeflateを採用している、圧縮による画質の劣化のない可逆圧縮の画像ファイルフォーマットである。\n';
const expectedAttachmentContent = 'iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklEQVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAAAQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvtlmUAAAAASUVORK5CYII=';

const eml = await read_file("shift-jis-image");
const eml = await read_file_raw("shift-jis-image");
console.log(eml)
const { body, subject, headers, attachments: [attachment] } = parseMail(eml);

expect(body.text).to.equal(expectedText);
Expand All @@ -39,7 +57,7 @@ describe('mail parser', () => {
});

it('correctly reads binary attachments', async () => {
const eml = await read_file("multipart-binary");
const eml = await read_file_raw("multipart-binary");
const { attachments: [attachment] } = parseMail(eml);

expect(attachment.content).to.deep.equal(new Uint8Array([1, 2, 3]));
Expand All @@ -48,7 +66,7 @@ describe('mail parser', () => {
});

it('includes the content-id and filename for each attachment', async () => {
const eml = await read_file("multipart-content-id");
const eml = await read_file_raw("multipart-content-id");
const { attachments: [attachment1, attachment2] } = parseMail(eml);

expect(attachment1.content).to.deep.equal(attachment2.content);
Expand All @@ -60,7 +78,7 @@ describe('mail parser', () => {
});

it('returns an empty array for empty attachment body', async () => {
const eml = await read_file("multipart-empty-attachment");
const eml = await read_file_raw("multipart-empty-attachment");
const { attachments: [attachment] } = parseMail(eml);

expect(attachment.content).to.be.instanceOf(Uint8Array);
Expand All @@ -70,15 +88,15 @@ describe('mail parser', () => {
});

it('decodes the subject', async () => {
const eml = await read_file("multipart-encrypted-subject-utf8");
const eml = await read_file_raw("multipart-encrypted-subject-utf8");
const { subject, body } = parseMail(eml);

expect(subject).to.equal('subject with emojis 😃😇');
expect(body.text).to.equal('test utf8 in encrypted subject\n');
});

it('parses addresses and date', async () => {
const eml = await read_file("multipart-addresses");
const eml = await read_file_raw("multipart-addresses");
const { from, to, cc, bcc, date } = parseMail(eml);

expect(from).to.deep.equal({ name: 'Some One', email: 'someone@test.com' });
Expand All @@ -89,7 +107,7 @@ describe('mail parser', () => {
});

it('parses address groups', async () => {
const eml = await read_file("multipart-addresses-groups");
const eml = await read_file_raw("multipart-addresses-groups");
const { from, to, cc, bcc } = parseMail(eml);

expect(from).to.deep.equal({ name: 'Some One', email: 'someone@test.com' });
Expand Down
11 changes: 11 additions & 0 deletions test/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,17 @@ export function read_file(file, start = undefined, end = undefined) {
});
}

/**
 * Fetch a test data file and return its contents as raw bytes, without any text decoding.
 *
 * @param {String} file - file name, appended to the 'base/test/data/' path
 * @returns {Promise<Uint8Array>} resolves with the bytes of the response body;
 *   rejects with an Error if the response is not ok, or with the underlying
 *   error if the fetch or the body read fails
 */
export async function read_file_raw(file) {
  const response = await fetch('base/test/data/' + file);
  if (!response.ok) {
    throw new Error('error fetching file');
  }

  const rawBody = await response.arrayBuffer();
  return new Uint8Array(rawBody);
}

export function isWebKit() {
// simply testing the 'webkit' appears in the userAgent is not enough, as that is the case for
// any browser running on macOS.
Expand Down

0 comments on commit a904720

Please sign in to comment.