Skip to content

Commit

Permalink
fix(file uploads): stricter filename sanitization
Browse files Browse the repository at this point in the history
  • Loading branch information
sr258 committed Jun 11, 2020
1 parent ce46d25 commit 6eefa26
Show file tree
Hide file tree
Showing 8 changed files with 170 additions and 25 deletions.
13 changes: 12 additions & 1 deletion src/implementation/db/MongoS3ContentStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ export default class MongoS3ContentStorage implements IContentStorage {
contentId: ContentId,
user: IUser
) => Promise<Permission[]>;
/**
* These characters will be removed from files that are saved to S3.
* There is a very strict default list that basically only leaves
* alphanumeric filenames intact. Should you need more relaxed
* settings you can specify them here.
*/
invalidCharactersRegexp?: RegExp;
/**
* Indicates how long keys in S3 can be. Defaults to 1024. (S3
* supports 1024 characters, other systems such as Minio might only
Expand Down Expand Up @@ -682,6 +689,10 @@ export default class MongoS3ContentStorage implements IContentStorage {
* @returns the clean filename
*/
public sanitizeFilename(filename: string): string {
return sanitizeFilename(filename, this.maxKeyLength);
return sanitizeFilename(
filename,
this.maxKeyLength,
this.options?.invalidCharactersRegexp
);
}
}
13 changes: 12 additions & 1 deletion src/implementation/db/S3TemporaryFileStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ export default class S3TemporaryFileStorage implements ITemporaryFileStorage {
userId: string,
filename?: string
) => Promise<Permission[]>;
/**
* These characters will be removed from files that are saved to S3.
* There is a very strict default list that basically only leaves
* alphanumeric filenames intact. Should you need more relaxed
* settings you can specify them here.
*/
invalidCharactersRegexp?: RegExp;
/**
* Indicates how long keys in S3 can be. Defaults to 1024. (S3
* supports 1024 characters, other systems such as Minio might only
Expand Down Expand Up @@ -254,7 +261,11 @@ export default class S3TemporaryFileStorage implements ITemporaryFileStorage {
* @returns the clean filename
*/
public sanitizeFilename(filename: string): string {
return sanitizeFilename(filename, this.maxKeyLength);
return sanitizeFilename(
filename,
this.maxKeyLength,
this.options?.invalidCharactersRegexp
);
}

/**
Expand Down
23 changes: 19 additions & 4 deletions src/implementation/db/S3Utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ const log = new Logger('S3Utils');
* @param filename the filename to check
* @returns no return value; throws errors if the filename is not valid
*/
export function validateFilename(filename: string): void {
export function validateFilename(
filename: string,
invalidCharactersRegExp?: RegExp
): void {
if (/\.\.\//.test(filename)) {
log.error(
`Relative paths in filenames are not allowed: ${filename} is illegal`
Expand All @@ -29,19 +32,31 @@ export function validateFilename(filename: string): void {
// expect for ranges of non-printable ASCII characters:
// &$@=;:+ ,?\\{^}%`]'">[~<#

if (/[&\$@=;:\+\s,\?\\\{\^\}%`\]'">\[~<#|]/.test(filename)) {
if (/[^A-Za-z0-9\-._!()\/]/.test(filename)) {
log.error(`Found illegal character in filename: ${filename}`);
throw new H5pError('illegal-filename', { filename }, 400);
}
}

/**
* Sanitizes a filename or path by shortening it to the specified maximum length
* and removing the invalid characters in the RegExp. If you don't specify a
* RegExp a very strict invalid character list will be used that only leaves
* alphanumeric filenames untouched.
* @param filename the filename or path (with UNIX slash separator) to sanitize
* @param maxFileLength the filename will be shortened to this length
* @param invalidCharactersRegExp these characters will be removed from the
* filename
* @returns the cleaned filename
*/
export function sanitizeFilename(
filename: string,
maxFileLength: number
maxFileLength: number,
invalidCharactersRegExp?: RegExp
): string {
return generalizedSanitizeFilename(
filename,
/[&\$@=;:\+\s,\?\\\{\^\}%`\]'">\[~<#|]/g,
invalidCharactersRegExp ?? /[^A-Za-z0-9\-._!()\/]/g,
maxFileLength
);
}
25 changes: 20 additions & 5 deletions src/implementation/fs/DirectoryTemporaryFileStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,24 @@ export default class DirectoryTemporaryFileStorage
/**
* @param directory the directory in which the temporary files are stored.
* Must be read- and write accessible
* @param maxPathLength how long paths can be in the filesystem (Differs
* between Windows, Linux and MacOS, so check out the limitation of your
* system!)
*/
constructor(
private directory: string,
options?: { maxPathLength?: number }
protected options?: {
/**
* These characters will be removed from files that are saved to S3.
* There is a very strict default list that basically only leaves
* alphanumeric filenames intact. Should you need more relaxed
* settings you can specify them here.
*/
invalidCharactersRegexp?: RegExp;
/*
* How long paths can be in the filesystem (Differs between Windows,
* Linux and MacOS, so check out the limitation of your
* system!)
*/
maxPathLength?: number;
}
) {
fsExtra.ensureDirSync(directory);
this.maxFileLength =
Expand Down Expand Up @@ -118,7 +129,11 @@ export default class DirectoryTemporaryFileStorage
* @returns the clean filename
*/
public sanitizeFilename(filename: string): string {
return sanitizeFilename(filename, this.maxFileLength);
return sanitizeFilename(
filename,
this.maxFileLength,
this.options?.invalidCharactersRegexp
);
}

public async saveFile(
Expand Down
25 changes: 20 additions & 5 deletions src/implementation/fs/FileContentStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,24 @@ export default class FileContentStorage implements IContentStorage {

/**
* @param contentPath The absolute path to the directory where the content should be stored
* @param maxPathLength how long paths can be in the filesystem (Differs
* between Windows, Linux and MacOS, so check out the limitation of your
* system!)
*/
constructor(
protected contentPath: string,
options?: { maxPathLength?: number }
protected options?: {
/**
* These characters will be removed from files that are saved to S3.
* There is a very strict default list that basically only leaves
* alphanumeric filenames intact. Should you need more relaxed
* settings you can specify them here.
*/
invalidCharactersRegexp?: RegExp;
/*
* How long paths can be in the filesystem (Differs between Windows,
* Linux and MacOS, so check out the limitation of your
* system!)
*/
maxPathLength?: number;
}
) {
fsExtra.ensureDirSync(contentPath);
this.maxFileLength =
Expand Down Expand Up @@ -409,6 +420,10 @@ export default class FileContentStorage implements IContentStorage {
* @returns the clean filename
*/
public sanitizeFilename(filename: string): string {
return sanitizeFilename(filename, this.maxFileLength);
return sanitizeFilename(
filename,
this.maxFileLength,
this.options?.invalidCharactersRegexp
);
}
}
23 changes: 21 additions & 2 deletions src/implementation/fs/filenameUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,25 @@ export function checkFilename(filename: string): void {
}
}

export function sanitizeFilename(filename: string, maxLength: number): string {
return generalizedSanitizeFilename(filename, /<>:"\|\?\*/g, maxLength);
/**
* Sanitizes a filename or path by shortening it to the specified maximum length
* and removing the invalid characters in the RegExp. If you don't specify a
* RegExp a very strict invalid character list will be used that only leaves
* alphanumeric filenames untouched.
* @param filename the filename or path (with UNIX slash separator) to sanitize
* @param maxFileLength the filename will be shortened to this length
* @param invalidCharactersRegex these characters will be removed from the
* filename
* @returns the cleaned filename
*/
export function sanitizeFilename(
filename: string,
maxFileLength: number,
invalidCharactersRegex?: RegExp
): string {
return generalizedSanitizeFilename(
filename,
invalidCharactersRegex ?? /[^A-Za-z0-9\-._!()\/]/g,
maxFileLength
);
}
25 changes: 18 additions & 7 deletions src/implementation/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,28 @@ export function generalizedSanitizeFilename(
// We keep / and \ as the "filename" can be a relative path with
// directories. We don't use the sanitize-filename package, as it
// also removes directory separators.
const cleanedFilename = filename.replace(invalidCharacterRegex, '');
let cleanedFilename = filename.replace(invalidCharacterRegex, '');

// Second, shorten the filename if it is too long.
// Should the filename only contain the extension now (because all
// characters of the basename were invalid), we add a generic filename.
let extension = path.extname(cleanedFilename);
let basename = path.basename(cleanedFilename, extension);
const dirname = path.dirname(cleanedFilename);
if (extension === '') {
extension = basename;
basename = 'file';
cleanedFilename = `${dirname}/${basename}${extension}`;
}

// Shorten the filename if it is too long.
const numberOfCharactersToCut = cleanedFilename.length - maxLength;
if (numberOfCharactersToCut < 0) {
return cleanedFilename;
}

const extension = path.extname(cleanedFilename);
const dirname = path.dirname(cleanedFilename);
const basename = path.basename(cleanedFilename);
const finalLength = Math.max(1, basename.length - numberOfCharactersToCut);
return path.join(dirname, `${basename.substr(0, finalLength)}${extension}`);
const finalBasenameLength = Math.max(
1,
basename.length - numberOfCharactersToCut
);
return `${dirname}/${basename.substr(0, finalBasenameLength)}${extension}`;
}
48 changes: 48 additions & 0 deletions test/implementation/S3Utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { sanitizeFilename } from '../../src/implementation/db/S3Utils';

describe('S3Utils', () => {
describe('sanitizeFilename', () => {
it('should leave valid filenames intact', () => {
const sanitizedName1 = sanitizeFilename('images/file.jpg', 255);
expect(sanitizedName1).toEqual('images/file.jpg');

const sanitizedName2 = sanitizeFilename('file.any', 255);
expect(sanitizedName2).toEqual('file.any');

const sanitizedName3 = sanitizeFilename('file.any.jpg', 255);
expect(sanitizedName3).toEqual('file.any.jpg');
});

it('should remove invalid characters', () => {
const sanitizedName = sanitizeFilename(
'i§$mages/äÜß`fileäüöä.j#pg*',
255
);
expect(sanitizedName).toEqual('images/file.jpg');
});

it('should add a generic filename if all characters in a filename are invalid', () => {
const sanitizedName = sanitizeFilename(
'images/中文的名字.jpg',
255
);
expect(sanitizedName).toEqual('images/file.jpg');
});

it('should work even if all characters are invalid', () => {
const sanitizedName = sanitizeFilename(
'images/中文的名字.中文',
255
);
expect(sanitizedName).toEqual('images/file.');
});

it('should shorten long filenames', () => {
const sanitizedName = sanitizeFilename(
'images/aVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongFilename.jpg',
24
);
expect(sanitizedName).toEqual('images/aVeryVeryVery.jpg');
});
});
});

0 comments on commit 6eefa26

Please sign in to comment.