Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fix errors when scanning files > 4G
This commit resolves https://bugzilla.clamav.net/show_bug.cgi?id=12673

Changes in 0.103 to order of operations for creating fmaps and
performaing hashes of fmaps resulted errors when scanning files that are
4096M and a different (but related) error when scanning files > 4096M.
This is despite the fact that scanning is supposed to be limited to
--max-scansize (MaxScanSize) and was also apparently limited to
INT_MAX - 2 (aka ~1.999999G) back in 2014 to alleviate reported crashes
for a few large file formats.
(see https://bugzilla.clamav.net/show_bug.cgi?id=10960)
This last limitation was not documented, so I added it to the sample
clamd.conf.

Anyways, the main issue is that the fmap module was using "unsigned int"
and was then enforcing a limitation (verbose error messages) when that
a map length exceeded the capapacity of an unsigned int. This commit
switches the associated variables over to uint64_t, and while fmaps are
still limited to size_t in other places, the fmap module will at least
work with files > 4G on 64bit systems.

In testing this, I found that the time to hash a file, particularly when
hashing a file on an NTFS partition from Linux was really slow because
we were hashing in FILEBUFF chunks (about 8K) at a time.  Increasing
this to 10MB chunks speeds up scanning of large files.

Finally, now that hashing is performed immediately when an fmap is
created for a file, hashing of files larger than max-scansize was
occuring. This commit adds checks to bail out early if the file size
exceeds the maximum before creating an fmap. It will alert with the
Heuristics.Limits.Exceeded name if the heuristic is enabled.

Also fixed CheckFmapFeatures.cmake module that detects if
sysconf(_SC_PAGESIZE) is available.
  • Loading branch information
micahsnyder committed Apr 1, 2021
1 parent 89a4604 commit 1a3b784
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 42 deletions.
2 changes: 0 additions & 2 deletions cmake/CheckFmapFeatures.cmake
Expand Up @@ -124,9 +124,7 @@ check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
check_c_source_compiles(
"
#include <sys/types.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
int main(void)
{
int x = sysconf(_SC_PAGESIZE);
Expand Down
2 changes: 1 addition & 1 deletion docs/man/clamd.conf.5.in
Expand Up @@ -540,7 +540,7 @@ Sets the maximum amount of data to be scanned for each input file. Archives and
Default: 100M
.TP
\fBMaxFileSize SIZE\fR
Files larger than this limit won't be scanned. Affects the input file itself as well as files contained inside it (when the input file is an archive, a document or some other kind of container). \fBWarning: disabling this limit or setting it too high may result in severe damage to the system.\fR
Files larger than this limit won't be scanned. Affects the input file itself as well as files contained inside it (when the input file is an archive, a document or some other kind of container). \fBWarning: disabling this limit or setting it too high may result in severe damage to the system. Technical design limitations prevent ClamAV from scanning files greater than 2 GB at this time.\fR
.br
Default: 25M
.TP
Expand Down
3 changes: 2 additions & 1 deletion etc/clamd.conf.sample
Expand Up @@ -524,6 +524,8 @@ Example
# Value of 0 disables the limit.
# Note: disabling this limit or setting it too high may result in severe damage
# to the system.
# Technical design limitations prevent ClamAV from scanning files greater than
# 2 GB at this time.
# Default: 25M
#MaxFileSize 30M

Expand Down Expand Up @@ -788,4 +790,3 @@ Example
#
# Default: 5000
# BytecodeTimeout 1000

56 changes: 28 additions & 28 deletions libclamav/fmap.c
Expand Up @@ -85,9 +85,9 @@ pthread_mutex_t fmap_mutex = PTHREAD_MUTEX_INITIALIZER;

#define fmap_bitmap (m->bitmap)

static inline unsigned int fmap_align_items(unsigned int sz, unsigned int al);
static inline unsigned int fmap_align_to(unsigned int sz, unsigned int al);
static inline unsigned int fmap_which_page(fmap_t *m, size_t at);
static inline uint64_t fmap_align_items(uint64_t sz, uint64_t al);
static inline uint64_t fmap_align_to(uint64_t sz, uint64_t al);
static inline uint64_t fmap_which_page(fmap_t *m, size_t at);

static const void *handle_need(fmap_t *m, size_t at, size_t len, int lock);
static void handle_unneed_off(fmap_t *m, size_t at, size_t len);
Expand Down Expand Up @@ -164,7 +164,7 @@ static void unmap_win32(fmap_t *m)

fmap_t *fmap_check_empty(int fd, off_t offset, size_t len, int *empty, const char *name)
{ /* WIN32 */
unsigned int pages, mapsz;
uint64_t pages, mapsz;
int pgsz = cli_getpagesize();
STATBUF st;
fmap_t *m = NULL;
Expand Down Expand Up @@ -346,7 +346,7 @@ extern cl_fmap_t *cl_fmap_open_handle(void *handle, size_t offset, size_t len,
clcb_pread pread_cb, int use_aging)
{
cl_error_t status = CL_EMEM;
unsigned int pages;
uint64_t pages;
size_t mapsz, bitmap_size;
cl_fmap_t *m = NULL;
int pgsz = cli_getpagesize();
Expand All @@ -368,7 +368,7 @@ extern cl_fmap_t *cl_fmap_open_handle(void *handle, size_t offset, size_t len,

pages = fmap_align_items(len, pgsz);

bitmap_size = pages * sizeof(uint32_t);
bitmap_size = pages * sizeof(uint64_t);
mapsz = pages * pgsz;

m = cli_calloc(1, sizeof(fmap_t));
Expand Down Expand Up @@ -450,10 +450,10 @@ static void fmap_aging(fmap_t *m)
#ifdef ANONYMOUS_MAP
if (!m->aging) return;
if (m->paged * m->pgsz > UNPAGE_THRSHLD_HI) { /* we alloc'd too much */
unsigned int i, avail = 0, freeme[2048], maxavail = MIN(sizeof(freeme) / sizeof(*freeme), m->paged - UNPAGE_THRSHLD_LO / m->pgsz) - 1;
uint64_t i, avail = 0, freeme[2048], maxavail = MIN(sizeof(freeme) / sizeof(*freeme), m->paged - UNPAGE_THRSHLD_LO / m->pgsz) - 1;

for (i = 0; i < m->pages; i++) {
uint32_t s = fmap_bitmap[i];
uint64_t s = fmap_bitmap[i];
if ((s & (FM_MASK_PAGED | FM_MASK_LOCKED)) == FM_MASK_PAGED) {
/* page is paged and not locked: dec age */
if (s & FM_MASK_COUNT) fmap_bitmap[i]--;
Expand All @@ -464,7 +464,7 @@ static void fmap_aging(fmap_t *m)
avail++;
} else {
/* Insert sort onto a stack'd array - same performance as quickselect */
unsigned int insert_to = MIN(maxavail, avail) - 1, age = fmap_bitmap[i] & FM_MASK_COUNT;
uint64_t insert_to = MIN(maxavail, avail) - 1, age = fmap_bitmap[i] & FM_MASK_COUNT;
if (avail <= maxavail || (fmap_bitmap[freeme[maxavail]] & FM_MASK_COUNT) > age) {
while ((fmap_bitmap[freeme[insert_to]] & FM_MASK_COUNT) > age) {
freeme[insert_to + 1] = freeme[insert_to];
Expand Down Expand Up @@ -513,15 +513,15 @@ static void fmap_aging(fmap_t *m)
#endif
}

static int fmap_readpage(fmap_t *m, uint64_t first_page, uint32_t count, uint32_t lock_count)
static int fmap_readpage(fmap_t *m, uint64_t first_page, uint64_t count, uint64_t lock_count)
{
size_t readsz = 0, eintr_off;
char *pptr = NULL, errtxt[256];
uint32_t sbitmap;
uint64_t sbitmap;
uint64_t i, page = first_page, force_read = 0;

if ((size_t)(m->real_len) > (size_t)(UINT_MAX)) {
cli_dbgmsg("fmap_readage: size of file exceeds total prefaultible page size (unpacked file is too large)\n");
if ((uint64_t)(m->real_len) > (uint64_t)(m->pages * m->pgsz)) {
cli_dbgmsg("fmap_readpage: size of file exceeds total prefaultible page size (unpacked file is too large)\n");
return 1;
}

Expand Down Expand Up @@ -573,7 +573,7 @@ static int fmap_readpage(fmap_t *m, uint64_t first_page, uint32_t count, uint32_
if (force_read) {
/* we have some pending reads to perform */
if (m->handle_is_fd) {
unsigned int j;
uint64_t j;
int _fd = (int)(ptrdiff_t)m->handle;
for (j = first_page; j < page; j++) {
if (fmap_bitmap[j] & FM_MASK_SEEN) {
Expand Down Expand Up @@ -613,7 +613,7 @@ static int fmap_readpage(fmap_t *m, uint64_t first_page, uint32_t count, uint32_
cli_strerror(errno, errtxt, sizeof(errtxt));
cli_errmsg("fmap_readpage: pread error: %s\n", errtxt);
} else {
cli_warnmsg("fmap_readpage: pread fail: asked for %lu bytes @ offset %lu, got %lu\n", (long unsigned int)readsz, (long unsigned int)target_offset, (long unsigned int)got);
cli_warnmsg("fmap_readpage: pread fail: asked for %zu bytes @ offset %zu, got %zd\n", readsz, (size_t)target_offset, got);
}
return 1;
}
Expand Down Expand Up @@ -672,9 +672,9 @@ static const void *handle_need(fmap_t *m, size_t at, size_t len, int lock)
return (void *)ret;
}

static void fmap_unneed_page(fmap_t *m, unsigned int page)
static void fmap_unneed_page(fmap_t *m, uint64_t page)
{
uint32_t s = fmap_bitmap[page];
uint64_t s = fmap_bitmap[page];

if ((s & (FM_MASK_PAGED | FM_MASK_LOCKED)) == (FM_MASK_PAGED | FM_MASK_LOCKED)) {
/* page is paged and locked: check lock count */
Expand All @@ -693,7 +693,7 @@ static void fmap_unneed_page(fmap_t *m, unsigned int page)

static void handle_unneed_off(fmap_t *m, size_t at, size_t len)
{
unsigned int i, first_page, last_page;
uint64_t i, first_page, last_page;
if (!m->aging) return;
if (!len) {
cli_warnmsg("fmap_unneed: attempted void unneed\n");
Expand Down Expand Up @@ -737,7 +737,7 @@ static void unmap_malloc(fmap_t *m)

static const void *handle_need_offstr(fmap_t *m, size_t at, size_t len_hint)
{
unsigned int i, first_page, last_page;
uint64_t i, first_page, last_page;
void *ptr = (void *)((char *)m->data + at);

if (!len_hint || len_hint > m->real_len - at)
Expand All @@ -753,7 +753,7 @@ static const void *handle_need_offstr(fmap_t *m, size_t at, size_t len_hint)

for (i = first_page; i <= last_page; i++) {
char *thispage = (char *)m->data + i * m->pgsz;
unsigned int scanat, scansz;
uint64_t scanat, scansz;

if (fmap_readpage(m, i, 1, 1)) {
last_page = i - 1;
Expand All @@ -777,7 +777,7 @@ static const void *handle_need_offstr(fmap_t *m, size_t at, size_t len_hint)

static const void *handle_gets(fmap_t *m, char *dst, size_t *at, size_t max_len)
{
unsigned int i, first_page, last_page;
uint64_t i, first_page, last_page;
char *src = (void *)((char *)m->data + *at);
char *endptr = NULL;
size_t len = MIN(max_len - 1, m->real_len - *at);
Expand All @@ -793,7 +793,7 @@ static const void *handle_gets(fmap_t *m, char *dst, size_t *at, size_t max_len)

for (i = first_page; i <= last_page; i++) {
char *thispage = (char *)m->data + i * m->pgsz;
unsigned int scanat, scansz;
uint64_t scanat, scansz;

if (fmap_readpage(m, i, 1, 0))
return NULL;
Expand Down Expand Up @@ -941,17 +941,17 @@ fmap_t *fmap(int fd, off_t offset, size_t len, const char *name)
return fmap_check_empty(fd, offset, len, &unused, name);
}

static inline unsigned int fmap_align_items(unsigned int sz, unsigned int al)
static inline uint64_t fmap_align_items(uint64_t sz, uint64_t al)
{
return sz / al + (sz % al != 0);
}

static inline unsigned int fmap_align_to(unsigned int sz, unsigned int al)
static inline uint64_t fmap_align_to(uint64_t sz, uint64_t al)
{
return al * fmap_align_items(sz, al);
}

static inline unsigned int fmap_which_page(fmap_t *m, size_t at)
static inline uint64_t fmap_which_page(fmap_t *m, size_t at)
{
return at / m->pgsz;
}
Expand Down Expand Up @@ -984,8 +984,8 @@ cl_error_t fmap_dump_to_file(fmap_t *map, const char *filepath, const char *tmpd
} else if ((start_offset != 0) && (end_offset != map->real_len)) {
/* If we're only dumping a portion of the file, inlcude the offsets in the prefix,...
* e.g. tmp filename will become something like: filebase.500-1200.<randhex> */
uint32_t prefix_len = strlen(filebase) + 1 + SIZE_T_CHARLEN + 1 + SIZE_T_CHARLEN + 1;
prefix = malloc(prefix_len);
size_t prefix_len = strlen(filebase) + 1 + SIZE_T_CHARLEN + 1 + SIZE_T_CHARLEN + 1;
prefix = malloc(prefix_len);
if (NULL == prefix) {
cli_errmsg("fmap_dump_to_file: Failed to allocate memory for tempfile prefix.\n");
if (NULL != filebase)
Expand Down Expand Up @@ -1086,7 +1086,7 @@ cl_error_t fmap_get_MD5(unsigned char *hash, fmap_t *map)

while (todo) {
const void *buf;
size_t readme = todo < FILEBUFF ? todo : FILEBUFF;
size_t readme = todo < 1024 * 1024 * 10 ? todo : 1024 * 1024 * 10;

if (!(buf = fmap_need_off_once(map, at, readme))) {
cl_hash_destroy(hashctx);
Expand Down
12 changes: 6 additions & 6 deletions libclamav/fmap.h
Expand Up @@ -46,12 +46,12 @@ struct cl_fmap {

/* internal */
time_t mtime;
unsigned int pages;
uint64_t pages;
uint64_t pgsz;
unsigned int paged;
unsigned short aging;
unsigned short dont_cache_flag;
unsigned short handle_is_fd;
uint64_t paged;
uint16_t aging;
uint16_t dont_cache_flag;
uint16_t handle_is_fd;

/* memory interface */
const void *data;
Expand Down Expand Up @@ -81,7 +81,7 @@ struct cl_fmap {
HANDLE mh;
#endif
unsigned char maphash[16];
uint32_t *bitmap;
uint64_t *bitmap;
char *name;
};

Expand Down
31 changes: 28 additions & 3 deletions libclamav/scanners.c
Expand Up @@ -1506,7 +1506,8 @@ static cl_error_t vba_scandata(const unsigned char *data, size_t len, cli_ctx *c
cli_ac_freedata(&tmdata);
cli_ac_freedata(&gmdata);

return (ret != CL_CLEAN) ? ret : viruses_found ? CL_VIRUS : CL_CLEAN;
return (ret != CL_CLEAN) ? ret : viruses_found ? CL_VIRUS
: CL_CLEAN;
}

#define min(x, y) ((x) < (y) ? (x) : (y))
Expand Down Expand Up @@ -4746,7 +4747,12 @@ static cl_error_t scan_common(cl_fmap_t *map, const char *filepath, const char *
return CL_ENULLARG;
}

/* We have a limit of around 2.17GB (INT_MAX - 2). Enforce it here. */
/* We have a limit of around 2GB (INT_MAX - 2). Enforce it here. */
/* TODO: Large file support is large-ly untested. Remove this restriction
* and test with a large set of large files of various types. libclamav's
* integer type safety has come a long way since 2014, so it's possible
* we could lift this restriction, but at least one of the parsers is
* bound to behave badly with large files. */
if ((size_t)(map->real_len) > (size_t)(INT_MAX - 2))
return CL_CLEAN;

Expand Down Expand Up @@ -5023,10 +5029,20 @@ cl_error_t cl_scandesc_callback(int desc, const char *filename, const char **vir
goto done;
}
if (sb.st_size <= 5) {
cli_dbgmsg("cl_scandesc_callback: File too small (%u bytes), ignoring\n", (unsigned int)sb.st_size);
cli_dbgmsg("cl_scandesc_callback: File too small (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
status = CL_CLEAN;
goto done;
}
if ((uint64_t)sb.st_size > engine->maxfilesize) {
cli_dbgmsg("cl_scandesc_callback: File too large (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
engine->cb_virus_found(desc, "Heuristics.Limits.Exceeded", context);
status = CL_VIRUS;
} else {
status = CL_CLEAN;
}
goto done;
}

if (NULL != filename) {
(void)cli_basename(filename, strlen(filename), &filename_base);
Expand All @@ -5053,6 +5069,15 @@ cl_error_t cl_scandesc_callback(int desc, const char *filename, const char **vir

cl_error_t cl_scanmap_callback(cl_fmap_t *map, const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
{
if (map->real_len > engine->maxfilesize) {
cli_dbgmsg("cl_scandesc_callback: File too large (%zu bytes), ignoring\n", map->real_len);
if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
engine->cb_virus_found(fmap_fd(map), "Heuristics.Limits.Exceeded", context);
return CL_VIRUS;
}
return CL_CLEAN;
}

return scan_common(map, filename, virname, scanned, engine, scanoptions, context);
}

Expand Down
3 changes: 2 additions & 1 deletion win32/conf_examples/clamd.conf.sample
Expand Up @@ -497,6 +497,8 @@ TCPAddr 127.0.0.1
# Value of 0 disables the limit.
# Note: disabling this limit or setting it too high may result in severe damage
# to the system.
# Technical design limitations prevent ClamAV from scanning files greater than
# 2 GB at this time.
# Default: 25M
#MaxFileSize 30M

Expand Down Expand Up @@ -653,4 +655,3 @@ TCPAddr 127.0.0.1
#
# Default: 5000
# BytecodeTimeout 1000

0 comments on commit 1a3b784

Please sign in to comment.