From 774700c14ccbbb26318af9a14bcf677df6c98c15 Mon Sep 17 00:00:00 2001 From: Micah Snyder Date: Sun, 25 Sep 2022 21:46:43 -0700 Subject: [PATCH] Reduce RAM usage; only allocate offset fields as needed The AC, BM, and PCRE pattern structures each store a number of fields for offsets when, at least for AC patterns, the offsets are most often not specified (aka wildcard / any). This commit moves those values to a separate structure that is optional. If not specified, then the pattern will be treated as having a wildcard offset (any offset). I was also able to shrink the size of the `boundary` field from 4 bytes to 1 byte. Practically speaking, with today's databases, this appears to reduce RAM usage by about 39.22MB. It's not huge. But given I think this also improves the readability of the code, I'll call it a win. --- libclamav/bytecode.c | 32 ++-- libclamav/matcher-ac.c | 142 ++++++++++----- libclamav/matcher-ac.h | 5 +- libclamav/matcher-bm.c | 84 ++++++--- libclamav/matcher-bm.h | 6 +- libclamav/matcher-byte-comp.c | 2 +- libclamav/matcher-offset.h | 65 +++++++ libclamav/matcher-pcre.c | 81 +++++---- libclamav/matcher-pcre.h | 3 +- libclamav/matcher.c | 329 ++++++++++++++++++---------------- libclamav/matcher.h | 61 ++++--- libclamav/readdb.c | 12 +- libclamav/regex_list.c | 18 +- libclamav/unzip.c | 15 +- libclamav/yara_exec.c | 4 +- libclamav_rust/src/sys.rs | 28 +-- 16 files changed, 537 insertions(+), 350 deletions(-) create mode 100644 libclamav/matcher-offset.h diff --git a/libclamav/bytecode.c b/libclamav/bytecode.c index 13ddd099e3..cab00469f3 100644 --- a/libclamav/bytecode.c +++ b/libclamav/bytecode.c @@ -66,22 +66,22 @@ static const uint32_t nomatch[64] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; static const uint32_t nooffsets[64] = { - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, 
CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, - CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE}; + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, + CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE, CLI_SIZE_NONE}; static const uint16_t nokind; static const uint32_t nofilesize; diff --git a/libclamav/matcher-ac.c b/libclamav/matcher-ac.c index 2543c3354c..cbdb560e5d 100644 --- a/libclamav/matcher-ac.c +++ b/libclamav/matcher-ac.c @@ -697,6 +697,9 @@ void cli_ac_free(struct cli_matcher *root) 
if (patt->special) { mpool_ac_free_special(root->mempool, patt); } + if (patt->offset_data) { + MPOOL_FREE(root->mempool, patt->offset_data); + } MPOOL_FREE(root->mempool, patt); } @@ -1392,7 +1395,7 @@ cl_error_t cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t return CL_EMEM; } for (i = 0; i < reloffsigs * 2; i += 2) - data->offset[i] = CLI_OFF_NONE; + data->offset[i] = CLI_SIZE_NONE; } data->partsigs = partsigs; @@ -1501,20 +1504,20 @@ cl_error_t cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t return CL_EMEM; } for (j = 0; j < 64; j++) { - data->lsigsuboff_last[0][j] = CLI_OFF_NONE; - data->lsigsuboff_first[0][j] = CLI_OFF_NONE; + data->lsigsuboff_last[0][j] = CLI_SIZE_NONE; + data->lsigsuboff_first[0][j] = CLI_SIZE_NONE; } for (i = 1; i < lsigs; i++) { data->lsigsuboff_last[i] = data->lsigsuboff_last[0] + 64 * i; data->lsigsuboff_first[i] = data->lsigsuboff_first[0] + 64 * i; for (j = 0; j < 64; j++) { - data->lsigsuboff_last[i][j] = CLI_OFF_NONE; - data->lsigsuboff_first[i][j] = CLI_OFF_NONE; + data->lsigsuboff_last[i][j] = CLI_SIZE_NONE; + data->lsigsuboff_first[i][j] = CLI_SIZE_NONE; } } } for (i = 0; i < 32; i++) - data->macro_lastmatch[i] = CLI_OFF_NONE; + data->macro_lastmatch[i] = CLI_SIZE_NONE; data->min_partno = 1; @@ -1532,13 +1535,23 @@ cl_error_t cli_ac_caloff(const struct cli_matcher *root, struct cli_ac_data *dat for (i = 0; i < root->ac_reloff_num; i++) { patt = root->ac_reloff[i]; - if (!info) { - data->offset[patt->offset_min] = CLI_OFF_NONE; - } else if (CL_SUCCESS != (ret = cli_caloff(NULL, info, root->type, patt->offdata, &data->offset[patt->offset_min], &data->offset[patt->offset_max]))) { - cli_errmsg("cli_ac_caloff: Can't calculate relative offset in signature for %s\n", patt->virname); - return ret; - } else if ((data->offset[patt->offset_min] != CLI_OFF_NONE) && (data->offset[patt->offset_min] + patt->length[1] > info->fsize)) { - data->offset[patt->offset_min] = CLI_OFF_NONE; + + if 
(patt->offset_data != NULL) { + if (!info) { + data->offset[patt->offset_data->offset_min] = CLI_SIZE_NONE; + } else { + ret = matcher_calculate_relative_offsets(info, patt->offset_data, &data->offset[patt->offset_data->offset_min], &data->offset[patt->offset_data->offset_max]); + if (CL_SUCCESS != ret) { + cli_errmsg("cli_ac_caloff: Can't calculate relative offset in signature for %s\n", patt->virname); + return ret; + } + + if ((data->offset[patt->offset_data->offset_min] != CLI_SIZE_NONE) && + (data->offset[patt->offset_data->offset_min] + patt->length[1] > info->fsize)) { + + data->offset[patt->offset_data->offset_min] = CLI_SIZE_NONE; + } + } } } @@ -1642,13 +1655,13 @@ cl_error_t lsig_sub_matched(const struct cli_matcher *root, struct cli_ac_data * const struct cli_ac_lsig *ac_lsig = root->ac_lsigtable[lsig_id]; const struct cli_lsig_tdb *tdb = &ac_lsig->tdb; - if (realoff != CLI_OFF_NONE) { - if (mdata->lsigsuboff_first[lsig_id][subsig_id] == CLI_OFF_NONE) { + if (realoff != CLI_SIZE_NONE) { + if (mdata->lsigsuboff_first[lsig_id][subsig_id] == CLI_SIZE_NONE) { /* If this is the first subsig in the lsig, store the offset in the first-list. */ mdata->lsigsuboff_first[lsig_id][subsig_id] = realoff; } - if (mdata->lsigsuboff_last[lsig_id][subsig_id] != CLI_OFF_NONE && + if (mdata->lsigsuboff_last[lsig_id][subsig_id] != CLI_SIZE_NONE && /* If this isn't the first subsig match for this logical sig and the offset is earlier in the file than the last subsig match, don't count it. 
*/ ((!partial && realoff <= mdata->lsigsuboff_last[lsig_id][subsig_id]) || @@ -1740,7 +1753,7 @@ cl_error_t lsig_sub_matched(const struct cli_matcher *root, struct cli_ac_data * /* start of previous lsig subsig match */ last_macroprev_match = mdata->lsigsuboff_last[lsig_id][subsig_id]; - if (last_macro_match == CLI_OFF_NONE || + if (last_macro_match == CLI_SIZE_NONE || last_macroprev_match + smin > last_macro_match || last_macroprev_match + smax < last_macro_match) { cli_dbgmsg("Canceled false lsig macro match\n"); @@ -1769,7 +1782,7 @@ cl_error_t cli_ac_chkmacro(struct cli_matcher *root, struct cli_ac_data *data, u /* Loop through all subsigs, and if they are tied to macros check that the * macro matched at a correct distance */ for (i = 0; i < tdb->subsigs; i++) { - rc = lsig_sub_matched(root, data, lsig_id, i, CLI_OFF_NONE, 0); + rc = lsig_sub_matched(root, data, lsig_id, i, CLI_SIZE_NONE, 0); if (rc != CL_SUCCESS) return rc; } @@ -1826,20 +1839,37 @@ cl_error_t cli_ac_scanbuff( continue; } bp = i + 1 - patt->depth; - if (patt->offdata[0] != CLI_OFF_VERSION && patt->offdata[0] != CLI_OFF_MACRO && !pattN->next_same && (patt->offset_min != CLI_OFF_ANY) && (!patt->sigid || patt->partno == 1)) { - if (patt->offset_min == CLI_OFF_NONE) { + + if ((patt->offset_data != NULL) && + (patt->offset_data->type != PATTERN_OFF_VERSION) && + (patt->offset_data->type != PATTERN_OFF_MACRO) && + (patt->offset_data->offset_min != CLI_SIZE_ANY) && + (pattN->next_same == NULL) && + (patt->sigid == 0 || patt->partno == 1)) { + + if ((patt->offset_data != NULL) && + (patt->offset_data->offset_min == CLI_SIZE_NONE)) { + pattN = pattN->next; continue; } + exptoff[0] = offset + bp - patt->prefix_length[2]; /* lower offset end */ exptoff[1] = offset + bp - patt->prefix_length[1]; /* higher offset end */ - if (patt->offdata[0] == CLI_OFF_ABSOLUTE) { - if (patt->offset_max < exptoff[0] || patt->offset_min > exptoff[1]) { + + if (patt->offset_data->type == PATTERN_OFF_ABSOLUTE) { + if 
(patt->offset_data->offset_max < exptoff[0] || + patt->offset_data->offset_min > exptoff[1]) { + pattN = pattN->next; continue; } + } else { - if (mdata->offset[patt->offset_min] == CLI_OFF_NONE || mdata->offset[patt->offset_max] < exptoff[0] || mdata->offset[patt->offset_min] > exptoff[1]) { + if ((mdata->offset[patt->offset_data->offset_min] == CLI_SIZE_NONE) || + (mdata->offset[patt->offset_data->offset_max] < exptoff[0]) || + (mdata->offset[patt->offset_data->offset_min] > exptoff[1])) { + pattN = pattN->next; continue; } @@ -1859,31 +1889,44 @@ cl_error_t cli_ac_scanbuff( } realoff = offset + matchstart; - if (pt->offdata[0] == CLI_OFF_VERSION) { - if (false == cli_hashset_contains_maybe_noalloc(mdata->vinfo, realoff)) { - ptN = ptN->next_same; - continue; - } - cli_dbgmsg("cli_ac_scanbuff: VI match for offset %x\n", realoff); - } else if (pt->offdata[0] == CLI_OFF_MACRO) { - mdata->macro_lastmatch[patt->offdata[1]] = realoff; - ptN = ptN->next_same; - continue; - } else if (pt->offset_min != CLI_OFF_ANY && (!pt->sigid || pt->partno == 1)) { - if (pt->offset_min == CLI_OFF_NONE) { - ptN = ptN->next_same; - continue; - } - if (pt->offdata[0] == CLI_OFF_ABSOLUTE) { - if (pt->offset_max < realoff || pt->offset_min > realoff) { + + if (pt->offset_data != NULL) { + if (pt->offset_data->type == PATTERN_OFF_VERSION) { + if (false == cli_hashset_contains_maybe_noalloc(mdata->vinfo, realoff)) { ptN = ptN->next_same; continue; } - } else { - if (mdata->offset[pt->offset_min] == CLI_OFF_NONE || mdata->offset[pt->offset_max] < realoff || mdata->offset[pt->offset_min] > realoff) { + + cli_dbgmsg("cli_ac_scanbuff: VI match for offset %x\n", realoff); + + } else if (pt->offset_data->type == PATTERN_OFF_MACRO) { + mdata->macro_lastmatch[patt->offset_data->offset_value] = realoff; + ptN = ptN->next_same; + continue; + + } else if (pt->offset_data->offset_min != CLI_SIZE_ANY && (!pt->sigid || pt->partno == 1)) { + if (pt->offset_data->offset_min == CLI_SIZE_NONE) { ptN = 
ptN->next_same; continue; } + + if (pt->offset_data->type == PATTERN_OFF_ABSOLUTE) { + if (pt->offset_data->offset_max < realoff || + pt->offset_data->offset_min > realoff) { + + ptN = ptN->next_same; + continue; + } + + } else { + if (mdata->offset[pt->offset_data->offset_min] == CLI_SIZE_NONE || + mdata->offset[pt->offset_data->offset_max] < realoff || + mdata->offset[pt->offset_data->offset_min] > realoff) { + + ptN = ptN->next_same; + continue; + } + } } } @@ -3064,7 +3107,7 @@ cl_error_t cli_ac_addsig(struct cli_matcher *root, const char *virname, const ch new->virname = virname_copy; } - ret = cli_caloff(offset, NULL, root->type, new->offdata, &new->offset_min, &new->offset_max); + ret = matcher_decode_offset_string(root, offset, root->type, &new->offset_data); if (ret != CL_SUCCESS) { MPOOL_FREE(root->mempool, new->prefix ? new->prefix : new->pattern); mpool_ac_free_special(root->mempool, new); @@ -3085,9 +3128,12 @@ cl_error_t cli_ac_addsig(struct cli_matcher *root, const char *virname, const ch return ret; } - if ((new->offdata[0] != CLI_OFF_ANY) && - (new->offdata[0] != CLI_OFF_ABSOLUTE) && - (new->offdata[0] != CLI_OFF_MACRO)) { + if (new->offset_data != NULL && + (new->offset_data->type != PATTERN_OFF_ABSOLUTE) && + (new->offset_data->type != PATTERN_OFF_MACRO)) { + /* + * This new pattern has a relative offset. 
+ */ root->ac_reloff = (struct cli_ac_patt **)MPOOL_REALLOC2(root->mempool, root->ac_reloff, (root->ac_reloff_num + 1) * sizeof(struct cli_ac_patt *)); if (!root->ac_reloff) { @@ -3096,8 +3142,8 @@ cl_error_t cli_ac_addsig(struct cli_matcher *root, const char *virname, const ch } root->ac_reloff[root->ac_reloff_num] = new; - new->offset_min = root->ac_reloff_num * 2; - new->offset_max = new->offset_min + 1; + new->offset_data->offset_min = root->ac_reloff_num * 2; + new->offset_data->offset_max = new->offset_data->offset_min + 1; root->ac_reloff_num++; } diff --git a/libclamav/matcher-ac.h b/libclamav/matcher-ac.h index c8e74d05ee..89ba37bc53 100644 --- a/libclamav/matcher-ac.h +++ b/libclamav/matcher-ac.h @@ -28,6 +28,7 @@ #include "clamav-types.h" #include "fmap.h" #include "hashtab.h" +#include "matcher-offset.h" #define AC_CH_MAXDIST 32 #define ACPATT_ALTN_MAXNEST 15 @@ -99,9 +100,9 @@ struct cli_ac_patt { uint16_t ch_maxdist[2]; uint16_t parts, partno, special, special_pattern; struct cli_ac_special **special_table; + pattern_offset_data *offset_data; uint16_t rtype, type; - uint32_t offdata[4], offset_min, offset_max; - uint32_t boundary; + uint16_t boundary; uint8_t depth; uint8_t sigopts; }; diff --git a/libclamav/matcher-bm.c b/libclamav/matcher-bm.c index 3bd541e2d5..374f6f0036 100644 --- a/libclamav/matcher-bm.c +++ b/libclamav/matcher-bm.c @@ -52,15 +52,18 @@ cl_error_t cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern, return CL_EMALFDB; } - if (CL_SUCCESS != (ret = cli_caloff(offset, NULL, root->type, pattern->offdata, &pattern->offset_min, &pattern->offset_max))) { + ret = matcher_decode_offset_string(root, offset, root->type, &pattern->offset_data); + if (CL_SUCCESS != ret) { cli_errmsg("cli_bm_addpatt: Can't calculate offset for signature %s\n", pattern->virname); return ret; } - if (pattern->offdata[0] != CLI_OFF_ANY) { - if (pattern->offdata[0] == CLI_OFF_ABSOLUTE) + + if (pattern->offset_data != NULL) { + if 
(pattern->offset_data->type == PATTERN_OFF_ABSOLUTE) { root->bm_absoff_num++; - else + } else { root->bm_reloff_num++; + } } /* bm_offmode doesn't use the prefilter for BM signatures anyway, so @@ -125,8 +128,12 @@ cl_error_t cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern, return CL_EMEM; } root->bm_pattab[root->bm_patterns] = pattern; - if (pattern->offdata[0] != CLI_OFF_ABSOLUTE) - pattern->offset_min = root->bm_patterns; + + if ((pattern->offset_data != NULL) && + (pattern->offset_data->type != PATTERN_OFF_ABSOLUTE)) { + + pattern->offset_data->offset_min = root->bm_patterns; + } } root->bm_patterns++; @@ -180,22 +187,32 @@ cl_error_t cli_bm_initoff(const struct cli_matcher *root, struct cli_bm_off *dat } for (i = 0; i < root->bm_patterns; i++) { patt = root->bm_pattab[i]; - if (patt->offdata[0] == CLI_OFF_ABSOLUTE) { - data->offtab[data->cnt] = patt->offset_min + patt->prefix_length; - if (data->offtab[data->cnt] >= info->fsize) - continue; - data->cnt++; - } else if (CL_SUCCESS != (ret = cli_caloff(NULL, info, root->type, patt->offdata, &data->offset[patt->offset_min], NULL))) { - cli_errmsg("cli_bm_initoff: Can't calculate relative offset in signature for %s\n", patt->virname); - free(data->offtab); - free(data->offset); - return ret; - } else if ((data->offset[patt->offset_min] != CLI_OFF_NONE) && (data->offset[patt->offset_min] + patt->length <= info->fsize)) { - if (!data->cnt || (data->offset[patt->offset_min] + patt->prefix_length != data->offtab[data->cnt - 1])) { - data->offtab[data->cnt] = data->offset[patt->offset_min] + patt->prefix_length; + + if (patt->offset_data != NULL) { + if (patt->offset_data->type == PATTERN_OFF_ABSOLUTE) { + data->offtab[data->cnt] = patt->offset_data->offset_min + patt->prefix_length; if (data->offtab[data->cnt] >= info->fsize) continue; data->cnt++; + } else { + ret = matcher_calculate_relative_offsets(info, patt->offset_data, &data->offset[patt->offset_data->offset_min], NULL); + if (CL_SUCCESS != 
ret) { + cli_errmsg("cli_bm_initoff: Can't calculate relative offset in signature for %s\n", patt->virname); + free(data->offtab); + free(data->offset); + return ret; + } + + if ((data->offset[patt->offset_data->offset_min] != CLI_SIZE_NONE) && + (data->offset[patt->offset_data->offset_min] + patt->length <= info->fsize)) { + + if (!data->cnt || (data->offset[patt->offset_data->offset_min] + patt->prefix_length != data->offtab[data->cnt - 1])) { + data->offtab[data->cnt] = data->offset[patt->offset_data->offset_min] + patt->prefix_length; + if (data->offtab[data->cnt] >= info->fsize) + continue; + data->cnt++; + } + } } } } @@ -235,6 +252,8 @@ void cli_bm_free(struct cli_matcher *root) MPOOL_FREE(root->mempool, prev->pattern); if (prev->virname) MPOOL_FREE(root->mempool, prev->virname); + if (prev->offset_data) + MPOOL_FREE(root->mempool, prev->offset_data); MPOOL_FREE(root->mempool, prev); } } @@ -314,13 +333,14 @@ cl_error_t cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const c continue; } - if (offdata) { - if (p->offdata[0] == CLI_OFF_ABSOLUTE) { - if (p->offset_min != offset + off - p->prefix_length) { + if (offdata && p->offset_data != NULL) { + if (p->offset_data->type == PATTERN_OFF_ABSOLUTE) { + if (p->offset_data->offset_min != offset + off - p->prefix_length) { p = p->next; continue; } - } else if ((offdata->offset[p->offset_min] == CLI_OFF_NONE) || (offdata->offset[p->offset_min] != offset + off - p->prefix_length)) { + } else if ((offdata->offset[p->offset_data->offset_min] == CLI_SIZE_NONE) || + (offdata->offset[p->offset_data->offset_min] != offset + off - p->prefix_length)) { p = p->next; continue; } @@ -358,23 +378,29 @@ cl_error_t cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const c } if (found && p->length + p->prefix_length == j) { - if (!offdata && (p->offset_min != CLI_OFF_ANY)) { - if (p->offdata[0] != CLI_OFF_ABSOLUTE) { + if ((offdata == NULL) && + (p->offset_data != NULL) && + (p->offset_data->offset_min != 
CLI_SIZE_ANY)) { + + if (p->offset_data != NULL && p->offset_data->type != PATTERN_OFF_ABSOLUTE) { if (!info) { p = p->next; continue; } - ret = cli_caloff(NULL, info, root->type, p->offdata, &off_min, &off_max); + + ret = matcher_calculate_relative_offsets(info, p->offset_data, &off_min, &off_max); if (ret != CL_SUCCESS) { cli_errmsg("cli_bm_scanbuff: Can't calculate relative offset in signature for %s\n", p->virname); return ret; } } else { - off_min = p->offset_min; - off_max = p->offset_max; + off_min = p->offset_data->offset_min; + off_max = p->offset_data->offset_max; } + off = offset + i - p->prefix_length - BM_MIN_LENGTH + BM_BLOCK_SIZE; - if (off_min == CLI_OFF_NONE || off_max < off || off_min > off) { + + if (off_min == CLI_SIZE_NONE || off_max < off || off_min > off) { p = p->next; continue; } diff --git a/libclamav/matcher-bm.h b/libclamav/matcher-bm.h index e94594bcc5..a7a2f1f31f 100644 --- a/libclamav/matcher-bm.h +++ b/libclamav/matcher-bm.h @@ -27,18 +27,20 @@ #include "clamav-types.h" #include "fmap.h" #include "others.h" +#include "matcher-offset.h" #define BM_BOUNDARY_EOL 1 struct cli_bm_patt { unsigned char *pattern, *prefix; char *virname; - uint32_t offdata[4], offset_min, offset_max; + pattern_offset_data *offset_data; struct cli_bm_patt *next; uint16_t length, prefix_length; uint16_t cnt; unsigned char pattern0; - uint32_t boundary, filesize; + uint8_t boundary; + uint32_t filesize; }; struct cli_bm_off { diff --git a/libclamav/matcher-byte-comp.c b/libclamav/matcher-byte-comp.c index d3cf69b24a..60d0545073 100644 --- a/libclamav/matcher-byte-comp.c +++ b/libclamav/matcher-byte-comp.c @@ -510,7 +510,7 @@ cl_error_t cli_bcomp_scanbuf(const unsigned char *buffer, size_t buffer_length, } /* no offset available, make a best effort */ - if (offset == CLI_OFF_NONE) { + if (offset == CLI_SIZE_NONE) { offset = 0; } diff --git a/libclamav/matcher-offset.h b/libclamav/matcher-offset.h new file mode 100644 index 0000000000..c202e50678 --- /dev/null +++ 
b/libclamav/matcher-offset.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * + * Authors: Micah Snyder + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#ifndef __MATCHER_OFFSET_H +#define __MATCHER_OFFSET_H + +#include "clamav.h" + +/* + * These are magic numbers used for a variety of signature size fields. + * The values are selected to be large and uncommon, and must fit within a size_t. + */ +#define CLI_SIZE_ANY 0xffffffff +#define CLI_SIZE_NONE 0xfffffffe + +enum pattern_off_type { + PATTERN_OFF_ABSOLUTE = 1, + PATTERN_OFF_EOF_MINUS = 2, + PATTERN_OFF_EP_PLUS = 3, + PATTERN_OFF_EP_MINUS = 4, + PATTERN_OFF_SL_PLUS = 5, + PATTERN_OFF_SX_PLUS = 6, + PATTERN_OFF_VERSION = 7, + PATTERN_OFF_MACRO = 8, + PATTERN_OFF_SE = 9, + PATTERN_OFF_NONE = 0xfe +}; + +typedef struct pattern_offset_data { + /* Offset from start of offset_min. */ + uint32_t offset_value; + + /* Amount the match may shift from the start of the offset. Uncommonly specified. */ + uint32_t max_shift; + + /* lower boundary for matching within a specific section. */ + uint32_t offset_min; + /* upper boundary for matching within a specific section. */ + uint32_t offset_max; + + /* section offsets are unknown until scanning a specific file, so the offsets are recalculated using the requested section at that time. 
*/ + uint16_t section_number; + + /* pattern_off_type enum stored as uint8_t to conserve memory */ + uint8_t type; +} pattern_offset_data; + +#endif diff --git a/libclamav/matcher-pcre.c b/libclamav/matcher-pcre.c index a0da207e7c..f60683f4cd 100644 --- a/libclamav/matcher-pcre.c +++ b/libclamav/matcher-pcre.c @@ -288,18 +288,20 @@ cl_error_t cli_pcre_addpatt(struct cli_matcher *root, const char *virname, const /* offset parsing and usage, similar to cli_ac_addsig */ /* relative and type-specific offsets handled during scan */ - ret = cli_caloff(offset, NULL, root->type, pm->offdata, &(pm->offset_min), &(pm->offset_max)); + ret = matcher_decode_offset_string(root, offset, root->type, &pm->offset_data); if (ret != CL_SUCCESS) { cli_errmsg("cli_pcre_addpatt: cannot calculate offset data: %s for pattern: %s\n", offset, pattern); cli_pcre_freemeta(root, pm); MPOOL_FREE(root->mempool, pm); return ret; } - if (pm->offdata[0] != CLI_OFF_ANY) { - if (pm->offdata[0] == CLI_OFF_ABSOLUTE) + + if (pm->offset_data != NULL) { + if (pm->offset_data->type == PATTERN_OFF_ABSOLUTE) { root->pcre_absoff_num++; - else + } else { root->pcre_reloff_num++; + } } /* parse and add options, also totally not from snort */ @@ -483,8 +485,8 @@ cl_error_t cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data return CL_EMEM; } - pm_dbgmsg("CLI_OFF_NONE: %u\n", CLI_OFF_NONE); - pm_dbgmsg("CLI_OFF_ANY: %u\n", CLI_OFF_ANY); + pm_dbgmsg("CLI_SIZE_NONE: %u\n", CLI_SIZE_NONE); + pm_dbgmsg("CLI_SIZE_ANY: %u\n", CLI_SIZE_ANY); /* iterate across all pcre metadata and recalc offsets */ for (i = 0; i < root->pcre_metas; ++i) { @@ -492,39 +494,40 @@ cl_error_t cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data /* skip broken pcres, not getting executed anyways */ if (pm->flags & CLI_PCRE_DISABLED) { - data->offset[i] = CLI_OFF_NONE; + data->offset[i] = CLI_SIZE_NONE; data->shift[i] = 0; continue; } - if (pm->offdata[0] == CLI_OFF_ANY) { - data->offset[i] = CLI_OFF_ANY; + if 
(pm->offset_data == NULL) { + /* No offset specified. Pattern may match anywhere in buffer. */ + data->offset[i] = CLI_SIZE_ANY; data->shift[i] = 0; - } else if (pm->offdata[0] == CLI_OFF_NONE) { - data->offset[i] = CLI_OFF_NONE; + } else if (pm->offset_data->type == PATTERN_OFF_NONE) { + data->offset[i] = CLI_SIZE_NONE; data->shift[i] = 0; - } else if (pm->offdata[0] == CLI_OFF_ABSOLUTE) { - data->offset[i] = pm->offdata[1]; - data->shift[i] = pm->offdata[2]; + } else if (pm->offset_data->type == PATTERN_OFF_ABSOLUTE) { + data->offset[i] = pm->offset_data->offset_value; + data->shift[i] = pm->offset_data->max_shift; } else { - ret = cli_caloff(NULL, info, root->type, pm->offdata, &data->offset[i], &endoff); + ret = matcher_calculate_relative_offsets(info, pm->offset_data, &data->offset[i], &endoff); if (ret != CL_SUCCESS) { cli_errmsg("cli_pcre_recaloff: cannot recalculate relative offset for signature\n"); free(data->shift); free(data->offset); return ret; } - /* CLI_OFF_NONE gets passed down, CLI_OFF_ANY gets reinterpreted */ - /* TODO - CLI_OFF_VERSION is interpreted as CLI_OFF_ANY(?) */ - if (data->offset[i] == CLI_OFF_ANY) { - data->offset[i] = CLI_OFF_ANY; + /* CLI_SIZE_NONE gets passed down, CLI_SIZE_ANY gets reinterpreted */ + /* TODO - PATTERN_OFF_VERSION is interpreted as CLI_SIZE_ANY(?) 
*/ + if (data->offset[i] == CLI_SIZE_ANY) { + data->offset[i] = CLI_SIZE_ANY; data->shift[i] = 0; } else { data->shift[i] = endoff - (data->offset[i]); } } - pm_dbgmsg("%u: %u %u->%u(+%u)\n", i, pm->offdata[0], data->offset[i], + pm_dbgmsg("%u: %u %u->%u(+%u)\n", i, pm->offset_data->type, data->offset[i], data->offset[i] + data->shift[i], data->shift[i]); } @@ -547,17 +550,18 @@ int cli_pcre_qoff(struct cli_pcre_meta *pm, uint32_t length, uint32_t *adjbuffer return CL_ENULLARG; /* default to scanning whole buffer but try to use existing offdata */ - if (pm->offdata[0] == CLI_OFF_NONE) { - return CL_BREAK; - } else if (pm->offdata[0] == CLI_OFF_ANY) { - *adjbuffer = CLI_OFF_ANY; + if (pm->offset_data == NULL) { + /* No offset specified. Pattern may match anywhere in buffer. */ + *adjbuffer = CLI_SIZE_ANY; *adjshift = 0; - } else if (pm->offdata[0] == CLI_OFF_ABSOLUTE) { - *adjbuffer = pm->offdata[1]; - *adjshift = pm->offdata[2]; - } else if (pm->offdata[0] == CLI_OFF_EOF_MINUS) { - *adjbuffer = length - pm->offdata[1]; - *adjshift = pm->offdata[2]; + } else if (pm->offset_data->type == PATTERN_OFF_NONE) { + return CL_BREAK; + } else if (pm->offset_data->type == PATTERN_OFF_ABSOLUTE) { + *adjbuffer = pm->offset_data->offset_value; + *adjshift = pm->offset_data->max_shift; + } else if (pm->offset_data->type == PATTERN_OFF_EOF_MINUS) { + *adjbuffer = length - pm->offset_data->offset_value; + *adjshift = pm->offset_data->max_shift; } else { /* all relative offsets */ /* TODO - check if relative offsets apply for normal hex substrs */ @@ -600,9 +604,9 @@ cl_error_t cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const continue; } - /* skip checking and running CLI_OFF_NONE pcres */ - if (data && data->offset[i] == CLI_OFF_NONE) { - pm_dbgmsg("cli_pcre_scanbuf: skipping CLI_OFF_NONE regex /%s/\n", pd->expression); + /* skip checking and running CLI_SIZE_NONE pcres */ + if (data && data->offset[i] == CLI_SIZE_NONE) { + pm_dbgmsg("cli_pcre_scanbuf: skipping 
CLI_SIZE_NONE regex /%s/\n", pd->expression); continue; } @@ -637,7 +641,7 @@ cl_error_t cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const } /* check for need to anchoring */ - if (!rolling && !adjshift && (adjbuffer != CLI_OFF_ANY)) + if (!rolling && !adjshift && (adjbuffer != CLI_SIZE_ANY)) #if USING_PCRE2 options |= PCRE2_ANCHORED; #else @@ -646,13 +650,13 @@ cl_error_t cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const else options = 0; - if (adjbuffer == CLI_OFF_ANY) + if (adjbuffer == CLI_SIZE_ANY) adjbuffer = 0; /* check the offset bounds */ if (adjbuffer < length) { /* handle encompass flag */ - if (encompass && adjshift != 0 && adjshift != CLI_OFF_NONE) { + if (encompass && adjshift != 0 && adjshift != CLI_SIZE_NONE) { if (adjbuffer + adjshift > length) adjlength = length - adjbuffer; else @@ -790,6 +794,11 @@ void cli_pcre_freemeta(struct cli_matcher *root, struct cli_pcre_meta *pm) pm->statname = NULL; } + if (pm->offset_data) { + MPOOL_FREE(root->mempool, pm->offset_data); + pm->offset_data = NULL; + } + cli_pcre_free_single(&(pm->pdata)); } diff --git a/libclamav/matcher-pcre.h b/libclamav/matcher-pcre.h index fb2a5aab60..ca7cbc3f7e 100644 --- a/libclamav/matcher-pcre.h +++ b/libclamav/matcher-pcre.h @@ -57,8 +57,7 @@ struct cli_pcre_meta { uint32_t lsigid[3]; /* 0=valid, 1=lsigid, 2=subsigid */ struct cli_pcre_data pdata; /* clamav offset data */ - uint32_t offdata[4]; - uint32_t offset_min, offset_max; + pattern_offset_data *offset_data; /* internal flags (bitfield?) 
*/ uint32_t flags; /* performance tracking */ diff --git a/libclamav/matcher.c b/libclamav/matcher.c index 3bee45f8e2..46b49e5089 100644 --- a/libclamav/matcher.c +++ b/libclamav/matcher.c @@ -340,184 +340,213 @@ cl_error_t cli_scan_buff(const unsigned char *buffer, uint32_t length, uint32_t return ret; } -/* - * offdata[0]: type - * offdata[1]: offset value - * offdata[2]: max shift - * offdata[3]: section number - */ -cl_error_t cli_caloff(const char *offstr, const struct cli_target_info *info, cli_target_t target, uint32_t *offdata, uint32_t *offset_min, uint32_t *offset_max) +cl_error_t matcher_decode_offset_string(mpool_t *mempool, const char *offstr, cli_target_t target, pattern_offset_data **offset_data) { char offcpy[65] = {0}; unsigned int n = 0, val = 0; char *pt = NULL; - if (!info) { /* decode offset string */ - if (!offstr) { - cli_errmsg("cli_caloff: offstr == NULL\n"); - return CL_ENULLARG; - } +#if !USE_MPOOL + UNUSEDPARAM(mempool); +#endif - if (!strcmp(offstr, "*")) { - offdata[0] = *offset_max = *offset_min = CLI_OFF_ANY; - return CL_SUCCESS; - } + if (NULL == offset_data) { + return CL_EARG; + } - if (strlen(offstr) > 64) { - cli_errmsg("cli_caloff: Offset string too long\n"); + if (!offstr) { + cli_errmsg("matcher_decode_offset_string: offstr == NULL\n"); + return CL_ENULLARG; + } + + if (!strcmp(offstr, "*")) { + return CL_SUCCESS; + } + + *offset_data = MPOOL_CALLOC(mempool, 1, sizeof(pattern_offset_data)); + if (NULL == offset_data) { + cli_errmsg("Failed to allocate memory for pattern_offset_data\n"); + return CL_EMEM; + } + + if (strlen(offstr) > 64) { + cli_errmsg("matcher_decode_offset_string: Offset string too long\n"); + return CL_EMALFDB; + } + strcpy(offcpy, offstr); + + if ((pt = strchr(offcpy, ','))) { + if (!cli_isnumber(pt + 1)) { + cli_errmsg("matcher_decode_offset_string: Invalid offset shift value\n"); return CL_EMALFDB; } - strcpy(offcpy, offstr); + (*offset_data)->max_shift = atoi(pt + 1); + *pt = 0; + } else { + 
(*offset_data)->max_shift = 0; + } - if ((pt = strchr(offcpy, ','))) { - if (!cli_isnumber(pt + 1)) { - cli_errmsg("cli_caloff: Invalid offset shift value\n"); - return CL_EMALFDB; - } - offdata[2] = atoi(pt + 1); - *pt = 0; - } else { - offdata[2] = 0; - } + (*offset_data)->offset_max = (*offset_data)->offset_min = CLI_SIZE_NONE; - *offset_max = *offset_min = CLI_OFF_NONE; + if (!strncmp(offcpy, "EP+", 3) || !strncmp(offcpy, "EP-", 3)) { + if (offcpy[2] == '+') + (*offset_data)->type = PATTERN_OFF_EP_PLUS; + else + (*offset_data)->type = PATTERN_OFF_EP_MINUS; - if (!strncmp(offcpy, "EP+", 3) || !strncmp(offcpy, "EP-", 3)) { - if (offcpy[2] == '+') - offdata[0] = CLI_OFF_EP_PLUS; - else - offdata[0] = CLI_OFF_EP_MINUS; + if (!cli_isnumber(&offcpy[3])) { + cli_errmsg("matcher_decode_offset_string: Invalid offset value\n"); + return CL_EMALFDB; + } + (*offset_data)->offset_value = atoi(&offcpy[3]); - if (!cli_isnumber(&offcpy[3])) { - cli_errmsg("cli_caloff: Invalid offset value\n"); + } else if (offcpy[0] == 'S') { + if (offcpy[1] == 'E') { + if (!cli_isnumber(&offcpy[2])) { + cli_errmsg("matcher_decode_offset_string: Invalid section number\n"); return CL_EMALFDB; } - offdata[1] = atoi(&offcpy[3]); - - } else if (offcpy[0] == 'S') { - if (offcpy[1] == 'E') { - if (!cli_isnumber(&offcpy[2])) { - cli_errmsg("cli_caloff: Invalid section number\n"); - return CL_EMALFDB; - } - offdata[0] = CLI_OFF_SE; - offdata[3] = atoi(&offcpy[2]); - - } else if (!strncmp(offstr, "SL+", 3)) { - offdata[0] = CLI_OFF_SL_PLUS; - if (!cli_isnumber(&offcpy[3])) { - cli_errmsg("cli_caloff: Invalid offset value\n"); - return CL_EMALFDB; - } - offdata[1] = atoi(&offcpy[3]); + (*offset_data)->type = PATTERN_OFF_SE; + (*offset_data)->section_number = atoi(&offcpy[2]); - } else if (sscanf(offcpy, "S%u+%u", &n, &val) == 2) { - offdata[0] = CLI_OFF_SX_PLUS; - offdata[1] = val; - offdata[3] = n; - } else { - cli_errmsg("cli_caloff: Invalid offset string\n"); + } else if (!strncmp(offstr, "SL+", 3)) 
{ + (*offset_data)->type = PATTERN_OFF_SL_PLUS; + if (!cli_isnumber(&offcpy[3])) { + cli_errmsg("matcher_decode_offset_string: Invalid offset value\n"); return CL_EMALFDB; } + (*offset_data)->offset_value = atoi(&offcpy[3]); - } else if (!strncmp(offcpy, "EOF-", 4)) { - offdata[0] = CLI_OFF_EOF_MINUS; - if (!cli_isnumber(&offcpy[4])) { - cli_errmsg("cli_caloff: Invalid offset value\n"); - return CL_EMALFDB; - } - offdata[1] = atoi(&offcpy[4]); - } else if (!strncmp(offcpy, "VI", 2)) { - /* versioninfo */ - offdata[0] = CLI_OFF_VERSION; - } else if (strchr(offcpy, '$')) { - if (sscanf(offcpy, "$%u$", &n) != 1) { - cli_errmsg("cli_caloff: Invalid macro($) in offset: %s\n", offcpy); - return CL_EMALFDB; - } - if (n >= 32) { - cli_errmsg("cli_caloff: at most 32 macro groups supported\n"); - return CL_EMALFDB; - } - offdata[0] = CLI_OFF_MACRO; - offdata[1] = n; + } else if (sscanf(offcpy, "S%u+%u", &n, &val) == 2) { + (*offset_data)->type = PATTERN_OFF_SX_PLUS; + (*offset_data)->offset_value = val; + (*offset_data)->section_number = n; } else { - offdata[0] = CLI_OFF_ABSOLUTE; - if (!cli_isnumber(offcpy)) { - cli_errmsg("cli_caloff: Invalid offset value\n"); - return CL_EMALFDB; - } - *offset_min = offdata[1] = atoi(offcpy); - *offset_max = *offset_min + offdata[2]; + cli_errmsg("matcher_decode_offset_string: Invalid offset string\n"); + return CL_EMALFDB; } - if (offdata[0] != CLI_OFF_ANY && offdata[0] != CLI_OFF_ABSOLUTE && - offdata[0] != CLI_OFF_EOF_MINUS && offdata[0] != CLI_OFF_MACRO) { - if (target != TARGET_PE && target != TARGET_ELF && target != TARGET_MACHO) { - cli_errmsg("cli_caloff: Invalid offset type for target %u\n", target); - return CL_EMALFDB; - } + } else if (!strncmp(offcpy, "EOF-", 4)) { + (*offset_data)->type = PATTERN_OFF_EOF_MINUS; + if (!cli_isnumber(&offcpy[4])) { + cli_errmsg("matcher_decode_offset_string: Invalid offset value\n"); + return CL_EMALFDB; + } + (*offset_data)->offset_value = atoi(&offcpy[4]); + + } else if (!strncmp(offcpy, 
"VI", 2)) { + /* versioninfo */ + (*offset_data)->type = PATTERN_OFF_VERSION; + + } else if (strchr(offcpy, '$')) { + if (sscanf(offcpy, "$%u$", &n) != 1) { + cli_errmsg("matcher_decode_offset_string: Invalid macro($) in offset: %s\n", offcpy); + return CL_EMALFDB; + } + if (n >= 32) { + cli_errmsg("matcher_decode_offset_string: at most 32 macro groups supported\n"); + return CL_EMALFDB; } + (*offset_data)->type = PATTERN_OFF_MACRO; + (*offset_data)->offset_value = n; } else { - /* calculate relative offsets */ - *offset_min = CLI_OFF_NONE; - if (offset_max) - *offset_max = CLI_OFF_NONE; - if (info->status == -1) { - // If the executable headers weren't parsed successfully then we - // can't process any ndb/ldb EOF-n/EP+n/EP-n/Sx+n/SEx/SL+n subsigs - return CL_SUCCESS; + (*offset_data)->type = PATTERN_OFF_ABSOLUTE; + if (!cli_isnumber(offcpy)) { + cli_errmsg("matcher_decode_offset_string: Invalid offset value\n"); + return CL_EMALFDB; } - switch (offdata[0]) { - case CLI_OFF_EOF_MINUS: - *offset_min = info->fsize - offdata[1]; - break; - - case CLI_OFF_EP_PLUS: - *offset_min = info->exeinfo.ep + offdata[1]; - break; - - case CLI_OFF_EP_MINUS: - *offset_min = info->exeinfo.ep - offdata[1]; - break; - - case CLI_OFF_SL_PLUS: - *offset_min = info->exeinfo.sections[info->exeinfo.nsections - 1].raw + offdata[1]; - break; - - case CLI_OFF_SX_PLUS: - if (offdata[3] >= info->exeinfo.nsections) - *offset_min = CLI_OFF_NONE; - else - *offset_min = info->exeinfo.sections[offdata[3]].raw + offdata[1]; - break; - - case CLI_OFF_SE: - if (offdata[3] >= info->exeinfo.nsections) { - *offset_min = CLI_OFF_NONE; - } else { - *offset_min = info->exeinfo.sections[offdata[3]].raw; - if (offset_max) - *offset_max = *offset_min + info->exeinfo.sections[offdata[3]].rsz + offdata[2]; - // TODO offdata[2] == MaxShift. Won't this make offset_max - // extend beyond the end of the section? This doesn't seem like - // what we want... 
- } - break; - - case CLI_OFF_VERSION: - if (offset_max) - *offset_min = *offset_max = CLI_OFF_ANY; - break; - default: - cli_errmsg("cli_caloff: Not a relative offset (type: %u)\n", offdata[0]); - return CL_EARG; + (*offset_data)->offset_min = (*offset_data)->offset_value = atoi(offcpy); + + (*offset_data)->offset_max = (*offset_data)->offset_min + (*offset_data)->max_shift; + } + + if ((*offset_data)->type != PATTERN_OFF_ABSOLUTE && + (*offset_data)->type != PATTERN_OFF_EOF_MINUS && + (*offset_data)->type != PATTERN_OFF_MACRO) { + /* The other offset types only work for PE, ELF and Mach-O files */ + + if (target != TARGET_PE && target != TARGET_ELF && target != TARGET_MACHO) { + cli_errmsg("matcher_decode_offset_string: Invalid offset type for target %u\n", target); + return CL_EMALFDB; } + } + + return CL_SUCCESS; +} + +cl_error_t matcher_calculate_relative_offsets(const struct cli_target_info *info, pattern_offset_data *offset_data, uint32_t *offset_min, uint32_t *offset_max) +{ + if (NULL == offset_data) { + return CL_EARG; + } + + /* calculate relative offsets */ + *offset_min = CLI_SIZE_NONE; + if (offset_max != NULL) { + *offset_max = CLI_SIZE_NONE; + } + + if (info->status == -1) { + // If the executable headers weren't parsed successfully then we + // can't process any ndb/ldb EOF-n/EP+n/EP-n/Sx+n/SEx/SL+n subsigs + return CL_SUCCESS; + } + + switch (offset_data->type) { + case PATTERN_OFF_EOF_MINUS: + *offset_min = info->fsize - offset_data->offset_value; + break; + + case PATTERN_OFF_EP_PLUS: + *offset_min = info->exeinfo.ep + offset_data->offset_value; + break; + + case PATTERN_OFF_EP_MINUS: + *offset_min = info->exeinfo.ep - offset_data->offset_value; + break; + + case PATTERN_OFF_SL_PLUS: + *offset_min = info->exeinfo.sections[info->exeinfo.nsections - 1].raw + offset_data->offset_value; + break; + + case PATTERN_OFF_SX_PLUS: + if (offset_data->section_number >= info->exeinfo.nsections) + *offset_min = CLI_SIZE_NONE; + else + *offset_min = 
info->exeinfo.sections[offset_data->section_number].raw + offset_data->offset_value; + break; + + case PATTERN_OFF_SE: + if (offset_data->section_number >= info->exeinfo.nsections) { + *offset_min = CLI_SIZE_NONE; + } else { + *offset_min = info->exeinfo.sections[offset_data->section_number].raw; + if (offset_max != NULL) { + *offset_max = *offset_min + info->exeinfo.sections[offset_data->section_number].rsz + offset_data->max_shift; + } + // TODO offset_data->max_shift == MaxShift. Won't this make offset_max + // extend beyond the end of the section? This doesn't seem like + // what we want... + } + break; + + case PATTERN_OFF_VERSION: + if (offset_max != NULL) { + *offset_min = *offset_max = CLI_SIZE_ANY; + } + break; + default: + cli_errmsg("matcher_calculate_relative_offsets: Not a relative offset (type: %u)\n", offset_data->type); + return CL_EARG; + } + + if ((offset_max != NULL) && + (*offset_max == CLI_SIZE_NONE) && + (*offset_min != CLI_SIZE_NONE)) { - if (offset_max && *offset_max == CLI_OFF_NONE && *offset_min != CLI_OFF_NONE) - *offset_max = *offset_min + offdata[2]; + *offset_max = *offset_min + offset_data->max_shift; /* apply MaxShift to the caller's output range; the decoded offset_data is left unmodified */ } return CL_SUCCESS; @@ -1413,7 +1442,7 @@ cl_error_t cli_scan_fmap(cli_ctx *ctx, cli_file_t ftype, bool filetype_only, str } #define CDBRANGE(field, val) \ - if (field[0] != CLI_OFF_ANY) { \ + if (field[0] != CLI_SIZE_ANY) { \ if (field[0] == field[1] && field[0] != val) \ continue; \ else if (field[0] != field[1] && ((field[0] && field[0] > val) || \ diff --git a/libclamav/matcher.h b/libclamav/matcher.h index 9b19a1626f..1bce493b59 100644 --- a/libclamav/matcher.h +++ b/libclamav/matcher.h @@ -182,18 +182,17 @@ struct cli_matcher { }; struct cli_cdb { - char *virname; /* virus name */ - cli_file_t ctype; /* container type */ - regex_t name; /* filename regex */ - size_t csize[2]; /* container size (min, max); if csize[0] != csize[1] - * then value of 0 
makes the field ignored - */ - 
size_t fsizec[2]; /* file size in container */ - size_t fsizer[2]; /* real file size */ - int encrypted; /* file is encrypted; 2 == ignore */ - unsigned int filepos[2]; /* file position in container */ - int res1; /* reserved / format specific */ - void *res2; /* reserved / format specific */ + char *virname; /* virus name */ + cli_file_t ctype; /* container type */ + regex_t name; /* filename regex */ + size_t csize[2]; /* container size (min, max); if csize[0] != csize[1] + * then value of 0 makes the field ignored */ + size_t fsizec[2]; /* file size in container */ + size_t fsizer[2]; /* real file size */ + int encrypted; /* file is encrypted; 2 == ignore */ + size_t filepos[2]; /* file position in container */ + int res1; /* reserved / format specific */ + void *res2; /* reserved / format specific */ struct cli_cdb *next; }; @@ -245,22 +244,6 @@ static const struct cli_mtarget cli_mtargets[CLI_MTARGETS] = { {{CL_TYPE_INTERNAL, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "INTERNAL", TARGET_INTERNAL, 1, 0, 1}, {{CL_TYPE_OTHER, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "OTHER", TARGET_OTHER, 1, 0, 1}}; -// clang-format off - -#define CLI_OFF_ANY 0xffffffff -#define CLI_OFF_NONE 0xfffffffe -#define CLI_OFF_ABSOLUTE 1 -#define CLI_OFF_EOF_MINUS 2 -#define CLI_OFF_EP_PLUS 3 -#define CLI_OFF_EP_MINUS 4 -#define CLI_OFF_SL_PLUS 5 -#define CLI_OFF_SX_PLUS 6 -#define CLI_OFF_VERSION 7 -#define CLI_OFF_MACRO 8 -#define CLI_OFF_SE 9 - -// clang-format on - /** * @brief Non-magic scan matching using a file buffer for input. 
Older API * @@ -357,7 +340,27 @@ cl_error_t cli_scan_fmap(cli_ctx *ctx, cli_file_t ftype, bool filetype_only, str */ cl_error_t cli_exp_eval(cli_ctx *ctx, struct cli_matcher *root, struct cli_ac_data *acdata, struct cli_target_info *target_info, const char *hash); -cl_error_t cli_caloff(const char *offstr, const struct cli_target_info *info, unsigned int target, uint32_t *offdata, uint32_t *offset_min, uint32_t *offset_max); +/** + * @brief Decode an offset string into the component offset, max_shift, etc. + * + * @param mempool May be used to allocate a new offset_data structure. + * @param offstr The offset string + * @param target The target type for the signature. May be used to reject invalid offset options. + * @param[out] offset_data Offset_data structure to fill out. May be allocated. + * @return cl_error_t + */ +cl_error_t matcher_decode_offset_string(mpool_t *mempool, const char *offstr, cli_target_t target, pattern_offset_data **offset_data); + +/** + * @brief Calculate relative offsets for the given scan target. + * + * @param info The target info structure to read from (not modified). + * @param offset_data The offset data structure to use for calculating offsets. + * @param[out] offset_min The minimum offset to match. + * @param[out] offset_max (optional) The maximum offset to match. May be NULL. + * @return cl_error_t + */ +cl_error_t matcher_calculate_relative_offsets(const struct cli_target_info *info, pattern_offset_data *offset_data, uint32_t *offset_min, uint32_t *offset_max); /** * @brief Determine if an alert is a known false positive, using each fmap in the the ctx->container stack to check MD5, SHA1, and SHA256 hashes. 
diff --git a/libclamav/readdb.c b/libclamav/readdb.c index 8b6108c0ba..fd1b7735ce 100644 --- a/libclamav/readdb.c +++ b/libclamav/readdb.c @@ -366,7 +366,7 @@ static cl_error_t readdb_load_regex_subsignature(struct cli_matcher *root, const if (subtokens_count == 2) { // Offset was specified offset = subtokens[0]; - sig = subtokens[1]; + sig = subtokens[1]; } else { sig = subtokens[0]; } @@ -3038,15 +3038,15 @@ static int cli_loadmd(FILE *fs, struct cl_engine *engine, unsigned int *signo, i ret = CL_EMEM; break; } - new->csize[0] = new->csize[1] = CLI_OFF_ANY; + new->csize[0] = new->csize[1] = CLI_SIZE_ANY; if (!strcmp(tokens[3], "*")) - new->fsizer[0] = new->fsizer[1] = CLI_OFF_ANY; + new->fsizer[0] = new->fsizer[1] = CLI_SIZE_ANY; else new->fsizer[0] = new->fsizer[1] = atoi(tokens[3]); if (!strcmp(tokens[4], "*")) - new->fsizec[0] = new->fsizec[1] = CLI_OFF_ANY; + new->fsizec[0] = new->fsizec[1] = CLI_SIZE_ANY; else new->fsizec[0] = new->fsizec[1] = atoi(tokens[4]); @@ -3064,7 +3064,7 @@ static int cli_loadmd(FILE *fs, struct cl_engine *engine, unsigned int *signo, i /* tokens[6] - not used */ - new->filepos[0] = new->filepos[1] = strcmp(tokens[7], "*") ? (unsigned int)atoi(tokens[7]) : (unsigned int)CLI_OFF_ANY; + new->filepos[0] = new->filepos[1] = strcmp(tokens[7], "*") ? 
(size_t)atoi(tokens[7]) : (size_t)CLI_SIZE_ANY; /* tokens[8] - not used */ @@ -3214,7 +3214,7 @@ static int cli_loadcdb(FILE *fs, struct cl_engine *engine, unsigned int *signo, break; \ } \ } else { \ - dest[0] = dest[1] = CLI_OFF_ANY; \ + dest[0] = dest[1] = CLI_SIZE_ANY; \ } CDBRANGE(tokens[2], new->csize); diff --git a/libclamav/regex_list.c b/libclamav/regex_list.c index df689134e8..ae6888d708 100644 --- a/libclamav/regex_list.c +++ b/libclamav/regex_list.c @@ -689,15 +689,15 @@ static cl_error_t add_newsuffix(struct regex_matcher *matcher, struct regex_list goto done; } - new->rtype = 0; - new->type = 0; - new->sigid = 0; - new->parts = 0; - new->partno = 0; - new->mindist = 0; - new->maxdist = 0; - new->offset_min = CLI_OFF_ANY; - new->length[0] = (uint16_t)len; + new->rtype = 0; + new->type = 0; + new->sigid = 0; + new->parts = 0; + new->partno = 0; + new->mindist = 0; + new->maxdist = 0; + new->offset_data = NULL; + new->length[0] = (uint16_t)len; new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE; if (new->length[0] > root->maxpatlen) diff --git a/libclamav/unzip.c b/libclamav/unzip.c index f8dc7505e1..0aab892e49 100644 --- a/libclamav/unzip.c +++ b/libclamav/unzip.c @@ -812,15 +812,18 @@ parse_central_directory_file_header( *ret = CL_EPARSE; - if (!(central_header = fmap_need_off(map, coff, SIZEOF_CENTRAL_HEADER)) || CENTRAL_HEADER_magic != ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_BEGIN) { - if (central_header) { - fmap_unneed_ptr(map, central_header, SIZEOF_CENTRAL_HEADER); - central_header = NULL; - } - cli_dbgmsg("cli_unzip: central header - wrkcomplete\n"); + if (NULL == (central_header = fmap_need_off(map, coff, SIZEOF_CENTRAL_HEADER))) { + cli_dbgmsg("cli_unzip: failed to get pointer for central header from fmap\n"); + last = 1; + goto done; + } + + if (CENTRAL_HEADER_magic != ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_BEGIN) { + cli_dbgmsg("cli_unzip: alleged central directory header does not start with expected magic bytes\n"); last = 1; goto done; } + coff += 
SIZEOF_CENTRAL_HEADER; cli_dbgmsg("cli_unzip: central header - flags %x - method %x - csize %x - usize %x - flen %x - elen %x - clen %x - disk %x - off %x\n", diff --git a/libclamav/yara_exec.c b/libclamav/yara_exec.c index ab434b071b..3e40faff06 100644 --- a/libclamav/yara_exec.c +++ b/libclamav/yara_exec.c @@ -575,7 +575,7 @@ int yr_execute_code( #if REAL_YARA push(string->matches[tidx].tail != NULL ? 1 : 0); #else - push(acdata->lsigsuboff_first[aclsig->id][string->subsig_id] != CLI_OFF_NONE ? 1 : 0); + push(acdata->lsigsuboff_first[aclsig->id][string->subsig_id] != CLI_SIZE_NONE ? 1 : 0); #endif break; @@ -764,7 +764,7 @@ int yr_execute_code( { string = UINT64_TO_PTR(YR_STRING*, r1); lsig_id = string->subsig_id; - if (acdata->lsigsuboff_first[aclsig->id][lsig_id] != CLI_OFF_NONE) + if (acdata->lsigsuboff_first[aclsig->id][lsig_id] != CLI_SIZE_NONE) found++; count++; pop(r1); diff --git a/libclamav_rust/src/sys.rs b/libclamav_rust/src/sys.rs index 804ac6cc45..0fc097794a 100644 --- a/libclamav_rust/src/sys.rs +++ b/libclamav_rust/src/sys.rs @@ -989,6 +989,16 @@ pub struct cli_hashset { } #[repr(C)] #[derive(Debug, Copy, Clone)] +pub struct pattern_offset_data { + pub offset_value: u32, + pub max_shift: u32, + pub offset_min: u32, + pub offset_max: u32, + pub section_number: u16, + pub type_: u8, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] pub struct cli_subsig_matches { pub last: u32, pub next: u32, @@ -1063,12 +1073,10 @@ pub struct cli_ac_patt { pub special: u16, pub special_pattern: u16, pub special_table: *mut *mut cli_ac_special, + pub offset_data: *mut pattern_offset_data, pub rtype: u16, pub type_: u16, - pub offdata: [u32; 4usize], - pub offset_min: u32, - pub offset_max: u32, - pub boundary: u32, + pub boundary: u16, pub depth: u8, pub sigopts: u8, } @@ -1113,15 +1121,13 @@ pub struct cli_bm_patt { pub pattern: *mut ::std::os::raw::c_uchar, pub prefix: *mut ::std::os::raw::c_uchar, pub virname: *mut ::std::os::raw::c_char, - pub offdata: [u32; 4usize], 
- pub offset_min: u32, - pub offset_max: u32, + pub offset_data: *mut pattern_offset_data, pub next: *mut cli_bm_patt, pub length: u16, pub prefix_length: u16, pub cnt: u16, pub pattern0: ::std::os::raw::c_uchar, - pub boundary: u32, + pub boundary: u8, pub filesize: u32, } #[repr(C)] @@ -1168,9 +1174,7 @@ pub struct cli_pcre_meta { pub trigger: *mut ::std::os::raw::c_char, pub lsigid: [u32; 3usize], pub pdata: cli_pcre_data, - pub offdata: [u32; 4usize], - pub offset_min: u32, - pub offset_max: u32, + pub offset_data: *mut pattern_offset_data, pub flags: u32, pub statname: *mut ::std::os::raw::c_char, pub sigtime_id: u32, @@ -1298,7 +1302,7 @@ pub struct cli_cdb { pub fsizec: [size_t; 2usize], pub fsizer: [size_t; 2usize], pub encrypted: ::std::os::raw::c_int, - pub filepos: [::std::os::raw::c_uint; 2usize], + pub filepos: [size_t; 2usize], pub res1: ::std::os::raw::c_int, pub res2: *mut ::std::os::raw::c_void, pub next: *mut cli_cdb,