Skip to content

Commit

Permalink
Serve stale (#159)
Browse files Browse the repository at this point in the history
- Added serve-stale functionality as described in
  draft-ietf-dnsop-serve-stale-10. `serve-expired-*` options can be used
  to configure the behavior.
- Updated cachedb to honor `serve-expired-ttl`; Fixes #107.
- Renamed statistic `num.zero_ttl` to `num.expired` as expired replies
  come with a configurable TTL value (`serve-expired-reply-ttl`).
- Fixed stats when replying with cached, cname-aliased records.
- Added missing default values for redis cachedb backend.
  • Loading branch information
gthess committed Feb 5, 2020
1 parent 5980d9c commit f7fe95a
Show file tree
Hide file tree
Showing 38 changed files with 4,771 additions and 3,292 deletions.
58 changes: 42 additions & 16 deletions cachedb/cachedb.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,6 @@ static int
cachedb_apply_cfg(struct cachedb_env* cachedb_env, struct config_file* cfg)
{
const char* backend_str = cfg->cachedb_backend;

/* If unspecified we use the in-memory test DB. */
if(!backend_str)
backend_str = "testframe";
cachedb_env->backend = cachedb_find_backend(backend_str);
if(!cachedb_env->backend) {
log_err("cachedb: cannot find backend name '%s'", backend_str);
Expand Down Expand Up @@ -259,6 +255,15 @@ cachedb_init(struct module_env* env, int id)
return 0;
}
cachedb_env->enabled = 1;
if(env->cfg->serve_expired_reply_ttl)
log_warn(
"cachedb: serve-expired-reply-ttl is set but not working for data "
"originating from the external cache; 0 TLL is used for those.");
if(env->cfg->serve_expired_client_timeout)
log_warn(
"cachedb: serve-expired-client-timeout is set but not working for "
"data originating from the external cache; expired data are used "
"in the reply without first trying to refresh the data.");
return 1;
}

Expand Down Expand Up @@ -329,8 +334,7 @@ calc_hash(struct module_qstate* qstate, char* buf, size_t len)
size_t clen = 0;
uint8_t hash[CACHEDB_HASHSIZE/8];
const char* hex = "0123456789ABCDEF";
const char* secret = qstate->env->cfg->cachedb_secret ?
qstate->env->cfg->cachedb_secret : "default";
const char* secret = qstate->env->cfg->cachedb_secret;
size_t i;

/* copy the hash info into the clear buffer */
Expand Down Expand Up @@ -433,8 +437,14 @@ good_expiry_and_qinfo(struct module_qstate* qstate, struct sldns_buffer* buf)
&expiry, sizeof(expiry));
expiry = be64toh(expiry);

/* Check if we are allowed to return expired entries:
* - serve_expired needs to be set
* - if SERVE_EXPIRED_TTL is set make sure that the record is not older
* than that. */
if((time_t)expiry < *qstate->env->now &&
!qstate->env->cfg->serve_expired)
(!qstate->env->cfg->serve_expired ||
(SERVE_EXPIRED_TTL &&
*qstate->env->now - (time_t)expiry > SERVE_EXPIRED_TTL)))
return 0;

return 1;
Expand All @@ -445,15 +455,15 @@ good_expiry_and_qinfo(struct module_qstate* qstate, struct sldns_buffer* buf)
static void
packed_rrset_ttl_subtract(struct packed_rrset_data* data, time_t subtract)
{
size_t i;
size_t total = data->count + data->rrsig_count;
size_t i;
size_t total = data->count + data->rrsig_count;
if(subtract >= 0 && data->ttl > subtract)
data->ttl -= subtract;
else data->ttl = 0;
for(i=0; i<total; i++) {
for(i=0; i<total; i++) {
if(subtract >= 0 && data->rr_ttl[i] > subtract)
data->rr_ttl[i] -= subtract;
else data->rr_ttl[i] = 0;
data->rr_ttl[i] -= subtract;
else data->rr_ttl[i] = 0;
}
}

Expand All @@ -465,7 +475,8 @@ adjust_msg_ttl(struct dns_msg* msg, time_t adjust)
size_t i;
if(adjust >= 0 && msg->rep->ttl > adjust)
msg->rep->ttl -= adjust;
else msg->rep->ttl = 0;
else
msg->rep->ttl = 0;
msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;

Expand Down Expand Up @@ -673,16 +684,18 @@ cachedb_handle_query(struct module_qstate* qstate,
return;
}

/* lookup inside unbound's internal cache */
/* lookup inside unbound's internal cache.
* This does not look for expired entries. */
if(cachedb_intcache_lookup(qstate)) {
if(verbosity >= VERB_ALGO) {
if(qstate->return_msg->rep)
log_dns_msg("cachedb internal cache lookup",
&qstate->return_msg->qinfo,
qstate->return_msg->rep);
else log_info("cachedb internal cache lookup: rcode %s",
sldns_lookup_by_id(sldns_rcodes, qstate->return_rcode)?
sldns_lookup_by_id(sldns_rcodes, qstate->return_rcode)->name:"??");
sldns_lookup_by_id(sldns_rcodes, qstate->return_rcode)
?sldns_lookup_by_id(sldns_rcodes, qstate->return_rcode)->name
:"??");
}
/* we are done with the query */
qstate->ext_state[id] = module_finished;
Expand All @@ -697,6 +710,19 @@ cachedb_handle_query(struct module_qstate* qstate,
qstate->return_msg->rep);
/* store this result in internal cache */
cachedb_intcache_store(qstate);
/* In case we have expired data but there is a client timer for expired
* answers, pass execution to next module in order to try updating the
* data first.
* TODO: this needs revisit. The expired data stored from cachedb has
* 0 TTL which is picked up by iterator later when looking in the cache.
* Document that ext cachedb does not work properly with
* serve_stale_reply_ttl yet. */
if(qstate->need_refetch && qstate->serve_expired_data &&
qstate->serve_expired_data->timer) {
qstate->return_msg = NULL;
qstate->ext_state[id] = module_wait_module;
return;
}
/* we are done with the query */
qstate->ext_state[id] = module_finished;
return;
Expand Down
4 changes: 2 additions & 2 deletions daemon/remote.c
Original file line number Diff line number Diff line change
Expand Up @@ -720,8 +720,8 @@ print_stats(RES* ssl, const char* nm, struct ub_stats_info* s)
(unsigned long)s->svr.num_queries_missed_cache)) return 0;
if(!ssl_printf(ssl, "%s.num.prefetch"SQ"%lu\n", nm,
(unsigned long)s->svr.num_queries_prefetch)) return 0;
if(!ssl_printf(ssl, "%s.num.zero_ttl"SQ"%lu\n", nm,
(unsigned long)s->svr.zero_ttl_responses)) return 0;
if(!ssl_printf(ssl, "%s.num.expired"SQ"%lu\n", nm,
(unsigned long)s->svr.ans_expired)) return 0;
if(!ssl_printf(ssl, "%s.num.recursivereplies"SQ"%lu\n", nm,
(unsigned long)s->mesh_replies_sent)) return 0;
#ifdef USE_DNSCRYPT
Expand Down
2 changes: 1 addition & 1 deletion daemon/stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache;
total->svr.num_queries_prefetch += a->svr.num_queries_prefetch;
total->svr.sum_query_list_size += a->svr.sum_query_list_size;
total->svr.ans_expired += a->svr.ans_expired;
#ifdef USE_DNSCRYPT
total->svr.num_query_dnscrypt_crypted += a->svr.num_query_dnscrypt_crypted;
total->svr.num_query_dnscrypt_cert += a->svr.num_query_dnscrypt_cert;
Expand Down Expand Up @@ -432,7 +433,6 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
total->svr.qEDNS += a->svr.qEDNS;
total->svr.qEDNS_DO += a->svr.qEDNS_DO;
total->svr.ans_rcode_nodata += a->svr.ans_rcode_nodata;
total->svr.zero_ttl_responses += a->svr.zero_ttl_responses;
total->svr.ans_secure += a->svr.ans_secure;
total->svr.ans_bogus += a->svr.ans_bogus;
total->svr.unwanted_replies += a->svr.unwanted_replies;
Expand Down
94 changes: 53 additions & 41 deletions daemon/worker.c
Original file line number Diff line number Diff line change
Expand Up @@ -625,49 +625,48 @@ apply_respip_action(struct worker* worker, const struct query_info* qinfo,
* be completely dropped, '*need_drop' will be set to 1. */
static int
answer_from_cache(struct worker* worker, struct query_info* qinfo,
struct respip_client_info* cinfo, int* need_drop,
struct ub_packed_rrset_key** alias_rrset,
struct respip_client_info* cinfo, int* need_drop, int* is_expired_answer,
int* is_secure_answer, struct ub_packed_rrset_key** alias_rrset,
struct reply_info** partial_repp,
struct reply_info* rep, uint16_t id, uint16_t flags,
struct reply_info* rep, uint16_t id, uint16_t flags,
struct comm_reply* repinfo, struct edns_data* edns)
{
struct edns_data edns_bak;
time_t timenow = *worker->env.now;
uint16_t udpsize = edns->udp_size;
struct reply_info* encode_rep = rep;
struct reply_info* partial_rep = *partial_repp;
int secure;
int must_validate = (!(flags&BIT_CD) || worker->env.cfg->ignore_cd)
&& worker->env.need_to_validate;
*partial_repp = NULL; /* avoid accidental further pass */
if(worker->env.cfg->serve_expired) {
if(worker->env.cfg->serve_expired_ttl &&
rep->serve_expired_ttl < timenow)
return 0;
if(!rrset_array_lock(rep->ref, rep->rrset_count, 0))
return 0;
/* below, rrsets with ttl before timenow become TTL 0 in
* the response */
/* This response was served with zero TTL */
if (timenow >= rep->ttl) {
worker->stats.zero_ttl_responses++;
}
} else {
/* see if it is possible */
if(rep->ttl < timenow) {
*partial_repp = NULL; /* avoid accidental further pass */

/* Check TTL */
if(rep->ttl < timenow) {
/* Check if we need to serve expired now */
if(worker->env.cfg->serve_expired &&
!worker->env.cfg->serve_expired_client_timeout) {
if(worker->env.cfg->serve_expired_ttl &&
rep->serve_expired_ttl < timenow)
return 0;
if(!rrset_array_lock(rep->ref, rep->rrset_count, 0))
return 0;
*is_expired_answer = 1;
} else {
/* the rrsets may have been updated in the meantime.
* we will refetch the message format from the
* authoritative server
* authoritative server
*/
return 0;
}
} else {
if(!rrset_array_lock(rep->ref, rep->rrset_count, timenow))
return 0;
/* locked and ids and ttls are OK. */
}
/* locked and ids and ttls are OK. */

/* check CNAME chain (if any) */
if(rep->an_numrrsets > 0 && (rep->rrsets[0]->rk.type ==
htons(LDNS_RR_TYPE_CNAME) || rep->rrsets[0]->rk.type ==
if(rep->an_numrrsets > 0 && (rep->rrsets[0]->rk.type ==
htons(LDNS_RR_TYPE_CNAME) || rep->rrsets[0]->rk.type ==
htons(LDNS_RR_TYPE_DNAME))) {
if(!reply_check_cname_chain(qinfo, rep)) {
/* cname chain invalid, redo iterator steps */
Expand All @@ -686,31 +685,31 @@ answer_from_cache(struct worker* worker, struct query_info* qinfo,
if(!inplace_cb_reply_servfail_call(&worker->env, qinfo, NULL, rep,
LDNS_RCODE_SERVFAIL, edns, repinfo, worker->scratchpad))
goto bail_out;
error_encode(repinfo->c->buffer, LDNS_RCODE_SERVFAIL,
error_encode(repinfo->c->buffer, LDNS_RCODE_SERVFAIL,
qinfo, id, flags, edns);
rrset_array_unlock_touch(worker->env.rrset_cache,
rrset_array_unlock_touch(worker->env.rrset_cache,
worker->scratchpad, rep->ref, rep->rrset_count);
if(worker->stats.extended) {
worker->stats.ans_bogus ++;
worker->stats.ans_rcode[LDNS_RCODE_SERVFAIL] ++;
}
return 1;
} else if( rep->security == sec_status_unchecked && must_validate) {
} else if(rep->security == sec_status_unchecked && must_validate) {
verbose(VERB_ALGO, "Cache reply: unchecked entry needs "
"validation");
goto bail_out; /* need to validate cache entry first */
} else if(rep->security == sec_status_secure) {
if(reply_all_rrsets_secure(rep))
secure = 1;
else {
if(reply_all_rrsets_secure(rep)) {
*is_secure_answer = 1;
} else {
if(must_validate) {
verbose(VERB_ALGO, "Cache reply: secure entry"
" changed status");
goto bail_out; /* rrset changed, re-verify */
}
secure = 0;
*is_secure_answer = 0;
}
} else secure = 0;
} else *is_secure_answer = 0;

edns_bak = *edns;
edns->edns_version = EDNS_ADVERTISED_VERSION;
Expand All @@ -732,8 +731,10 @@ answer_from_cache(struct worker* worker, struct query_info* qinfo,
worker->env.auth_zones)) {
goto bail_out;
}
if(encode_rep != rep)
secure = 0; /* if rewritten, it can't be considered "secure" */
if(encode_rep != rep) {
/* if rewritten, it can't be considered "secure" */
*is_secure_answer = 0;
}
if(!encode_rep || *alias_rrset) {
if(!encode_rep)
*need_drop = 1;
Expand All @@ -750,7 +751,7 @@ answer_from_cache(struct worker* worker, struct query_info* qinfo,
repinfo->c, worker->scratchpad) ||
!reply_info_answer_encode(qinfo, encode_rep, id, flags,
repinfo->c->buffer, timenow, 1, worker->scratchpad,
udpsize, edns, (int)(edns->bits & EDNS_DO), secure)) {
udpsize, edns, (int)(edns->bits & EDNS_DO), *is_secure_answer)) {
if(!inplace_cb_reply_servfail_call(&worker->env, qinfo, NULL, NULL,
LDNS_RCODE_SERVFAIL, edns, repinfo, worker->scratchpad))
edns->opt_list = NULL;
Expand All @@ -761,10 +762,6 @@ answer_from_cache(struct worker* worker, struct query_info* qinfo,
* is bad while holding locks. */
rrset_array_unlock_touch(worker->env.rrset_cache, worker->scratchpad,
rep->ref, rep->rrset_count);
if(worker->stats.extended) {
if(secure) worker->stats.ans_secure++;
server_stats_insrcode(&worker->stats, repinfo->c->buffer);
}
/* go and return this buffer to the client */
return 1;

Expand Down Expand Up @@ -1099,6 +1096,8 @@ worker_handle_request(struct comm_point* c, void* arg, int error,
struct acl_addr* acladdr;
int rc = 0;
int need_drop = 0;
int is_expired_answer = 0;
int is_secure_answer = 0;
/* We might have to chase a CNAME chain internally, in which case
* we'll have up to two replies and combine them to build a complete
* answer. These variables control this case. */
Expand Down Expand Up @@ -1481,12 +1480,14 @@ worker_handle_request(struct comm_point* c, void* arg, int error,
* each pass. We should still pass the original qinfo to
* answer_from_cache(), however, since it's used to build the reply. */
if(!edns_bypass_cache_stage(edns.opt_list, &worker->env)) {
is_expired_answer = 0;
is_secure_answer = 0;
h = query_info_hash(lookup_qinfo, sldns_buffer_read_u16_at(c->buffer, 2));
if((e=slabhash_lookup(worker->env.msg_cache, h, lookup_qinfo, 0))) {
/* answer from cache - we have acquired a readlock on it */
if(answer_from_cache(worker, &qinfo,
cinfo, &need_drop, &alias_rrset, &partial_rep,
(struct reply_info*)e->data,
cinfo, &need_drop, &is_expired_answer, &is_secure_answer,
&alias_rrset, &partial_rep, (struct reply_info*)e->data,
*(uint16_t*)(void *)sldns_buffer_begin(c->buffer),
sldns_buffer_read_u16_at(c->buffer, 2), repinfo,
&edns)) {
Expand Down Expand Up @@ -1583,6 +1584,13 @@ worker_handle_request(struct comm_point* c, void* arg, int error,
comm_point_drop_reply(repinfo);
return 0;
}
if(is_expired_answer) {
worker->stats.ans_expired++;
}
if(worker->stats.extended) {
if(is_secure_answer) worker->stats.ans_secure++;
server_stats_insrcode(&worker->stats, repinfo->c->buffer);
}
#ifdef USE_DNSTAP
if(worker->dtenv.log_client_response_messages)
dt_msg_send_client_response(&worker->dtenv, &repinfo->addr,
Expand Down Expand Up @@ -1858,6 +1866,10 @@ worker_init(struct worker* worker, struct config_file *cfg,
return 0;
}
worker->env.mesh = mesh_create(&worker->daemon->mods, &worker->env);
/* Pass on daemon variables that we would need in the mesh area */
worker->env.mesh->use_response_ip = worker->daemon->use_response_ip;
worker->env.mesh->use_rpz = worker->daemon->use_rpz;

worker->env.detach_subs = &mesh_detach_subs;
worker->env.attach_sub = &mesh_attach_sub;
worker->env.add_sub = &mesh_add_sub;
Expand Down
12 changes: 11 additions & 1 deletion doc/Changelog
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
-3 February 2020: Ralph
5 February 2020: George
- Added serve-stale functionality as described in
draft-ietf-dnsop-serve-stale-10. `serve-expired-*` options can be used
to configure the behavior.
- Updated cachedb to honor `serve-expired-ttl`; Fixes #107.
- Renamed statistic `num.zero_ttl` to `num.expired` as expired replies
come with a configurable TTL value (`serve-expired-reply-ttl`).
- Fixed stats when replying with cached, cname-aliased records.
- Added missing default values for redis cachedb backend.

3 February 2020: Ralph
- Add assertion to please static analyzer

31 January 2020: Wouter
Expand Down
14 changes: 12 additions & 2 deletions doc/example.conf.in
Original file line number Diff line number Diff line change
Expand Up @@ -558,8 +558,8 @@ server:
# that set CD but cannot validate themselves.
# ignore-cd-flag: no

# Serve expired responses from cache, with TTL 0 in the response,
# and then attempt to fetch the data afresh.
# Serve expired responses from cache, with serve-expired-reply-ttl in
# the response, and then attempt to fetch the data afresh.
# serve-expired: no
#
# Limit serving of expired responses to configured seconds after
Expand All @@ -571,6 +571,16 @@ server:
# that the expired records will be served as long as there are queries
# for it.
# serve-expired-ttl-reset: no
#
# TTL value to use when replying with expired data.
# serve-expired-reply-ttl: 30
#
# Time in milliseconds before replying to the client with expired data.
# This essentially enables the serve-stale behavior as specified in
# draft-ietf-dnsop-serve-stale-10 that first tries to resolve before
# immediately responding with expired data. 0 disables this behavior.
# A recommended value is 1800.
# serve-expired-client-timeout: 0

# Have the validator log failed validations for your diagnosis.
# 0: off. 1: A line per failed user query. 2: With reason and bad IP.
Expand Down
Loading

0 comments on commit f7fe95a

Please sign in to comment.