/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* $Id$ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <event.h>
#include <netdb.h>
#include "protocol_binary.h"
#define DATA_BUFFER_SIZE 2048
#define UDP_READ_BUFFER_SIZE 65536
#define UDP_MAX_PAYLOAD_SIZE 1400
#define UDP_HEADER_SIZE 8
#define MAX_SENDBUF_SIZE (256 * 1024 * 1024)
/* I'm told the max legnth of a 64-bit num converted to string is 20 bytes.
* Plus a few for spaces, \r\n, \0 */
#define SUFFIX_SIZE 24
/** Initial size of list of items being returned by "get". */
#define ITEM_LIST_INITIAL 200
/** Initial size of list of CAS suffixes appended to "gets" lines. */
#define SUFFIX_LIST_INITIAL 20
/** Initial size of the sendmsg() scatter/gather array. */
#define IOV_LIST_INITIAL 400
/** Initial number of sendmsg() argument structures to allocate. */
#define MSG_LIST_INITIAL 10
/** High water marks for buffer shrinking */
#define READ_BUFFER_HIGHWAT 8192
#define ITEM_LIST_HIGHWAT 400
#define IOV_LIST_HIGHWAT 600
#define MSG_LIST_HIGHWAT 100
/* Binary protocol stuff */
#define MIN_BIN_PKT_LENGTH 16
#define BIN_PKT_HDR_WORDS (MIN_BIN_PKT_LENGTH/sizeof(uint32_t))
/* Get a consistent bool type */
#if HAVE_STDBOOL_H
# include <stdbool.h>
#else
typedef enum {false = 0, true = 1} bool;
#endif
#if HAVE_STDINT_H
# include <stdint.h>
#else
typedef unsigned char uint8_t;
#endif
/* unistd.h is here */
#if HAVE_UNISTD_H
# include <unistd.h>
#endif
/** Time relative to server start. Smaller than time_t on 64-bit systems. */
typedef unsigned int rel_time_t;
struct stats {
unsigned int curr_items;
unsigned int total_items;
uint64_t curr_bytes;
unsigned int curr_conns;
unsigned int total_conns;
unsigned int conn_structs;
uint64_t get_cmds;
uint64_t set_cmds;
uint64_t get_hits;
uint64_t get_misses;
uint64_t evictions;
time_t started; /* when the process was started */
uint64_t bytes_read;
uint64_t bytes_written;
};
#define MAX_VERBOSITY_LEVEL 2
struct engine_handle;
struct settings {
size_t maxbytes;
int maxconns;
int port;
int udpport;
char *inter;
int verbose;
rel_time_t oldest_live; /* ignore existing items older than this */
bool managed; /* if 1, a tracker manages virtual buckets */
int evict_to_free;
char *socketpath; /* path to unix socket if using local socket */
int access; /* access mask (a la chmod) for unix domain socket */
double factor; /* chunk size growth factor */
int chunk_size;
int num_threads; /* number of libevent threads to run */
char prefix_delimiter; /* character that marks a key prefix (for stats) */
int detail_enabled; /* nonzero if we're collecting detailed stats */
struct engine_handle *engine;
};
extern struct stats stats;
extern struct settings settings;
#define ITEM_LINKED 1
#define ITEM_DELETED 2
/* temp */
#define ITEM_SLABBED 4
typedef struct _stritem {
struct _stritem *next;
struct _stritem *prev;
struct _stritem *h_next; /* hash chain next */
rel_time_t time; /* least recent access */
rel_time_t exptime; /* expire time */
int nbytes; /* size of data */
unsigned short refcount;
uint8_t nsuffix; /* length of flags-and-length string */
uint8_t it_flags; /* ITEM_* above */
uint8_t slabs_clsid;/* which slab class we're in */
uint8_t nkey; /* key length, w/terminating null and padding */
uint64_t cas_id; /* the CAS identifier */
void * end[];
/* then null-terminated key */
/* then " flags length\r\n" (no terminating null) */
/* then data with terminating \r\n (no terminating null; it's binary!) */
} item;
#define ITEM_key(item) ((char*)&((item)->end[0]))
/* warning: don't use these macros with a function, as it evals its arg twice */
#define ITEM_suffix(item) ((char*) &((item)->end[0]) + (item)->nkey + 1)
#define ITEM_data(item) ((char*) &((item)->end[0]) + (item)->nkey + 1 + (item)->nsuffix)
#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 + (item)->nsuffix + (item)->nbytes)
/**
* NOTE: If you modify this table you _MUST_ update the function state_text
*/
enum conn_states {
conn_listening, /** the socket which listens for connections */
conn_new_cmd, /** Prepare connection for next command */
conn_waiting, /** waiting for a readable socket */
conn_read, /** reading in a command line */
conn_parse_cmd, /** try to parse a command from the input buffer */
conn_write, /** writing out a simple response */
conn_nread, /** reading in a fixed number of bytes */
conn_swallow, /** swallowing unnecessary bytes w/o storing */
conn_closing, /** closing this connection */
conn_mwrite, /** writing out many items sequentially */
conn_max_state, /** Max state value (used for assertion) */
};
enum bin_substates {
bin_no_state,
bin_reading_set_header,
bin_reading_cas_header,
bin_read_set_value,
bin_reading_get_key,
bin_reading_del_header,
bin_reading_incr_header,
bin_read_flush_exptime
};
enum protocol {
ascii_prot = 3, /* arbitrary value. */
ascii_udp_prot,
binary_prot,
negotiating_prot, /* Discovering the protocol */
};
#define IS_UDP(x) (x == ascii_udp_prot)
enum operation { NREAD_ADD = 1, NREAD_SET, NREAD_REPLACE, NREAD_APPEND, NREAD_PREPEND, NREAD_CAS };
typedef struct conn conn;
struct conn {
int sfd;
enum conn_states state;
enum bin_substates substate;
struct event event;
short ev_flags;
short which; /** which events were just triggered */
char *rbuf; /** buffer to read commands into */
char *rcurr; /** but if we parsed some already, this is where we stopped */
int rsize; /** total allocated size of rbuf */
int rbytes; /** how much data, starting from rcur, do we have unparsed */
char *wbuf;
char *wcurr;
int wsize;
int wbytes;
int write_and_go; /** which state to go into after finishing current write */
void *write_and_free; /** free this memory after finishing writing */
char *ritem; /** when we read in an item's value, it goes here */
int rlbytes;
/* data for the nread state */
/**
* item is used to hold an item structure created after reading the command
* line of set/add/replace commands, but before we finished reading the actual
* data. The data is read into ITEM_data(item) to avoid extra copying.
*/
void *item; /* for commands set/add/replace */
int item_comm; /* which one is it: set/add/replace */
/* data for the swallow state */
int sbytes; /* how many bytes to swallow */
/* data for the mwrite state */
struct iovec *iov;
int iovsize; /* number of elements allocated in iov[] */
int iovused; /* number of elements used in iov[] */
struct msghdr *msglist;
int msgsize; /* number of elements allocated in msglist[] */
int msgused; /* number of elements used in msglist[] */
int msgcurr; /* element in msglist[] being transmitted now */
int msgbytes; /* number of bytes in current msg */
item **ilist; /* list of items to write out */
int isize;
item **icurr;
int ileft;
char **suffixlist;
int suffixsize;
char **suffixcurr;
int suffixleft;
enum protocol protocol; /* which protocol this connection speaks */
/* data for UDP clients */
int request_id; /* Incoming UDP request ID, if this is a UDP "connection" */
struct sockaddr request_addr; /* Who sent the most recent request */
socklen_t request_addr_size;
unsigned char *hdrbuf; /* udp packet headers */
int hdrsize; /* number of headers' worth of space is allocated */
int binary; /* are we in binary mode */
int bucket; /* bucket number for the next command, if running as
a managed instance. -1 (_not_ 0) means invalid. */
int gen; /* generation requested for the bucket */
bool noreply; /* True if the reply should not be sent. */
/* Binary protocol stuff */
/* This is where the binary header goes */
protocol_binary_request_header binary_header;
uint64_t cas; /* the cas to return */
short cmd;
int opaque;
int keylen;
conn *next; /* Used for generating a list of conn structures */
};
/* number of virtual buckets for a managed instance */
#define MAX_BUCKETS 32768
/* current time of day (updated periodically) */
extern volatile rel_time_t current_time;
/*
* Functions
*/
conn *do_conn_from_freelist();
bool do_conn_add_to_freelist(conn *c);
char *do_suffix_from_freelist();
bool do_suffix_add_to_freelist(char *s);
char *do_add_delta(item *item, const bool incr, const int64_t delta, char *buf);
conn *conn_new(const int sfd, const enum conn_states init_state, const int event_flags, const int read_buffer_size, enum protocol prot, struct event_base *base);
#include "stats.h"
#include "engine.h"
rel_time_t realtime(const time_t exptime);
/*
* Functions such as the libevent-related calls that need to do cross-thread
* communication in multithreaded mode (rather than actually doing the work
* in the current thread) are called via "dispatch_" frontends, which are
* also #define-d to directly call the underlying code in singlethreaded mode.
*/
void thread_init(int nthreads, struct event_base *main_base);
int dispatch_event_add(int thread, conn *c);
void dispatch_conn_new(int sfd, enum conn_states init_state, int event_flags, int read_buffer_size, enum protocol prot);
/* Lock wrappers for cache functions that are called from main loop. */
conn *conn_from_freelist(void);
bool conn_add_to_freelist(conn *c);
char *suffix_from_freelist(void);
bool suffix_add_to_freelist(char *s);
int is_listen_thread(void);
void STATS_LOCK(void);
void STATS_UNLOCK(void);
/* If supported, give compiler hints for branch prediction. */
#if !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
#define __builtin_expect(x, expected_value) (x)
#endif
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)