-
Notifications
You must be signed in to change notification settings - Fork 587
/
lmdb.h
1653 lines (1564 loc) · 74.1 KB
/
lmdb.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/** @file lmdb.h
* @brief Lightning memory-mapped database library
*
* @mainpage Lightning Memory-Mapped Database Manager (LMDB)
*
* @section intro_sec Introduction
* LMDB is a Btree-based database management library modeled loosely on the
* BerkeleyDB API, but much simplified. The entire database is exposed
* in a memory map, and all data fetches return data directly
* from the mapped memory, so no malloc's or memcpy's occur during
* data fetches. As such, the library is extremely simple because it
* requires no page caching layer of its own, and it is extremely high
* performance and memory-efficient. It is also fully transactional with
* full ACID semantics, and when the memory map is read-only, the
* database integrity cannot be corrupted by stray pointer writes from
* application code.
*
* The library is fully thread-aware and supports concurrent read/write
* access from multiple processes and threads. Data pages use a copy-on-
* write strategy so no active data pages are ever overwritten, which
* also provides resistance to corruption and eliminates the need of any
* special recovery procedures after a system crash. Writes are fully
* serialized; only one write transaction may be active at a time, which
* guarantees that writers can never deadlock. The database structure is
* multi-versioned so readers run with no locks; writers cannot block
* readers, and readers don't block writers.
*
* Unlike other well-known database mechanisms which use either write-ahead
* transaction logs or append-only data writes, LMDB requires no maintenance
* during operation. Both write-ahead loggers and append-only databases
* require periodic checkpointing and/or compaction of their log or database
* files otherwise they grow without bound. LMDB tracks free pages within
* the database and re-uses them for new write operations, so the database
* size does not grow without bound in normal use.
*
* The memory map can be used as a read-only or read-write map. It is
* read-only by default as this provides total immunity to corruption.
* Using read-write mode offers much higher write performance, but adds
* the possibility for stray application writes through pointers to silently
* corrupt the database. Of course if your application code is known to
* be bug-free (...) then this is not an issue.
*
* If this is your first time using a transactional embedded key/value
* store, you may find the \ref starting page to be helpful.
*
* @section caveats_sec Caveats
* Troubleshooting the lock file, plus semaphores on BSD systems:
*
* - A broken lockfile can cause sync issues.
* Stale reader transactions left behind by an aborted program
* cause further writes to grow the database quickly, and
* stale locks can block further operation.
*
* Fix: Check for stale readers periodically, using the
* #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool.
* Stale writers will be cleared automatically on most systems:
* - Windows - automatic
* - BSD, systems using SysV semaphores - automatic
* - Linux, systems using POSIX mutexes with Robust option - automatic
* Otherwise just make all programs using the database close it;
* the lockfile is always reset on first open of the environment.
*
* - On BSD systems or others configured with MDB_USE_SYSV_SEM or
* MDB_USE_POSIX_SEM,
* startup can fail due to semaphores owned by another userid.
*
* Fix: Open and close the database as the user which owns the
* semaphores (likely last user) or as root, while no other
* process is using the database.
*
* Restrictions/caveats (in addition to those listed for some functions):
*
* - Only the database owner should normally use the database on
* BSD systems or when otherwise configured with MDB_USE_POSIX_SEM.
* Multiple users can cause startup to fail later, as noted above.
*
* - There is normally no pure read-only mode, since readers need write
* access to locks and lock file. Exceptions: On read-only filesystems
* or with the #MDB_NOLOCK flag described under #mdb_env_open().
*
* - An LMDB configuration will often reserve considerable \b unused
* memory address space and maybe file size for future growth.
* This does not use actual memory or disk space, but users may need
* to understand the difference so they won't be scared off.
*
* - By default, in versions before 0.9.10, unused portions of the data
* file might receive garbage data from memory freed by other code.
* (This does not happen when using the #MDB_WRITEMAP flag.) As of
* 0.9.10 the default behavior is to initialize such memory before
* writing to the data file. Since there may be a slight performance
* cost due to this initialization, applications may disable it using
* the #MDB_NOMEMINIT flag. Applications handling sensitive data
* which must not be written should not use this flag. This flag is
* irrelevant when using #MDB_WRITEMAP.
*
* - A thread can only use one transaction at a time, plus any child
* transactions. Each transaction belongs to one thread. See below.
* The #MDB_NOTLS flag changes this for read-only transactions.
*
* - Use an MDB_env* in the process which opened it, not after fork().
*
* - Do not have an LMDB database open twice in the same process at
* the same time. Not even from a plain open() call - close()ing it
* breaks fcntl() advisory locking. (It is OK to reopen it after
* fork() - exec*(), since the lockfile has FD_CLOEXEC set.)
*
* - Avoid long-lived transactions. Read transactions prevent
* reuse of pages freed by newer write transactions, thus the
* database can grow quickly. Write transactions prevent
* other write transactions, since writes are serialized.
*
* - Avoid suspending a process with active transactions. These
* would then be "long-lived" as above. Also read transactions
* suspended when writers commit could sometimes see wrong data.
*
* ...when several processes can use a database concurrently:
*
* - Avoid aborting a process with an active transaction.
* The transaction becomes "long-lived" as above until a check
* for stale readers is performed or the lockfile is reset,
* since the process may not remove it from the lockfile.
*
* This does not apply to write transactions if the system clears
* stale writers, see above.
*
* - If you do that anyway, do a periodic check for stale readers. Or
* close the environment once in a while, so the lockfile can get reset.
*
* - Do not use LMDB databases on remote filesystems, even between
* processes on the same host. This breaks flock() on some OSes,
* possibly memory map sync, and certainly sync between programs
* on different hosts.
*
* - Opening a database can fail if another process is opening or
* closing it at exactly the same time.
*
* @author Howard Chu, Symas Corporation.
*
* @copyright Copyright 2011-2021 Howard Chu, Symas Corp. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*
* @par Derived From:
* This code is derived from btree.c written by Martin Hedenfalk.
*
* Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef _LMDB_H_
#define _LMDB_H_
#include <sys/types.h>
#include <inttypes.h>
#include <limits.h>
#ifdef __cplusplus
extern "C" {
#endif
/** Unix permissions for creating files, or dummy definition for Windows.
* When compiling with MSVC (_MSC_VER) it is a plain int placeholder;
* with every other compiler it is the system's mode_t.
*/
#ifdef _MSC_VER
typedef int mdb_mode_t;
#else
typedef mode_t mdb_mode_t;
#endif
/* Length modifier for size_t in printf/scanf format strings:
* "I" when building for Windows (_WIN32), "z" everywhere else.
*/
#ifdef _WIN32
# define MDB_FMT_Z "I"
#else
# define MDB_FMT_Z "z" /**< printf/scanf format modifier for size_t */
#endif
#ifndef MDB_VL32
/** Unsigned type used for mapsize, entry counts and page/transaction IDs.
*
* It is normally size_t, hence the name. Defining MDB_VL32 makes it
* uint64_t, but do not try this unless you know what you are doing.
*/
typedef size_t mdb_size_t;
# define MDB_SIZE_MAX SIZE_MAX /**< max #mdb_size_t */
/** #mdb_size_t printf formats, \b t = one of [diouxX] without quotes */
# define MDB_PRIy(t) MDB_FMT_Z #t
/** #mdb_size_t scanf formats, \b t = one of [dioux] without quotes */
# define MDB_SCNy(t) MDB_FMT_Z #t
#else
/* MDB_VL32 build: mdb_size_t is a fixed 64-bit type, so its limit and its
* printf/scanf formats are taken from the 64-bit fixed-width macros. */
typedef uint64_t mdb_size_t;
# define MDB_SIZE_MAX UINT64_MAX /* max mdb_size_t */
# define MDB_PRIy(t) PRI##t##64 /* e.g. MDB_PRIy(u) expands to PRIu64 */
# define MDB_SCNy(t) SCN##t##64 /* e.g. MDB_SCNy(u) expands to SCNu64 */
# define mdb_env_create mdb_env_create_vl32 /**< Prevent mixing with non-VL32 builds */
#endif
/** An abstraction for a file handle.
* On POSIX systems file handles are small integers. On Windows
* they're opaque pointers. This is the type of the \b fd argument
* of #mdb_env_copyfd().
*/
#ifdef _WIN32
typedef void *mdb_filehandle_t;
#else
typedef int mdb_filehandle_t;
#endif
/** @defgroup mdb LMDB API
* @{
* @brief OpenLDAP Lightning Memory-Mapped Database Manager
*/
/** @defgroup Version Version Macros
* @{
*/
/** Library major version */
#define MDB_VERSION_MAJOR 0
/** Library minor version */
#define MDB_VERSION_MINOR 9
/** Library patch version */
#define MDB_VERSION_PATCH 70
/** Combine args a,b,c into a single integer for easy version comparisons.
* \b a is shifted left by 24 bits, \b b by 16 bits, and \b c is OR'ed into
* the low bits, so e.g. MDB_VERINT(0,9,70) is 0x00090046.
*/
#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
/** The full library version as a single integer */
#define MDB_VERSION_FULL \
MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
/** The release date of this library version */
#define MDB_VERSION_DATE "December 19, 2015"
/** A stringifier for the version info.
* \b a, \b b and \b c are stringified with the # operator; \b d must
* already be a string literal and is concatenated as-is.
*/
#define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")"
/** A helper for the stringifier macro.
* The extra level of macro expansion makes the preprocessor replace the
* MDB_VERSION_* arguments with their values before #MDB_VERSTR
* stringifies them.
*/
#define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d)
/** The full library version as a C string,
* here "LMDB 0.9.70: (December 19, 2015)".
*/
#define MDB_VERSION_STRING \
MDB_VERFOO(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH,MDB_VERSION_DATE)
/** @} */
/** @brief Opaque structure for a database environment.
*
* A DB environment supports multiple databases, all residing in the same
* shared-memory map. A handle is obtained from #mdb_env_create().
*/
typedef struct MDB_env MDB_env;
/** @brief Opaque structure for a transaction handle.
*
* All database operations require a transaction handle. Transactions may be
* read-only or read-write.
*/
typedef struct MDB_txn MDB_txn;
/** @brief A handle for an individual database in the DB environment.
*
* Unlike the other handles this is a plain unsigned integer, not a pointer
* to an opaque structure.
*/
typedef unsigned int MDB_dbi;
/** @brief Opaque structure for navigating through a database */
typedef struct MDB_cursor MDB_cursor;
/** @brief Generic structure used for passing keys and data in and out
 * of the database.
 *
 * A value is described by its length plus a pointer to its first byte.
 * Values handed back by the database stay valid only until the next
 * update operation or until the transaction ends. They commonly point
 * straight into the database itself, so do not modify or free them.
 *
 * Key sizes must be between 1 and #mdb_env_get_maxkeysize() inclusive.
 * The same limit applies to data sizes in databases with the #MDB_DUPSORT
 * flag. Other data items can in theory be from 0 to 0xffffffff bytes long.
 */
struct MDB_val {
	/** size of the data item */
	size_t mv_size;
	/** address of the data item */
	void *mv_data;
};
/** Shorthand so the structure can be named without the struct keyword. */
typedef struct MDB_val MDB_val;
/** @brief A callback function used to compare two keys in a database.
*
* This typedef names a function type, not a pointer type; callbacks are
* therefore passed around as pointers to MDB_cmp_func.
* @param[in] a The first item to compare
* @param[in] b The second item to compare
*/
typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b);
/** @brief A callback function used to relocate a position-dependent data item
* in a fixed-address database.
*
* The \b newptr gives the item's desired address in
* the memory map, and \b oldptr gives its previous address. The item's actual
* data resides at the address in \b item. This callback is expected to walk
* through the fields of the record in \b item and modify any
* values based at the \b oldptr address to be relative to the \b newptr address.
*
* This typedef names a function type, not a pointer type; callbacks are
* therefore passed around as pointers to MDB_rel_func.
* @param[in,out] item The item that is to be relocated.
* @param[in] oldptr The previous address.
* @param[in] newptr The new address to relocate to.
* @param[in] relctx An application-provided context, set by #mdb_set_relctx().
* @todo This feature is currently unimplemented.
*/
typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx);
/** @defgroup mdb_env Environment Flags
*
* Bit flags for the \b flags argument of #mdb_env_open(), which documents
* each of them in detail. Each flag is a distinct bit, so they are
* combined with bitwise OR.
* @{
*/
/** mmap at a fixed address (experimental) */
#define MDB_FIXEDMAP 0x01
/** no environment directory */
#define MDB_NOSUBDIR 0x4000
/** don't fsync after commit */
#define MDB_NOSYNC 0x10000
/** read only */
#define MDB_RDONLY 0x20000
/** don't fsync metapage after commit */
#define MDB_NOMETASYNC 0x40000
/** use writable mmap */
#define MDB_WRITEMAP 0x80000
/** use asynchronous msync when #MDB_WRITEMAP is used */
#define MDB_MAPASYNC 0x100000
/** tie reader locktable slots to #MDB_txn objects instead of to threads */
#define MDB_NOTLS 0x200000
/** don't do any locking, caller must manage their own locks */
#define MDB_NOLOCK 0x400000
/** don't do readahead (no effect on Windows) */
#define MDB_NORDAHEAD 0x800000
/** don't initialize malloc'd memory before writing to datafile */
#define MDB_NOMEMINIT 0x1000000
/** use the previous snapshot rather than the latest one */
#define MDB_PREVSNAPSHOT 0x2000000
/** @} */
/** @defgroup mdb_dbi_open Database Flags
*
* Each flag is a distinct bit, so flags are combined with bitwise OR.
* Several of them are only meaningful together with #MDB_DUPSORT,
* as noted on the individual flags.
* @{
*/
/** use reverse string keys */
#define MDB_REVERSEKEY 0x02
/** use sorted duplicates */
#define MDB_DUPSORT 0x04
/** numeric keys in native byte order, either unsigned int or #mdb_size_t.
* (lmdb expects 32-bit int <= size_t <= 32/64-bit mdb_size_t.)
* The keys must all be of the same size. */
#define MDB_INTEGERKEY 0x08
/** with #MDB_DUPSORT, sorted dup items have fixed size */
#define MDB_DUPFIXED 0x10
/** with #MDB_DUPSORT, dups are #MDB_INTEGERKEY-style integers */
#define MDB_INTEGERDUP 0x20
/** with #MDB_DUPSORT, use reverse string dups */
#define MDB_REVERSEDUP 0x40
/** create DB if not already existing */
#define MDB_CREATE 0x40000
/** @} */
/** @defgroup mdb_put Write Flags
*
* Several of these values coincide numerically with flags from other
* groups (for instance #MDB_NOOVERWRITE and #MDB_DUPFIXED are both 0x10),
* so write flags must not be mixed into the same flags word as those.
* @{
*/
/** For put: Don't write if the key already exists. */
#define MDB_NOOVERWRITE 0x10
/** Only for #MDB_DUPSORT<br>
* For put: don't write if the key and data pair already exist.<br>
* For mdb_cursor_del: remove all duplicate data items.
*/
#define MDB_NODUPDATA 0x20
/** For mdb_cursor_put: overwrite the current key/data pair */
#define MDB_CURRENT 0x40
/** For put: Just reserve space for data, don't copy it. Return a
* pointer to the reserved space.
*/
#define MDB_RESERVE 0x10000
/** Data is being appended, don't split full pages. */
#define MDB_APPEND 0x20000
/** Duplicate data is being appended, don't split full pages. */
#define MDB_APPENDDUP 0x40000
/** Store multiple data items in one call. Only for #MDB_DUPFIXED. */
#define MDB_MULTIPLE 0x80000
/** @} */
/** @defgroup mdb_copy Copy Flags
* @{
*/
/** Compacting copy: Omit free space from copy, and renumber all
* pages sequentially.
*/
#define MDB_CP_COMPACT 0x01
/** @} */
/** @brief Cursor Get operations.
 *
 * Every operation that can be requested when retrieving data through
 * a cursor. The numeric values are spelled out explicitly; they count
 * up from 0 in declaration order.
 */
typedef enum MDB_cursor_op {
	/** Position at first key/data item */
	MDB_FIRST = 0,
	/** Position at first data item of current key.
	 * Only for #MDB_DUPSORT */
	MDB_FIRST_DUP = 1,
	/** Position at key/data pair. Only for #MDB_DUPSORT */
	MDB_GET_BOTH = 2,
	/** Position at key, nearest data. Only for #MDB_DUPSORT */
	MDB_GET_BOTH_RANGE = 3,
	/** Return key/data at current cursor position */
	MDB_GET_CURRENT = 4,
	/** Return up to a page of duplicate data items from current
	 * cursor position. Move cursor to prepare for
	 * #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */
	MDB_GET_MULTIPLE = 5,
	/** Position at last key/data item */
	MDB_LAST = 6,
	/** Position at last data item of current key.
	 * Only for #MDB_DUPSORT */
	MDB_LAST_DUP = 7,
	/** Position at next data item */
	MDB_NEXT = 8,
	/** Position at next data item of current key.
	 * Only for #MDB_DUPSORT */
	MDB_NEXT_DUP = 9,
	/** Return up to a page of duplicate data items from next
	 * cursor position. Move cursor to prepare for
	 * #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */
	MDB_NEXT_MULTIPLE = 10,
	/** Position at first data item of next key */
	MDB_NEXT_NODUP = 11,
	/** Position at previous data item */
	MDB_PREV = 12,
	/** Position at previous data item of current key.
	 * Only for #MDB_DUPSORT */
	MDB_PREV_DUP = 13,
	/** Position at last data item of previous key */
	MDB_PREV_NODUP = 14,
	/** Position at specified key */
	MDB_SET = 15,
	/** Position at specified key, return key + data */
	MDB_SET_KEY = 16,
	/** Position at first key greater than or equal to specified key. */
	MDB_SET_RANGE = 17,
	/** Position at previous page and return up to a page of
	 * duplicate data items. Only for #MDB_DUPFIXED */
	MDB_PREV_MULTIPLE = 18
} MDB_cursor_op;
/** @defgroup errors Return Codes
*
* BerkeleyDB uses -30800 to -30999, we'll go under them:
* the LMDB-specific codes defined here run from -30799 (#MDB_KEYEXIST)
* to -30779 (#MDB_PROBLEM), just outside that range.
* @{
*/
/** Successful result */
#define MDB_SUCCESS 0
/** key/data pair already exists */
#define MDB_KEYEXIST (-30799)
/** key/data pair not found (EOF) */
#define MDB_NOTFOUND (-30798)
/** Requested page not found - this usually indicates corruption */
#define MDB_PAGE_NOTFOUND (-30797)
/** Located page was wrong type */
#define MDB_CORRUPTED (-30796)
/** Update of meta page failed or environment had fatal error */
#define MDB_PANIC (-30795)
/** Environment version mismatch */
#define MDB_VERSION_MISMATCH (-30794)
/** File is not a valid LMDB file */
#define MDB_INVALID (-30793)
/** Environment mapsize reached */
#define MDB_MAP_FULL (-30792)
/** Environment maxdbs reached */
#define MDB_DBS_FULL (-30791)
/** Environment maxreaders reached */
#define MDB_READERS_FULL (-30790)
/** Too many TLS keys in use - Windows only */
#define MDB_TLS_FULL (-30789)
/** Txn has too many dirty pages */
#define MDB_TXN_FULL (-30788)
/** Cursor stack too deep - internal error */
#define MDB_CURSOR_FULL (-30787)
/** Page has not enough space - internal error */
#define MDB_PAGE_FULL (-30786)
/** Database contents grew beyond environment mapsize */
#define MDB_MAP_RESIZED (-30785)
/** Operation and DB incompatible, or DB type changed. This can mean:
* <ul>
* <li>The operation expects an #MDB_DUPSORT / #MDB_DUPFIXED database.
* <li>Opening a named DB when the unnamed DB has #MDB_DUPSORT / #MDB_INTEGERKEY.
* <li>Accessing a data record as a database, or vice versa.
* <li>The database was dropped and recreated with different flags.
* </ul>
*/
#define MDB_INCOMPATIBLE (-30784)
/** Invalid reuse of reader locktable slot */
#define MDB_BAD_RSLOT (-30783)
/** Transaction must abort, has a child, or is invalid */
#define MDB_BAD_TXN (-30782)
/** Unsupported size of key/DB name/data, or wrong DUPFIXED size */
#define MDB_BAD_VALSIZE (-30781)
/** The specified DBI was changed unexpectedly */
#define MDB_BAD_DBI (-30780)
/** Unexpected problem - txn should abort */
#define MDB_PROBLEM (-30779)
/** The last defined error code, i.e. the LMDB-specific code with the
* highest numeric value. */
#define MDB_LAST_ERRCODE MDB_PROBLEM
/** @} */
/** @brief Statistics for a database in the environment */
struct MDB_stat {
	/** Size of a database page.
	 * This is currently the same for all databases. */
	unsigned int ms_psize;
	/** Depth (height) of the B-tree */
	unsigned int ms_depth;
	/** Number of internal (non-leaf) pages */
	mdb_size_t ms_branch_pages;
	/** Number of leaf pages */
	mdb_size_t ms_leaf_pages;
	/** Number of overflow pages */
	mdb_size_t ms_overflow_pages;
	/** Number of data items */
	mdb_size_t ms_entries;
};
/** Shorthand so the structure can be named without the struct keyword. */
typedef struct MDB_stat MDB_stat;
/** @brief Information about the environment */
struct MDB_envinfo {
	/** Address of map, if fixed */
	void *me_mapaddr;
	/** Size of the data memory map */
	mdb_size_t me_mapsize;
	/** ID of the last used page */
	mdb_size_t me_last_pgno;
	/** ID of the last committed transaction */
	mdb_size_t me_last_txnid;
	/** max reader slots in the environment */
	unsigned int me_maxreaders;
	/** max reader slots used in the environment */
	unsigned int me_numreaders;
};
/** Shorthand so the structure can be named without the struct keyword. */
typedef struct MDB_envinfo MDB_envinfo;
/** @brief Return the LMDB library version information.
*
* Each of the three output pointers is optional: pass NULL for any
* component that is not needed.
* @param[out] major if non-NULL, the library major version number is copied here
* @param[out] minor if non-NULL, the library minor version number is copied here
* @param[out] patch if non-NULL, the library patch version number is copied here
* @retval "version string" The library version as a string
*/
char *mdb_version(int *major, int *minor, int *patch);
/** @brief Return a string describing a given error code.
*
* This function is a superset of the ANSI X3.159-1989 (ANSI C) strerror(3)
* function. If the error code is greater than or equal to 0, then the string
* returned by the system function strerror(3) is returned. If the error code
* is less than 0, an error string corresponding to the LMDB library error is
* returned. See @ref errors for a list of LMDB-specific error codes.
* @param[in] err The error code
* @retval "error message" The description of the error
*/
char *mdb_strerror(int err);
/** @brief Create an LMDB environment handle.
*
* This function allocates memory for a #MDB_env structure. To release
* the allocated memory and discard the handle, call #mdb_env_close().
* Before the handle may be used, it must be opened using #mdb_env_open().
* Various other options may also need to be set before opening the handle,
* e.g. #mdb_env_set_mapsize(), #mdb_env_set_maxreaders(), #mdb_env_set_maxdbs(),
* depending on usage requirements.
*
* When MDB_VL32 is defined, this header renames the function to
* mdb_env_create_vl32 to prevent mixing with non-VL32 builds.
* @param[out] env The address where the new handle will be stored
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_env_create(MDB_env **env);
/** @brief Open an environment handle.
*
* If this function fails, #mdb_env_close() must be called to discard the #MDB_env handle.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] path The directory in which the database files reside. This
* directory must already exist and be writable. (With #MDB_NOSUBDIR,
* \b path is instead used as-is for the main data file, see below.)
* @param[in] flags Special options for this environment. This parameter
* must be set to 0 or by bitwise OR'ing together one or more of the
* values described here.
* Flags set by #mdb_env_set_flags() are also used.
* <ul>
* <li>#MDB_FIXEDMAP
* use a fixed address for the mmap region. This flag must be specified
* when creating the environment, and is stored persistently in the environment.
* If successful, the memory map will always reside at the same virtual address
* and pointers used to reference data items in the database will be constant
* across multiple invocations. This option may not always work, depending on
* how the operating system has allocated memory to shared libraries and other uses.
* The feature is highly experimental.
* <li>#MDB_NOSUBDIR
* By default, LMDB creates its environment in a directory whose
* pathname is given in \b path, and creates its data and lock files
* under that directory. With this option, \b path is used as-is for
* the database main data file. The database lock file is the \b path
* with "-lock" appended.
* <li>#MDB_RDONLY
* Open the environment in read-only mode. No write operations will be
* allowed. LMDB will still modify the lock file - except on read-only
* filesystems, where LMDB does not use locks.
* <li>#MDB_WRITEMAP
* Use a writeable memory map unless #MDB_RDONLY is set. This uses
* fewer mallocs but loses protection from application bugs
* like wild pointer writes and other bad updates into the database.
* This may be slightly faster for DBs that fit entirely in RAM, but
* is slower for DBs larger than RAM.
* Incompatible with nested transactions.
* Do not mix processes with and without #MDB_WRITEMAP on the same
* environment. This can defeat durability (#mdb_env_sync etc).
* <li>#MDB_NOMETASYNC
* Flush system buffers to disk only once per transaction, omit the
* metadata flush. Defer that until the system flushes files to disk,
* or next non-MDB_RDONLY commit or #mdb_env_sync(). This optimization
* maintains database integrity, but a system crash may undo the last
* committed transaction. I.e. it preserves the ACI (atomicity,
* consistency, isolation) but not D (durability) database property.
* This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_NOSYNC
* Don't flush system buffers to disk when committing a transaction.
* This optimization means a system crash can corrupt the database or
* lose the last transactions if buffers are not yet flushed to disk.
* The risk is governed by how often the system flushes dirty buffers
* to disk and how often #mdb_env_sync() is called. However, if the
* filesystem preserves write order and the #MDB_WRITEMAP flag is not
* used, transactions exhibit ACI (atomicity, consistency, isolation)
* properties and only lose D (durability). I.e. database integrity
* is maintained, but a system crash may undo the final transactions.
* Note that (#MDB_NOSYNC | #MDB_WRITEMAP) leaves the system with no
* hint for when to write transactions to disk, unless #mdb_env_sync()
* is called. (#MDB_MAPASYNC | #MDB_WRITEMAP) may be preferable.
* This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_MAPASYNC
* When using #MDB_WRITEMAP, use asynchronous flushes to disk.
* As with #MDB_NOSYNC, a system crash can then corrupt the
* database or lose the last transactions. Calling #mdb_env_sync()
* ensures on-disk database integrity until next commit.
* This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_NOTLS
* Don't use Thread-Local Storage. Tie reader locktable slots to
* #MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps
* the slot reserved for the #MDB_txn object. A thread may use parallel
* read-only transactions. A read-only transaction may span threads if
* the user synchronizes its use. Applications that multiplex many
* user threads over individual OS threads need this option. Such an
* application must also serialize the write transactions in an OS
* thread, since LMDB's write locking is unaware of the user threads.
* <li>#MDB_NOLOCK
* Don't do any locking. If concurrent access is anticipated, the
* caller must manage all concurrency itself. For proper operation
* the caller must enforce single-writer semantics, and must ensure
* that no readers are using old transactions while a writer is
* active. The simplest approach is to use an exclusive lock so that
* no readers may be active at all when a writer begins.
* <li>#MDB_NORDAHEAD
* Turn off readahead. Most operating systems perform readahead on
* read requests by default. This option turns it off if the OS
* supports it. Turning it off may help random read performance
* when the DB is larger than RAM and system RAM is full.
* The option is not implemented on Windows.
* <li>#MDB_NOMEMINIT
* Don't initialize malloc'd memory before writing to unused spaces
* in the data file. By default, memory for pages written to the data
* file is obtained using malloc. While these pages may be reused in
* subsequent transactions, freshly malloc'd pages will be initialized
* to zeroes before use. This avoids persisting leftover data from other
* code (that used the heap and subsequently freed the memory) into the
* data file. Note that many other system libraries may allocate
* and free memory from the heap for arbitrary uses. E.g., stdio may
* use the heap for file I/O buffers. This initialization step has a
* modest performance cost so some applications may want to disable
* it using this flag. This option can be a problem for applications
* which handle sensitive data like passwords, and it makes memory
* checkers like Valgrind noisy. This flag is not needed with #MDB_WRITEMAP,
* which writes directly to the mmap instead of using malloc for pages. The
* initialization is also skipped if #MDB_RESERVE is used; the
* caller is expected to overwrite all of the memory that was
* reserved in that case.
* This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_PREVSNAPSHOT
* Open the environment with the previous snapshot rather than the latest
* one. This loses the latest transaction, but may help work around some
* types of corruption. If opened with write access, this must be the
* only process using the environment. This flag is automatically reset
* after a write transaction is successfully committed.
* </ul>
* @param[in] mode The UNIX permissions to set on created files and semaphores.
* This parameter is ignored on Windows.
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>#MDB_VERSION_MISMATCH - the version of the LMDB library doesn't match the
* version that created the database environment.
* <li>#MDB_INVALID - the environment file headers are corrupted.
* <li>ENOENT - the directory specified by the path parameter doesn't exist.
* <li>EACCES - the user didn't have permission to access the environment files.
* <li>EAGAIN - the environment was locked by another process.
* </ul>
*/
int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode);
/** @brief Copy an LMDB environment to the specified path.
*
* This function may be used to make a backup of an existing environment.
* No lockfile is created, since it gets recreated when needed.
* See #mdb_env_copy2() for a variant that accepts option flags.
* @note This call can trigger significant file size growth if run in
* parallel with write transactions, because it employs a read-only
* transaction. See long-lived transactions under @ref caveats_sec.
* @param[in] env An environment handle returned by #mdb_env_create(). It
* must have already been opened successfully.
* @param[in] path The directory in which the copy will reside. This
* directory must already exist and be writable, but must otherwise be
* empty.
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_env_copy(MDB_env *env, const char *path);
/** @brief Copy an LMDB environment to the specified file descriptor.
*
* This function may be used to make a backup of an existing environment.
* No lockfile is created, since it gets recreated when needed.
* See #mdb_env_copyfd2() for a variant that accepts option flags.
* @note This call can trigger significant file size growth if run in
* parallel with write transactions, because it employs a read-only
* transaction. See long-lived transactions under @ref caveats_sec.
* @param[in] env An environment handle returned by #mdb_env_create(). It
* must have already been opened successfully.
* @param[in] fd The file descriptor to write the copy to. It must
* have already been opened for write access.
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd);
/** @brief Copy an LMDB environment to the specified path, with options.
*
* This function may be used to make a backup of an existing environment.
* No lockfile is created, since it gets recreated when needed.
* @note This call can trigger significant file size growth if run in
* parallel with write transactions, because it employs a read-only
* transaction. See long-lived transactions under @ref caveats_sec.
* @param[in] env An environment handle returned by #mdb_env_create(). It
* must have already been opened successfully.
* @param[in] path The directory in which the copy will reside. This
* directory must already exist and be writable, but must otherwise be
* empty.
* @param[in] flags Special options for this operation. This parameter
* must either be 0 or be formed by bitwise OR'ing together one or more
* of the values described here.
* <ul>
* <li>#MDB_CP_COMPACT - Perform compaction while copying: omit free
* pages and sequentially renumber all pages in output. This option
* consumes more CPU and runs more slowly than the default.
* Currently it fails if the environment has suffered a page leak.
* </ul>
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags);
/** @brief Copy an LMDB environment to the specified file descriptor,
* with options.
*
* This function may be used to make a backup of an existing environment.
* No lockfile is created, since it gets recreated when needed. See
* #mdb_env_copy2() for further details.
* @note This call can trigger significant file size growth if run in
* parallel with write transactions, because it employs a read-only
* transaction. See long-lived transactions under @ref caveats_sec.
* @param[in] env An environment handle returned by #mdb_env_create(). It
* must have already been opened successfully.
* @param[in] fd The file descriptor to write the copy to. It must
* have already been opened for write access.
* @param[in] flags Special options for this operation.
* See #mdb_env_copy2() for options.
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd, unsigned int flags);
/** @brief Return statistics about the LMDB environment.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[out] stat The address of an #MDB_stat structure
* where the statistics will be copied
* @return An int status code. NOTE(review): the return value was not
* documented here; presumably a non-zero error value on failure and 0 on
* success, as for the other int-returning environment calls - confirm
* against the implementation.
*/
int mdb_env_stat(MDB_env *env, MDB_stat *stat);
/** @brief Return information about the LMDB environment.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[out] stat The address of an #MDB_envinfo structure
* where the information will be copied
* @return An int status code. NOTE(review): the return value was not
* documented here; presumably a non-zero error value on failure and 0 on
* success, as for the other int-returning environment calls - confirm
* against the implementation.
*/
int mdb_env_info(MDB_env *env, MDB_envinfo *stat);
/** @brief Flush the data buffers to disk.
*
* Data is always written to disk when #mdb_txn_commit() is called,
* but the operating system may keep it buffered. LMDB always flushes
* the OS buffers upon commit as well, unless the environment was
* opened with #MDB_NOSYNC or, in part, #MDB_NOMETASYNC. This call is
* not valid if the environment was opened with #MDB_RDONLY.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] force If non-zero, force a synchronous flush. Otherwise,
* if the environment has the #MDB_NOSYNC flag set, the flushes
* will be omitted, and with #MDB_MAPASYNC they will be asynchronous.
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>EACCES - the environment is read-only.
* <li>EINVAL - an invalid parameter was specified.
* <li>EIO - an error occurred during synchronization.
* </ul>
*/
int mdb_env_sync(MDB_env *env, int force);
/** @brief Close the environment and release the memory map.
*
* Only a single thread may call this function. All transactions, databases,
* and cursors must already be closed before calling this function. Attempts to
* use any such handles after calling this function will cause a SIGSEGV.
* The environment handle will be freed and must not be used again after this
* call. This function returns no value.
* @param[in] env An environment handle returned by #mdb_env_create()
*/
void mdb_env_close(MDB_env *env);
/** @brief Set environment flags.
*
* This may be used to set some flags in addition to those from
* #mdb_env_open(), or to unset these flags. If several threads
* change the flags at the same time, the result is undefined.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] flags The flags to change, bitwise OR'ed together
* @param[in] onoff A non-zero value sets the given flags; zero clears them.
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdb_env_set_flags(MDB_env *env, unsigned int flags, int onoff);
/** @brief Get environment flags.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[out] flags The address of an unsigned integer in which to store
* the flags
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdb_env_get_flags(MDB_env *env, unsigned int *flags);
/** @brief Return the path that was used in #mdb_env_open().
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[out] path Address of a string pointer that will receive the path.
* This is the actual string in the environment, not a copy. It should not
* be altered in any way.
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdb_env_get_path(MDB_env *env, const char **path);
/** @brief Return the file descriptor for the given environment.
*
* This function may be called after fork(), so the descriptor can be
* closed before exec*(). Other LMDB file descriptors have FD_CLOEXEC.
* (Until LMDB 0.9.18, only the lockfile had that.)
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[out] fd Address of an #mdb_filehandle_t that will receive the
* descriptor.
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd);
/** @brief Set the size of the memory map to use for this environment.
*
* The size should be a multiple of the OS page size. The default is
* 10485760 bytes (10 MiB). The size of the memory map is also the maximum
* size of the database. The value should be chosen as large as possible,
* to accommodate future growth of the database.
* This function should be called after #mdb_env_create() and before #mdb_env_open().
* It may be called at later times if no transactions are active in
* this process. Note that the library does not check for this condition;
* the caller must ensure it explicitly.
*
* The new size takes effect immediately for the current process but
* will not be persisted to any others until a write transaction has been
* committed by the current process. Also, only mapsize increases are
* persisted into the environment.
*
* If the mapsize is increased by another process, and data has grown
* beyond the range of the current mapsize, #mdb_txn_begin() will
* return #MDB_MAP_RESIZED. This function may be called with a size
* of zero to adopt the new size.
*
* Any attempt to set a size smaller than the space already consumed
* by the environment will be silently changed to the current size of the used space.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] size The size in bytes
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>EINVAL - an invalid parameter was specified, or the environment has
* an active write transaction.
* </ul>
*/
int mdb_env_set_mapsize(MDB_env *env, mdb_size_t size);
/** @brief Set the maximum number of threads/reader slots for the environment.
*
* This defines the number of slots in the lock table that is used to track
* readers in the environment. The default is 126.
* Starting a read-only transaction normally ties a lock table slot to the
* current thread until the environment closes or the thread exits. If
* #MDB_NOTLS is in use, #mdb_txn_begin() instead ties the slot to the
* #MDB_txn object until it or the #MDB_env object is destroyed.
* This function may only be called after #mdb_env_create() and before #mdb_env_open().
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] readers The maximum number of reader lock table slots
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>EINVAL - an invalid parameter was specified, or the environment is already open.
* </ul>
*/
int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers);
/** @brief Get the maximum number of threads/reader slots for the environment.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[out] readers Address of an unsigned integer in which to store the
* number of readers
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
/** @brief Set the maximum number of named databases for the environment.
*
* This function is only needed if multiple databases will be used in the
* environment. Simpler applications that use the environment as a single
* unnamed database can ignore this option.
* This function may only be called after #mdb_env_create() and before #mdb_env_open().
*
* Currently a moderate number of slots is cheap, but a huge number gets
* expensive: 7-120 words per transaction, and every #mdb_dbi_open()
* does a linear search of the opened slots.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] dbs The maximum number of databases
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>EINVAL - an invalid parameter was specified, or the environment is already open.
* </ul>
*/
int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
/** @brief Get the maximum size of keys and #MDB_DUPSORT data we can write.
*
* Depends on the compile-time constant #MDB_MAXKEYSIZE. Default 511.
* See @ref MDB_val.
* @param[in] env An environment handle returned by #mdb_env_create()
* @return The maximum size of a key (or #MDB_DUPSORT data item) we can write
*/
int mdb_env_get_maxkeysize(MDB_env *env);
/** @brief Set application information associated with the #MDB_env.
*
* The stored pointer can be read back with #mdb_env_get_userctx().
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] ctx An arbitrary pointer for whatever the application needs.
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_env_set_userctx(MDB_env *env, void *ctx);
/** @brief Get the application information associated with the #MDB_env.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @return The pointer that was set by #mdb_env_set_userctx().
*/
void *mdb_env_get_userctx(MDB_env *env);
/** @brief A callback function for most LMDB assert() failures,
* called before printing the message and aborting.
*
* A callback of this type is installed with #mdb_env_set_assert().
* @param[in] env An environment handle returned by #mdb_env_create().
* @param[in] msg The assertion message, not including newline.
*/
typedef void MDB_assert_func(MDB_env *env, const char *msg);
/** @brief Set or reset the assert() callback of the environment.
*
* Disabled if liblmdb is built with NDEBUG.
* @note This hack should become obsolete as lmdb's error handling matures.
* @param[in] env An environment handle returned by #mdb_env_create().
* @param[in] func An #MDB_assert_func function, or 0.
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_env_set_assert(MDB_env *env, MDB_assert_func *func);
/** @brief Create a transaction for use with the environment.
*
* The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
* @note A transaction and its cursors must only be used by a single
* thread, and a thread may only have a single transaction at a time.
* If #MDB_NOTLS is in use, this does not apply to read-only transactions.
* @note Cursors may not span transactions.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] parent If this parameter is non-NULL, the new transaction
* will be a nested transaction, with the transaction indicated by \b parent
* as its parent. Transactions may be nested to any level. A parent
* transaction and its cursors may not issue any operations other than
* #mdb_txn_commit() and #mdb_txn_abort() while it has active child transactions.
* @param[in] flags Special options for this transaction. This parameter
* must be set to 0 or by bitwise OR'ing together one or more of the
* values described here.
* <ul>
* <li>#MDB_RDONLY
* This transaction will not perform any write operations.
* <li>#MDB_NOSYNC
* Don't flush system buffers to disk when committing this transaction.
* <li>#MDB_NOMETASYNC