Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PATCH v5] linux-gen: ishm: implement huge page cache #685

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 17 additions & 0 deletions config/odp-linux-generic.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@
odp_implementation = "linux-generic"
config_file_version = "0.0.1"

# Shared memory options
shm: {
# Number of cached default size huge pages. These pages are allocated
# during odp_init_global() and freed back to the kernel in
# odp_term_global(). A value of zero means no pages are cached.
# Negative values must not be used here; they are reserved for future
# implementations.
#
# ODP will reserve as many huge pages as possible, which may be less
# than requested here if the system does not have enough huge pages
# available.
#
# When using process mode threads, this value should be set to 0
# because the current implementation won't work properly otherwise.
num_cached_hp = 0
}

# DPDK pktio options
pktio_dpdk: {
# Default options
Expand Down
235 changes: 221 additions & 14 deletions platform/linux-generic/odp_ishm.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
#include <odp_ishm_internal.h>
#include <odp_ishmphy_internal.h>
#include <odp_ishmpool_internal.h>
#include <odp_libconfig_internal.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
Expand Down Expand Up @@ -164,7 +165,7 @@ typedef struct ishm_fragment {
* will allocate both a block and a fragment.
* Blocks contain only global data common to all processes.
*/
typedef enum {UNKNOWN, HUGE, NORMAL, EXTERNAL} huge_flag_t;
typedef enum {UNKNOWN, HUGE, NORMAL, EXTERNAL, CACHED} huge_flag_t;
typedef struct ishm_block {
char name[ISHM_NAME_MAXLEN]; /* name for the ishm block (if any) */
char filename[ISHM_FILENAME_MAXLEN]; /* name of the .../odp-* file */
Expand Down Expand Up @@ -238,13 +239,176 @@ typedef struct {
} ishm_ftable_t;
static ishm_ftable_t *ishm_ftbl;

/*
 * Bookkeeping for the cache of pre-allocated huge pages.
 * hp_init() fills the structure in; hp_get_cached() and hp_put_cached()
 * treat fd[] as a stack whose top element is fd[idx]. Slots that hold no
 * descriptor contain -1.
 */
struct huge_page_cache {
uint64_t len; /* size of one cached page; hp_init() stores odp_sys_huge_page_size() here */
int max_fds; /* maximum amount requested of pre-allocated huge pages */
int total; /* amount of actually pre-allocated huge pages */
int idx; /* retrieve fd[idx] to get a free file descriptor; -1 when none is left */
int fd[]; /* list of file descriptors (flexible array, max_fds entries) */
};

/*
 * The huge page cache itself. hp_init() points it at a shared anonymous
 * mapping; it is left untouched by hp_init() when the cache is disabled or
 * cannot be set up, which is why the hp_* helpers test it against NULL.
 */
static struct huge_page_cache *hpc;

/* Fall back to MAP_ANON where MAP_ANONYMOUS is not defined. */
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

/* prototypes: */
static void procsync(void);

/*
 * Create one cache file of "len" bytes and return its file descriptor.
 *
 * The file is created (or truncated) at "filename" and its name is removed
 * from the file system straight away, so only the returned descriptor
 * refers to it. The file is then sized to "len" and mapped once through
 * _odp_ishmphy_map() to commit the huge page; the mapping is released again
 * before returning.
 *
 * @param len       Size of the file in bytes, must not be zero
 * @param filename  Path used to create the file
 *
 * @return The open file descriptor on success, -1 on any failure
 */
static int hp_create_file(uint64_t len, const char *filename)
{
	void *mapping;
	int cache_fd;

	if (len == 0) {
		ODP_ERR("Length is wrong\n");
		return -1;
	}

	cache_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC,
			S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
	if (cache_fd < 0) {
		ODP_ERR("Could not create cache file %s\n", filename);
		return -1;
	}

	/* drop the name from the file system, the descriptor stays open */
	unlink(filename);

	if (ftruncate(cache_fd, len) == -1) {
		ODP_ERR("Could not truncate file: %s\n", strerror(errno));
		goto fail;
	}

	/* map once to commit the huge page; NULL means no more pages */
	mapping = _odp_ishmphy_map(cache_fd, NULL, len, 0);
	if (mapping == NULL)
		goto fail;

	_odp_ishmphy_unmap(mapping, len, 0);

	ODP_DBG("Created HP cache file %s, fd: %d\n", filename, cache_fd);

	return cache_fd;

fail:
	close(cache_fd);
	return -1;
}

static void hp_init(void)
Copy link
Collaborator

@MatiasElo MatiasElo Sep 10, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When thinking about the configuration option documentation I noticed it could be useful that hp_init() would cause odp_init_global() to fail if all requested pages are not successfully reserved. This way a user can be sure that a selected number of pages is available at runtime.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea was to provide a "best effort" cache, so to speak, but let's discuss during the meeting today how this should behave.

{
char filename[ISHM_FILENAME_MAXLEN];
char dir[ISHM_FILENAME_MAXLEN];
int count;
void *addr;

if (!_odp_libconfig_lookup_ext_int("shm", NULL, "num_cached_hp",
&count)) {
return;
}

if (count <= 0)
return;

ODP_DBG("Init HP cache with up to %d pages\n", count);

if (!odp_global_data.hugepage_info.default_huge_page_dir) {
ODP_ERR("No huge page dir\n");
return;
}

snprintf(dir, ISHM_FILENAME_MAXLEN, "%s/%s",
odp_global_data.hugepage_info.default_huge_page_dir,
odp_global_data.uid);

if (mkdir(dir, 0744) != 0) {
if (errno != EEXIST) {
ODP_ERR("Failed to create dir: %s\n", strerror(errno));
return;
}
}

snprintf(filename, ISHM_FILENAME_MAXLEN,
"%s/odp-%d-ishm_cached",
dir,
odp_global_data.main_pid);

addr = mmap(NULL,
sizeof(struct huge_page_cache) + sizeof(int) * count,
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED) {
ODP_ERR("Unable to mmap memory for huge page cache\n.");
return;
}

hpc = addr;

hpc->max_fds = count;
hpc->total = 0;
hpc->idx = -1;
hpc->len = odp_sys_huge_page_size();

for (int i = 0; i < count; ++i) {
int fd;

fd = hp_create_file(hpc->len, filename);
if (fd == -1) {
do {
hpc->fd[i++] = -1;
} while (i < count);
break;
}
hpc->total++;
hpc->fd[i] = fd;
}
hpc->idx = hpc->total - 1;

ODP_DBG("HP cache has %d huge pages of size 0x%08" PRIx64 "\n",
hpc->total, hpc->len);
}

static void hp_term(void)
{
if (NULL == hpc)
return;

for (int i = 0; i < hpc->total; i++) {
if (hpc->fd[i] != -1)
close(hpc->fd[i]);
}

hpc->total = 0;
hpc->idx = -1;
hpc->len = 0;
}

/*
 * Take one pre-allocated huge page out of the cache.
 *
 * A descriptor is only handed out when the cache exists, still holds at
 * least one descriptor, and "len" equals the size of the cached pages.
 * The slot that held the descriptor is set to -1 and the stack top moves
 * down by one.
 *
 * @param len  Requested mapping length in bytes
 *
 * @return The cached file descriptor, or -1 when none can be provided
 */
static int hp_get_cached(uint64_t len)
{
	int cached_fd;
	int top;

	if (hpc == NULL)
		return -1;

	if (hpc->idx < 0)
		return -1;

	if (hpc->len != len)
		return -1;

	top = hpc->idx;
	cached_fd = hpc->fd[top];
	hpc->fd[top] = -1;
	hpc->idx = top - 1;

	return cached_fd;
}

/*
 * Give a huge page file descriptor back to the cache.
 *
 * The descriptor is pushed on top of the descriptor stack. The push is
 * refused when no cache exists or when the cache already holds "total"
 * descriptors; the cache is left unchanged in both cases and "fd" is not
 * closed here.
 *
 * @param fd  File descriptor previously obtained from hp_get_cached()
 *
 * @return 0 on success, -1 when the descriptor could not be stored
 */
static int hp_put_cached(int fd)
{
	/* checked on its own: the cache must not be touched when absent */
	if (NULL == hpc) {
		ODP_ERR("Huge page cache not available, cannot put FD: %d\n",
			fd);
		return -1;
	}

	if (odp_unlikely(hpc->idx + 1 >= hpc->total)) {
		ODP_ERR("Trying to put more FD than allowed: %d\n", fd);
		return -1;
	}

	/* advance the stack top only once the slot is known to be valid */
	hpc->idx++;
	hpc->fd[hpc->idx] = fd;

	return 0;
}

/*
* Take a piece of the preallocated virtual space to fit "size" bytes.
* (best fit). Size must be rounded up to an integer number of pages size.
Expand Down Expand Up @@ -798,8 +962,14 @@ static int block_free_internal(int block_index, int close_fd, int deregister)
block_index);

/* close the related fd */
if (close_fd)
close(ishm_proctable->entry[proc_index].fd);
if (close_fd) {
int fd = ishm_proctable->entry[proc_index].fd;

if (block->huge == CACHED)
hp_put_cached(fd);
else
close(fd);
}

/* remove entry from process local table: */
last = ishm_proctable->nb_entries - 1;
Expand Down Expand Up @@ -910,6 +1080,7 @@ int _odp_ishm_reserve(const char *name, uint64_t size, int fd,
new_block->huge = EXTERNAL;
} else {
new_block->external_fd = 0;
new_block->huge = UNKNOWN;
}

/* Otherwise, Try first huge pages when possible and needed: */
Expand All @@ -927,17 +1098,38 @@ int _odp_ishm_reserve(const char *name, uint64_t size, int fd,

/* roundup to page size */
len = (size + (page_hp_size - 1)) & (-page_hp_size);
addr = do_map(new_index, len, hp_align, flags, HUGE, &fd);

if (addr == NULL) {
if (!huge_error_printed) {
ODP_ERR("No huge pages, fall back to normal "
"pages. "
"check: /proc/sys/vm/nr_hugepages.\n");
huge_error_printed = 1;
if (!(flags & _ODP_ISHM_SINGLE_VA)) {
/* try pre-allocated pages */
fd = hp_get_cached(len);
if (fd != -1) {
/* do as if user provided a fd */
new_block->external_fd = 1;
addr = do_map(new_index, len, hp_align, flags,
CACHED, &fd);
if (addr == NULL) {
ODP_ERR("Could not use cached hp %d\n",
fd);
hp_put_cached(fd);
fd = -1;
} else {
new_block->huge = CACHED;
}
}
}
if (fd == -1) {
addr = do_map(new_index, len, hp_align, flags, HUGE,
&fd);

if (addr == NULL) {
if (!huge_error_printed) {
ODP_ERR("No huge pages, fall back to "
"normal pages. Check: "
"/proc/sys/vm/nr_hugepages.\n");
huge_error_printed = 1;
}
} else {
new_block->huge = HUGE;
}
} else {
new_block->huge = HUGE;
}
}

Expand All @@ -961,8 +1153,12 @@ int _odp_ishm_reserve(const char *name, uint64_t size, int fd,

/* if neither huge pages or normal pages works, we cannot proceed: */
if ((fd < 0) || (addr == NULL) || (len == 0)) {
if ((!new_block->external_fd) && (fd >= 0))
if (new_block->external_fd) {
if (new_block->huge == CACHED)
hp_put_cached(fd);
} else if (fd >= 0) {
close(fd);
}
delete_file(new_block);
odp_spinlock_unlock(&ishm_tbl->lock);
ODP_ERR("_ishm_reserve failed.\n");
Expand Down Expand Up @@ -1564,6 +1760,9 @@ int _odp_ishm_init_global(const odp_init_t *init)
/* get ready to create pools: */
_odp_ishm_pool_init();

/* init cache files */
hp_init();

return 0;

init_glob_err4:
Expand Down Expand Up @@ -1705,6 +1904,8 @@ int _odp_ishm_term_global(void)
if (!odp_global_data.shm_dir_from_env)
free(odp_global_data.shm_dir);

hp_term();

return ret;
}

Expand Down Expand Up @@ -1778,6 +1979,9 @@ int _odp_ishm_status(const char *title)
case EXTERNAL:
huge = 'E';
break;
case CACHED:
huge = 'C';
break;
default:
huge = '?';
}
Expand Down Expand Up @@ -1911,6 +2115,9 @@ void _odp_ishm_print(int block_index)
case EXTERNAL:
str = "external";
break;
case CACHED:
str = "cached";
break;
default:
str = "??";
}
Expand Down