Skip to content
This repository has been archived by the owner on Nov 14, 2021. It is now read-only.

Commit

Permalink
mm: Micro-optimize PID map reads for arm64 while retaining output format
Browse files Browse the repository at this point in the history
Android and various applications in Android need to read PID map data in
order to work. Some processes can contain over 10,000 mappings, which
results in lots of time wasted on simply generating strings. This wasted
time adds up, especially in the case of Unity-based games, which utilize
the Boehm garbage collector. A game's main process typically has well
over 10,000 mappings due to the loaded textures, and the Boehm GC reads
PID maps several times a second. This results in over 100,000 map
entries being printed out per second, so micro-optimization here is
important. Before this commit, show_vma_header_prefix() would typically
take around 1000 ns to run on a Snapdragon 855; now it only takes about
50 ns to run, which is a 20x improvement.

The primary micro-optimizations here assume that there are no more than
40 bits in the virtual address space, hence the build-time
CONFIG_ARM64_VA_BITS check. Arm64 is configured with a 39-bit virtual
address size here, so the assumption holds.

This also removes padding used to beautify PID map output to further
speed up reads and reduce the number of bytes printed, and optimizes the
dentry path retrieval for file-backed mappings. Note, however, that the
trailing space at the end of the line for non-file-backed mappings
cannot be omitted, as it breaks some PID map parsers.

This still retains insignificant leading zeros from printed hex values
to maintain the current output format.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Adam W. Willis <return.of.octobot@gmail.com>
  • Loading branch information
kerneltoast authored and 0ctobot committed May 9, 2021
1 parent 0478001 commit 0dd8c14
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 47 deletions.
20 changes: 10 additions & 10 deletions fs/d_path.c
Expand Up @@ -46,12 +46,7 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
return -ENAMETOOLONG;
p = *buffer -= dlen + 1;
*p++ = '/';
while (dlen--) {
char c = *dname++;
if (!c)
break;
*p++ = c;
}
memcpy(p, dname, dlen);
return 0;
}

Expand Down Expand Up @@ -254,9 +249,9 @@ static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
*
* "buflen" should be positive.
*/
char *d_path(const struct path *path, char *buf, int buflen)
char *d_path_outlen(const struct path *path, char *buf, int *buflen)
{
char *res = buf + buflen;
char *res = buf + *buflen;
struct path root;
int error;

Expand All @@ -273,17 +268,22 @@ char *d_path(const struct path *path, char *buf, int buflen)
*/
if (path->dentry->d_op && path->dentry->d_op->d_dname &&
(!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
return path->dentry->d_op->d_dname(path->dentry, buf, *buflen);

rcu_read_lock();
get_fs_root_rcu(current->fs, &root);
error = path_with_deleted(path, &root, &res, &buflen);
error = path_with_deleted(path, &root, &res, buflen);
rcu_read_unlock();

if (error < 0)
res = ERR_PTR(error);
return res;
}

/*
 * d_path - legacy entry point that takes the buffer length by value.
 *
 * Forwards to d_path_outlen() with a pointer to a private copy of @buflen,
 * so anything d_path_outlen() writes back through that pointer is discarded
 * and the caller's length is never modified. The return value is passed
 * through unchanged.
 */
char *d_path(const struct path *path, char *buf, int buflen)
{
	int remaining = buflen;

	return d_path_outlen(path, buf, &remaining);
}
EXPORT_SYMBOL(d_path);

/*
Expand Down
191 changes: 154 additions & 37 deletions fs/proc/task_mmu.c
Expand Up @@ -140,7 +140,7 @@ static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
page_offset = (unsigned long)name - page_start_vaddr;
num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);

seq_puts(m, "[anon:");
seq_write(m, "[anon:", 6);

for (i = 0; i < num_pages; i++) {
int len;
Expand All @@ -152,7 +152,7 @@ static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
pages_pinned = get_user_pages_remote(current, mm,
page_start_vaddr, 1, 0, &page, NULL, NULL);
if (pages_pinned < 1) {
seq_puts(m, "<fault>]");
seq_write(m, "<fault>]\n", 9);
return;
}

Expand All @@ -172,7 +172,7 @@ static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
page_start_vaddr += PAGE_SIZE;
}

seq_putc(m, ']');
seq_write(m, "]\n", 2);
}

static void vma_stop(struct proc_maps_private *priv)
Expand Down Expand Up @@ -328,24 +328,117 @@ static int is_stack(struct vm_area_struct *vma)
vma->vm_end >= vma->vm_mm->start_stack;
}

static void show_vma_header_prefix(struct seq_file *m,
unsigned long start, unsigned long end,
vm_flags_t flags, unsigned long long pgoff,
dev_t dev, unsigned long ino)
{
seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_put_hex_ll(m, NULL, start, 8);
seq_put_hex_ll(m, "-", end, 8);
seq_putc(m, ' ');
seq_putc(m, flags & VM_READ ? 'r' : '-');
seq_putc(m, flags & VM_WRITE ? 'w' : '-');
seq_putc(m, flags & VM_EXEC ? 'x' : '-');
seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
seq_put_hex_ll(m, " ", pgoff, 8);
seq_put_hex_ll(m, " ", MAJOR(dev), 2);
seq_put_hex_ll(m, ":", MINOR(dev), 2);
seq_put_decimal_ull(m, " ", ino);
seq_putc(m, ' ');
/*
 * print_vma_hex10 - write @val into @out as 8, 9 or 10 lowercase hex digits.
 *
 * @out:    destination buffer; up to 10 bytes are written, no NUL terminator.
 * @val:    unsigned integer to print; evaluated exactly once.
 * @clz_fn: count-leading-zeros builtin matching the width of @val
 *          (e.g. __builtin_clzl for unsigned long).
 *
 * Evaluates to the number of bytes written. Values needing fewer than 8
 * digits are zero-padded to 8. Only 9- and 10-digit values get a dedicated
 * case: anything wider than 40 bits falls into the default case and is
 * printed as its low 32 bits only, so callers must guarantee the range.
 */
#define print_vma_hex10(out, val, clz_fn) \
({ \
	const typeof(val) __val = (val); \
	char *const __out = (out); \
	size_t __len; \
	\
	if (__val) { \
		/* Number of hex digits needed: ceil(significant bits / 4) */ \
		__len = (sizeof(__val) * 8 - clz_fn(__val) + 3) / 4; \
		switch (__len) { \
		case 10: \
			__out[9] = hex_asc[(__val >> 0) & 0xf]; \
			__out[8] = hex_asc[(__val >> 4) & 0xf]; \
			__out[7] = hex_asc[(__val >> 8) & 0xf]; \
			__out[6] = hex_asc[(__val >> 12) & 0xf]; \
			__out[5] = hex_asc[(__val >> 16) & 0xf]; \
			__out[4] = hex_asc[(__val >> 20) & 0xf]; \
			__out[3] = hex_asc[(__val >> 24) & 0xf]; \
			__out[2] = hex_asc[(__val >> 28) & 0xf]; \
			__out[1] = hex_asc[(__val >> 32) & 0xf]; \
			__out[0] = hex_asc[(__val >> 36) & 0xf]; \
			break; \
		case 9: \
			__out[8] = hex_asc[(__val >> 0) & 0xf]; \
			__out[7] = hex_asc[(__val >> 4) & 0xf]; \
			__out[6] = hex_asc[(__val >> 8) & 0xf]; \
			__out[5] = hex_asc[(__val >> 12) & 0xf]; \
			__out[4] = hex_asc[(__val >> 16) & 0xf]; \
			__out[3] = hex_asc[(__val >> 20) & 0xf]; \
			__out[2] = hex_asc[(__val >> 24) & 0xf]; \
			__out[1] = hex_asc[(__val >> 28) & 0xf]; \
			__out[0] = hex_asc[(__val >> 32) & 0xf]; \
			break; \
		default: \
			/* Short values are padded with leading zeros to 8 digits */ \
			__out[7] = hex_asc[(__val >> 0) & 0xf]; \
			__out[6] = hex_asc[(__val >> 4) & 0xf]; \
			__out[5] = hex_asc[(__val >> 8) & 0xf]; \
			__out[4] = hex_asc[(__val >> 12) & 0xf]; \
			__out[3] = hex_asc[(__val >> 16) & 0xf]; \
			__out[2] = hex_asc[(__val >> 20) & 0xf]; \
			__out[1] = hex_asc[(__val >> 24) & 0xf]; \
			__out[0] = hex_asc[(__val >> 28) & 0xf]; \
			__len = 8; \
			break; \
		} \
	} else { \
		/* \
		 * clz is undefined for 0, so emit the eight zeros directly. \
		 * memcpy() keeps this a single 8-byte store without casting \
		 * the (arbitrarily aligned) char buffer to a u64 pointer. \
		 */ \
		memcpy(__out, "00000000", 8); \
		__len = 8; \
	} \
	\
	__len; \
})

/*
 * print_vma_hex2 - write the low byte of @val into @out as exactly two
 * lowercase hex digits (high nibble first).
 *
 * Evaluates to 2, the number of bytes written. Bits above the low 8 are
 * ignored, so the caller is responsible for values larger than 0xff.
 */
#define print_vma_hex2(out, val) \
({ \
	const typeof(val) __byte = (val); \
	char *const __dst = (out); \
	\
	__dst[0] = hex_asc[(__byte >> 4) & 0xf]; \
	__dst[1] = hex_asc[(__byte >> 0) & 0xf]; \
	\
	2; \
})

static int show_vma_header_prefix(struct seq_file *m, unsigned long start,
unsigned long end, vm_flags_t flags,
unsigned long long pgoff, dev_t dev,
unsigned long ino)
{
size_t len;
char *out;

/* Set the overflow status to get more memory if there's no space */
if (seq_get_buf(m, &out) < 65) {
seq_commit(m, -1);
return -ENOMEM;
}

/* Supports printing up to 40 bits per virtual address */
BUILD_BUG_ON(CONFIG_ARM64_VA_BITS > 40);

len = print_vma_hex10(out, start, __builtin_clzl);

out[len++] = '-';

len += print_vma_hex10(out + len, end, __builtin_clzl);

out[len++] = ' ';
out[len++] = "-r"[!!(flags & VM_READ)];
out[len++] = "-w"[!!(flags & VM_WRITE)];
out[len++] = "-x"[!!(flags & VM_EXEC)];
out[len++] = "ps"[!!(flags & VM_MAYSHARE)];
out[len++] = ' ';

len += print_vma_hex10(out + len, pgoff, __builtin_clzll);

out[len++] = ' ';

len += print_vma_hex2(out + len, MAJOR(dev));

out[len++] = ':';

len += print_vma_hex2(out + len, MINOR(dev));

out[len++] = ' ';

len += num_to_str(&out[len], 20, ino, 0);

out[len++] = ' ';

m->count += len;
return 0;
}

static void
Expand All @@ -369,16 +462,44 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)

start = vma->vm_start;
end = vma->vm_end;
show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
if (show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino))
return;

/*
* Print the dentry name for named mappings, and a
* special [heap] marker for the heap:
*/
if (file) {
seq_pad(m, ' ');
seq_file_path(m, file, "\n");
goto done;
char *buf;
size_t size = seq_get_buf(m, &buf);

/*
* This won't escape newline characters from the path. If a
* program uses newlines in its paths then it can kick rocks.
*/
if (size > 1) {
const int inlen = size - 1;
int outlen = inlen;
char *p;

p = d_path_outlen(&file->f_path, buf, &outlen);
if (!IS_ERR(p)) {
size_t len;

if (outlen != inlen)
len = inlen - outlen - 1;
else
len = strlen(p);
memmove(buf, p, len);
buf[len] = '\n';
seq_commit(m, len + 1);
return;
}
}

/* Set the overflow status to get more memory */
seq_commit(m, -1);
return;
}

if (vma->vm_ops && vma->vm_ops->name) {
Expand All @@ -390,32 +511,30 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
name = arch_vma_name(vma);
if (!name) {
if (!mm) {
name = "[vdso]";
goto done;
seq_write(m, "[vdso]\n", 7);
return;
}

if (vma->vm_start <= mm->brk &&
vma->vm_end >= mm->start_brk) {
name = "[heap]";
goto done;
seq_write(m, "[heap]\n", 7);
return;
}

if (is_stack(vma)) {
name = "[stack]";
goto done;
seq_write(m, "[stack]\n", 8);
return;
}

if (vma_get_anon_name(vma)) {
seq_pad(m, ' ');
seq_print_vma_name(m, vma);
return;
}
}

done:
if (name) {
seq_pad(m, ' ');
if (name)
seq_puts(m, name);
}
seq_putc(m, '\n');
}

Expand Down Expand Up @@ -854,7 +973,6 @@ static int show_smap(struct seq_file *m, void *v)
if (vma_get_anon_name(vma)) {
seq_puts(m, "Name: ");
seq_print_vma_name(m, vma);
seq_putc(m, '\n');
}

SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
Expand Down Expand Up @@ -909,7 +1027,6 @@ static int show_smaps_rollup(struct seq_file *m, void *v)

show_vma_header_prefix(m, priv->mm->mmap->vm_start,
last_vma_end, 0, 0, 0, 0);
seq_pad(m, ' ');
seq_puts(m, "[rollup]\n");

__show_smap(m, &mss);
Expand Down
1 change: 1 addition & 0 deletions include/linux/dcache.h
Expand Up @@ -306,6 +306,7 @@ extern char *simple_dname(struct dentry *, char *, int);
extern char *__d_path(const struct path *, const struct path *, char *, int);
extern char *d_absolute_path(const struct path *, char *, int);
extern char *d_path(const struct path *, char *, int);
extern char *d_path_outlen(const struct path *, char *, int *);
extern char *dentry_path_raw(struct dentry *, char *, int);
extern char *dentry_path(struct dentry *, char *, int);

Expand Down

0 comments on commit 0dd8c14

Please sign in to comment.