Skip to content

Commit

Permalink
kallsyms: add reliable symbol size info
Browse files Browse the repository at this point in the history
The existing mechanisms in get_symbol_pos to determine the end of a
symbol is an inaccurate heuristic.  By passing nm -S output into
scripts/kallsyms.c and writing the symbol sizes to a new .kallsyms_sizes
section, we can get accurate sizes and sort the symbols accordingly,
reliably sorting zero-size symbols first (on the grounds that they are
usually e.g. section markers, and other symbols at the same address are
conceptually contained within them and should be sorted after them),
then larger symbols before smaller ones (so that overlapping symbols
print the containing symbol first, before its containees).  We can
also use this to improve aliased symbol detection.

Emit the size info as an extra column in /proc/kallmodsyms (since its
format is not yet set in stone), and export it to iterator consumers.

The notable downside of this is that the new .kallsyms_sizes is pretty
big: a PTR per symbol, so vmlinux.o grows by almost a megabyte, though
it compresses pretty well, so bzImage grows by only a megabyte.

I'm not sure how to reduce this (perhaps using an array with elements
sized to be no larger than needed for the contents, so that almost
always two-byte entries would do? except that in my test kernel two
symbols are bigger than this: sme_workarea, at 400K, and __log_buf, at
100K: the latter seems often likely to be larger than 64K).  A simple
scheme to reduce this would be to split the sizes array into several
arrays with differently-sized elements, and run-length-compress away the
zero bytes -- but that's not implemented yet, and might never be if
people think the whole idea of this is pointless.

In the absence of a way to shrink things, this should probably be hidden
behind a new config symbol if exposed at all, and kallmodsyms just shows
zero sizes if it's configured out (but this is enough of an RFC that
that's not yet done: possibly the benefits of this are too marginal to
be worth it, even if they do let kall(mod)syms consumers distinguish
symbols from padding, which was previously impossible).

Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
Signed-off-by: Eugene Loh <eugene.loh@oracle.com>
  • Loading branch information
nickalcock authored and intel-lab-lkp committed Dec 16, 2021
1 parent 4487902 commit a42fff4
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 44 deletions.
7 changes: 4 additions & 3 deletions include/linux/module.h
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,8 @@ struct module *find_module(const char *name);
/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
symnum out of range. */
int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *name, char *module_name, int *exported);
char *name, char *module_name, unsigned long *size,
int *exported);

/* Look for this name: can be of form module:name. */
unsigned long module_kallsyms_lookup_name(const char *name);
Expand Down Expand Up @@ -768,8 +769,8 @@ static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long *
}

static inline int module_get_kallsym(unsigned int symnum, unsigned long *value,
char *type, char *name,
char *module_name, int *exported)
char *type, char *name, char *module_name,
unsigned long *size, int *exported)
{
return -ERANGE;
}
Expand Down
74 changes: 39 additions & 35 deletions kernel/kallsyms.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
*/
extern const unsigned long kallsyms_addresses[] __weak;
extern const int kallsyms_offsets[] __weak;
extern const unsigned long kallsyms_sizes[] __weak;
extern const u8 kallsyms_names[] __weak;

/*
Expand Down Expand Up @@ -277,12 +278,24 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
}
#endif /* CONFIG_LIVEPATCH */

/*
* The caller passes in an address, and we return an index to the symbol --
* potentially also size and offset information.
* But an address might map to multiple symbols because:
* - some symbols might have zero size
* - some symbols might be aliases of one another
* - some symbols might span (encompass) others
* The symbols should already be ordered so that, for a particular address,
* we first have the zero-size ones, then the biggest, then the smallest.
* So we find the index by:
* - finding the last symbol with the target address
* - backing the index up so long as both the address and size are unchanged
*/
static unsigned long get_symbol_pos(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset)
{
unsigned long symbol_start = 0, symbol_end = 0;
unsigned long i, low, high, mid;
unsigned long low, high, mid;

/* This kernel should never had been booted. */
if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE))
Expand All @@ -303,36 +316,17 @@ static unsigned long get_symbol_pos(unsigned long addr,
}

/*
* Search for the first aliased symbol. Aliased
* symbols are symbols with the same address.
* Search for the first aliased symbol.
*/
while (low && kallsyms_sym_address(low-1) == kallsyms_sym_address(low))
while (low
&& kallsyms_sym_address(low-1) == kallsyms_sym_address(low)
&& kallsyms_sizes[low-1] == kallsyms_sizes[low])
--low;

symbol_start = kallsyms_sym_address(low);

/* Search for next non-aliased symbol. */
for (i = low + 1; i < kallsyms_num_syms; i++) {
if (kallsyms_sym_address(i) > symbol_start) {
symbol_end = kallsyms_sym_address(i);
break;
}
}

/* If we found no next symbol, we use the end of the section. */
if (!symbol_end) {
if (is_kernel_inittext(addr))
symbol_end = (unsigned long)_einittext;
else if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
symbol_end = (unsigned long)_end;
else
symbol_end = (unsigned long)_etext;
}

if (symbolsize)
*symbolsize = symbol_end - symbol_start;
*symbolsize = kallsyms_sizes[low];
if (offset)
*offset = addr - symbol_start;
*offset = addr - kallsyms_sym_address(low);

return low;
}
Expand Down Expand Up @@ -653,6 +647,7 @@ struct kallsym_iter {
loff_t pos_bpf_end;
unsigned long value;
unsigned int nameoff; /* If iterating in core kernel symbols. */
unsigned long size;
char type;
char name[KSYM_NAME_LEN];
char module_name[MODULE_NAME_LEN];
Expand Down Expand Up @@ -687,7 +682,7 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
int ret = module_get_kallsym(iter->pos - iter->pos_arch_end,
&iter->value, &iter->type,
iter->name, iter->module_name,
&iter->exported);
&iter->size, &iter->exported);
iter->builtin_module_names = NULL;

if (ret < 0) {
Expand Down Expand Up @@ -760,6 +755,7 @@ static unsigned long get_ksymbol_core(struct kallsym_iter *iter, int kallmodsyms
iter->exported = 0;
iter->value = kallsyms_sym_address(iter->pos);

iter->size = kallsyms_sizes[iter->pos];
iter->type = kallsyms_get_symbol_type(off);

iter->module_name[0] = '\0';
Expand Down Expand Up @@ -878,12 +874,14 @@ static int s_show_internal(struct seq_file *m, void *p, int kallmodsyms)
{
void *value;
struct kallsym_iter *iter = m->private;
unsigned long size;

/* Some debugging symbols have no name. Ignore them. */
if (!iter->name[0])
return 0;

value = iter->show_value ? (void *)iter->value : NULL;
size = iter->show_value ? iter->size : 0;

/*
* Real module, or built-in module and /proc/kallsyms being shown.
Expand All @@ -903,15 +901,15 @@ static int s_show_internal(struct seq_file *m, void *p, int kallmodsyms)
* /proc/kallmodsyms, built as a module.
*/
if (iter->builtin_module_names == NULL)
seq_printf(m, "%px %c %s\t[%s]\n", value,
type, iter->name,
seq_printf(m, "%px %lx %c %s\t[%s]\n", value,
size, type, iter->name,
iter->module_name);
/*
* /proc/kallmodsyms, single-module symbol.
*/
else if (*iter->builtin_module_names != '\0')
seq_printf(m, "%px %c %s\t[%s]\n", value,
type, iter->name,
seq_printf(m, "%px %lx %c %s\t[%s]\n", value,
size, type, iter->name,
iter->builtin_module_names);
/*
* /proc/kallmodsyms, multimodule symbol. Formatted
Expand All @@ -922,8 +920,8 @@ static int s_show_internal(struct seq_file *m, void *p, int kallmodsyms)
size_t i = *(char *)(iter->builtin_module_names + 1);
const char *walk = iter->builtin_module_names + 2;

seq_printf(m, "%px %c %s\t[%s]", value,
type, iter->name, walk);
seq_printf(m, "%px %lx %c %s\t[%s]", value,
size, type, iter->name, walk);

while (--i > 0) {
walk += strlen(walk) + 1;
Expand All @@ -935,7 +933,13 @@ static int s_show_internal(struct seq_file *m, void *p, int kallmodsyms)
#endif /* CONFIG_KALLMODSYMS */
seq_printf(m, "%px %c %s\t[%s]\n", value,
type, iter->name, iter->module_name);
} else
/*
* Non-modular, /proc/kallmodsyms -> print size.
*/
} else if (kallmodsyms)
seq_printf(m, "%px %lx %c %s\n", value, size,
iter->type, iter->name);
else
seq_printf(m, "%px %c %s\n", value,
iter->type, iter->name);
return 0;
Expand Down
4 changes: 3 additions & 1 deletion kernel/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -4405,7 +4405,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
}

int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *name, char *module_name, int *exported)
char *name, char *module_name, unsigned long *size,
int *exported)
{
struct module *mod;

Expand All @@ -4424,6 +4425,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
strlcpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN);
strlcpy(module_name, mod->name, MODULE_NAME_LEN);
*exported = is_exported(name, *value, mod);
*size = kallsyms->symtab[symnum].st_size;
preempt_enable();
return 0;
}
Expand Down
29 changes: 25 additions & 4 deletions scripts/kallsyms.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* This software may be used and distributed according to the terms
* of the GNU General Public License, incorporated herein by reference.
*
* Usage: nm -n vmlinux
* Usage: nm -n -S vmlinux
* | scripts/kallsyms [--all-symbols] [--absolute-percpu]
* [--base-relative] [--builtin=modules_thick.builtin]
* > symbols.S
Expand Down Expand Up @@ -38,6 +38,7 @@

struct sym_entry {
unsigned long long addr;
unsigned long long size;
unsigned int len;
unsigned int start_pos;
unsigned int percpu_absolute;
Expand Down Expand Up @@ -394,6 +395,7 @@ static bool is_ignored_symbol(const char *name, char type)
"kallsyms_addresses",
"kallsyms_offsets",
"kallsyms_relative_base",
"kallsyms_sizes",
"kallsyms_num_syms",
"kallsyms_num_modules",
"kallsyms_names",
Expand Down Expand Up @@ -507,10 +509,11 @@ static struct sym_entry *read_symbol(FILE *in)
unsigned long long addr;
unsigned int len;
struct sym_entry *sym;
int rc;
int rc = 0;
unsigned long long size;

rc = fscanf(in, "%llx %c %499s\n", &addr, &type, name);
if (rc != 3) {
rc = fscanf(in, "%llx %llx %c %499s\n", &addr, &size, &type, name);
if (rc != 4) {
if (rc != EOF && fgets(name, 500, in) == NULL)
fprintf(stderr, "Read error or end of file.\n");
return NULL;
Expand Down Expand Up @@ -548,6 +551,7 @@ static struct sym_entry *read_symbol(FILE *in)
sym->sym[0] = type;
strcpy(sym_name(sym), name);
sym->percpu_absolute = 0;
sym->size = size;

return sym;
}
Expand Down Expand Up @@ -932,6 +936,11 @@ static void write_src(void)
printf("\n");
}

output_label("kallsyms_sizes");
for (i = 0; i < table_cnt; i++)
printf("\tPTR\t%#llx\n", table[i]->size);
printf("\n");

#ifdef CONFIG_KALLMODSYMS
output_kallmodsyms_modules();
output_kallmodsyms_objfiles();
Expand Down Expand Up @@ -1189,6 +1198,18 @@ static int compare_symbols(const void *a, const void *b)
if (sa->addr < sb->addr)
return -1;

/* zero-size markers before nonzero-size symbols */
if (sa->size > 0 && sb->size == 0)
return 1;
if (sa->size == 0 && sb->size > 0)
return -1;

/* sort by size (large size preceding symbols it encompasses) */
if (sa->size < sb->size)
return 1;
if (sa->size > sb->size)
return -1;

/* sort by "weakness" type */
wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W');
wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W');
Expand Down
7 changes: 6 additions & 1 deletion scripts/link-vmlinux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,12 @@ kallsyms()
fi

info KSYMS ${2}
${NM} -n ${1} | scripts/kallsyms ${kallsymopt} > ${2}
# "nm -S" does not print symbol size when size is 0
# Therefore use awk to regularize the data:
# - when there are only three fields, add an explicit "0"
# - when there are already four fields, pass through as is
${NM} -n -S ${1} | ${AWK} 'NF==3 {print $1, 0, $2, $3}; NF==4' | \
scripts/kallsyms ${kallsymopt} > ${2}
}

# Perform one step in kallsyms generation, including temporary linking of
Expand Down

0 comments on commit a42fff4

Please sign in to comment.