Skip to content

Commit

Permalink
Enable passing of command line parameters to the module. This fixes #45
Browse files Browse the repository at this point in the history
… and

will be useful for #43.
- The module will statically assign the specified major number to each msr
  device. Any combination of the following arguments is valid: mdev_msr_safe,
  mdev_msr_batch, and mdev_msr_whitelist.
- If no arguments are specified, then the module will dynamically
  assign a major number to each device. This is the default behavior.

Updating the README to reflect new command line parameters.

Signed-off-by: Stephanie Labasan <labasan1@llnl.gov>
  • Loading branch information
Stephanie Labasan committed Nov 15, 2018
1 parent cc79f29 commit e67638f
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 62 deletions.
46 changes: 29 additions & 17 deletions README
Expand Up @@ -7,21 +7,34 @@ The msr-safe.ko module is comprised of the following source files:
msr_entry.c Original MSR driver with added calls to batch and
whitelist implementations.
msr_batch.[ch] MSR batching implementation
msr_whitelist.[ch] MSR Whitelist implementation
msr_whitelist.[ch] MSR whitelist implementation
whitelists Sample text whitelist that may be input to msr_safe

Kernel Build & Load
-------------------

Building the msr-safe.ko module can be done with the commands below. A
successful load of the msr-safe kernel module will have `msr_batch` and
`msr_whitelist` in `/dev/cpu`, and will have an `msr_safe` present under each
CPU directory in `/dev/cpu/*`.
Building and loading the msr-safe.ko module can be done with the commands
below. When no command line arguments are specified, the kernel will
dynamically assign major numbers to each device. A successful load of the
msr-safe kernel module will have `msr_batch` and `msr_whitelist` in `/dev/cpu`,
and will have an `msr_safe` present under each CPU directory in `/dev/cpu/*`.

git clone https://github.com/LLNL/msr-safe
cd msr-safe
make
insmod msr-safe.ko
$ git clone https://github.com/LLNL/msr-safe
$ cd msr-safe
$ make
$ insmod msr-safe.ko

Kernel Load with Command Line Arguments
---------------------------------------

Alternatively, this module can be loaded with command line arguments. Each
argument specifies the major device number you want to associate with a
particular device. When loading the module, you can specify any combination of
the three msr devices; any device whose argument is omitted is assigned a major
number dynamically.

$ insmod msr-safe.ko mdev_msr_safe=<#> \
mdev_msr_whitelist=<#> \
mdev_msr_batch=<#>

Configuration Notes After Install
---------------------------------
Expand Down Expand Up @@ -63,14 +76,13 @@ The msrsave utility provides a mechanism for saving and restoring MSR values
based on entries in the whitelist. To restore MSR values, the register must
have an appropriate writemask.

Modification of MSR's that are marked as safe in the whitelist may
impact subsequent users on a shared HPC system. It is important the
resource manager on such a system use the msrsave utility to save and
restore MSR values between allocating compute nodes to users. An
example of this has been implemented for the SLURM resource manager as
a SPANK plugin. This plugin can be built with the "make spank" target
and installed with the "make install-spank" taget. This uses the
SLURM SPANK infrastructure to make a popen(3) call to the msrsave
Modification of MSRs that are marked as safe in the whitelist may impact
subsequent users on a shared HPC system. It is important the resource manager
on such a system use the msrsave utility to save and restore MSR values between
allocating compute nodes to users. An example of this has been implemented for
the SLURM resource manager as a SPANK plugin. This plugin can be built with the
"make spank" target and installed with the "make install-spank" target. This
uses the SLURM SPANK infrastructure to make a popen(3) call to the msrsave
command line utility in the job epilogue and prologue.

Release
Expand Down
25 changes: 14 additions & 11 deletions msr_batch.c
Expand Up @@ -58,7 +58,6 @@
#include "msr_safe.h"
#include "msr_whitelist.h"

static int majordev;
static struct class *cdev_class;
static char cdev_created;
static char cdev_registered;
Expand Down Expand Up @@ -221,7 +220,7 @@ static const struct file_operations fops =
.release = msrbatch_close
};

void msrbatch_cleanup(void)
void msrbatch_cleanup(int majordev)
{
if (cdev_created)
{
Expand Down Expand Up @@ -251,36 +250,40 @@ static char *msrbatch_nodename(struct device *dev, umode_t *mode)
return kasprintf(GFP_KERNEL, "cpu/msr_batch");
}

int msrbatch_init(void)
int msrbatch_init(int *majordev)
{
int err;
int err = 0;
struct device *dev;

majordev = register_chrdev(0, "cpu/msr_batch", &fops);
if (majordev < 0)
err = register_chrdev(*majordev, "cpu/msr_batch", &fops);
if (err < 0)
{
pr_debug("msrbatch_init: unable to register chrdev\n");
msrbatch_cleanup();
pr_debug("%s: unable to register chrdev\n", __FUNCTION__);
msrbatch_cleanup(*majordev);
return -EBUSY;
}
if (err > 0)
{
*majordev = err;
}
cdev_registered = 1;

cdev_class = class_create(THIS_MODULE, "msr_batch");
if (IS_ERR(cdev_class))
{
err = PTR_ERR(cdev_class);
msrbatch_cleanup();
msrbatch_cleanup(*majordev);
return err;
}
cdev_class_created = 1;

cdev_class->devnode = msrbatch_nodename;

dev = device_create(cdev_class, NULL, MKDEV(majordev, 0), NULL, "msr_batch");
dev = device_create(cdev_class, NULL, MKDEV(*majordev, 0), NULL, "msr_batch");
if (IS_ERR(dev))
{
err = PTR_ERR(dev);
msrbatch_cleanup();
msrbatch_cleanup(*majordev);
return err;
}
cdev_created = 1;
Expand Down
4 changes: 2 additions & 2 deletions msr_batch.h
Expand Up @@ -32,8 +32,8 @@
#ifndef MSR_BATCH_HEADER_INCLUDE
#define MSR_BATCH_HEADER_INCLUDE

extern void msrbatch_cleanup(void);
void msrbatch_cleanup(int majordev);

extern int msrbatch_init(void);
int msrbatch_init(int *majordev);

#endif
65 changes: 46 additions & 19 deletions msr_entry.c
Expand Up @@ -50,7 +50,16 @@ static struct class *msr_class;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)
static enum cpuhp_state cpuhp_msr_state;
#endif
static int majordev;
static int mdev_msr_safe;
static int mdev_msr_whitelist;
static int mdev_msr_batch;

module_param(mdev_msr_safe, int, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
MODULE_PARM_DESC(mdev_msr_safe, "Major number for msr_safe (int).");
module_param(mdev_msr_whitelist, int, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
MODULE_PARM_DESC(mdev_msr_whitelist, "Major number for msr_whitelist (int).");
module_param(mdev_msr_batch, int, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
MODULE_PARM_DESC(mdev_msr_batch, "Major number for msr_batch (int).");

static loff_t msr_seek(struct file *file, loff_t offset, int orig)
{
Expand Down Expand Up @@ -182,7 +191,7 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
u32 __user *uregs = (u32 __user *)arg;
u32 regs[8];
int cpu = iminor(file->f_path.dentry->d_inode);
int err;
int err = 0;

if (!capable(CAP_SYS_RAWIO))
{
Expand Down Expand Up @@ -286,19 +295,19 @@ static int msr_device_create(unsigned int cpu)
{
struct device *dev;

dev = device_create(msr_class, NULL, MKDEV(majordev, cpu), NULL, "msr_safe%d", cpu);
dev = device_create(msr_class, NULL, MKDEV(mdev_msr_safe, cpu), NULL, "msr_safe%d", cpu);
return IS_ERR(dev) ? PTR_ERR(dev) : 0;
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
static void msr_device_destroy(int cpu)
{
device_destroy(msr_class, MKDEV(majordev, cpu));
device_destroy(msr_class, MKDEV(mdev_msr_safe, cpu));
}
#else
static int msr_device_destroy(unsigned int cpu)
{
device_destroy(msr_class, MKDEV(majordev, cpu));
device_destroy(msr_class, MKDEV(mdev_msr_safe, cpu));
return 0;
}
#endif
Expand Down Expand Up @@ -340,30 +349,48 @@ static char *msr_devnode(struct device *dev, umode_t *mode)

static int __init msr_init(void)
{
int err;
int err = 0;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
int i;
#endif

err = msrbatch_init();
err = msrbatch_init(&mdev_msr_batch);
if (err != 0)
{
pr_debug("failed to initialize msrbatch\n");
pr_debug("failed to initialize msr_batch\n");
goto out;
}
err = msr_whitelist_init();

err = msr_whitelist_init(&mdev_msr_whitelist);
if (err != 0)
{
pr_debug("failed to initialize whitelist for msr\n");
pr_debug("failed to initialize msr_whitelist\n");
goto out_batch;
}
majordev = __register_chrdev(0, 0, num_possible_cpus(), "cpu/msr_safe", &msr_fops);
if (majordev < 0)

/*
* register_chrdev will return:
* If major == 0, dynamically allocate a major and return its number
* If major > 0, attempt to reserve a device with the given major
* number and return zero on success
* Return a negative errno on failure
*/
err = __register_chrdev(mdev_msr_safe, 0, num_possible_cpus(), "cpu/msr_safe", &msr_fops);
if (err < 0)
{
pr_debug("unable to get major %d for msr_safe\n", majordev);
pr_debug("unable to get major %d for msr_safe\n", mdev_msr_safe);
err = -EBUSY;
goto out_wlist;
}
if (err > 0)
{
mdev_msr_safe = err;
}

pr_debug("msr_safe major dev: %i\n", mdev_msr_safe);
pr_debug("msr_batch major dev: %i\n", mdev_msr_batch);
pr_debug("msr_whitelist major dev: %i\n", mdev_msr_whitelist);

msr_class = class_create(THIS_MODULE, "msr_safe");
if (IS_ERR(msr_class))
{
Expand Down Expand Up @@ -399,11 +426,11 @@ static int __init msr_init(void)
#endif
class_destroy(msr_class);
out_chrdev:
__unregister_chrdev(majordev, 0, num_possible_cpus(), "cpu/msr_safe");
__unregister_chrdev(mdev_msr_safe, 0, num_possible_cpus(), "cpu/msr_safe");
out_wlist:
msr_whitelist_cleanup();
msr_whitelist_cleanup(mdev_msr_whitelist);
out_batch:
msrbatch_cleanup();
msrbatch_cleanup(mdev_msr_batch);
out:
return err;
}
Expand All @@ -420,14 +447,14 @@ static void __exit msr_exit(void)
cpuhp_remove_state(cpuhp_msr_state);
#endif
class_destroy(msr_class);
__unregister_chrdev(majordev, 0, num_possible_cpus(), "cpu/msr_safe");
__unregister_chrdev(mdev_msr_safe, 0, num_possible_cpus(), "cpu/msr_safe");

#if LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
unregister_hotcpu_notifier(&msr_class_cpu_notifier);
#endif

msr_whitelist_cleanup();
msrbatch_cleanup();
msr_whitelist_cleanup(mdev_msr_whitelist);
msrbatch_cleanup(mdev_msr_batch);
}

module_exit(msr_exit)
Expand Down
26 changes: 15 additions & 11 deletions msr_whitelist.c
Expand Up @@ -65,7 +65,6 @@ static struct whitelist_entry *find_in_whitelist(u64 msr);
static void add_to_whitelist(struct whitelist_entry *entry);
static int parse_next_whitelist_entry(char *inbuf, char **nextinbuf, struct whitelist_entry *entry);
static ssize_t read_whitelist(struct file *file, char __user *buf, size_t count, loff_t *ppos);
static int majordev;
static struct class *cdev_class;
static char cdev_created;
static char cdev_registered;
Expand Down Expand Up @@ -394,7 +393,7 @@ static char *msr_whitelist_nodename(struct device *dev, umode_t *mode)
return kasprintf(GFP_KERNEL, "cpu/msr_whitelist");
}

void msr_whitelist_cleanup(void)
void msr_whitelist_cleanup(int majordev)
{
delete_whitelist();

Expand All @@ -417,36 +416,41 @@ void msr_whitelist_cleanup(void)
}
}

int msr_whitelist_init(void)
int msr_whitelist_init(int *majordev)
{
int err;
int err = 0;
struct device *dev;

majordev = register_chrdev(0, "cpu/msr_whitelist", &fops);
if (majordev < 0)
err = register_chrdev(*majordev, "cpu/msr_whitelist", &fops);
if (err < 0)
{
pr_debug("%s: unable to register chrdev\n", __FUNCTION__);
msr_whitelist_cleanup();
return -EBUSY;
msr_whitelist_cleanup(*majordev);
err = -EBUSY;
return err;
}
if (err > 0)
{
*majordev = err;
}
cdev_registered = 1;

cdev_class = class_create(THIS_MODULE, "msr_whitelist");
if (IS_ERR(cdev_class))
{
err = PTR_ERR(cdev_class);
msr_whitelist_cleanup();
msr_whitelist_cleanup(*majordev);
return err;
}
cdev_class_created = 1;

cdev_class->devnode = msr_whitelist_nodename;

dev = device_create(cdev_class, NULL, MKDEV(majordev, 0), NULL, "msr_whitelist");
dev = device_create(cdev_class, NULL, MKDEV(*majordev, 0), NULL, "msr_whitelist");
if (IS_ERR(dev))
{
err = PTR_ERR(dev);
msr_whitelist_cleanup();
msr_whitelist_cleanup(*majordev);
return err;
}
cdev_created = 1;
Expand Down
4 changes: 2 additions & 2 deletions msr_whitelist.h
Expand Up @@ -43,9 +43,9 @@

#include <linux/types.h>

int msr_whitelist_init(void);
int msr_whitelist_init(int *majordev);

int msr_whitelist_cleanup(void);
int msr_whitelist_cleanup(int majordev);

int msr_whitelist_exists(void);

Expand Down

0 comments on commit e67638f

Please sign in to comment.