IB/mlx5: Page faults handling infrastructure
* Refactor MR registration and cleanup, and fix reg_pages accounting.
* Create a work queue to handle page fault events in a kthread context.
* Register a fault handler to get events from the core for each QP.

The registered fault handler is empty in this patch, and only a later
patch implements it.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
haggaie authored and rolandd committed on Dec 16, 2014
parent 832a6b0 · commit 6aec21f
Showing 6 changed files with 294 additions and 22 deletions.
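A note on the pattern behind the second bullet of the commit message: page-fault events arrive from the hardware in event (atomic) context, where the driver cannot sleep, so each fault is wrapped in a work item and deferred to a workqueue whose worker kthread may sleep while resolving the fault. The sketch below illustrates only that deferral pattern; the names demo_pfault, demo_pfault_event, and demo_pfault_action are hypothetical, and the actual handler body is added by a later patch in the series.

#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

/* Mirrors struct mlx5_ib_pfault: a work item wrapping one fault event. */
struct demo_pfault {
	struct work_struct work;
	u32 key;			/* e.g. the faulting memory key */
};

static struct workqueue_struct *demo_pfault_wq;

/* Runs on the workqueue's kthread; sleeping is allowed here. */
static void demo_pfault_action(struct work_struct *work)
{
	struct demo_pfault *pf = container_of(work, struct demo_pfault, work);

	/* Resolve the fault: pin pages, update translations, resume the QP. */
	kfree(pf);
}

/* Called from event (atomic) context: defer the real work. */
static void demo_pfault_event(u32 key)
{
	struct demo_pfault *pf = kzalloc(sizeof(*pf), GFP_ATOMIC);

	if (!pf)
		return;			/* a real driver must handle this */
	pf->key = key;
	INIT_WORK(&pf->work, demo_pfault_action);
	queue_work(demo_pfault_wq, &pf->work);
}

The patch itself avoids the atomic allocation entirely: Connect-IB raises at most four concurrent faults per QP (one per context, see mlx5_ib.h below), so the work items live preallocated in struct mlx5_ib_qp's pagefaults array.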
31 changes: 27 additions & 4 deletions drivers/infiniband/hw/mlx5/main.c
@@ -864,7 +864,7 @@ static ssize_t show_reg_pages(struct device *device,
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 
-	return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages);
+	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
 }
 
 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -1389,16 +1389,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		goto err_eqs;
 
 	mutex_init(&dev->cap_mask_mutex);
-	spin_lock_init(&dev->mr_lock);
 
 	err = create_dev_resources(&dev->devr);
 	if (err)
 		goto err_eqs;
 
-	err = ib_register_device(&dev->ib_dev, NULL);
+	err = mlx5_ib_odp_init_one(dev);
 	if (err)
 		goto err_rsrc;
 
+	err = ib_register_device(&dev->ib_dev, NULL);
+	if (err)
+		goto err_odp;
+
 	err = create_umr_res(dev);
 	if (err)
 		goto err_dev;
@@ -1420,6 +1423,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 err_dev:
 	ib_unregister_device(&dev->ib_dev);
 
+err_odp:
+	mlx5_ib_odp_remove_one(dev);
+
 err_rsrc:
 	destroy_dev_resources(&dev->devr);
 
@@ -1435,8 +1441,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
 	struct mlx5_ib_dev *dev = context;
+
 	ib_unregister_device(&dev->ib_dev);
 	destroy_umrc_res(dev);
+	mlx5_ib_odp_remove_one(dev);
 	destroy_dev_resources(&dev->devr);
 	free_comp_eqs(dev);
 	ib_dealloc_device(&dev->ib_dev);
@@ -1450,15 +1458,30 @@ static struct mlx5_interface mlx5_ib_interface = {
 
 static int __init mlx5_ib_init(void)
 {
+	int err;
+
 	if (deprecated_prof_sel != 2)
 		pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
 
-	return mlx5_register_interface(&mlx5_ib_interface);
+	err = mlx5_ib_odp_init();
+	if (err)
+		return err;
+
+	err = mlx5_register_interface(&mlx5_ib_interface);
+	if (err)
+		goto clean_odp;
+
+	return err;
+
+clean_odp:
+	mlx5_ib_odp_cleanup();
+	return err;
 }
 
 static void __exit mlx5_ib_cleanup(void)
 {
 	mlx5_unregister_interface(&mlx5_ib_interface);
+	mlx5_ib_odp_cleanup();
 }
 
 module_init(mlx5_ib_init);
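The main.c changes above follow the kernel's standard goto-unwind idiom for probe/init paths: each setup step that can fail jumps to a label that tears down everything initialized before it, in reverse order. That is why inserting mlx5_ib_odp_init_one() between resource creation and device registration also inserts a matching err_odp label between err_dev and err_rsrc, and why mlx5_ib_init() grows a clean_odp label. A self-contained sketch of the idiom, with hypothetical setup_*/undo_* names:

#include <linux/errno.h>

/* Illustrative stubs standing in for create_dev_resources() and friends. */
static int setup_a(void) { return 0; }
static int setup_b(void) { return 0; }
static int setup_c(void) { return -ENOMEM; }	/* pretend the last step fails */
static void undo_b(void) { }
static void undo_a(void) { }

static int demo_probe(void)
{
	int err;

	err = setup_a();
	if (err)
		return err;		/* nothing to unwind yet */

	err = setup_b();
	if (err)
		goto err_a;

	err = setup_c();
	if (err)
		goto err_b;		/* unwinds b, then a */

	return 0;

err_b:					/* labels undo steps in reverse order */
	undo_b();
err_a:
	undo_a();
	return err;
}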
67 changes: 65 additions & 2 deletions drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -149,6 +149,29 @@ enum {
 	MLX5_QP_EMPTY
 };
 
+/*
+ * Connect-IB can trigger up to four concurrent pagefaults
+ * per-QP.
+ */
+enum mlx5_ib_pagefault_context {
+	MLX5_IB_PAGEFAULT_RESPONDER_READ,
+	MLX5_IB_PAGEFAULT_REQUESTOR_READ,
+	MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
+	MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
+	MLX5_IB_PAGEFAULT_CONTEXTS
+};
+
+static inline enum mlx5_ib_pagefault_context
+	mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
+{
+	return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
+}
+
+struct mlx5_ib_pfault {
+	struct work_struct	work;
+	struct mlx5_pagefault	mpfault;
+};
+
 struct mlx5_ib_qp {
 	struct ib_qp		ibqp;
 	struct mlx5_core_qp	mqp;
@@ -194,6 +217,21 @@ struct mlx5_ib_qp {
 
 	/* Store signature errors */
 	bool			signature_en;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/*
+	 * A flag that is true for QP's that are in a state that doesn't
+	 * allow page faults, and shouldn't schedule any more faults.
+	 */
+	int			disable_page_faults;
+	/*
+	 * The disable_page_faults_lock protects a QP's disable_page_faults
+	 * field, allowing for a thread to atomically check whether the QP
+	 * allows page faults, and if so schedule a page fault.
+	 */
+	spinlock_t		disable_page_faults_lock;
+	struct mlx5_ib_pfault	pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
+#endif
 };
 
 struct mlx5_ib_cq_buf {
@@ -392,13 +430,17 @@ struct mlx5_ib_dev {
 	struct umr_common		umrc;
 	/* sync used page count stats
 	 */
-	spinlock_t			mr_lock;
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
 	struct timer_list		delay_timer;
 	int				fill_delay;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	struct ib_odp_caps	odp_caps;
+	/*
+	 * Sleepable RCU that prevents destruction of MRs while they are still
+	 * being used by a page fault handler.
+	 */
+	struct srcu_struct	mr_srcu;
 #endif
 };
 
@@ -575,12 +617,33 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+extern struct workqueue_struct *mlx5_ib_page_fault_wq;
+
 int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev);
-#else
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+			       struct mlx5_ib_pfault *pfault);
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
+int __init mlx5_ib_odp_init(void);
+void mlx5_ib_odp_cleanup(void);
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
 {
 	return 0;
 }
+
+static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
+static inline int mlx5_ib_odp_init(void) { return 0; }
+static inline void mlx5_ib_odp_cleanup(void) {}
+static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
+
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
 static inline void init_query_mad(struct ib_smp *mad)
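One subtlety in the header above: mlx5_ib_get_pagefault_context() returns flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE) directly as an enum value, which only works because the enum is deliberately ordered to match the flag bits. Assuming REQUESTOR is bit 0 and WRITE is bit 1 (the layout the enum ordering implies; the real MLX5_PFAULT_* values live in the mlx5_core headers), the invariant can be checked at compile time:

#include <linux/bug.h>

/* Assumed bit layout; DEMO_* stand in for the real MLX5_PFAULT_* flags. */
#define DEMO_PFAULT_REQUESTOR	(1 << 0)
#define DEMO_PFAULT_WRITE	(1 << 1)

static inline void demo_check_pagefault_layout(void)
{
	/* flags & (REQUESTOR | WRITE) must index pagefaults[] directly. */
	BUILD_BUG_ON(MLX5_IB_PAGEFAULT_RESPONDER_READ  != 0);
	BUILD_BUG_ON(MLX5_IB_PAGEFAULT_REQUESTOR_READ  != DEMO_PFAULT_REQUESTOR);
	BUILD_BUG_ON(MLX5_IB_PAGEFAULT_RESPONDER_WRITE != DEMO_PFAULT_WRITE);
	BUILD_BUG_ON(MLX5_IB_PAGEFAULT_REQUESTOR_WRITE !=
		     (DEMO_PFAULT_REQUESTOR | DEMO_PFAULT_WRITE));
}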
45 changes: 31 additions & 14 deletions drivers/infiniband/hw/mlx5/mr.c
@@ -52,6 +52,8 @@ static __be64 mlx5_ib_update_mtt_emergency_buffer[
 static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
 #endif
 
+static int clean_mr(struct mlx5_ib_mr *mr);
+
 static int order2idx(struct mlx5_ib_dev *dev, int order)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -1049,6 +1051,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			mlx5_ib_dbg(dev, "cache empty for order %d", order);
 			mr = NULL;
 		}
+	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
+		err = -EINVAL;
+		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
+		goto error;
 	}
 
 	if (!mr)
@@ -1064,9 +1070,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	mr->umem = umem;
 	mr->npages = npages;
-	spin_lock(&dev->mr_lock);
-	dev->mdev->priv.reg_pages += npages;
-	spin_unlock(&dev->mr_lock);
+	atomic_add(npages, &dev->mdev->priv.reg_pages);
 	mr->ibmr.lkey = mr->mmr.key;
 	mr->ibmr.rkey = mr->mmr.key;
 
@@ -1110,12 +1114,9 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	return err;
 }
 
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int clean_mr(struct mlx5_ib_mr *mr)
 {
-	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-	struct mlx5_ib_mr *mr = to_mmr(ibmr);
-	struct ib_umem *umem = mr->umem;
-	int npages = mr->npages;
+	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
 	int umred = mr->umred;
 	int err;
 
@@ -1135,16 +1136,32 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 		free_cached_mr(dev, mr);
 	}
 
+	if (!umred)
+		kfree(mr);
+
+	return 0;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	int npages = mr->npages;
+	struct ib_umem *umem = mr->umem;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (umem)
+		/* Wait for all running page-fault handlers to finish. */
+		synchronize_srcu(&dev->mr_srcu);
+#endif
+
+	clean_mr(mr);
+
 	if (umem) {
 		ib_umem_release(umem);
-		spin_lock(&dev->mr_lock);
-		dev->mdev->priv.reg_pages -= npages;
-		spin_unlock(&dev->mr_lock);
+		atomic_sub(npages, &dev->mdev->priv.reg_pages);
 	}
 
-	if (!umred)
-		kfree(mr);
-
 	return 0;
 }
 
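The mr.c refactoring is what makes the mr_srcu field added in mlx5_ib.h useful: deregistration is split so that mlx5_ib_dereg_mr() can first call synchronize_srcu(&dev->mr_srcu), guaranteeing that every page-fault handler still inside its SRCU read-side section has finished with the MR before clean_mr() destroys it and the umem is released. The read side arrives with the actual fault handler in a later patch; below is a minimal sketch of the pairing, with hypothetical demo_* names.

#include <linux/srcu.h>

static struct srcu_struct demo_mr_srcu;	/* init_srcu_struct() at probe time */

/* Fault-handler side: a sleepable read-side critical section. */
static void demo_handle_fault(void)
{
	int idx = srcu_read_lock(&demo_mr_srcu);

	/* ... look up the MR and resolve the fault; may sleep ... */
	srcu_read_unlock(&demo_mr_srcu, idx);
}

/* Deregistration side: wait out in-flight readers before freeing. */
static void demo_dereg_mr(void)
{
	/* After this returns, no fault handler can still see the MR. */
	synchronize_srcu(&demo_mr_srcu);
	/* ... clean_mr() equivalent: destroy the MKey, release the umem ... */
}

The same file also swaps the spinlock-protected reg_pages counter for an atomic_t (atomic_add()/atomic_sub() here, atomic_read() in main.c's sysfs hook), which is why the now-unused mr_lock is dropped.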
[Diffs for the remaining three changed files are not shown here.]
