Skip to content

Commit

Permalink
genirq/irqdomain: Add reference counting to IRQs
Browse files Browse the repository at this point in the history
PCI devices share 4 legacy INTx interrupts from the same PCI host bridge.
Device drivers map/unmap hardware interrupts via irq_create_mapping()/
irq_dispose_mapping(). The problem is that these interrupts are
shared and when performing hot unplug, we need to unmap the interrupt
only when the last device is released.

This reuses already existing irq_desc::kobj for this purpose.
The refcounter is naturally already 1 once the descriptor is allocated;
this adds kobject_get() in places where an already existing mapped virq
is returned.

This reorganizes irq_dispose_mapping() to release the kobj and let
the release callback do the cleanup.

As kobject_put() is called directly now (not via RCU), it can also handle
the early boot case (irq_kobj_base==NULL) with the help of
the kobject::state_in_sysfs flag and without additional irq_sysfs_del().
While at it, clean up the comment where irq_sysfs_del() was called.

A quick grep shows no sign of IRQ reference counting in drivers. Drivers
typically request a mapping when probing and dispose of it when removing;
platforms tend to dispose only if setup failed, and the rest seem to
call one dispose per mapping, except (at least) PPC/pseries,
which needs https://lkml.org/lkml/2020/10/27/259

Cc: Cédric Le Goater <clg@kaod.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Qian Cai <cai@lca.pw>
Cc: Rob Herring <robh@kernel.org>
Cc: Frederic Barrat <fbarrat@linux.ibm.com>
Cc: Michal Suchánek <msuchanek@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
  • Loading branch information
aik authored and intel-lab-lkp committed Nov 9, 2020
1 parent d315c62 commit 3fe0622
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 47 deletions.
57 changes: 25 additions & 32 deletions kernel/irq/irqdesc.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,18 +295,6 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc)
}
}

/*
 * Call kobject_del() on the descriptor's kobject, skipping the call
 * while irq_kobj_base is still NULL.
 */
static void irq_sysfs_del(struct irq_desc *desc)
{
/*
* If irq_sysfs_init() has not yet been invoked (early boot), then
* irq_kobj_base is NULL and the descriptor was never added.
* kobject_del() complains about an object with no parent, so make
* it conditional.
*/
if (irq_kobj_base)
kobject_del(&desc->kobj);
}

static int __init irq_sysfs_init(void)
{
struct irq_desc *desc;
Expand Down Expand Up @@ -337,7 +325,6 @@ static struct kobj_type irq_kobj_type = {
};

static void irq_sysfs_add(int irq, struct irq_desc *desc) {}
static void irq_sysfs_del(struct irq_desc *desc) {}

#endif /* CONFIG_SYSFS */

Expand Down Expand Up @@ -419,20 +406,40 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
return NULL;
}

static void delayed_free_desc(struct rcu_head *rhp);
static void irq_kobj_release(struct kobject *kobj)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);

free_masks(desc);
free_percpu(desc->kstat_irqs);
kfree(desc);
#ifdef CONFIG_IRQ_DOMAIN
struct irq_domain *domain;
unsigned int virq = desc->irq_data.irq;

domain = desc->irq_data.domain;
if (domain) {
if (irq_domain_is_hierarchy(domain)) {
irq_domain_free_irqs(virq, 1);
} else {
irq_domain_disassociate(domain, virq);
irq_free_desc(virq);
}
}
#endif
/*
* We free the descriptor, masks and stat fields via RCU. That
* allows demultiplex interrupts to do rcu based management of
* the child interrupts.
* This also allows us to use rcu in kstat_irqs_usr().
*/
call_rcu(&desc->rcu, delayed_free_desc);
}

static void delayed_free_desc(struct rcu_head *rhp)
{
struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);

kobject_put(&desc->kobj);
free_masks(desc);
free_percpu(desc->kstat_irqs);
kfree(desc);
}

static void free_desc(unsigned int irq)
Expand All @@ -443,24 +450,10 @@ static void free_desc(unsigned int irq)
unregister_irq_proc(irq, desc);

/*
* sparse_irq_lock protects also show_interrupts() and
* kstat_irq_usr(). Once we deleted the descriptor from the
* sparse tree we can free it. Access in proc will fail to
* lookup the descriptor.
*
* The sysfs entry must be serialized against a concurrent
* irq_sysfs_init() as well.
*/
irq_sysfs_del(desc);
delete_irq_desc(irq);

/*
* We free the descriptor, masks and stat fields via RCU. That
* allows demultiplex interrupts to do rcu based management of
* the child interrupts.
* This also allows us to use rcu in kstat_irqs_usr().
*/
call_rcu(&desc->rcu, delayed_free_desc);
}

static int alloc_descs(unsigned int start, unsigned int cnt, int node,
Expand Down
37 changes: 22 additions & 15 deletions kernel/irq/irqdomain.c
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
{
struct device_node *of_node;
int virq;
struct irq_desc *desc;

pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);

Expand All @@ -666,7 +667,9 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
/* Check if mapping already exists */
virq = irq_find_mapping(domain, hwirq);
if (virq) {
desc = irq_to_desc(virq);
pr_debug("-> existing mapping on virq %d\n", virq);
kobject_get(&desc->kobj);
return virq;
}

Expand Down Expand Up @@ -762,6 +765,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
irq_hw_number_t hwirq;
unsigned int type = IRQ_TYPE_NONE;
int virq;
struct irq_desc *desc;

if (fwspec->fwnode) {
domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED);
Expand Down Expand Up @@ -798,8 +802,11 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
* current trigger type then we are done so return the
* interrupt number.
*/
if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) {
desc = irq_to_desc(virq);
kobject_get(&desc->kobj);
return virq;
}

/*
* If the trigger type has not been set yet, then set
Expand All @@ -811,6 +818,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
return 0;

irqd_set_trigger_type(irq_data, type);
desc = irq_to_desc(virq);
kobject_get(&desc->kobj);
return virq;
}

Expand Down Expand Up @@ -863,22 +872,12 @@ EXPORT_SYMBOL_GPL(irq_create_of_mapping);
*/
void irq_dispose_mapping(unsigned int virq)
{
struct irq_data *irq_data = irq_get_irq_data(virq);
struct irq_domain *domain;

if (!virq || !irq_data)
return;
struct irq_desc *desc = irq_to_desc(virq);

domain = irq_data->domain;
if (WARN_ON(domain == NULL))
if (!virq || !desc)
return;

if (irq_domain_is_hierarchy(domain)) {
irq_domain_free_irqs(virq, 1);
} else {
irq_domain_disassociate(domain, virq);
irq_free_desc(virq);
}
kobject_put(&desc->kobj);
}
EXPORT_SYMBOL_GPL(irq_dispose_mapping);

Expand Down Expand Up @@ -1424,6 +1423,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
bool get_ref = false;

if (domain == NULL) {
domain = irq_default_domain;
Expand All @@ -1433,6 +1433,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,

if (realloc && irq_base >= 0) {
virq = irq_base;
get_ref = true;
} else {
virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node,
affinity);
Expand Down Expand Up @@ -1464,8 +1465,14 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
}
}

for (i = 0; i < nr_irqs; i++)
for (i = 0; i < nr_irqs; i++) {
irq_domain_insert_irq(virq + i);
if (get_ref) {
struct irq_desc *desc = irq_to_desc(virq + i);

kobject_get(&desc->kobj);
}
}
mutex_unlock(&irq_domain_mutex);

return virq;
Expand Down

0 comments on commit 3fe0622

Please sign in to comment.