From 308f548ee8f7171d8a34238f3f435dcf861ee499 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
 <marmarek@invisiblethingslab.com>
Date: Mon, 14 Nov 2022 15:50:46 +0100
Subject: [PATCH 2/2] x86/hvm: Allow writes to registers on the same page as
 MSI-X table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some devices (notably the Intel Wi-Fi 6 AX210 card) keep auxiliary
registers on the same page as the MSI-X table. The device model
(especially one running in a stubdomain) cannot really handle those, as
direct writes to that page are refused (the page is on the
mmio_ro_ranges list). Instead, extend msixtbl_mmio_ops to handle such
accesses too.

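As a rough illustration (addresses made up; only PCI_MSIX_ENTRY_SIZE,
PFN_DOWN() and ASSERT() are real), a 16-entry table covers just 256
bytes, so a register placed right after it still shares the
mmio_ro_ranges-protected page:

    paddr_t table = 0xf2030000;                       /* hypothetical table start */
    paddr_t aux   = table + 16 * PCI_MSIX_ENTRY_SIZE; /* register past the table */

    /* Same page, hence hit by the same r/o restriction as the table: */
    ASSERT(PFN_DOWN(aux) == PFN_DOWN(table));
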
Doing this requires correlating the write location with the guest's view
of the MSI-X table address. Since QEMU doesn't map the MSI-X table into
the guest, this requires msixtbl_entry->gtable, which is HVM-only. A
similar feature for PV would need to be done separately.

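Simplified sketch of that correlation (these helpers don't exist as
such; the real checks are open-coded in msixtbl_find_entry() and its
callers below):

    static bool same_page_as_table(unsigned long addr,
                                   const struct msixtbl_entry *entry)
    {
        return PFN_DOWN(addr) >= PFN_DOWN(entry->gtable) &&
               PFN_DOWN(addr) <= PFN_DOWN(entry->gtable + entry->table_len - 1);
    }

    static bool within_table(unsigned long addr,
                             const struct msixtbl_entry *entry)
    {
        return addr >= entry->gtable &&
               addr < entry->gtable + entry->table_len;
    }
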
This will also be used to read the Pending Bit Array if it lives on the
same page, so QEMU doesn't need /dev/mem access at all (especially
helpful with lockdown enabled in dom0). If the PBA lives on another
page, QEMU will map it into the guest directly.
If the PBA lives on the same page, discard writes and log a message.
Technically, writes outside of the PBA could be allowed, but at the
moment the precise location of the PBA isn't saved, and no known device
abuses the spec in this way (at least not yet).

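Whether the PBA shares a page with the table is derived from the page
ranges already tracked in struct arch_msix; condensed into a
hypothetical helper (the real check is open-coded in adjacent_handle()
below):

    static bool pba_shares_page(const struct arch_msix *msix,
                                unsigned int adj_type)
    {
        return (adj_type == ADJ_IDX_LAST && msix->table.last == msix->pba.first) ||
               (adj_type == ADJ_IDX_FIRST && msix->table.first == msix->pba.last);
    }
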
To access those registers, msixtbl_mmio_ops needs the relevant page
mapped. MSI handling already has infrastructure for that, using the
fixmap, so try to map the first/last page of the MSI-X table (if
necessary) and save their fixmap indexes. Note that msix_get_fixmap()
does reference counting and reuses an existing mapping, so just call it
directly, even if the page was mapped before. Also, it uses a specific
range of fixmap indexes which doesn't include 0, so use 0 as the default
("not mapped") value, which simplifies the code a bit.

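Condensed from the msi.c hunk below, the mapping done while preparing
the device for passthrough amounts to (last-page case analogous, error
handling shortened):

    if ( PAGE_OFFSET(table_paddr) )   /* table doesn't start on a page boundary */
    {
        int idx = msix_get_fixmap(msix, table_paddr, table_paddr);

        if ( idx > 0 )                /* 0 keeps meaning "not mapped" */
            msix->adj_access_idx[ADJ_IDX_FIRST] = idx;
    }
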
GCC gets confused about the 'desc' variable:

arch/x86/hvm/vmsi.c: In function ‘msixtbl_range’:
arch/x86/hvm/vmsi.c:553:8: error: ‘desc’ may be used uninitialized [-Werror=maybe-uninitialized]
  553 |     if ( desc )
      |        ^
arch/x86/hvm/vmsi.c:537:28: note: ‘desc’ was declared here
  537 |     const struct msi_desc *desc;
      |                            ^~~~

Its conditional initialization is actually correct (on the path where it
isn't initialized, the function returns early), but initialize it
explicitly to NULL anyway to avoid the build failure.

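The pattern reduced to a standalone example (hypothetical code, not part
of Xen; whether GCC warns depends on version and optimization level):

    int f(int handled, int val)
    {
        int x;            /* only written when !handled ... */

        if ( !handled )
            x = val;

        if ( handled )
            return 0;     /* ... and only read when !handled */

        return x;         /* -Wmaybe-uninitialized may still fire here */
    }
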
Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
---
Changes in v4:
- drop the same_page parameter of msixtbl_find_entry(), distinguish the two
  cases in the relevant callers
- rename adj_access_table_idx to adj_access_idx
- code style fixes
- drop the alignment check in adjacent_{read,write}() - all callers already
  have it earlier
- delay mapping the first/last MSI-X pages until preparing the device for
  passthrough
v3:
- merge handling into msixtbl_mmio_ops
- extend the commit message
v2:
- adjust the commit message
- pass struct domain to msixtbl_page_handler_get_hwaddr()
- reduce local variables used only once
- log a warning if a write is forbidden because the MSI-X table and the PBA
  live on the same page
- do not pass through unaligned accesses
- handle accesses both before and after the MSI-X table
---
xen/arch/x86/hvm/vmsi.c | 191 +++++++++++++++++++++++++++++++-- |
|
|
xen/arch/x86/include/asm/msi.h | 5 + |
|
|
xen/arch/x86/msi.c | 40 +++++++ |
|
|
3 files changed, 225 insertions(+), 11 deletions(-) |
|
|
|
|
|
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c |
|
|
index ebc052b8ea84..e21c09f0f5cb 100644 |
|
|
--- a/xen/arch/x86/hvm/vmsi.c |
|
|
+++ b/xen/arch/x86/hvm/vmsi.c |
|
|
@@ -180,6 +180,10 @@ static bool msixtbl_initialised(const struct domain *d) |
|
|
return d->arch.hvm.msixtbl_list.next; |
|
|
} |
|
|
|
|
|
+/* |
|
|
+ * Lookup an msixtbl_entry on the same page as given addr. It's up to the |
|
|
+ * caller to check if address is strictly part of the table - if relevant. |
|
|
+ */ |
|
|
static struct msixtbl_entry *msixtbl_find_entry( |
|
|
struct vcpu *v, unsigned long addr) |
|
|
{ |
|
|
@@ -187,8 +191,8 @@ static struct msixtbl_entry *msixtbl_find_entry( |
|
|
struct domain *d = v->domain; |
|
|
|
|
|
list_for_each_entry( entry, &d->arch.hvm.msixtbl_list, list ) |
|
|
- if ( addr >= entry->gtable && |
|
|
- addr < entry->gtable + entry->table_len ) |
|
|
+ if ( PFN_DOWN(addr) >= PFN_DOWN(entry->gtable) && |
|
|
+ PFN_DOWN(addr) <= PFN_DOWN(entry->gtable + entry->table_len - 1) ) |
|
|
return entry; |
|
|
|
|
|
return NULL; |
|
|
@@ -213,6 +217,131 @@ static struct msi_desc *msixtbl_addr_to_desc( |
|
|
return NULL; |
|
|
} |
|
|
|
|
|
+/* |
|
|
+ * Returns: |
|
|
+ * - UINT_MAX if no handling should be done |
|
|
+ * - UINT_MAX-1 if write should be discarded |
|
|
+ * - a fixmap idx to use for handling |
|
|
+ */ |
|
|
+#define ADJACENT_DONT_HANDLE UINT_MAX |
|
|
+#define ADJACENT_DISCARD_WRITE (UINT_MAX - 1) |
|
|
+static unsigned int adjacent_handle( |
|
|
+ const struct msixtbl_entry *entry, unsigned long addr, bool write) |
|
|
+{ |
|
|
+ unsigned int adj_type; |
|
|
+ const struct arch_msix *msix; |
|
|
+ |
|
|
+ if ( !entry || !entry->pdev ) |
|
|
+ return ADJACENT_DONT_HANDLE; |
|
|
+ |
|
|
+ if ( PFN_DOWN(addr) == PFN_DOWN(entry->gtable) && addr < entry->gtable ) |
|
|
+ adj_type = ADJ_IDX_FIRST; |
|
|
+ else if ( PFN_DOWN(addr) == PFN_DOWN(entry->gtable + entry->table_len - 1) && |
|
|
+ addr >= entry->gtable + entry->table_len ) |
|
|
+ adj_type = ADJ_IDX_LAST; |
|
|
+ else |
|
|
+ return ADJACENT_DONT_HANDLE; |
|
|
+ |
|
|
+ msix = entry->pdev->msix; |
|
|
+ ASSERT(msix); |
|
|
+ |
|
|
+ if ( !msix->adj_access_idx[adj_type] ) |
|
|
+ { |
|
|
+ gprintk(XENLOG_WARNING, |
|
|
+ "Page for adjacent(%d) MSI-X table access not initialized for %pp (addr %#lx, gtable %#lx\n", |
|
|
+ adj_type, &entry->pdev->sbdf, addr, entry->gtable); |
|
|
+ |
|
|
+ return ADJACENT_DONT_HANDLE; |
|
|
+ } |
|
|
+ |
|
|
+ /* If PBA lives on the same page too, discard writes. */ |
|
|
+ if ( write && |
|
|
+ ((adj_type == ADJ_IDX_LAST && |
|
|
+ msix->table.last == msix->pba.first) || |
|
|
+ (adj_type == ADJ_IDX_FIRST && |
|
|
+ msix->table.first == msix->pba.last)) ) |
|
|
+ { |
|
|
+ gprintk(XENLOG_WARNING, |
|
|
+ "MSI-X table and PBA of %pp live on the same page, " |
|
|
+ "writing to other registers there is not implemented\n", |
|
|
+ &entry->pdev->sbdf); |
|
|
+ return ADJACENT_DISCARD_WRITE; |
|
|
+ } |
|
|
+ |
|
|
+ return msix->adj_access_idx[adj_type]; |
|
|
+} |
|
|
+ |
|
|
+static int adjacent_read( |
|
|
+ unsigned int fixmap_idx, |
|
|
+ paddr_t address, unsigned int len, uint64_t *pval) |
|
|
+{ |
|
|
+ const void __iomem *hwaddr; |
|
|
+ |
|
|
+ *pval = ~0UL; |
|
|
+ |
|
|
+ ASSERT(fixmap_idx != ADJACENT_DISCARD_WRITE); |
|
|
+ |
|
|
+ hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address); |
|
|
+ |
|
|
+ switch ( len ) |
|
|
+ { |
|
|
+ case 1: |
|
|
+ *pval = readb(hwaddr); |
|
|
+ break; |
|
|
+ |
|
|
+ case 2: |
|
|
+ *pval = readw(hwaddr); |
|
|
+ break; |
|
|
+ |
|
|
+ case 4: |
|
|
+ *pval = readl(hwaddr); |
|
|
+ break; |
|
|
+ |
|
|
+ case 8: |
|
|
+ *pval = readq(hwaddr); |
|
|
+ break; |
|
|
+ |
|
|
+ default: |
|
|
+ ASSERT_UNREACHABLE(); |
|
|
+ } |
|
|
+ return X86EMUL_OKAY; |
|
|
+} |
|
|
+ |
|
|
+static int adjacent_write( |
|
|
+ unsigned int fixmap_idx, |
|
|
+ uint64_t address, uint32_t len, uint64_t val) |
|
|
+{ |
|
|
+ void __iomem *hwaddr; |
|
|
+ |
|
|
+ if ( fixmap_idx == ADJACENT_DISCARD_WRITE ) |
|
|
+ return X86EMUL_OKAY; |
|
|
+ |
|
|
+ hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address); |
|
|
+ |
|
|
+ switch ( len ) |
|
|
+ { |
|
|
+ case 1: |
|
|
+ writeb(val, hwaddr); |
|
|
+ break; |
|
|
+ |
|
|
+ case 2: |
|
|
+ writew(val, hwaddr); |
|
|
+ break; |
|
|
+ |
|
|
+ case 4: |
|
|
+ writel(val, hwaddr); |
|
|
+ break; |
|
|
+ |
|
|
+ case 8: |
|
|
+ writeq(val, hwaddr); |
|
|
+ break; |
|
|
+ |
|
|
+ default: |
|
|
+ ASSERT_UNREACHABLE(); |
|
|
+ } |
|
|
+ return X86EMUL_OKAY; |
|
|
+} |
|
|
+ |
|
|
static int cf_check msixtbl_read( |
|
|
const struct hvm_io_handler *handler, uint64_t address, uint32_t len, |
|
|
uint64_t *pval) |
|
|
@@ -220,16 +349,31 @@ static int cf_check msixtbl_read( |
|
|
unsigned long offset; |
|
|
struct msixtbl_entry *entry; |
|
|
unsigned int nr_entry, index; |
|
|
+ unsigned int adjacent_fixmap; |
|
|
int r = X86EMUL_UNHANDLEABLE; |
|
|
|
|
|
- if ( (len != 4 && len != 8) || (address & (len - 1)) ) |
|
|
+ if ( !IS_ALIGNED(address, len) ) |
|
|
return r; |
|
|
|
|
|
rcu_read_lock(&msixtbl_rcu_lock); |
|
|
- |
|
|
entry = msixtbl_find_entry(current, address); |
|
|
if ( !entry ) |
|
|
goto out; |
|
|
+ |
|
|
+ adjacent_fixmap = adjacent_handle(entry, address, false); |
|
|
+ if ( adjacent_fixmap != ADJACENT_DONT_HANDLE ) |
|
|
+ { |
|
|
+ r = adjacent_read(adjacent_fixmap, address, len, pval); |
|
|
+ goto out; |
|
|
+ } |
|
|
+ |
|
|
+ if ( address < entry->gtable || |
|
|
+ address >= entry->gtable + entry->table_len ) |
|
|
+ goto out; |
|
|
+ |
|
|
+ if ( len != 4 && len != 8 ) |
|
|
+ goto out; |
|
|
+ |
|
|
offset = address & (PCI_MSIX_ENTRY_SIZE - 1); |
|
|
|
|
|
if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET ) |
|
|
@@ -290,6 +434,7 @@ static int msixtbl_write(struct vcpu *v, unsigned long address, |
|
|
int r = X86EMUL_UNHANDLEABLE; |
|
|
unsigned long flags; |
|
|
struct irq_desc *desc; |
|
|
+ unsigned int adjacent_fixmap; |
|
|
|
|
|
if ( !IS_ALIGNED(address, len) ) |
|
|
return X86EMUL_OKAY; |
|
|
@@ -299,6 +444,19 @@ static int msixtbl_write(struct vcpu *v, unsigned long address, |
|
|
entry = msixtbl_find_entry(v, address); |
|
|
if ( !entry ) |
|
|
goto out; |
|
|
+ |
|
|
+ adjacent_fixmap = adjacent_handle(entry, address, true); |
|
|
+ if ( adjacent_fixmap != ADJACENT_DONT_HANDLE ) |
|
|
+ { |
|
|
+ r = adjacent_write(adjacent_fixmap, address, len, val); |
|
|
+ goto out; |
|
|
+ } |
|
|
+ if ( address < entry->gtable || |
|
|
+ address >= entry->gtable + entry->table_len ) |
|
|
+ goto out; |
|
|
+ if ( len != 4 && len != 8 ) |
|
|
+ goto out; |
|
|
+ |
|
|
nr_entry = array_index_nospec(((address - entry->gtable) / |
|
|
PCI_MSIX_ENTRY_SIZE), |
|
|
MAX_MSIX_TABLE_ENTRIES); |
|
|
@@ -364,8 +522,8 @@ static int cf_check _msixtbl_write( |
|
|
const struct hvm_io_handler *handler, uint64_t address, uint32_t len, |
|
|
uint64_t val) |
|
|
{ |
|
|
- /* ignore invalid length or unaligned writes */ |
|
|
- if ( len != 4 && len != 8 || !IS_ALIGNED(address, len) ) |
|
|
+ /* ignore unaligned writes */ |
|
|
+ if ( !IS_ALIGNED(address, len) ) |
|
|
return X86EMUL_OKAY; |
|
|
|
|
|
/* |
|
|
@@ -382,14 +540,22 @@ static bool cf_check msixtbl_range( |
|
|
{ |
|
|
struct vcpu *curr = current; |
|
|
unsigned long addr = r->addr; |
|
|
- const struct msi_desc *desc; |
|
|
+ const struct msixtbl_entry *entry; |
|
|
+ const struct msi_desc *desc = NULL; |
|
|
+ unsigned int adjacent_fixmap; |
|
|
|
|
|
ASSERT(r->type == IOREQ_TYPE_COPY); |
|
|
|
|
|
rcu_read_lock(&msixtbl_rcu_lock); |
|
|
- desc = msixtbl_addr_to_desc(msixtbl_find_entry(curr, addr), addr); |
|
|
+ entry = msixtbl_find_entry(curr, addr); |
|
|
+ adjacent_fixmap = adjacent_handle(entry, addr, false); |
|
|
+ if ( adjacent_fixmap == ADJACENT_DONT_HANDLE ) |
|
|
+ desc = msixtbl_addr_to_desc(entry, addr); |
|
|
rcu_read_unlock(&msixtbl_rcu_lock); |
|
|
|
|
|
+ if ( adjacent_fixmap != ADJACENT_DONT_HANDLE ) |
|
|
+ return 1; |
|
|
+ |
|
|
if ( desc ) |
|
|
return 1; |
|
|
|
|
|
@@ -630,12 +796,15 @@ void msix_write_completion(struct vcpu *v) |
|
|
v->arch.hvm.hvm_io.msix_snoop_gpa ) |
|
|
{ |
|
|
unsigned int token = hvmemul_cache_disable(v); |
|
|
- const struct msi_desc *desc; |
|
|
+ const struct msi_desc *desc = NULL; |
|
|
+ const struct msixtbl_entry *entry; |
|
|
uint32_t data; |
|
|
|
|
|
rcu_read_lock(&msixtbl_rcu_lock); |
|
|
- desc = msixtbl_addr_to_desc(msixtbl_find_entry(v, snoop_addr), |
|
|
- snoop_addr); |
|
|
+ entry = msixtbl_find_entry(v, snoop_addr); |
|
|
+ if ( entry && snoop_addr >= entry->gtable && |
|
|
+ snoop_addr < entry->gtable + entry->table_len ) |
|
|
+ desc = msixtbl_addr_to_desc(entry, snoop_addr); |
|
|
rcu_read_unlock(&msixtbl_rcu_lock); |
|
|
|
|
|
if ( desc && |
|
|
diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h |
|
|
index fe670895eed2..86acae3adc6a 100644 |
|
|
--- a/xen/arch/x86/include/asm/msi.h |
|
|
+++ b/xen/arch/x86/include/asm/msi.h |
|
|
@@ -229,6 +229,10 @@ struct __packed msg_address { |
|
|
PCI_MSIX_ENTRY_SIZE + \ |
|
|
(~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1))) |
|
|
|
|
|
+/* indexes in adj_access_idx[] below */ |
|
|
+#define ADJ_IDX_FIRST 0 |
|
|
+#define ADJ_IDX_LAST 1 |
|
|
+ |
|
|
struct arch_msix { |
|
|
unsigned int nr_entries, used_entries; |
|
|
struct { |
|
|
@@ -236,6 +240,7 @@ struct arch_msix { |
|
|
} table, pba; |
|
|
int table_refcnt[MAX_MSIX_TABLE_PAGES]; |
|
|
int table_idx[MAX_MSIX_TABLE_PAGES]; |
|
|
+ unsigned int adj_access_idx[2]; |
|
|
spinlock_t table_lock; |
|
|
bool host_maskall, guest_maskall; |
|
|
domid_t warned; |
|
|
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c |
|
|
index d0bf63df1def..7673c1cffe43 100644 |
|
|
--- a/xen/arch/x86/msi.c |
|
|
+++ b/xen/arch/x86/msi.c |
|
|
@@ -928,6 +928,36 @@ static int msix_capability_init(struct pci_dev *dev, |
|
|
list_add_tail(&entry->list, &dev->msi_list); |
|
|
*desc = entry; |
|
|
} |
|
|
+ else |
|
|
+ { |
|
|
+ /* |
|
|
+ * If the MSI-X table doesn't start at the page boundary, map the first page for |
|
|
+ * passthrough accesses. |
|
|
+ */ |
|
|
+ if ( PAGE_OFFSET(table_paddr) ) |
|
|
+ { |
|
|
+ int idx = msix_get_fixmap(msix, table_paddr, table_paddr); |
|
|
+ |
|
|
+ if ( idx > 0 ) |
|
|
+ msix->adj_access_idx[ADJ_IDX_FIRST] = idx; |
|
|
+ else |
|
|
+ gprintk(XENLOG_ERR, "Failed to map first MSI-X table page: %d\n", idx); |
|
|
+ } |
|
|
+ /* |
|
|
+ * If the MSI-X table doesn't end on the page boundary, map the last page |
|
|
+ * for passthrough accesses. |
|
|
+ */ |
|
|
+ if ( PAGE_OFFSET(table_paddr + msix->nr_entries * PCI_MSIX_ENTRY_SIZE) ) |
|
|
+ { |
|
|
+ uint64_t entry_paddr = table_paddr + msix->nr_entries * PCI_MSIX_ENTRY_SIZE; |
|
|
+ int idx = msix_get_fixmap(msix, table_paddr, entry_paddr); |
|
|
+ |
|
|
+ if ( idx > 0 ) |
|
|
+ msix->adj_access_idx[ADJ_IDX_LAST] = idx; |
|
|
+ else |
|
|
+ gprintk(XENLOG_ERR, "Failed to map last MSI-X table page: %d\n", idx); |
|
|
+ } |
|
|
+ } |
|
|
|
|
|
if ( !msix->used_entries ) |
|
|
{ |
|
|
@@ -1090,6 +1120,16 @@ static void _pci_cleanup_msix(struct arch_msix *msix) |
|
|
WARN(); |
|
|
msix->table.first = 0; |
|
|
msix->table.last = 0; |
|
|
+ if ( msix->adj_access_idx[ADJ_IDX_FIRST] ) |
|
|
+ { |
|
|
+ msix_put_fixmap(msix, msix->adj_access_idx[ADJ_IDX_FIRST]); |
|
|
+ msix->adj_access_idx[ADJ_IDX_FIRST] = 0; |
|
|
+ } |
|
|
+ if ( msix->adj_access_idx[ADJ_IDX_LAST] ) |
|
|
+ { |
|
|
+ msix_put_fixmap(msix, msix->adj_access_idx[ADJ_IDX_LAST]); |
|
|
+ msix->adj_access_idx[ADJ_IDX_LAST] = 0; |
|
|
+ } |
|
|
|
|
|
if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first, |
|
|
msix->pba.last) ) |
|
|
-- |
|
|
2.41.0 |
|
|
|