Skip to content

Commit

Permalink
riscv: mm: support icache&tlb HW broadcast on xuantie
Browse files Browse the repository at this point in the history
Configuring `flush-tlb-range` and `flush-icache-range` in the DTS enables the
HW broadcast feature of the Xuantie CPU, so software broadcast is avoided and
system performance is improved.

A DTS example is as follows:
```
cpus {
    ...
    flush-icache-range = <1>;
    flush-tlb-range = <1>;
    cpu@0 {
    ...
```

Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
  • Loading branch information
cp0613 authored and guoren83 committed Apr 15, 2024
1 parent 85dbf11 commit a08f2e5
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 9 deletions.
35 changes: 33 additions & 2 deletions arch/riscv/mm/cacheflush.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,31 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
#endif /* CONFIG_SMP */

#ifdef CONFIG_MMU
/* Non-zero when the "flush-icache-range" DT property enables Xuantie HW icache broadcast. */
static u32 xuantie_flush_icache_range_flag = 0;

/*
 * Invalidate the icache for [start, end) using the Xuantie vendor
 * icache.iva instruction (hardware-broadcast, per cache line), then
 * synchronize with sync.is.
 *
 * The vendor opcode implicitly operates on a0, so the loop index is
 * pinned to a0.  The address must be named as an asm operand and a
 * "memory" clobber added: GCC only guarantees a local register
 * variable holds its register when it is used as an asm operand, and
 * without the clobber the compiler may reorder or elide the loop.
 */
static inline void __flush_icache_range(unsigned long start, unsigned long end)
{
	register unsigned long i asm("a0") = start & ~(L1_CACHE_BYTES - 1);

	for (; i < end; i += L1_CACHE_BYTES)
		asm volatile (".long 0x0305000b"	/* icache.iva a0 */
			      : : "r" (i) : "memory");

	asm volatile (".long 0x01b0000b" : : : "memory");	/* sync_is */
}
/*
 * Ensure the icache is coherent with a newly mapped executable page.
 *
 * When the Xuantie HW broadcast flag is set, only the page backing
 * @pte is invalidated (broadcast in hardware), avoiding the cost of
 * flush_icache_all().  Otherwise fall back to the full flush.
 */
void flush_icache_pte(pte_t pte)
{
	struct page *page = pte_page(pte);
	struct folio *folio = page_folio(page);

	if (!test_bit(PG_dcache_clean, &folio->flags)) {
		if (xuantie_flush_icache_range_flag) {
			/* NOTE(review): assumes the page has a kernel mapping (no highmem) */
			unsigned long start = (unsigned long)page_address(page);
			unsigned long end = start + page_size(page);

			/* remove flush_icache_all for performance */
			__flush_icache_range(start, end);
		} else {
			flush_icache_all();
		}
		set_bit(PG_dcache_clean, &folio->flags);
	}
}
Expand Down Expand Up @@ -139,3 +158,15 @@ void __init riscv_init_cbo_blocksizes(void)
if (cboz_block_size)
riscv_cboz_block_size = cboz_block_size;
}

/*
 * Read the optional "flush-icache-range" property from the /cpus node
 * and latch it into xuantie_flush_icache_range_flag.  The flag stays 0
 * (software broadcast) when the node or property is absent.
 */
static int __init xuantie_flush_icache_init(void)
{
	struct device_node *cpu = of_find_node_by_path("/cpus");

	if (cpu) {
		of_property_read_u32(cpu, "flush-icache-range", &xuantie_flush_icache_range_flag);
		of_node_put(cpu);
	}

	return 0;
}
arch_initcall(xuantie_flush_icache_init);
64 changes: 57 additions & 7 deletions arch/riscv/mm/tlbflush.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/of.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <asm/sbi.h>
#include <asm/mmu_context.h>

static u32 xuantie_flush_tlb_range_flag = 0;

static inline void local_flush_tlb_all_asid(unsigned long asid)
{
__asm__ __volatile__ ("sfence.vma x0, %0"
Expand Down Expand Up @@ -48,10 +51,14 @@ static void __ipi_flush_tlb_all(void *info)

/*
 * Flush the TLB on all CPUs.
 *
 * With the Xuantie HW broadcast flag set, a single local sfence.vma is
 * broadcast to every hart in hardware.  Otherwise fall back to IPIs or
 * the SBI remote-fence call.
 */
void flush_tlb_all(void)
{
	if (xuantie_flush_tlb_range_flag) {
		__asm__ __volatile__ ("sfence.vma" : : : "memory");
	} else {
		if (riscv_use_ipi_for_rfence())
			on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
		else
			sbi_remote_sfence_vma(NULL, 0, -1);
	}
}

struct flush_tlb_range_data {
Expand Down Expand Up @@ -129,18 +136,49 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,

/*
 * Flush all TLB entries belonging to @mm's address space.
 *
 * With HW broadcast enabled, one ASID-scoped sfence.vma is broadcast
 * to all harts; otherwise use the generic software range flush.
 */
void flush_tlb_mm(struct mm_struct *mm)
{
	if (xuantie_flush_tlb_range_flag) {
		/* NOTE(review): assumes asid_mask matches the context.id ASID encoding */
		unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;

		__asm__ __volatile__ ("sfence.vma zero, %0"
				      :
				      : "r" (asid)
				      : "memory");
	} else {
		__flush_tlb_range(mm, 0, -1, PAGE_SIZE);
	}
}

/*
 * Flush the TLB entry for a single page @addr in @vma's address space.
 *
 * With HW broadcast enabled, one address+ASID sfence.vma is broadcast
 * to all harts; otherwise use the generic software range flush.
 */
void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
{
	if (xuantie_flush_tlb_range_flag) {
		unsigned long asid = atomic_long_read(&vma->vm_mm->context.id) & asid_mask;

		addr &= PAGE_MASK;
		__asm__ __volatile__ ("sfence.vma %0, %1"
				      :
				      : "r" (addr), "r" (asid)
				      : "memory");
	} else {
		__flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE);
	}
}

/*
 * Flush TLB entries for [start, end) in @vma's address space.
 *
 * With HW broadcast enabled, issue one address+ASID sfence.vma per
 * page; each is broadcast to all harts in hardware.
 *
 * NOTE(review): for very large ranges this per-page loop may be
 * slower than a single ASID-wide flush — confirm a threshold is not
 * needed for this core.
 */
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	if (xuantie_flush_tlb_range_flag) {
		unsigned long asid = atomic_long_read(&vma->vm_mm->context.id) & asid_mask;

		/* Page-align the range: round start down and end up. */
		start &= PAGE_MASK;
		end = (end + PAGE_SIZE - 1) & PAGE_MASK;

		while (start < end) {
			__asm__ __volatile__ ("sfence.vma %0, %1"
					      :
					      : "r" (start), "r" (asid)
					      : "memory");
			start += PAGE_SIZE;
		}
	} else {
		__flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE);
	}
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
Expand All @@ -149,3 +187,15 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
__flush_tlb_range(vma->vm_mm, start, end - start, PMD_SIZE);
}
#endif

/*
 * Read the optional "flush-tlb-range" property from the /cpus node and
 * latch it into xuantie_flush_tlb_range_flag.  The flag stays 0
 * (software/SBI flush) when the node or property is absent.
 */
static int __init xuantie_flush_tlb_init(void)
{
	struct device_node *cpu = of_find_node_by_path("/cpus");

	if (cpu) {
		of_property_read_u32(cpu, "flush-tlb-range", &xuantie_flush_tlb_range_flag);
		of_node_put(cpu);
	}

	return 0;
}
arch_initcall(xuantie_flush_tlb_init);

0 comments on commit a08f2e5

Please sign in to comment.