LinxISA · zhoubot · May 16, 2026 · May 16, 2026
diff --git a/Documentation/linxisa/abi.md b/Documentation/linxisa/abi.md
@@ -4,18 +4,22 @@ This document captures the **current** LinxISA scalar ABI as implemented by the
 bring-up toolchain, and the **Linux boot ABI** used by the QEMU LinxISA `virt`
 machine.
 
-## Toolchain ABI profiles (linx32 vs linx64)
+## Toolchain ABI profiles (current branch)
 
-The LinxISA toolchain supports two profiles (see the LinxISA ISA manual):
+The current in-repo compiler branch exposes these scalar toolchain profiles:
 
-- **linx32**: ILP32 (32-bit `int`, `long`, and pointers)
 - **linx64**: LP64 (32-bit `int`, 64-bit `long` and pointers)
+- **linx64be**: LP64, big-endian byte order (same type sizes as **linx64**)
 
 For the current Linux bring-up on `qemu-system-linx64`, use **linx64**.
 The `arch/linx/` port in this tree is configured as a 64-bit kernel
 (`CONFIG_64BIT=y`).
 
-## `/proc/cpuinfo` ISA reporting (Linx v0.3)
+Historical bring-up notes may still mention `linx32`, but the Bisheng compiler
+currently used by the superproject does not register a `linx32` arch, and the
+active compiler gate surface is `linx64`.
+
+## `/proc/cpuinfo` ISA reporting (Linx v0.56)
 
 The Linx Linux bring-up port reports ISA identity in two fields:
 
@@ -76,8 +80,8 @@ Kernel images accepted by QEMU `virt`:
 - ELF relocatable (`ET_REL`) object (`.o`) with `_start` (legacy bring-up path)
 - ELF executable (`ET_EXEC`) or PIE (`ET_DYN`) (recommended for Linux bring-up)
 
-Machine model reference and UART/exit MMIO are documented in
-`/Users/zhoubot/qemu/docs/linxisa/README.md`.
+Machine model reference and UART/exit MMIO are documented in the in-repo QEMU
+tree under `emulator/qemu/docs/linxisa/README.md`.
 
 This boot ABI is validated by the bring-up bootstub in `tools/linxisa/bootstub/`.
 

diff --git a/arch/linx/kernel/process.c b/arch/linx/kernel/process.c
@@ -210,10 +210,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	p->thread.sp = (unsigned long)regs;
 	memset(p->thread.s, 0, sizeof(p->thread.s));
 	p->thread.ebarg0 = 0;
-	p->thread.ebarg_bpc_cur = 0;
-	p->thread.ebarg_bpc_tgt = 0;
-	p->thread.ebarg_tpc = 0;
-	p->thread.ebarg_lra = 0;
+	/*
+	 * Fresh tasks first resume in linx_ret_from_fork() via __switch_to.
+	 * Seed the kernel-side EBARG PC/LRA fields to that entry point so the
+	 * first schedule_tail()/finish_task_switch() return chain does not
+	 * observe a zero link target before userspace pt_regs state is restored.
+	 */
+	p->thread.ebarg_bpc_cur = p->thread.ra;
+	p->thread.ebarg_bpc_tgt = p->thread.ra;
+	p->thread.ebarg_tpc = p->thread.ra;
+	p->thread.ebarg_lra = p->thread.ra;
 	memset(p->thread.ebarg_tq, 0, sizeof(p->thread.ebarg_tq));
 	memset(p->thread.ebarg_uq, 0, sizeof(p->thread.ebarg_uq));
 	p->thread.ebarg_lb = 0;

diff --git a/block/blk-core.c b/block/blk-core.c
@@ -51,8 +51,21 @@
 #include "blk-throttle.h"
 #include "blk-ioprio.h"
 
+#ifdef CONFIG_LINX
+#define LINX_BLKCORE_FN __attribute__((optnone)) noinline
+#else
+#define LINX_BLKCORE_FN
+#endif
+
 struct dentry *blk_debugfs_root;
 
+#ifdef CONFIG_LINX
+static LINX_BLKCORE_FN void linx_blk_percpu_ref_put(struct percpu_ref *ref)
+{
+	percpu_ref_put(ref);
+}
+#endif
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
@@ -366,9 +379,13 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio)
 	return -ENODEV;
 }
 
-void blk_queue_exit(struct request_queue *q)
+LINX_BLKCORE_FN void blk_queue_exit(struct request_queue *q)
 {
+#ifdef CONFIG_LINX
+	linx_blk_percpu_ref_put(&q->q_usage_counter);
+#else
 	percpu_ref_put(&q->q_usage_counter);
+#endif
 }
 
 static void blk_queue_usage_counter_release(struct percpu_ref *ref)
@@ -623,15 +640,19 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 	return BLK_STS_OK;
 }
 
-static void __submit_bio(struct bio *bio)
+LINX_BLKCORE_FN static void __submit_bio(struct bio *bio)
 {
 	/* If plug is not used, add new plug here to cache nsecs time. */
+#ifndef CONFIG_LINX
 	struct blk_plug plug;
+#endif
 
 	if (unlikely(!blk_crypto_bio_prep(&bio)))
 		return;
 
+#ifndef CONFIG_LINX
 	blk_start_plug(&plug);
+#endif
 
 	if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) {
 		blk_mq_submit_bio(bio);
@@ -648,7 +669,9 @@ static void __submit_bio(struct bio *bio)
 		blk_queue_exit(disk->queue);
 	}
 
+#ifndef CONFIG_LINX
 	blk_finish_plug(&plug);
+#endif
 }
 
 /*
@@ -714,7 +737,7 @@ static void __submit_bio_noacct(struct bio *bio)
 	current->bio_list = NULL;
 }
 
-static void __submit_bio_noacct_mq(struct bio *bio)
+LINX_BLKCORE_FN static void __submit_bio_noacct_mq(struct bio *bio)
 {
 	struct bio_list bio_list[2] = { };
 
@@ -727,7 +750,7 @@ static void __submit_bio_noacct_mq(struct bio *bio)
 	current->bio_list = NULL;
 }
 
-void submit_bio_noacct_nocheck(struct bio *bio, bool split)
+LINX_BLKCORE_FN void submit_bio_noacct_nocheck(struct bio *bio, bool split)
 {
 	blk_cgroup_bio_start(bio);
 
@@ -779,7 +802,7 @@ static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
  * systems and other upper level users of the block layer should use
  * submit_bio() instead.
  */
-void submit_bio_noacct(struct bio *bio)
+LINX_BLKCORE_FN void submit_bio_noacct(struct bio *bio)
 {
 	struct block_device *bdev = bio->bi_bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
@@ -908,7 +931,7 @@ static void bio_set_ioprio(struct bio *bio)
  * in @bio.  The bio must NOT be touched by the caller until ->bi_end_io() has
  * been called.
  */
-void submit_bio(struct bio *bio)
+LINX_BLKCORE_FN void submit_bio(struct bio *bio)
 {
 	if (bio_op(bio) == REQ_OP_READ) {
 		task_io_account_read(bio->bi_iter.bi_size);
@@ -1170,7 +1193,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
  *   plug. By flushing the pending I/O when the process goes to sleep, we avoid
  *   this kind of deadlock.
  */
-void blk_start_plug(struct blk_plug *plug)
+LINX_BLKCORE_FN void blk_start_plug(struct blk_plug *plug)
 {
 	blk_start_plug_nr_ios(plug, 1);
 }

diff --git a/block/blk-mq.c b/block/blk-mq.c
@@ -33,6 +33,7 @@
 
 #ifdef CONFIG_LINX
 #include <asm/debug_uart.h>
+#define LINX_BLKMQ_FN __attribute__((optnone)) noinline
 static inline void linx_blk_mq_dbg(const char *tag, unsigned long val)
 {
 	linx_debug_uart_puts("[BLKMQ] ");
@@ -41,6 +42,7 @@ static inline void linx_blk_mq_dbg(const char *tag, unsigned long val)
 	linx_debug_uart_putc('\n');
 }
 #else
+#define LINX_BLKMQ_FN
 static inline void linx_blk_mq_dbg(const char *tag, unsigned long val) { }
 #endif
 
@@ -2943,7 +2945,7 @@ static void blk_mq_dispatch_list(struct rq_list *rqs, bool from_sched)
 	} else {
 		blk_mq_insert_requests(this_hctx, this_ctx, &list, from_sched);
 	}
-	percpu_ref_put(&this_hctx->queue->q_usage_counter);
+	blk_queue_exit(this_hctx->queue);
 }
 
 static void blk_mq_dispatch_multiple_queue_requests(struct rq_list *rqs)
@@ -3131,7 +3133,7 @@ static bool bio_unaligned(const struct bio *bio, struct request_queue *q)
  * It will not queue the request if there is an error with the bio, or at the
  * request creation.
  */
-void blk_mq_submit_bio(struct bio *bio)
+LINX_BLKMQ_FN void blk_mq_submit_bio(struct bio *bio)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	struct blk_plug *plug = current->plug;

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
@@ -467,8 +467,18 @@ static __always_inline void preempt_enable_nested(void)
 		preempt_enable();
 }
 
-DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
-DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
+#ifdef CONFIG_LINX
+void linx_preempt_guard_enable(void);
+void linx_preempt_guard_enable_notrace(void);
+#define __linx_preempt_guard_enable() linx_preempt_guard_enable()
+#define __linx_preempt_guard_enable_notrace() linx_preempt_guard_enable_notrace()
+#else
+#define __linx_preempt_guard_enable() preempt_enable()
+#define __linx_preempt_guard_enable_notrace() preempt_enable_notrace()
+#endif
+
+DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), __linx_preempt_guard_enable())
+DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), __linx_preempt_guard_enable_notrace())
 
 #ifdef CONFIG_PREEMPT_DYNAMIC
 

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
@@ -4351,6 +4351,22 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	return success;
 }
 
+#ifdef CONFIG_LINX
+/* Out-of-line preempt_enable() used as the guard(preempt) exit action. */
+noinline void linx_preempt_guard_enable(void)
+{
+	preempt_enable();
+}
+EXPORT_SYMBOL(linx_preempt_guard_enable);
+
+/* Exit action for guard(preempt_notrace); notrace so ftrace never hooks it. */
+noinline notrace void linx_preempt_guard_enable_notrace(void)
+{
+	preempt_enable_notrace();
+}
+EXPORT_SYMBOL_GPL(linx_preempt_guard_enable_notrace);
+#endif
+
 static bool __task_needs_rq_lock(struct task_struct *p)
 {
 	unsigned int state = READ_ONCE(p->__state);

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
@@ -3390,13 +3390,17 @@ __acquires(&pool->lock)
 	 * workqueues), so hiding them isn't a problem.
 	 */
 	lockdep_invariant_state(true);
+#ifndef CONFIG_LINX
 	trace_workqueue_execute_start(work);
+#endif
 	worker->current_func(work);
 	/*
 	 * While we must be careful to not use "work" after this, the trace
 	 * point will only record its address.
 	 */
+#ifndef CONFIG_LINX
 	trace_workqueue_execute_end(work, worker->current_func);
+#endif
 
 	lock_map_release(&lockdep_map);
 	if (!bh_draining)

diff --git a/lib/atomic64.c b/lib/atomic64.c
@@ -12,6 +12,17 @@
 #include <linux/export.h>
 #include <linux/atomic.h>
 
+#ifdef CONFIG_LINX
+/*
+ * Linx bring-up currently trips E_BLOCK in several tiny optimized helper
+ * epilogues. Keep the generic atomic64 fallback out-of-line and unoptimized
+ * until the underlying call/return issue is fully closed.
+ */
+#define LINX_ATOMIC64_FN __attribute__((optnone)) noinline
+#else
+#define LINX_ATOMIC64_FN
+#endif
+
 /*
  * We use a hashed array of spinlocks to provide exclusive access
  * to each atomic64_t variable.  Since this is expected to used on
@@ -42,7 +53,7 @@ static inline arch_spinlock_t *lock_addr(const atomic64_t *v)
 	return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
 }
 
-s64 generic_atomic64_read(const atomic64_t *v)
+LINX_ATOMIC64_FN s64 generic_atomic64_read(const atomic64_t *v)
 {
 	unsigned long flags;
 	arch_spinlock_t *lock = lock_addr(v);
@@ -57,7 +68,7 @@ s64 generic_atomic64_read(const atomic64_t *v)
 }
 EXPORT_SYMBOL(generic_atomic64_read);
 
-void generic_atomic64_set(atomic64_t *v, s64 i)
+LINX_ATOMIC64_FN void generic_atomic64_set(atomic64_t *v, s64 i)
 {
 	unsigned long flags;
 	arch_spinlock_t *lock = lock_addr(v);
@@ -71,7 +82,7 @@ void generic_atomic64_set(atomic64_t *v, s64 i)
 EXPORT_SYMBOL(generic_atomic64_set);
 
 #define ATOMIC64_OP(op, c_op)						\
-void generic_atomic64_##op(s64 a, atomic64_t *v)			\
+LINX_ATOMIC64_FN void generic_atomic64_##op(s64 a, atomic64_t *v)	\
 {									\
 	unsigned long flags;						\
 	arch_spinlock_t *lock = lock_addr(v);				\
@@ -85,7 +96,7 @@ void generic_atomic64_##op(s64 a, atomic64_t *v)			\
 EXPORT_SYMBOL(generic_atomic64_##op);
 
 #define ATOMIC64_OP_RETURN(op, c_op)					\
-s64 generic_atomic64_##op##_return(s64 a, atomic64_t *v)		\
+LINX_ATOMIC64_FN s64 generic_atomic64_##op##_return(s64 a, atomic64_t *v) \
 {									\
 	unsigned long flags;						\
 	arch_spinlock_t *lock = lock_addr(v);				\
@@ -101,7 +112,7 @@ s64 generic_atomic64_##op##_return(s64 a, atomic64_t *v)		\
 EXPORT_SYMBOL(generic_atomic64_##op##_return);
 
 #define ATOMIC64_FETCH_OP(op, c_op)					\
-s64 generic_atomic64_fetch_##op(s64 a, atomic64_t *v)			\
+LINX_ATOMIC64_FN s64 generic_atomic64_fetch_##op(s64 a, atomic64_t *v) \
 {									\
 	unsigned long flags;						\
 	arch_spinlock_t *lock = lock_addr(v);				\
@@ -138,7 +149,7 @@ ATOMIC64_OPS(xor, ^=)
 #undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP
 
-s64 generic_atomic64_dec_if_positive(atomic64_t *v)
+LINX_ATOMIC64_FN s64 generic_atomic64_dec_if_positive(atomic64_t *v)
 {
 	unsigned long flags;
 	arch_spinlock_t *lock = lock_addr(v);
@@ -155,7 +166,7 @@ s64 generic_atomic64_dec_if_positive(atomic64_t *v)
 }
 EXPORT_SYMBOL(generic_atomic64_dec_if_positive);
 
-s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
+LINX_ATOMIC64_FN s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 {
 	unsigned long flags;
 	arch_spinlock_t *lock = lock_addr(v);
@@ -172,7 +183,7 @@ s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 }
 EXPORT_SYMBOL(generic_atomic64_cmpxchg);
 
-s64 generic_atomic64_xchg(atomic64_t *v, s64 new)
+LINX_ATOMIC64_FN s64 generic_atomic64_xchg(atomic64_t *v, s64 new)
 {
 	unsigned long flags;
 	arch_spinlock_t *lock = lock_addr(v);
@@ -188,7 +199,7 @@ s64 generic_atomic64_xchg(atomic64_t *v, s64 new)
 }
 EXPORT_SYMBOL(generic_atomic64_xchg);
 
-s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+LINX_ATOMIC64_FN s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	unsigned long flags;
 	arch_spinlock_t *lock = lock_addr(v);