diff --git a/cpu/esp8266/ld/esp8266.riot-os.no_sdk.app.ld b/cpu/esp8266/ld/esp8266.riot-os.no_sdk.app.ld
index e7b7e0d550d7..fd555cae0172 100644
--- a/cpu/esp8266/ld/esp8266.riot-os.no_sdk.app.ld
+++ b/cpu/esp8266/ld/esp8266.riot-os.no_sdk.app.ld
@@ -206,6 +206,8 @@ SECTIONS
     LONG(0)
     LONG(0)
     . = ALIGN (16);
+    *(.UserExceptionTrampoline.text)
+    . = ALIGN (16);
     *(.entry.text)
     *(.init.literal)
     *(.init)
diff --git a/cpu/esp8266/ld/esp8266.riot-os.sdk.app.ld b/cpu/esp8266/ld/esp8266.riot-os.sdk.app.ld
index 5a3bf0f3da02..f9f779cff223 100644
--- a/cpu/esp8266/ld/esp8266.riot-os.sdk.app.ld
+++ b/cpu/esp8266/ld/esp8266.riot-os.sdk.app.ld
@@ -105,16 +105,43 @@ SECTIONS
     _data_end = ABSOLUTE(.);
   } >dram0_0_seg :dram0_0_phdr
 
+  /*
+   * .rodata sections that are placed in RAM
+   *
+   * Usually, all .rodata sections are placed in RAM by the Espressif SDK,
+   * since IROM (flash) access requires 32-bit word-aligned reads.
+   *
+   * However, thanks to the LoadStoreError handler from esp-open-rtos, which
+   * is also used in RIOT-OS, it is possible to place .rodata sections in
+   * IROM (flash) to save RAM resources.
+   *
+   * Only the .rodata sections of compilation units that may be executed
+   * while the SPI flash is not mapped have to be kept in RAM, that is,
+   * IRAM functions called from interrupt context and the SPI flash
+   * management functions. Such compilation units have to be listed here.
+   *
+   * Furthermore, compilation units whose constant data is performance-
+   * critical should be listed here as well.
+   */
+
   .rodata : ALIGN(4)
   {
     _rodata_start = ABSOLUTE(.);
 
     *(.sdk.version)
-    /* TODO put only necessary .rodata to dram
-    *libc.a:*.o(.rodata.* .rodata)
+    *core.a:*(.rodata.* .rodata)
     *cpu.a:*(.rodata .rodata.*)
-    */
-    *(.rodata .rodata.*)
+    *esp.a:*(.rodata .rodata.*)
+    *esp_now.a:*(.rodata .rodata.*)
+    *esp_wifi.a:*(.rodata .rodata.*)
+    *periph.a:*(.rodata.* .rodata)
+    *sdk.a:*(.rodata .rodata.*)
+    *xtensa.a:*(.rodata .rodata.*)
+
+    *libc.a:*.o(.rodata.* .rodata)
+    *libpp.a:wdev.o(.rodata.* .rodata)
+    *libmain.a:spi_flash.o(.rodata.* .rodata)
+
     *(.gnu.linkonce.r.*)
     *(.rodata1)
     __XT_EXCEPTION_TABLE__ = ABSOLUTE(.);
@@ -206,6 +233,8 @@ SECTIONS
     LONG(0)
     LONG(0)
     . = ALIGN (16);
+    *(.UserExceptionTrampoline.text)
+    . = ALIGN (16);
     *(.entry.text)
     *(.init.literal)
     *(.init)
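The practical effect of these archive lists is easiest to see from the C side. The sketch below is illustrative only (the file contents and names are invented, not part of the patch): constants of a compilation unit land in .rodata, and whether that section ends up in DRAM or in IROM (flash) is decided by whether the unit's archive is matched above. Byte-wise access to flash-resident constants only works because of the LoadStoreError handler added below, and each such access costs an exception, which is what the remark about performance-critical constant data refers to.

    #include <stddef.h>

    /* String literals and const tables land in .rodata. Whether that is
     * DRAM or IROM (flash) is decided by the archive lists above. */
    static const char greeting[] = "hello";

    size_t greeting_len(void)
    {
        size_t n = 0;
        /* Each greeting[n] is a byte load (l8ui). From IROM, every
         * iteration traps into the LoadStoreError handler; from DRAM,
         * it is a single ordinary load. */
        while (greeting[n] != '\0') {
            n++;
        }
        return n;
    }
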
diff --git a/cpu/esp8266/vendor/xtensa/xtensa_vectors.S b/cpu/esp8266/vendor/xtensa/xtensa_vectors.S
index e620f5facc0d..069d9b558464 100644
--- a/cpu/esp8266/vendor/xtensa/xtensa_vectors.S
+++ b/cpu/esp8266/vendor/xtensa/xtensa_vectors.S
@@ -488,12 +488,324 @@ User Exception (including Level 1 Interrupt from user mode).
 
 _UserExceptionVector:
 
+#ifdef MCU_ESP8266
+    wsr     a0, EXCSAVE_1               /* preserve a0 */
+    j       _UserExceptionTrampoline    /* jump to handler trampoline */
+#else
     wsr     a0, EXCSAVE_1               /* preserve a0 */
     call0   _xt_user_exc                /* user exception handler */
     /* never returns here - call0 is used as a jump (see note at top) */
+#endif
 
     .end    literal_prefix
 
+#ifdef MCU_ESP8266
+/*************************** LoadStoreError Handler BEGIN ********************/
+/*
+ * PLEASE NOTE: The code between the "LoadStoreError Handler BEGIN" and
+ * "LoadStoreError Handler END" markers was extracted from esp-open-rtos.
+ * It is under the following copyright:
+ *
+ * Original vector contents Copyright (C) 2014-2015 Espressif Systems
+ * Additions Copyright (C) Superhouse Automation Pty Ltd and Angus Gratton
+ * BSD Licensed as described in the file LICENSE
+ *
+ * Usually, access to the IROM (flash) memory requires 32-bit word-aligned
+ * reads. Attempts to access data in the IROM (flash) memory that are less
+ * than 32 bits in size trigger a LoadStoreError exception. Therefore, it
+ * is normally not possible to place .rodata sections in IROM (flash);
+ * they have to be placed in RAM instead. With the exception handler from
+ * esp-open-rtos, it becomes possible to access data in IROM (flash) with
+ * a size of less than 32 bits and thus to place .rodata sections in IROM
+ * (flash).
+ */
+
+#define CAUSE_LOADSTORE 3
+#define fatal_exception_handler _xt_user_exc
+
+/* LoadStoreError handler stack */
+
+    .section .bss
+    .balign 16
+
+_LoadStoreErrorHandlerStack:
+    .word   0                   # a0
+    .word   0                   # (a1 slot, unused)
+    .word   0                   # a2
+    .word   0                   # a3
+    .word   0                   # a4
+
+/* LoadStoreError Trampoline */
+
+    .section .UserExceptionTrampoline.text, "x"
+    .literal_position
+    .balign 4
+
+_UserExceptionTrampoline:
+
+    wsr     a1, EXCSAVE_2               /* preserve a1 */
+    rsr     a1, exccause
+    beqi    a1, CAUSE_LOADSTORE, _LoadStoreErrorHandler
+    rsr     a1, EXCSAVE_2               /* restore a1 */
+    call0   _xt_user_exc                /* user exception handler */
+    /* never returns here - call0 is used as a jump (see note at top) */
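+
+    /*
+     * What follows emulates the faulting load in software. For an l8ui,
+     * the core of the emulation corresponds to this C sketch (illustrative
+     * only, the helper name is made up; a little-endian core is assumed):
+     *
+     *   static uint8_t irom_read_u8(const void *addr)
+     *   {
+     *       uintptr_t a = (uintptr_t)addr;
+     *       // only aligned 32-bit reads are legal on IROM
+     *       uint32_t word = *(const uint32_t *)(a & ~(uintptr_t)3);
+     *       // extract the addressed byte from the fetched word
+     *       return (uint8_t)(word >> ((a & 3) * 8));
+     *   }
+     *
+     * In addition, the assembly below decodes the faulting instruction,
+     * stores the result into the instruction's target register, and
+     * advances EPC past the emulated instruction.
+     */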
+    /*
+     * Xtensa "Load/Store Exception" handler:
+     * Completes L8/L16 load instructions from the instruction address space,
+     * for which the architecture only supports 32-bit reads.
+     *
+     * Reached from _UserExceptionTrampoline if EXCCAUSE is LoadStoreErrorCause.
+     *
+     * (The fast path (no branches) is for L8UI.)
+     */
+    .literal_position
+    .balign 4
+    .type   _LoadStoreErrorHandler, @function
+
+_LoadStoreErrorHandler:
+
+    rsr     a1, EXCSAVE_2       /* restore a1 */
+    wsr     a1, EXCSAVE_1       /* and save it to excsave1 instead */
+    /* Registers are saved at the address corresponding to their register
+     * number times 4. This allows a quick and easy mapping later on when
+     * the read value needs to be stored to a particular register. */
+    movi    sp, _LoadStoreErrorHandlerStack
+    s32i    a0, sp, 0
+    s32i    a2, sp, 0x08
+    s32i    a3, sp, 0x0c
+    s32i    a4, sp, 0x10
+    rsr     a0, sar             # save SAR in a0 to restore later
+
+    /* Examine the opcode which generated the exception. */
+    /* Note: Instructions are in this order to avoid pipeline stalls. */
+    rsr     a2, epc1
+    movi    a3, ~3
+    ssa8l   a2                  # sar is now the correct shift for an aligned read
+    and     a2, a2, a3          # a2 is now the 4-byte aligned instruction address
+    l32i    a4, a2, 0
+    l32i    a2, a2, 4
+    movi    a3, 0x00700F        # opcode mask for l8ui/l16si/l16ui
+    src     a2, a2, a4          # a2 is now the instruction that failed
+    and     a3, a2, a3          # a3 is the masked instruction
+    bnei    a3, 0x000002, .LSE_check_l16
+
+    /* Note: At this point the opcode could technically be one of two things:
+     *   xx0xx2 (L8UI)
+     *   xx8xx2 (reserved, invalid opcode)
+     * It is assumed that we never get here from an illegal opcode, so we
+     * don't bother to check for that case and presume this is always an
+     * L8UI. */
+
+    movi    a4, ~3
+    rsr     a3, excvaddr        # read the faulting address
+    and     a4, a3, a4          # a4 is now the word-aligned read address
+
+    l32i    a4, a4, 0           # perform the actual read
+    ssa8l   a3                  # sar is now the shift to extract a3's byte
+    srl     a3, a4              # shift right by the correct distance
+    extui   a4, a3, 0, 8        # mask off the bits we need for an l8
+
+.LSE_post_fetch:
+    /* We jump back here after either the L8UI or the L16*I routines do the
+     * necessary work to read the value from memory.
+     * At this point, a2 holds the faulting instruction and a4 holds the
+     * correctly read value.
+     *
+     * Restore the original SAR value (saved in a0) and update EPC so we
+     * return to the instruction following the one we just emulated. */
+
+    /* Note: Instructions are in this order to avoid pipeline stalls. */
+    rsr     a3, epc1
+    wsr     a0, sar
+    addi    a3, a3, 0x3
+    wsr     a3, epc1
+
+    /* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
+     * per entry (so we can avoid a second jump by just doing an RFE inside
+     * each entry). Unfortunately, Xtensa doesn't have an addx16 operation
+     * to make that easy for us. Luckily, all of the faulting opcodes we
+     * process are guaranteed to have bit 3 be zero, which means that if we
+     * just shift the register bits of the opcode down by 3 instead of 4,
+     * we get the register number multiplied by 2. This, combined with an
+     * addx8, gives us an effective addx16 without needing any extra shift
+     * operations. */
+    extui   a2, a2, 3, 5        # a2 is now the destination register 0-15 times 2
+
+    bgei    a2, 10, .LSE_assign_reg     # a5..a15 use the jumptable
+    beqi    a2, 2, .LSE_assign_a1       # a1 uses a special routine
+
+    /* We're storing into a0 or a2..a4, which are all saved in our "stack"
+     * area. Calculate the correct address and stick the value in there,
+     * then just do our normal restore and RFE (no jumps required, which
+     * actually makes a0..a4 substantially faster). */
+    addx2   a2, a2, sp
+    s32i    a4, a2, 0
+
+    /* Restore all registers and return. */
+    l32i    a0, sp, 0
+    l32i    a2, sp, 0x08
+    l32i    a3, sp, 0x0c
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1        # restore a1 from excsave1
+    rfe
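+
+    /*
+     * The jumptable dispatch prepared above corresponds to this C sketch
+     * (illustrative only, the name is made up):
+     *
+     *   uintptr_t lse_entry(uintptr_t table_base, uint32_t insn)
+     *   {
+     *       uint32_t reg_x2 = (insn >> 3) & 0x1f;  // extui a2, a2, 3, 5
+     *       return table_base + reg_x2 * 8;        // addx8 a2, a2, a3
+     *   }
+     *
+     * Because bit 3 of the masked opcodes is always 0, reg_x2 is the
+     * target register number times 2, so reg_x2 * 8 selects one 16-byte
+     * jumptable entry per register.
+     */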
+
+.LSE_assign_reg:
+    /* At this point, a2 contains the register number times 2 and a4 is the
+     * read value. */
+
+    /* Calculate the jumptable address, and restore all registers except a2
+     * and a4 so we have less to do after jumping. */
+    /* Note: Instructions are in this order to avoid pipeline stalls. */
+    movi    a3, .LSE_jumptable_base
+    l32i    a0, sp, 0
+    addx8   a2, a2, a3          # a2 is now the address to jump to
+    l32i    a3, sp, 0x0c
+
+    jx      a2
+
+    .balign 4
+.LSE_check_l16:
+    /* At this point, a2 contains the opcode and a3 is the masked opcode. */
+    movi    a4, 0x001002        # l16si or l16ui opcode after masking
+    bne     a3, a4, .LSE_wrong_opcode
+
+    /* Note: At this point, the opcode could be one of two things:
+     *   xx1xx2 (L16UI)
+     *   xx9xx2 (L16SI)
+     * Both of these we can handle. */
+
+    movi    a4, ~3
+    rsr     a3, excvaddr        # read the faulting address
+    and     a4, a3, a4          # a4 is now the word-aligned read address
+
+    l32i    a4, a4, 0           # perform the actual read
+    ssa8l   a3                  # sar is now the shift to extract a3's bytes
+    srl     a3, a4              # shift right by the correct distance
+    extui   a4, a3, 0, 16       # mask off the bits we need for an l16
+
+    bbci    a2, 15, .LSE_post_fetch     # not a signed op
+    bbci    a4, 15, .LSE_post_fetch     # value does not need sign extension
+
+    movi    a3, 0xFFFF0000
+    or      a4, a3, a4          # set the upper 16 sign bits
+    j       .LSE_post_fetch
+
+.LSE_wrong_opcode:
+    /* If we got here, it's not an opcode we can try to fix, so bomb out.
+     * Restore the registers so that any dump the fatal exception routine
+     * produces will have correct values. */
+    wsr     a0, sar
+    l32i    a0, sp, 0
+    /*l32i    a2, sp, 0x08*/
+    l32i    a3, sp, 0x0c
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    mov     a2, a1
+    movi    a3, 0
+    call0   fatal_exception_handler
+
+    .balign 4
+.LSE_assign_a1:
+    /* a1 is saved in excsave1, so just update that with the value. */
+    wsr     a4, excsave1
+    /* Then restore all registers and return. */
+    l32i    a0, sp, 0
+    l32i    a2, sp, 0x08
+    l32i    a3, sp, 0x0c
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .balign 4
+.LSE_jumptable:
+    /* The first 5 entries (80 bytes) of this table are unused (registers
+     * a0..a4 are handled separately above). Rather than waste that space,
+     * we just pretend that the table starts 80 bytes earlier in memory. */
+    .set    .LSE_jumptable_base, .LSE_jumptable - (16 * 5)
+
+    .org    .LSE_jumptable_base + (16 * 5)
+    mov     a5, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 6)
+    mov     a6, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 7)
+    mov     a7, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 8)
+    mov     a8, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 9)
+    mov     a9, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 10)
+    mov     a10, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 11)
+    mov     a11, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 12)
+    mov     a12, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 13)
+    mov     a13, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 14)
+    mov     a14, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+    .org    .LSE_jumptable_base + (16 * 15)
+    mov     a15, a4
+    l32i    a2, sp, 0x08
+    l32i    a4, sp, 0x10
+    rsr     a1, excsave1
+    rfe
+
+/*************************** LoadStoreError Handler END **********************/
+#endif
+
 /*
 --------------------------------------------------------------------------------
   Insert some waypoints for jumping beyond the signed 8-bit range of
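For completeness, the 16-bit path of the handler (.LSE_check_l16 above), including its sign-extension step for l16si, maps to the following C sketch. It is illustrative only; the helper name is made up and a little-endian core is assumed, mirroring the ssa8l/srl/extui sequence in the assembly.

    #include <stdint.h>

    static int32_t irom_read_16(const void *addr, int is_signed)
    {
        uintptr_t a = (uintptr_t)addr;
        /* only aligned 32-bit reads are legal on IROM */
        uint32_t word = *(const uint32_t *)(a & ~(uintptr_t)3);
        /* extract the addressed halfword (extui ..., 0, 16) */
        uint32_t half = (word >> ((a & 3) * 8)) & 0xffffu;
        if (is_signed && (half & 0x8000u)) {
            half |= 0xffff0000u;   /* movi a3, 0xFFFF0000; or a4, a3, a4 */
        }
        return (int32_t)half;
    }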