Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion libcpu/arm/cortex-m3/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ if rtconfig.PLATFORM == 'gcc':
if rtconfig.PLATFORM == 'iar':
src += Glob('*_iar.S')

group = DefineGroup('CPU', src, depend = [''], CPPPATH = CPPPATH)
CPPDEFINES = ['RT_USING_ASM_MEMCPY']

group = DefineGroup('CPU', src, depend = [''], CPPPATH = CPPPATH, CPPDEFINES = CPPDEFINES)

Return('group')
275 changes: 275 additions & 0 deletions libcpu/arm/cortex-m3/rt_memcpy_gcc.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
/*
* Copyright (c) 2006-2018, RT-Thread Development Team
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-12-23 Meco Man porting to RT-Thread
*/

@********************************************************************************************************
@ Copyright 2004-2020 Silicon Laboratories Inc. www.silabs.com
@
@ SPDX-License-Identifier: APACHE-2.0
@
@ This software is subject to an open source license and is distributed by
@ Silicon Laboratories Inc. pursuant to the terms of the Apache License,
@ Version 2.0 available at www.apache.org/licenses/LICENSE-2.0.
@********************************************************************************************************


@********************************************************************************************************
@ PUBLIC FUNCTIONS
@********************************************************************************************************

.global rt_memcpy_asm


@********************************************************************************************************
@ CODE GENERATION DIRECTIVES
@********************************************************************************************************

.text
.align 2
.thumb
.syntax unified

@********************************************************************************************************
@ rt_memcpy_asm()
@
@ Description : Copy data octets from one buffer to another buffer.
@
@ Argument(s) : pdest Pointer to destination memory buffer.
@
@ psrc Pointer to source memory buffer.
@
@ size Number of data buffer octets to copy.
@
@ Return(s) : pdest Pointer to destination memory buffer.
@
@ Caller(s) : Application.
@
@ Note(s) : (1) Null copies allowed (i.e. 0-octet size).
@
@ (2) Memory buffers NOT checked for overlapping.
@
@ (3) Modulo arithmetic is used to determine whether a memory buffer starts on a 'CPU_ALIGN'
@ address boundary.
@
@ (4) ARM Cortex-M3 processors use a subset of the ARM Thumb-2 instruction set which does
@ NOT support 16-bit conditional branch instructions but ONLY supports 8-bit conditional
@ branch instructions.
@
@ Therefore, branches exceeding 8-bit, signed, relative offsets :
@
@ (a) CANNOT be implemented with conditional branches; but ...
@ (b) MUST be implemented with non-conditional branches.
@********************************************************************************************************

@ void *rt_memcpy_asm (void *pdest, @ ==> R0 (pdest is also the value returned in R0)
@ void *psrc, @ ==> R1
@ rt_ubase_t count) @ ==> R2

@--------------------------------------------------------------------------------------------------------
@ rt_memcpy_asm : copy count octets from psrc to pdest (the buffers are NOT checked for overlap).
@
@ In      : R0 = pdest, R1 = psrc, R2 = count (octets)
@ Out     : R0 = pdest (same value that was passed in)
@ Clobbers: R1, R2 and the condition flags. R3-R12 are used as scratch but are restored on return.
@ Stack   : 12 words (R0, R3-R12, LR) = 48 octets, pushed with a single instruction so that SP stays
@           8-octet aligned for the whole copy (AAPCS stack rule, relevant if an exception is taken
@           while the copy is running).
@
@ Strategy:
@   1. dest and src share the same (address mod 4): byte-copy up to the next word boundary, then move
@      words (bursts of 10 registers), then fall through to the halfword and byte tails.
@   2. Otherwise, dest and src share the same (address mod 2): byte-copy up to the next halfword
@      boundary, then move halfwords, then fall through to the byte tail.
@   3. Otherwise: plain byte copy.
@--------------------------------------------------------------------------------------------------------

        .equ    RT_MEMCPY_LDM_OCTETS,   (4 * 10)        @ one LDMIA/STMIA pair moves R3-R12 = 10 words
        .equ    RT_MEMCPY_LDM_UNROLL,   9               @ LDMIA/STMIA pairs per pass of the big word loop
        .equ    RT_MEMCPY_WORD_OCTETS,  4
        .equ    RT_MEMCPY_HALF_OCTETS,  2
        .equ    RT_MEMCPY_HALF_UNROLL,  16              @ halfword moves per pass of the unrolled loop
        .equ    RT_MEMCPY_BYTE_UNROLL,  16              @ byte moves per pass of the unrolled loop

        .type   rt_memcpy_asm, %function
        .thumb_func
rt_memcpy_asm:
        CBZ     R0, rt_memcpy_ret                       @ return if pdest == NULL
        CBZ     R1, rt_memcpy_ret                       @ return if psrc  == NULL
        CBNZ    R2, rt_memcpy_3                         @ continue only if count != 0

rt_memcpy_ret:
        BX      LR                                      @ early exit: R0 still holds pdest

rt_memcpy_3:
        PUSH    {R0, R3-R12, LR}                        @ save scratch regs, pdest (R0 slot) and return address


chk_align_32:                                           @ do dest & src share the same offset inside a word?
        AND     R3, R0, #0x03
        AND     R4, R1, #0x03
        CMP     R3, R4
        BNE     chk_align_16                            @ no: try halfword alignment instead

        RSB     R3, R3, #0x04                           @ R3 = (4 - offset) & 3 = 0..3 octets needed to reach
        AND     R3, R3, #0x03                           @      the next 32-bit boundary

pre_copy_1:                                             @ R3 = alignment octets left, R2 = octets left
        CMP     R3, #1
        BCC     copy_32_1                               @ aligned now: start the real 32-bit copy
        CMP     R2, #1                                  @ any data left to copy?
        BCS     pre_copy_1_cont
        B       rt_memcpy_end                           @ count exhausted before reaching the boundary

pre_copy_1_cont:
        LDRB    R4, [R1], #1
        STRB    R4, [R0], #1
        SUB     R3, R3, #1
        SUB     R2, R2, #1
        B       pre_copy_1


chk_align_16:                                           @ do dest & src share the same offset inside a halfword?
        AND     R3, R0, #0x01
        AND     R4, R1, #0x01
        CMP     R3, R4
        BEQ     pre_copy_2
        B       copy_08_1                               @ no common alignment: byte copy

pre_copy_2:                                             @ R3 = 1 if one octet is needed to reach a 16-bit boundary
        CMP     R3, #1                                  @ (count >= 1 is guaranteed by the entry check)
        BCC     copy_16_1                               @ aligned now: start the real 16-bit copy

        LDRB    R4, [R1], #1
        STRB    R4, [R0], #1
        SUB     R3, R3, #1
        SUB     R2, R2, #1
        B       pre_copy_2


copy_32_1:                                              @ 9 bursts of 10 words (360 octets) per pass
        CMP     R2, #(RT_MEMCPY_LDM_OCTETS * RT_MEMCPY_LDM_UNROLL)
        BCC     copy_32_2
        .rept   RT_MEMCPY_LDM_UNROLL
        LDMIA   R1!, {R3-R12}
        STMIA   R0!, {R3-R12}
        .endr
        SUB     R2, R2, #(RT_MEMCPY_LDM_OCTETS * RT_MEMCPY_LDM_UNROLL)
        B       copy_32_1

copy_32_2:                                              @ single bursts of 10 words (40 octets) per pass
        CMP     R2, #RT_MEMCPY_LDM_OCTETS
        BCC     copy_32_3
        LDMIA   R1!, {R3-R12}
        STMIA   R0!, {R3-R12}
        SUB     R2, R2, #RT_MEMCPY_LDM_OCTETS
        B       copy_32_2

copy_32_3:                                              @ remaining whole 32-bit words
        CMP     R2, #RT_MEMCPY_WORD_OCTETS
        BCC     copy_16_1
        LDR     R3, [R1], #4
        STR     R3, [R0], #4
        SUB     R2, R2, #RT_MEMCPY_WORD_OCTETS
        B       copy_32_3


copy_16_1:                                              @ 16 halfwords (32 octets) per pass
        CMP     R2, #(RT_MEMCPY_HALF_OCTETS * RT_MEMCPY_HALF_UNROLL)
        BCC     copy_16_2
        .rept   RT_MEMCPY_HALF_UNROLL
        LDRH    R3, [R1], #2
        STRH    R3, [R0], #2
        .endr
        SUB     R2, R2, #(RT_MEMCPY_HALF_OCTETS * RT_MEMCPY_HALF_UNROLL)
        B       copy_16_1

copy_16_2:                                              @ remaining whole 16-bit halfwords
        CMP     R2, #RT_MEMCPY_HALF_OCTETS
        BCC     copy_08_1
        LDRH    R3, [R1], #2
        STRH    R3, [R0], #2
        SUB     R2, R2, #RT_MEMCPY_HALF_OCTETS
        B       copy_16_2


copy_08_1:                                              @ 16 octets per pass
        CMP     R2, #RT_MEMCPY_BYTE_UNROLL
        BCC     copy_08_2
        .rept   RT_MEMCPY_BYTE_UNROLL
        LDRB    R3, [R1], #1
        STRB    R3, [R0], #1
        .endr
        SUB     R2, R2, #RT_MEMCPY_BYTE_UNROLL
        B       copy_08_1

copy_08_2:                                              @ remaining single octets
        CMP     R2, #1
        BCC     rt_memcpy_end
        LDRB    R3, [R1], #1
        STRB    R3, [R0], #1
        SUB     R2, R2, #1
        B       copy_08_2


rt_memcpy_end:
        POP     {R0, R3-R12, PC}                        @ R0 = original pdest, restore scratch regs, return

        .size   rt_memcpy_asm, . - rt_memcpy_asm


.end
Loading