From 05893bb25cd0f6ebdc76f1c47ce038b0bb631e47 Mon Sep 17 00:00:00 2001
From: Antonio <adropulic@gmail.com>
Date: Mon, 5 Dec 2022 21:19:04 +0100
Subject: [PATCH] inline Word fn

After 5b84ad4, a generic rc5 performance regression was noticed.
After analyzing the generated asm, I noticed that while the top-level
generic impl does get inlined, the actual algorithm impl differs in
that it does not inline the Word functions. After adding inline
annotations, performance is once again comparable.
---
 rc5/src/core/primitives.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/rc5/src/core/primitives.rs b/rc5/src/core/primitives.rs
index 0812e8d1..5e3e80b8 100644
--- a/rc5/src/core/primitives.rs
+++ b/rc5/src/core/primitives.rs
@@ -49,30 +49,36 @@ impl Word for u32 {
     const P: Self = 0xb7e15163;
     const Q: Self = 0x9e3779b9;
 
+    #[inline(always)]
     fn wrapping_add(self, rhs: Self) -> Self {
         u32::wrapping_add(self, rhs)
     }
-
+    #[inline(always)]
     fn wrapping_sub(self, rhs: Self) -> Self {
         u32::wrapping_sub(self, rhs)
     }
 
+    #[inline(always)]
     fn rotate_left(self, n: Self) -> Self {
         u32::rotate_left(self, n)
     }
 
+    #[inline(always)]
     fn rotate_right(self, n: Self) -> Self {
         u32::rotate_right(self, n)
     }
 
+    #[inline(always)]
     fn from_le_bytes(bytes: &GenericArray<u8, Self::Bytes>) -> Self {
         u32::from_le_bytes(bytes.as_slice().try_into().unwrap())
     }
 
+    #[inline(always)]
     fn to_le_bytes(self) -> GenericArray<u8, Self::Bytes> {
         u32::to_le_bytes(self).into()
     }
 
+    #[inline(always)]
     fn bitxor(self, other: Self) -> Self {
         <u32 as BitXor>::bitxor(self, other)
     }