v4.1 updates

GPUOpen-Tools · Nov 23, 2020 · 9fcfd7c · 9fcfd7c
1 parent 71eb7cd
commit 9fcfd7c
Show file tree

Hide file tree

Showing 295 changed files with 30,624 additions and 10,833 deletions.
diff --git a/applications/_libs/cmakelists.txt b/applications/_libs/cmakelists.txt
@@ -6,6 +6,8 @@ add_subdirectory(cmp_fileio)
 add_subdirectory(cmp_gui)
 add_subdirectory(cmp_math)
 add_subdirectory(cmp_mesh)
+if (OPTION_BUILD_DRACO)
 add_subdirectory(cmp_meshcompressor)
+endif()
 add_subdirectory(cmp_meshoptimizer)
 add_subdirectory(gpu_decode)
diff --git a/applications/_libs/cmp_fileio/cmakelists.txt b/applications/_libs/cmp_fileio/cmakelists.txt
@@ -11,9 +11,9 @@ target_sources(CMP_FileIO PRIVATE
 )
 
 target_include_directories(CMP_FileIO PUBLIC
-
-    ${LEGACY_LOCATION}
     ./
+    ${LEGACY_LOCATION}
+    ${OpenEXR_INCLUDE_DIRS}
 )
 
 set_target_properties(CMP_FileIO PROPERTIES FOLDER ${FOLDER_NAME})
diff --git a/applications/_libs/cmp_math/cmakelists.txt b/applications/_libs/cmp_math/cmakelists.txt
@@ -1,8 +1,11 @@
 
 add_library(CMP_Math)
 
-# This is the current location of these files. They should be moved here
-set(LEGACY_LOCATION ../../../cmp_framework/common)
+# CMP_USE_XMMINTRIN switches on the SSE code paths in the CMP_Math sources;
+# it is only defined when CMP_HOST_WINDOWS is set.
+if(CMP_HOST_WINDOWS)
+    target_compile_definitions(CMP_Math PUBLIC CMP_USE_XMMINTRIN)
+endif()
 
 target_sources(CMP_Math PRIVATE
 
@@ -19,18 +22,20 @@ target_sources(CMP_Math PRIVATE
     jmlvec2.h
     jmlvec3.h
     jrtcommon.h
-    ${LEGACY_LOCATION}/mathmacros.h
+    ${PROJECT_SOURCE_DIR}/cmp_framework/common/mathmacros.h
     tootlepch.h
 )
 
 target_include_directories(CMP_Math PUBLIC
-
     ./
+    ${PROJECT_SOURCE_DIR}/cmp_core/shaders
+    ${PROJECT_SOURCE_DIR}/cmp_core/source
 )
 
 target_link_libraries(CMP_Math PRIVATE
-
-    CMP_Core
+ #   CMP_Core
 )
 
-set_target_properties(CMP_Math PROPERTIES FOLDER ${FOLDER_NAME})
+# Place CMP_Math under the "Libs" folder (FOLDER target property).
+set_target_properties(CMP_Math PROPERTIES
+    FOLDER "Libs")
diff --git a/applications/_libs/cmp_math/cmp_math_common.cpp b/applications/_libs/cmp_math/cmp_math_common.cpp
@@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright 2018 (c), Advanced Micro Devices, Inc. All rights reserved.
+// Copyright 2020 (c), Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -34,7 +34,8 @@ float cpu_sqrtf(float * pIn) {
     return sqrtf(*pIn);
 }
 
-#ifndef _LINUX
+#ifdef CMP_USE_XMMINTRIN
+#ifndef __linux__
 //---------------------------------------------
 // SSE: Computes square root of  a float value
 //---------------------------------------------
@@ -45,6 +46,7 @@ float sse_sqrtf(  float *pIn ) {
     return val.m128_f32[0];
 }
 #endif
+#endif
 
 //-------------------------------------------------
 // CPU: Computes 1 / (square root of a float value)
@@ -57,16 +59,29 @@ float cpu_rsqf(float *f) {
         return 0.0f;
 }
 
-#ifndef _LINUX
+#ifdef CMP_USE_XMMINTRIN
+#ifndef __linux__
 //-------------------------------------------------
 // SSE: Computes 1 / (square root of a float value)
 //-------------------------------------------------
-float sse_rsqf(float *v) {
+#ifdef CMP_USE_RSQ_RSQR
+float sse_rsqf(float* v)
+{
     __m128 val = _mm_load1_ps(v);
-    val = _mm_rsqrt_ss(val);
+    val        = _mm_rsqrt_ss(val);
     float frsq = val.m128_f32[0];
-    return (0.5f * frsq) * (3.0f - (*v  * frsq) * frsq);
+    return (0.5f * frsq) * (3.0f - (*v * frsq) * frsq);
 };
+#else
+float sse_rsqf(float *v) {
+    // Computes 1/sqrt(*v) with a scalar sqrt followed by a scalar divide (no rsqrt estimate).
+    // _mm_set_ss copies the float into lane 0 and zeroes the upper 3 elements.
+    const __m128 root = _mm_sqrt_ss(_mm_set_ss(*v));
+    const __m128 inv  = _mm_div_ss(_mm_set_ss(1.0f), root);
+    return _mm_cvtss_f32(inv);  // lane 0, read without the MSVC-only m128_f32 member
+}
+#endif
+#endif
 #endif
 
 //---------------------------------------------
@@ -76,13 +91,15 @@ float cpu_minf(float l1, float r1) {
     return (l1 < r1 ? l1 : r1);
 }
 
-#ifndef _LINUX
+#ifdef CMP_USE_XMMINTRIN
+#ifndef __linux__
 float sse_minf( float a, float b ) {
     // Branchless SSE min.
     _mm_store_ss( &a, _mm_min_ss(_mm_set_ss(a),_mm_set_ss(b)) );
     return a;
 }
 #endif
+#endif
 
 //---------------------------------------------
 // CPU: Computes max of two float values
@@ -91,13 +108,15 @@ float cpu_maxf(float l1, float r1) {
     return (l1 > r1 ? l1 : r1);
 }
 
-#ifndef _LINUX
+#ifdef CMP_USE_XMMINTRIN
+#ifndef __linux__
 float sse_maxf( float a, float b ) {
     // Branchless SSE max.
     _mm_store_ss( &a, _mm_max_ss(_mm_set_ss(a),_mm_set_ss(b)) );
     return a;
 }
 #endif
+#endif
 
 //================================================
 // Clamp the value in the range [minval .. maxval]
@@ -111,12 +130,14 @@ float cpu_clampf(float value, float minval, float maxval) {
     return value;
 }
 
-#ifndef _LINUX
+#ifdef CMP_USE_XMMINTRIN
+#ifndef __linux__
 float sse_clampf( float val, float minval, float maxval ) {
     _mm_store_ss( &val, _mm_min_ss( _mm_max_ss(_mm_set_ss(val),_mm_set_ss(minval)), _mm_set_ss(maxval) ) );
     return val;
 }
 #endif
+#endif
 
 void cpu_averageRGB(unsigned char *src_rgba_block) {
     float medianR = 0.0f, medianG = 0.0f, medianB = 0.0f;
@@ -184,7 +205,8 @@ float cpu_lerp2(CMP_Vec4uc C1, CMP_Vec4uc CA, CMP_Vec4uc CB, CMP_Vec4uc C2, CMP_
     return float(min1+min2);
 }
 
-#ifndef _LINUX
+#ifdef CMP_USE_XMMINTRIN
+#ifndef __linux__
 float sse_lerp2(CMP_Vec4uc C1, CMP_Vec4uc CA, CMP_Vec4uc CB, CMP_Vec4uc C2, CMP_MATH_BYTE *encode1, CMP_MATH_BYTE *encode2) {
     // Initial Setup
     __m128 iC1, iC2, iCA, iCB; //Load auchars into _m128
@@ -301,6 +323,7 @@ void cmp_set_fma3_features() {
     cmp_lerp2 = fma_lerp2;
 }
 #endif
+#endif
 
 
 void cmp_set_cpu_features() {
@@ -313,7 +336,9 @@ void cmp_set_cpu_features() {
     cmp_sqrtf    = cpu_sqrtf;
 }
 
-#ifndef _LINUX
+
+#ifdef CMP_USE_XMMINTRIN
+#ifndef __linux__
 void cmp_set_sse2_features() {
     cmp_clampf   = sse_clampf;
     cmp_lerp2    = sse_lerp2;
@@ -323,6 +348,7 @@ void cmp_set_sse2_features() {
     cmp_sqrtf    = sse_sqrtf;
 }
 #endif
+#endif
 
 //---------------------------------
 // User Interface to the CMP_MATH

diff --git a/applications/_libs/cmp_math/cmp_math_common.h b/applications/_libs/cmp_math/cmp_math_common.h
@@ -43,10 +43,12 @@
 typedef unsigned char CMP_MATH_BYTE;
 typedef unsigned int  CMP_MATH_DWORD;
 
-#ifndef _LINUX
+#ifdef CMP_USE_XMMINTRIN
+#ifndef __linux__
 extern void cmp_set_fma3_features();
 extern void cmp_set_sse2_features();
 #endif
+#endif
 
 extern void cmp_set_cpu_features();
 
@@ -61,7 +63,7 @@ extern float cpu_rsqf(float *f);
 extern float cpu_sqrtf(float * pIn);
 
 
-#ifndef _LINUX
+#ifndef __linux__
 extern float sse_clampf(float value, float minval, float maxval);
 extern float sse_lerp2(CMP_Vec4uc C1, CMP_Vec4uc CA, CMP_Vec4uc CB, CMP_Vec4uc C2, CMP_MATH_BYTE *encode1, CMP_MATH_BYTE *encode2);
 extern float sse_maxf(float l1, float r1);

diff --git a/applications/_libs/cmp_math/cmp_math_cpuid.cpp b/applications/_libs/cmp_math/cmp_math_cpuid.cpp
@@ -30,7 +30,7 @@
 void cmp_cpuid(int cpuInfo[4], int function_id) {
     // subfunction_id = 0
 #ifdef _WIN32
-    __cpuidex(cpuInfo, function_id, 0);
+    __cpuidex(cpuInfo, function_id, 0); // defined in intrin.h
 #else
     // To Do
     //__cpuid_count(0, function_id, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
@@ -47,7 +47,7 @@ cmp_cpufeatures cmp_get_cpufeatures() {
         cpu.feature[i] = 0;
     }
 
-#ifndef _LINUX
+#ifndef __linux__
     cmp_cpuid(cpuInfo,0);
     int nIds = cpuInfo[0];
 
@@ -118,7 +118,8 @@ void cmp_autodected_cpufeatures(CMP_MATH_BYTE set) {
     if ((set & CMP_MATH_USE_CPU) > 0) return;
 
 
-#ifndef _LINUX
+#ifdef CMP_USE_XMMINTRIN
+#ifndef __linux__
     // Auto detect CPU features to enable
     for (int i = 0; i<SSP_SSE_COUNT; i++) {
         if (cpu.feature[i] > 0) {
@@ -139,6 +140,7 @@ void cmp_autodected_cpufeatures(CMP_MATH_BYTE set) {
         }
     }
 #endif
+#endif
 
 }
 

diff --git a/applications/_libs/cmp_math/jml.h b/applications/_libs/cmp_math/jml.h
@@ -24,7 +24,7 @@
 #ifndef _JML_H_
 #define _JML_H_
 
-#ifdef _LINUX
+#ifdef __linux__
 #define ALIGN16
 #else
 // helpful alias for 16-byte alignment

diff --git a/applications/_libs/cmp_math/jmlfuncs.h b/applications/_libs/cmp_math/jmlfuncs.h
@@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright 2006-2018 (c), Advanced Micro Devices, Inc. All rights reserved.
+// Copyright 2006-2020 (c), Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -124,14 +124,27 @@ inline Vec3d Cross(const Vec3d& v1, const Vec3d& v2) {
 /// Returns a random float between 0 and 1
 float RandomFloat();
 
+#ifdef CMP_USE_RSQ_RSQR
+inline float FastRCP(float v)
+{
+    __m128 a      = _mm_load_ss(&v);
+    __m128 Ra0    = _mm_rcp_ps(a);
+    __m128 result = _mm_sub_ps(_mm_add_ps(Ra0, Ra0), _mm_mul_ps(_mm_mul_ps(Ra0, a), Ra0));
+    float  x;
+    _mm_store_ss(&x, result);
+    return x;
+}
+#else
 inline float FastRCP(float v) {
     __m128 a   = _mm_load_ss(&v);
-    __m128 Ra0 = _mm_rcp_ps(a);
+    __m128 val1   = _mm_set_ss(1.0f);     // 1.0 in lane 0; upper lanes are zero, like a
+    __m128 Ra0    = _mm_div_ss(val1, a);  // divide lane 0 only: a's upper lanes are 0, so _mm_div_ps would compute 1/0 there
     __m128 result = _mm_sub_ps(_mm_add_ps(Ra0, Ra0), _mm_mul_ps(_mm_mul_ps(Ra0, a), Ra0));
     float x;
     _mm_store_ss(&x, result);
     return x;
 };
+#endif
 
 /// Transformation of a point (w=1) by an arbitrary matrix, multiplies from right
 /// It is safe for pPoint to equal pPointOut

diff --git a/applications/_libs/cmp_math/jmlscalar.h b/applications/_libs/cmp_math/jmlscalar.h
@@ -27,23 +27,43 @@
 #include <stdlib.h>
 
 namespace JML {
+
 inline float RandomFloat() {
     return (float)rand() / (float) RAND_MAX;
 };
 
-
-inline float FastSQRT(float v) {
+#ifdef CMP_USE_RSQ_RSQR
+inline float FastSQRT(float v)
+{
     __m128 val = _mm_load1_ps(&v);
-    val = _mm_sqrt_ss(val);
+    val        = _mm_sqrt_ss(val);
     return val.m128_f32[0];
 };
 
-inline float FastRSQ(float v) {
+inline float FastRSQ(float v)
+{
     __m128 val = _mm_load1_ps(&v);
-    val = _mm_rsqrt_ss(val);
+    val        = _mm_rsqrt_ss(val);
     float frsq = val.m128_f32[0];
+    return (0.5f * frsq) * (3.0f - (v * frsq) * frsq);
+};
+#else
+inline float FastSQRT(float v) {
+    // Scalar square root: v is placed in lane 0 by _mm_set_ss and
+    // _mm_sqrt_ss takes the root of that lane only.
+    const __m128 root = _mm_sqrt_ss(_mm_set_ss(v));
+    return root.m128_f32[0];
+}
+
+inline float FastRSQ(float v) {
+    __m128 val  = _mm_set_ss(v); 
+    __m128 val1 = _mm_set_ss(1.0f);
+    val         = _mm_sqrt_ss(val);
+    val         = _mm_div_ss(val1, val);
+    float frsq  = val.m128_f32[0];
     return (0.5f * frsq) * (3.0f - (v  * frsq) * frsq);
 };
+#endif
 
 };