Skip to content

Commit

Permalink
v4.1 updates
Browse files Browse the repository at this point in the history
  • Loading branch information
NavNTCMP committed Nov 23, 2020
1 parent 71eb7cd commit 9fcfd7c
Show file tree
Hide file tree
Showing 295 changed files with 30,624 additions and 10,833 deletions.
2 changes: 2 additions & 0 deletions applications/_libs/cmakelists.txt
Expand Up @@ -6,6 +6,8 @@ add_subdirectory(cmp_fileio)
add_subdirectory(cmp_gui)
add_subdirectory(cmp_math)
add_subdirectory(cmp_mesh)
# The mesh compressor library is only added to the build when
# OPTION_BUILD_DRACO evaluates to true.
if(OPTION_BUILD_DRACO)
    add_subdirectory(cmp_meshcompressor)
endif()
add_subdirectory(cmp_meshoptimizer)
add_subdirectory(gpu_decode)
4 changes: 2 additions & 2 deletions applications/_libs/cmp_fileio/cmakelists.txt
Expand Up @@ -11,9 +11,9 @@ target_sources(CMP_FileIO PRIVATE
)

target_include_directories(CMP_FileIO PUBLIC

${LEGACY_LOCATION}
./
${LEGACY_LOCATION}
${OpenEXR_INCLUDE_DIRS}
)

set_target_properties(CMP_FileIO PROPERTIES FOLDER ${FOLDER_NAME})
19 changes: 12 additions & 7 deletions applications/_libs/cmp_math/cmakelists.txt
@@ -1,8 +1,11 @@

add_library(CMP_Math)

# This is the current location of these files. They should be moved here
set(LEGACY_LOCATION ../../../cmp_framework/common)
# When CMP_HOST_WINDOWS is set, define CMP_USE_XMMINTRIN for CMP_Math.
# The definition is PUBLIC, so it is also applied to targets that link
# against CMP_Math.
if(CMP_HOST_WINDOWS)
    # Bare name on purpose: target_compile_definitions() adds the -D itself.
    target_compile_definitions(CMP_Math PUBLIC
        CMP_USE_XMMINTRIN
    )
endif()

target_sources(CMP_Math PRIVATE

Expand All @@ -19,18 +22,20 @@ target_sources(CMP_Math PRIVATE
jmlvec2.h
jmlvec3.h
jrtcommon.h
${LEGACY_LOCATION}/mathmacros.h
${PROJECT_SOURCE_DIR}/cmp_framework/common/mathmacros.h
tootlepch.h
)

# Include paths used by CMP_Math and (PUBLIC) by everything linking to it:
# this directory plus the cmp_core shader and source directories.
target_include_directories(CMP_Math PUBLIC
    ./
    ${PROJECT_SOURCE_DIR}/cmp_core/shaders
    ${PROJECT_SOURCE_DIR}/cmp_core/source
)

target_link_libraries(CMP_Math PRIVATE

CMP_Core
# CMP_Core
)

set_target_properties(CMP_Math PROPERTIES FOLDER ${FOLDER_NAME})
set_target_properties(CMP_Math PROPERTIES
FOLDER "Libs"
)
48 changes: 37 additions & 11 deletions applications/_libs/cmp_math/cmp_math_common.cpp
@@ -1,5 +1,5 @@
//=====================================================================
// Copyright 2018 (c), Advanced Micro Devices, Inc. All rights reserved.
// Copyright 2020 (c), Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
Expand Down Expand Up @@ -34,7 +34,8 @@ float cpu_sqrtf(float * pIn) {
return sqrtf(*pIn);
}

#ifndef _LINUX
#ifdef CMP_USE_XMMINTRIN
#ifndef __linux__
//---------------------------------------------
// SSE: Computes square root of a float value
//---------------------------------------------
Expand All @@ -45,6 +46,7 @@ float sse_sqrtf( float *pIn ) {
return val.m128_f32[0];
}
#endif
#endif

//-------------------------------------------------
// CPU: Computes 1 / (square root of a float value)
Expand All @@ -57,16 +59,29 @@ float cpu_rsqf(float *f) {
return 0.0f;
}

#ifndef _LINUX
#ifdef CMP_USE_XMMINTRIN
#ifndef __linux__
//-------------------------------------------------
// SSE: Computes 1 / (square root of a float value)
//-------------------------------------------------
float sse_rsqf(float *v) {
#ifdef CMP_USE_RSQ_RSQR
float sse_rsqf(float* v)
{
__m128 val = _mm_load1_ps(v);
val = _mm_rsqrt_ss(val);
val = _mm_rsqrt_ss(val);
float frsq = val.m128_f32[0];
return (0.5f * frsq) * (3.0f - (*v * frsq) * frsq);
return (0.5f * frsq) * (3.0f - (*v * frsq) * frsq);
};
#else
// SSE: computes 1 / sqrt(*v) with a scalar square root followed by a scalar
// divide (full-precision path, no reciprocal-square-root estimate).
//   v : pointer to the input value; dereferenced unconditionally.
// Returns lane 0 of the division result.
float sse_rsqf(float *v) {
    __m128 val  = _mm_set_ss(*v);   // Copy float and zero the upper 3 elements
    __m128 val1 = _mm_set_ss(1.0f);
    val = _mm_sqrt_ss(val);
    val = _mm_div_ss(val1, val);
    // _mm_cvtss_f32 reads lane 0 on every compiler; the m128_f32 member used
    // previously only exists in MSVC's definition of __m128.
    return _mm_cvtss_f32(val);
}
#endif
#endif
#endif

//---------------------------------------------
Expand All @@ -76,13 +91,15 @@ float cpu_minf(float l1, float r1) {
return (l1 < r1 ? l1 : r1);
}

#ifndef _LINUX
#ifdef CMP_USE_XMMINTRIN
#ifndef __linux__
// SSE: returns the smaller of two floats without branching.
float sse_minf(float a, float b) {
    // Each scalar goes into lane 0 of its own vector; MINSS picks the minimum.
    const __m128 lhs     = _mm_set_ss(a);
    const __m128 rhs     = _mm_set_ss(b);
    const __m128 smaller = _mm_min_ss(lhs, rhs);
    _mm_store_ss(&a, smaller);
    return a;
}
#endif
#endif

//---------------------------------------------
// CPU: Computes max of two float values
Expand All @@ -91,13 +108,15 @@ float cpu_maxf(float l1, float r1) {
return (l1 > r1 ? l1 : r1);
}

#ifndef _LINUX
#ifdef CMP_USE_XMMINTRIN
#ifndef __linux__
// SSE: returns the larger of two floats without branching.
float sse_maxf(float a, float b) {
    // Each scalar goes into lane 0 of its own vector; MAXSS picks the maximum.
    const __m128 lhs    = _mm_set_ss(a);
    const __m128 rhs    = _mm_set_ss(b);
    const __m128 larger = _mm_max_ss(lhs, rhs);
    _mm_store_ss(&a, larger);
    return a;
}
#endif
#endif

//================================================
// Clamp the value in the range [minval .. maxval]
Expand All @@ -111,12 +130,14 @@ float cpu_clampf(float value, float minval, float maxval) {
return value;
}

#ifndef _LINUX
#ifdef CMP_USE_XMMINTRIN
#ifndef __linux__
// SSE: clamps val to [minval .. maxval] as min(max(val, minval), maxval),
// using scalar MAXSS / MINSS instead of branches.
float sse_clampf(float val, float minval, float maxval) {
    const __m128 raised  = _mm_max_ss(_mm_set_ss(val), _mm_set_ss(minval));
    const __m128 clamped = _mm_min_ss(raised, _mm_set_ss(maxval));
    _mm_store_ss(&val, clamped);
    return val;
}
#endif
#endif

void cpu_averageRGB(unsigned char *src_rgba_block) {
float medianR = 0.0f, medianG = 0.0f, medianB = 0.0f;
Expand Down Expand Up @@ -184,7 +205,8 @@ float cpu_lerp2(CMP_Vec4uc C1, CMP_Vec4uc CA, CMP_Vec4uc CB, CMP_Vec4uc C2, CMP_
return float(min1+min2);
}

#ifndef _LINUX
#ifdef CMP_USE_XMMINTRIN
#ifndef __linux__
float sse_lerp2(CMP_Vec4uc C1, CMP_Vec4uc CA, CMP_Vec4uc CB, CMP_Vec4uc C2, CMP_MATH_BYTE *encode1, CMP_MATH_BYTE *encode2) {
// Initial Setup
__m128 iC1, iC2, iCA, iCB; //Load auchars into _m128
Expand Down Expand Up @@ -301,6 +323,7 @@ void cmp_set_fma3_features() {
cmp_lerp2 = fma_lerp2;
}
#endif
#endif


void cmp_set_cpu_features() {
Expand All @@ -313,7 +336,9 @@ void cmp_set_cpu_features() {
cmp_sqrtf = cpu_sqrtf;
}

#ifndef _LINUX

#ifdef CMP_USE_XMMINTRIN
#ifndef __linux__
void cmp_set_sse2_features() {
cmp_clampf = sse_clampf;
cmp_lerp2 = sse_lerp2;
Expand All @@ -323,6 +348,7 @@ void cmp_set_sse2_features() {
cmp_sqrtf = sse_sqrtf;
}
#endif
#endif

//---------------------------------
// User Interface to the CMP_MATH
Expand Down
6 changes: 4 additions & 2 deletions applications/_libs/cmp_math/cmp_math_common.h
Expand Up @@ -43,10 +43,12 @@
typedef unsigned char CMP_MATH_BYTE;
typedef unsigned int CMP_MATH_DWORD;

#ifndef _LINUX
#ifdef CMP_USE_XMMINTRIN
#ifndef __linux__
extern void cmp_set_fma3_features();
extern void cmp_set_sse2_features();
#endif
#endif

extern void cmp_set_cpu_features();

Expand All @@ -61,7 +63,7 @@ extern float cpu_rsqf(float *f);
extern float cpu_sqrtf(float * pIn);


#ifndef _LINUX
#ifndef __linux__
extern float sse_clampf(float value, float minval, float maxval);
extern float sse_lerp2(CMP_Vec4uc C1, CMP_Vec4uc CA, CMP_Vec4uc CB, CMP_Vec4uc C2, CMP_MATH_BYTE *encode1, CMP_MATH_BYTE *encode2);
extern float sse_maxf(float l1, float r1);
Expand Down
8 changes: 5 additions & 3 deletions applications/_libs/cmp_math/cmp_math_cpuid.cpp
Expand Up @@ -30,7 +30,7 @@
void cmp_cpuid(int cpuInfo[4], int function_id) {
// subfunction_id = 0
#ifdef _WIN32
__cpuidex(cpuInfo, function_id, 0);
__cpuidex(cpuInfo, function_id, 0); // defined in intrin.h
#else
// To Do
//__cpuid_count(0, function_id, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
Expand All @@ -47,7 +47,7 @@ cmp_cpufeatures cmp_get_cpufeatures() {
cpu.feature[i] = 0;
}

#ifndef _LINUX
#ifndef __linux__
cmp_cpuid(cpuInfo,0);
int nIds = cpuInfo[0];

Expand Down Expand Up @@ -118,7 +118,8 @@ void cmp_autodected_cpufeatures(CMP_MATH_BYTE set) {
if ((set & CMP_MATH_USE_CPU) > 0) return;


#ifndef _LINUX
#ifdef CMP_USE_XMMINTRIN
#ifndef __linux__
// Auto detect CPU features to enable
for (int i = 0; i<SSP_SSE_COUNT; i++) {
if (cpu.feature[i] > 0) {
Expand All @@ -139,6 +140,7 @@ void cmp_autodected_cpufeatures(CMP_MATH_BYTE set) {
}
}
#endif
#endif

}

Expand Down
2 changes: 1 addition & 1 deletion applications/_libs/cmp_math/jml.h
Expand Up @@ -24,7 +24,7 @@
#ifndef _JML_H_
#define _JML_H_

#ifdef _LINUX
#ifdef __linux__
#define ALIGN16
#else
// helpful alias for 16-byte alignment
Expand Down
17 changes: 15 additions & 2 deletions applications/_libs/cmp_math/jmlfuncs.h
@@ -1,5 +1,5 @@
//=====================================================================
// Copyright 2006-2018 (c), Advanced Micro Devices, Inc. All rights reserved.
// Copyright 2006-2020 (c), Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
Expand Down Expand Up @@ -124,14 +124,27 @@ inline Vec3d Cross(const Vec3d& v1, const Vec3d& v2) {
/// Returns a random float between 0 and 1
float RandomFloat();

#ifdef CMP_USE_RSQ_RSQR
/// Fast approximate reciprocal of v.
/// Takes the hardware reciprocal estimate x0 and refines it with one
/// Newton-Raphson step: x1 = (x0 + x0) - (x0 * v) * x0.
/// \param v  value to invert
/// \return   approximation of 1 / v
inline float FastRCP(float v)
{
    // _mm_load_ss puts v in lane 0 and zeroes lanes 1-3. Scalar (_ss) forms are
    // used throughout so no work is done on the zeroed lanes; the packed forms
    // would evaluate 1/0 and inf*0 there only to discard the results.
    const __m128 a      = _mm_load_ss(&v);
    const __m128 Ra0    = _mm_rcp_ss(a);
    const __m128 result = _mm_sub_ss(_mm_add_ss(Ra0, Ra0), _mm_mul_ss(_mm_mul_ss(Ra0, a), Ra0));
    float x;
    _mm_store_ss(&x, result);
    return x;
}
#else
inline float FastRCP(float v) {
__m128 a = _mm_load_ss(&v);
__m128 Ra0 = _mm_rcp_ps(a);
__m128 val1 = _mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f);
__m128 Ra0 = _mm_div_ps(val1, a); //__m128 Ra0 = _mm_rcp_ps(a);
__m128 result = _mm_sub_ps(_mm_add_ps(Ra0, Ra0), _mm_mul_ps(_mm_mul_ps(Ra0, a), Ra0));
float x;
_mm_store_ss(&x, result);
return x;
};
#endif

/// Transformation of a point (w=1) by an arbitrary matrix, multiplies from right
/// It is safe for pPoint to equal pPointOut
Expand Down
30 changes: 25 additions & 5 deletions applications/_libs/cmp_math/jmlscalar.h
Expand Up @@ -27,23 +27,43 @@
#include <stdlib.h>

namespace JML {

/// Returns the next rand() value divided by RAND_MAX, i.e. a pseudo-random
/// float in the range [0, 1].
inline float RandomFloat() {
    const float sample = static_cast<float>(rand());
    const float scale  = static_cast<float>(RAND_MAX);
    return sample / scale;
}


inline float FastSQRT(float v) {
#ifdef CMP_USE_RSQ_RSQR
inline float FastSQRT(float v)
{
__m128 val = _mm_load1_ps(&v);
val = _mm_sqrt_ss(val);
val = _mm_sqrt_ss(val);
return val.m128_f32[0];
};

inline float FastRSQ(float v) {
inline float FastRSQ(float v)
{
__m128 val = _mm_load1_ps(&v);
val = _mm_rsqrt_ss(val);
val = _mm_rsqrt_ss(val);
float frsq = val.m128_f32[0];
return (0.5f * frsq) * (3.0f - (v * frsq) * frsq);
};
#else
/// Square root of v computed with the scalar SSE square-root instruction.
/// \param v  input value
/// \return   sqrt(v), taken from lane 0 of the SSE result
inline float FastSQRT(float v) {
    __m128 val = _mm_set_ss(v);   // v in lane 0, upper lanes zeroed
    val = _mm_sqrt_ss(val);
    // _mm_cvtss_f32 works on every compiler; val.m128_f32[0] is MSVC-only.
    return _mm_cvtss_f32(val);
}

/// Reciprocal square root of v.
/// Computes 1 / sqrt(v) with a scalar SSE square root and divide, then passes
/// the value through one Newton-Raphson step,
/// (0.5 * y) * (3 - (v * y) * y), before returning it.
/// \param v  input value
/// \return   approximation of 1 / sqrt(v)
inline float FastRSQ(float v) {
    __m128 val  = _mm_set_ss(v);      // v in lane 0, upper lanes zeroed
    __m128 val1 = _mm_set_ss(1.0f);
    val = _mm_sqrt_ss(val);
    val = _mm_div_ss(val1, val);
    // _mm_cvtss_f32 works on every compiler; val.m128_f32[0] is MSVC-only.
    float frsq = _mm_cvtss_f32(val);
    return (0.5f * frsq) * (3.0f - (v * frsq) * frsq);
}
#endif

};

Expand Down

0 comments on commit 9fcfd7c

Please sign in to comment.