From b02bcc568d2465cf873c1f90d220bb0979dc0150 Mon Sep 17 00:00:00 2001 From: Daniel Gibson Date: Sun, 6 Sep 2020 04:54:51 +0200 Subject: [PATCH] Disable broken idSIMD_SSE::UpSampleOGGTo44kHz() It corrupted the stack when called with buffers allocated on the stack and numSamples that are not a multiple of four, apparently by writing 4 floats too many, at least in the 22kHz Stereo case. This caused the crash described in https://github.com/dhewm/dhewm3/issues/303#issuecomment-678809662 Now it just uses the generic C code, like all platforms besides MSVC/x86 already do. --- idlib/math/Simd_SSE.cpp | 5 +++++ idlib/math/Simd_SSE.h | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/idlib/math/Simd_SSE.cpp b/idlib/math/Simd_SSE.cpp index 4c00500a..609cb59d 100644 --- a/idlib/math/Simd_SSE.cpp +++ b/idlib/math/Simd_SSE.cpp @@ -17164,6 +17164,10 @@ void idSIMD_SSE::UpSamplePCMTo44kHz( float *dest, const short *src, const int nu } } + +// DG: at least in the 22kHz Stereo OGG case with numSamples % 4 != 0 this is broken (writes 4 floats too many, which can destroy the stack, see #303), +// so let's just not use it anymore - it's MSVC+32bit only anyway and I doubt it gets noticeable speedups, so I don't feel like trying to understand and fix it.
+#if 0 /* ============ SSE_UpSample11kHzMonoOGGTo44kHz @@ -17474,6 +17478,7 @@ void idSIMD_SSE::UpSampleOGGTo44kHz( float *dest, const float * const *ogg, cons assert( 0 ); } } +#endif // 0 (DG: commenting out all the OGG-related SSE code) /* ============ diff --git a/idlib/math/Simd_SSE.h b/idlib/math/Simd_SSE.h index 3c66a551..385bac89 100644 --- a/idlib/math/Simd_SSE.h +++ b/idlib/math/Simd_SSE.h @@ -135,7 +135,8 @@ class idSIMD_SSE : public idSIMD_MMX { virtual int VPCALL CreateVertexProgramShadowCache( idVec4 *vertexCache, const idDrawVert *verts, const int numVerts ); virtual void VPCALL UpSamplePCMTo44kHz( float *dest, const short *pcm, const int numSamples, const int kHz, const int numChannels ); - virtual void VPCALL UpSampleOGGTo44kHz( float *dest, const float * const *ogg, const int numSamples, const int kHz, const int numChannels ); + // DG: the following is broken at least in the 22kHz Stereo case with numSamples % 4 != 0 (writes 4 floats too many, which can destroy the stack), so let's not use it anymore. + //virtual void VPCALL UpSampleOGGTo44kHz( float *dest, const float * const *ogg, const int numSamples, const int kHz, const int numChannels ); virtual void VPCALL MixSoundTwoSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] ); virtual void VPCALL MixSoundTwoSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] ); virtual void VPCALL MixSoundSixSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] );