diff --git a/CMakeLists.txt b/CMakeLists.txt
index bb09d22..936eafc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -66,11 +66,11 @@ if (MSVC)
 endif ()
 
 # Compile options
-IF(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
-  SET(SIMD -msse4.1)
-ELSE(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
-  SET(SIMD -fPIC)
-ENDIF()
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") # bare name: still a valid condition when the variable is empty or unset
+  set (SIMD -msse4.1)
+elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+  set (SIMD -fPIC) # despite its name, the variable carries -fPIC here rather than a SIMD flag
+endif ()
 
 if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
     set (COMPILE_OPTIONS ${SIMD} -Wextra)
diff --git a/Include/NRD.h b/Include/NRD.h
index 93c5235..6b31ed4 100644
--- a/Include/NRD.h
+++ b/Include/NRD.h
@@ -29,8 +29,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 #define NRD_VERSION_MAJOR 4
 #define NRD_VERSION_MINOR 3
-#define NRD_VERSION_BUILD 3
-#define NRD_VERSION_DATE "12 October 2023"
+#define NRD_VERSION_BUILD 4
+#define NRD_VERSION_DATE "20 October 2023"
 
 #if defined(_MSC_VER)
     #define NRD_CALL __fastcall
diff --git a/README.md b/README.md
index c65c2a2..2e122ea 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# NVIDIA REAL-TIME DENOISERS v4.3.3 (NRD)
+# NVIDIA REAL-TIME DENOISERS v4.3.4 (NRD)
 
 [![Build NRD SDK](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml/badge.svg)](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml)
 
diff --git a/Resources/Version.h b/Resources/Version.h
index 955263a..354ebd3 100644
--- a/Resources/Version.h
+++ b/Resources/Version.h
@@ -23,6 +23,6 @@ Versioning rules:
 
 #define VERSION_MAJOR                   4
 #define VERSION_MINOR                   3
-#define VERSION_BUILD                   3
+#define VERSION_BUILD                   4
 
 #define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD encoding=NRD_NORMAL_ENCODING.NRD_ROUGHNESS_ENCODING)
diff --git a/Shaders/Include/Common.hlsli b/Shaders/Include/Common.hlsli
index aab17a8..b57cd3e 100644
--- a/Shaders/Include/Common.hlsli
+++ b/Shaders/Include/Common.hlsli
@@ -180,11 +180,24 @@ float4 GetBlurKernelRotation( compiletime const uint mode, uint2 pixelPos, float
     return baseRotator;
 }
 
+// IMPORTANT: use "IsInScreen2x2" in critical places
 float IsInScreen( float2 uv )
 {
     return float( all( saturate( uv ) == uv ) );
 }
 
+// Per-tap "inside rect" mask ( 1.0 or 0.0 ) for the 2x2 footprint starting at "footprintOrigin": a tap passes if 0 <= coord < rectSize
+// on both axes. Output layout: "x y" = taps ( 0, 0 ) and ( 1, 0 ), "z w" = taps ( 0, 1 ) and ( 1, 1 ) relative to the origin
+float4 IsInScreen2x2( float2 footprintOrigin, float2 rectSize )
+{
+    float4 p = footprintOrigin.xyxy + float4( 0, 0, 1, 1 ); // ( x, y, x + 1, y + 1 )
+
+    float4 r = float4( p >= 0.0 ); // 1.0 per component if not below 0
+    r *= float4( p < rectSize.xyxy ); // ...and strictly below "rectSize"
+
+    return r.xzxz * r.yyww; // per tap: column test * row test
+}
+
 float2 ApplyCheckerboardShift( float2 uv, uint mode, uint counter, float2 screenSize, float2 invScreenSize, uint frameIndex )
 {
     int2 uvi = int2( uv * screenSize );
@@ -385,6 +398,9 @@ float2 GetRelaxedRoughnessWeightParams( float m, float fraction = 1.0, float sen
 #define ComputeNonExponentialWeight( x, px, py ) \
     STL::Math::SmoothStep( 0.999, 0.001, abs( ( x ) * px + py ) )
 
+#define ComputeNonExponentialWeightWithSigma( x, px, py, sigma ) \
+    STL::Math::SmoothStep( 0.999, 0.001, abs( ( x ) * ( px ) + ( py ) ) - ( sigma ) * ( px ) ) // all args parenthesized: safe for compound expressions
+
 #if( NRD_USE_EXPONENTIAL_WEIGHTS == 1 )
     #define ComputeWeight( x, px, py )     ComputeExponentialWeight( x, px, py )
 #else
diff --git a/Shaders/Include/NRD.hlsli b/Shaders/Include/NRD.hlsli
index 7bd198d..631b593 100644
--- a/Shaders/Include/NRD.hlsli
+++ b/Shaders/Include/NRD.hlsli
@@ -512,7 +512,7 @@ float4 NRD_FrontEnd_UnpackNormalAndRoughness( float4 p, out float materialID )
     #if( NRD_ROUGHNESS_ENCODING == NRD_ROUGHNESS_ENCODING_SQRT_LINEAR )
         r.w *= r.w;
     #elif( NRD_ROUGHNESS_ENCODING == NRD_ROUGHNESS_ENCODING_SQ_LINEAR )
-        r.w = sqrt( r.w );
+        r.w = sqrt( saturate( r.w ) );
     #endif
 
     return r;
diff --git a/Shaders/Include/REBLUR/REBLUR_Common.hlsli b/Shaders/Include/REBLUR/REBLUR_Common.hlsli
index 33abb75..d190656 100644
--- a/Shaders/Include/REBLUR/REBLUR_Common.hlsli
+++ b/Shaders/Include/REBLUR/REBLUR_Common.hlsli
@@ -91,9 +91,8 @@ float4 UnpackData1( float4 p )
 uint PackData2( float fbits, float curvature, float virtualHistoryAmount )
 {
     // BITS:
-    // 0     - smbAllowCatRom
-    // 1-4   - smbOcclusion 2x2
-    // other - free // TODO: use if needed
+    // 0-3 - smbOcclusion 2x2
+    // 4-7 - vmbOcclusion 2x2
 
     uint p = uint( fbits + 0.5 );
     p |= uint( saturate( virtualHistoryAmount ) * 255.0 + 0.5 ) << 8;
diff --git a/Shaders/Include/REBLUR/REBLUR_Config.hlsli b/Shaders/Include/REBLUR/REBLUR_Config.hlsli
index ad5973c..95a80df 100644
--- a/Shaders/Include/REBLUR/REBLUR_Config.hlsli
+++ b/Shaders/Include/REBLUR/REBLUR_Config.hlsli
@@ -161,6 +161,14 @@ static const float3 g_Special8[ 8 ] =
     float3( -0.25 * sqrt( 2.0 ) , -0.25 * sqrt( 2.0 ) , 0.5 )
 };
 
+#ifdef REBLUR_DIRECTIONAL_OCCLUSION
+    #undef REBLUR_USE_CATROM_FOR_SURFACE_MOTION_IN_TA
+    #define REBLUR_USE_CATROM_FOR_SURFACE_MOTION_IN_TA          0
+
+    #undef REBLUR_USE_CATROM_FOR_VIRTUAL_MOTION_IN_TA
+    #define REBLUR_USE_CATROM_FOR_VIRTUAL_MOTION_IN_TA          0
+#endif
+
 // PERFORMANCE MODE: x1.25 perf boost by sacrificing IQ ( DIFFUSE_SPECULAR on RTX 3090 @ 1440p 2.05 vs 2.55 ms )
 #ifdef REBLUR_PERFORMANCE_MODE
     #undef REBLUR_USE_CATROM_FOR_SURFACE_MOTION_IN_TA
diff --git a/Shaders/Include/REBLUR/REBLUR_DiffuseSpecular_TemporalAccumulation.hlsli b/Shaders/Include/REBLUR/REBLUR_DiffuseSpecular_TemporalAccumulation.hlsli
index ecfe2c9..7715a21 100644
--- a/Shaders/Include/REBLUR/REBLUR_DiffuseSpecular_TemporalAccumulation.hlsli
+++ b/Shaders/Include/REBLUR/REBLUR_DiffuseSpecular_TemporalAccumulation.hlsli
@@ -8,7 +8,8 @@ distribution of this software and related documentation without an express
 license agreement from NVIDIA CORPORATION is strictly prohibited.
 */
 
-groupshared float4 s_Normal_MinHitDist[ BUFFER_Y ][ BUFFER_X ];
+groupshared float4 s_Normal_Roughness[ BUFFER_Y ][ BUFFER_X ];
+groupshared float s_HitDistForTracking[ BUFFER_Y ][ BUFFER_X ];
 
 void Preload( uint2 sharedPos, int2 globalPos )
 {
@@ -16,6 +17,7 @@ void Preload( uint2 sharedPos, int2 globalPos )
     uint2 globalIdUser = gRectOrigin + globalPos;
 
     float4 temp = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness[ globalIdUser ] );
+    s_Normal_Roughness[ sharedPos.y ][ sharedPos.x ] = temp;
 
     #ifdef REBLUR_SPECULAR
         #ifdef REBLUR_OCCLUSION
@@ -27,13 +29,13 @@ void Preload( uint2 sharedPos, int2 globalPos )
 
         REBLUR_TYPE spec = gIn_Spec[ pos ];
         #ifdef REBLUR_OCCLUSION
-            temp.w = ExtractHitDist( spec );
+            float hitDist = ExtractHitDist( spec );
         #else
-            temp.w = gSpecPrepassBlurRadius == 0.0 ? ExtractHitDist( spec ) : gIn_Spec_HitDistForTracking[ globalPos ];
+            float hitDist = gSpecPrepassBlurRadius == 0.0 ? ExtractHitDist( spec ) : gIn_Spec_HitDistForTracking[ globalPos ];
         #endif
-    #endif
 
-    s_Normal_MinHitDist[ sharedPos.y ][ sharedPos.x ] = temp;
+        s_HitDistForTracking[ sharedPos.y ][ sharedPos.x ] = hitDist;
+    #endif
 }
 
 [numthreads( GROUP_X, GROUP_Y, 1 )]
@@ -59,11 +61,13 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
     float3 Xv = STL::Geometry::ReconstructViewPosition( pixelUv, gFrustum, viewZ, gOrthoMode );
     float3 X = STL::Geometry::RotateVector( gViewToWorld, Xv );
 
-    // Find hit distance for tracking and averaged normal
-    int2 smemPos = threadPos + BORDER;
-    float4 t = s_Normal_MinHitDist[ smemPos.y ][ smemPos.x ];
-    float3 Navg = t.xyz;
-    float hitDistForTracking = t.w == 0.0 ? NRD_INF : t.w;
+    // Find hit distance for tracking, averaged normal and roughness variance
+    float3 Navg = 0.0;
+    #ifdef REBLUR_SPECULAR
+        float hitDistForTracking = NRD_INF;
+        float roughnessM1 = 0.0;
+        float roughnessM2 = 0.0;
+    #endif
 
     [unroll]
     for( j = 0; j <= BORDER * 2; j++ )
@@ -71,18 +75,22 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         [unroll]
         for( i = 0; i <= BORDER * 2; i++ )
         {
-            if( i == BORDER && j == BORDER )
-                continue;
-
             int2 pos = threadPos + int2( i, j );
-            float4 t = s_Normal_MinHitDist[ pos.y ][ pos.x ];
+            float4 normalAndRoughness = s_Normal_Roughness[ pos.y ][ pos.x ];
 
-            if( i < 2 && j < 2 )
-                Navg += t.xyz;
+            // Average normal
+            if( i < 2 && j < 2 ) // TODO: is backward 2x2 OK?
+                Navg += normalAndRoughness.xyz;
 
             #ifdef REBLUR_SPECULAR
                 // Min hit distance for tracking, ignoring 0 values ( which still can be produced by VNDF sampling )
-                hitDistForTracking = min( hitDistForTracking, t.w == 0.0 ? NRD_INF : t.w );
+                float h = s_HitDistForTracking[ pos.y ][ pos.x ];
+                hitDistForTracking = min( hitDistForTracking, h == 0.0 ? NRD_INF : h );
+
+                // Roughness variance
+                // IMPORTANT: squared because the test uses "roughness ^ 2"
+                roughnessM1 += normalAndRoughness.w * normalAndRoughness.w;
+                roughnessM2 += normalAndRoughness.w * normalAndRoughness.w * normalAndRoughness.w * normalAndRoughness.w;
             #endif
         }
     }
@@ -97,6 +105,10 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
     #ifdef REBLUR_SPECULAR
         float roughnessModified = STL::Filtering::GetModifiedRoughnessFromNormalVariance( roughness, Navg ); // TODO: needed?
+
+        roughnessM1 /= ( 1 + BORDER * 2 ) * ( 1 + BORDER * 2 );
+        roughnessM2 /= ( 1 + BORDER * 2 ) * ( 1 + BORDER * 2 );
+        float roughnessSigma = GetStdDev( roughnessM1, roughnessM2 );
     #endif
 
     // Hit distance for tracking ( tests 8, 110, 139, e3, e9 without normal map, e24 )
@@ -149,7 +161,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         2x 2y 3x 3y       2y 3x
            2z 3z
     */
-    STL::Filtering::CatmullRom smbCatromFilter = STL::Filtering::GetCatmullRomFilter( saturate( smbPixelUv ), gRectSizePrev );
+    STL::Filtering::CatmullRom smbCatromFilter = STL::Filtering::GetCatmullRomFilter( smbPixelUv, gRectSizePrev );
     float2 smbCatromGatherUv = smbCatromFilter.origin * gInvScreenSize;
     float4 smbViewZ0 = gIn_Prev_ViewZ.GatherRed( gNearestClamp, smbCatromGatherUv, float2( 1, 1 ) ).wzxy;
     float4 smbViewZ1 = gIn_Prev_ViewZ.GatherRed( gNearestClamp, smbCatromGatherUv, float2( 3, 1 ) ).wzxy;
@@ -163,7 +175,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
     // Previous normal averaged for all pixels in 2x2 footprint
     // IMPORTANT: bilinear filter can touch sky pixels, due to this reason "Post Blur" writes special values into sky-pixels
-    STL::Filtering::Bilinear smbBilinearFilter = STL::Filtering::GetBilinearFilter( saturate( smbPixelUv ), gRectSizePrev );
+    STL::Filtering::Bilinear smbBilinearFilter = STL::Filtering::GetBilinearFilter( smbPixelUv, gRectSizePrev );
 
     float2 smbBilinearGatherUv = ( smbBilinearFilter.origin + 1.0 ) * gInvScreenSize;
     float3 prevNavg = UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness.SampleLevel( gLinearClamp, smbBilinearGatherUv, 0 ), false ).xyz;
@@ -192,32 +204,35 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
     // Surface motion - plane distance based disocclusion
     float3 V = GetViewVector( X );
     float NoV = abs( dot( N, V ) );
-    float smbDisocclusionThreshold = disocclusionThresholdMulFrustumSize / lerp( 0.05 + 0.95 * NoV, 1.0, saturate( smbParallaxInPixels / 30.0 ) );
-
     float mvLengthFactor = STL::Math::LinearStep( 0.5, 1.0, smbParallaxInPixels );
     float frontFacing = lerp( cos( STL::Math::DegToRad( 135.0 ) ), cos( STL::Math::DegToRad( 91.0 ) ), mvLengthFactor );
-    bool isInScreenAndNotBackfacing = IsInScreen( smbPixelUv ) && dot( prevNavg, Navg ) > frontFacing;
-    smbDisocclusionThreshold = isInScreenAndNotBackfacing ? smbDisocclusionThreshold : -1.0;
+    float4 smbDisocclusionThreshold = disocclusionThresholdMulFrustumSize / lerp( 0.05 + 0.95 * NoV, 1.0, saturate( smbParallaxInPixels / 30.0 ) );
+    smbDisocclusionThreshold *= float( dot( prevNavg, Navg ) > frontFacing );
+    smbDisocclusionThreshold *= IsInScreen2x2( smbBilinearFilter.origin, gRectSizePrev );
+    smbDisocclusionThreshold -= NRD_EPS;
 
     float3 Xvprev = STL::Geometry::AffineTransform( gWorldToViewPrev, Xprev );
     float3 smbPlaneDist0 = abs( prevViewZ0 - Xvprev.z );
     float3 smbPlaneDist1 = abs( prevViewZ1 - Xvprev.z );
     float3 smbPlaneDist2 = abs( prevViewZ2 - Xvprev.z );
     float3 smbPlaneDist3 = abs( prevViewZ3 - Xvprev.z );
-    float3 smbOcclusion0 = step( smbPlaneDist0, smbDisocclusionThreshold );
-    float3 smbOcclusion1 = step( smbPlaneDist1, smbDisocclusionThreshold );
-    float3 smbOcclusion2 = step( smbPlaneDist2, smbDisocclusionThreshold );
-    float3 smbOcclusion3 = step( smbPlaneDist3, smbDisocclusionThreshold );
+    float3 smbOcclusion0 = step( smbPlaneDist0, smbDisocclusionThreshold.x );
+    float3 smbOcclusion1 = step( smbPlaneDist1, smbDisocclusionThreshold.y );
+    float3 smbOcclusion2 = step( smbPlaneDist2, smbDisocclusionThreshold.z );
+    float3 smbOcclusion3 = step( smbPlaneDist3, smbDisocclusionThreshold.w );
 
     float4 smbOcclusionWeights = STL::Filtering::GetBilinearCustomWeights( smbBilinearFilter, float4( smbOcclusion0.z, smbOcclusion1.y, smbOcclusion2.y, smbOcclusion3.x ) );
     bool smbAllowCatRom = dot( smbOcclusion0 + smbOcclusion1 + smbOcclusion2 + smbOcclusion3, 1.0 ) > 11.5 && REBLUR_USE_CATROM_FOR_SURFACE_MOTION_IN_TA;
-    #ifdef REBLUR_DIRECTIONAL_OCCLUSION
-        smbAllowCatRom = false;
-    #endif
-
     float smbFootprintQuality = STL::Filtering::ApplyBilinearFilter( smbOcclusion0.z, smbOcclusion1.y, smbOcclusion2.y, smbOcclusion3.x, smbBilinearFilter );
     smbFootprintQuality = STL::Math::Sqrt01( smbFootprintQuality );
 
+    // Bits
+    // IMPORTANT: MaterialID check is not needed for TS
+    float fbits = smbOcclusion0.z * 1.0;
+    fbits += smbOcclusion1.y * 2.0;
+    fbits += smbOcclusion2.y * 4.0;
+    fbits += smbOcclusion3.x * 8.0;
+
     // Material ID check
     float4 materialCmps = CompareMaterials( materialID, prevMaterialIDs, 1 );
     smbOcclusion0.z *= materialCmps.x;
@@ -227,7 +242,6 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
     float4 smbOcclusionWeightsWithMaterialID = STL::Filtering::GetBilinearCustomWeights( smbBilinearFilter, float4( smbOcclusion0.z, smbOcclusion1.y, smbOcclusion2.y, smbOcclusion3.x ) );
     bool smbAllowCatRomWithMaterialID = smbAllowCatRom && dot( materialCmps, 1.0 ) > 3.5 && REBLUR_USE_CATROM_FOR_SURFACE_MOTION_IN_TA;
-
     float smbFootprintQualityWithMaterialID = STL::Filtering::ApplyBilinearFilter( smbOcclusion0.z, smbOcclusion1.y, smbOcclusion2.y, smbOcclusion3.x, smbBilinearFilter );
     smbFootprintQualityWithMaterialID = STL::Math::Sqrt01( smbFootprintQualityWithMaterialID );
 
@@ -237,16 +251,10 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
     float sizeQuality = ( NoVprev + 1e-3 ) / ( NoV + 1e-3 ); // this order because we need to fix stretching only, shrinking is OK
     sizeQuality *= sizeQuality;
     sizeQuality = lerp( 0.1, 1.0, saturate( sizeQuality ) );
+
     smbFootprintQuality *= sizeQuality;
     smbFootprintQualityWithMaterialID *= sizeQuality;
 
-    // Bits
-    float fbits = float( smbAllowCatRom ) * 1.0;
-    fbits += smbOcclusion0.z * 2.0;
-    fbits += smbOcclusion1.y * 4.0;
-    fbits += smbOcclusion2.y * 8.0;
-    fbits += smbOcclusion3.x * 16.0;
-
     // Update accumulation speeds
     #ifdef REBLUR_DIFFUSE
         float4 diffOcclusionWeights = gDiffMaterialMask ? smbOcclusionWeightsWithMaterialID : smbOcclusionWeights;
@@ -458,18 +466,18 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
             int2 pos = threadPos + BORDER + int2( f.origin ) - pixelPos;
             pos = clamp( pos, 0, int2( BUFFER_X, BUFFER_Y ) - 2 ); // just in case?
 
-            float3 n00 = s_Normal_MinHitDist[ pos.y ][ pos.x ].xyz;
-            float3 n10 = s_Normal_MinHitDist[ pos.y ][ pos.x + 1 ].xyz;
-            float3 n01 = s_Normal_MinHitDist[ pos.y + 1 ][ pos.x ].xyz;
-            float3 n11 = s_Normal_MinHitDist[ pos.y + 1 ][ pos.x + 1 ].xyz;
+            float3 n00 = s_Normal_Roughness[ pos.y ][ pos.x ].xyz;
+            float3 n10 = s_Normal_Roughness[ pos.y ][ pos.x + 1 ].xyz;
+            float3 n01 = s_Normal_Roughness[ pos.y + 1 ][ pos.x ].xyz;
+            float3 n11 = s_Normal_Roughness[ pos.y + 1 ][ pos.x + 1 ].xyz;
 
             float3 n = normalize( STL::Filtering::ApplyBilinearFilter( n00, n10, n01, n11, f ) );
 
             // ( Optional ) High parallax - flattens surface on high motion ( test 132, e9 )
             // IMPORTANT: a must for 8-bit and 10-bit normals ( tests b7, b10, b33 )
-            deltaUvLen *= NRD_USE_HIGH_PARALLAX_CURVATURE_SILHOUETTE_FIX ? NoV : 1.0; // it fixes silhouettes, but leads to less flattening
-            float2 motionUvHigh = pixelUv + deltaUvLen * deltaUv * gInvRectSize;
-            if( NRD_USE_HIGH_PARALLAX_CURVATURE && deltaUvLen > 1.0 && IsInScreen( motionUvHigh ) )
+            float deltaUvLenFixed = deltaUvLen * ( NRD_USE_HIGH_PARALLAX_CURVATURE_SILHOUETTE_FIX ? NoV : 1.0 ); // it fixes silhouettes, but leads to less flattening
+            float2 motionUvHigh = pixelUv + deltaUvLenFixed * deltaUv * gInvRectSize;
+            if( NRD_USE_HIGH_PARALLAX_CURVATURE && deltaUvLenFixed > 1.0 && IsInScreen( motionUvHigh ) )
             {
                 // Construct the other edge point "xHigh"
                 float zHigh = abs( gIn_ViewZ.SampleLevel( gLinearClamp, gRectOffset + motionUvHigh * gResolutionScale, 0 ) );
@@ -515,8 +523,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
             float2 uv1 = STL::Geometry::GetScreenUv( gWorldToClipPrev, X - V * ApplyThinLensEquation( NoV, hitDistForTracking, curvature ) );
             float2 uv2 = STL::Geometry::GetScreenUv( gWorldToClipPrev, X );
             float a = length( ( uv1 - uv2 ) * gRectSize );
-            float b = length( deltaUv * gRectSize );
-            curvature *= float( a < 3.0 * b + gInvRectSize.x ); // TODO:it's a hack, incompatible with concave mirrors ( tests 22b, 23b, 25b )
+            curvature *= float( a < 3.0 * deltaUvLen + gInvRectSize.x ); // TODO:it's a hack, incompatible with concave mirrors ( tests 22b, 23b, 25b )
 
             // Smooth virtual motion delta ( omitting huge values if curvature is negative and curvature radius is very small )
             float3 Xvirtual = GetXvirtual( NoV, hitDistForTracking, max( curvature, 0.0 ), X, Xprev, V, Dfactor );
@@ -531,21 +538,53 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         float vmbPixelsTraveled = length( vmbDelta * gRectSize );
         float XvirtualLength = length( Xvirtual );
 
-        // Virtual motion - plane distance based disocclusion
-        // IMPORTANT: use "Navg" in this test to avoid false reaction on bumpy surfaces ( test 181 )
-        STL::Filtering::Bilinear vmbBilinearFilter = STL::Filtering::GetBilinearFilter( saturate( vmbPixelUv ), gRectSizePrev );
+        // Estimate how many pixels are traveled by virtual motion - how many radians can it be?
+        // IMPORTANT: if curvature angle is multiplied by path length then we can get an angle exceeding 2 * PI, which is impossible. The max
+        // angle is PI ( leftmost and rightmost points on a hemisphere ), it can be achieved by using "tan" instead of angle.
+        float pixelSize = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ );
+        float curvatureAngleTan = pixelSize * abs( curvature ); // tana = pixelSize / curvatureRadius = pixelSize * curvature
+        curvatureAngleTan *= max( vmbPixelsTraveled / max( NoV, 0.01 ), 1.0 ); // path length
+
+        float lobeHalfAngle = max( STL::ImportanceSampling::GetSpecularLobeHalfAngle( roughnessModified ), NRD_NORMAL_ULP );
+        float curvatureAngle = atan( curvatureAngleTan );
+
+        // IMPORTANT: increase roughness sensitivity at high FPS
+        float roughnessSensitivity = NRD_ROUGHNESS_SENSITIVITY * lerp( 1.0, 0.5, STL::Math::SmoothStep( 1.0, 4.0, gFramerateScale ) );
+
+        // Virtual motion - roughness
+        STL::Filtering::Bilinear vmbBilinearFilter = STL::Filtering::GetBilinearFilter( vmbPixelUv, gRectSizePrev );
         float2 vmbBilinearGatherUv = ( vmbBilinearFilter.origin + 1.0 ) * gInvScreenSize;
+        float2 relaxedRoughnessWeightParams = GetRelaxedRoughnessWeightParams( roughness * roughness, gRoughnessFraction, roughnessSensitivity );
+        float4 vmbRoughness = gIn_Prev_Normal_Roughness.GatherAlpha( gNearestClamp, vmbBilinearGatherUv ).wzxy;
+        float4 roughnessWeight = ComputeNonExponentialWeightWithSigma( vmbRoughness * vmbRoughness, relaxedRoughnessWeightParams.x, relaxedRoughnessWeightParams.y, roughnessSigma );
+        roughnessWeight = lerp( STL::Math::SmoothStep( 1.0, 0.0, smbParallaxInPixels ), 1.0, roughnessWeight ); // jitter friendly
+        float virtualHistoryRoughnessBasedConfidence = STL::Filtering::ApplyBilinearFilter( roughnessWeight.x, roughnessWeight.y, roughnessWeight.z, roughnessWeight.w, vmbBilinearFilter );
+
+        // Virtual motion - disocclusion: plane distance and roughness
+        // IMPORTANT: use "Navg" in this test to avoid false reaction on bumpy surfaces ( test 181 )
         float4 vmbViewZs = UnpackViewZ( gIn_Prev_ViewZ.GatherRed( gNearestClamp, vmbBilinearGatherUv ).wzxy );
+        float4 vmbBilinearFilterMask = IsInScreen2x2( vmbBilinearFilter.origin, gRectSizePrev );
+        float4 vmbOcclusionThreshold = disocclusionThresholdMulFrustumSize;
+        vmbOcclusionThreshold *= vmbBilinearFilterMask;
+        vmbOcclusionThreshold -= NRD_EPS;
+
         float3 vmbVv = STL::Geometry::ReconstructViewPosition( vmbPixelUv, gFrustumPrev, 1.0 ); // unnormalized, orthoMode = 0
         float3 Nvprev = STL::Geometry::RotateVector( gWorldToViewPrev, Navg );
         float NoXreal = dot( Navg, X - gCameraDelta );
         float4 NoX = ( Nvprev.x * vmbVv.x + Nvprev.y * vmbVv.y ) * ( gOrthoMode == 0 ? vmbViewZs : gOrthoMode ) + Nvprev.z * vmbVv.z * vmbViewZs;
         float4 vmbPlaneDist = abs( NoX - NoXreal );
-        float4 vmbOcclusion = step( vmbPlaneDist, IsInScreen( vmbPixelUv ) ? disocclusionThresholdMulFrustumSize : -1.0 );
+        float4 vmbOcclusion = step( vmbPlaneDist, vmbOcclusionThreshold );
+        vmbOcclusion *= step( 0.5, roughnessWeight );
 
         bool vmbAllowCatRom = dot( vmbOcclusion, 1.0 ) > 3.5 && REBLUR_USE_CATROM_FOR_VIRTUAL_MOTION_IN_TA;
         vmbAllowCatRom = vmbAllowCatRom && specAllowCatRom; // helps to reduce over-sharpening in disoccluded areas
 
+        // Bits
+        fbits += vmbOcclusion.x * 16.0;
+        fbits += vmbOcclusion.y * 32.0;
+        fbits += vmbOcclusion.z * 64.0;
+        fbits += vmbOcclusion.w * 128.0;
+
         // Virtual motion - accumulation speed
         uint4 vmbInternalData = gIn_Prev_InternalData.GatherRed( gNearestClamp, vmbBilinearGatherUv ).wzxy;
 
@@ -561,16 +600,6 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         vmbFootprintQuality = STL::Math::Sqrt01( vmbFootprintQuality );
         vmbSpecAccumSpeed *= lerp( vmbFootprintQuality, 1.0, 1.0 / ( 1.0 + vmbSpecAccumSpeed ) );
 
-        // Estimate how many pixels are traveled by virtual motion - how many radians can it be?
-        // If curvature angle is multiplied by path length then we can get an angle exceeding 2 * PI, what is impossible. The max
-        // angle is PI ( most left and most right points on a hemisphere ), it can be achieved by using "tan" instead of angle.
-        float pixelSize = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ );
-        float curvatureAngleTan = pixelSize * abs( curvature ); // tana = pixelSize / curvatureRadius = pixelSize * curvature
-        curvatureAngleTan *= max( vmbPixelsTraveled / max( NoV, 0.01 ), 1.0 ); // path length
-
-        float lobeHalfAngle = max( STL::ImportanceSampling::GetSpecularLobeHalfAngle( roughnessModified ), NRD_NORMAL_ULP );
-        float curvatureAngle = atan( curvatureAngleTan );
-
         // Virtual motion - normal: parallax ( test 132 )
         float4 vmbNormalAndRoughness = UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness.SampleLevel( gLinearClamp, vmbPixelUv * gResolutionScalePrev, 0 ) );
         float3 vmbN = STL::Geometry::RotateVector( gWorldPrevToWorld, vmbNormalAndRoughness.xyz );
@@ -592,16 +621,6 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         // IMPORTANT: this is currently needed for bumpy surfaces, because virtual motion gets ruined by big curvature
         float virtualHistoryAmount = virtualHistoryNormalBasedConfidence;
 
-        // IMPORTANT: at high FPS "smb" works well, so we get enough frames in "specAccumSpeed", even if "vmb" is rejected.
-        // TA doesn't behave well on roughness edges at high FPS due to significantly enlarged "sigma". As a WAR, we can
-        // increase roughness sensitivity instead.
-        float roughnessSensitivity = lerp( NRD_ROUGHNESS_SENSITIVITY, 0.005, STL::Math::SmoothStep( 1.0, 4.0, gFramerateScale ) );
-
-        // Virtual motion - roughness
-        float2 relaxedRoughnessWeightParams = GetRelaxedRoughnessWeightParams( roughness * roughness, gRoughnessFraction, roughnessSensitivity );
-        float virtualHistoryRoughnessBasedConfidence = ComputeWeight( vmbNormalAndRoughness.w * vmbNormalAndRoughness.w, relaxedRoughnessWeightParams.x, relaxedRoughnessWeightParams.y );
-        virtualHistoryRoughnessBasedConfidence = lerp( STL::Math::SmoothStep( 1.0, 0.0, smbParallaxInPixels ), 1.0, virtualHistoryRoughnessBasedConfidence ); // jitter friendly
-
         // Virtual motion - virtual parallax difference
         // Tests 3, 6, 8, 11, 14, 100, 103, 104, 106, 109, 110, 114, 120, 127, 130, 131, 132, 138, 139 and 9e
         float hitDistForTrackingPrev = gIn_Prev_Spec_HitDistForTracking.SampleLevel( gLinearClamp, vmbPixelUv * gResolutionScalePrev, 0 );
@@ -626,21 +645,24 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         float virtualHistoryParallaxBasedConfidence = STL::Math::SmoothStep( lobeRadiusInPixels + 0.25, 0.0, deltaParallaxInPixels );
 
         // Virtual motion - normal & roughness prev-prev tests
+        // IMPORTANT: 2 is needed because:
+        // - line *** allows fallback to laggy surface motion, which can be wrongly redistributed by virtual motion
+        // - we use at least linear filters, as a result a wider initial offset is needed
+        float stepBetweenTaps = min( vmbPixelsTraveled * gFramerateScale, 2.0 ) + vmbPixelsTraveled / REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM;
         vmbDelta *= STL::Math::Rsqrt( STL::Math::LengthSquared( vmbDelta ) );
         vmbDelta /= gRectSizePrev;
-        vmbDelta *= saturate( vmbPixelsTraveled / 0.1 ) + vmbPixelsTraveled / REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM;
 
         relaxedRoughnessWeightParams = GetRelaxedRoughnessWeightParams( vmbNormalAndRoughness.w * vmbNormalAndRoughness.w, gRoughnessFraction, roughnessSensitivity );
 
         [unroll]
         for( i = 1; i <= REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM; i++ )
         {
-            float2 vmbPixelUvPrev = vmbPixelUv + vmbDelta * i;
+            float2 vmbPixelUvPrev = vmbPixelUv + vmbDelta * i * stepBetweenTaps;
             float4 vmbNormalAndRoughnessPrev = UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness.SampleLevel( gLinearClamp, vmbPixelUvPrev * gResolutionScalePrev, 0 ) );
 
             float2 w;
-            w.x = GetEncodingAwareNormalWeight( vmbNormalAndRoughness.xyz, vmbNormalAndRoughnessPrev.xyz, lobeHalfAngle, curvatureAngle * ( i + 1 ) );
-            w.y = ComputeWeight( vmbNormalAndRoughnessPrev.w * vmbNormalAndRoughnessPrev.w, relaxedRoughnessWeightParams.x, relaxedRoughnessWeightParams.y );
+            w.x = GetEncodingAwareNormalWeight( vmbNormalAndRoughness.xyz, vmbNormalAndRoughnessPrev.xyz, lobeHalfAngle, curvatureAngle * ( 1.0 + i * stepBetweenTaps ) );
+            w.y = ComputeNonExponentialWeightWithSigma( vmbNormalAndRoughnessPrev.w * vmbNormalAndRoughnessPrev.w, relaxedRoughnessWeightParams.x, relaxedRoughnessWeightParams.y, roughnessSigma );
 
             w = IsInScreen( vmbPixelUvPrev ) ? w : 1.0;
 
@@ -696,7 +718,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         // Virtual history amount - other ( tests 65, 66, 103, 111, 132, e9, e11 )
         virtualHistoryAmount *= STL::Math::SmoothStep( 0.05, 0.95, Dfactor );
         virtualHistoryAmount *= virtualHistoryRoughnessBasedConfidence;
-        virtualHistoryAmount *= saturate( vmbSpecAccumSpeed / ( smbSpecAccumSpeed + NRD_EPS ) );
+        virtualHistoryAmount *= saturate( vmbSpecAccumSpeed / ( smbSpecAccumSpeed + NRD_EPS ) ); // ***
 
         #if( REBLUR_VIRTUAL_HISTORY_AMOUNT != 2 )
             virtualHistoryAmount = REBLUR_VIRTUAL_HISTORY_AMOUNT;
diff --git a/Shaders/Include/REBLUR/REBLUR_DiffuseSpecular_TemporalStabilization.hlsli b/Shaders/Include/REBLUR/REBLUR_DiffuseSpecular_TemporalStabilization.hlsli
index 7d119cf..b3d2ad9 100644
--- a/Shaders/Include/REBLUR/REBLUR_DiffuseSpecular_TemporalStabilization.hlsli
+++ b/Shaders/Include/REBLUR/REBLUR_DiffuseSpecular_TemporalStabilization.hlsli
@@ -224,22 +224,16 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
     float4 data1 = UnpackData1( gIn_Data1[ pixelPos ] );
     float2 data2 = UnpackData2( gIn_Data2[ pixelPos ], bits );
 
-    float4 smbOcclusion = float4( ( bits & uint4( 2, 4, 8, 16 ) ) != 0 );
-
-    float pixelSize = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ );
     float stabilizationStrength = gStabilizationStrength * float( pixelUv.x >= gSplitScreen );
 
-    STL::Filtering::Bilinear smbBilinearFilter = STL::Filtering::GetBilinearFilter( saturate( smbPixelUv ), gRectSizePrev );
-
-    // Only for "...WithMaterialID" even if material ID test is disabled
+    // Surface motion footprint
+    STL::Filtering::Bilinear smbBilinearFilter = STL::Filtering::GetBilinearFilter( smbPixelUv, gRectSizePrev );
+    float4 smbOcclusion = float4( ( bits & uint4( 1, 2, 4, 8 ) ) != 0 );
     float4 smbOcclusionWeights = STL::Filtering::GetBilinearCustomWeights( smbBilinearFilter, smbOcclusion );
-    bool smbIsCatromAllowed = ( bits & 1 ) != 0 && REBLUR_USE_CATROM_FOR_SURFACE_MOTION_IN_TS;
-
+    bool smbAllowCatRom = dot( smbOcclusion, 1.0 ) > 3.5 && REBLUR_USE_CATROM_FOR_SURFACE_MOTION_IN_TS;
     float smbFootprintQuality = STL::Filtering::ApplyBilinearFilter( smbOcclusion.x, smbOcclusion.y, smbOcclusion.z, smbOcclusion.w, smbBilinearFilter );
     smbFootprintQuality = STL::Math::Sqrt01( smbFootprintQuality );
 
-    float smbIsInScreenMulFootprintQuality = IsInScreen( smbPixelUv ) * smbFootprintQuality;
-
     // Diffuse
     #ifdef REBLUR_DIFFUSE
         // Sample history - surface motion
@@ -248,7 +242,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
         BicubicFilterNoCornersWithFallbackToBilinearFilterWithCustomWeights(
             saturate( smbPixelUv ) * gRectSizePrev, gInvScreenSize,
-            smbOcclusionWeights, smbIsCatromAllowed,
+            smbOcclusionWeights, smbAllowCatRom,
             gIn_Diff_StabilizedHistory, smbDiffHistory
             #ifdef REBLUR_SH
                 , gIn_DiffSh_StabilizedHistory, smbDiffShHistory
@@ -260,10 +254,10 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
         // Compute antilag
         float diffStabilizationStrength = stabilizationStrength * float( smbPixelUv.x >= gSplitScreen );
-        float diffAntilag = ComputeAntilag( smbDiffHistory, diff, diffSigma, gAntilagParams, data1.x );
+        float diffAntilag = ComputeAntilag( smbDiffHistory, diff, diffSigma, gAntilagParams, smbFootprintQuality * data1.x );
 
         // Clamp history and combine with the current frame
-        float2 diffTemporalAccumulationParams = GetTemporalAccumulationParams( smbIsInScreenMulFootprintQuality, data1.x );
+        float2 diffTemporalAccumulationParams = GetTemporalAccumulationParams( smbFootprintQuality, data1.x );
 
         float diffHistoryWeight = diffTemporalAccumulationParams.x;
         diffHistoryWeight *= diffAntilag; // this is important
@@ -352,28 +346,33 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
         BicubicFilterNoCornersWithFallbackToBilinearFilterWithCustomWeights(
             saturate( smbPixelUv ) * gRectSizePrev, gInvScreenSize,
-            smbOcclusionWeights, smbIsCatromAllowed,
+            smbOcclusionWeights, smbAllowCatRom,
             gIn_Spec_StabilizedHistory, smbSpecHistory
             #ifdef REBLUR_SH
                 , gIn_SpecSh_StabilizedHistory, smbSpecShHistory
             #endif
         );
 
+        // Virtual motion footprint
+        STL::Filtering::Bilinear vmbBilinearFilter = STL::Filtering::GetBilinearFilter( vmbPixelUv, gRectSizePrev );
+        float4 vmbOcclusion = float4( ( bits & uint4( 16, 32, 64, 128 ) ) != 0 );
+        float4 vmbOcclusionWeights = STL::Filtering::GetBilinearCustomWeights( vmbBilinearFilter, vmbOcclusion );
+        bool vmbAllowCatRom = dot( vmbOcclusion, 1.0 ) > 3.5 && REBLUR_USE_CATROM_FOR_VIRTUAL_MOTION_IN_TS;
+        float vmbFootprintQuality = STL::Filtering::ApplyBilinearFilter( vmbOcclusion.x, vmbOcclusion.y, vmbOcclusion.z, vmbOcclusion.w, vmbBilinearFilter );
+        vmbFootprintQuality = STL::Math::Sqrt01( vmbFootprintQuality );
+
         // Sample history - virtual motion
         REBLUR_TYPE vmbSpecHistory;
-        #if( REBLUR_USE_CATROM_FOR_VIRTUAL_MOTION_IN_TS == 1 )
-            BicubicFilterNoCornersWithFallbackToBilinearFilterWithCustomWeights(
-                saturate( vmbPixelUv ) * gRectSizePrev, gInvScreenSize,
-                0, true,
-                gIn_Spec_StabilizedHistory, vmbSpecHistory
-            );
-        #else
-            vmbSpecHistory = gIn_Spec_StabilizedHistory.SampleLevel( gLinearClamp, vmbPixelUv * gResolutionScalePrev, 0 );
-        #endif
+        REBLUR_SH_TYPE vmbSpecShHistory;
 
-        #ifdef REBLUR_SH
-            float4 vmbSpecShHistory = gIn_SpecSh_StabilizedHistory.SampleLevel( gLinearClamp, vmbPixelUv * gResolutionScalePrev, 0 );
-        #endif
+        BicubicFilterNoCornersWithFallbackToBilinearFilterWithCustomWeights(
+            saturate( vmbPixelUv ) * gRectSizePrev, gInvScreenSize,
+            vmbOcclusionWeights, vmbAllowCatRom,
+            gIn_Spec_StabilizedHistory, vmbSpecHistory
+            #ifdef REBLUR_SH
+                , gIn_SpecSh_StabilizedHistory, vmbSpecShHistory
+            #endif
+        );
 
         // Avoid negative values
         smbSpecHistory = ClampNegativeToZero( smbSpecHistory );
@@ -394,11 +393,11 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         if( virtualHistoryAmount != 0.0 )
             specStabilizationStrength *= float( vmbPixelUv.x >= gSplitScreen );
 
-        float specAntilag = ComputeAntilag( specHistory, spec, specSigma, gAntilagParams, data1.z );
+        float footprintQuality = lerp( smbFootprintQuality, vmbFootprintQuality, virtualHistoryAmount );
+        float specAntilag = ComputeAntilag( specHistory, spec, specSigma, gAntilagParams, footprintQuality * data1.z );
 
         // Clamp history and combine with the current frame
-        float isInScreenMulFootprintQuality = lerp( smbIsInScreenMulFootprintQuality, 1.0, virtualHistoryAmount );
-        float2 specTemporalAccumulationParams = GetTemporalAccumulationParams( isInScreenMulFootprintQuality, data1.z );
+        float2 specTemporalAccumulationParams = GetTemporalAccumulationParams( footprintQuality, data1.z );
 
         // TODO: roughness should affect stabilization:
         // - use "virtualHistoryRoughnessBasedConfidence" from TA
diff --git a/Shaders/Include/RELAX/RELAX_DiffuseSpecular_TemporalAccumulation.hlsli b/Shaders/Include/RELAX/RELAX_DiffuseSpecular_TemporalAccumulation.hlsli
index a028e8e..43e8b87 100644
--- a/Shaders/Include/RELAX/RELAX_DiffuseSpecular_TemporalAccumulation.hlsli
+++ b/Shaders/Include/RELAX/RELAX_DiffuseSpecular_TemporalAccumulation.hlsli
@@ -63,11 +63,6 @@ float loadSurfaceMotionBasedPrevData(
 #endif
 )
 {
-    // Calculating disocclusion threshold
-    float pixelSize = PixelRadiusToWorld(gUnproject, gOrthoMode, 1.0, currentLinearZ);
-    float frustumSize = pixelSize * min(gRectSize.x, gRectSize.y);
-    float disocclusionThreshold = mixedDisocclusionDepthThreshold * frustumSize / lerp(NdotV, 1.0, saturate(parallaxInPixels / 30.0));
-
     // Calculating previous pixel position
     float2 prevPixelPosFloat = prevUVSMB * gRectSizePrev;
 
@@ -106,16 +101,23 @@ float loadSurfaceMotionBasedPrevData(
     float4 prevMaterialIDs01 = gPrevMaterialID.GatherRed(gNearestClamp, gatherOrigin01).wzxy;
     float4 prevMaterialIDs11 = gPrevMaterialID.GatherRed(gNearestClamp, gatherOrigin11).wzxy;
 
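+    // Calculating disocclusion thresholds, one per gathered quad (00, 10, 01, 11); assuming IsInScreen2x2 returns 0 for off-screen taps, those thresholds become negative and the depth test below rejects them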
+    float pixelSize = PixelRadiusToWorld(gUnproject, gOrthoMode, 1.0, currentLinearZ);
+    float frustumSize = pixelSize * min(gRectSize.x, gRectSize.y);
+    float4 disocclusionThreshold = mixedDisocclusionDepthThreshold * frustumSize / lerp(NdotV, 1.0, saturate(parallaxInPixels / 30.0));
+    disocclusionThreshold *= IsInScreen2x2( bilinearOrigin, gRectSizePrev );
+    disocclusionThreshold -= NRD_EPS;
+
     // Calculating validity of 12 bicubic taps, 4 of those are bilinear taps
     float3 prevViewPos = STL::Geometry::AffineTransform(gPrevWorldToView, prevWorldPos);
     float3 planeDist0 = abs(prevViewZs00.yzw - prevViewPos.zzz);
     float3 planeDist1 = abs(prevViewZs10.xzw - prevViewPos.zzz);
     float3 planeDist2 = abs(prevViewZs01.xyw - prevViewPos.zzz);
     float3 planeDist3 = abs(prevViewZs11.xyz - prevViewPos.zzz);
-    float3 tapsValid0 = step(planeDist0, disocclusionThreshold);
-    float3 tapsValid1 = step(planeDist1, disocclusionThreshold);
-    float3 tapsValid2 = step(planeDist2, disocclusionThreshold);
-    float3 tapsValid3 = step(planeDist3, disocclusionThreshold);
+    float3 tapsValid0 = step(planeDist0, disocclusionThreshold.x);
+    float3 tapsValid1 = step(planeDist1, disocclusionThreshold.y);
+    float3 tapsValid2 = step(planeDist2, disocclusionThreshold.z);
+    float3 tapsValid3 = step(planeDist3, disocclusionThreshold.w);
     tapsValid0 *= CompareMaterials(currentMaterialID.xxx, prevMaterialIDs00.yzw, materialIDMask);
     tapsValid1 *= CompareMaterials(currentMaterialID.xxx, prevMaterialIDs10.xzw, materialIDMask);
     tapsValid2 *= CompareMaterials(currentMaterialID.xxx, prevMaterialIDs01.xyw, materialIDMask);
@@ -138,16 +140,6 @@ float loadSurfaceMotionBasedPrevData(
         bicubicFootprintValid = 0;
     }
 
-    // Checking bicubic footprint validity for being in screen
-    [flatten]
-    if (any(bilinearOrigin < int2(1, 1)) || any(bilinearOrigin >= int2(gRectSizePrev)-int2(2, 2)))
-    {
-        bicubicFootprintValid = 0;
-    }
-
-    // Checking bilinear footprint validity for being in screen
-    bilinearTapsValid *= IsInScreen(prevUVSMB);
-
     // Calculating bilinear weights in advance
     STL::Filtering::Bilinear bilinear;
     bilinear.weights = bilinearWeights;
@@ -278,7 +270,6 @@ float loadVirtualMotionBasedPrevData(
     prevUVVMB = prevVirtualClipPos.xy * float2(0.5, -0.5) + float2(0.5, 0.5);
 
     float2 prevVirtualPixelPosFloat = prevUVVMB * gRectSizePrev;
-    float disocclusionThreshold = mixedDisocclusionDepthThreshold * (gOrthoMode == 0 ? currentLinearZ : 1.0);
 
     // Consider reprojection to the same pixel index a small motion.
     // It is useful for skipping reprojection test for static camera when the jitter is the only source of motion.
@@ -294,6 +285,11 @@ float loadVirtualMotionBasedPrevData(
     // Taking care of camera motion, because world-space is always centered at camera position in NRD
     currentWorldPos -= gPrevCameraPosition.xyz;
 
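+    // Calculating disocclusion thresholds, one per bilinear tap; assuming IsInScreen2x2 returns 0 for off-screen taps, those thresholds become negative so such taps are expected to fail isReprojectionTapValid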
+    float4 disocclusionThreshold = mixedDisocclusionDepthThreshold * (gOrthoMode == 0 ? currentLinearZ : 1.0);
+    disocclusionThreshold *= IsInScreen2x2( bilinearOrigin, gRectSizePrev );
+    disocclusionThreshold -= NRD_EPS;
+
     // Checking bilinear footprint only for virtual motion based specular reprojection
     float4 prevViewZs = gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin).wzxy;
     float4 prevMaterialIDs = gPrevMaterialID.GatherRed(gNearestClamp, gatherOrigin).wzxy;
@@ -301,20 +297,17 @@ float loadVirtualMotionBasedPrevData(
     float4 bilinearTapsValid;
 
     prevWorldPosInTap = GetPreviousWorldPosFromPixelPos(bilinearOrigin + int2(0, 0), prevViewZs.x);
-    bilinearTapsValid.x = isReprojectionTapValid(currentWorldPos, prevWorldPosInTap, currentNormal, disocclusionThreshold);
+    bilinearTapsValid.x = isReprojectionTapValid(currentWorldPos, prevWorldPosInTap, currentNormal, disocclusionThreshold.x);
     prevWorldPosInTap = GetPreviousWorldPosFromPixelPos(bilinearOrigin + int2(1, 0), prevViewZs.y);
-    bilinearTapsValid.y = isReprojectionTapValid(currentWorldPos, prevWorldPosInTap, currentNormal, disocclusionThreshold);
+    bilinearTapsValid.y = isReprojectionTapValid(currentWorldPos, prevWorldPosInTap, currentNormal, disocclusionThreshold.y);
     prevWorldPosInTap = GetPreviousWorldPosFromPixelPos(bilinearOrigin + int2(0, 1), prevViewZs.z);
-    bilinearTapsValid.z = isReprojectionTapValid(currentWorldPos, prevWorldPosInTap, currentNormal, disocclusionThreshold);
+    bilinearTapsValid.z = isReprojectionTapValid(currentWorldPos, prevWorldPosInTap, currentNormal, disocclusionThreshold.z);
     prevWorldPosInTap = GetPreviousWorldPosFromPixelPos(bilinearOrigin + int2(1, 1), prevViewZs.w);
-    bilinearTapsValid.w = isReprojectionTapValid(currentWorldPos, prevWorldPosInTap, currentNormal, disocclusionThreshold);
+    bilinearTapsValid.w = isReprojectionTapValid(currentWorldPos, prevWorldPosInTap, currentNormal, disocclusionThreshold.w);
 
     bilinearTapsValid *= CompareMaterials(currentMaterialID.xxxx, prevMaterialIDs.xyzw, materialIDMask);
     bilinearTapsValid = skipReprojectionTest ? float4(1.0, 1.0, 1.0, 1.0) : bilinearTapsValid;
 
-    // Checking bilinear footprint validity for being in screen
-    bilinearTapsValid *= IsInScreen(prevUVVMB);
-
     // Applying reprojection
     prevSpecularIllumAnd2ndMoment = 0;
     prevSpecularResponsiveIllum = 0;
@@ -684,9 +677,9 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo
 
     // ( Optional ) High parallax - flattens surface on high motion ( test 132, e9 )
     // IMPORTANT: a must for 8-bit and 10-bit normals ( tests b7, b10, b33 )
-    deltaUvLen *= NRD_USE_HIGH_PARALLAX_CURVATURE_SILHOUETTE_FIX ? NoV : 1.0;
-    float2 motionUvHigh = pixelUv + deltaUvLen * deltaUv * gInvRectSize;
-    if (NRD_USE_HIGH_PARALLAX_CURVATURE && deltaUvLen > 1.0 && IsInScreen(motionUvHigh))
+    float deltaUvLenFixed = deltaUvLen * ( NRD_USE_HIGH_PARALLAX_CURVATURE_SILHOUETTE_FIX ? NoV : 1.0 ); // it fixes silhouettes, but leads to less flattening
+    float2 motionUvHigh = pixelUv + deltaUvLenFixed * deltaUv * gInvRectSize;
+    if (NRD_USE_HIGH_PARALLAX_CURVATURE && deltaUvLenFixed > 1.0 && IsInScreen(motionUvHigh))
     {
         float zHigh = abs(gViewZ.SampleLevel(gLinearClamp, gRectOffset + motionUvHigh * gResolutionScale, 0));
         float3 xHigh = GetCurrentWorldPosFromClipSpaceXY(motionUvHigh * 2.0 - 1.0, zHigh);
@@ -728,8 +721,7 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo
     float2 uv1 = STL::Geometry::GetScreenUv(gPrevWorldToClip, currentWorldPos - V * ApplyThinLensEquation(NoV, hitDist, curvature));
     float2 uv2 = STL::Geometry::GetScreenUv(gPrevWorldToClip, currentWorldPos);
     float a = length((uv1 - uv2) * gRectSize);
-    float b = length(deltaUv * gRectSize);
-    curvature *= float(a < 3.0 * b + gInvRectSize.x); // TODO:it's a hack, incompatible with concave mirrors ( tests 22b, 23b, 25b )
+    curvature *= float(a < 3.0 * deltaUvLen + gInvRectSize.x); // TODO:it's a hack, incompatible with concave mirrors ( tests 22b, 23b, 25b )
 
     // Thin lens equation for adjusting reflection HitT
     float hitDistFocused = ApplyThinLensEquation(NoV, hitDist, curvature);
@@ -861,10 +853,8 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo
     virtualHistoryHitDistConfidence *= STL::Math::SmoothStep(lobeRadiusInPixels + 0.25, 0.0, deltaParallaxInPixels);
 
     // Current specular signal ( surface motion )
-    float smcFactor = lerp(0.25, 0.001, SMC); // TODO: tune better?
-    smcFactor *= lerp(1.0, lerp(1.0, 0.25, SMC), NoV);
-    float specSMBConfidence = (SMBReprojectionFound > 0 ? 1.0 : 0.0) / (1.0 + smcFactor * parallaxInPixels);
-    specSMBConfidence *= GetNormalWeight(V, Vprev, lobeHalfAngle * NoV / gFramerateScale);
+    float specSMBConfidence = (SMBReprojectionFound > 0 ? 1.0 : 0.0) *
+        GetNormalWeight(V, Vprev, lobeHalfAngle * NoV / gFramerateScale);
 
     float specSMBAlpha = 1.0 - specSMBConfidence;
     float specSMBResponsiveAlpha = 1.0 - specSMBConfidence;
diff --git a/Shaders/Source/SpecularReflectionMv_Compute.cs.hlsl b/Shaders/Source/SpecularReflectionMv_Compute.cs.hlsl
index 5ddc974..283f6a1 100644
--- a/Shaders/Source/SpecularReflectionMv_Compute.cs.hlsl
+++ b/Shaders/Source/SpecularReflectionMv_Compute.cs.hlsl
@@ -135,9 +135,9 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
 
         // ( Optional ) High parallax - flattens surface on high motion ( test 132, e9 )
         // IMPORTANT: a must for 8-bit and 10-bit normals ( tests b7, b10, b33 )
-        deltaUvLen *= NRD_USE_HIGH_PARALLAX_CURVATURE_SILHOUETTE_FIX ? NoV : 1.0;
-        float2 motionUvHigh = pixelUv + deltaUvLen * deltaUv * gInvRectSize;
-        if( NRD_USE_HIGH_PARALLAX_CURVATURE && deltaUvLen > 1.0 && IsInScreen( motionUvHigh ) )
+        float deltaUvLenFixed = deltaUvLen * ( NRD_USE_HIGH_PARALLAX_CURVATURE_SILHOUETTE_FIX ? NoV : 1.0 ); // it fixes silhouettes, but leads to less flattening
+        float2 motionUvHigh = pixelUv + deltaUvLenFixed * deltaUv * gInvRectSize;
+        if( NRD_USE_HIGH_PARALLAX_CURVATURE && deltaUvLenFixed > 1.0 && IsInScreen( motionUvHigh ) )
         {
             // Construct the other edge point "xHigh"
             float zHigh = abs( gIn_ViewZ.SampleLevel( gLinearClamp, gRectOffset + motionUvHigh * gResolutionScale, 0 ) );
@@ -183,8 +183,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos :
         float2 uv1 = STL::Geometry::GetScreenUv( gWorldToClipPrev, X - V * ApplyThinLensEquation( NoV, hitDistForTracking, curvature ) );
         float2 uv2 = STL::Geometry::GetScreenUv( gWorldToClipPrev, X );
         float a = length( ( uv1 - uv2 ) * gRectSize );
-        float b = length( deltaUv * gRectSize );
-        curvature *= float( a < 3.0 * b + gInvRectSize.x ); // TODO:it's a hack, incompatible with concave mirrors ( tests 22b, 23b, 25b )
+        curvature *= float( a < 3.0 * deltaUvLen + gInvRectSize.x ); // TODO:it's a hack, incompatible with concave mirrors ( tests 22b, 23b, 25b )
     }
 
     // Virtual motion