Skip to content

Commit

Permalink
paq8px_v193fix2
Browse files Browse the repository at this point in the history
- Slightly improved 24/32bpp image model
- Fixed DEC Alpha transform (failed on non-multiple of 4 block sizes)
- Fixed zlib transform bug from v142fix2 that caused a huge slowdown
- Use deterministic AVX2 code path for Adam optimizer
  • Loading branch information
MarcioPais committed Aug 31, 2020
1 parent a1fad33 commit 6e1d570
Show file tree
Hide file tree
Showing 9 changed files with 24 additions and 11 deletions.
13 changes: 11 additions & 2 deletions CHANGELOG
@@ -1,4 +1,4 @@
---------------
---------------
VERSION HISTORY
---------------

Expand Down Expand Up @@ -1768,5 +1768,14 @@ paq8px_v193 by Márcio Pais


paq8px_v193fix1 by Zoltán Gotthardt
2020.08.30
- Cosmetic changes, fixed compiler warnings
-- Fixed OLS predictor (bug since v189)
+- Fixed OLS predictor (bug since v189)
+
+
+paq8px_v193fix2 by Márcio Pais
+2020.08.31
+- Slightly improved 24/32bpp image model
+- Fixed DEC Alpha transform (failed on non-multiple of 4 block sizes)
+- Fixed zlib transform bug from v142fix2 that caused a huge slowdown
+- Use deterministic AVX2 code path for Adam optimizer
3 changes: 3 additions & 0 deletions filter/DecAlphaFilter.hpp
Expand Up @@ -74,6 +74,9 @@ class DECAlphaFilter : public Filter {
blk[i + 2u] = instruction >> 16u;
blk[i + 3u] = instruction >> 24u;
}
+std::size_t const l = static_cast<std::size_t>(length - (length & 3u));
+for (std::size_t i = 0u; i < static_cast<std::size_t>(length & 3u); i++)
+blk[l + i] = encoder->decompressByte();

if (fMode == FDECOMPRESS) {
out->blockWrite(&blk[0u], length);
Expand Down
4 changes: 2 additions & 2 deletions filter/zlib.hpp
Expand Up @@ -132,7 +132,7 @@ static auto encodeZlib(File *in, File *out, uint64_t len, int &headerSize) -> in
uint32_t blSize = min(uint32_t(len - i), block);
nTrials = 0;
for( int j = 0; j < 81; j++ ) {
-if( diffCount[j] == limit ) {
+if( diffCount[j] >= limit ) {
continue;
}
nTrials++;
Expand All @@ -159,7 +159,7 @@ static auto encodeZlib(File *in, File *out, uint64_t len, int &headerSize) -> in

// Recompress/deflate block with all possible parameters
for( int j = mtf.getFirst(); j >= 0; j = mtf.getNext()) {
-if( diffCount[j] == limit ) {
+if( diffCount[j] >= limit ) {
continue;
}
nTrials++;
Expand Down
2 changes: 1 addition & 1 deletion lstm/Adam.hpp
Expand Up @@ -5,7 +5,7 @@
#include "../utils.hpp"
#include "../simd.hpp"
#include <cmath>
-#define USE_RSQRT
+//#define USE_RSQRT

template <SIMD simd, std::uint16_t B1, std::uint8_t E1, std::uint16_t B2, std::uint8_t E2, std::uint16_t C, std::uint8_t E3>
class Adam :
Expand Down
3 changes: 2 additions & 1 deletion lstm/SimdFunctions.hpp
Expand Up @@ -66,11 +66,12 @@ float dot256_ps_fma3(float const* x1, float const* x2, std::size_t const len, fl
sum0 = _mm256_fmadd_ps(_mm256_loadu_ps(x1 + i), _mm256_loadu_ps(x2 + i), sum0);
sum1 = _mm256_fmadd_ps(_mm256_loadu_ps(x1 + i + SIMDW), _mm256_loadu_ps(x2 + i + SIMDW), sum1);
}
+sum0 = _mm256_add_ps(sum0, sum1);
if (i < limit)
sum0 = _mm256_fmadd_ps(_mm256_loadu_ps(x1 + i), _mm256_loadu_ps(x2 + i), sum0);
for (; remainder > 0; remainder--)
init += x1[len - remainder] * x2[len - remainder];
-return init + hsum256_ps_avx(_mm256_add_ps(sum0, sum1));
+return init + hsum256_ps_avx(sum0);
#endif
}

Expand Down
4 changes: 2 additions & 2 deletions model/Image24BitModel.cpp
Expand Up @@ -491,7 +491,7 @@ void Image24BitModel::mix(Mixer &m) {
if( ++col >= stride * 8 ) {
col = 0;
}
-m.set(5, 6);
+m.set(5 + (((line & 0x7u) << 5u) | col), 5 + 256, 5);
m.set(min(63, column[0]) + ((ctx[0] >> 3U) & 0xC0U), 256);
m.set(min(127, column[1]) + ((ctx[0] >> 2U) & 0x180U), 512);
m.set((ctx[0] & 0x7FCU) | (bpos >> 1), 2048);
Expand All @@ -507,6 +507,6 @@ void Image24BitModel::mix(Mixer &m) {
m.set(min(255, (x + line) / 32), 256);
} else {
m.add(-2048 + ((filter >> (7 - bpos)) & 1U) * 4096);
-m.set(min(4, filter), MIXERCONTEXTSETS);
+m.set(min(4, filter), MIXERCONTEXTS);
}
}
2 changes: 1 addition & 1 deletion model/Image24BitModel.hpp
Expand Up @@ -25,7 +25,7 @@ class Image24BitModel {
public:
static constexpr int MIXERINPUTS = nSSM * SmallStationaryContextMap::MIXERINPUTS + nSM * StationaryMap::MIXERINPUTS +
nCM * (ContextMap2::MIXERINPUTS + ContextMap2::MIXERINPUTS_RUN_STATS);
-static constexpr int MIXERCONTEXTS = 6 + 256 + 512 + 2048 + 8 * 32 + 6 * 64 + 256 * 2 + 1024 + 8192 + 8192 + 8192 + 8192 + 256; //38022
+static constexpr int MIXERCONTEXTS = (5 + 256) + 256 + 512 + 2048 + 8 * 32 + 6 * 64 + 256 * 2 + 1024 + 8192 + 8192 + 8192 + 8192 + 256; //38277
static constexpr int MIXERCONTEXTSETS = 13;

Shared * const shared;
Expand Down
2 changes: 1 addition & 1 deletion model/Image8BitModel.cpp
Expand Up @@ -415,6 +415,6 @@ void Image8BitModel::mix(Mixer &m) {
m.set(min(255, (x + line) / 32), 256);
} else {
m.add(-2048 + ((filter >> (7 - bpos)) & 1U) * 4096);
-m.set(min(4, filter), MIXERINPUTS);
+m.set(min(4, filter), MIXERCONTEXTS);
}
}
2 changes: 1 addition & 1 deletion paq8px.cpp
Expand Up @@ -8,7 +8,7 @@
//////////////////////// Versioning ////////////////////////////////////////

#define PROGNAME "paq8px"
-#define PROGVERSION "193fix1" //update version here before publishing your changes
+#define PROGVERSION "193fix2" //update version here before publishing your changes
#define PROGYEAR "2020"


Expand Down

0 comments on commit 6e1d570

Please sign in to comment.