From 3d6f888fb6aac8e92afae6d088d65acd52014095 Mon Sep 17 00:00:00 2001 From: Leorize Date: Mon, 17 Jan 2022 06:41:48 -0600 Subject: [PATCH] koch: generate zstd archives by default Zstandard is a fast compression algorithm with ratio rivaling that of XZ. Empirical testing on my Ryzen 5 3600 resulted in a massive improvement in compression and decompression speed, in particular: - Zstd is 13% faster than XZ for compression. - Zstd is 88% faster than XZ for decompression. - Zstd archive is 6% larger than XZ archive. All tests are done on a unix binary tarball generated by: ./koch.py unixrelease --format:tar With this change we will be trading off a small amount of space for a massive improvement in compression and decompression time. --- .github/workflows/reproducible.yml | 4 ++-- tools/koch/koch.nim | 4 ++-- tools/niminst/niminst.nim | 10 +++++++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/reproducible.yml b/.github/workflows/reproducible.yml index dddb00180944..231b19ac5eb0 100644 --- a/.github/workflows/reproducible.yml +++ b/.github/workflows/reproducible.yml @@ -32,11 +32,11 @@ jobs: test: - name: Source archive command: './koch.py boot -d:danger && ./koch.py csource -d:danger && ./koch.py archive' - pattern: 'build/*.tar.xz' + pattern: 'build/*.tar.zst' - name: Unix binary archive command: './koch.py unixrelease' - pattern: 'build/*.tar.xz' + pattern: 'build/*.tar.zst' # Note: this tests the zip generation and not exe generation determinism. 
# diff --git a/tools/koch/koch.nim b/tools/koch/koch.nim index 00cd80a54773..2aef36e0100d 100644 --- a/tools/koch/koch.nim +++ b/tools/koch/koch.nim @@ -226,7 +226,7 @@ proc archive(args: string) = nimexec("cc -r $2 --var:version=$1 --var:mingw=none --main:compiler/nim.nim scripts compiler/installer.ini" % [VersionAsString, compileNimInst]) let (commit, date) = getSourceMetadata() - exec("$# --var:version=$# --var:mingw=none --var:commit=$# --var:commitdate=$# --main:compiler/nim.nim --format:tar.xz $# archive compiler/installer.ini" % + exec("$# --var:version=$# --var:mingw=none --var:commit=$# --var:commitdate=$# --main:compiler/nim.nim --format:tar.zst $# archive compiler/installer.ini" % ["tools" / "niminst" / "niminst".exe, VersionAsString, quoteShell(commit), quoteShell(date), args]) proc buildTool(toolname, args: string) = @@ -290,7 +290,7 @@ proc binArchive(target: BinArchiveTarget, args: string) = of Windows: quoteShellCommand(["--format:zip", "--binaries:windows"]) of Unix: - quoteShellCommand(["--format:tar.xz", "--binaries:unix"]) + quoteShellCommand(["--format:tar.zst", "--binaries:unix"]) archive(binaryArgs & " " & args) diff --git a/tools/niminst/niminst.nim b/tools/niminst/niminst.nim index ca4a891d3ba8..55811acf0f2d 100644 --- a/tools/niminst/niminst.nim +++ b/tools/niminst/niminst.nim @@ -38,6 +38,7 @@ type Zip = "zip" ## A zip archive Tar = "tar" ## An uncompressed tarball TarXz = "tar.xz" ## A tarball compressed with xz + TarZst = "tar.zst" ## A tarball compressed with zstd FileCategory = enum fcWinBin, # binaries for Windows @@ -73,7 +74,7 @@ type format: ArchiveFormat const - tarFormats = {Tar..TarXz} + tarFormats = {Tar..TarZst} ## Archive formats based on tar unixDirVars: array[fcConfig..fcLib, string] = [ "$configdir", "$datadir", "$docdir", "$libdir" @@ -943,6 +944,13 @@ proc archiveDist(c: var ConfigData) = case c.format of TarXz: checkedExec("xz", "-9f", proj & ".tar") + of TarZst: + # Archive level 20 gives us roughly the same ratio as 
xz while having + # around 40% speed up in decompression time and a lot more in + # compression time thanks to multithreading support. + checkedExec( + "zstd", "-T0", "-20f", "--ultra", "--rm", proj & ".tar" + ) else: discard