From 295438ebecbb87928289f6228f5380aeefde0a84 Mon Sep 17 00:00:00 2001 From: git567 Date: Tue, 7 May 2024 10:21:45 +0200 Subject: [PATCH] the input file is now read via mORMot's type 'TMemoryMap', the hash code is now computed via mORMot's crc32c-function, several small improvements --- entries/hgrosser/README.md | 33 +-- .../hgrosser/src/{1brc.lpi => 1brc_old.lpi} | 10 +- .../hgrosser/src/{1brc.pas => 1brc_old.pas} | 0 entries/hgrosser/src/1brc_th.lpi | 6 + entries/hgrosser/src/1brc_th.pas | 260 +++++++++++++++--- 5 files changed, 251 insertions(+), 58 deletions(-) rename entries/hgrosser/src/{1brc.lpi => 1brc_old.lpi} (94%) rename entries/hgrosser/src/{1brc.pas => 1brc_old.pas} (100%) diff --git a/entries/hgrosser/README.md b/entries/hgrosser/README.md index 1fe78ba..a9d20ee 100644 --- a/entries/hgrosser/README.md +++ b/entries/hgrosser/README.md @@ -3,42 +3,33 @@ **1 billion row Challenge entry** ## Version -Version 2.00 (first version with threads) +Version 2.10 (version with threads, does NOT yet work for 400 stations or 5 billion rows) ## How to compile The program was developed with FPC 3.2.2 and Lazarus 2.2.4 -1brc.lpi + 1brc.pas = version without threads +It uses package **mORMot 2** to compile, which you can download from https://github.com/synopse/mORMot2 -1brc_th.lpi + 1brc_th.pas = version with threads +Files to compile: **1brc_th.lpi** and **1brc_th.pas** = this version with threads ## How to start ``` -Usage: [ [buffer size in KB]] - - thread count: allowed range = [1..32] +Usage: [bit-width for hash-list] + - thread count: allowed range = [1..64] - bit-width for hash-list: sets the size of the hash list, e.g. '16' => 65536 entries, - allowed range = [16..28], Default=18 - - buffer size in KB: allowed range = [1..2,000,000 KB], Default=128 KB -Example: hgrosser measurements.txt 32 18 128 + allowed range = [16..28], Default=16 +Example: hgrosser measurements.txt 32 16 ``` -There are no switches like `-i` etc, only 2..4 values. +There are no switches like `-i` etc, only 2 or 3 values. -### Optimizing the 2nd command line parameter -This parameter sets the thread count. With my own old CPU I could only test 1..4 threads. Although I expect 32 threads to be the fastest, I would be very interested to see the results for 1, 4, 8, 16 and 32 threads. Please use for this test the defaults for the 3rd and 4th command line parameters. - -### Optimizing the 3rd command line parameter -In theory the program should run faster with greater bit-widths for the hash-list (because of less collisions). On the computer of Gus - without threads - 18 bits was the fastest. Please try the values from 16 to 20 again and use '32' for the 2nd command line parameter and the default for the 4th command line parameter. - -### Optimizing the 4th command line parameter -After the 2nd and 3rd command line parameters had been optimized, please try with them 64, 96, 128, 192 and 256 KB as the 4th command line parameter. Thanks a lot! +There is no more need for further optimizing. Please use '32' as 2nd command line parameter and '16' as 3rd command line parameter. ## How the program works -The Program works with multi threads. +The Program works with threads. To speed things up: -- the input file is read via procedure 'blockread' ... -- into an AnsiString, so that function 'PosEx' can be used to parse it +- the input file is read via **mORMot**'s type 'TMemoryMap' - to manage the city names, a self made hash-list is used - temperatures are stored as integers (multiplied by 10) @@ -50,4 +41,4 @@ To speed things up: - Version 1.60: hash-list optimized, some minor improvements, Conditional "noCR" added - Version 1.61: Conditional "noCR" constantely enabled => input files must NOT have CR's - Version 2.00: 1st version with threads - +- Version 2.10: the input file is now read via **mORMot**'s type 'TMemoryMap', the hash code is now computed via **mORMot**'s crc32c-function, several small improvements diff --git a/entries/hgrosser/src/1brc.lpi b/entries/hgrosser/src/1brc_old.lpi similarity index 94% rename from entries/hgrosser/src/1brc.lpi rename to entries/hgrosser/src/1brc_old.lpi index 83aae8c..5564e3b 100644 --- a/entries/hgrosser/src/1brc.lpi +++ b/entries/hgrosser/src/1brc_old.lpi @@ -11,7 +11,7 @@ - + <Title Value="1brc_old"/> <UseAppBundle Value="False"/> <ResourceType Value="res"/> </General> @@ -24,7 +24,7 @@ <CompilerOptions> <Version Value="11"/> <Target> - <Filename Value="../../../bin/hgrosser"/> + <Filename Value="../../../bin/hgrosser_old"/> </Target> <SearchPaths> <IncludeFiles Value="$(ProjOutDir)"/> @@ -59,7 +59,7 @@ <CompilerOptions> <Version Value="11"/> <Target> - <Filename Value="../../../bin/hgrosser"/> + <Filename Value="../../../bin/hgrosser_old"/> </Target> <SearchPaths> <IncludeFiles Value="$(ProjOutDir)"/> @@ -104,7 +104,7 @@ </RunParams> <Units Count="1"> <Unit0> - <Filename Value="1brc.pas"/> + <Filename Value="1brc_old.pas"/> <IsPartOfProject Value="True"/> </Unit0> </Units> @@ -112,7 +112,7 @@ <CompilerOptions> <Version Value="11"/> <Target> - <Filename Value="../../../bin/hgrosser"/> + <Filename Value="../../../bin/hgrosser_old"/> </Target> <SearchPaths> <IncludeFiles Value="$(ProjOutDir)"/> diff --git a/entries/hgrosser/src/1brc.pas b/entries/hgrosser/src/1brc_old.pas similarity index 100% rename from entries/hgrosser/src/1brc.pas rename to entries/hgrosser/src/1brc_old.pas diff --git a/entries/hgrosser/src/1brc_th.lpi b/entries/hgrosser/src/1brc_th.lpi index 4c0d7e2..e9aaf77 100644 --- a/entries/hgrosser/src/1brc_th.lpi +++ b/entries/hgrosser/src/1brc_th.lpi @@ -101,6 +101,11 @@ </Mode0> </Modes> </RunParams> + <RequiredPackages Count="1"> + <Item1> + <PackageName Value="mormot2"/> + </Item1> + </RequiredPackages> <Units Count="1"> <Unit0> <Filename Value="1brc_th.pas"/> @@ -115,6 +120,7 @@ </Target> <SearchPaths> <IncludeFiles Value="$(ProjOutDir)"/> + <OtherUnitFiles Value="../../../baseline/Common"/> <UnitOutputDirectory Value="../../../bin/lib/$(TargetCPU)-$(TargetOS)"/> </SearchPaths> <Parsing> diff --git a/entries/hgrosser/src/1brc_th.pas b/entries/hgrosser/src/1brc_th.pas index f0228a6..c120add 100644 --- a/entries/hgrosser/src/1brc_th.pas +++ b/entries/hgrosser/src/1brc_th.pas @@ -4,6 +4,7 @@ { $DEFINE XTEST} {please keep this UNDEFINED / compile a safe or a fast program} {$DEFINE XINLINE} {please keep this DEFINED / use INLINE for routines or not} +{$DEFINE XMORMOT} {please keep this DEFINED / use mORMot twice or not} {$DEFINE XSTDOUT} {please keep this DEFINED / output result to STDOUT?} {$IFDEF XTEST} @@ -12,16 +13,30 @@ {$R-} {$Q-} {$S-} {$OPTIMIZATION REGVAR LEVEL4} {fast} {$ENDIF} +{$IF DEFINED(LINUX) AND DEFINED(CPU64)} + {$DEFINE LINUX64} {abbreviation} +{$ELSE} + {$UNDEF LINUX64} +{$ENDIF} + {$INLINE ON} {allows INLINE procedures} uses - {$IFDEF UNIX} - cthreads, - {$ENDIF} - sysutils, strutils, math, classes; +{$IFDEF UNIX} + cthreads, +{$ENDIF} + sysutils, strutils, math, classes +{$IFNDEF LINUX64} + ,baseline.Common // only for func RoundExDouble() +{$ENDIF} +{$IFDEF XMORMOT} + ,mormot.core.os // only for type 'TMemoryMap' + ,mormot.core.base // only for func crc32c() +{$ENDIF} + ; const - M_version = '2.00'; {version number} + M_version = '2.10'; {version number} {------------------------------ Common routines: ------------------------------} @@ -108,6 +123,7 @@ procedure error_halt(s: ansistring); {$ASMMODE INTEL} +{$IFDEF LINUX64} function FPHashX(var buf; len: dword): dword; assembler; nostackframe; inline; {returns a hash code similar to function FPHash() from FPC-Unit 'Contnrs', but with the difference, that all hash codes for all cities here are UNIQUE. @@ -126,11 +142,34 @@ function FPHashX(var buf; len: dword): dword; assembler; nostackframe; inline; JNZ @LOOP end; {FPHashX} +{$ELSE} + +function FPHashX(var buf; len: dword): dword; inline; + {returns a hash code similar to function FPHash() from FPC-Unit 'Contnrs', + but with the difference, that all hash codes for all cities here are UNIQUE} +var + p, pmax: PChar; +begin +{$PUSH} +{$Q-} {Overflow checks off} + Result := dword(-1); + p := addr(buf); + pmax := p + len; + + while (p < pmax) do + begin + Result := dword(longint(Result shl 5) - longint(Result)) xor dword(p^); + Inc(p); + end; +{$POP} +end; {FPHashX} +{$ENDIF LINUX64} + {----------------------------- consts and types: ------------------------------} const MaxCities = 41500; {max. size for array WA[]} - MaxStrLen = 56; {string enough for 1 line incl. LF} + MaxLineLen = 56; {string enough for 1 line incl. LF} OverlapLen = 60; {extension to hash list array to avoid wrap-arounds. Needs '54' for 'bitsHash=16'} LF = 10; {line separator} @@ -257,7 +296,9 @@ TMyThread = class(TThread) bufsiz: longint; bitsHash: integer); procedure Execute; override; procedure process_chunk; - +{$IFDEF XMORMOT} + procedure process_chunk_MM; +{$ENDIF} procedure myHL_init; function myHL_find_and_add(out new: boolean): dataTyp; {$IFDEF XINLINE} inline; {$ENDIF} @@ -299,10 +340,15 @@ constructor TMyThread.Create(job: integer; fspec: ansistring; p1, p2: int64; procedure TMyThread.Execute; begin - myHL_init; {initializes my hash list} - process_chunk; {reads a file chunk and processes all it's weather data} + myHL_init; {initializes my hash list} +{$IFNDEF XMORMOT} + process_chunk; {reads a file chunk and processes all it's weather data} +{$ELSE} + process_chunk_MM; {reads a file chunk and processes all it's weather data} +{$ENDIF} + debug('Job ' + IntToStr(jobNo) + ' done ' + outTimer(start0) + #13); - sort_WA; {sorts array WA[0..cntWA-1] by city names} + sort_WA; {sorts array WA[0..cntWA-1] by city names} done := True; end; {Execute} @@ -381,6 +427,75 @@ procedure TMyThread.process_chunk; Close(fi); end; {process_chunk} +const +{$IFDEF CPU32} + MaxArraySize = High(longint);; {max. allowed array-size: } +{$ELSE} + MaxArraySize = High(int64); +{$ENDIF} + +{$IFDEF XMORMOT} +type + byteArray = array[0..MaxArraySize - 1] of byte; {file buffer} + +var + MM: TMemoryMap; + +procedure TMyThread.process_chunk_MM; + {reads positions [pos1..pos2] from file 'fspecIn' via Mormot-MemMap and + processes all weather measurements; It is assured, that position 'pos1' is + the 1st byte of a city name and that position 'pos2' is a LF} +var + pBA: ^byteArray; + p1, p2: int64; + len: longint; + p, i: integer; + neg: boolean; +begin + pBA := pointer(MM.Buffer); {buffer-startaddress} + p1 := pos1; + + repeat + p2 := succ(IndexByte(pBA^[p1], succ(pos2 - p1), LF)); {seeks next 'LF'} +{$IFDEF XTEST} + if p2 = 0 then error_halt('p2=0 in process_chunk_MM()'); +{$ENDIF} + Inc(p2, pred(p1)); {=> p2:=position of 'LF'} + len := p2 - p1; {length for city name and temperature} + city[0] := chr(len); + move(pBA^[p1], city[1], len); + {extract temperature: } + p := succ(IndexByte(city[1], length(city), Ord(';'))); {seeks for ';'} +{$IFDEF XTEST} + if p = 0 then error_halt('Internal Error: p=0 in city="' + city + '"'); +{$ENDIF} + i := succ(p); {get numeric temperature: } + if city[i] = '-' then + begin + Inc(i); + neg := True; + end + else + neg := False; + city[length(city) - 1] := city[length(city)]; {skip the '.': } + Dec(city[0]); + tmp := 0; + + while i <= length(city) do + begin + tmp := 10 * tmp + Ord(city[i]) - $30; + Inc(i); + end; + if neg then tmp := -tmp; + + city[0] := chr(p - 1); {cityname without ';' and temperature} + myHL_addCityTemp; {stores 'city' and 'tmp' in array WA[]} + p1 := succ(p2); {'p1' points to the 1. Byte after the LF} + + until p1 > pos2; +end; {process_chunk_MM} +{$ENDIF XMORMOT} + procedure TMyThread.myHL_init; {initializes my hash list} begin @@ -404,16 +519,20 @@ function TMyThread.myHL_find_and_add(out new: boolean): dataTyp; O: new: was a new entry in array WA[] added? ex I: WA[cntWA++] ATTENTION: this hash list requires, that all hash codes from function - FPHashX() are UNIQUE for all city names!} + FPHashX() or crc32c() are UNIQUE for all city names!} var p: ^hashRec; h: hashTyp; i: longint; begin - h := FPHashX(city[1], length(city)); {compute unique hash code for 'city'} +{$IFNDEF XMORMOT} + h := FPHashX(city[1],length(city)); {compute unique hash code for 'city': } +{$ELSE} + h := crc32c(0,@city[1],length(city)); +{$ENDIF} p := @HL[h and AndHash]; {get Index in HL[0..AndHash]} - if not Assigned(p^.hdata) then {if this entry in HL[] is still free: } + if not Assigned(p^.hdata) then {if this entry in HL[] is still free: } begin new := True; p^.hhash := h; @@ -492,8 +611,7 @@ procedure TMyThread.sort_WA; end; function RoundExString(x: double): NumStr; -{$IFDEF XINLINE} inline; -{$ENDIF} +{$IFDEF XINLINE} inline; {$ENDIF} {new official rounding function} var V, Q, R: integer; @@ -514,12 +632,22 @@ function RoundExString(x: double): NumStr; end; {RoundExString} function myRound(sum, Count: longint): NumStr; -{$IFDEF XINLINE} inline; -{$ENDIF} +{$IFDEF XINLINE} inline; {$ENDIF} {using new official rounding function from 26.3.24} +{$IFNDEF LINUX64} +var + s: NumStr; + d: double; +{$ENDIF} begin +{$IFDEF LINUX64} exit(RoundExString(sum / (Count * 10))); -end; +{$ELSE} + d := baseline.Common.RoundExDouble(sum / (Count * 10)); + str(d: 0: 1, s); + exit(s); +{$ENDIF} +end; {myRound} procedure create_output; {creates the result of this program: either to STDOUT or into a textfile} @@ -584,12 +712,12 @@ procedure process_measurements(fspecIn: ansistring; buflen: longint; {starts all threads to do their jobs} var fi: file; - Buf: array[1..MaxStrLen] of byte; + Buf: array[1..MaxLineLen] of byte; size, p1, p2, p: int64; j, dones: integer; oldFileMode: byte; begin - start0 := GetTickCount64; {start Timer} + start0 := GetTickCount64; {start main Timer} oldFileMode := FileMode; {save value} FileMode := fmShareDenyNone; {open file in read-only Mode, allow others} @@ -612,8 +740,8 @@ procedure process_measurements(fspecIn: ansistring; buflen: longint; else begin seek(fi, p2); {increase 'p2' until 1st 'LF': } - blockread(fi, Buf, MaxStrLen); - p := IndexByte(Buf, MaxStrLen, LF); + blockread(fi, Buf, MaxLineLen); + p := IndexByte(Buf, MaxLineLen, LF); if p < 0 then error_halt('p < 0 in process_measurements()'); Inc(p2, p); end; @@ -628,20 +756,71 @@ procedure process_measurements(fspecIn: ansistring; buflen: longint; for j := 1 to threads do if TRA[j].done then Inc(dones); until dones = threads; - create_output; {creates the result of this program to STDOUT} + create_output; {creates + sends the result of this program to STDOUT} for j := 1 to threads do TRA[j].Free; {frees all threads} debug('Elapsed time: ' + outTimer(start0)); end; {process_measurements} +{$IFDEF XMORMOT} +procedure process_measurements_MM(fspecIn: ansistring; bitsHash: integer); +{starts all threads to do their jobs (with Mormot-MemMap function)} +var + pBA: ^byteArray; + s: string; + size, p1, p2, p: int64; + j, dones: integer; + ok: boolean; +begin + start0 := GetTickCount64; {start main Timer} + ok := MM.Map(fspecIn); {init Mormot-MemMap} + if not ok then error_halt('TMemoryMap.Map() failed'); + + pBA := pointer(MM.Buffer); {buffer-startaddress} + size := MM.Size; {filesize} + debug('MM: fspec=' + SysUtils.ExtractFileName(fspecIn) + ' filesize=' + + IntToStr3(size) + ' bitsHash=' + IntToStr(bitsHash)); + + SetLength(TRA, threads + 1); {TRA[0] is unused} + + p2 := -1; + for j := 1 to threads do + begin + p1 := p2 + 1; + p2 := j * round(size / threads); + if j = threads then p2 := size - 1 {last job => read until end of file} + else + begin + p := IndexByte(pBA^[p2], MaxLineLen, LF); {seeks next LF} + if p < 0 then error_halt('p < 0 in process_measurements_MM()'); + Inc(p2, p); + end; + TRA[j] := TMyThread.Create(j, fspecIn, p1, p2, 1, bitsHash); + end; + + repeat {wait until all jobs are done: } + dones := 0; + sleep(1); + for j := 1 to threads do if TRA[j].done then Inc(dones); + until dones = threads; + + MM.UnMap; {free 'MM'} + + create_output; {creates + sends the result of this program to STDOUT} + + for j := 1 to threads do TRA[j].Free; + debug('Elapsed time: ' + outTimer(start0)); +end; {process_measurements_MM} +{$ENDIF XMORMOT} + {-------------------------- command line parameters: --------------------------} const ThreadsMin = 1; {minimum allowed threads} - ThreadsMax = 32; {maximum allowed threads} + ThreadsMax = 64; {maximum allowed threads} HashBitsMin = 16; {Min. bit-width for the hash-array HL[]} HashBitsMax = 28; {Max. bit-width for the hash-array HL[]} - HashBitsDef = 18; {Default bit-width for the hash-array HL[]} + HashBitsDef = 16; {Default bit-width for the hash-array HL[]} BufLenMin_KB = 1; {Min. file buffer size in KB} BufLenMax_KB = 2000000; {Max. file buffer size in KB} BufLenDef_KB = 128; {Default file buffer size in KB} @@ -651,21 +830,22 @@ procedure syntax_halt; begin writeln('1 billion row Challenge program with threads by Hartmut Grosser, version ', M_version); - writeln('Usage: <path to input file> <thread count> [<bit-width for hash-list> ', - '[buffer size in KB]]'); + writeln('Usage: <path to input file> <thread count> [<bit-width for hash-list>', + {$IFNDEF XMORMOT} ' [buffer size in KB]', {$ENDIF} ']'); writeln(' - thread count: allowed range = [', IntToStr(ThreadsMin), '..', IntToStr(ThreadsMax), ']'); writeln(' - bit-width for hash-list: sets the size of the hash list, ', 'e.g. ''16'' => 65536 entries,'); writeln(' allowed range = [', IntToStr(HashBitsMin), '..', IntToStr(HashBitsMax), '], Default=', IntToStr(HashBitsDef)); +{$IFNDEF XMORMOT} writeln(' - buffer size in KB: allowed range = [', IntToStr(BufLenMin_KB), - '..', IntToStr3(BufLenMax_KB), ' KB], Default=', - IntToStr(BufLenDef_KB), ' KB'); + '..', IntToStr3(BufLenMax_KB), ' KB], Default=', IntToStr(BufLenDef_KB), ' KB'); +{$ENDIF} writeln('Example: ', SysUtils.ExtractFileName(ParamStr(0)), - ' measurements.txt 32 18 128'); + ' measurements.txt 32 16' {$IFNDEF XMORMOT} , ' 128' {$ENDIF}); halt(1); -end; +end; {syntax_halt} var fspecInp: ansistring; {filespec of input file} @@ -674,11 +854,17 @@ procedure syntax_halt; procedure take_command_line_parameters; {checks command line parameters. Errors => show message and halt} +const +{$IFNDEF XMORMOT} + MaxP = 4; {with file buffer size} +{$ELSE} + MaxP = 3; {without file buffer size} +{$ENDIF} var p, code: integer; begin p := ParamCount; - if (p < 2) or (p > 4) then syntax_halt; + if (p < 2) or (p > MaxP) then syntax_halt; fspecInp := ParamStr(1); hashBits := HashBitsDef; @@ -722,12 +908,22 @@ procedure take_command_line_parameters; {$ELSE} debug('without INLINE'); {$ENDIF} +{$IFNDEF XMORMOT} + debug('without mORMot'); +{$ELSE} + debug('with mORMot'); +{$ENDIF} {$IFDEF XSTDOUT} debug('Output=STDOUT'); {$ELSE} debug('Output=Textfile'); {$ENDIF} +{$IFNDEF XMORMOT} process_measurements(fspecInp, buflen, hashBits); +{$ELSE} + process_measurements_MM(fspecInp, hashBits); +{$ENDIF} debug(''); end. +