From 9d0237fe8ffd498ac56b6eef31e77d63bb0bc1ad Mon Sep 17 00:00:00 2001 From: Georges Hatem Date: Sat, 27 Apr 2024 07:34:55 +0300 Subject: [PATCH 1/3] undo the dict-size parameter --- entries/ghatem-fpc/src/OneBRCproj.lpr | 12 ++---------- entries/ghatem-fpc/src/baseline.console.pas | 2 -- entries/ghatem-fpc/src/onebrc.pas | 12 ++++-------- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/entries/ghatem-fpc/src/OneBRCproj.lpr b/entries/ghatem-fpc/src/OneBRCproj.lpr index 4c68867..91d6af5 100644 --- a/entries/ghatem-fpc/src/OneBRCproj.lpr +++ b/entries/ghatem-fpc/src/OneBRCproj.lpr @@ -18,7 +18,6 @@ TOneBRCApp = class(TCustomApplication) private FFileName: string; FThreadCount: Integer; - FDictSize: Integer; procedure RunOneBRC; protected procedure DoRun; override; @@ -34,7 +33,7 @@ procedure TOneBRCApp.RunOneBRC; var vOneBRC: TOneBRC; begin - vOneBRC := TOneBRC.Create (FThreadCount, FDictSize); + vOneBRC := TOneBRC.Create (FThreadCount); try try vOneBRC.mORMotMMF(FFileName); @@ -89,17 +88,15 @@ procedure TOneBRCApp.DoRun; ErrorMsg: String; begin // quick check parameters - ErrorMsg:= CheckOptions(Format('%s%s%s%s%s:',[ + ErrorMsg:= CheckOptions(Format('%s%s%s%s:',[ cShortOptHelp, cShortOptThread, - cShortOptDictSize, cShortOptVersion, cShortOptInput ]), [ cLongOptHelp, cLongOptThread+':', - cLongOptDictSize+':', cLongOptVersion, cLongOptInput+':' ] @@ -129,11 +126,6 @@ procedure TOneBRCApp.DoRun; FThreadCount := StrToInt (GetOptionValue(cShortOptThread, cLongOptThread)); end; - FDictSize := 45003; - if HasOption(cShortOptDictSize, cLongOptDictSize) then begin - FDictSize := StrToInt (GetOptionValue(cShortOptDictSize, cLongOptDictSize)); - end; - if HasOption(cShortOptInput, cLongOptInput) then begin FFileName := GetOptionValue( cShortOptInput, diff --git a/entries/ghatem-fpc/src/baseline.console.pas b/entries/ghatem-fpc/src/baseline.console.pas index 6ef14c8..bf961c0 100644 --- a/entries/ghatem-fpc/src/baseline.console.pas +++ 
b/entries/ghatem-fpc/src/baseline.console.pas @@ -21,8 +21,6 @@ interface cLongOptInput = 'input-file'; cShortOptThread: Char = 't'; cLongOptThread = 'threads'; - cShortOptDictSize: Char = 's'; - cLongOptDictSize = 'size'; {$ELSE} cOptionHelp: array of string = ['-h', '--help']; cOptionVersion: array of string = ['-v', '--version']; diff --git a/entries/ghatem-fpc/src/onebrc.pas b/entries/ghatem-fpc/src/onebrc.pas index c336858..d150230 100644 --- a/entries/ghatem-fpc/src/onebrc.pas +++ b/entries/ghatem-fpc/src/onebrc.pas @@ -10,10 +10,8 @@ interface function RoundExDouble(const ATemp: Double): Double; inline; -{$WRITEABLECONST ON} const - cDictSize: Integer = 45003; -{$WRITEABLECONST OFF} + cDictSize: Integer = 45007; type @@ -62,10 +60,10 @@ TOneBRC = class FThreads: array of TThread; FStationsDicts: array of TMyDictionary; - procedure ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); inline; + procedure ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); public - constructor Create (const aThreadCount: UInt16; const aDictSize: Integer); + constructor Create (const aThreadCount: UInt16); destructor Destroy; override; function mORMotMMF (const afilename: string): Boolean; procedure DispatchThreads; @@ -251,11 +249,9 @@ procedure TOneBRC.ExtractLineData(const aStart: Int64; const aEnd: Int64; out aL //--------------------------------------------------- -constructor TOneBRC.Create (const aThreadCount: UInt16; const aDictSize: Integer); +constructor TOneBRC.Create (const aThreadCount: UInt16); var I: UInt16; begin - cDictSize := aDictSize; - FThreadCount := aThreadCount; SetLength (FStationsDicts, aThreadCount); SetLength (FThreads, aThreadCount); From 1e200be2056a97c83d4642fb1ac98ec8b91184e5 Mon Sep 17 00:00:00 2001 From: Georges Hatem Date: Sat, 27 Apr 2024 09:40:59 +0300 Subject: [PATCH 2/3] new HASHMULT build to test a different hash function --- 
entries/ghatem-fpc/src/OneBRCproj.lpi | 31 ++++++++++++++++++++++++++- entries/ghatem-fpc/src/onebrc.pas | 4 ++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/entries/ghatem-fpc/src/OneBRCproj.lpi b/entries/ghatem-fpc/src/OneBRCproj.lpi index 310cdad..853c254 100644 --- a/entries/ghatem-fpc/src/OneBRCproj.lpi +++ b/entries/ghatem-fpc/src/OneBRCproj.lpi @@ -15,7 +15,7 @@ - + @@ -108,6 +108,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/entries/ghatem-fpc/src/onebrc.pas b/entries/ghatem-fpc/src/onebrc.pas index d150230..746c3db 100644 --- a/entries/ghatem-fpc/src/onebrc.pas +++ b/entries/ghatem-fpc/src/onebrc.pas @@ -136,9 +136,13 @@ procedure TMyDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; vDbl: Double; vOffset: Integer; begin +{$IFDEF HASHMULT} vDbl := aKey * cHashConst; vDbl := vDbl - Trunc (vDbl); vIdx := Trunc (vDbl * cDictSize); +{$ELSE} + vIdx := aKey mod cDictSize; +{$ENDIF} aFound := False; From 2c6cd054d80ed494a92fd09c0f0eeb385eab2d84 Mon Sep 17 00:00:00 2001 From: Georges Hatem Date: Sat, 27 Apr 2024 09:48:28 +0300 Subject: [PATCH 3/3] readme improvements --- entries/ghatem-fpc/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/entries/ghatem-fpc/README.md b/entries/ghatem-fpc/README.md index e00d4e3..0cf1e53 100644 --- a/entries/ghatem-fpc/README.md +++ b/entries/ghatem-fpc/README.md @@ -233,3 +233,12 @@ a few performance improvements, and measurements as per gcarreno on a busy machi - using mORMot's `crc32c` function instead of the native `crc32`, time dropped to 3.8 seconds - I had removed my pre-allocated records implementation. restored it in the custom dictionary class, time dropped to 3.2 seconds - skipping a few chars that we don't need to bother with, no timing yet + +## v.5 (2024-04-27) + +Various attempts at dictionary sizes, ranging from 45k to 95k. Even though larger dictionaries reduce collision tremendously, a dictionary of size 45k was still optimal. 
+ +Another trial with various hash functions, a simple modulus vs. a slightly more complex one: the modulus is slower on my PC; it remains to be tried on the test env. +Can be tested with the HASHMULT build option. + +Finally, it seems choosing a dictionary size that is a prime number is also recommended: it shaves 1 second out of 20 on my PC.