diff --git a/entries/ghatem-fpc/README.md b/entries/ghatem-fpc/README.md index c526bd5..cdce45f 100644 --- a/entries/ghatem-fpc/README.md +++ b/entries/ghatem-fpc/README.md @@ -247,3 +247,31 @@ Another trial with various hash functions, a simple modulus vs. a slightly more Can be tested with the HASHMULT build option Finally, it seems choosing a dictionary size that is a prime number is also recommended: shaves 1 second out of 20 on my PC. + +## v.6 (2024-05-04) + +As of the latest results executed by Paweld, there are two main bottlenecks throttling the entire implementation, according to CallGrind and KCacheGrind: + - function ExtractLineData, 23% of total cost, of which 9% is due to `fpc_stackcheck` + - the hash lookup function, at 40% of total cost + +Currently, the hash lookup is done on an array of records. Increasing the array size causes slowness, and reducing it causes further collisions. +Will try to see how to reduce collisions (increase array size), all while minimizing the cost of cache misses. + +Edit: +The goal is to both: + - minimize collisions on the hashes (keys) by having a good hash function, but also increase the size of the keys storage + - minimize the size of the array of packed records + +The idea: + - the dictionary will no longer point to a PStationData pointer, but rather to an index between 0 and StationCount, where the record is stored in the array. + - -> data about the same station will be stored at the same index for all threads' data-arrays + - -> names will also be stored at that same index upon first encounter, and is common to all threads + - no locking needs to occur when the key is already found, since there is no multiple-write occurring + - the data-arrays are pre-allocated, and a atomic-counter will be incremented to know where the next element will be stored. + +Thinking again, this is likely similar to the approach mentioned by @synopse in one of his comments. + +For the ExtractLineData, three ideas to try implementing: + - avoid using a function, to get rid of the cost of stack checking + - reduce branching, I think it should be possible to go from 3 if-statements, to only 1 + - unroll the loop (although I had tried this in the past, did not show any improvements) diff --git a/entries/ghatem-fpc/src/OneBRC-nosharedname.lpr b/entries/ghatem-fpc/src/OneBRC-nosharedname.lpr index ef9301d..3d3ce50 100644 --- a/entries/ghatem-fpc/src/OneBRC-nosharedname.lpr +++ b/entries/ghatem-fpc/src/OneBRC-nosharedname.lpr @@ -151,12 +151,12 @@ function Ceiling (const ANumber: Double): Integer; inline; Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0); end; -function RoundExDouble (const aTemp: Double): Double; inline; +function RoundExInteger (const aTemp: Double): Integer; inline; var vTmp: Double; begin vTmp := aTemp * 10; - Result := Ceiling (vTmp) / 10; + Result := Ceiling (vTmp); end; //--------------------------------------------------- @@ -473,8 +473,24 @@ procedure TOneBRC.MergeAll; //--------------------------------------------------- +function MyFormatInt (const aIn: SmallInt): AnsiString; inline; +begin + Result := IntToStr(aIn); + Insert ('.', Result, Length(Result)); + + if Result[1] = '.' then begin + Insert ('0', Result, 1); + exit; + end; + + if (Result[1] = '-') and (Result[2] = '.') then + Insert('0', Result, 2); +end; + +//--------------------------------------------------- + procedure TOneBRC.GenerateOutput; -var vMin, vMean, vMax: Double; +var vMean: Integer; vStream: TStringStream; I, N: Int64; vData: PStationData; @@ -507,14 +523,12 @@ procedure TOneBRC.GenerateOutput; // debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it vHash := crc32c(0, @vStations[i][1], Length (vStations[i])); FStationsDicts[0].TryGetValue(vHash, vData); - vMin := vData^.Min/10; - vMax := vData^.Max/10; - vMean := RoundExDouble(vData^.Sum/vData^.Count/10); + vMean := RoundExInteger(vData^.Sum/vData^.Count/10); vStream.WriteString( - vStations[i] + '=' + FormatFloat('0.0', vMin) - + '/' + FormatFloat('0.0', vMean) - + '/' + FormatFloat('0.0', vMax) + ', ' + vStations[i] + '=' + MyFormatInt(vData^.Min) + + '/' + MyFormatInt(vMean) + + '/' + MyFormatInt(vData^.Max) + ', ' ); Inc(I); end; @@ -645,4 +659,3 @@ procedure TOneBRCApp.WriteHelp; end. {$ENDREGION} - diff --git a/entries/ghatem-fpc/src/OneBRC-smallrec.lpr b/entries/ghatem-fpc/src/OneBRC-smallrec.lpr index f46b861..e4624e4 100644 --- a/entries/ghatem-fpc/src/OneBRC-smallrec.lpr +++ b/entries/ghatem-fpc/src/OneBRC-smallrec.lpr @@ -154,12 +154,12 @@ function Ceiling (const ANumber: Double): Integer; inline; Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0); end; -function RoundExDouble (const aTemp: Double): Double; inline; +function RoundExInteger (const aTemp: Double): Integer; inline; var vTmp: Double; begin vTmp := aTemp * 10; - Result := Ceiling (vTmp) / 10; + Result := Ceiling (vTmp); end; //--------------------------------------------------- @@ -487,8 +487,24 @@ procedure TOneBRC.MergeAll; //--------------------------------------------------- +function MyFormatInt (const aIn: SmallInt): AnsiString; inline; +begin + Result := IntToStr(aIn); + Insert ('.', Result, Length(Result)); + + if Result[1] = '.' then begin + Insert ('0', Result, 1); + exit; + end; + + if (Result[1] = '-') and (Result[2] = '.') then + Insert('0', Result, 2); +end; + +//--------------------------------------------------- + procedure TOneBRC.GenerateOutput; -var vMin, vMean, vMax: Double; +var vMean: Integer; vStream: TStringStream; I, N: Int64; vData: PStationData; @@ -521,14 +537,12 @@ procedure TOneBRC.GenerateOutput; // debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it vHash := crc32c(0, @vStations[i][1], Length (vStations[i])); FStationsDicts[0].TryGetValue(vHash, vData); - vMin := vData^.Min/10; - vMax := vData^.Max/10; - vMean := RoundExDouble(vData^.Sum/vData^.Count/10); + vMean := RoundExInteger(vData^.Sum/vData^.Count/10); vStream.WriteString( - vStations[i] + '=' + FormatFloat('0.0', vMin) - + '/' + FormatFloat('0.0', vMean) - + '/' + FormatFloat('0.0', vMax) + ', ' + vStations[i] + '=' + MyFormatInt(vData^.Min) + + '/' + MyFormatInt(vMean) + + '/' + MyFormatInt(vData^.Max) + ', ' ); Inc(I); end; @@ -659,4 +673,3 @@ procedure TOneBRCApp.WriteHelp; end. {$ENDREGION} - diff --git a/entries/ghatem-fpc/src/OneBRC.lpr b/entries/ghatem-fpc/src/OneBRC.lpr index 09849d1..fc4cdf0 100644 --- a/entries/ghatem-fpc/src/OneBRC.lpr +++ b/entries/ghatem-fpc/src/OneBRC.lpr @@ -154,12 +154,12 @@ function Ceiling (const ANumber: Double): Integer; inline; Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0); end; -function RoundExDouble (const aTemp: Double): Double; inline; +function RoundExInteger (const aTemp: Double): Integer; inline; var vTmp: Double; begin vTmp := aTemp * 10; - Result := Ceiling (vTmp) / 10; + Result := Ceiling (vTmp); end; //--------------------------------------------------- @@ -487,8 +487,24 @@ procedure TOneBRC.MergeAll; //--------------------------------------------------- +function MyFormatInt (const aIn: SmallInt): AnsiString; inline; +begin + Result := IntToStr(aIn); + Insert ('.', Result, Length(Result)); + + if Result[1] = '.' then begin + Insert ('0', Result, 1); + exit; + end; + + if (Result[1] = '-') and (Result[2] = '.') then + Insert('0', Result, 2); +end; + +//--------------------------------------------------- + procedure TOneBRC.GenerateOutput; -var vMin, vMean, vMax: Double; +var vMean: Integer; vStream: TStringStream; I, N: Int64; vData: PStationData; @@ -521,14 +537,12 @@ procedure TOneBRC.GenerateOutput; // debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it vHash := crc32c(0, @vStations[i][1], Length (vStations[i])); FStationsDicts[0].TryGetValue(vHash, vData); - vMin := vData^.Min/10; - vMax := vData^.Max/10; - vMean := RoundExDouble(vData^.Sum/vData^.Count/10); + vMean := RoundExInteger(vData^.Sum/vData^.Count/10); vStream.WriteString( - vStations[i] + '=' + FormatFloat('0.0', vMin) - + '/' + FormatFloat('0.0', vMean) - + '/' + FormatFloat('0.0', vMax) + ', ' + vStations[i] + '=' + MyFormatInt(vData^.Min) + + '/' + MyFormatInt(vMean) + + '/' + MyFormatInt(vData^.Max) + ', ' ); Inc(I); end; @@ -659,4 +673,3 @@ procedure TOneBRCApp.WriteHelp; end. {$ENDREGION} -