From f0935076555cba9227ca61777b73d2ec733ae66b Mon Sep 17 00:00:00 2001 From: Georges Hatem Date: Wed, 1 May 2024 21:30:40 +0300 Subject: [PATCH] remove station name from the station record, store in separate array instead --- entries/ghatem-fpc/src/onebrc.pas | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/entries/ghatem-fpc/src/onebrc.pas b/entries/ghatem-fpc/src/onebrc.pas index 54eb819..61d3a44 100644 --- a/entries/ghatem-fpc/src/onebrc.pas +++ b/entries/ghatem-fpc/src/onebrc.pas @@ -22,11 +22,11 @@ function RoundExDouble(const ATemp: Double): Double; inline; Max: SmallInt; Count: UInt32; Sum: Integer; - Name: AnsiString; end; PStationData = ^TStationData; TKeys = array [0..45006] of Cardinal; + TStationNames = array [0..45006] of AnsiString; TValues = array [0..45006] of PStationData; { TMyDictionary } @@ -36,13 +36,16 @@ TMyDictionary = class FHashes: TKeys; FValues: TValues; FRecords: array [0..45006] of TStationData; + // store the station names outside of the record as they are filled only upon first encounter + FStationNames: TStationNames; procedure InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: Integer); public constructor Create; property Keys: TKeys read FHashes; + property StationNames: TStationNames read FStationNames; property Values: TValues read FValues; function TryGetValue (const aKey: Cardinal; out aValue: PStationData): Boolean; inline; - procedure Add (const aKey: Cardinal; const aValue: PStationData); inline; + procedure Add (const aKey: Cardinal; const aValue: PStationData; const aStationName: AnsiString); inline; end; { TOneBRC } @@ -139,6 +142,7 @@ procedure TMyDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; end else begin vOffset := 1; + while True do begin // quadratic probing, by incrementing vOffset Inc (vIdx, vOffset); @@ -181,7 +185,7 @@ function TMyDictionary.TryGetValue(const aKey: Cardinal; out aValue: PStationDat aValue := FValues[vIdx]; end; -procedure TMyDictionary.Add(const aKey: Cardinal; const aValue: PStationData); +procedure TMyDictionary.Add(const aKey: Cardinal; const aValue: PStationData; const aStationName: AnsiString); var vIdx: Integer; vFound: Boolean; @@ -190,6 +194,7 @@ procedure TMyDictionary.Add(const aKey: Cardinal; const aValue: PStationData); if not vFound then begin FHashes[vIdx] := aKey; FValues[vIdx] := aValue; + FStationNames[vIdx] := aStationName; end else raise Exception.Create ('TMyDict: cannot add, duplicate key'); @@ -350,8 +355,7 @@ procedure TOneBRC.ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int vData^.Max := vTemp; vData^.Sum := vTemp; vData^.Count := 1; - vData^.Name := vStation; - FStationsDicts[aThreadNb].Add (vHash, vData); + FStationsDicts[aThreadNb].Add (vHash, vData, vStation); end; // we're at a #10: next line starts at the next index @@ -376,8 +380,10 @@ procedure TOneBRC.Merge(aLeft: UInt16; aRight: UInt16); var iHash: Cardinal; vDataR: PStationData; vDataL: PStationData; + I: Integer; begin - for iHash in FStationsDicts[aRight].Keys do begin + for I := 0 to cDictSize - 1 do begin + iHash := FStationsDicts[aRight].Keys[I]; // zero means empty slot: skip if iHash = 0 then continue; @@ -387,13 +393,14 @@ procedure TOneBRC.Merge(aLeft: UInt16; aRight: UInt16); if FStationsDicts[aLeft].TryGetValue(iHash, vDataL) then begin vDataL^.Count := vDataL^.Count + vDataR^.Count; vDataL^.Sum := vDataL^.Sum + vDataR^.Sum; + if vDataR^.Max > vDataL^.Max then vDataL^.Max := vDataR^.Max; if vDataR^.Min < vDataL^.Min then vDataL^.Min := vDataR^.Min; end else begin - FStationsDicts[aLeft].Add (iHash, vDataR); + FStationsDicts[aLeft].Add (iHash, vDataR, FStationsDicts[aRight].StationNames[I]); end; end; end; @@ -419,14 +426,16 @@ procedure TOneBRC.GenerateOutput; begin vStream := TStringStream.Create; vStations := TStringList.Create; - vStations.Capacity := 45000; + vStations.Capacity := cDictSize; vStations.UseLocale := False; try vStations.BeginUpdate; - for vData in FStationsDicts[0].Values do begin + for I := 0 to cDictSize - 1 do begin + vData := FStationsDicts[0].Values[I]; // count = 0 means empty slot: skip - if vData^.Count <> 0 then - vStations.Add(vData^.Name); + if vData^.Count <> 0 then begin + vStations.Add(FStationsDicts[0].StationNames[I]); + end; end; vStations.EndUpdate;