diff --git a/entries/ghatem-fpc/src/OneBRC-dirty.lpi b/entries/ghatem-fpc/src/OneBRC-largerec.lpi similarity index 94% rename from entries/ghatem-fpc/src/OneBRC-dirty.lpi rename to entries/ghatem-fpc/src/OneBRC-largerec.lpi index be8f46b..188a364 100644 --- a/entries/ghatem-fpc/src/OneBRC-dirty.lpi +++ b/entries/ghatem-fpc/src/OneBRC-largerec.lpi @@ -22,7 +22,7 @@ - + @@ -60,7 +60,7 @@ - + @@ -89,7 +89,7 @@ - + @@ -126,7 +126,7 @@ - + @@ -135,7 +135,7 @@ - + diff --git a/entries/ghatem-fpc/src/OneBRC-dirty.lpr b/entries/ghatem-fpc/src/OneBRC-largerec.lpr similarity index 84% rename from entries/ghatem-fpc/src/OneBRC-dirty.lpr rename to entries/ghatem-fpc/src/OneBRC-largerec.lpr index df4b840..454c5aa 100644 --- a/entries/ghatem-fpc/src/OneBRC-dirty.lpr +++ b/entries/ghatem-fpc/src/OneBRC-largerec.lpr @@ -11,7 +11,7 @@ Baseline.Console; const - cNumStations = 41343; + cNumStations = 41343; // as per the input file cDictSize = 248071; // numstations * 6, next prime number cThreadCount = 32; @@ -41,7 +41,7 @@ TOneBRCApp = class(TCustomApplication) TStationData = packed record Min: SmallInt; Max: SmallInt; - Count: UInt16; + Count: UInt32; Sum: Integer; end; PStationData = ^TStationData; @@ -59,18 +59,42 @@ TOneBRCApp = class(TCustomApplication) TBRCDictionary = class private + // keys/values for a Large dictionary: + // values will be the index where the actual record is stored FHashes: THashes; FIndexes: TIndexes; - FStationNames: TStationNames; + + // where the actual records are stored: + // - each thread holds its own data + // - for each thread, pre-allocate as much space as needed + // - all threads store their data at the exact same index FThreadData: array [0..cThreadCount-1] of array [0..cNumStations-1] of TStationData; + + // station names are also shared, not lock-protected (in the worst case, the value is written twice) + // stored separately as it is rarely needed + FStationNames: TStationNames; + + // points to the next slot in FThreadData where we should fill a newly encountered station FCounter: TStationCount; + + // exclusively to protect FCounter from concurrent-writes FCS: TCriticalSection; + + // searches for a given key, returns if found the key and the storage index + // (or, if not found, which index to use next) procedure InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: THashSize); + public constructor Create; destructor Destroy; override; + + // simple wrapper to find station-record pointers function TryGetValue (const aKey: Cardinal; const aThreadNb: TThreadCount; out aValue: PStationData): Boolean; inline; + + // multithread-unprotected: adds a firstly-encountered station-data (temp, name) procedure Add (const aHashIdx: THashSize; const aThreadNb: TThreadCount; const aTemp: SmallInt; const aStationName: AnsiString); inline; + + // multithread-protected: safely assign a slot for a given key function AtomicRegisterHash (const aKey: Cardinal): THashSize; end; @@ -89,17 +113,27 @@ TOneBRC = class FThreads: array of TThread; FDictionary: TBRCDictionary; + // for a line between idx [aStart; aEnd], returns the station-name length, and the integer-value of temperature procedure ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); inline; public constructor Create (const aThreadCount: TThreadCount); destructor Destroy; override; function mORMotMMF (const afilename: string): Boolean; + + // initial thread-spawn procedure DispatchThreads; + + // await for all threads to complete work procedure WaitAll; + + // executed by each thread to process data in the given range procedure ProcessData (aThreadNb: TThreadCount; aStartIdx: Int64; aEndIdx: Int64); + + // merge data from all threads procedure Merge (aLeft: TThreadCount; aRight: TThreadCount); procedure MergeAll; + procedure GenerateOutput; property DataSize: Int64 read FDataSize; end; @@ -160,6 +194,9 @@ function TBRCDictionary.AtomicRegisterHash(const aKey: Cardinal): THashSize; var vFound: Boolean; begin + // must call InternalFind again, within the critical-section, + // to ensure the slot was not taken by another thread + // this function should execute only once per station, so at most 41343 times FCS.Acquire; try InternalFind (aKey, vFound, Result); @@ -177,14 +214,19 @@ procedure TBRCDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; var vIdx: Integer; vOffset: Integer; begin + // Lemire hashing: faster to ocmpute than modulus, but more collisions from trials + // https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + // thanks Arnaud for the suggestion vIdx := aKey * cDictSize shr 32; if FHashes[vIdx] = 0 then begin + // found match aIndex := vIdx; aFound := False; exit; end; if FHashes[vIdx] = aKey then begin + // found empty bucket to use aIndex := vIdx; aFound := True; exit; @@ -193,7 +235,7 @@ procedure TBRCDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; vOffset := 1; while True do begin - // quadratic probing, by incrementing vOffset + // linear (not quadratic) probing, with continous increments to minimize clusters Inc (vIdx, vOffset); Inc (vOffset); @@ -293,18 +335,28 @@ procedure TOneBRC.ExtractLineData(const aStart: Int64; const aEnd: Int64; out aL // repeat with the remaining digits, multiplying by 10^x (skip the '.') // multiply by -1 upon reaching a '-' - //aTemp := (Ord(FData[aEnd]) - c0ascii) - // + 10 *(Ord(FData[aEnd-2]) - c0ascii); - //vDigit := Ord(FData[aEnd-3]); - //if (vDigit >= c0ascii) and (vDigit <= c9ascii) then begin - // aTemp := aTemp + 100*(Ord(FData[aEnd-3]) - c0ascii); - // vDigit := Ord(FData[aEnd-4]); - // if vDigit = cNegAscii then - // aTemp := -aTemp; - //end - //else if vDigit = cNegAscii then - // aTemp := -aTemp; - + { + aTemp := (Ord(FData[aEnd]) - c0ascii) + + 10 *(Ord(FData[aEnd-2]) - c0ascii); + vDigit := Ord(FData[aEnd-3]); + if (vDigit >= c0ascii) and (vDigit <= c9ascii) then begin + aTemp := aTemp + 100*(Ord(FData[aEnd-3]) - c0ascii); + vDigit := Ord(FData[aEnd-4]); + if vDigit = cNegAscii then + aTemp := -aTemp; + end + else if vDigit = cNegAscii then + aTemp := -aTemp; + } + + //========== + // entire computation is branchless (for readability, see version above) + // no intermediary results also showed better performance + + // 0 if - + // 1 if + + // convert range [0;1] to [-1;1] for branchless negation when needed + // if there is a 3rd digit (*100), add it, otherwise multiply by 0 to cancel it out vIsNeg := Ord (FData[J+1] <> '-'); aTemp := ( @@ -312,10 +364,6 @@ procedure TOneBRC.ExtractLineData(const aStart: Int64; const aEnd: Int64; out aL + 10 *(Ord(FData[aEnd-2]) - c0ascii) + Ord ((J+4 - vIsNeg < aEnd)) * 100*(Ord(FData[aEnd-3]) - c0ascii) ) * (vIsNeg * 2 - 1); - //if (J+4 - vIsNeg < aEnd) then begin - //aTemp := aTemp - //end; - //aTemp := (vIsNeg * 2 - 1) * aTemp; end; //--------------------------------------------------- @@ -350,6 +398,7 @@ procedure TOneBRC.DispatchThreads; I: TThreadCount; vRange: Int64; begin + // distribute input equally across available threads vRange := Trunc (FDataSize / FThreadCount); for I := 0 to FThreadCount - 1 do begin @@ -380,6 +429,7 @@ procedure TOneBRC.ProcessData (aThreadNb: TThreadCount; aStartIdx: Int64; aEndId vLenStationName: ShortInt; vFound: Boolean; begin + // initialize min/max, else we may get zeroes (due to our Add that fires once per station across all threads) for I := 0 to cNumStations - 1 do begin FDictionary.FThreadData[aThreadNb][I].Max := -2000; FDictionary.FThreadData[aThreadNb][I].Min := 2000; @@ -422,7 +472,6 @@ procedure TOneBRC.ProcessData (aThreadNb: TThreadCount; aStartIdx: Int64; aEndId FDictionary.InternalFind (vHash, vFound, vHashIdx); - if vFound then begin vData := @FDictionary.FThreadData[aThreadNb][FDictionary.FIndexes[vHashIdx]]; if vTemp < vData^.Min then @@ -435,9 +484,11 @@ procedure TOneBRC.ProcessData (aThreadNb: TThreadCount; aStartIdx: Int64; aEndId else begin // pre-allocated array of records instead of on-the-go allocation vHashIdx := FDictionary.AtomicRegisterHash (vHash); + // SetString done only once per station name, for later sorting SetString(vStation, pAnsiChar(@FData[vLineStart]), vLenStationName); + // data can be safely added at the given index, without locking FDictionary.Add(vHashIdx, aThreadNb, vTemp, vStation); end; @@ -460,6 +511,7 @@ procedure TOneBRC.Merge(aLeft: TThreadCount; aRight: TThreadCount); vDataL: PStationData; I: Integer; begin + // accumulate data into Left for I := 0 to cNumStations - 1 do begin vDataR := @FDictionary.FThreadData[aRight][I]; vDataL := @FDictionary.FThreadData[aLeft][I]; @@ -478,6 +530,7 @@ procedure TOneBRC.MergeAll; var I: TThreadCount; begin + // all thread-data is accumulated into index 0 for I := 1 to FThreadCount - 1 do begin Merge (0, I); end; @@ -487,6 +540,8 @@ procedure TOneBRC.MergeAll; function MyFormatInt (const aIn: SmallInt): AnsiString; inline; begin + // much faster than FormatFloat + // oddly, IntToStr does not include leading zeroes for both pos and neg numbers Result := IntToStr(aIn); Insert ('.', Result, Length(Result)); diff --git a/entries/ghatem-fpc/src/OneBRC-nosharedname.lpi b/entries/ghatem-fpc/src/OneBRC-nosharedname.lpi deleted file mode 100644 index 87fff28..0000000 --- a/entries/ghatem-fpc/src/OneBRC-nosharedname.lpi +++ /dev/null @@ -1,166 +0,0 @@ - - - - - - - - - - - - - - - <UseAppBundle Value="False"/> - <ResourceType Value="res"/> - </General> - <BuildModes Count="4"> - <Item1 Name="Default" Default="True"/> - <Item2 Name="Debug"> - <CompilerOptions> - <Version Value="11"/> - <PathDelim Value="\"/> - <Target> - <Filename Value="..\..\..\bin\ghatem-nosharedname"/> - </Target> - <SearchPaths> - <IncludeFiles Value="$(ProjOutDir)"/> - <UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/> - </SearchPaths> - <Parsing> - <SyntaxOptions> - <IncludeAssertionCode Value="True"/> - </SyntaxOptions> - </Parsing> - <CodeGeneration> - <Checks> - <IOChecks Value="True"/> - <RangeChecks Value="True"/> - <OverflowChecks Value="True"/> - <StackChecks Value="True"/> - </Checks> - <VerifyObjMethodCallValidity Value="True"/> - </CodeGeneration> - <Linking> - <Debugging> - <DebugInfoType Value="dsDwarf3"/> - <TrashVariables Value="True"/> - <UseValgrind Value="True"/> - <UseExternalDbgSyms Value="True"/> - </Debugging> - </Linking> - <Other> - <CustomOptions Value="-dDEBUG"/> - </Other> - </CompilerOptions> - </Item2> - <Item3 Name="Release"> - <CompilerOptions> - <Version Value="11"/> - <PathDelim Value="\"/> - <Target> - <Filename Value="..\..\..\bin\ghatem-nosharedname"/> - </Target> - <SearchPaths> - <IncludeFiles Value="$(ProjOutDir)"/> - <UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/> - </SearchPaths> - <CodeGeneration> - <SmartLinkUnit Value="True"/> - <Optimizations> - <OptimizationLevel Value="3"/> - </Optimizations> - </CodeGeneration> - <Linking> - <Debugging> - <GenerateDebugInfo Value="False"/> - <RunWithoutDebug Value="True"/> - </Debugging> - <LinkSmart Value="True"/> - </Linking> - <Other> - <CustomOptions Value="-dRELEASE"/> - </Other> - </CompilerOptions> - </Item3> - <Item4 Name="Valgrind"> - <CompilerOptions> - <Version Value="11"/> - <PathDelim Value="\"/> - <Target> - <Filename Value="..\..\..\bin\ghatem-nosharedname"/> - </Target> - <SearchPaths> - <IncludeFiles Value="$(ProjOutDir)"/> - <UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/> - </SearchPaths> - <Linking> - <Debugging> - <DebugInfoType Value="dsDwarf3"/> - <TrashVariables Value="True"/> - <UseValgrind Value="True"/> - <UseExternalDbgSyms Value="True"/> - </Debugging> - </Linking> - <Other> - <CustomOptions Value="-dDEBUG"/> - </Other> - </CompilerOptions> - </Item4> - </BuildModes> - <PublishOptions> - <Version Value="2"/> - <UseFileFilters Value="True"/> - </PublishOptions> - <RunParams> - <FormatVersion Value="2"/> - </RunParams> - <RequiredPackages Count="2"> - <Item1> - <PackageName Value="mormot2"/> - </Item1> - <Item2> - <PackageName Value="LCLBase"/> - </Item2> - </RequiredPackages> - <Units Count="1"> - <Unit0> - <Filename Value="OneBRC-nosharedname.lpr"/> - <IsPartOfProject Value="True"/> - </Unit0> - </Units> - </ProjectOptions> - <CompilerOptions> - <Version Value="11"/> - <PathDelim Value="\"/> - <Target> - <Filename Value="..\..\..\bin\ghatem-nosharedname"/> - </Target> - <SearchPaths> - <IncludeFiles Value="$(ProjOutDir)"/> - <UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/> - </SearchPaths> - <Linking> - <Debugging> - <DebugInfoType Value="dsDwarf3"/> - </Debugging> - </Linking> - <Other> - <CustomOptions Value="-dDEBUG"/> - </Other> - </CompilerOptions> - <Debugging> - <Exceptions Count="3"> - <Item1> - <Name Value="EAbort"/> - </Item1> - <Item2> - <Name Value="ECodetoolError"/> - </Item2> - <Item3> - <Name Value="EFOpenError"/> - </Item3> - </Exceptions> - </Debugging> -</CONFIG> diff --git a/entries/ghatem-fpc/src/OneBRC-nosharedname.lpr b/entries/ghatem-fpc/src/OneBRC-nosharedname.lpr deleted file mode 100644 index 3d3ce50..0000000 --- a/entries/ghatem-fpc/src/OneBRC-nosharedname.lpr +++ /dev/null @@ -1,661 +0,0 @@ -program OneBRC; - -{$mode objfpc}{$H+} - -uses - {$IFDEF UNIX} - cthreads, - {$ENDIF} - Classes, SysUtils, CustApp, Lclintf, UTF8Process, - mormot.core.os, mormot.core.base, - Baseline.Console; - -const - cDictSize: Integer = 45007; - - c0ascii: ShortInt = 48; - c9ascii: ShortInt = 57; - cNegAscii: ShortInt = 45; - -type - //--------------------------------------- - { TOneBRCApp } - - TOneBRCApp = class(TCustomApplication) - private - FFileName: string; - FThreadCount: Integer; - procedure RunOneBRC; - protected - procedure DoRun; override; - public - constructor Create(aOwner: TComponent); override; - destructor Destroy; override; - procedure WriteHelp; virtual; - end; - - //--------------------------------------- - - // record is packed to minimize its size - // use pointers to avoid passing entire records around - TStationData = packed record - Min: SmallInt; - Max: SmallInt; - Count: UInt32; - Sum: Integer; - end; - PStationData = ^TStationData; - - TKeys = array [0..45006] of Cardinal; - TStationNames = array [0..45006] of AnsiString; - TValues = array [0..45006] of PStationData; - - //--------------------------------------- - { TMyDictionary } - - TMyDictionary = class - private - FHashes: TKeys; - FValues: TValues; - FRecords: array [0..45006] of TStationData; - // store the station names outside of the record as they are filled only upon first encounter - FStationNames: TStationNames; - procedure InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: Integer); - public - constructor Create; - property Keys: TKeys read FHashes; - property StationNames: TStationNames read FStationNames; - property Values: TValues read FValues; - function TryGetValue (const aKey: Cardinal; out aValue: PStationData): Boolean; inline; - procedure Add (const aKey: Cardinal; const aValue: PStationData; const aStationName: AnsiString); inline; - procedure AddName (const aKey: Cardinal; const aStationName: AnsiString); inline; - end; - - //--------------------------------------- - { TOneBRC } - - TOneBRC = class - private - // mormot memory map for fast bytes read. - // I tried using CreateFileMapping and failed, more details in unit FileReader - FMemoryMap: TMemoryMap; - FData: pAnsiChar; - FDataSize: Int64; - - FThreadCount: UInt16; - FThreads: array of TThread; - FStationsDicts: array of TMyDictionary; - - procedure ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); inline; - - public - constructor Create (const aThreadCount: UInt16); - destructor Destroy; override; - function mORMotMMF (const afilename: string): Boolean; - procedure DispatchThreads; - procedure WaitAll; - procedure ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int64); - procedure Merge (aLeft: UInt16; aRight: UInt16); - procedure MergeAll; - procedure GenerateOutput; - property DataSize: Int64 read FDataSize; - end; - - //--------------------------------------- - { TBRCThread } - - TThreadProc = procedure (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int64) of object; - - TBRCThread = class (TThread) - private - FProc: TThreadProc; - FThreadNb: UInt16; - FStart: Int64; - FEnd: Int64; - protected - procedure Execute; override; - public - constructor Create (aProc: TThreadProc; aThreadNb: UInt16; aStart: Int64; aEnd: Int64); - end; - -{ TOneBRCApp } - -procedure TOneBRCApp.RunOneBRC; -var - vOneBRC: TOneBRC; -begin - vOneBRC := TOneBRC.Create (FThreadCount); - try - try - vOneBRC.mORMotMMF(FFileName); - vOneBRC.DispatchThreads; - vOneBRC.WaitAll; - vOneBRC.MergeAll; - vOneBRC.GenerateOutput; - except - on E: Exception do - begin - WriteLn(Format(rsErrorMessage, [ E.Message ])); - ReadLn; - end; - end; - finally - vOneBRC.Free; - end; -end; - -//--------------------------------------------------- - -function Ceiling (const ANumber: Double): Integer; inline; -begin - Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0); -end; - -function RoundExInteger (const aTemp: Double): Integer; inline; -var - vTmp: Double; -begin - vTmp := aTemp * 10; - Result := Ceiling (vTmp); -end; - -//--------------------------------------------------- -{ TOneBRC } - -function Compare(AList: TStringList; AIndex1, AIndex2: Integer): Integer; -var - Str1, Str2: String; -begin - Str1 := AList.Strings[AIndex1]; - Str2 := AList.Strings[AIndex2]; - Result := CompareStr(Str1, Str2); -end; - -{ TMyDictionary } - -procedure TMyDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: Integer); -var vIdx: Integer; - vOffset: Integer; -begin - vIdx := aKey * cDictSize shr 32; - - if FHashes[vIdx] = aKey then begin - aIndex := vIdx; - aFound := True; - end - else begin - vOffset := 1; - - while True do begin - // quadratic probing, by incrementing vOffset - Inc (vIdx, vOffset); - Inc (vOffset); - - // exceeded boundary, loop back - if vIdx >= cDictSize then - Dec (vIdx, cDictSize); - - if FHashes[vIdx] = aKey then begin - // found match - aIndex := vIdx; - aFound := True; - break; - end; - if FHashes[vIdx] = 0 then begin - // found empty bucket to use - aIndex := vIdx; - aFound := False; - break; - end; - end; - end; -end; - -constructor TMyDictionary.Create; -var - I: Integer; -begin - for I := 0 to cDictSize - 1 do begin - FValues[I] := @FRecords[I]; - end; -end; - -function TMyDictionary.TryGetValue(const aKey: Cardinal; out aValue: PStationData): Boolean; -var - vIdx: Integer; -begin - InternalFind (aKey, Result, vIdx); - aValue := FValues[vIdx]; -end; - -procedure TMyDictionary.Add(const aKey: Cardinal; const aValue: PStationData; const aStationName: AnsiString); -var - vIdx: Integer; - vFound: Boolean; -begin - InternalFind (aKey, vFound, vIdx); - if not vFound then begin - FHashes[vIdx] := aKey; - FValues[vIdx] := aValue; - FStationNames[vIdx] := aStationName; - end - else - raise Exception.Create ('TMyDict: cannot add, duplicate key'); -end; - -procedure TMyDictionary.AddName (const aKey: Cardinal; const aStationName: AnsiString); -var - vIdx: Integer; - vFound: Boolean; -begin - InternalFind (aKey, vFound, vIdx); - if not vFound then begin - FHashes[vIdx] := aKey; - FStationNames[vIdx] := aStationName; - end - else - raise Exception.Create ('TMyDict: cannot add, duplicate key'); -end; - -procedure TOneBRC.ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); -// given a line of data, extract the length of station name, and temperature as Integer. -var - I: Int64; - vDigit: UInt8; -begin - // we're looking for the semicolon ';', but backwards since there's fewer characters to traverse - // a thermo measurement must be at least 3 characters long (one decimal, one period, and the units) - // e.g. input: Rock Hill;-54.3 - // can safely skip 3: ^^^ - I := aEnd - 3; - - while True do begin - if FData[I] = ';' then - break; - Dec(I); - end; - // I is the position of the semi-colon, extract what's before and after it - - // length of the station name string - aLength := i - aStart; - - // ASCII of 3 is 51. - // subtract ASCII of 0 to get the digit 3 - // repeat with the remaining digits, multiplying by 10^x (skip the '.') - // multiply by -1 upon reaching a '-' - aTemp := (Ord(FData[aEnd]) - c0ascii) - + 10 *(Ord(FData[aEnd-2]) - c0ascii); - vDigit := Ord(FData[aEnd-3]); - if (vDigit >= c0ascii) and (vDigit <= c9ascii) then begin - aTemp := aTemp + 100*(Ord(FData[aEnd-3]) - c0ascii); - vDigit := Ord(FData[aEnd-4]); - if vDigit = cNegAscii then - aTemp := -aTemp; - end - else if vDigit = cNegAscii then - aTemp := -aTemp; -end; - -//--------------------------------------------------- - -constructor TOneBRC.Create (const aThreadCount: UInt16); -var I: UInt16; -begin - FThreadCount := aThreadCount; - SetLength (FStationsDicts, aThreadCount); - SetLength (FThreads, aThreadCount); - - for I := 0 to aThreadCount - 1 do begin - FStationsDicts[I] := TMyDictionary.Create; - end; -end; - -destructor TOneBRC.Destroy; -var I: UInt16; -begin - for I := 0 to FThreadCount - 1 do begin - FStationsDicts[I].Free; - end; - inherited Destroy; -end; - -//--------------------------------------------------- - -function TOneBRC.mORMotMMF (const afilename: string): Boolean; -begin - Result := FMemoryMap.Map (aFilename); - if Result then begin - FData := FMemoryMap.Buffer; - FDataSize := FMemoryMap.Size; - end; -end; - -procedure TOneBRC.DispatchThreads; -var - I: UInt16; - vRange: Int64; -begin - vRange := Trunc (FDataSize / FThreadCount); - - for I := 0 to FThreadCount - 1 do begin - FThreads[I] := TBRCThread.Create (@ProcessData, I, I*vRange, (I+1)*vRange); - end; -end; - -procedure TOneBRC.WaitAll; -var - I: UInt16; -begin - for I := 0 to FThreadCount - 1 do begin - FThreads[I].WaitFor; - end; -end; - -//--------------------------------------------------- - -procedure TOneBRC.ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int64); -var - i: Int64; - vStation: AnsiString; - vTemp: SmallInt; - vData: PStationData; - vLineStart: Int64; - vHash: Cardinal; - vLenStationName: ShortInt; -begin - i := aStartIdx; - - // the given starting point might be in the middle of a line: - // find the beginning of that line - while i-1 >= 0 do begin - if FData[i-1] <> #10 then - Dec (I) - else - break; - end; - - // the given ending point might be in the middle of a line: - // find the beginning of that line (the last block works well) - while True do begin - if FData[aEndIdx] <> #10 then - Dec (aEndIdx) - else - break; - end; - Inc (aEndIdx, 1); - - vLineStart := i; - - while i < aEndIdx do begin - if FData[i] = #10 then begin - // new line parsed, process its contents - ExtractLineData (vLineStart, i - 1, vLenStationName, vTemp); - - // compute the hash starting at the station's first char, and its length - // mORMot's crc32c is ~33% faster than the built-in one - vHash := crc32c(0, @FData[vLineStart], vLenStationName); - - if FstationsDicts[aThreadNb].TryGetValue(vHash, vData) then begin - if vTemp < vData^.Min then - vData^.Min := vTemp; - if vTemp > vData^.Max then - vData^.Max := vTemp; - vData^.Sum := vData^.Sum + vTemp; - Inc (vData^.Count); - end - else begin - // SetString done only once per station name per thread, for later sorting - SetString(vStation, pAnsiChar(@FData[vLineStart]), vLenStationName); - - // pre-allocated array of records instead of on-the-go allocation - vData^.Min := vTemp; - vData^.Max := vTemp; - vData^.Sum := vTemp; - vData^.Count := 1; - FStationsDicts[aThreadNb].Add (vHash, vData, vStation); - end; - - // we're at a #10: next line starts at the next index - vLineStart := i+1; - - // we're at a #10: - // until the next #10 char, there will be: - // - 1 semicolon - // - 3 chars for the temp (min) - // - 2 chars for the name (min) - // - the usual Inc (I) - // so we should be able to skip 7 chars until another #10 may appear - Inc (i, 7); - continue; - end; - - Inc (i); - end; -end; - -procedure TOneBRC.Merge(aLeft: UInt16; aRight: UInt16); -var iHash: Cardinal; - vDataR: PStationData; - vDataL: PStationData; - I: Integer; -begin - for I := 0 to cDictSize - 1 do begin - iHash := FStationsDicts[aRight].Keys[I]; - // zero means empty slot: skip - if iHash = 0 then - continue; - - FStationsDicts[aRight].TryGetValue(iHash, vDataR); - - if FStationsDicts[aLeft].TryGetValue(iHash, vDataL) then begin - vDataL^.Count := vDataL^.Count + vDataR^.Count; - vDataL^.Sum := vDataL^.Sum + vDataR^.Sum; - - if vDataR^.Max > vDataL^.Max then - vDataL^.Max := vDataR^.Max; - if vDataR^.Min < vDataL^.Min then - vDataL^.Min := vDataR^.Min; - end - else begin - FStationsDicts[aLeft].Add (iHash, vDataR, ''); - end; - end; -end; - -procedure TOneBRC.MergeAll; -var - I: UInt16; -begin - for I := 1 to FThreadCount - 1 do begin - Merge (0, I); - end; -end; - -//--------------------------------------------------- - -function MyFormatInt (const aIn: SmallInt): AnsiString; inline; -begin - Result := IntToStr(aIn); - Insert ('.', Result, Length(Result)); - - if Result[1] = '.' then begin - Insert ('0', Result, 1); - exit; - end; - - if (Result[1] = '-') and (Result[2] = '.') then - Insert('0', Result, 2); -end; - -//--------------------------------------------------- - -procedure TOneBRC.GenerateOutput; -var vMean: Integer; - vStream: TStringStream; - I, N: Int64; - vData: PStationData; - vHash: Cardinal; - vStations: TStringList; -begin - vStream := TStringStream.Create; - vStations := TStringList.Create; - vStations.Capacity := cDictSize; - vStations.UseLocale := False; - try - vStations.BeginUpdate; - for I := 0 to cDictSize - 1 do begin - if FStationsDicts[0].Keys[I] <> 0 then begin - // count = 0 means empty slot: skip - vStations.Add(FStationsDicts[0].StationNames[I]); - end; - end; - vStations.EndUpdate; - - vStations.CustomSort (@Compare); - - I := 0; - N := vStations.Count; - - vStream.WriteString('{'); - while I < N do begin - // the stations are now sorted, but we need to locate the data: recompute hash - // would it be more efficient to store the hash as well? - // debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it - vHash := crc32c(0, @vStations[i][1], Length (vStations[i])); - FStationsDicts[0].TryGetValue(vHash, vData); - vMean := RoundExInteger(vData^.Sum/vData^.Count/10); - - vStream.WriteString( - vStations[i] + '=' + MyFormatInt(vData^.Min) - + '/' + MyFormatInt(vMean) - + '/' + MyFormatInt(vData^.Max) + ', ' - ); - Inc(I); - end; - - vStream.SetSize(vStream.Size - 2); - vStream.WriteString('}' + #10); -{$IFDEF DEBUG} - vStream.SaveToFile('ghatem-out.txt'); -{$ENDIF} -{$IFDEF RELEASE} - Write(vStream.DataString); -{$ENDIF} - finally - vStream.Free; - vStations.Free; - end; -end; - -{ TBRCThread } - -procedure TBRCThread.Execute; -begin - FProc (FThreadNb, FStart, FEnd); - Terminate; -end; - -constructor TBRCThread.Create(aProc: TThreadProc; aThreadNb: UInt16; aStart: Int64; aEnd: Int64); -begin - inherited Create(False); - FProc := aProc; - FThreadNb := aThreadNb; - FStart := aStart; - FEnd := aEnd; -end; - -//--------------------------------------------------- - - - -{$REGION TOneBRCApp scaffolding} - -procedure TOneBRCApp.DoRun; -var - ErrorMsg: String; -begin - // quick check parameters - ErrorMsg:= CheckOptions(Format('%s%s%s%s:',[ - cShortOptHelp, - cShortOptThread, - cShortOptVersion, - cShortOptInput - ]), - [ - cLongOptHelp, - cLongOptThread+':', - cLongOptVersion, - cLongOptInput+':' - ] - ); - if ErrorMsg<>'' then begin - WriteLn(Format(rsErrorMessage, [ ErrorMsg ])); - Terminate; - Exit; - end; - - // parse parameters - if HasOption(cShortOptHelp, cLongOptHelp) then begin - WriteHelp; - Terminate; - Exit; - end; - - if HasOption(cShortOptVersion, cLongOptVersion) then begin - WriteLn(Format(rsGeneratorVersion, [ 1.0 ])); - Terminate; - Exit; - end; - - FThreadCount := GetSystemThreadCount; - if HasOption(cShortOptThread, cLongOptThread) then begin - FThreadCount := StrToInt (GetOptionValue(cShortOptThread, cLongOptThread)); - end; - - if HasOption(cShortOptInput, cLongOptInput) then begin - FFileName := GetOptionValue( - cShortOptInput, - cLongOptInput - ); - end - else begin - WriteLn(Format(rsErrorMessage, [ rsMissingInputFlag ])); - Terminate; - Exit; - end; - - FFileName := ExpandFileName(FFileName); - - RunOneBRC; - - // stop program loop - Terminate; -end; - -constructor TOneBRCApp.Create(aOwner: TComponent); -begin - inherited Create(aOwner); - StopOnException:=True; -end; - -destructor TOneBRCApp.Destroy; -begin - inherited Destroy; -end; - -procedure TOneBRCApp.WriteHelp; -begin - { add your help code here } - writeln('Usage: ', ExeName, ' -h'); -end; - -var - Application: TOneBRCApp; -begin - Application:=TOneBRCApp.Create(nil); - Application.Title:='1 BRC'; - Application.Run; - Application.Free; -end. - -{$ENDREGION} diff --git a/entries/ghatem-fpc/src/OneBRC-smallrec.lpi b/entries/ghatem-fpc/src/OneBRC-smallrec.lpi deleted file mode 100644 index 262aaef..0000000 --- a/entries/ghatem-fpc/src/OneBRC-smallrec.lpi +++ /dev/null @@ -1,166 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<CONFIG> - <ProjectOptions> - <Version Value="12"/> - <PathDelim Value="\"/> - <General> - <Flags> - <MainUnitHasCreateFormStatements Value="False"/> - <MainUnitHasTitleStatement Value="False"/> - <MainUnitHasScaledStatement Value="False"/> - <CompatibilityMode Value="True"/> - </Flags> - <SessionStorage Value="InProjectDir"/> - <Title Value="1 BRC"/> - <UseAppBundle Value="False"/> - <ResourceType Value="res"/> - </General> - <BuildModes Count="4"> - <Item1 Name="Default" Default="True"/> - <Item2 Name="Debug"> - <CompilerOptions> - <Version Value="11"/> - <PathDelim Value="\"/> - <Target> - <Filename Value="..\..\..\bin\ghatem-smallrec"/> - </Target> - <SearchPaths> - <IncludeFiles Value="$(ProjOutDir)"/> - <UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/> - </SearchPaths> - <Parsing> - <SyntaxOptions> - <IncludeAssertionCode Value="True"/> - </SyntaxOptions> - </Parsing> - <CodeGeneration> - <Checks> - <IOChecks Value="True"/> - <RangeChecks Value="True"/> - <OverflowChecks Value="True"/> - <StackChecks Value="True"/> - </Checks> - <VerifyObjMethodCallValidity Value="True"/> - </CodeGeneration> - <Linking> - <Debugging> - <DebugInfoType Value="dsDwarf3"/> - <TrashVariables Value="True"/> - <UseValgrind Value="True"/> - <UseExternalDbgSyms Value="True"/> - </Debugging> - </Linking> - <Other> - <CustomOptions Value="-dDEBUG"/> - </Other> - </CompilerOptions> - </Item2> - <Item3 Name="Release"> - <CompilerOptions> - <Version Value="11"/> - <PathDelim Value="\"/> - <Target> - <Filename Value="..\..\..\bin\ghatem-smallrec"/> - </Target> - <SearchPaths> - <IncludeFiles Value="$(ProjOutDir)"/> - <UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/> - </SearchPaths> - <CodeGeneration> - <SmartLinkUnit Value="True"/> - <Optimizations> - <OptimizationLevel Value="3"/> - </Optimizations> - </CodeGeneration> - <Linking> - <Debugging> - <GenerateDebugInfo Value="False"/> - <RunWithoutDebug Value="True"/> - </Debugging> - <LinkSmart Value="True"/> - </Linking> - <Other> - <CustomOptions Value="-dRELEASE"/> - </Other> - </CompilerOptions> - </Item3> - <Item4 Name="Valgrind"> - <CompilerOptions> - <Version Value="11"/> - <PathDelim Value="\"/> - <Target> - <Filename Value="..\..\..\bin\ghatem-smallrec"/> - </Target> - <SearchPaths> - <IncludeFiles Value="$(ProjOutDir)"/> - <UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/> - </SearchPaths> - <Linking> - <Debugging> - <DebugInfoType Value="dsDwarf3"/> - <TrashVariables Value="True"/> - <UseValgrind Value="True"/> - <UseExternalDbgSyms Value="True"/> - </Debugging> - </Linking> - <Other> - <CustomOptions Value="-dDEBUG"/> - </Other> - </CompilerOptions> - </Item4> - </BuildModes> - <PublishOptions> - <Version Value="2"/> - <UseFileFilters Value="True"/> - </PublishOptions> - <RunParams> - <FormatVersion Value="2"/> - </RunParams> - <RequiredPackages Count="2"> - <Item1> - <PackageName Value="mormot2"/> - </Item1> - <Item2> - <PackageName Value="LCLBase"/> - </Item2> - </RequiredPackages> - <Units Count="1"> - <Unit0> - <Filename Value="OneBRC-smallrec.lpr"/> - <IsPartOfProject Value="True"/> - </Unit0> - </Units> - </ProjectOptions> - <CompilerOptions> - <Version Value="11"/> - <PathDelim Value="\"/> - <Target> - <Filename Value="..\..\..\bin\ghatem-smallrec"/> - </Target> - <SearchPaths> - <IncludeFiles Value="$(ProjOutDir)"/> - <UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/> - </SearchPaths> - <Linking> - <Debugging> - <DebugInfoType Value="dsDwarf3"/> - </Debugging> - </Linking> - <Other> - <CustomOptions Value="-dDEBUG"/> - </Other> - </CompilerOptions> - <Debugging> - <Exceptions Count="3"> - <Item1> - <Name Value="EAbort"/> - </Item1> - <Item2> - <Name Value="ECodetoolError"/> - </Item2> - <Item3> - <Name Value="EFOpenError"/> - </Item3> - </Exceptions> - </Debugging> -</CONFIG> diff --git a/entries/ghatem-fpc/src/OneBRC-smallrec.lpr b/entries/ghatem-fpc/src/OneBRC-smallrec.lpr deleted file mode 100644 index e4624e4..0000000 --- a/entries/ghatem-fpc/src/OneBRC-smallrec.lpr +++ /dev/null @@ -1,675 +0,0 @@ -program OneBRC; - -{$mode objfpc}{$H+} - -uses - {$IFDEF UNIX} - cthreads, - {$ENDIF} - Classes, SysUtils, CustApp, Lclintf, UTF8Process, syncobjs, - mormot.core.os, mormot.core.base, - Baseline.Console; - -const - cDictSize: Integer = 45007; - - c0ascii: ShortInt = 48; - c9ascii: ShortInt = 57; - cNegAscii: ShortInt = 45; - -type - //--------------------------------------- - { TOneBRCApp } - - TOneBRCApp = class(TCustomApplication) - private - FFileName: string; - FThreadCount: Integer; - procedure RunOneBRC; - protected - procedure DoRun; override; - public - constructor Create(aOwner: TComponent); override; - destructor Destroy; override; - procedure WriteHelp; virtual; - end; - - //--------------------------------------- - - // record is packed to minimize its size - // use pointers to avoid passing entire records around - TStationData = packed record - Min: SmallInt; - Max: SmallInt; - Count: UInt16; - Sum: Integer; - end; - PStationData = ^TStationData; - - TKeys = array [0..45006] of Cardinal; - TStationNames = array [0..45006] of AnsiString; - TValues = array [0..45006] of PStationData; - - //--------------------------------------- - { TMyDictionary } - - TMyDictionary = class - private - FHashes: TKeys; - FValues: TValues; - FRecords: array [0..45006] of TStationData; - // store the station names outside of the record as they are filled only upon first encounter - FStationNames: TStationNames; - procedure InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: Integer); - public - constructor Create; - property Keys: TKeys read FHashes; - property StationNames: TStationNames read FStationNames; - property Values: TValues read FValues; - function TryGetValue (const aKey: Cardinal; out aValue: PStationData): Boolean; inline; - procedure Add (const aKey: Cardinal; const aValue: PStationData); inline; - procedure AddName (const aKey: Cardinal; const aStationName: AnsiString); inline; - end; - - //--------------------------------------- - { TOneBRC } - - TOneBRC = class - private - // mormot memory map for fast bytes read. - // I tried using CreateFileMapping and failed, more details in unit FileReader - FMemoryMap: TMemoryMap; - FData: pAnsiChar; - FDataSize: Int64; - FCS: TCriticalSection; - - FThreadCount: UInt16; - FThreads: array of TThread; - FStationsDicts: array of TMyDictionary; - - FStationsNames: TMyDictionary; - - procedure ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); inline; - - public - constructor Create (const aThreadCount: UInt16); - destructor Destroy; override; - function mORMotMMF (const afilename: string): Boolean; - procedure DispatchThreads; - procedure WaitAll; - procedure ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int64); - procedure Merge (aLeft: UInt16; aRight: UInt16); - procedure MergeAll; - procedure GenerateOutput; - property DataSize: Int64 read FDataSize; - end; - - //--------------------------------------- - { TBRCThread } - - TThreadProc = procedure (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int64) of object; - - TBRCThread = class (TThread) - private - FProc: TThreadProc; - FThreadNb: UInt16; - FStart: Int64; - FEnd: Int64; - protected - procedure Execute; override; - public - constructor Create (aProc: TThreadProc; aThreadNb: UInt16; aStart: Int64; aEnd: Int64); - end; - -{ TOneBRCApp } - -procedure TOneBRCApp.RunOneBRC; -var - vOneBRC: TOneBRC; -begin - vOneBRC := TOneBRC.Create (FThreadCount); - try - try - vOneBRC.mORMotMMF(FFileName); - vOneBRC.DispatchThreads; - vOneBRC.WaitAll; - vOneBRC.MergeAll; - vOneBRC.GenerateOutput; - except - on E: Exception do - begin - WriteLn(Format(rsErrorMessage, [ E.Message ])); - ReadLn; - end; - end; - finally - vOneBRC.Free; - end; -end; - -//--------------------------------------------------- - -function Ceiling (const ANumber: Double): Integer; inline; -begin - Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0); -end; - -function RoundExInteger (const aTemp: Double): Integer; inline; -var - vTmp: Double; -begin - vTmp := aTemp * 10; - Result := Ceiling (vTmp); -end; - -//--------------------------------------------------- -{ TOneBRC } - -function Compare(AList: TStringList; AIndex1, AIndex2: Integer): Integer; -var - Str1, Str2: String; -begin - Str1 := AList.Strings[AIndex1]; - Str2 := AList.Strings[AIndex2]; - Result := CompareStr(Str1, Str2); -end; - -{ TMyDictionary } - -procedure TMyDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: Integer); -var vIdx: Integer; - vOffset: Integer; -begin - vIdx := aKey * cDictSize shr 32; - - if FHashes[vIdx] = aKey then begin - aIndex := vIdx; - aFound := True; - end - else begin - vOffset := 1; - - while True do begin - // quadratic probing, by incrementing vOffset - Inc (vIdx, vOffset); - Inc (vOffset); - - // exceeded boundary, loop back - if vIdx >= cDictSize then - Dec (vIdx, cDictSize); - - if FHashes[vIdx] = aKey then begin - // found match - aIndex := vIdx; - aFound := True; - break; - end; - if FHashes[vIdx] = 0 then begin - // found empty bucket to use - aIndex := vIdx; - aFound := False; - break; - end; - end; - end; -end; - -constructor TMyDictionary.Create; -var - I: Integer; -begin - for I := 0 to cDictSize - 1 do begin - FValues[I] := @FRecords[I]; - end; -end; - -function TMyDictionary.TryGetValue(const aKey: Cardinal; out aValue: PStationData): Boolean; -var - vIdx: Integer; -begin - InternalFind (aKey, Result, vIdx); - aValue := FValues[vIdx]; -end; - -procedure TMyDictionary.Add(const aKey: Cardinal; const aValue: PStationData); -var - vIdx: Integer; - vFound: Boolean; -begin - InternalFind (aKey, vFound, vIdx); - if not vFound then begin - FHashes[vIdx] := aKey; - FValues[vIdx] := aValue; - end - else - raise Exception.Create ('TMyDict: cannot add, duplicate key'); -end; - -procedure TMyDictionary.AddName (const aKey: Cardinal; const aStationName: AnsiString); -var - vIdx: Integer; - vFound: Boolean; -begin - InternalFind (aKey, vFound, vIdx); - if not vFound then begin - FHashes[vIdx] := aKey; - FStationNames[vIdx] := aStationName; - end - else - raise Exception.Create ('TMyDict: cannot add, duplicate key'); -end; - -procedure TOneBRC.ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); -// given a line of data, extract the length of station name, and temperature as Integer. -var - I: Int64; - vDigit: UInt8; -begin - // we're looking for the semicolon ';', but backwards since there's fewer characters to traverse - // a thermo measurement must be at least 3 characters long (one decimal, one period, and the units) - // e.g. input: Rock Hill;-54.3 - // can safely skip 3: ^^^ - I := aEnd - 3; - - while True do begin - if FData[I] = ';' then - break; - Dec(I); - end; - // I is the position of the semi-colon, extract what's before and after it - - // length of the station name string - aLength := i - aStart; - - // ASCII of 3 is 51. - // subtract ASCII of 0 to get the digit 3 - // repeat with the remaining digits, multiplying by 10^x (skip the '.') - // multiply by -1 upon reaching a '-' - aTemp := (Ord(FData[aEnd]) - c0ascii) - + 10 *(Ord(FData[aEnd-2]) - c0ascii); - vDigit := Ord(FData[aEnd-3]); - if (vDigit >= c0ascii) and (vDigit <= c9ascii) then begin - aTemp := aTemp + 100*(Ord(FData[aEnd-3]) - c0ascii); - vDigit := Ord(FData[aEnd-4]); - if vDigit = cNegAscii then - aTemp := -aTemp; - end - else if vDigit = cNegAscii then - aTemp := -aTemp; -end; - -//--------------------------------------------------- - -constructor TOneBRC.Create (const aThreadCount: UInt16); -var I: UInt16; -begin - FThreadCount := aThreadCount; - SetLength (FStationsDicts, aThreadCount); - SetLength (FThreads, aThreadCount); - - for I := 0 to aThreadCount - 1 do begin - FStationsDicts[I] := TMyDictionary.Create; - end; - - FStationsNames := TMyDictionary.Create; - FCS := TCriticalSection.Create; -end; - -destructor TOneBRC.Destroy; -var I: UInt16; -begin - FCS.Free; - FStationsNames.Free; - for I := 0 to FThreadCount - 1 do begin - FStationsDicts[I].Free; - end; - inherited Destroy; -end; - -//--------------------------------------------------- - -function TOneBRC.mORMotMMF (const afilename: string): Boolean; -begin - Result := FMemoryMap.Map (aFilename); - if Result then begin - FData := FMemoryMap.Buffer; - FDataSize := FMemoryMap.Size; - end; -end; - -procedure TOneBRC.DispatchThreads; -var - I: UInt16; - vRange: Int64; -begin - vRange := Trunc (FDataSize / FThreadCount); - - for I := 0 to FThreadCount - 1 do begin - FThreads[I] := TBRCThread.Create (@ProcessData, I, I*vRange, (I+1)*vRange); - end; -end; - -procedure TOneBRC.WaitAll; -var - I: UInt16; -begin - for I := 0 to FThreadCount - 1 do begin - FThreads[I].WaitFor; - end; -end; - -//--------------------------------------------------- - -procedure TOneBRC.ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int64); -var - i: Int64; - vStation: AnsiString; - vTemp: SmallInt; - vData: PStationData; - vLineStart: Int64; - vHash: Cardinal; - vLenStationName: ShortInt; -begin - i := aStartIdx; - - // the given starting point might be in the middle of a line: - // find the beginning of that line - while i-1 >= 0 do begin - if FData[i-1] <> #10 then - Dec (I) - else - break; - end; - - // the given ending point might be in the middle of a line: - // find the beginning of that line (the last block works well) - while True do begin - if FData[aEndIdx] <> #10 then - Dec (aEndIdx) - else - break; - end; - Inc (aEndIdx, 1); - - vLineStart := i; - - while i < aEndIdx do begin - if FData[i] = #10 then begin - // new line parsed, process its contents - ExtractLineData (vLineStart, i - 1, vLenStationName, vTemp); - - // compute the hash starting at the station's first char, and its length - // mORMot's crc32c is ~33% faster than the built-in one - vHash := crc32c(0, @FData[vLineStart], vLenStationName); - - if FstationsDicts[aThreadNb].TryGetValue(vHash, vData) then begin - if vTemp < vData^.Min then - vData^.Min := vTemp; - if vTemp > vData^.Max then - vData^.Max := vTemp; - vData^.Sum := vData^.Sum + vTemp; - Inc (vData^.Count); - end - else begin - // pre-allocated array of records instead of on-the-go allocation - vData^.Min := vTemp; - vData^.Max := vTemp; - vData^.Sum := vTemp; - vData^.Count := 1; - FStationsDicts[aThreadNb].Add (vHash, vData); - - // SetString done only once per station name, for later sorting - if not FStationsNames.TryGetValue (vHash, vData) then begin - FCS.Acquire; - if not FStationsNames.TryGetValue (vHash, vData) then begin - SetString(vStation, pAnsiChar(@FData[vLineStart]), vLenStationName); - FStationsNames.AddName (vHash, vStation); - end; - FCS.Release; - end; - end; - - // we're at a #10: next line starts at the next index - vLineStart := i+1; - - // we're at a #10: - // until the next #10 char, there will be: - // - 1 semicolon - // - 3 chars for the temp (min) - // - 2 chars for the name (min) - // - the usual Inc (I) - // so we should be able to skip 7 chars until another #10 may appear - Inc (i, 7); - continue; - end; - - Inc (i); - end; -end; - -procedure TOneBRC.Merge(aLeft: UInt16; aRight: UInt16); -var iHash: Cardinal; - vDataR: PStationData; - vDataL: PStationData; - I: Integer; -begin - for I := 0 to cDictSize - 1 do begin - iHash := FStationsDicts[aRight].Keys[I]; - // zero means empty slot: skip - if iHash = 0 then - continue; - - FStationsDicts[aRight].TryGetValue(iHash, vDataR); - - if FStationsDicts[aLeft].TryGetValue(iHash, vDataL) then begin - vDataL^.Count := vDataL^.Count + vDataR^.Count; - vDataL^.Sum := vDataL^.Sum + vDataR^.Sum; - - if vDataR^.Max > vDataL^.Max then - vDataL^.Max := vDataR^.Max; - if vDataR^.Min < vDataL^.Min then - vDataL^.Min := vDataR^.Min; - end - else begin - FStationsDicts[aLeft].Add (iHash, vDataR); - end; - end; -end; - -procedure TOneBRC.MergeAll; -var - I: UInt16; -begin - for I := 1 to FThreadCount - 1 do begin - Merge (0, I); - end; -end; - -//--------------------------------------------------- - -function MyFormatInt (const aIn: SmallInt): AnsiString; inline; -begin - Result := IntToStr(aIn); - Insert ('.', Result, Length(Result)); - - if Result[1] = '.' then begin - Insert ('0', Result, 1); - exit; - end; - - if (Result[1] = '-') and (Result[2] = '.') then - Insert('0', Result, 2); -end; - -//--------------------------------------------------- - -procedure TOneBRC.GenerateOutput; -var vMean: Integer; - vStream: TStringStream; - I, N: Int64; - vData: PStationData; - vHash: Cardinal; - vStations: TStringList; -begin - vStream := TStringStream.Create; - vStations := TStringList.Create; - vStations.Capacity := cDictSize; - vStations.UseLocale := False; - try - vStations.BeginUpdate; - for I := 0 to cDictSize - 1 do begin - if FStationsNames.Keys[I] <> 0 then begin - // count = 0 means empty slot: skip - vStations.Add(FStationsNames.StationNames[I]); - end; - end; - vStations.EndUpdate; - - vStations.CustomSort (@Compare); - - I := 0; - N := vStations.Count; - - vStream.WriteString('{'); - while I < N do begin - // the stations are now sorted, but we need to locate the data: recompute hash - // would it be more efficient to store the hash as well? - // debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it - vHash := crc32c(0, @vStations[i][1], Length (vStations[i])); - FStationsDicts[0].TryGetValue(vHash, vData); - vMean := RoundExInteger(vData^.Sum/vData^.Count/10); - - vStream.WriteString( - vStations[i] + '=' + MyFormatInt(vData^.Min) - + '/' + MyFormatInt(vMean) - + '/' + MyFormatInt(vData^.Max) + ', ' - ); - Inc(I); - end; - - vStream.SetSize(vStream.Size - 2); - vStream.WriteString('}' + #10); -{$IFDEF DEBUG} - vStream.SaveToFile('ghatem-out.txt'); -{$ENDIF} -{$IFDEF RELEASE} - Write(vStream.DataString); -{$ENDIF} - finally - vStream.Free; - vStations.Free; - end; -end; - -{ TBRCThread } - -procedure TBRCThread.Execute; -begin - FProc (FThreadNb, FStart, FEnd); - Terminate; -end; - -constructor TBRCThread.Create(aProc: TThreadProc; aThreadNb: UInt16; aStart: Int64; aEnd: Int64); -begin - inherited Create(False); - FProc := aProc; - FThreadNb := aThreadNb; - FStart := aStart; - FEnd := aEnd; -end; - -//--------------------------------------------------- - - - -{$REGION TOneBRCApp scaffolding} - -procedure TOneBRCApp.DoRun; -var - ErrorMsg: String; -begin - // quick check parameters - ErrorMsg:= CheckOptions(Format('%s%s%s%s:',[ - cShortOptHelp, - cShortOptThread, - cShortOptVersion, - cShortOptInput - ]), - [ - cLongOptHelp, - cLongOptThread+':', - cLongOptVersion, - cLongOptInput+':' - ] - ); - if ErrorMsg<>'' then begin - WriteLn(Format(rsErrorMessage, [ ErrorMsg ])); - Terminate; - Exit; - end; - - // parse parameters - if HasOption(cShortOptHelp, cLongOptHelp) then begin - WriteHelp; - Terminate; - Exit; - end; - - if HasOption(cShortOptVersion, cLongOptVersion) then begin - WriteLn(Format(rsGeneratorVersion, [ 1.0 ])); - Terminate; - Exit; - end; - - FThreadCount := GetSystemThreadCount; - if HasOption(cShortOptThread, cLongOptThread) then begin - FThreadCount := StrToInt (GetOptionValue(cShortOptThread, cLongOptThread)); - end; - - if HasOption(cShortOptInput, cLongOptInput) then begin - FFileName := GetOptionValue( - cShortOptInput, - cLongOptInput - ); - end - else begin - WriteLn(Format(rsErrorMessage, [ rsMissingInputFlag ])); - Terminate; - Exit; - end; - - FFileName := ExpandFileName(FFileName); - - RunOneBRC; - - // stop program loop - Terminate; -end; - -constructor TOneBRCApp.Create(aOwner: TComponent); -begin - inherited Create(aOwner); - StopOnException:=True; -end; - -destructor TOneBRCApp.Destroy; -begin - inherited Destroy; -end; - -procedure TOneBRCApp.WriteHelp; -begin - { add your help code here } - writeln('Usage: ', ExeName, ' -h'); -end; - -var - Application: TOneBRCApp; -begin - Application:=TOneBRCApp.Create(nil); - Application.Title:='1 BRC'; - Application.Run; - Application.Free; -end. - -{$ENDREGION} diff --git a/entries/ghatem-fpc/src/OneBRC.lpr b/entries/ghatem-fpc/src/OneBRC.lpr index fc4cdf0..698f593 100644 --- a/entries/ghatem-fpc/src/OneBRC.lpr +++ b/entries/ghatem-fpc/src/OneBRC.lpr @@ -11,11 +11,11 @@ Baseline.Console; const - cDictSize: Integer = 45007; + cNumStations = 41343; // as per the input file + cDictSize = 248071; // numstations * 6, next prime number + cThreadCount = 32; c0ascii: ShortInt = 48; - c9ascii: ShortInt = 57; - cNegAscii: ShortInt = 45; type //--------------------------------------- @@ -41,34 +41,61 @@ TOneBRCApp = class(TCustomApplication) TStationData = packed record Min: SmallInt; Max: SmallInt; - Count: UInt32; + Count: UInt16; // will fail on 400 stations / 5B row tests, due to overflow Sum: Integer; end; PStationData = ^TStationData; - TKeys = array [0..45006] of Cardinal; - TStationNames = array [0..45006] of AnsiString; - TValues = array [0..45006] of PStationData; + TThreadCount = UInt8; // max number of threads = 256 + TStationCount = UInt16; // max number of stations = 65536 (enough for 42k) + THashSize = UInt32; // size of dictionary can exceed 2^16 - //--------------------------------------- - { TMyDictionary } + THashes = array [0..cDictSize-1] of Cardinal; + TIndexes = array [0..cDictSize-1] of TStationCount; + TStationNames = array [0..cNumStations-1] of AnsiString; + + + { TBRCDictionary } - TMyDictionary = class + TBRCDictionary = class private - FHashes: TKeys; - FValues: TValues; - FRecords: array [0..45006] of TStationData; - // store the station names outside of the record as they are filled only upon first encounter + // keys/values for a Large dictionary: + // values will be the index where the actual record is stored + FHashes: THashes; + FIndexes: TIndexes; + + // where the actual records are stored: + // - each thread holds its own data + // - for each thread, pre-allocate as much space as needed + // - all threads store their data at the exact same index + FThreadData: array [0..cThreadCount-1] of array [0..cNumStations-1] of TStationData; + + // station names are also shared, not lock-protected (in the worst case, the value is written twice) + // stored separately as it is rarely needed FStationNames: TStationNames; - procedure InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: Integer); + + // points to the next slot in FThreadData where we should fill a newly encountered station + FCounter: TStationCount; + + // exclusively to protect FCounter from concurrent-writes + FCS: TCriticalSection; + + // searches for a given key, returns if found the key and the storage index + // (or, if not found, which index to use next) + procedure InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: THashSize); + public constructor Create; - property Keys: TKeys read FHashes; - property StationNames: TStationNames read FStationNames; - property Values: TValues read FValues; - function TryGetValue (const aKey: Cardinal; out aValue: PStationData): Boolean; inline; - procedure Add (const aKey: Cardinal; const aValue: PStationData); inline; - procedure AddName (const aKey: Cardinal; const aStationName: AnsiString); inline; + destructor Destroy; override; + + // simple wrapper to find station-record pointers + function TryGetValue (const aKey: Cardinal; const aThreadNb: TThreadCount; out aValue: PStationData): Boolean; inline; + + // multithread-unprotected: adds a firstly-encountered station-data (temp, name) + procedure Add (const aHashIdx: THashSize; const aThreadNb: TThreadCount; const aTemp: SmallInt; const aStationName: AnsiString); inline; + + // multithread-protected: safely assign a slot for a given key + function AtomicRegisterHash (const aKey: Cardinal): THashSize; end; //--------------------------------------- @@ -81,25 +108,32 @@ TOneBRC = class FMemoryMap: TMemoryMap; FData: pAnsiChar; FDataSize: Int64; - FCS: TCriticalSection; - FThreadCount: UInt16; + FThreadCount: TThreadCount; FThreads: array of TThread; - FStationsDicts: array of TMyDictionary; - - FStationsNames: TMyDictionary; + FDictionary: TBRCDictionary; + // for a line between idx [aStart; aEnd], returns the station-name length, and the integer-value of temperature procedure ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); inline; public - constructor Create (const aThreadCount: UInt16); + constructor Create (const aThreadCount: TThreadCount); destructor Destroy; override; function mORMotMMF (const afilename: string): Boolean; + + // initial thread-spawn procedure DispatchThreads; + + // await for all threads to complete work procedure WaitAll; - procedure ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int64); - procedure Merge (aLeft: UInt16; aRight: UInt16); + + // executed by each thread to process data in the given range + procedure ProcessData (aThreadNb: TThreadCount; aStartIdx: Int64; aEndIdx: Int64); + + // merge data from all threads + procedure Merge (aLeft: TThreadCount; aRight: TThreadCount); procedure MergeAll; + procedure GenerateOutput; property DataSize: Int64 read FDataSize; end; @@ -107,90 +141,101 @@ TOneBRC = class //--------------------------------------- { TBRCThread } - TThreadProc = procedure (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int64) of object; + TThreadProc = procedure (aThreadNb: TThreadCount; aStartIdx: Int64; aEndIdx: Int64) of object; TBRCThread = class (TThread) private FProc: TThreadProc; - FThreadNb: UInt16; + FThreadNb: TThreadCount; FStart: Int64; FEnd: Int64; protected procedure Execute; override; public - constructor Create (aProc: TThreadProc; aThreadNb: UInt16; aStart: Int64; aEnd: Int64); + constructor Create (aProc: TThreadProc; aThreadNb: TThreadCount; aStart: Int64; aEnd: Int64); end; -{ TOneBRCApp } +{ TBRCDictionary } -procedure TOneBRCApp.RunOneBRC; -var - vOneBRC: TOneBRC; +constructor TBRCDictionary.Create; begin - vOneBRC := TOneBRC.Create (FThreadCount); - try - try - vOneBRC.mORMotMMF(FFileName); - vOneBRC.DispatchThreads; - vOneBRC.WaitAll; - vOneBRC.MergeAll; - vOneBRC.GenerateOutput; - except - on E: Exception do - begin - WriteLn(Format(rsErrorMessage, [ E.Message ])); - ReadLn; - end; - end; - finally - vOneBRC.Free; - end; + FCS := TCriticalSection.Create; + FCounter := 0; end; -//--------------------------------------------------- - -function Ceiling (const ANumber: Double): Integer; inline; +destructor TBRCDictionary.Destroy; begin - Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0); + FCS.Free; + inherited Destroy; end; -function RoundExInteger (const aTemp: Double): Integer; inline; +function TBRCDictionary.TryGetValue(const aKey: Cardinal; const aThreadNb: TThreadCount; out aValue: PStationData): Boolean; var - vTmp: Double; + vIdx: THashSize; begin - vTmp := aTemp * 10; - Result := Ceiling (vTmp); + InternalFind (aKey, Result, vIdx); + aValue := @FThreadData[aThreadNb][FIndexes[vIdx]]; end; -//--------------------------------------------------- -{ TOneBRC } - -function Compare(AList: TStringList; AIndex1, AIndex2: Integer): Integer; +procedure TBRCDictionary.Add(const aHashIdx: THashSize; const aThreadNb: TThreadCount; const aTemp: SmallInt; const aStationName: AnsiString); var - Str1, Str2: String; + vData: PStationData; begin - Str1 := AList.Strings[AIndex1]; - Str2 := AList.Strings[AIndex2]; - Result := CompareStr(Str1, Str2); + vData := @FThreadData[aThreadNb][FIndexes[aHashIdx]]; + vData^.Count := 1; + vData^.Max := aTemp; + vData^.Min := aTemp; + vData^.Sum := aTemp; + + FStationNames[FIndexes[aHashIdx]] := aStationName; end; -{ TMyDictionary } +function TBRCDictionary.AtomicRegisterHash(const aKey: Cardinal): THashSize; +var + vFound: Boolean; +begin + // must call InternalFind again, within the critical-section, + // to ensure the slot was not taken by another thread + // this function should execute only once per station, so at most 41343 times + FCS.Acquire; + try + InternalFind (aKey, vFound, Result); + if not vFound then begin + FHashes[Result] := aKey; + FIndexes[Result] := FCounter; + Inc (FCounter); + end; + finally + FCS.Release; + end; +end; -procedure TMyDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: Integer); +procedure TBRCDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: THashSize); var vIdx: Integer; vOffset: Integer; begin + // Lemire hashing: faster to ocmpute than modulus, but more collisions from trials + // https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + // thanks Arnaud for the suggestion vIdx := aKey * cDictSize shr 32; + if FHashes[vIdx] = 0 then begin + // found match + aIndex := vIdx; + aFound := False; + exit; + end; if FHashes[vIdx] = aKey then begin + // found empty bucket to use aIndex := vIdx; aFound := True; + exit; end else begin vOffset := 1; while True do begin - // quadratic probing, by incrementing vOffset + // linear (not quadratic) probing, with continous increments to minimize clusters Inc (vIdx, vOffset); Inc (vOffset); @@ -202,89 +247,95 @@ procedure TMyDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; // found match aIndex := vIdx; aFound := True; - break; + exit; end; if FHashes[vIdx] = 0 then begin // found empty bucket to use aIndex := vIdx; aFound := False; - break; + exit; end; end; end; end; -constructor TMyDictionary.Create; + +{ TOneBRCApp } + +procedure TOneBRCApp.RunOneBRC; var - I: Integer; + vOneBRC: TOneBRC; begin - for I := 0 to cDictSize - 1 do begin - FValues[I] := @FRecords[I]; + vOneBRC := TOneBRC.Create (FThreadCount); + try + try + vOneBRC.mORMotMMF(FFileName); + vOneBRC.DispatchThreads; + vOneBRC.WaitAll; + vOneBRC.MergeAll; + vOneBRC.GenerateOutput; + except + on E: Exception do + begin + WriteLn(Format(rsErrorMessage, [ E.Message ])); + ReadLn; + end; + end; + finally + vOneBRC.Free; end; end; -function TMyDictionary.TryGetValue(const aKey: Cardinal; out aValue: PStationData): Boolean; -var - vIdx: Integer; +//--------------------------------------------------- + +function Ceiling (const ANumber: Double): Integer; inline; begin - InternalFind (aKey, Result, vIdx); - aValue := FValues[vIdx]; + Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0); end; -procedure TMyDictionary.Add(const aKey: Cardinal; const aValue: PStationData); +function RoundExInteger (const aTemp: Double): Integer; inline; var - vIdx: Integer; - vFound: Boolean; + vTmp: Double; begin - InternalFind (aKey, vFound, vIdx); - if not vFound then begin - FHashes[vIdx] := aKey; - FValues[vIdx] := aValue; - end - else - raise Exception.Create ('TMyDict: cannot add, duplicate key'); + vTmp := aTemp * 10; + Result := Ceiling (vTmp); end; -procedure TMyDictionary.AddName (const aKey: Cardinal; const aStationName: AnsiString); -var - vIdx: Integer; - vFound: Boolean; +//--------------------------------------------------- +{ TOneBRC } + +function Compare(AList: TStringList; AIndex1, AIndex2: Integer): Integer; begin - InternalFind (aKey, vFound, vIdx); - if not vFound then begin - FHashes[vIdx] := aKey; - FStationNames[vIdx] := aStationName; - end - else - raise Exception.Create ('TMyDict: cannot add, duplicate key'); + Result := CompareStr(AList.Strings[AIndex1], AList.Strings[AIndex2]); end; procedure TOneBRC.ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); // given a line of data, extract the length of station name, and temperature as Integer. var - I: Int64; - vDigit: UInt8; + J: Int64; + vIsNeg: Int8; begin // we're looking for the semicolon ';', but backwards since there's fewer characters to traverse // a thermo measurement must be at least 3 characters long (one decimal, one period, and the units) // e.g. input: Rock Hill;-54.3 // can safely skip 3: ^^^ - I := aEnd - 3; + J := aEnd - 3; - while True do begin - if FData[I] = ';' then - break; - Dec(I); + if FData[J] <> ';' then begin + Dec (J); + Dec (J, Ord (FData[J] <> ';')); end; // I is the position of the semi-colon, extract what's before and after it // length of the station name string - aLength := i - aStart; + aLength := J - aStart; // ASCII of 3 is 51. // subtract ASCII of 0 to get the digit 3 // repeat with the remaining digits, multiplying by 10^x (skip the '.') // multiply by -1 upon reaching a '-' + + { aTemp := (Ord(FData[aEnd]) - c0ascii) + 10 *(Ord(FData[aEnd-2]) - c0ascii); vDigit := Ord(FData[aEnd-3]); @@ -296,33 +347,38 @@ procedure TOneBRC.ExtractLineData(const aStart: Int64; const aEnd: Int64; out aL end else if vDigit = cNegAscii then aTemp := -aTemp; + } + + //========== + // entire computation is branchless (for readability, see version above) + // no intermediary results also showed better performance + + // 0 if - + // 1 if + + // convert range [0;1] to [-1;1] for branchless negation when needed + // if there is a 3rd digit (*100), add it, otherwise multiply by 0 to cancel it out + vIsNeg := Ord (FData[J+1] <> '-'); + + aTemp := ( + (Ord(FData[aEnd]) - c0ascii) + + 10 *(Ord(FData[aEnd-2]) - c0ascii) + + Ord ((J+4 - vIsNeg < aEnd)) * 100*(Ord(FData[aEnd-3]) - c0ascii) + ) * (vIsNeg * 2 - 1); end; //--------------------------------------------------- -constructor TOneBRC.Create (const aThreadCount: UInt16); -var I: UInt16; +constructor TOneBRC.Create (const aThreadCount: TThreadCount); begin FThreadCount := aThreadCount; - SetLength (FStationsDicts, aThreadCount); SetLength (FThreads, aThreadCount); - for I := 0 to aThreadCount - 1 do begin - FStationsDicts[I] := TMyDictionary.Create; - end; - - FStationsNames := TMyDictionary.Create; - FCS := TCriticalSection.Create; + FDictionary := TBRCDictionary.Create; end; destructor TOneBRC.Destroy; -var I: UInt16; begin - FCS.Free; - FStationsNames.Free; - for I := 0 to FThreadCount - 1 do begin - FStationsDicts[I].Free; - end; + FDictionary.Free; inherited Destroy; end; @@ -339,9 +395,10 @@ function TOneBRC.mORMotMMF (const afilename: string): Boolean; procedure TOneBRC.DispatchThreads; var - I: UInt16; + I: TThreadCount; vRange: Int64; begin + // distribute input equally across available threads vRange := Trunc (FDataSize / FThreadCount); for I := 0 to FThreadCount - 1 do begin @@ -351,7 +408,7 @@ procedure TOneBRC.DispatchThreads; procedure TOneBRC.WaitAll; var - I: UInt16; + I: TThreadCount; begin for I := 0 to FThreadCount - 1 do begin FThreads[I].WaitFor; @@ -360,16 +417,24 @@ procedure TOneBRC.WaitAll; //--------------------------------------------------- -procedure TOneBRC.ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int64); +procedure TOneBRC.ProcessData (aThreadNb: TThreadCount; aStartIdx: Int64; aEndIdx: Int64); var i: Int64; vStation: AnsiString; vTemp: SmallInt; vData: PStationData; + vHashIdx: THashSize; vLineStart: Int64; vHash: Cardinal; vLenStationName: ShortInt; + vFound: Boolean; begin + // initialize min/max, else we may get zeroes (due to our Add that fires once per station across all threads) + for I := 0 to cNumStations - 1 do begin + FDictionary.FThreadData[aThreadNb][I].Max := -2000; + FDictionary.FThreadData[aThreadNb][I].Min := 2000; + end; + i := aStartIdx; // the given starting point might be in the middle of a line: @@ -394,92 +459,78 @@ procedure TOneBRC.ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int vLineStart := i; while i < aEndIdx do begin - if FData[i] = #10 then begin - // new line parsed, process its contents - ExtractLineData (vLineStart, i - 1, vLenStationName, vTemp); - - // compute the hash starting at the station's first char, and its length - // mORMot's crc32c is ~33% faster than the built-in one - vHash := crc32c(0, @FData[vLineStart], vLenStationName); - - if FstationsDicts[aThreadNb].TryGetValue(vHash, vData) then begin - if vTemp < vData^.Min then - vData^.Min := vTemp; - if vTemp > vData^.Max then - vData^.Max := vTemp; - vData^.Sum := vData^.Sum + vTemp; - Inc (vData^.Count); - end - else begin - // pre-allocated array of records instead of on-the-go allocation + while FData[i] <> #10 do begin + Inc (I); + end; + + // new line parsed, process its contents + ExtractLineData (vLineStart, i - 1, vLenStationName, vTemp); + + // compute the hash starting at the station's first char, and its length + // mORMot's crc32c is ~33% faster than the built-in one + vHash := crc32c(0, @FData[vLineStart], vLenStationName); + + FDictionary.InternalFind (vHash, vFound, vHashIdx); + + if vFound then begin + vData := @FDictionary.FThreadData[aThreadNb][FDictionary.FIndexes[vHashIdx]]; + if vTemp < vData^.Min then vData^.Min := vTemp; + if vTemp > vData^.Max then vData^.Max := vTemp; - vData^.Sum := vTemp; - vData^.Count := 1; - FStationsDicts[aThreadNb].Add (vHash, vData); - - // SetString done only once per station name, for later sorting - if not FStationsNames.TryGetValue (vHash, vData) then begin - FCS.Acquire; - if not FStationsNames.TryGetValue (vHash, vData) then begin - SetString(vStation, pAnsiChar(@FData[vLineStart]), vLenStationName); - FStationsNames.AddName (vHash, vStation); - end; - FCS.Release; - end; - end; + Inc (vData^.Sum, vTemp); + Inc (vData^.Count); + end + else begin + // pre-allocated array of records instead of on-the-go allocation + vHashIdx := FDictionary.AtomicRegisterHash (vHash); - // we're at a #10: next line starts at the next index - vLineStart := i+1; - - // we're at a #10: - // until the next #10 char, there will be: - // - 1 semicolon - // - 3 chars for the temp (min) - // - 2 chars for the name (min) - // - the usual Inc (I) - // so we should be able to skip 7 chars until another #10 may appear - Inc (i, 7); - continue; + // SetString done only once per station name, for later sorting + SetString(vStation, pAnsiChar(@FData[vLineStart]), vLenStationName); + + // data can be safely added at the given index, without locking + FDictionary.Add(vHashIdx, aThreadNb, vTemp, vStation); end; - Inc (i); + // we're at a #10: next line starts at the next index + vLineStart := i+1; + + // we're at a #10: + // until the next #10 char, there will be: + // - 1 semicolon + // - 3 chars for the temp (min) + // - 2 chars for the name (min) + // - the usual Inc (I) + // so we should be able to skip 7 chars until another #10 may appear + Inc (i, 7); end; end; -procedure TOneBRC.Merge(aLeft: UInt16; aRight: UInt16); -var iHash: Cardinal; - vDataR: PStationData; +procedure TOneBRC.Merge(aLeft: TThreadCount; aRight: TThreadCount); +var vDataR: PStationData; vDataL: PStationData; I: Integer; begin - for I := 0 to cDictSize - 1 do begin - iHash := FStationsDicts[aRight].Keys[I]; - // zero means empty slot: skip - if iHash = 0 then - continue; - - FStationsDicts[aRight].TryGetValue(iHash, vDataR); - - if FStationsDicts[aLeft].TryGetValue(iHash, vDataL) then begin - vDataL^.Count := vDataL^.Count + vDataR^.Count; - vDataL^.Sum := vDataL^.Sum + vDataR^.Sum; - - if vDataR^.Max > vDataL^.Max then - vDataL^.Max := vDataR^.Max; - if vDataR^.Min < vDataL^.Min then - vDataL^.Min := vDataR^.Min; - end - else begin - FStationsDicts[aLeft].Add (iHash, vDataR); - end; + // accumulate data into Left + for I := 0 to cNumStations - 1 do begin + vDataR := @FDictionary.FThreadData[aRight][I]; + vDataL := @FDictionary.FThreadData[aLeft][I]; + + vDataL^.Count := vDataL^.Count + vDataR^.Count; + vDataL^.Sum := vDataL^.Sum + vDataR^.Sum; + + if (vDataR^.Max > vDataL^.Max) then + vDataL^.Max := vDataR^.Max; + if (vDataR^.Min < vDataL^.Min) then + vDataL^.Min := vDataR^.Min; end; end; procedure TOneBRC.MergeAll; var - I: UInt16; + I: TThreadCount; begin + // all thread-data is accumulated into index 0 for I := 1 to FThreadCount - 1 do begin Merge (0, I); end; @@ -489,6 +540,8 @@ procedure TOneBRC.MergeAll; function MyFormatInt (const aIn: SmallInt): AnsiString; inline; begin + // much faster than FormatFloat + // oddly, IntToStr does not include leading zeroes for both pos and neg numbers Result := IntToStr(aIn); Insert ('.', Result, Length(Result)); @@ -510,6 +563,7 @@ procedure TOneBRC.GenerateOutput; vData: PStationData; vHash: Cardinal; vStations: TStringList; + iStationName: AnsiString; begin vStream := TStringStream.Create; vStations := TStringList.Create; @@ -517,14 +571,11 @@ procedure TOneBRC.GenerateOutput; vStations.UseLocale := False; try vStations.BeginUpdate; - for I := 0 to cDictSize - 1 do begin - if FStationsNames.Keys[I] <> 0 then begin - // count = 0 means empty slot: skip - vStations.Add(FStationsNames.StationNames[I]); - end; + for iStationName in FDictionary.FStationNames do begin + if iStationName <> '' then + vStations.Add(iStationName); end; vStations.EndUpdate; - vStations.CustomSort (@Compare); I := 0; @@ -536,7 +587,7 @@ procedure TOneBRC.GenerateOutput; // would it be more efficient to store the hash as well? // debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it vHash := crc32c(0, @vStations[i][1], Length (vStations[i])); - FStationsDicts[0].TryGetValue(vHash, vData); + FDictionary.TryGetValue(vHash, 0, vData); vMean := RoundExInteger(vData^.Sum/vData^.Count/10); vStream.WriteString( @@ -569,7 +620,7 @@ procedure TBRCThread.Execute; Terminate; end; -constructor TBRCThread.Create(aProc: TThreadProc; aThreadNb: UInt16; aStart: Int64; aEnd: Int64); +constructor TBRCThread.Create(aProc: TThreadProc; aThreadNb: TThreadCount; aStart: Int64; aEnd: Int64); begin inherited Create(False); FProc := aProc;