From a9ae7381b562a03ff2823e1d9980731847a5d1dd Mon Sep 17 00:00:00 2001 From: Iwan Kelaiah Date: Fri, 15 Mar 2024 11:46:27 +1100 Subject: [PATCH 1/2] New - Initial commit. --- entries/ikelaiah/LICENSE.md | 9 + entries/ikelaiah/README.md | 82 +++++++++ entries/ikelaiah/src/OneBRC.lpi | 78 ++++++++ entries/ikelaiah/src/OneBRC.lpr | 137 ++++++++++++++ entries/ikelaiah/src/stopwatch.pas | 64 +++++++ entries/ikelaiah/src/weatherstation.pas | 226 ++++++++++++++++++++++++ 6 files changed, 596 insertions(+) create mode 100644 entries/ikelaiah/LICENSE.md create mode 100644 entries/ikelaiah/README.md create mode 100644 entries/ikelaiah/src/OneBRC.lpi create mode 100644 entries/ikelaiah/src/OneBRC.lpr create mode 100644 entries/ikelaiah/src/stopwatch.pas create mode 100644 entries/ikelaiah/src/weatherstation.pas diff --git a/entries/ikelaiah/LICENSE.md b/entries/ikelaiah/LICENSE.md new file mode 100644 index 0000000..003c259 --- /dev/null +++ b/entries/ikelaiah/LICENSE.md @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2024 ikelaiah + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/entries/ikelaiah/README.md b/entries/ikelaiah/README.md new file mode 100644 index 0000000..850dc24 --- /dev/null +++ b/entries/ikelaiah/README.md @@ -0,0 +1,82 @@ +# OneBRC for Object Pascal + +An Entry to the One Billion Row Challenge in Object Pascal. + +## Getting Started + +### Dependencies + +* None. Only latest Free Pascal Compiler and Lazarus. You can get these easily by using [`https://github.com/LongDirtyAnimAlf/fpcupdeluxe/releases`](https://github.com/LongDirtyAnimAlf/fpcupdeluxe/releases). + +### Compiling + +* Open `OneBRC.lpi` using Lazarus. +* Hit `Ctrl + F9` to compile. + +### Running the executable + +```bash +$ ./OneBRC.exe -i your_measurements.txt +``` + +To time the execution, do the following. + +```bash +$ time ./OneBRC.exe -i your_measurements.txt +``` + +## Help + +To see flags, use `-h`. + +``` +$ ./OneBRC.exe -h +OneBRC -- An entry to the One Billion Row Challenge for Object Pascal + +Usage: OneBRC [-h] [-v] [-i input_file] + + -h | --help Show this help screen + -v | --version Show the version number + -i | --input-file <filename> Input text file to process. + Each row is one temperature measurement in the format <string: station name>;<double: measurement> +``` + +Use `-v` to check version. + +```bash +$ ./OneBRC.exe -v +OneBRC version 1.0 +``` + +## Authors + +Iwan Kelaiah +[ikelaiah](https://github.com/ikelaiah) + +## Version History + +* 1.0 + * Initial Release - Sequential approach. + +## License + +This project is licensed under the MIT License - see the LICENSE.md file for details + +## Acknowledgments + +Inspiration, code snippets, etc. + + 1. The FPC team, Lazarus team, fpcupdeluxe team, and other contributors. + - For providing a usable programming language and a usable ecosystem. + 2.
Gustavo 'Gus' Carreno. + - For making this happen. + - Borrowed Gus' approach to use `TCustomApplication` and using `unit`s properly + to make main code more readable. + - Borrowed and modified Gus' `WriteHelp` from the `baseline.lpr`. + 3. Székely Balázs. + - Now I know what `Single` data type is! + - I borrowed the custom `TStringList` comparer from the `baseline` program. + 4. Shraddha Agrawal - https://www.bytesizego.com/blog/one-billion-row-challenge-go. + - The advice for not storing measurements for each station in a data structure. + 5. Arman Hajisafi - https://arman-hs.github.io + - Encouragements and inspirations. \ No newline at end of file diff --git a/entries/ikelaiah/src/OneBRC.lpi b/entries/ikelaiah/src/OneBRC.lpi new file mode 100644 index 0000000..777b3a0 --- /dev/null +++ b/entries/ikelaiah/src/OneBRC.lpi @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + <UseAppBundle Value="False"/> + <ResourceType Value="res"/> + </General> + <BuildModes> + <Item Name="Default" Default="True"/> + </BuildModes> + <PublishOptions> + <Version Value="2"/> + <UseFileFilters Value="True"/> + </PublishOptions> + <RunParams> + <FormatVersion Value="2"/> + </RunParams> + <Units> + <Unit> + <Filename Value="OneBRC.lpr"/> + <IsPartOfProject Value="True"/> + </Unit> + <Unit> + <Filename Value="stopwatch.pas"/> + <IsPartOfProject Value="True"/> + <UnitName Value="Stopwatch"/> + </Unit> + <Unit> + <Filename Value="weatherstation.pas"/> + <IsPartOfProject Value="True"/> + <UnitName Value="WeatherStation"/> + </Unit> + </Units> + </ProjectOptions> + <CompilerOptions> + <Version Value="11"/> + <PathDelim Value="\"/> + <Target> + <Filename Value="OneBRC"/> + </Target> + <SearchPaths> + <IncludeFiles Value="$(ProjOutDir)"/> + <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> + </SearchPaths> + <CodeGeneration> + <SmartLinkUnit Value="True"/> + <RelocatableUnit Value="True"/> + <Optimizations> + <OptimizationLevel Value="2"/> + </Optimizations> + </CodeGeneration> + <Linking> 
+      <LinkSmart Value="True"/>
+    </Linking>
+  </CompilerOptions>
+  <Debugging>
+    <Exceptions>
+      <Item>
+        <Name Value="EAbort"/>
+      </Item>
+      <Item>
+        <Name Value="ECodetoolError"/>
+      </Item>
+      <Item>
+        <Name Value="EFOpenError"/>
+      </Item>
+    </Exceptions>
+  </Debugging>
+</CONFIG>
diff --git a/entries/ikelaiah/src/OneBRC.lpr b/entries/ikelaiah/src/OneBRC.lpr
new file mode 100644
index 0000000..b8a613d
--- /dev/null
+++ b/entries/ikelaiah/src/OneBRC.lpr
@@ -0,0 +1,156 @@
+program OneBRC;
+
+{
+  ==Credits==
+
+  1. The FPC team, Lazarus team, fpcupdeluxe team, and other contributors.
+     - For providing a usable programming language and a usable ecosystem.
+  2. Gustavo 'Gus' Carreno.
+     - For making this happen.
+     - Borrowed Gus' approach to use `TCustomApplication` and using `unit`s properly
+       to make main code more readable.
+     - Borrowed and modified Gus' `WriteHelp` from the `baseline.lpr`.
+  3. Székely Balázs.
+     - Now I know what `Single` data type is!
+     - I borrowed the custom `TStringList` comparer from the `baseline` program.
+  4. Shraddha Agrawal - https://www.bytesizego.com/blog/one-billion-row-challenge-go.
+     - The advice for not storing measurements for each station in a data structure.
+  5. Arman Hajisafi - https://arman-hs.github.io
+     - Encouragements and inspirations.
+ }
+
+{$mode objfpc}{$H+}{$J-}{$modeSwitch advancedRecords}
+{$codepage utf8}
+//{$DEFINE DEBUG}
+
+uses
+  {$IFDEF UNIX}
+  cthreads,
+  {$ENDIF}
+  Classes,
+  SysUtils,
+  CustApp,
+  WeatherStation;
+
+const
+  version = '1.0';
+
+type
+
+  { TOneBRC }
+
+  // Console front end: parses the command line and hands the input file to
+  // the WeatherStation unit.
+  TOneBRC = class(TCustomApplication)
+  protected
+    procedure DoRun; override;
+  public
+    constructor Create(TheOwner: TComponent); override;
+    destructor Destroy; override;
+    procedure WriteHelp; virtual;
+  end;
+
+  { TOneBRC }
+
+  {
+    Validates the command line, serves -h / -v, and otherwise processes the
+    file named by -i. Every path ends in Terminate ("stop program loop").
+    Usage errors additionally set a non-zero process exit code so that
+    scripts can detect them.
+  }
+  procedure TOneBRC.DoRun;
+  var
+    ErrorMsg: string;
+    filename: string = '';
+  begin
+    // quick check parameters. `input-file` is the long option WriteHelp
+    // advertises; `input` is kept so existing invocations still work.
+    ErrorMsg := CheckOptions('hvi:', ['help', 'version', 'input:', 'input-file:']);
+    if ErrorMsg <> '' then
+    begin
+      // Commented out the default ShowException as the generated text is not user friendly.
+      // ShowException(Exception.Create(ErrorMsg));
+      WriteLn('Error: ', ErrorMsg);
+      WriteHelp;
+      System.ExitCode := 1;
+      Terminate;
+      Exit;
+    end;
+
+    // Parse h
+    if HasOption('h', 'help') then
+    begin
+      WriteHelp;
+      Terminate;
+      Exit;
+    end;
+
+    // Parse v
+    if HasOption('v', 'version') then
+    begin
+      WriteLn('OneBRC version ', version);
+      Terminate;
+      Exit;
+    end;
+
+    // Parse i (short form, legacy --input, or the documented --input-file)
+    if HasOption('i', 'input') then
+      filename := GetOptionValue('i', 'input')
+    else if HasOption('input-file') then
+      filename := GetOptionValue('input-file');
+
+    // Validate by existence rather than by name length: a short name such as
+    // `a.b` is legal, while a long path to a missing file is not.
+    if (filename = '') or (not FileExists(filename)) then
+    begin
+      WriteLn('Input file seems invalid.');
+      WriteHelp;
+      System.ExitCode := 1;
+      Terminate;
+      Exit;
+    end;
+
+    // Start the main algorithm
+    WeatherStation.ProcessTempMeasurements(filename);
+
+    // stop program loop
+    Terminate;
+  end;
+
+  // Creates the application and enables StopOnException.
+  constructor TOneBRC.Create(TheOwner: TComponent);
+  begin
+    inherited Create(TheOwner);
+    StopOnException := True;
+  end;
+
+  destructor TOneBRC.Destroy;
+  begin
+    inherited Destroy;
+  end;
+
+  // Prints the usage screen to standard output.
+  procedure TOneBRC.WriteHelp;
+  begin
+    WriteLn('OneBRC -- An entry to the One Billion Row Challenge for Object Pascal');
+    WriteLn;
+    WriteLn('Usage: OneBRC [-h] [-v] [-i input_file]');
+    WriteLn;
+    WriteLn(' -h | --help Show this help screen');
+    WriteLn(' -v | --version Show the version number');
+    WriteLn(' -i | --input-file <filename> Input text file to process.');
+    WriteLn(' Each row is one temperature measurement in the format <string: station name>;<double: measurement>');
+  end;
+
+var
+  Application: TOneBRC;
+begin
+  Application := TOneBRC.Create(nil);
+  try
+    Application.Title := 'OneBRC';
+    Application.Run;
+  finally
+    // Release the application object even if Run raises.
+    Application.Free;
+  end;
+end.
diff --git a/entries/ikelaiah/src/stopwatch.pas b/entries/ikelaiah/src/stopwatch.pas
new file mode 100644
index 0000000..5489abe
--- /dev/null
+++ b/entries/ikelaiah/src/stopwatch.pas
@@ -0,0 +1,73 @@
+unit Stopwatch;
+
+{$mode objfpc}{$H+}{$J-}{$modeSwitch advancedRecords}
+
+interface
+
+uses
+  Classes, SysUtils;
+
+// Records the current tick count as the start of the measured interval.
+procedure StartTimer;
+// Records the current tick count as the end of the measured interval.
+procedure StopTimer;
+// Clears the recorded start/end ticks and the elapsed value.
+procedure ResetTimer;
+// Prints the interval between the last StartTimer and StopTimer calls.
+procedure DisplayTimer;
+
+implementation
+
+var
+  startTime: QWord = 0;
+  endTime: QWord = 0;
+  elapsedMilliseconds: QWord = 0;
+  hours, minutes, seconds, milliseconds: word;
+
+procedure StartTimer;
+begin
+  startTime := GetTickCount64;
+end;
+
+procedure StopTimer;
+begin
+  endTime := GetTickCount64;
+end;
+
+procedure ResetTimer;
+begin
+  startTime := 0;
+  endTime := 0;
+  elapsedMilliseconds := 0;
+end;
+
+procedure DisplayTimer;
+begin
+
+  // Elapsed milliseconds. QWord is unsigned, so guard the subtraction: if
+  // StopTimer was never called (endTime < startTime) report zero instead of
+  // a wrapped-around huge value.
+  if endTime >= startTime then
+    elapsedMilliseconds := endTime - startTime
+  else
+    elapsedMilliseconds := 0;
+
+  // Convert milliseconds to hours, minutes, seconds, and milliseconds
+  hours := elapsedMilliseconds div 3600000;
+  elapsedMilliseconds := elapsedMilliseconds mod 3600000;
+
+  minutes := elapsedMilliseconds div 60000;
+  elapsedMilliseconds := elapsedMilliseconds mod 60000;
+
+  seconds := elapsedMilliseconds div 1000;
+  milliseconds := elapsedMilliseconds mod 1000;
+
+  WriteLn;
+  WriteLn('------------------------------');
+  WriteLn('Elapsed time: ', hours, ' hours ', minutes, ' minutes ',
+    seconds, ' seconds ', milliseconds, ' milliseconds');
+  //WriteLn('Elapsed time: ', (endTime - startTime), ' ms');
+end;
+
+
+end.
diff --git a/entries/ikelaiah/src/weatherstation.pas b/entries/ikelaiah/src/weatherstation.pas
new file mode 100644
index 0000000..18b2b2f
--- /dev/null
+++ b/entries/ikelaiah/src/weatherstation.pas
@@ -0,0 +1,260 @@
+unit WeatherStation;
+
+{$mode objfpc}{$H+}{$J-}{$modeSwitch advancedRecords}
+
+interface
+
+uses
+  {$IFDEF UNIX}
+  cthreads,
+  {$ENDIF}
+  Classes,
+  SysUtils,
+  Generics.Collections,
+  Stopwatch;
+
+type
+  // Running temperature statistics for one weather station.
+  TStat = record
+    var
+      min: single;   // lowest measurement seen so far
+      max: single;   // highest measurement seen so far
+      sum: double;   // running total; double keeps precision over millions of additions
+      count: int64;  // number of measurements; a 16-bit word would wrap after 65535 rows
+  public
+    // Builds a record from explicit values.
+    constructor Create(newMin: single;
+                       newMax: single;
+                       newSum: double;
+                       newCount: int64);
+    // Formats the record as min/mean/max, one decimal place each.
+    function ToString: string;
+  end;
+
+type
+  // Create a dictionary
+  TWeatherDictionary = specialize TDictionary<string, TStat>;
+
+
+{// A helper function to add a city temperature into the TWeatherDictionary
+procedure AddCityTemperature(cityName: string;
+                             newTemp: single;
+                             var weatherDictionary: TWeatherDictionary);
+}
+
+// The main algorithm to process the temp measurements from various weather station
+procedure ProcessTempMeasurements(filename: string);
+
+
+implementation
+
+var
+  // Number format that always uses '.' as the decimal separator; filled in by
+  // the initialization section. DefaultFormatSettings may follow the system
+  // locale, which would break both parsing the input and printing the output.
+  InvariantFormat: TFormatSettings;
+
+constructor TStat.Create(newMin: single;
+                         newMax: single;
+                         newSum: double;
+                         newCount: int64);
+begin
+  self.min := newMin;
+  self.max := newMax;
+  self.sum := newSum;
+  self.count := newCount;
+end;
+
+function TStat.ToString: string;
+begin
+  {$IFDEF DEBUG}
+  // Verbose form, debug builds only.
+  Result := Format('Min: %.1f; Mean: %.1f; Max: %.1f; Sum: %.1f; Count %d',
+    [self.min, (self.sum / self.count), self.max,
+    self.sum, self.count], InvariantFormat);
+  {$ELSE}
+  // Result := Format('%.1f/%.1f/%.1f', [self.min, (self.sum / self.count), self.max]);
+  Result := FormatFloat('0.0', self.min, InvariantFormat) + '/' +
+    FormatFloat('0.0', (self.sum / self.count), InvariantFormat) + '/' +
+    FormatFloat('0.0', self.max, InvariantFormat);
+  {$ENDIF DEBUG}
+end;
+
+{
+  A custom comparer for TStringList.
+
+  The following procedure Written by Székely Balázs for the 1BRC for Object Pascal.
+  URL: https://github.com/gcarreno/1brc-ObjectPascal/tree/main
+
+  Compares two `name=stats` entries by the text before the first '=' using
+  CompareStr; returns 0 when either entry has no '='.
+}
+function CustomTStringListComparer(AList: TStringList; AIndex1, AIndex2: Integer): Integer;
+var
+  Pos1, Pos2: Integer;
+  Str1, Str2: String;
+begin
+  Result := 0;
+  Str1 := AList.Strings[AIndex1];
+  Str2 := AList.Strings[AIndex2];
+  Pos1 := Pos('=', Str1);
+  Pos2 := Pos('=', Str2);
+  if (Pos1 > 0) and (Pos2 > 0) then
+  begin
+    Str1 := Copy(Str1, 1, Pos1 - 1);
+    Str2 := Copy(Str2, 1, Pos2 - 1);
+    Result := CompareStr(Str1, Str2);
+  end;
+end;
+
+{
+  Folds one measurement into the statistics of `cityName`, creating the
+  entry on first sight.
+}
+procedure AddCityTemperature(cityName: string;
+                             newTemp: single;
+                             var weatherDictionary: TWeatherDictionary);
+var
+  stat: TStat;
+begin
+  // A single TryGetValue replaces the ContainsKey / read / ContainsKey sequence,
+  // so each row costs one lookup plus one store.
+  if weatherDictionary.TryGetValue(cityName, stat) then
+  begin
+
+    {$IFDEF DEBUG}
+    WriteLn('City found: ', cityName);
+    {$ENDIF DEBUG}
+
+    // If the temp lower then min, set the new min.
+    if newTemp < stat.min then stat.min := newTemp;
+
+    // If the temp higher than max, set the new max.
+    if newTemp > stat.max then stat.max := newTemp;
+
+    // Add the temperature to the running total for this city.
+    stat.sum := stat.sum + newTemp;
+
+    // Increase the counter
+    stat.count := stat.count + 1;
+
+    // TStat is a record (a copy), so write the updated value back
+    weatherDictionary.AddOrSetValue(cityName, stat);
+  end
+  else
+  begin
+    // If city name doesn't exist add a new entry
+    weatherDictionary.Add(cityName, TStat.Create(newTemp, newTemp, newTemp, 1));
+    {$IFDEF DEBUG}
+    WriteLn('Added: ', cityName);
+    {$ENDIF DEBUG}
+  end;
+end;
+
+{
+  Reads `filename` line by line, aggregates per-station statistics and writes
+  `{name=min/mean/max, ...}` sorted by station name to standard output.
+  Lines starting with '#' and empty lines are skipped. Read or parse errors are
+  reported on standard output; whatever was aggregated before the error is
+  still printed.
+}
+procedure ProcessTempMeasurements(filename: string);
+var
+  wd: TWeatherDictionary;
+  line, ws: string;
+  lineSeparated: array of string;
+  weatherStationList: TStringList;
+  textFile: System.TextFile;
+  isFirstKey: boolean = True;
+begin
+
+  // Start a timer
+  // Stopwatch.StartTimer;
+
+  // Create a city - weather dictionary
+  wd := TWeatherDictionary.Create;
+  weatherStationList := TStringList.Create;
+  try
+
+    // Read text file //////////////////////////////////////////////////////////
+    AssignFile(textFile, filename);
+
+    // Perform the read operation in a try..except block to handle errors gracefully
+    try
+      // Open the file for reading
+      Reset(textFile);
+      try
+        // Keep reading lines until the end of the file is reached
+        while not EOF(textFile) do
+        begin
+          // Read a line
+          ReadLn(textFile, line);
+          // Skip empty lines (line[1] is not valid on an empty string) and
+          // lines that start with #.
+          if (Length(line) = 0) or (line[1] = '#') then continue;
+
+          // Else, add an entry into the dictionary.
+          lineSeparated := line.Split([';']);
+          // A row without ';' has no temperature field; fail with a clear
+          // message instead of indexing past the end of the array.
+          if Length(lineSeparated) < 2 then
+            raise EConvertError.CreateFmt('Malformed row: "%s"', [line]);
+          AddCityTemperature(lineSeparated[0],
+            StrToFloat(lineSeparated[1], InvariantFormat), wd);
+
+        end; // end while loop reading line at a time
+      finally
+        // Close the file, also when a read or parse error interrupts the loop
+        CloseFile(textFile);
+      end;
+
+    except
+      on E: Exception do
+        WriteLn('File handling error occurred. Details: ', E.Message);
+    end; // End of file reading ////////////////////////////////////////////////
+
+    // Format and sort weather station by name and temp stat ///////////////////
+    for ws in wd.Keys do
+    begin
+      weatherStationList.Add(ws + '=' + wd[ws].ToString);
+    end;
+    weatherStationList.CustomSort(@CustomTStringListComparer);
+
+    // Print TStringList - sorted by weather station and temp stat /////////////
+    Write('{');
+    for ws in weatherStationList do
+    begin
+      // If it's not the first key, print a comma
+      if not isFirstKey then
+        Write(', ');
+
+      // Print the weather station and the temp stat
+      Write(ws);
+
+      // Set isFirstKey to False after printing the first key
+      isFirstKey := False;
+    end;
+
+    Write('}');
+
+    {$IFDEF DEBUG}
+    WriteLn('DEBUG mode on');
+    {$ENDIF DEBUG}
+
+  finally
+    weatherStationList.Free;
+    wd.Free;
+  end; // End of processing TDictionary and TStringList
+
+  // Stop a timer
+  // Stopwatch.StopTimer;
+  // Stopwatch.DisplayTimer;
+
+end;
+
+initialization
+  // '.' is the decimal separator of both the input rows and the output.
+  InvariantFormat := DefaultFormatSettings;
+  InvariantFormat.DecimalSeparator := '.';
+
+end.

From 6374940a389e771d06e07af45b016f972dd12ca7 Mon Sep 17 00:00:00 2001
From: Iwan Kelaiah <iwan.kelaiah@gmail.com>
Date: Fri, 15 Mar 2024 18:03:15 +1100
Subject: [PATCH 2/2] Update - Minor updates as per organiser's request.
--- entries/ikelaiah/LICENSE.md | 9 --------- entries/ikelaiah/README.md | 9 ++++++++- 2 files changed, 8 insertions(+), 10 deletions(-) delete mode 100644 entries/ikelaiah/LICENSE.md diff --git a/entries/ikelaiah/LICENSE.md b/entries/ikelaiah/LICENSE.md deleted file mode 100644 index 003c259..0000000 --- a/entries/ikelaiah/LICENSE.md +++ /dev/null @@ -1,9 +0,0 @@ -MIT License - -Copyright (c) 2024 ikelaiah - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/entries/ikelaiah/README.md b/entries/ikelaiah/README.md index 850dc24..b6a319e 100644 --- a/entries/ikelaiah/README.md +++ b/entries/ikelaiah/README.md @@ -1,7 +1,14 @@ -# OneBRC for Object Pascal +# Iwan Kelaiah An Entry to the One Billion Row Challenge in Object Pascal. +The approach I implemented here is simplistic. + +- Sequentially read the measurement file. +- Populate a `TDictionary` with station names, min, max, count and sum; without storing all the temperature measurements. 
+- Use a custom comparer to sort the station and temperature statistics in a `TStringList`. +- Display the sorted measurements using a simple for loop. + ## Getting Started ### Dependencies