diff --git a/entries/ikelaiah/README.md b/entries/ikelaiah/README.md new file mode 100644 index 0000000..b6a319e --- /dev/null +++ b/entries/ikelaiah/README.md @@ -0,0 +1,89 @@ +# Iwan Kelaiah + +An Entry to the One Billion Row Challenge in Object Pascal. + +The approach I implemented here is simplistic. + +- Sequentially read the measurement file. +- Populate a `TDictionary` with station names, min, max, count and sum; without storing all the temperature measurements. +- Use a custom comparer to sort the station and temperature statistics in a `TStringList`. +- Display the sorted measurements using a simple for loop. + +## Getting Started + +### Dependencies + +* None. Only the latest Free Pascal Compiler and Lazarus. You can get these easily by using [`https://github.com/LongDirtyAnimAlf/fpcupdeluxe/releases`](https://github.com/LongDirtyAnimAlf/fpcupdeluxe/releases). + +### Compiling + +* Open `OneBRC.lpi` using Lazarus. +* Hit `Ctrl + F9` to compile. + +### Running the executable + +```bash +$ ./OneBRC.exe -i your_measurements.txt +``` + +To time the execution, do the following. + +```bash +$ time ./OneBRC.exe -i your_measurements.txt +``` + +## Help + +To see flags, use `-h`. + +``` +$ ./OneBRC.exe -h +OneBRC -- An entry to the One Billion Row Challenge for Object Pascal + +Usage: OneBRC [-h] [-v] [-i input_file] + + -h | --help Show this help screen + -v | --version Show the version number + -i | --input-file <filename> Input text file to process. + Each row is one temperature measurement in the format <string: station name>;<double: measurement> +``` + +Use `-v` to check version. + +```bash +$ ./OneBRC.exe -v +OneBRC version 1.0 +``` + +## Authors + +Iwan Kelaiah +[ikelaiah](https://github.com/ikelaiah) + +## Version History + +* 1.0 + * Initial Release - Sequential approach. + +## License + +This project is licensed under the MIT License - see the LICENSE.md file for details + +## Acknowledgments + +Inspiration, code snippets, etc. + + 1. The FPC team, Lazarus team, fpcupdeluxe team, and other contributors. 
+ - For providing a usable programming language and a usable ecosystem. + 2. Gustavo 'Gus' Carreno. + - For making this happen. + - Borrowed Gus' approach to use `TCustomApplication` and using `unit`s properly + to make main code more readable. + - Borrowed and modified Gus' `WriteHelp` from the `baseline.lpr`. + 3. Székely Balázs. + - Now I know what `Single` data type is! + - I borrowed the custom `TStringList` comparer from the `baseline` program. + 4. Shraddha Agrawal - https://www.bytesizego.com/blog/one-billion-row-challenge-go. + - The advice for not storing measurements for each station in a data structure. + 5. Arman Hajisafi - https://arman-hs.github.io + - Encouragements and inspirations. \ No newline at end of file diff --git a/entries/ikelaiah/src/OneBRC.lpi b/entries/ikelaiah/src/OneBRC.lpi new file mode 100644 index 0000000..777b3a0 --- /dev/null +++ b/entries/ikelaiah/src/OneBRC.lpi @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + <UseAppBundle Value="False"/> + <ResourceType Value="res"/> + </General> + <BuildModes> + <Item Name="Default" Default="True"/> + </BuildModes> + <PublishOptions> + <Version Value="2"/> + <UseFileFilters Value="True"/> + </PublishOptions> + <RunParams> + <FormatVersion Value="2"/> + </RunParams> + <Units> + <Unit> + <Filename Value="OneBRC.lpr"/> + <IsPartOfProject Value="True"/> + </Unit> + <Unit> + <Filename Value="stopwatch.pas"/> + <IsPartOfProject Value="True"/> + <UnitName Value="Stopwatch"/> + </Unit> + <Unit> + <Filename Value="weatherstation.pas"/> + <IsPartOfProject Value="True"/> + <UnitName Value="WeatherStation"/> + </Unit> + </Units> + </ProjectOptions> + <CompilerOptions> + <Version Value="11"/> + <PathDelim Value="\"/> + <Target> + <Filename Value="OneBRC"/> + </Target> + <SearchPaths> + <IncludeFiles Value="$(ProjOutDir)"/> + <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> + </SearchPaths> + <CodeGeneration> + <SmartLinkUnit Value="True"/> + <RelocatableUnit Value="True"/> + <Optimizations> + 
<OptimizationLevel Value="2"/> + </Optimizations> + </CodeGeneration> + <Linking> + <LinkSmart Value="True"/> + </Linking> + </CompilerOptions> + <Debugging> + <Exceptions> + <Item> + <Name Value="EAbort"/> + </Item> + <Item> + <Name Value="ECodetoolError"/> + </Item> + <Item> + <Name Value="EFOpenError"/> + </Item> + </Exceptions> + </Debugging> +</CONFIG> diff --git a/entries/ikelaiah/src/OneBRC.lpr b/entries/ikelaiah/src/OneBRC.lpr new file mode 100644 index 0000000..b8a613d --- /dev/null +++ b/entries/ikelaiah/src/OneBRC.lpr @@ -0,0 +1,137 @@ +program OneBRC; + +{ + ==Credits== + + 1. The FPC team, Lazarus team, fpcupdeluxe team, and other contributors. + - For providing a usable programming language and a usable ecosystem. + 2. Gustavo 'Gus' Carreno. + - For making this happen. + - Borrowed Gus' approach to use `TCustomApplication` and using `unit`s properly + to make main code more readable. + - Borrowed and modified Gus' `WriteHelp` from the `baseline.lpr`. + 3. Székely Balázs. + - Now I know what `Single` data type is! + - I borrowed the custom `TStringList` comparer from the `baseline` program. + 4. Shraddha Agrawal - https://www.bytesizego.com/blog/one-billion-row-challenge-go. + - The advice for not storing measurements for each station in a data structure. + 5. Arman Hajisafi - https://arman-hs.github.io + - Encouragements and inspirations. 
+ }
+
+{$mode objfpc}{$H+}{$J-}{$modeSwitch advancedRecords}
+{$codepage utf8}
+//{$DEFINE DEBUG}
+
+uses
+  {$IFDEF UNIX}
+  cthreads,
+  {$ENDIF}
+  Classes,
+  SysUtils,
+  CustApp,
+  WeatherStation;
+
+const
+  version = '1.0';
+
+type
+
+  { TOneBRC }
+
+  // Console application: parses the command line and passes the input file
+  // name to WeatherStation.ProcessTempMeasurements.
+  TOneBRC = class(TCustomApplication)
+  protected
+    procedure DoRun; override;
+  public
+    constructor Create(TheOwner: TComponent); override;
+    destructor Destroy; override;
+    procedure WriteHelp; virtual;
+  end;
+
+  { TOneBRC }
+
+  // Handles -h/--help, -v/--version and -i/--input (alias --input-file),
+  // then runs the measurement processing once and terminates the program loop.
+  procedure TOneBRC.DoRun;
+  var
+    ErrorMsg: string;
+    filename: string = '';
+  begin
+    // Quick check of the parameters. 'input-file' is accepted next to 'input'
+    // because it is the spelling printed by WriteHelp.
+    ErrorMsg := CheckOptions('hvi:', ['help', 'version', 'input:', 'input-file:']);
+    if ErrorMsg <> '' then
+    begin
+      // The default ShowException is not used as the generated text is not user friendly.
+      // ShowException(Exception.Create(ErrorMsg));
+      WriteLn('Error: ', ErrorMsg);
+      WriteHelp;
+      Terminate;
+      Exit;
+    end;
+
+    // Parse h
+    if HasOption('h', 'help') then
+    begin
+      WriteHelp;
+      Terminate;
+      Exit;
+    end;
+
+    // Parse v
+    if HasOption('v', 'version') then
+    begin
+      WriteLn('OneBRC version ', version);
+      Terminate;
+      Exit;
+    end;
+
+    // Parse i (short form, --input, or the documented --input-file)
+    if HasOption('i', 'input') then
+      filename := GetOptionValue('i', 'input')
+    else if HasOption('input-file') then
+      filename := GetOptionValue('input-file');
+
+    // Reject a missing option or a file that does not exist before any work starts.
+    if (filename = '') or not FileExists(filename) then
+    begin
+      WriteLn('Input file seems invalid.');
+      WriteHelp;
+      Terminate;
+      Exit;
+    end;
+
+    // Start the main algorithm
+    WeatherStation.ProcessTempMeasurements(filename);
+
+    // stop program loop
+    Terminate;
+  end;
+
+  // Creates the application and makes it stop on an unhandled exception.
+  constructor TOneBRC.Create(TheOwner: TComponent);
+  begin
+    inherited Create(TheOwner);
+    StopOnException := True;
+  end;
+
+  destructor TOneBRC.Destroy;
+  begin
+    inherited Destroy;
+  end;
+
+  // Prints the usage screen to standard output.
+  procedure TOneBRC.WriteHelp;
+  begin
+    WriteLn('OneBRC -- An entry to the One Billion Row Challenge for Object Pascal');
+    WriteLn;
+    WriteLn('Usage: OneBRC [-h] [-v] [-i input_file]');
+    WriteLn;
+    WriteLn(' -h | --help Show this help screen');
+    WriteLn(' -v | --version Show the version number');
+    WriteLn(' -i | --input-file <filename> Input text file to process.');
+    WriteLn(' Each row is one temperature measurement in the format <string: station name>;<double: measurement>');
+  end;
+
+var
+  Application: TOneBRC;
+begin
+  Application := TOneBRC.Create(nil);
+  try
+    Application.Title := 'OneBRC';
+    Application.Run;
+  finally
+    // Release the application object even when Run raises.
+    Application.Free;
+  end;
+end.
diff --git a/entries/ikelaiah/src/stopwatch.pas b/entries/ikelaiah/src/stopwatch.pas
new file mode 100644
index 0000000..5489abe
--- /dev/null
+++ b/entries/ikelaiah/src/stopwatch.pas
@@ -0,0 +1,64 @@
+unit Stopwatch;
+
+{$mode objfpc}{$H+}{$J-}{$modeSwitch advancedRecords}
+
+interface
+
+uses
+  Classes, SysUtils;
+
+// Records the current tick count as the start of the measured interval.
+procedure StartTimer;
+// Records the current tick count as the end of the measured interval.
+procedure StopTimer;
+// Clears the recorded start, end and elapsed values.
+procedure ResetTimer;
+// Writes the interval between StartTimer and StopTimer to standard output.
+procedure DisplayTimer;
+
+implementation
+
+var
+  startTime: QWord = 0;
+  endTime: QWord = 0;
+  // Total length of the last displayed interval, in milliseconds.
+  elapsedMilliseconds: QWord = 0;
+
+procedure StartTimer;
+begin
+  startTime := GetTickCount64;
+end;
+
+procedure StopTimer;
+begin
+  endTime := GetTickCount64;
+end;
+
+procedure ResetTimer;
+begin
+  startTime := 0;
+  endTime := 0;
+  elapsedMilliseconds := 0;
+end;
+
+procedure DisplayTimer;
+var
+  remaining: QWord;
+  hours, minutes, seconds, milliseconds: QWord;
+begin
+
+  // Elapsed milliseconds. QWord is unsigned, so an end time that is earlier
+  // than the start time (StopTimer not called yet) is reported as zero
+  // instead of wrapping around to a huge value.
+  if endTime >= startTime then
+    elapsedMilliseconds := endTime - startTime
+  else
+    elapsedMilliseconds := 0;
+
+  // Convert milliseconds to hours, minutes, seconds, and milliseconds.
+  // A local remainder is used so elapsedMilliseconds keeps the total.
+  hours := elapsedMilliseconds div 3600000;
+  remaining := elapsedMilliseconds mod 3600000;
+
+  minutes := remaining div 60000;
+  remaining := remaining mod 60000;
+
+  seconds := remaining div 1000;
+  milliseconds := remaining mod 1000;
+
+  WriteLn;
+  WriteLn('------------------------------');
+  WriteLn('Elapsed time: ', hours, ' hours ', minutes, ' minutes ',
+    seconds, ' seconds ', milliseconds, ' milliseconds');
+  //WriteLn('Elapsed time: ', (endTime - startTime), ' ms');
+end;
+
+
+end.
diff --git a/entries/ikelaiah/src/weatherstation.pas b/entries/ikelaiah/src/weatherstation.pas
new file mode 100644
index 0000000..18b2b2f
--- /dev/null
+++ b/entries/ikelaiah/src/weatherstation.pas
@@ -0,0 +1,226 @@
+unit WeatherStation;
+
+{$mode objfpc}{$H+}{$J-}{$modeSwitch advancedRecords}
+
+interface
+
+uses
+  {$IFDEF UNIX}
+  cthreads,
+  {$ENDIF}
+  Classes,
+  SysUtils,
+  Generics.Collections,
+  Stopwatch;
+
+type
+  // Running temperature statistics of one weather station.
+  TStat = record
+  var
+    min: single;
+    max: single;
+    // Kept in double precision: a single loses digits once many readings
+    // have been added together.
+    sum: double;
+    // 64-bit counter: a 16-bit word wraps around after 65535 readings.
+    count: Int64;
+  public
+    // Initialises all four statistics in one call.
+    constructor Create(newMin: single;
+                       newMax: single;
+                       newSum: double;
+                       newCount: Int64);
+    // Formats the statistics as 'min/mean/max' with one decimal each.
+    function ToString: string;
+  end;
+
+type
+  // Maps a station name to its running statistics.
+  TWeatherDictionary = specialize TDictionary<string, TStat>;
+
+
+{// A helper function to add a city temperature into the TWeatherDictionary
+procedure AddCityTemperature(cityName: string;
+                             newTemp: single;
+                             var weatherDictionary: TWeatherDictionary);
+}
+
+// The main algorithm to process the temp measurements from various weather stations.
+// Reads 'filename' line by line ('station;temperature'), aggregates the values
+// per station and writes '{name=min/mean/max, ...}' sorted by name to standard output.
+procedure ProcessTempMeasurements(filename: string);
+
+
+implementation
+
+var
+  // Number format with a fixed '.' decimal separator, so that parsing the
+  // input and formatting the output do not depend on the system locale.
+  DotDecimal: TFormatSettings;
+
+constructor TStat.Create(newMin: single;
+                         newMax: single;
+                         newSum: double;
+                         newCount: Int64);
+begin
+  self.min := newMin;
+  self.max := newMax;
+  self.sum := newSum;
+  self.count := newCount;
+end;
+
+function TStat.ToString: string;
+begin
+  {$IFDEF DEBUG}
+  // Verbose form; it is returned (not overwritten) when DEBUG is defined.
+  Result := Format('Min: %.1f; Mean: %.1f; Max: %.1f; Sum: %.1f; Count %d',
+    [self.min, (self.sum / self.count), self.max,
+    self.sum, self.count], DotDecimal);
+  {$ELSE}
+  Result := FormatFloat('0.0', self.min, DotDecimal) + '/'
+    + FormatFloat('0.0', (self.sum / self.count), DotDecimal) + '/'
+    + FormatFloat('0.0', self.max, DotDecimal);
+  {$ENDIF DEBUG}
+end;
+
+{
+  A custom comparer for TStringList.
+
+  The following function was written by Székely Balázs for the 1BRC for Object Pascal.
+  URL: https://github.com/gcarreno/1brc-ObjectPascal/tree/main
+
+  It compares only the text in front of the first '=' of both entries, using the
+  case-sensitive CompareStr. Entries without '=' are treated as equal.
+}
+function CustomTStringListComparer(AList: TStringList; AIndex1, AIndex2: Integer): Integer;
+var
+  Pos1, Pos2: Integer;
+  Str1, Str2: String;
+begin
+  Result := 0;
+  Str1 := AList.Strings[AIndex1];
+  Str2 := AList.Strings[AIndex2];
+  Pos1 := Pos('=', Str1);
+  Pos2 := Pos('=', Str2);
+  if (Pos1 > 0) and (Pos2 > 0) then
+  begin
+    Str1 := Copy(Str1, 1, Pos1 - 1);
+    Str2 := Copy(Str2, 1, Pos2 - 1);
+    Result := CompareStr(Str1, Str2);
+  end;
+end;
+
+// Folds one temperature reading into the statistics stored for cityName,
+// creating the entry when the city is seen for the first time.
+procedure AddCityTemperature(cityName: string;
+                             newTemp: single;
+                             var weatherDictionary: TWeatherDictionary);
+var
+  stat: TStat;
+begin
+  // A single TryGetValue replaces the ContainsKey + read + ContainsKey sequence.
+  if weatherDictionary.TryGetValue(cityName, stat) then
+  begin
+
+    {$IFDEF DEBUG}
+    WriteLn('City found: ', cityName);
+    {$ENDIF DEBUG}
+
+    // If the temp is lower than min, set the new min.
+    if newTemp < stat.min then stat.min := newTemp;
+
+    // If the temp is higher than max, set the new max.
+    if newTemp > stat.max then stat.max := newTemp;
+
+    // Add the reading to the running sum of this city.
+    stat.sum := stat.sum + newTemp;
+
+    // Increase the counter
+    stat.count := stat.count + 1;
+  end
+  else
+  begin
+    // First reading of this city: it is min, max and sum at the same time.
+    stat := TStat.Create(newTemp, newTemp, newTemp, 1);
+    {$IFDEF DEBUG}
+    WriteLn('Added: ', cityName);
+    {$ENDIF DEBUG}
+  end;
+
+  // TStat is a record (a copy), so it has to be written back.
+  weatherDictionary.AddOrSetValue(cityName, stat);
+end;
+
+procedure ProcessTempMeasurements(filename: string);
+var
+  wd: TWeatherDictionary;
+  line, ws: string;
+  lineSeparated: array of string;
+  weatherStationList: TStringList;
+  textFile: System.TextFile;
+  isFirstKey: boolean = True;
+begin
+
+  // Start a timer
+  // Stopwatch.StartTimer;
+
+  // Create a city - weather dictionary
+  wd := TWeatherDictionary.Create;
+  weatherStationList := TStringList.Create;
+  try
+
+    // Read text file //////////////////////////////////////////////////////////
+    AssignFile(textFile, filename);
+
+    // Perform the read operation in a try..except block to handle errors gracefully
+    try
+      // Open the file for reading
+      Reset(textFile);
+      try
+        // Keep reading lines until the end of the file is reached
+        while not EOF(textFile) do
+        begin
+          // Read a line
+          ReadLn(textFile, line);
+
+          // Skip blank lines (an empty string has no line[1]) and lines
+          // that start with #.
+          if (line = '') or (line[1] = '#') then continue;
+
+          // Else, add an entry into the dictionary.
+          lineSeparated := line.Split([';']);
+          // A line without ';' has no temperature part to read.
+          if Length(lineSeparated) < 2 then
+            raise EConvertError.CreateFmt('Malformed measurement line: "%s"', [line]);
+          AddCityTemperature(lineSeparated[0],
+            StrToFloat(lineSeparated[1], DotDecimal), wd);
+
+        end; // end while loop reading line at a time
+      finally
+        // Close the file, also when a line could not be processed
+        CloseFile(textFile);
+      end;
+
+    except
+      on E: Exception do
+        WriteLn('File handling error occurred. Details: ', E.Message);
+    end; // End of file reading ////////////////////////////////////////////////
+
+    // Format and sort weather station by name and temp stat ///////////////////
+    for ws in wd.Keys do
+    begin
+      weatherStationList.Add(ws + '=' + wd[ws].ToString);
+    end;
+    weatherStationList.CustomSort(@CustomTStringListComparer);
+
+    // Print TStringList - sorted by weather station and temp stat /////////////
+    Write('{');
+    for ws in weatherStationList do
+    begin
+      // If it's not the first key, print a comma
+      if not isFirstKey then
+        Write(', ');
+
+      // Print the weather station and the temp stat
+      Write(ws);
+
+      // Set isFirstKey to False after printing the first key
+      isFirstKey := False;
+    end;
+
+    Write('}');
+
+    {$IFDEF DEBUG}
+    WriteLn('DEBUG mode on');
+    {$ENDIF DEBUG}
+
+  finally
+    weatherStationList.Free;
+    wd.Free;
+  end; // End of processing TDictionary and TStringList
+
+  // Stop a timer
+  // Stopwatch.StopTimer;
+  // Stopwatch.DisplayTimer;
+
+end;
+
+initialization
+  // Start from the defaults and pin only the decimal separator.
+  DotDecimal := DefaultFormatSettings;
+  DotDecimal.DecimalSeparator := '.';
+
+end.