Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions entries/ikelaiah/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ Iwan Kelaiah
* Revision release - Sequential approach. 3-5 mins on my Inspiron 15 7510 laptop, around 2m55s (no improvement on speed).
* Replaced `LGenerics` with `Generics.Collections` for the time being.

* 1.12
* Revision release - Sequential approach. 2-5 mins on my Inspiron 15 7510 laptop, around 2m40s (small improvement on speed).
* Called `TStreamReader.ReadLine` twice per iteration of the while loop. This saves approx 5-10 seconds.
* Updated the Acknowledgments section.

## License

This project is licensed under the MIT License - see the LICENSE.md file for details
Expand All @@ -164,10 +169,12 @@ Inspiration, code snippets, libraries, etc.
- Borrowed and modified Gus' `WriteHelp` from the `baseline.lpr`.
3. A.Koverdyaev (avk)
- For the amazing [LGenerics](https://github.com/avk959/LGenerics) library.
4. Székely Balázs.
4. Benito van der Zander (benibela)
- For providing the [Free Pascal Hashmaps Benchmark](https://www.benibela.de/fpc-map-benchmark_en.html).
5. Székely Balázs.
- Now I know what `Single` data type is!
- I borrowed the custom `TStringList` comparer from the `baseline` program.
5. Shraddha Agrawal - https://www.bytesizego.com/blog/one-billion-row-challenge-go.
6. Shraddha Agrawal - https://www.bytesizego.com/blog/one-billion-row-challenge-go.
- The advice for not storing measurements for each station in a data structure.
6. Arman Hajisafi - https://arman-hs.github.io
7. Arman Hajisafi - https://arman-hs.github.io
- Encouragements and inspirations.
158 changes: 141 additions & 17 deletions entries/ikelaiah/src/weatherstation.pas
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ interface
uses
Classes
, SysUtils
, Math
, streamex
, bufstream
//, lgHashMap
, generics.Collections
, csvdocument
, csvdataset
{$IFDEF DEBUG}
, Stopwatch
{$ENDIF}
Expand All @@ -29,31 +30,36 @@ TStat = record
sum: int64;
cnt: int64;
public
function ToString: ShortString;
function ToString: shortstring;
end;
{Using pointer to TStat saves approx. 30-60 seconds for processing 1 billion rows}
PStat = ^TStat;

type
// Using this dictionary, now approx 4 mins faster than Generics.Collections.TDictionary
TWeatherDictionaryLG = specialize TFastHashMap<ShortString, PStat>;
// THashMap<shortstring, PStat> - takes around 120s.
// TFastHash<shortstring, PStat> - takes around 100s.
TWeatherDictionary = specialize TFastHashMap<shortstring, PStat>;

type
// a type for storing valid lookup temperature
TValidTemperatureDictionary = specialize TFastHashMap<ShortString, int64>;
TValidTemperatureDictionary = specialize TFastHashMap<shortstring, int64>;

type
// Create a class to encapsulate the temperature observations of each weather station.
TWeatherStation = class
private
fname: string;
weatherDictionary: TWeatherDictionaryLG;
weatherDictionary: TWeatherDictionary;
weatherStationList: TStringList;
lookupStrFloatToIntList: TValidTemperatureDictionary;
procedure CreateLookupTemp;
procedure ReadMeasurements;
procedure ParseStationAndTemp(const line: ShortString);
procedure AddCityTemperatureLG(const cityName: ShortString; const newTemp: int64);
procedure ReadMeasurementsBuffered;
procedure ReadMeasurementsV2;
procedure ReadMeasurementsV3;
procedure ParseStationAndTemp(const line: shortstring);
procedure AddCityTemperatureLG(const cityName: shortstring; const newTemp: int64);
procedure SortWeatherStationAndStats;
procedure PrintSortedWeatherStationAndStats;
public
Expand Down Expand Up @@ -92,7 +98,7 @@ function CustomTStringListComparer(AList: TStringList;
end;

// Remove dots from a string
function RemoveDots(const line: ShortString): ShortString;
function RemoveDots(const line: shortstring): shortstring;
var
index: integer;
begin
Expand All @@ -104,7 +110,7 @@ function RemoveDots(const line: ShortString): ShortString;
end;
end;

function TStat.ToString: ShortString;
function TStat.ToString: shortstring;
var
minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList.
begin
Expand All @@ -124,15 +130,15 @@ constructor TWeatherStation.Create(const filename: string);
// Set expected capacity - saves 10 seconds.
self.lookupStrFloatToIntList.Capacity := 44691;
// Create a dictionary
weatherDictionary := TWeatherDictionaryLG.Create;
weatherDictionary := TWeatherDictionary.Create;
weatherDictionary.Capacity := 44691;
// Create a TStringList for sorting
weatherStationList := TStringList.Create;
end;

destructor TWeatherStation.Destroy;
var
stationName: ShortString;
stationName: shortstring;
begin

// Free the lookup dictionary
Expand Down Expand Up @@ -206,7 +212,7 @@ procedure TWeatherStation.PrintSortedWeatherStationAndStats;

procedure TWeatherStation.SortWeatherStationAndStats;
var
wsKey: ShortString;
wsKey: shortstring;
begin

{$IFDEF DEBUG}
Expand Down Expand Up @@ -235,7 +241,7 @@ procedure TWeatherStation.SortWeatherStationAndStats;
{$ENDIF DEBUG}
end;

procedure TWeatherStation.AddCityTemperatureLG(const cityName: ShortString;
procedure TWeatherStation.AddCityTemperatureLG(const cityName: shortstring;
const newTemp: int64);
var
stat: PStat;
Expand Down Expand Up @@ -288,12 +294,15 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: ShortString;
end;
end;

procedure TWeatherStation.ParseStationAndTemp(const line: ShortString);
procedure TWeatherStation.ParseStationAndTemp(const line: shortstring);
var
delimiterPos: integer;
parsedStation, strFloatTemp: ShortString;
parsedTemp, valCode: int64;
strFloatTemp: shortstring;
parsedTemp: int64;
begin

if length(line) = 0 then Exit;

// Get position of the delimiter
delimiterPos := Pos(';', line);
if delimiterPos > 0 then
Expand All @@ -314,6 +323,7 @@ procedure TWeatherStation.ParseStationAndTemp(const line: ShortString);
end;
end;

{This approach turned out to be faster than the TCSVDocument method.}
procedure TWeatherStation.ReadMeasurements;
var
fileStream: TFileStream;
Expand All @@ -328,7 +338,117 @@ procedure TWeatherStation.ReadMeasurements;
// Read and parse chunks of data until EOF -------------------------------
while not streamReader.EOF do
begin
// line := streamReader.ReadLine;
//streamReader.ReadLine;
//streamReader.ReadLine;
self.ParseStationAndTemp(streamReader.ReadLine);
self.ParseStationAndTemp(streamReader.ReadLine);
end;// End of read and parse chunks of data ------------------------------
finally
streamReader.Free;
end;
finally
// Close the file
fileStream.Free;
end;
end;

{TCSVDocument method. Easiest to use, but about 2 times slower than
 ReadMeasurements.

 Loads the file named by fname into a TCSVDocument (';' as delimiter) through
 a 64 KiB read buffer, then walks every row: column 1 is looked up in
 lookupStrFloatToIntList and, when found, the resulting integer temperature is
 passed with column 0 (the station name) to AddCityTemperatureLG. Rows whose
 temperature text is not in the lookup table are skipped silently.

 All three objects are released in reverse order of creation, even when an
 exception is raised while loading or processing.}
procedure TWeatherStation.ReadMeasurementsV2;
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvReader: TCSVDocument;
  index, totalLines, parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    buffStream := TReadBufStream.Create(fileStream, 65536);
    try
      csvReader := TCSVDocument.Create;
      try
        csvReader.Delimiter := ';';
        csvReader.LoadFromStream(buffStream);

        totalLines := csvReader.RowCount;

        for index := 0 to totalLines - 1 do
        begin
          // Cells is indexed [column, row]: column 1 = temperature text,
          // column 0 = station name.
          if self.lookupStrFloatToIntList.TryGetValue(csvReader.Cells[1, index],
            parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvReader.Cells[0, index], parsedTemp);
          end;
        end;

      finally
        csvReader.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Must live inside the finally section: previously the section was empty
    // and the Free call followed it, so any exception leaked the file handle.
    fileStream.Free;
  end;
end;

{TCSVDataset method. About twice as slow as ReadMeasurements.

 Loads the file named by fname into a TCSVDataset (';' as delimiter, first
 line treated as data rather than field names) through a read buffer, then
 iterates the records from the first to EOF: field 1 is looked up in
 lookupStrFloatToIntList and, when found, the resulting integer temperature is
 passed with field 0 (the station name) to AddCityTemperatureLG. Records whose
 temperature text is not in the lookup table are skipped silently.

 All three objects are released in reverse order of creation, even when an
 exception is raised while loading or processing.}
procedure TWeatherStation.ReadMeasurementsV3;
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvDataset: TCSVDataset;
  parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    buffStream := TReadBufStream.Create(fileStream);
    try
      csvDataset := TCSVDataset.Create(nil);
      try
        csvDataset.CSVOptions.Delimiter := ';';
        csvDataset.CSVOptions.FirstLineAsFieldNames := False;
        csvDataset.LoadFromCSVStream(buffStream);

        // Move to first record
        csvDataset.First;

        while not csvDataset.EOF do
        begin
          // WriteLn('Field1 is ', csvDataset.Fields[0].AsString, ' and Field2 is ', csvDataset.Fields[1].AsString);
          if self.lookupStrFloatToIntList.TryGetValue(csvDataset.Fields[1].AsString, parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvDataset.Fields[0].AsString, parsedTemp);
          end;
          csvDataset.Next;
        end;
      finally
        csvDataset.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Must live inside the finally section: previously the section was empty
    // and the Free call followed it, so any exception leaked the file handle.
    fileStream.Free;
  end;
end;

{This approach is surprisingly 10 seconds slower than the first one.}
procedure TWeatherStation.ReadMeasurementsBuffered;
var
fileStream: TBufferedFileStream;
streamReader: TStreamReader;
begin

// Open the file for reading
fileStream := TBufferedFileStream.Create(self.fname, fmOpenRead);
try
streamReader := TStreamReader.Create(fileStream);
try
// Read and parse chunks of data until EOF -------------------------------
while not streamReader.EOF do
begin
self.ParseStationAndTemp(streamReader.ReadLine);
self.ParseStationAndTemp(streamReader.ReadLine);
end;// End of read and parse chunks of data ------------------------------
finally
Expand All @@ -340,11 +460,15 @@ procedure TWeatherStation.ReadMeasurements;
end;
end;


{ The main algorithm: runs the processing steps in a fixed order.
  Exactly one reader is active; the alternatives are left commented out
  so they can be swapped in for comparison. }
procedure TWeatherStation.ProcessMeasurements;
begin
  // Step 1: build the temperature lookup table.
  CreateLookupTemp;

  // Step 2: read the measurements file with the selected reader.
  ReadMeasurements;
  //ReadMeasurementsBuffered;
  //ReadMeasurementsV2;
  //ReadMeasurementsV3;

  // Step 3: sort the collected stations, then print them.
  SortWeatherStationAndStats;
  PrintSortedWeatherStationAndStats;
end;
Expand Down