From e8e9421ef79cbb87667d88135b47c03f0b2a2724 Mon Sep 17 00:00:00 2001 From: Iwan Kelaiah Date: Thu, 11 Apr 2024 06:44:26 +1000 Subject: [PATCH 1/5] Update - Clean up code --- entries/ikelaiah/src/weatherstation.pas | 58 +++---------------------- 1 file changed, 7 insertions(+), 51 deletions(-) diff --git a/entries/ikelaiah/src/weatherstation.pas b/entries/ikelaiah/src/weatherstation.pas index 374131b..d34276c 100644 --- a/entries/ikelaiah/src/weatherstation.pas +++ b/entries/ikelaiah/src/weatherstation.pas @@ -29,8 +29,6 @@ TStat = record sum: int64; cnt: int64; public - constructor Create(const newMin: int64; const newMax: int64; - const newSum: int64; const newCount: int64); function ToString: string; end; {Using pointer to TStat saves approx. 30-60 seconds for processing 1 billion rows} @@ -38,11 +36,11 @@ TStat = record type // Using this dictionary, now approx 4 mins faster than Generics.Collections.TDictionary - TWeatherDictionaryLG = specialize TGHashMapQP<string, PStat>; + TWeatherDictionaryLG = specialize TGHashMapLP<string, PStat>; type // a type for storing valid lookup temperature - TValidTemperatureDictionary = specialize TGHashMapQP<string, int64>; + TValidTemperatureDictionary = specialize TGHashMapLP<string, int64>; type // Create a class to encapsulate the temperature observations of each weather station. @@ -106,15 +104,6 @@ function RemoveDots(const line: string): string; end; end; -constructor TStat.Create(const newMin: int64; const newMax: int64; - const newSum: int64; const newCount: int64); -begin - self.min := newMin; - self.max := newMax; - self.sum := newSum; - self.cnt := newCount; -end; - function TStat.ToString: string; var minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList. @@ -314,7 +303,9 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string); // Get the weather station name // Using Copy and POS - as suggested by Gemini AI. // This part saves 3 mins faster when processing 1 billion rows. 
- //parsedStation := Copy(line, 1, delimiterPos - 1); + + // No need to create a string + // parsedStation := Copy(line, 1, delimiterPos - 1); strFloatTemp := Copy(line, delimiterPos + 1, Length(line)); // Using a lookup value speeds up 30-45 seconds @@ -324,49 +315,19 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string); self.AddCityTemperatureLG(Copy(line, 1, delimiterPos - 1), parsedTemp); end; - end; - - {// Get position of the delimiter - delimiterPos := Pos(';', line); - if delimiterPos > 0 then - begin - // Get the weather station name - // Using Copy and POS - as suggested by Gemini AI. - // This part saves 3 mins faster when processing 1 billion rows. - parsedStation := Copy(line, 1, delimiterPos - 1); - - // Get the temperature recorded, as string, remove '.' from string float - // because we want to save it as int64. - strFloatTemp := Copy(line, delimiterPos + 1, Length(line)); - - // strFloatTemp := StringReplace(strFloatTemp, '.', '', [rfReplaceAll]); - // The above operation is a bit expensive. - // Rewrote a simple function which prevents creation of new string - // in each iteration. Saved approx 20-30 seconds for 1 billion row. - // Remove dots turns a float into an int. 
- strFloatTemp := RemoveDots(strFloatTemp); - - // Add the weather station and the recorded temp (as int64) in the TDictionary - Val(strFloatTemp, - parsedTemp, - valCode); - if valCode <> 0 then Exit; - - // Add a record in TWeatherDictionary - self.AddCityTemperatureLG(parsedStation, parsedTemp); - end;} end; procedure TWeatherStation.ReadMeasurements; var fileStream: TFileStream; + bufStream: TReadBufStream; streamReader: TStreamReader; line: string; begin // Open the file for reading - fileStream := TFileStream.Create(self.fname, fmOpenRead or fmShareDenyNone); + fileStream := TFileStream.Create(self.fname, fmOpenRead); try streamReader := TStreamReader.Create(fileStream, 65536 * 2, False); try @@ -390,11 +351,6 @@ procedure TWeatherStation.ProcessMeasurements; begin self.CreateLookupTemp; self.ReadMeasurements; - // self.ReadMeasurementsBuf; - // self.ReadMeasurementsClassic; - {This chunking method cuts ~ 30 - 40 seconds of processing time from ~6.45 to 6.00 - But the SHA256 at the end is incorrect} - // self.ReadMeasurementsInChunks(self.fname); self.SortWeatherStationAndStats; self.PrintSortedWeatherStationAndStats; end; From f2fc4f4890e4c6fe8c5e2a3b4584c4769a3656dc Mon Sep 17 00:00:00 2001 From: Iwan Kelaiah Date: Fri, 12 Apr 2024 07:52:27 +1000 Subject: [PATCH 2/5] Update - Removed double lookup on dictionaries. --- entries/ikelaiah/README.md | 4 + entries/ikelaiah/src/weatherstation.pas | 99 +++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 6 deletions(-) diff --git a/entries/ikelaiah/README.md b/entries/ikelaiah/README.md index 2228dba..6874f49 100644 --- a/entries/ikelaiah/README.md +++ b/entries/ikelaiah/README.md @@ -127,6 +127,10 @@ Iwan Kelaiah * Converting Float as String to Int was a bit slow, so resorted to a lookup instead. This saves 30-45 seconds. * Re-arranged `if` statements in two places. This saves 10-15 seconds x 2 = ~ 30 seconds saving. +* 1.8 + * Revision release - Sequential approach. 
4-6 mins on my Inspiron 15 7510 laptop (a little improvement on speed). + * Removed double lookup on dictionaries. + ## License This project is licensed under the MIT License - see the LICENSE.md file for details diff --git a/entries/ikelaiah/src/weatherstation.pas b/entries/ikelaiah/src/weatherstation.pas index d34276c..aba90b7 100644 --- a/entries/ikelaiah/src/weatherstation.pas +++ b/entries/ikelaiah/src/weatherstation.pas @@ -52,6 +52,7 @@ TWeatherStation = class lookupStrFloatToIntList: TValidTemperatureDictionary; procedure CreateLookupTemp; procedure ReadMeasurements; + procedure ReadMeasurementsBuf; procedure ParseStationAndTemp(const line: string); procedure AddCityTemperatureLG(const cityName: string; const newTemp: int64); procedure SortWeatherStationAndStats; @@ -239,10 +240,10 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string; stat: PStat; begin // If city name esxists, modify temp as needed - if self.weatherDictionary.Contains(cityName) then + if self.weatherDictionary.TryGetValue(cityName, stat) then begin // Get the temp record - stat := self.weatherDictionary[cityName]; + // stat := self.weatherDictionary[cityName]; // Update min and max temps if needed // Re-arranged the if statement, to achieve minimal if checks. 
@@ -309,11 +310,9 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string); strFloatTemp := Copy(line, delimiterPos + 1, Length(line)); // Using a lookup value speeds up 30-45 seconds - if self.lookupStrFloatToIntList.Contains(strFloatTemp) then + if self.lookupStrFloatToIntList.TryGetValue(strFloatTemp, parsedTemp) then begin - parsedTemp := self.lookupStrFloatToIntList[strFloatTemp]; - self.AddCityTemperatureLG(Copy(line, 1, delimiterPos - 1), - parsedTemp); + self.AddCityTemperatureLG(Copy(line, 1, delimiterPos - 1), parsedTemp); end; end; end; @@ -346,11 +345,99 @@ procedure TWeatherStation.ReadMeasurements; end; end; +procedure TWeatherStation.ReadMeasurementsBuf; +var + fileStream: TFileStream; + memStream: TMemoryStream; + streamReader: TStreamReader; + buffer: TBytes; + bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex, + index, lineCount: int64; +begin + + chunksize := 536870912 * 1; + + // Open the file for reading + fileStream := TFileStream.Create(self.fname, fmOpenRead); + SetLength(buffer, chunkSize); + try + memStream := TMemoryStream.Create; + try + totalBytesRead := 0; + chunkIndex := 0; + lineCount := 0; + + // Read and parse chunks of data until EOF + while totalBytesRead < fileStream.Size do + begin + // Read more bytes and keep track on bytes read + bytesRead := fileStream.Read(buffer[0], chunkSize); + Inc(totalBytesRead, bytesRead); + + // Find the position of the last newline character in the chunk + lineBreakPos := BytesRead; + while (lineBreakPos > 0) and (Buffer[lineBreakPos - 1] <> Ord(#10)) do + Dec(lineBreakPos); + + { Now, must ensure that if the last byte read in the current chunk + is not a newline character, the file pointer is moved back to include + that byte and any preceding bytes of the partial line in the next + chunk's read operation. 
+ + Also, no need to update the BytesRead variable in this context because + it represents the actual number of bytes read from the file, including + any partial line that may have been included due to moving the file + pointer back. + Ref: https://www.freepascal.org/docs-html/rtl/classes/tstream.seek.html} + if lineBreakPos < bytesRead then + fileStream.Seek(-(bytesRead - lineBreakPos), soCurrent); + //{$IFDEF DEBUG} + // Do something with the chunk here + // Like counting line + for index := 0 to lineBreakPos - 1 do + if buffer[index] = Ord(#10) then + lineCount := lineCount + 1; + //{$ENDIF DEBUG} + + // Use memory stream & stream reader + memStream.Write(buffer[0], lineBreakPos - 1); + memStream.Position:=0; + streamReader := TStreamReader.Create(memStream); + try + while not streamReader.EOF do + begin + // WriteLn(streamReader.ReadLine); + self.ParseStationAndTemp(streamReader.ReadLine); + end; + finally + streamReader.Free; + end; + //{$IFDEF DEBUG} + // Display user feedback + WriteLn('Line count: ', IntToStr(lineCount)); + WriteLn('Chunk ', chunkIndex, ', Total bytes read:', IntToStr(totalBytesRead)); + //{$ENDIF DEBUG} + + //{$IFDEF DEBUG} + // Increase chunk index - a counter + Inc(chunkIndex); + //{$ENDIF DEBUG} + end; + finally + memStream.Free; + end; + finally + // Close the file + fileStream.Free; + end; +end; + // The main algorithm procedure TWeatherStation.ProcessMeasurements; begin self.CreateLookupTemp; self.ReadMeasurements; + //self.ReadMeasurementsBuf; self.SortWeatherStationAndStats; self.PrintSortedWeatherStationAndStats; end; From e91fe95d9d9305bd5a90ad918dbd7d0a56d18e0d Mon Sep 17 00:00:00 2001 From: Iwan Kelaiah Date: Fri, 12 Apr 2024 10:03:12 +1000 Subject: [PATCH 3/5] Update - Minor edits on comments and debug statements. 
--- entries/ikelaiah/README.md | 8 ++++---- entries/ikelaiah/src/weatherstation.pas | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/entries/ikelaiah/README.md b/entries/ikelaiah/README.md index 6874f49..fa2b0f6 100644 --- a/entries/ikelaiah/README.md +++ b/entries/ikelaiah/README.md @@ -123,13 +123,13 @@ Iwan Kelaiah * Introduced a pointer to the weather record, `PStat = ^TStat`. This saves approx. 30 - 60 seconds. * 1.7 - * Revision release - Sequential approach. 4-6 mins on my Inspiron 15 7510 laptop (a little improvement on speed). + * Revision release - Sequential approach. 4-6 mins on my Inspiron 15 7510 laptop, around 4m50s (a little improvement on speed). * Converting Float as String to Int was a bit slow, so resorted to a lookup instead. This saves 30-45 seconds. - * Re-arranged `if` statements in two places. This saves 10-15 seconds x 2 = ~ 30 seconds saving. + * Re-arranged `if` statements in two places. This saves 10-15 seconds x 2 = ~ 30 seconds. * 1.8 - * Revision release - Sequential approach. 4-6 mins on my Inspiron 15 7510 laptop (a little improvement on speed). - * Removed double lookup on dictionaries. + * Revision release - Sequential approach. 3-5 mins on my Inspiron 15 7510 laptop, around 3m50s (a little improvement on speed). + * Removed double lookup on dictionaries; removed `.Contains` and used `TryGetValue` instead. This saves approx 60 seconds. 
## License diff --git a/entries/ikelaiah/src/weatherstation.pas b/entries/ikelaiah/src/weatherstation.pas index aba90b7..1b3170e 100644 --- a/entries/ikelaiah/src/weatherstation.pas +++ b/entries/ikelaiah/src/weatherstation.pas @@ -391,17 +391,17 @@ procedure TWeatherStation.ReadMeasurementsBuf; Ref: https://www.freepascal.org/docs-html/rtl/classes/tstream.seek.html} if lineBreakPos < bytesRead then fileStream.Seek(-(bytesRead - lineBreakPos), soCurrent); - //{$IFDEF DEBUG} + {$IFDEF DEBUG} // Do something with the chunk here // Like counting line for index := 0 to lineBreakPos - 1 do if buffer[index] = Ord(#10) then lineCount := lineCount + 1; - //{$ENDIF DEBUG} + {$ENDIF DEBUG} // Use memory stream & stream reader memStream.Write(buffer[0], lineBreakPos - 1); - memStream.Position:=0; + memStream.Position := 0; streamReader := TStreamReader.Create(memStream); try while not streamReader.EOF do @@ -412,16 +412,16 @@ procedure TWeatherStation.ReadMeasurementsBuf; finally streamReader.Free; end; - //{$IFDEF DEBUG} + {$IFDEF DEBUG} // Display user feedback WriteLn('Line count: ', IntToStr(lineCount)); WriteLn('Chunk ', chunkIndex, ', Total bytes read:', IntToStr(totalBytesRead)); - //{$ENDIF DEBUG} + {$ENDIF DEBUG} - //{$IFDEF DEBUG} + {$IFDEF DEBUG} // Increase chunk index - a counter Inc(chunkIndex); - //{$ENDIF DEBUG} + {$ENDIF DEBUG} end; finally memStream.Free; From 886db84e99a72660727ca8ca1a5b8c1a519789d9 Mon Sep 17 00:00:00 2001 From: Iwan Kelaiah Date: Fri, 12 Apr 2024 18:00:10 +1000 Subject: [PATCH 4/5] Update - Minor edits. 
--- entries/ikelaiah/src/weatherstation.pas | 91 ++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/entries/ikelaiah/src/weatherstation.pas b/entries/ikelaiah/src/weatherstation.pas index 1b3170e..bc4d63b 100644 --- a/entries/ikelaiah/src/weatherstation.pas +++ b/entries/ikelaiah/src/weatherstation.pas @@ -53,6 +53,7 @@ TWeatherStation = class procedure CreateLookupTemp; procedure ReadMeasurements; procedure ReadMeasurementsBuf; + procedure ReadMeasurementsBufSL; procedure ParseStationAndTemp(const line: string); procedure AddCityTemperatureLG(const cityName: string; const newTemp: int64); procedure SortWeatherStationAndStats; @@ -328,7 +329,7 @@ procedure TWeatherStation.ReadMeasurements; // Open the file for reading fileStream := TFileStream.Create(self.fname, fmOpenRead); try - streamReader := TStreamReader.Create(fileStream, 65536 * 2, False); + streamReader := TStreamReader.Create(fileStream, 65536 * 16, False); try // Read and parse chunks of data until EOF ------------------------------- while not streamReader.EOF do @@ -355,7 +356,7 @@ procedure TWeatherStation.ReadMeasurementsBuf; index, lineCount: int64; begin - chunksize := 536870912 * 1; + chunksize := 4194304 * 1; // Open the file for reading fileStream := TFileStream.Create(self.fname, fmOpenRead); @@ -432,12 +433,96 @@ procedure TWeatherStation.ReadMeasurementsBuf; end; end; +procedure TWeatherStation.ReadMeasurementsBufSL; +var + fileStream: TFileStream; + strList: TStringList; + streamReader: TStreamReader; + buffer, trimmedBuffer: TBytes; + bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex, + slIndex, lineCount: int64; +begin + + chunksize := 8192 * 1; + + // Open the file for reading + fileStream := TFileStream.Create(self.fname, fmOpenRead); + SetLength(buffer, chunkSize); + try + strList := TStringList.Create; + try + totalBytesRead := 0; + chunkIndex := 0; + lineCount := 0; + + // Read and parse chunks of data until EOF + while totalBytesRead < 
fileStream.Size do + begin + // Read more bytes and keep track on bytes read + bytesRead := fileStream.Read(buffer[0], chunkSize); + Inc(totalBytesRead, bytesRead); + + // Find the position of the last newline character in the chunk + lineBreakPos := BytesRead; + while (lineBreakPos > 0) and (Buffer[lineBreakPos - 1] <> Ord(#10)) do + Dec(lineBreakPos); + + { Now, must ensure that if the last byte read in the current chunk + is not a newline character, the file pointer is moved back to include + that byte and any preceding bytes of the partial line in the next + chunk's read operation. + + Also, no need to update the BytesRead variable in this context because + it represents the actual number of bytes read from the file, including + any partial line that may have been included due to moving the file + pointer back. + Ref: https://www.freepascal.org/docs-html/rtl/classes/tstream.seek.html} + if lineBreakPos < bytesRead then + fileStream.Seek(-(bytesRead - lineBreakPos), soCurrent); + {$IFDEF DEBUG} + // Do something with the chunk here + // Like counting line + for index := 0 to lineBreakPos - 1 do + if buffer[index] = Ord(#10) then + lineCount := lineCount + 1; + {$ENDIF DEBUG} + + // Use TStringList and a sub-TBytes array up to lineBreakPos + SetLength(trimmedBuffer, lineBreakPos); + // Index 'n' is inclusive, so add 1 to the length + Move(buffer[0], trimmedBuffer[0], Length(trimmedBuffer)); // Copy the bytes + strList.Clear; + strList.Text := ansistring(trimmedBuffer); + for slIndex := 0 to strList.Count - 1 do + self.ParseStationAndTemp(strList[slIndex]); + + {$IFDEF DEBUG} + // Display user feedback + WriteLn('Line count: ', IntToStr(lineCount)); + WriteLn('Chunk ', chunkIndex, ', Total bytes read:', IntToStr(totalBytesRead)); + {$ENDIF DEBUG} + + {$IFDEF DEBUG} + // Increase chunk index - a counter + Inc(chunkIndex); + {$ENDIF DEBUG} + end; + finally + strList.Free; + end; + finally + // Close the file + fileStream.Free; + end; +end; + // The main algorithm 
procedure TWeatherStation.ProcessMeasurements; begin self.CreateLookupTemp; self.ReadMeasurements; - //self.ReadMeasurementsBuf; + // self.ReadMeasurementsBuf; + //self.ReadMeasurementsBufSL; self.SortWeatherStationAndStats; self.PrintSortedWeatherStationAndStats; end; From 001f71200241d2cb302dc5a6d18093435fde5626 Mon Sep 17 00:00:00 2001 From: Iwan Kelaiah Date: Sat, 13 Apr 2024 14:55:50 +1000 Subject: [PATCH 5/5] Update - Tidy up. --- entries/ikelaiah/src/weatherstation.pas | 184 +----------------------- 1 file changed, 2 insertions(+), 182 deletions(-) diff --git a/entries/ikelaiah/src/weatherstation.pas b/entries/ikelaiah/src/weatherstation.pas index bc4d63b..bfcf397 100644 --- a/entries/ikelaiah/src/weatherstation.pas +++ b/entries/ikelaiah/src/weatherstation.pas @@ -11,7 +11,6 @@ interface , streamex , bufstream , lgHashMap - , StrUtils {$IFDEF DEBUG} , Stopwatch {$ENDIF} @@ -52,8 +51,6 @@ TWeatherStation = class lookupStrFloatToIntList: TValidTemperatureDictionary; procedure CreateLookupTemp; procedure ReadMeasurements; - procedure ReadMeasurementsBuf; - procedure ReadMeasurementsBufSL; procedure ParseStationAndTemp(const line: string); procedure AddCityTemperatureLG(const cityName: string; const newTemp: int64); procedure SortWeatherStationAndStats; @@ -243,9 +240,6 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string; // If city name esxists, modify temp as needed if self.weatherDictionary.TryGetValue(cityName, stat) then begin - // Get the temp record - // stat := self.weatherDictionary[cityName]; - // Update min and max temps if needed // Re-arranged the if statement, to achieve minimal if checks. // This saves approx 15 seconds when processing 1 billion row. @@ -303,7 +297,7 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string); if delimiterPos > 0 then begin // Get the weather station name - // Using Copy and POS - as suggested by Gemini AI. + // Using Copy and POS instead of SplitString - as suggested by Gemini AI. 
// This part saves 3 mins faster when processing 1 billion rows. // No need to create a string @@ -321,15 +315,13 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string); procedure TWeatherStation.ReadMeasurements; var fileStream: TFileStream; - bufStream: TReadBufStream; streamReader: TStreamReader; - line: string; begin // Open the file for reading fileStream := TFileStream.Create(self.fname, fmOpenRead); try - streamReader := TStreamReader.Create(fileStream, 65536 * 16, False); + streamReader := TStreamReader.Create(fileStream, 65536 * 32, False); try // Read and parse chunks of data until EOF ------------------------------- while not streamReader.EOF do @@ -346,183 +338,11 @@ procedure TWeatherStation.ReadMeasurements; end; end; -procedure TWeatherStation.ReadMeasurementsBuf; -var - fileStream: TFileStream; - memStream: TMemoryStream; - streamReader: TStreamReader; - buffer: TBytes; - bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex, - index, lineCount: int64; -begin - - chunksize := 4194304 * 1; - - // Open the file for reading - fileStream := TFileStream.Create(self.fname, fmOpenRead); - SetLength(buffer, chunkSize); - try - memStream := TMemoryStream.Create; - try - totalBytesRead := 0; - chunkIndex := 0; - lineCount := 0; - - // Read and parse chunks of data until EOF - while totalBytesRead < fileStream.Size do - begin - // Read more bytes and keep track on bytes read - bytesRead := fileStream.Read(buffer[0], chunkSize); - Inc(totalBytesRead, bytesRead); - - // Find the position of the last newline character in the chunk - lineBreakPos := BytesRead; - while (lineBreakPos > 0) and (Buffer[lineBreakPos - 1] <> Ord(#10)) do - Dec(lineBreakPos); - - { Now, must ensure that if the last byte read in the current chunk - is not a newline character, the file pointer is moved back to include - that byte and any preceding bytes of the partial line in the next - chunk's read operation. 
- - Also, no need to update the BytesRead variable in this context because - it represents the actual number of bytes read from the file, including - any partial line that may have been included due to moving the file - pointer back. - Ref: https://www.freepascal.org/docs-html/rtl/classes/tstream.seek.html} - if lineBreakPos < bytesRead then - fileStream.Seek(-(bytesRead - lineBreakPos), soCurrent); - {$IFDEF DEBUG} - // Do something with the chunk here - // Like counting line - for index := 0 to lineBreakPos - 1 do - if buffer[index] = Ord(#10) then - lineCount := lineCount + 1; - {$ENDIF DEBUG} - - // Use memory stream & stream reader - memStream.Write(buffer[0], lineBreakPos - 1); - memStream.Position := 0; - streamReader := TStreamReader.Create(memStream); - try - while not streamReader.EOF do - begin - // WriteLn(streamReader.ReadLine); - self.ParseStationAndTemp(streamReader.ReadLine); - end; - finally - streamReader.Free; - end; - {$IFDEF DEBUG} - // Display user feedback - WriteLn('Line count: ', IntToStr(lineCount)); - WriteLn('Chunk ', chunkIndex, ', Total bytes read:', IntToStr(totalBytesRead)); - {$ENDIF DEBUG} - - {$IFDEF DEBUG} - // Increase chunk index - a counter - Inc(chunkIndex); - {$ENDIF DEBUG} - end; - finally - memStream.Free; - end; - finally - // Close the file - fileStream.Free; - end; -end; - -procedure TWeatherStation.ReadMeasurementsBufSL; -var - fileStream: TFileStream; - strList: TStringList; - streamReader: TStreamReader; - buffer, trimmedBuffer: TBytes; - bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex, - slIndex, lineCount: int64; -begin - - chunksize := 8192 * 1; - - // Open the file for reading - fileStream := TFileStream.Create(self.fname, fmOpenRead); - SetLength(buffer, chunkSize); - try - strList := TStringList.Create; - try - totalBytesRead := 0; - chunkIndex := 0; - lineCount := 0; - - // Read and parse chunks of data until EOF - while totalBytesRead < fileStream.Size do - begin - // Read more bytes and keep 
track on bytes read - bytesRead := fileStream.Read(buffer[0], chunkSize); - Inc(totalBytesRead, bytesRead); - - // Find the position of the last newline character in the chunk - lineBreakPos := BytesRead; - while (lineBreakPos > 0) and (Buffer[lineBreakPos - 1] <> Ord(#10)) do - Dec(lineBreakPos); - - { Now, must ensure that if the last byte read in the current chunk - is not a newline character, the file pointer is moved back to include - that byte and any preceding bytes of the partial line in the next - chunk's read operation. - - Also, no need to update the BytesRead variable in this context because - it represents the actual number of bytes read from the file, including - any partial line that may have been included due to moving the file - pointer back. - Ref: https://www.freepascal.org/docs-html/rtl/classes/tstream.seek.html} - if lineBreakPos < bytesRead then - fileStream.Seek(-(bytesRead - lineBreakPos), soCurrent); - {$IFDEF DEBUG} - // Do something with the chunk here - // Like counting line - for index := 0 to lineBreakPos - 1 do - if buffer[index] = Ord(#10) then - lineCount := lineCount + 1; - {$ENDIF DEBUG} - - // Use TStringList and a sub-TBytes array up to lineBreakPos - SetLength(trimmedBuffer, lineBreakPos); - // Index 'n' is inclusive, so add 1 to the length - Move(buffer[0], trimmedBuffer[0], Length(trimmedBuffer)); // Copy the bytes - strList.Clear; - strList.Text := ansistring(trimmedBuffer); - for slIndex := 0 to strList.Count - 1 do - self.ParseStationAndTemp(strList[slIndex]); - - {$IFDEF DEBUG} - // Display user feedback - WriteLn('Line count: ', IntToStr(lineCount)); - WriteLn('Chunk ', chunkIndex, ', Total bytes read:', IntToStr(totalBytesRead)); - {$ENDIF DEBUG} - - {$IFDEF DEBUG} - // Increase chunk index - a counter - Inc(chunkIndex); - {$ENDIF DEBUG} - end; - finally - strList.Free; - end; - finally - // Close the file - fileStream.Free; - end; -end; - // The main algorithm procedure TWeatherStation.ProcessMeasurements; begin 
self.CreateLookupTemp; self.ReadMeasurements; - // self.ReadMeasurementsBuf; - //self.ReadMeasurementsBufSL; self.SortWeatherStationAndStats; self.PrintSortedWeatherStationAndStats; end;