-
Notifications
You must be signed in to change notification settings - Fork 0
Feature/2 implement tail n #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| [package] | ||
| name = "tail" | ||
| version = "0.1.0" | ||
| edition = "2024" | ||
|
|
||
| [dependencies] | ||
| clap = { version = "4.5.50", features = ["derive"] } | ||
| io = "0.0.2" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| # Task: Implement tail -n | ||
| Write a Rust program that prints the last N lines of a file efficiently, even for very large files (>1GB). | ||
| Work directly with bytes (treat a line as ending with b'\n') and parse command-line arguments using the clap crate. | ||
| Optional: support UTF-8 lines. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| use std::{ | ||
| cmp::min, | ||
| fs::File, | ||
| io::{Read, Seek, SeekFrom}, | ||
| }; | ||
|
|
||
| use clap::Parser; | ||
|
|
||
| #[derive(Parser, Debug)] | ||
| #[command(version, about, long_about = None)] | ||
| struct Args { | ||
| #[arg(short, default_value_t = 10)] | ||
| n: u64, | ||
|
|
||
| filename: String, | ||
| } | ||
|
|
||
/// Size in bytes of each chunk read while scanning the file backwards.
const READ_BUFFER_SIZE: usize = 4096;

/// Returns the last `n` lines of the file at `filename`.
///
/// The file is scanned backwards from its end in chunks of
/// [`READ_BUFFER_SIZE`] bytes, looking only at raw `b'\n'` bytes, until the
/// start of the requested tail is located. The tail is then read forward in
/// a single pass, so the cost is proportional to the size of the tail rather
/// than the size of the file.
///
/// Line semantics:
/// * a newline that is the very last byte of the file terminates the final
///   line and does not start a new (empty) one;
/// * a file without a trailing newline still counts its last, unterminated
///   line;
/// * if the file has fewer than `n` lines, the whole file is returned;
/// * `n == 0` or an empty file yields an empty string.
///
/// The tail is decoded as UTF-8 once, after it has been fully assembled, so
/// multi-byte characters straddling a chunk boundary are preserved. Byte
/// sequences that are not valid UTF-8 are replaced with U+FFFD.
///
/// # Panics
///
/// Panics if the file cannot be opened, seeked or read.
fn get_last_n_lines(filename: &str, n: u64) -> String {
    let mut file = match File::open(filename) {
        Err(why) => panic!("Failed to open {filename}: {why}"),
        Ok(file) => file,
    };

    // Seeking to the end returns the new position, i.e. the file size.
    let file_size = file
        .seek(SeekFrom::End(0))
        .unwrap_or_else(|e| panic!("Failed to seek to the end of {filename}: {e}"));
    if n == 0 || file_size == 0 {
        return String::new();
    }

    let mut buf = [0u8; READ_BUFFER_SIZE];
    // Byte offset where the tail begins. Stays 0 (whole file) when fewer than
    // `n` line separators are found.
    let mut tail_start = 0u64;
    let mut separators_seen = 0u64;
    // Exclusive end offset of the next chunk to inspect.
    let mut chunk_end = file_size;

    'scan: while chunk_end > 0 {
        // Bounded by READ_BUFFER_SIZE, so the cast to usize cannot truncate.
        let read_size = min(chunk_end, READ_BUFFER_SIZE as u64) as usize;
        let chunk_start = chunk_end - read_size as u64;

        // Absolute seek: one seek per chunk instead of seeking back twice.
        file.seek(SeekFrom::Start(chunk_start))
            .unwrap_or_else(|e| panic!("Failed to seek to position {chunk_start}: {e}"));

        let chunk = &mut buf[..read_size];
        if let Err(e) = file.read_exact(chunk) {
            panic!("Couldn't read data from position {chunk_start}, err: {e}");
        }

        for (idx, &byte) in chunk.iter().enumerate().rev() {
            if byte != b'\n' {
                continue;
            }
            let offset = chunk_start + idx as u64;
            // The newline closing the final line is a terminator, not a
            // separator between two lines.
            if offset == file_size - 1 {
                continue;
            }
            separators_seen += 1;
            if separators_seen == n {
                // The tail starts right AFTER the n-th separator from the end.
                tail_start = offset + 1;
                break 'scan;
            }
        }

        chunk_end = chunk_start;
    }

    // Read the tail forward in one pass instead of prepending chunk by chunk.
    let tail_len = file_size - tail_start;
    file.seek(SeekFrom::Start(tail_start))
        .unwrap_or_else(|e| panic!("Failed to seek to position {tail_start}: {e}"));
    // Capacity is only a hint; fall back to 0 if the length does not fit usize.
    let mut tail_bytes = Vec::with_capacity(usize::try_from(tail_len).unwrap_or(0));
    file.take(tail_len)
        .read_to_end(&mut tail_bytes)
        .unwrap_or_else(|e| panic!("Couldn't read data from position {tail_start}, err: {e}"));

    // `\n` never occurs inside a multi-byte UTF-8 sequence, so `tail_start`
    // is always on a character boundary for valid UTF-8 input.
    String::from_utf8_lossy(&tail_bytes).into_owned()
}
|
|
||
| fn main() { | ||
| let args = Args::parse(); | ||
|
|
||
| let tailed_output = get_last_n_lines(&args.filename, args.n); | ||
| print!("{tailed_output:?}"); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably not needed