diff --git a/linked-list/Cargo.toml b/linked-list/Cargo.toml new file mode 100644 index 0000000..389c3d3 --- /dev/null +++ b/linked-list/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "linked-list" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/linked-list/src/main.rs b/linked-list/src/main.rs new file mode 100644 index 0000000..f417daa --- /dev/null +++ b/linked-list/src/main.rs @@ -0,0 +1,140 @@ +mod node; + +#[cfg(test)] +mod linked_list_tests { + use crate::node::Node; + + #[test] + fn trivial() { + let mut node = Node::new(1); + node.insert(2).insert(3).insert(4); + println!("{node}"); + assert_eq!(node.to_string(), "1,2,3,4"); + } + + #[test] + fn easy() { + let mut node = Node::new(42); + assert_eq!(**node, 42); + **node = 13; + assert_eq!(**node, 13); + } + + #[test] + fn normal() { + let mut node1 = Node::new(1); + let node2 = node1.insert(3); // node2 keeps reference to just inserted value which is owned by node1 * + node2.insert(4); + node1.insert(2); // * and thus we can't swap this line with the previous one because of ref lifetimes + // We can't get mut ref for node1 while having active immutable ref + assert_eq!( + // No need to implement collect for my iter type - blanket impl would work just fine here + // + // Why use "copied"? + // "copied" is method from Iterator trait which returns iterator which produces copies of original iter items + // When we iterate using Copied iter we would do: self.it.next().copied(); + // "self.it" is our iter forward. We just call its next method -> return copied value + // + // "collect" - one another method from Iterator trait which is used to convert iter -> collection + // Again, we are good using blanket implementation + // "collect" will accept Copied iter and use it to create collection, which we specified to be > + // > - that is called "turbofish" and it helps Rust to figure out the collection type we want to get + // and thus find the right "collect" implementation. We don't specify concrete type of elements in the vector + // as Rust can compare > with B returned from "collect" which is trait bounded to B: FromIterator>. + // We see that it specifies Self::Item which is already known to the compiler as i32 (taken now from the Copied iter, our + // original iter features "&i32" + // + // Default impl of "collect" calls FromIterator::from_iter(self) + // Compiler will find "from_iter" implementation which returns turbofished type > and call it + node1.iter_forward().copied().collect::>(), + vec![1, 2, 3, 4] + ); + } + + #[test] + fn hard() { + let mut node1 = Node::new(1); + node1.insert(2).insert(3).insert(4); + let node2 = node1.clone(); + assert_eq!( + node1.iter_forward().collect::>(), + node2.iter_forward().collect::>(), + ); + } + + #[test] + fn nightmare() { + // let a = 1; + // println!("Stack init ptr: {:p}", &a); + // I used this to print top address of the stack; I also added print in node's drop method + // First stack address was 0x7f4dc13fe47c and the last printed in drop method 0x7f4dc1200404 + // Diff (0x7f4dc13fe47c−0x7f4dc1200404) = 2_089_080 = ~2MB + // That is exactly the stack size of thread which run tests under Rust test runner + // Then we crashed due to stack overflow issue + + let mut node = Node::new(1); + + for index in 0..10_000_000 { + node.insert(index); + } + // We can survive while dropping exactly 16322 items (and stack overflows as it tries dropping 16_323th node) + // First I figured out that all the values are inserted without any problems - I saw prints for all values up to 10M + // Then I implemented custom drop, put print there and figured out that the last value dropped was 9_983_678 + // Nodes are stored in the following order (featuring their values below): + // 1 -> 10M-1 -> 10M-2 -> ... -> 2 -> 1 -> 0 + // Values are deleted from head and the last value deleted was node with value 9_983_678. + // That means once I have deleted 1 + (9_999_999 - 9_983_678) = 1 + 16321 = 16322 and tried deleting 16_323th node, the stack overflowed + + // did it panic ?? + } + + #[test] + fn ultra_nightmare() { + // let mut node = Node::new(1); + // node.insert(2).insert(3).insert(4); + // let last = node.iter_forward().last().unwrap(); + // assert_eq!(last.iter_bacwards().collect(), [4, 3, 2, 1]); + } + + #[test] + fn test_6_from_iter() { + let node = Node::from_iter([1, 2, 3, 4]); + assert_eq!(Vec::from_iter(node.into_iter()), [1, 2, 3, 4]); + + let node = Node::from_iter(vec![1, 2, 3, 4]); + assert_eq!(Vec::from_iter(node.into_iter()), [1, 2, 3, 4]); + } + + #[test] + fn test_7_extend() { + let mut node = Node::new(1); + node.extend([2, 3]); + node.extend(vec![4, 5]); + assert_eq!(Vec::from_iter(node.into_iter()), [1, 2, 3, 4, 5]); + } + + #[test] + fn test_8_filter_fn() { + let node = Node::from_iter([1, 2, 3, 4]); + // this "node" shadowing makes no harm here as previous value is moved into "remove_if" method + let node = node.remove_if(|e| e % 2 == 0).unwrap(); + assert_eq!(Vec::from_iter(node.into_iter()), [1, 3]); + } + + #[test] + fn test_9_filter_fn_with_capture() { + let removed_value = "1".to_string(); + let node = Node::from_iter(["1", "2"]); + let node = node.remove_if(|e| *e == removed_value).unwrap(); + assert_eq!(Vec::from_iter(node.into_iter()), ["2"]); + } + + #[test] + fn test_10_filter_all() { + let node = Node::from_iter([1, 2, 3, 4]); + let node = node.remove_if(|_| true); + assert!(node.is_none()); + } +} + +fn main() {} diff --git a/linked-list/src/node/mod.rs b/linked-list/src/node/mod.rs new file mode 100644 index 0000000..793b713 --- /dev/null +++ b/linked-list/src/node/mod.rs @@ -0,0 +1,235 @@ +use std::clone; +use std::fmt::{Debug, Display}; +use std::io::Write; +use std::ops::{Deref, DerefMut}; + +use crate::node; + +pub mod node_forward_iter; +pub mod node_owner_iter; + +#[derive(Debug)] +pub struct Node { + value: T, + next: Option>>, +} + +impl Node { + /// creates new node with a value + pub fn new(value: T) -> Box { + Box::new(Node { + value, + next: Option::None, + }) + } + + /// creates new node with a value and inserts it after this one + pub fn insert(&mut self, value: T) -> &mut Node { + let Some(_) = &self.next else { + return self.next.insert(Node::new(value)); + }; + + let mut new_node = Node::new(value); + new_node.next = self.next.take(); + + // this "insert" can be confusing here but it is called for the Option + // so there it no recursion + self.next.insert(new_node); + + // let a = 1; + // println!("{} a: {:p}", val, &a); // this value always has constant address and seems that stack doesn't move here + // ok, so this method actually executes till the end and then fails + // maybe it has something to do with drop?? + + return self.next.as_deref_mut().unwrap(); + } + + // Keep it as "iter_forward" to match "iter_backward". + // If "iter_backward" didn't exist, it should be named "iter". + pub fn iter_forward(&self) -> node_forward_iter::NodeForwardIter<'_, T> { + node_forward_iter::NodeForwardIter { + current: Some(self), + } + } + + pub fn remove_if(self, mut closure: F) -> Option + where + F: Fn(&T) -> bool, // changed from "FnMut" -> "Fn"; seems to be saficient here + { + let mut head: Option = None; + let mut prev_node: Option<&mut Self> = None; + for value in self.into_iter() { + if !closure(&value) { + // get head for the new list + let Some(prev_node_unwrapped) = prev_node else { + head = Some(Node { value, next: None }); + prev_node = head.as_mut(); + continue; + }; + + // insert nodes which satisfies the condition + prev_node_unwrapped.next = Some(Box::new(Node { value, next: None })); + prev_node = prev_node_unwrapped.next.as_deref_mut(); + } + } + + head + } +} + +impl Display for Node { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut iter = self.iter_forward(); + + if let Some(value) = iter.next() { + write!(f, "{}", value)?; + while let Some(value) = iter.next() { + write!(f, ",{}", value)?; + } + } + + Ok(()) + } +} + +// why does specifying Display + Clone for "T" throws compiler error?? +// because we can't specialize "Drop" impl for the type - it MUST always be single implementation +impl Drop for Node { + fn drop(&mut self) { + // println!("Dropping {:?}", self.value); // last value dropped here was 9_983_678 + + // let a = 1; + // println!("{:p}", &a); <-- shows that stack increases as drop is called recursively + + // do I need to clean it from the end?? + // then it wouldn't go into infinite recursion? nope I guess.. + // actually I would use list above then I wouldn't have a problem that I need to implement common dropper + // as list would be cleaned by itearting over nodes and deleting just one node at a time + // here node is itself a list, kind of.. + + let mut preserved_node = self.next.take(); + // drop(self); + // ^^^ we don't need this; first orphaned node will be dropped automatically at the end of this method + while let Some(mut next_node) = preserved_node { + preserved_node = next_node.next.take(); // orphan "next_node", preserved_node keeps reference to the next node + // drop(next_node); // will automatically be dropped here; next_node is newly created on each iteration + } + } +} + +impl Deref for Node { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.value + } +} + +impl DerefMut for Node { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.value + } +} + +// "T" MUST implement Clone trait as we try cloning value here which is of type "T" +impl Clone for Node { + fn clone(&self) -> Self { + // use clone on the value as we don't want to move it and most certainly it won't be copiable + let mut cloned_head_node = Node::new(self.value.clone()); + + // start looping from the second node as we already copied the first just above ^^^ + // "skip" returns new iterator which skips first N elements (1 in this case) + // on the first call to "next" it will return value of (N + 1) element or None + // let mut iter = self.iter_forward().skip(1); + let mut last_node_ref = &mut *cloned_head_node; + for value in self.iter_forward().skip(1) { + last_node_ref = last_node_ref.insert(value.clone()); + } + + // data will be moved from heap to stack + *cloned_head_node + } +} + +/* There are three common methods which can create iterators from a collection: + * iter() - iterates over &T. + * iter_mut() - iterates over &mut T. + * into_iter() - iterates over T. This method is specifically used to convert a collection into an iterator by moving its ownership. */ +impl IntoIterator for Node { + type Item = T; + type IntoIter = node_owner_iter::NodeOwnerIter; + + fn into_iter(self) -> Self::IntoIter { + return node_owner_iter::NodeOwnerIter { + current: Some(self), + }; + // self.iter_forward() + } +} + +impl FromIterator for Node { + fn from_iter>(iter: I) -> Self { + // We can also accept a collection here - that is why "I" MUST implement IntoIterator + let mut iter_i_swear = iter.into_iter(); + + // TODO: is that the right behaviour to panic if Iter is empty?? + let mut from_iter_list = Node::new(iter_i_swear.next().expect("Iter is empty")); + let mut insert_position = &mut *from_iter_list; + for value in iter_i_swear { + insert_position = insert_position.insert(value); + } + + *from_iter_list + } +} + +impl Extend for Node { + fn extend>(&mut self, iter: I) { + /* All commented code below fails test as it tries to extend list + * from within while we should actually get to the end of it first + * and then extend it. + * However, it was pretty useful in terms of learning different Rust syntax and + * getting to know about Rust's NLL BC limitation. See below for more. + */ + // let mut collected_nodes = Node::from_iter(iter.into_iter()); + // let Some(next_node) = self.next.take() else { + // self.next = Some(Box::new(collected_nodes)); + // return; + // }; + + // let mut cur_node = &mut collected_nodes; + // loop { + // // let Some(last_node) = &mut cur_node.next else { + + // // Above line would fail compilation. + // // Though in both cases "last_node" has the same type "&mut Box>", + // // in first case we actually do "ref-match" which is same as borrowing whole "cur_node" + // // while in second case we do "match-ref" which means borrowing "cur_node.next" specifically. + // // This way, in the first case we would end up with compilation error, stating that we can't assign + // // to "cur_node" which was already borrowed. + // // That is limitation of current NLL borrow checker (BC), see: + // // https://github.com/rust-lang/rfcs/blob/master/text/2094-nll.md#problem-case-4-mutating-mut-references + // let Some(ref mut last_node) = cur_node.next else { + // break; + // }; + // // cur_node = last_node.deref_mut(); // <--- this line is identical to the below + // cur_node = &mut *last_node; + // // Same as + // // cur_node = last_node.deref() + // // is identical to + // // cur_node = *last_node; + // } + + // cur_node.next = Some(next_node); + // self.next = Some(Box::new(collected_nodes)); + + // >>>>>>> Right solution which gets to the end of the list and then extend it + let mut cur_node = self; + while let Some(ref mut last_node) = cur_node.next { + cur_node = last_node.deref_mut(); + } + + // Q: What actually happens here when Box::new receives Node from "from_iter" method?? + cur_node.next = Some(Box::new(Node::from_iter(iter.into_iter()))); + } +} diff --git a/linked-list/src/node/node_forward_iter.rs b/linked-list/src/node/node_forward_iter.rs new file mode 100644 index 0000000..117ff5c --- /dev/null +++ b/linked-list/src/node/node_forward_iter.rs @@ -0,0 +1,23 @@ +use std::fmt::Debug; + +use super::Node; + +pub struct NodeForwardIter<'a, T: Debug + Clone> { + pub current: Option<&'a Node>, +} + +impl<'a, T: Debug + Clone> Iterator for NodeForwardIter<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + let Some(current) = self.current else { + return None; + }; + + let value = ¤t.value; + + self.current = current.next.as_deref(); + + Some(value) + } +} diff --git a/linked-list/src/node/node_owner_iter.rs b/linked-list/src/node/node_owner_iter.rs new file mode 100644 index 0000000..e0f036d --- /dev/null +++ b/linked-list/src/node/node_owner_iter.rs @@ -0,0 +1,30 @@ +use std::fmt::Debug; + +use super::Node; + +pub struct NodeOwnerIter { + pub current: Option>, +} + +impl Iterator for NodeOwnerIter { + type Item = T; + + fn next(&mut self) -> Option { + // move Node which is hold insider current field inside Option + // this value will be destroyed as we return from this method + let Some(mut current) = self.current.take() else { + return None; + }; + + // we can't move value from Node as it implements the Drop trait + let value = current.value.clone(); + // I check if there is one more node after the current one + // if so - I move it and prevent it from being dropped along with "current" + if let Some(next_node) = current.next.take() { + // move Node from the Box + self.current = Some(*next_node); + } + + Some(value) + } +} diff --git a/tail/Cargo.toml b/tail/Cargo.toml new file mode 100644 index 0000000..ee113a3 --- /dev/null +++ b/tail/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "tail" +version = "0.1.0" +edition = "2024" + +[dependencies] +clap = { version = "4.5.50", features = ["derive"] } +io = "0.0.2" diff --git a/tail/README b/tail/README new file mode 100644 index 0000000..3b8ac35 --- /dev/null +++ b/tail/README @@ -0,0 +1,4 @@ +# Task: Implement tail -n +Write a Rust program that prints the last N lines of a file efficiently, even for very large files (>1GB). +Work directly with bytes (treat lines ending with b'\n') and parse command-line arguments using clap crate. +Optional: support UTF-8 lines. diff --git a/tail/src/main.rs b/tail/src/main.rs new file mode 100644 index 0000000..ee61725 --- /dev/null +++ b/tail/src/main.rs @@ -0,0 +1,89 @@ +use std::{ + cmp::min, + fs::File, + io::{Read, Seek, SeekFrom}, +}; + +use clap::Parser; + +#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +struct Args { + #[arg(short, default_value_t = 10)] + n: u64, + + filename: String, +} + +const READ_BUFFER_SIZE: usize = 4096; + +/* Buffered approach: + * 1. Seek to the end of the file. + * 2. Crawl backwards in chunks of `READ_BUFFER_SIZE` bytes. + * 3. In each chunk, search for newline characters (`\n`). + * 4. Accumulate the relevant text into a final buffer that will contain the last N lines. + */ +fn get_last_n_lines(filename: &String, n: u64) -> String { + let mut file = match File::open(filename) { + Err(why) => panic!("Failed to open {}: {}", &filename, why), + Ok(file) => file, + }; + + file.seek(SeekFrom::End(0)).unwrap(); + + let mut file_size = file.stream_position().unwrap(); + let mut tailed_lines = 0; + let mut buf = [0; READ_BUFFER_SIZE]; + let mut tailed_output = String::new(); + while tailed_lines < n && file_size > 0 { + let read_size = min(file_size, READ_BUFFER_SIZE as u64); + // get next portion of data to read (crawl backwards) + file.seek(SeekFrom::Current(-(read_size as i64))).unwrap(); + + // adjust buf to the desired amount of data to read (max READ_BUFFER_SIZE) + // size of this slice automatically lets 'read_exact' how many bytes to read + let buf_slice = &mut buf[..read_size as usize]; + match file.read_exact(buf_slice) { + Err(e) => { + let pos = file.stream_position(); + panic!("Couldn't read data from position {pos:?}, err: {e}"); + } + _ => (), + } + + // Try to find newlines and mark amount of data that we would want to read from this buf slice + // Sometimes it can be lass than its max size as we may already found all sentences in this batch + let mut text_slice = &buf_slice[..]; + for (pos, ch) in buf_slice.iter().rev().enumerate() { + if matches!(ch, b'\n') { + tailed_lines += 1; + + if tailed_lines > n { + // subtract '-1' as we don't want to account for newline char which belongs just to string above us (11th) + text_slice = &buf_slice[(read_size as usize - pos - 1)..]; + break; + } + } + } + + // NOTE: Seems that we can also pass &text_slice here - no difference? + if let Ok(str) = str::from_utf8(text_slice) { + // TODO: It doesn't seem to be efficient... + tailed_output = format!("{}{}", str, tailed_output); + } + + // get backwards once again as file pointer was moved by previous 'read_exact' call + file_size = file.seek(SeekFrom::Current(-(read_size as i64))).unwrap(); + } + + // TODO: This method was returning 'tailed_output' even without any explicit instructions + // WHY? + tailed_output +} + +fn main() { + let args = Args::parse(); + + let tailed_output = get_last_n_lines(&args.filename, args.n); + print!("{tailed_output:?}"); +}