Less memory for look up mode, faster start
Byron committed Aug 6, 2020
1 parent 70ba33a commit 395c7e7
Showing 4 changed files with 12 additions and 15 deletions.
2 changes: 1 addition & 1 deletion git-odb/src/pack/index/access.rs
@@ -126,7 +126,7 @@ impl index::File {
         None
     }
 
-    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a> {
+    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a + Send> {
         match self.kind {
             index::Kind::V2 => Box::new(self.iter_v2()),
             index::Kind::V1 => Box::new(self.iter_v1()),
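The added `Send` bound is what lets the boxed entry iterator be handed to worker threads in the lookup traversal below. A minimal sketch of why the bound matters; the `consume_in_thread` helper is invented for illustration and is not part of git-odb:

    use std::thread;

    // Without `+ Send`, this boxed iterator could not move into the spawned thread.
    fn consume_in_thread(iter: Box<dyn Iterator<Item = u32> + Send>) -> thread::JoinHandle<u32> {
        thread::spawn(move || iter.sum())
    }

    fn main() {
        let boxed: Box<dyn Iterator<Item = u32> + Send> = Box::new(0..10);
        assert_eq!(consume_in_thread(boxed).join().unwrap(), 45);
    }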
19 changes: 8 additions & 11 deletions git-odb/src/pack/index/traverse/lookup.rs
@@ -27,15 +27,9 @@ impl index::File {
             &mut <<P as Progress>::SubProgress as Progress>::SubProgress,
         ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>,
     {
-        // TODO: Doesn't need to be sorted, and doesn't need to be in memory
-        let index_entries =
-            util::index_entries_sorted_by_offset_ascending(self, root.add_child("collecting sorted index"));
-
-        let (chunk_size, thread_limit, available_cores) =
-            parallel::optimize_chunk_size_and_thread_limit(1000, Some(index_entries.len()), thread_limit, None);
-        let there_are_enough_entries_to_process = || index_entries.len() > chunk_size * available_cores;
-        // TODO: Use Chunks iterator here for dynamically generated chunks from iterators
-        let input_chunks = index_entries.chunks(chunk_size.max(chunk_size));
+        let (chunk_size, thread_limit, _) =
+            parallel::optimize_chunk_size_and_thread_limit(1000, Some(self.num_objects as usize), thread_limit, None);
+        let there_are_enough_entries_to_process = || self.num_objects > 10_000;
         let reduce_progress = parking_lot::Mutex::new({
             let mut p = root.add_child("Traversing");
             p.init(Some(self.num_objects()), Some("objects"));
@@ -52,10 +46,13 @@ impl index::File {
 
         in_parallel_if(
             there_are_enough_entries_to_process,
-            input_chunks,
+            util::Chunks {
+                iter: self.iter(),
+                size: chunk_size,
+            },
             thread_limit,
             state_per_thread,
-            |entries: &[index::Entry],
+            |entries: Vec<index::Entry>,
              (cache, ref mut processor, buf, progress)|
              -> Result<Vec<decode::Outcome>, Error> {
                 progress.init(Some(entries.len() as u32), Some("entries"));
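`util::Chunks` itself is not shown in this diff; it adapts any iterator into an iterator of owned `Vec` chunks, so index entries no longer need to be collected and sorted up front. A rough sketch of such an adapter, under the assumption that it behaves like `slice::chunks` for iterators (the real type in git-odb may differ):

    struct Chunks<I> {
        iter: I,
        size: usize,
    }

    impl<I: Iterator> Iterator for Chunks<I> {
        type Item = Vec<I::Item>;

        fn next(&mut self) -> Option<Self::Item> {
            // Pull up to `size` items; yield a partial final chunk, then stop.
            let chunk: Vec<_> = self.iter.by_ref().take(self.size).collect();
            if chunk.is_empty() {
                None
            } else {
                Some(chunk)
            }
        }
    }

This also explains the closure signature change from `&[index::Entry]` to `Vec<index::Entry>`: each worker now receives an owned chunk instead of a slice into one large allocation.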
2 changes: 1 addition & 1 deletion src/plumbing/lean.rs
@@ -130,7 +130,7 @@ mod options {
         /// owned objects, causing plenty of allocation to occur.
         pub re_encode: bool,
 
-        #[argh(option)]
+        #[argh(option, short = 'a')]
         /// the algorithm used to verify the pack. They differ in costs.
         ///
         /// Possible values are "less-time" and "less-memory". Default is "less-memory".
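With `short = 'a'`, argh accepts `-a` as an abbreviation for the long option derived from the field name. A self-contained illustration; the struct and field names are invented for the example, only the attribute mirrors the change above:

    use argh::FromArgs;

    /// Verify a pack.
    #[derive(FromArgs)]
    struct Args {
        #[argh(option, short = 'a')]
        /// the algorithm used to verify the pack
        algorithm: Option<String>,
    }

    fn main() {
        // Both `--algorithm less-memory` and `-a less-memory` now parse.
        let args: Args = argh::from_env();
        println!("{:?}", args.algorithm);
    }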
4 changes: 2 additions & 2 deletions tasks.md
@@ -24,8 +24,8 @@
 * **fixes**
     * [x] figure out why resolving the linux pack is so slow and fix it
     * [x] Allow to provide a pre-resolve phase to initialize the resolver
-    * [ ] Use Tree in verify impl
-    * [ ] fix lookup todos
+    * [x] Use Tree in verify impl
+    * [x] fix lookup todos
     * [ ] nicer errors with descriptive messages
     * [ ] handle ctrl+c similarly to the pretty version to prevent leakage (or find a way to use
       system temp files)
