Skip to content

Commit

Permalink
Merge branch 'baseline-improvements'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Nov 22, 2022
2 parents dfaf1be + 87e49b5 commit a80c7fa
Show file tree
Hide file tree
Showing 6 changed files with 299 additions and 56 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/cron.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: cron

on:
  schedule:
    # Runs twice a day, at 01:00 and 13:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 13,1 * * *'
  workflow_dispatch:

jobs:
  stress:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: Swatinem/rust-cache@v2
      - name: baseline
        # Run the test through cargo: none of the steps above provide a `baseline-atomic` executable.
        # `--release` is required as the test in `tests/baseline_atomic.rs` is ignored when
        # debug assertions are enabled.
        run: cargo test --release --test baseline_atomic
154 changes: 147 additions & 7 deletions src/index/diff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@ use std::sync::atomic::AtomicBool;
mod delegate;
use delegate::Delegate;

/// The order we maintain for the produced changes.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Order {
/// Compare provided trees or commits without applying any other logic, with the order being influenced by
/// factors like hashmaps.
///
/// The benefit of this mode is optimal performance, as only one diff is created.
ImplementationDefined,
/// If the provided revisions are commits, single-step through the history that connects them to maintain
/// the order in which changes were submitted to the crates-index for all user-defined changes.
///
/// Admin changes are still implementation defined, but typically involve only deletions.
///
/// The shortcoming of this approach is that each pair of commits has to be diffed individually, increasing
/// the amount of work linearly.
AsInCratesIndex,
}

/// The error returned by methods dealing with obtaining index changes.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
Expand Down Expand Up @@ -49,9 +67,22 @@ pub enum Error {

/// Find changes without modifying the underlying repository
impl Index {
/// As `peek_changes_with_options`, but without the options.
/// As `peek_changes_with_options()`, but without the options.
pub fn peek_changes(&self) -> Result<(Vec<Change>, git::hash::ObjectId), Error> {
self.peek_changes_with_options(git::progress::Discard, &AtomicBool::default())
self.peek_changes_with_options(
git::progress::Discard,
&AtomicBool::default(),
Order::ImplementationDefined,
)
}

/// As `peek_changes()`, but provides changes in the order in which they were recorded in the crates index.
///
/// This is a shortcut for `peek_changes_with_options()` with `Order::AsInCratesIndex`, without progress
/// reporting and without the ability to interrupt the operation.
///
/// Note that the order silently degrades to `Order::ImplementationDefined` if the history between the
/// last seen commit and the current one can't be traversed one commit at a time.
pub fn peek_changes_ordered(&self) -> Result<(Vec<Change>, git::hash::ObjectId), Error> {
    self.peek_changes_with_options(
        git::progress::Discard,
        &AtomicBool::default(),
        // This is what makes the result ordered; `ImplementationDefined` would make this method
        // indistinguishable from `peek_changes()`.
        Order::AsInCratesIndex,
    )
}

/// Return all `Change`s that are observed between the last time `peek_changes*(…)` was called
Expand All @@ -63,6 +94,9 @@ impl Index {
/// If one would set the `last_seen_reference()` to that object, the effect is exactly the same
/// as if `fetch_changes(…)` had been called.
///
/// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow
/// these operations to be interrupted gracefully.
///
/// # Resource Usage
///
/// As this method fetches the git repository, loose objects or small packs may be created. Over time,
Expand All @@ -75,6 +109,7 @@ impl Index {
&self,
progress: P,
should_interrupt: &AtomicBool,
order: Order,
) -> Result<(Vec<Change>, git::hash::ObjectId), Error>
where
P: git::Progress,
Expand Down Expand Up @@ -159,15 +194,21 @@ impl Index {
.detach()
};

Ok((self.changes_between_commits(from, to)?, to))
Ok((
match order {
Order::ImplementationDefined => self.changes_between_commits(from, to)?,
Order::AsInCratesIndex => self.changes_between_ancestor_commits(from, to)?.0,
},
to,
))
}

/// Similar to `changes()`, but requires `from` and `to` objects to be provided. They may point
/// to either `Commit`s or `Tree`s.
///
/// # Returns
///
/// A list of atomic chanes that were performed on the index
/// A list of atomic changes that were performed on the index
/// between the two revisions.
/// The changes are grouped by the crate they belong to.
/// The order of the changes for each crate are **non-deterministic**.
Expand All @@ -194,13 +235,106 @@ impl Index {
.for_each_to_obtain_tree(&to, |change| delegate.handle(change))?;
delegate.into_result()
}

/// Like `changes()`, but with explicitly provided `ancestor_commit` and `current_commit`, where
/// `ancestor_commit` is expected to be part of the ancestry of `current_commit`.
///
/// The history connecting both commits is diffed one pair of consecutive commits at a time, and the
/// per-pair changes are concatenated from oldest to newest.
///
/// If the ancestry between both commits can't be established, this falls back to a single
/// `changes_between_commits()` call, which has no such requirement. This can happen if the
/// crates-index was squashed, for instance.
///
/// # Returns
///
/// The atomic changes that were performed on the index between the two revisions, along with the
/// `Order` these changes are actually in: `Order::AsInCratesIndex` if the history could be stepped
/// through, or `Order::ImplementationDefined` if the fallback was used.
pub fn changes_between_ancestor_commits(
    &self,
    ancestor_commit: impl Into<git::hash::ObjectId>,
    current_commit: impl Into<git::hash::ObjectId>,
) -> Result<(Vec<Change>, Order), Error> {
    let ancestor = ancestor_commit.into();
    let tip = current_commit.into();

    let ancestry = match self.commit_ancestry(ancestor, tip) {
        Some(ancestry) => ancestry,
        None => {
            // No connected history: diff both endpoints directly and report the weaker ordering.
            let changes = self.changes_between_commits(ancestor, tip)?;
            return Ok((changes, Order::ImplementationDefined));
        }
    };

    let mut all_changes = Vec::new();
    for pair in ancestry.windows(2) {
        // Each window is a (parent, child) pair of consecutive commits.
        let step = self.changes_between_commits(pair[0], pair[1])?;
        all_changes.extend(step);
    }
    Ok((all_changes, Order::AsInCratesIndex))
}

/// Return the list of commits like `ancestor_commit..=current_commit`, i.e. starting with
/// `ancestor_commit` and ending with `current_commit`, following first parents only.
///
/// Returns `None` if an object lookup or the traversal fails, or if `ancestor_commit` isn't
/// among the traversed commits.
///
/// # Panics
///
/// If the last commit of the resulting list isn't `current_commit`.
fn commit_ancestry(
&self,
ancestor_commit: git::hash::ObjectId,
current_commit: git::hash::ObjectId,
) -> Option<Vec<git::hash::ObjectId>> {
// The committer time of the ancestor is handed to the traversal below as its cutoff.
let time_in_seconds_since_epoch = ancestor_commit
.attach(&self.repo)
.object()
.ok()?
.try_into_commit()
.ok()?
.committer()
.ok()?
.time
.seconds_since_unix_epoch;
// Walk back from `current_commit`; any traversal error turns into `None`.
let mut commits = current_commit
.attach(&self.repo)
.ancestors()
.sorting(
git::traverse::commit::Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
time_in_seconds_since_epoch,
},
)
.first_parent_only()
.all()
.ok()?
.map(|c| c.map(|c| c.detach()))
.collect::<Result<Vec<_>, _>>()
.ok()?;

// The traversal is sorted newest-first, so reverse it to go from ancestor to tip.
commits.reverse();
if *commits.first()? != ancestor_commit {
// Try harder, commit time resolution is just a second - presumably other commits sharing the
// ancestor's timestamp can end up in front of it, so search for it and drop what precedes it.
let pos = commits.iter().position(|c| *c == ancestor_commit)?;
commits = commits[pos..].into();
}
// `commits` is non-empty here as `first()?` succeeded above, so indexing the last element is safe.
assert_eq!(
commits[commits.len() - 1],
current_commit,
"the iterator includes the tips"
);
Some(commits)
}
}

/// Find changes while changing the underlying repository in one way or another.
impl Index {
/// As `fetch_changes_with_options`, but without the options.
/// As `fetch_changes_with_options()`, but without the options.
pub fn fetch_changes(&self) -> Result<Vec<Change>, Error> {
self.fetch_changes_with_options(git::progress::Discard, &AtomicBool::default())
self.fetch_changes_with_options(
git::progress::Discard,
&AtomicBool::default(),
Order::ImplementationDefined,
)
}

/// As `fetch_changes()`, but returns an ordered result by requesting `Order::AsInCratesIndex`.
///
/// Progress is discarded and the operation can't be interrupted; use `fetch_changes_with_options()`
/// for control over both.
pub fn fetch_changes_ordered(&self) -> Result<Vec<Change>, Error> {
self.fetch_changes_with_options(
git::progress::Discard,
&AtomicBool::default(),
Order::AsInCratesIndex,
)
}

/// Return all `Change`s that are observed between the last time this method was called
Expand All @@ -209,6 +343,11 @@ impl Index {
/// The `last_seen_reference()` will be created or adjusted to point to the latest fetched
/// state, which causes this method to have a different result each time it is called.
///
/// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow
/// these operations to be interrupted gracefully.
///
/// `order` configures how changes should be ordered.
///
/// # Resource Usage
///
/// As this method fetches the git repository, loose objects or small packs may be created. Over time,
Expand All @@ -220,12 +359,13 @@ impl Index {
&self,
progress: P,
should_interrupt: &AtomicBool,
order: Order,
) -> Result<Vec<Change>, Error>
where
P: git::Progress,
P::SubProgress: 'static,
{
let (changes, to) = self.peek_changes_with_options(progress, should_interrupt)?;
let (changes, to) = self.peek_changes_with_options(progress, should_interrupt, order)?;
self.set_last_seen_reference(to)?;
Ok(changes)
}
Expand Down
5 changes: 4 additions & 1 deletion tests/baseline_atomic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@ mod shared;
#[cfg_attr(debug_assertions, ignore)]
#[test]
fn one_per_commit() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
shared::baseline(Step::OnePerCommit)
shared::baseline(Step::Realistic {
ordered_partitions: 2,
unordered_partitions: 38,
})
}
46 changes: 42 additions & 4 deletions tests/index/changes_between_commits.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::index::index_ro;
use crates_index_diff::index::diff::Order;
use crates_index_diff::{Change, CrateVersion, Index};
use git_repository as git;

Expand All @@ -13,20 +14,57 @@ fn directory_deletions_are_not_picked_up() -> crate::Result {
Ok(())
}

// Changes obtained by stepping through connected commits must come out in commit order,
// even if that contradicts the ascending order of the versions involved.
#[test]
fn ancestor_commits_retain_order() -> crate::Result {
let index = index_ro()?;
let repo = index.repository();
// Both revisions are looked up by commit message; `from` is the parent (`~1`) of the `0.3.1` yank.
let from = repo.rev_parse_single("@^{/Yanking crate `gitten#0.3.1`}~1")?;
let to = repo.rev_parse_single(":/Yanking crate `gitten#0.3.0`")?;
let (changes, order) = index.changes_between_ancestor_commits(from, to)?;

assert_eq!(order, Order::AsInCratesIndex, "both commits are connected");
assert_eq!(
changes.len(),
2,
"we did specify one more than we needed as the `from` commit would otherwise not be included (hence `~1`)"
);

// The yank of `0.3.1` is expected first, followed by the yank of `0.3.0`.
assert_eq!(
changes[0].yanked().expect("yanked").version,
"0.3.1",
"this goes against ascending order, but is what's recorded in the crates index"
);

assert_eq!(changes[1].yanked().expect("yanked").version, "0.3.0");
Ok(())
}

#[test]
fn updates_before_yanks_are_picked_up() -> crate::Result {
let index = index_ro()?;
let repo = index.repository();
let mut changes = index.changes_between_commits(
repo.rev_parse_single("@^{/updating ansi-color-codec 0.3.11}~1")?,
repo.rev_parse_single("@^{/yanking ansi-color-codec 0.3.5}")?,
)?;
let from = repo.rev_parse_single("@^{/updating ansi-color-codec 0.3.11}~1")?;
let to = repo.rev_parse_single("@^{/yanking ansi-color-codec 0.3.5}")?;
let mut changes = index.changes_between_commits(from, to)?;

assert_eq!(changes.len(), 3, "1 update and 2 yanks");
changes.sort_by_key(|change| change.versions()[0].version.clone());
assert_eq!(changes[0].added().expect("first updated").version, "0.3.11");
assert_eq!(changes[1].yanked().expect("second yanked").version, "0.3.4");
assert_eq!(changes[2].yanked().expect("third yanked").version, "0.3.5");

let (mut changes, order) = index.changes_between_ancestor_commits(from, to)?;
assert_eq!(
order,
Order::AsInCratesIndex,
"we provided commits, so ancestry should pan out"
);

assert_eq!(changes.len(), 3, "1 update and 2 yanks");
changes.sort_by_key(|change| change.versions()[0].version.clone());
assert_eq!(changes[0].added().expect("first updated").version, "0.3.11");
assert_eq!(changes[1].yanked().expect("second yanked").version, "0.3.4");
assert_eq!(changes[2].yanked().expect("third yanked").version, "0.3.5");
Ok(())
}

Expand Down
52 changes: 29 additions & 23 deletions tests/index/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crates_index_diff::index::diff::Order;
use crates_index_diff::Index;
use git_repository as git;
use git_repository::refs::transaction::PreviousValue;
Expand All @@ -13,30 +14,35 @@ const NUM_CHANGES_SINCE_EVER: usize = 3523;
fn peek_changes() -> crate::Result {
let mut index = index_ro()?;
index.branch_name = "main";
assert!(
index.last_seen_reference().is_err(),
"marker ref doesn't exist"
);
let (changes, last_seen_revision) =
index.peek_changes_with_options(git::progress::Discard, &AtomicBool::default())?;
assert_eq!(
changes.len(),
NUM_CHANGES_SINCE_EVER,
"all changes since the beginning of history"
);
for order in [Order::ImplementationDefined, Order::AsInCratesIndex] {
assert!(
index.last_seen_reference().is_err(),
"marker ref doesn't exist"
);
let (changes, last_seen_revision) = index.peek_changes_with_options(
git::progress::Discard,
&AtomicBool::default(),
order,
)?;
assert_eq!(
changes.len(),
NUM_CHANGES_SINCE_EVER,
"all changes since the beginning of history"
);

let origin_main = index
.repository()
.find_reference("refs/remotes/origin/main")?;
assert_eq!(
last_seen_revision,
origin_main.id(),
"last seen reference should the latest state from the clone"
);
assert!(
index.last_seen_reference().is_err(),
"the last-seen reference has not been created"
);
let origin_main = index
.repository()
.find_reference("refs/remotes/origin/main")?;
assert_eq!(
last_seen_revision,
origin_main.id(),
"last seen reference should the latest state from the clone"
);
assert!(
index.last_seen_reference().is_err(),
"the last-seen reference has not been created"
);
}
Ok(())
}

Expand Down
Loading

0 comments on commit a80c7fa

Please sign in to comment.