Skip to content

Commit

Permalink
first stab at normalization can reduce the count by 245 versions, but… (#16)
Browse files Browse the repository at this point in the history
…that's unexpectedly low. What's the matter here?
  • Loading branch information
Byron committed Aug 28, 2022
1 parent 68ff142 commit ae3f971
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 13 deletions.
27 changes: 17 additions & 10 deletions src/index/diff/delegate.rs
Expand Up @@ -3,10 +3,11 @@ use crate::{Change, CrateVersion};
use git_repository as git;
use git_repository::diff::tree::visit::Action;
use similar::ChangeTag;
use std::collections::BTreeSet;

pub(crate) struct Delegate<'repo> {
changes: Vec<Change>,
deletes: Vec<CrateVersion>,
delete_version_ids: BTreeSet<u64>,
file_name: git::bstr::BString,
err: Option<Error>,
repo: &'repo git::Repository,
Expand All @@ -16,7 +17,7 @@ impl<'repo> Delegate<'repo> {
pub fn from_repo(repo: &'repo git::Repository) -> Self {
Delegate {
changes: Vec::new(),
deletes: Vec::new(),
delete_version_ids: BTreeSet::new(),
err: None,
file_name: Default::default(),
repo,
Expand Down Expand Up @@ -74,7 +75,7 @@ impl<'repo> Delegate<'repo> {
Change::Added(version)
});
} else {
self.deletes.push(version);
self.delete_version_ids.insert(version.id());
}
}
ChangeTag::Equal => {}
Expand All @@ -85,15 +86,21 @@ impl<'repo> Delegate<'repo> {
}
Ok(())
}
pub fn into_result(self) -> Result<Vec<Change>, Error> {
// assert_eq!(
// self.deletes.len(),
// 0,
// "TODO: handle apparent version deletions"
// );
pub fn into_result(mut self) -> Result<Vec<Change>, Error> {
match self.err {
Some(err) => Err(err),
None => Ok(self.changes),
None => {
if !self.delete_version_ids.is_empty() {
let deleted_version_ids = &self.delete_version_ids;
self.changes.retain(|change| match change {
Change::Added(v) | Change::Yanked(v) => {
!deleted_version_ids.contains(&v.id())
}
Change::Deleted { .. } => true,
})
}
Ok(self.changes)
}
}
}
}
Expand Down
17 changes: 16 additions & 1 deletion src/types.rs
Expand Up @@ -2,6 +2,7 @@ use std::collections::HashMap;

use git_repository as git;
use std::fmt;
use std::hash::{Hash, Hasher};

/// A wrapper for a repository of the crates.io index.
pub struct Index {
Expand Down Expand Up @@ -88,8 +89,22 @@ pub struct CrateVersion {
pub dependencies: Vec<Dependency>,
}

impl CrateVersion {
    /// Compute a 64-bit fingerprint of this crate version.
    ///
    /// The `name`, `yanked`, `version`, `checksum` and `dependencies` fields are fed,
    /// in exactly that order, into a freshly created `DefaultHasher`, and the resulting
    /// digest is returned. Values whose listed fields are equal produce the same id.
    ///
    /// NOTE(review): this is a hash, not a unique key, so distinct versions could in
    /// principle collide. `DefaultHasher`'s algorithm is also not guaranteed to be stable
    /// across Rust releases, so the id should presumably only be compared in-process.
    pub(crate) fn id(&self) -> u64 {
        use std::collections::hash_map::DefaultHasher;

        let mut hasher = DefaultHasher::new();
        // The feeding order is part of the fingerprint: changing it changes every id.
        Hash::hash(&self.name, &mut hasher);
        Hash::hash(&self.yanked, &mut hasher);
        Hash::hash(&self.version, &mut hasher);
        Hash::hash(&self.checksum, &mut hasher);
        Hash::hash(&self.dependencies, &mut hasher);
        hasher.finish()
    }
}

/// A single dependency of a specific crate version
#[derive(Clone, serde::Serialize, serde::Deserialize, Ord, PartialOrd, Eq, PartialEq, Debug)]
#[derive(
Clone, serde::Serialize, serde::Deserialize, Ord, PartialOrd, Eq, PartialEq, Debug, Hash,
)]
pub struct Dependency {
/// The crate name
pub name: String,
Expand Down
2 changes: 1 addition & 1 deletion tests/index/changes_between_commits.rs
Expand Up @@ -71,7 +71,7 @@ fn normalization() -> crate::Result {
let changes = changes(index_ro()?, ":/normalize")?;
assert_eq!(
changes.len(),
2356, // should be 0
2111, // should be 0
"normalization changes the representation, but the data itself stays the same, BUT we can't do it yet"
);
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion tests/index/mod.rs
Expand Up @@ -84,7 +84,7 @@ fn quick_changes_since_last_fetch() -> crate::Result {
"seen branch was updated again"
);
assert_eq!(
num_seen_after_reset, 2357,
num_seen_after_reset, 2112,
"normalization has no changes, but the commit before has one"
);

Expand Down

0 comments on commit ae3f971

Please sign in to comment.