Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Turn quadratic time on number of impl blocks into linear time
Previously, if you had a lot of inherent impl blocks on a type like:

struct Foo;

impl Foo { fn foo_1() {} }
...
impl Foo { fn foo_100_000() {} }

The compiler would be very slow at processing it, because
an internal algorithm would run in O(n^2), where n is the number
of impl blocks. Now, we add a new algorithm that allocates but
is faster asymptotically.

If there is an overlap between multiple impl blocks in terms of
identifiers, we still run a O(m^2) algorithm on groups of impl
blocks that have overlaps, but that m refers to the size of the
connected component, which is hopefully smaller than the n
that refers to the sum of all connected components.
  • Loading branch information
est31 committed Dec 7, 2020
1 parent 9709ef1 commit 7208a01
Showing 1 changed file with 147 additions and 5 deletions.
152 changes: 147 additions & 5 deletions compiler/rustc_typeck/src/coherence/inherent_impls_overlap.rs
@@ -1,10 +1,13 @@
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_errors::struct_span_err;
use rustc_hir as hir;
use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
use rustc_hir::itemlikevisit::ItemLikeVisitor;
use rustc_middle::ty::{self, TyCtxt};
use rustc_span::Symbol;
use rustc_trait_selection::traits::{self, SkipLeakCheck};
use smallvec::SmallVec;
use std::collections::hash_map::Entry;

pub fn crate_inherent_impls_overlap_check(tcx: TyCtxt<'_>, crate_num: CrateNum) {
assert_eq!(crate_num, LOCAL_CRATE);
Expand Down Expand Up @@ -45,7 +48,7 @@ impl InherentOverlapChecker<'tcx> {
false
}

fn compare_hygienically(&self, item1: &'tcx ty::AssocItem, item2: &'tcx ty::AssocItem) -> bool {
fn compare_hygienically(&self, item1: &ty::AssocItem, item2: &ty::AssocItem) -> bool {
// Symbols and namespace match, compare hygienically.
item1.kind.namespace() == item2.kind.namespace()
&& item1.ident.normalize_to_macros_2_0() == item2.ident.normalize_to_macros_2_0()
Expand Down Expand Up @@ -134,10 +137,149 @@ impl ItemLikeVisitor<'v> for InherentOverlapChecker<'tcx> {
.map(|impl_def_id| (impl_def_id, self.tcx.associated_items(*impl_def_id)))
.collect::<SmallVec<[_; 8]>>();

for (i, &(&impl1_def_id, impl_items1)) in impls_items.iter().enumerate() {
for &(&impl2_def_id, impl_items2) in &impls_items[(i + 1)..] {
if self.impls_have_common_items(impl_items1, impl_items2) {
self.check_for_overlapping_inherent_impls(impl1_def_id, impl2_def_id);
// Perform a O(n^2) algorithm for small n,
// otherwise switch to an allocating algorithm with
// faster asymptotic runtime.
if impls.len() < 30 {
for (i, &(&impl1_def_id, impl_items1)) in impls_items.iter().enumerate() {
for &(&impl2_def_id, impl_items2) in &impls_items[(i + 1)..] {
if self.impls_have_common_items(impl_items1, impl_items2) {
self.check_for_overlapping_inherent_impls(
impl1_def_id,
impl2_def_id,
);
}
}
}
} else {
// Build a set of connected regions of impl blocks.
// Two impl blocks are regarded as connected if they share
// an item with the same unhygienic identifier.
// After we have assembled the connected regions,
// run the O(n^2) algorithm on each connected region.
// This is advantageous to running the algorithm over the
// entire graph when there are many connected regions.

struct ConnectedRegion {
idents: SmallVec<[Symbol; 8]>,
impl_blocks: FxHashSet<usize>,
}
// Highest connected region id
let mut highest_region_id = 0;
let mut connected_region_ids = FxHashMap::default();
let mut connected_regions = FxHashMap::default();

for (i, &(&_impl_def_id, impl_items)) in impls_items.iter().enumerate() {
if impl_items.len() == 0 {
continue;
}
// First obtain a list of existing connected region ids
let mut idents_to_add = SmallVec::<[Symbol; 8]>::new();
let ids = impl_items
.in_definition_order()
.filter_map(|item| {
let entry = connected_region_ids.entry(item.ident.name);
if let Entry::Occupied(e) = &entry {
Some(*e.get())
} else {
idents_to_add.push(item.ident.name);
None
}
})
.collect::<FxHashSet<usize>>();
match ids.len() {
0 | 1 => {
let id_to_set = if ids.len() == 0 {
// Create a new connected region
let region = ConnectedRegion {
idents: idents_to_add,
impl_blocks: std::iter::once(i).collect(),
};
connected_regions.insert(highest_region_id, region);
(highest_region_id, highest_region_id += 1).0
} else {
// Take the only id inside the list
let id_to_set = *ids.iter().next().unwrap();
let region = connected_regions.get_mut(&id_to_set).unwrap();
region.impl_blocks.insert(i);
region.idents.extend_from_slice(&idents_to_add);
id_to_set
};
let (_id, region) = connected_regions.iter().next().unwrap();
// Update the connected region ids
for ident in region.idents.iter() {
connected_region_ids.insert(*ident, id_to_set);
}
}
_ => {
// We have multiple connected regions to merge.
// In the worst case this might add impl blocks
// one by one and can thus be O(n^2) in the size
// of the resulting final connected region, but
// this is no issue as the final step to check
// for overlaps runs in O(n^2) as well.

// Take the smallest id from the list
let id_to_set = *ids.iter().min().unwrap();

// Sort the id list so that the algorithm is deterministic
let mut ids = ids.into_iter().collect::<SmallVec<[_; 8]>>();
ids.sort();

let mut region = connected_regions.remove(&id_to_set).unwrap();
region.idents.extend_from_slice(&idents_to_add);
region.impl_blocks.insert(i);

for &id in ids.iter() {
if id == id_to_set {
continue;
}
let r = connected_regions.remove(&id).unwrap();
// Update the connected region ids
for ident in r.idents.iter() {
connected_region_ids.insert(*ident, id_to_set);
}
region.idents.extend_from_slice(&r.idents);
region.impl_blocks.extend(r.impl_blocks);
}
connected_regions.insert(id_to_set, region);
}
}
}

debug!(
"churning through {} components (sum={}, avg={}, var={}, max={})",
connected_regions.len(),
impls.len(),
impls.len() / connected_regions.len(),
{
let avg = impls.len() / connected_regions.len();
let s = connected_regions
.iter()
.map(|r| r.1.impl_blocks.len() as isize - avg as isize)
.map(|v| v.abs() as usize)
.sum::<usize>();
s / connected_regions.len()
},
connected_regions.iter().map(|r| r.1.impl_blocks.len()).max().unwrap()
);
// List of connected regions is built. Now, run the overlap check
// for each pair of impl blocks in the same connected region.
for (_id, region) in connected_regions.into_iter() {
let mut impl_blocks =
region.impl_blocks.into_iter().collect::<SmallVec<[_; 8]>>();
impl_blocks.sort();
for (i, &impl1_items_idx) in impl_blocks.iter().enumerate() {
let &(&impl1_def_id, impl_items1) = &impls_items[impl1_items_idx];
for &impl2_items_idx in impl_blocks[(i + 1)..].iter() {
let &(&impl2_def_id, impl_items2) = &impls_items[impl2_items_idx];
if self.impls_have_common_items(impl_items1, impl_items2) {
self.check_for_overlapping_inherent_impls(
impl1_def_id,
impl2_def_id,
);
}
}
}
}
}
Expand Down

0 comments on commit 7208a01

Please sign in to comment.