Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Currently, the logging dataflows activate once every introspection interval to process data. This can cause a significant buildup of data before it is processed. In addition to activating the replay operator periodically, also activate it once a sufficient amount of data has been accumulated. Ideally, the data processing should be activated as soon as some data is available, but this could cause a situation where activating causes additional data to be produced. For this reason, the dataflow activates after 32 batches have been published by the logging infrastructure. This seems to be a reasonable trade-off between memory (currently at most 8KiB per batch) and latency. The time-based activation is still required in case there is no data for a specific dataflow. In this case, we still need to advance the clock, which the time-based activation takes care of. Signed-off-by: Moritz Hoffmann <mh@materialize.com>
- Loading branch information
Showing
9 changed files
with
388 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
// Copyright Materialize, Inc. and contributors. All rights reserved. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the LICENSE file. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0. | ||
|
||
//! Utilities to activate dataflows based on external triggers. | ||
|
||
use std::cell::RefCell; | ||
use std::rc::Rc; | ||
use timely::scheduling::Activator; | ||
|
||
/// An shared handle to multiple activators with support for triggering and acknowledging | ||
/// activations. | ||
#[derive(Debug, Clone)] | ||
pub struct RcActivator { | ||
inner: Rc<RefCell<ActivatorInner>>, | ||
} | ||
|
||
impl RcActivator { | ||
/// Construct a new [RcActivator] with the given name. | ||
pub fn new(name: String) -> Self { | ||
let inner = ActivatorInner::new(name); | ||
Self { | ||
inner: Rc::new(RefCell::new(inner)), | ||
} | ||
} | ||
|
||
/// Register an additional [Activator] with this [RcActivator] | ||
pub fn register(&mut self, activator: Activator) { | ||
self.inner.borrow_mut().register(activator) | ||
} | ||
|
||
/// Activate all contained activators. | ||
/// | ||
/// The implementation is free to ignore activations and only release them once a sufficient | ||
/// volume has been accumulated. | ||
pub fn activate(&mut self) { | ||
self.inner.borrow_mut().activate() | ||
} | ||
|
||
/// Acknowledge the activation, which enables new activations to be scheduled. | ||
pub fn ack(&mut self) { | ||
self.inner.borrow_mut().ack() | ||
} | ||
} | ||
|
||
#[derive(Debug)] | ||
struct ActivatorInner { | ||
activated: usize, | ||
activators: Vec<Activator>, | ||
name: String, | ||
} | ||
|
||
impl ActivatorInner { | ||
const THRESHOLD: usize = 32; | ||
|
||
fn new(name: String) -> Self { | ||
Self { | ||
name, | ||
activated: 0, | ||
activators: Vec::new(), | ||
} | ||
} | ||
|
||
fn register(&mut self, activator: Activator) { | ||
self.activators.push(activator) | ||
} | ||
|
||
fn activate(&mut self) { | ||
if self.activators.is_empty() { | ||
return; | ||
} | ||
self.activated += 1; | ||
if self.activated == ActivatorInner::THRESHOLD { | ||
for activator in &self.activators { | ||
activator.activate(); | ||
} | ||
} | ||
} | ||
|
||
fn ack(&mut self) { | ||
self.activated = 0; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
// Copyright Materialize, Inc. and contributors. All rights reserved. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the LICENSE file. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0. | ||
|
||
//! Collection abstractions to handle `(key, value)` transparently. | ||
|
||
use crate::operator::CollectionExt; | ||
use differential_dataflow::difference::{Abelian, Semigroup}; | ||
use differential_dataflow::{AsCollection, Collection, Data}; | ||
use std::ops::Mul; | ||
use timely::dataflow::operators::generic::operator; | ||
use timely::dataflow::Scope; | ||
|
||
/// A collection of rows split in a key and value | ||
#[derive(Clone)] | ||
pub struct KeyedCollection<S: Scope, D: Data, R: Semigroup> { | ||
collection: Collection<S, (D, D), R>, | ||
} | ||
|
||
impl<S: Scope, D: Data, R: Semigroup> KeyedCollection<S, D, R> { | ||
/// Construct a new KeyedCollection | ||
pub fn new(collection: Collection<S, (D, D), R>) -> Self { | ||
Self { collection } | ||
} | ||
} | ||
|
||
impl<S: Scope, D: Data, R: Semigroup> AsCollection<S, D, R> for KeyedCollection<S, D, R> { | ||
// TODO: We want this gone | ||
fn as_collection(&self) -> Collection<S, D, R> { | ||
self.collection.map(|(_k, v)| v) | ||
} | ||
} | ||
|
||
impl<S: Scope, D: Data, R: Semigroup> From<Collection<S, (D, D), R>> for KeyedCollection<S, D, R> { | ||
fn from(collection: Collection<S, (D, D), R>) -> Self { | ||
Self::new(collection) | ||
} | ||
} | ||
|
||
impl<S: Scope, D: Data, R: Abelian> KeyedCollection<S, D, R> { | ||
pub fn negate(&self) -> Self { | ||
self.collection.negate().into() | ||
} | ||
} | ||
|
||
impl<S: Scope, D: Data, R: Semigroup> CollectionExt<S, (D, D), R> for KeyedCollection<S, D, R> { | ||
fn empty(scope: &S) -> Collection<S, (D, D), R> { | ||
operator::empty(scope).as_collection() | ||
} | ||
|
||
fn flat_map_fallible<D2, E, I, L>( | ||
&self, | ||
name: &str, | ||
logic: L, | ||
) -> (Collection<S, D2, R>, Collection<S, E, R>) | ||
where | ||
D2: timely::Data, | ||
E: timely::Data, | ||
I: IntoIterator<Item = Result<D2, E>>, | ||
L: FnMut((D, D)) -> I + 'static, | ||
{ | ||
todo!() | ||
} | ||
|
||
fn explode_fallible<D2, E, R2, I, L>( | ||
&self, | ||
name: &str, | ||
logic: L, | ||
) -> ( | ||
Collection<S, D2, <R2 as Mul<R>>::Output>, | ||
Collection<S, E, <R2 as Mul<R>>::Output>, | ||
) | ||
where | ||
D2: timely::Data, | ||
E: timely::Data, | ||
R2: Semigroup + Mul<R>, | ||
<R2 as Mul<R>>::Output: timely::Data + Semigroup, | ||
I: IntoIterator<Item = (Result<D2, E>, R2)>, | ||
L: FnMut((D, D)) -> I + 'static, | ||
{ | ||
todo!() | ||
} | ||
} | ||
|
||
/// Concatenates multiple collections. | ||
/// | ||
/// This method has the effect of a sequence of calls to `concat`, but it does | ||
/// so in one operator rather than a chain of many operators. | ||
/// | ||
/// # Examples | ||
/// | ||
/// ``` | ||
/// extern crate timely; | ||
/// extern crate differential_dataflow; | ||
/// | ||
/// use differential_dataflow::input::Input; | ||
/// | ||
/// fn main() { | ||
/// ::timely::example(|scope| { | ||
/// | ||
/// let data = scope.new_collection_from(1 .. 10).1; | ||
/// | ||
/// let odds = data.filter(|x| x % 2 == 1); | ||
/// let evens = data.filter(|x| x % 2 == 0); | ||
/// | ||
/// differential_dataflow::collection::concatenate(scope, vec![odds, evens]) | ||
/// .assert_eq(&data); | ||
/// }); | ||
/// } | ||
/// ``` | ||
pub fn concatenate<G, D, R, I>(scope: &mut G, iterator: I) -> KeyedCollection<G, D, R> | ||
where | ||
G: Scope, | ||
D: Data, | ||
R: Semigroup, | ||
I: IntoIterator<Item = KeyedCollection<G, D, R>>, | ||
{ | ||
differential_dataflow::collection::concatenate( | ||
scope, | ||
iterator.into_iter().map(|kc| kc.collection), | ||
) | ||
.into() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.