Skip to content

Commit

Permalink
aw-transform: Add documentation to transforms
Browse files Browse the repository at this point in the history
  • Loading branch information
johan-bjareholt committed Jul 31, 2020
1 parent 4e4b364 commit b04d38b
Show file tree
Hide file tree
Showing 9 changed files with 131 additions and 1 deletion.
19 changes: 19 additions & 0 deletions aw-transform/src/chunk.rs
@@ -1,5 +1,24 @@
use aw_models::Event;

/// Chunks together events with the same key
///
/// NOTE: In most cases you should use merge_events_by_keys instead; this
/// transform exists mostly for backwards compatibility with older versions
/// of aw-webui.
/// NOTE: Does not support sub-chunking, which aw-server-python supports.
/// Without sub-chunking it is pretty much the same as merge_events_by_keys.
///
/// # Example
/// ```ignore
/// key: a
/// input:
/// { duration: 1.0, data: { "a": 1, "b": 1 } }
/// { duration: 1.0, data: { "a": 1, "b": 2 } }
/// { duration: 1.0, data: { "a": 2, "b": 1 } }
/// output:
/// { duration: 2.0, data: { "a": 1 } }
/// { duration: 1.0, data: { "a": 2 } }
/// ```
pub fn chunk_events_by_key(events: Vec<Event>, key: &str) -> Vec<Event> {
let mut chunked_events: Vec<Event> = Vec::new();
for event in events {
Expand Down
19 changes: 19 additions & 0 deletions aw-transform/src/filter_keyvals.rs
Expand Up @@ -3,6 +3,15 @@ use serde_json::value::Value;

use aw_models::Event;

/// Drops events not matching the specified key and value(s)
///
/// # Example
/// ```ignore
/// key: a
/// vals: [1,2]
/// input: [a:1][a:2][a:3][b:4]
/// output: [a:1][a:2]
/// ```
pub fn filter_keyvals(mut events: Vec<Event>, key: &str, vals: &[Value]) -> Vec<Event> {
let mut filtered_events = Vec::new();
for event in events.drain(..) {
Expand All @@ -18,6 +27,16 @@ pub fn filter_keyvals(mut events: Vec<Event>, key: &str, vals: &[Value]) -> Vec<
filtered_events
}

/// Drops events not matching the regex on the value for a specified key
/// Will only match if the value is a string
///
/// # Example
/// ```ignore
/// key: a
/// regex: "[A-Z]+"
/// input: [a:"HELLO"][a:"hello"][a:3][b:"HELLO"]
/// output: [a:"HELLO"]
/// ```
pub fn filter_keyvals_regex(mut events: Vec<Event>, key: &str, regex: &Regex) -> Vec<Event> {
let mut filtered_events = Vec::new();

Expand Down
14 changes: 14 additions & 0 deletions aw-transform/src/filter_period.rs
@@ -1,5 +1,19 @@
use aw_models::Event;

/// Removes events not intersecting with the provided filter_events
///
/// Usually used to filter events from a bucket that is unaware of whether the user is active,
/// using events from a bucket that is aware of whether the user is at the computer or not.
/// For example, the events from aw-watcher-window should be passed to filter_period_intersect
/// together with the "not-afk" events from aw-watcher-afk to get events whose durations only
/// cover the time when the user is at the computer.
///
/// # Example
/// ```ignore
/// events: [a ][b ]
/// filter_events: [ ] [ ]
/// output: [a ] [b ]
/// ```
pub fn filter_period_intersect(events: &[Event], filter_events: &[Event]) -> Vec<Event> {
let mut filtered_events = Vec::new();
for filter in filter_events {
Expand Down
1 change: 1 addition & 0 deletions aw-transform/src/find_bucket.rs
@@ -1,3 +1,4 @@
/// Finds the first bucket which starts with the specified string
pub fn find_bucket<'a>(
bucket_filter: &str,
bucketnames: impl IntoIterator<Item = &'a String>,
Expand Down
10 changes: 10 additions & 0 deletions aw-transform/src/flood.rs
Expand Up @@ -2,6 +2,16 @@ use aw_models::Event;

use crate::sort_by_timestamp;

/// Floods events to the nearest neighbouring event if within the specified pulsetime
///
/// Also merges events if they have the same data and are within the pulsetime
///
/// # Example
/// ```ignore
/// pulsetime: 1 second (one space)
/// input: [a] [a] [b][b] [b][c]
/// output: [a ][b ] [b][c]
/// ```
pub fn flood(events: Vec<Event>, pulsetime: chrono::Duration) -> Vec<Event> {
let mut warned_negative_gap_safe = false;
let mut warned_negative_gap_unsafe = false;
Expand Down
9 changes: 9 additions & 0 deletions aw-transform/src/heartbeat.rs
@@ -1,5 +1,14 @@
use aw_models::Event;

/// Returns a merged event if two events have the same data and are within the pulsetime
///
/// # Example
///
/// ```ignore
/// pulsetime: 1 second (one space)
/// input: [a] [a] [a][b]
/// output: [a ] [a][b]
/// ```
pub fn heartbeat(last_event: &Event, heartbeat: &Event, pulsetime: f64) -> Option<Event> {
// Verify that data is the same
if heartbeat.data != last_event.data {
Expand Down
37 changes: 37 additions & 0 deletions aw-transform/src/merge.rs
Expand Up @@ -2,6 +2,43 @@ use std::collections::HashMap;

use aw_models::Event;

/// Merge events with the same values at the specified keys
///
/// Regardless of whether the events are neighbouring or not, this transform merges
/// all events with the same values at the specified keys.
/// The timestamp will be the timestamp of the first event with a specific key value.
///
/// # Example 1
/// A simple example only using one key
///
/// ```ignore
/// keys: ["a"]
/// input:
/// { duration: 1.0, data: { "a": 1 } }
/// { duration: 1.0, data: { "a": 1 } }
/// { duration: 1.0, data: { "a": 2 } }
/// { duration: 1.0, data: { "b": 1 } }
/// { duration: 1.0, data: { "a": 1 } }
/// output:
/// { duration: 3.0, data: { "a": 1 } }
/// { duration: 1.0, data: { "a": 2 } }
/// { duration: 1.0, data: { "b": 1 } }
/// ```
///
/// # Example 2
/// A more complex example using two keys
/// ```ignore
/// keys: ["a", "b"]
/// input:
/// { duration: 1.0, data: { "a": 1, "b": 1 } }
/// { duration: 1.0, data: { "a": 2, "b": 2 } }
/// { duration: 1.0, data: { "a": 1, "b": 1 } }
/// { duration: 1.0, data: { "a": 1, "b": 2 } }
/// output:
/// { duration: 2.0, data: { "a": 1, "b": 1 } }
/// { duration: 1.0, data: { "a": 2, "b": 2 } }
/// { duration: 1.0, data: { "a": 1, "b": 2 } }
/// ```
#[allow(clippy::map_entry)]
pub fn merge_events_by_keys(events: Vec<Event>, keys: Vec<String>) -> Vec<Event> {
if keys.is_empty() {
Expand Down
3 changes: 2 additions & 1 deletion aw-transform/src/sort.rs
@@ -1,11 +1,12 @@
use aw_models::Event;

/// Orders events by their `timestamp` field, earliest first.
///
/// Takes ownership of `events`, sorts the vector in place and returns it.
/// `sort_by` is a stable sort, so events whose timestamps compare equal keep
/// their original relative order.
pub fn sort_by_timestamp(mut events: Vec<Event>) -> Vec<Event> {
    events.sort_by(|earlier, later| Ord::cmp(&earlier.timestamp, &later.timestamp));
    events
}

/* Highest first */
/// Sort a list of events by duration with the highest duration first
pub fn sort_by_duration(mut events: Vec<Event>) -> Vec<Event> {
events.sort_by(|e1, e2| e2.duration.cmp(&e1.duration));
events
Expand Down
20 changes: 20 additions & 0 deletions aw-transform/src/split_url.rs
@@ -1,6 +1,26 @@
use aw_models::Event;
use serde_json::value::Value;

/// Adds $protocol, $domain, $path and $params keys for events with a "url" key
///
/// A generated field is only added if the corresponding part of the url exists; for example,
/// if a url does not have a path the $path value will not be set at all.
///
/// # Example
/// ```ignore
/// input: {
/// "data": {
/// "url": "http://google.com/test"
/// }
/// }
/// output: {
/// "data": {
/// "$domain": "google.com",
/// "$path": "/test",
/// "$protocol": "http"
/// }
/// }
/// ```
pub fn split_url_event(event: &mut Event) {
use rocket::http::uri::Absolute;
let uri_str = match event.data.get("url") {
Expand Down

0 comments on commit b04d38b

Please sign in to comment.