Skip to content
This repository has been archived by the owner on May 12, 2024. It is now read-only.

Groupby and Aggregates - Instructions and Codegen overhaul #26

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
21ef435
wip(codegen,ic): groupby and aggregates
Samyak2 Jan 26, 2023
c061302
feat(ic): remove Value. add inp, out regs to GroupBy.
Samyak2 Feb 1, 2023
b2f6c6c
refactor: rename ic to ir
Samyak2 Feb 1, 2023
5dfadc5
wip: special codegen for agg functions
Samyak2 Feb 2, 2023
fab2a33
feat: a special path for codegen of agg under group bys
Samyak2 Feb 21, 2023
82f3f0e
fix: build errors
Samyak2 Mar 1, 2023
cc208c2
fix: make tests build and some refactoring
Samyak2 Mar 2, 2023
519f1e5
fix(codegen): separate pre and post groupby projections
Samyak2 Mar 5, 2023
33a5577
fix(codegen): incorrect agg checks. Non-groupby tests pass now.
Samyak2 Mar 12, 2023
35d41d9
fix(codegen): remove unused fn
Samyak2 Mar 12, 2023
8637fef
test(codegen): fix groupby test output
Samyak2 Mar 12, 2023
f22dc6c
chore(codegen): remove unused field in context
Samyak2 Mar 12, 2023
0c25910
test(codegen): add case of multiple groupbys
Samyak2 Mar 14, 2023
1acd25e
fix(codegen): derive Debug for AggregateFunction
Samyak2 Mar 14, 2023
1210125
fix(codegen): throw error when having clause has inline aggs
Samyak2 Apr 9, 2023
2984aec
wip(codegen): IntermediateExpr and codegen for it
Samyak2 Apr 9, 2023
6e6511a
feat(codegen): impl IntermediateExpr-based codegen for function
Samyak2 May 5, 2023
58c8a75
feat(codegen): handle IntermediateExpr in non agg cases
Samyak2 May 5, 2023
a19f22e
fix(codegen): lifetime/borrowing issues
Samyak2 May 6, 2023
bb61b8d
feat(codegen): use IntermediateExpr in actual codegen for select
Samyak2 May 6, 2023
4835a9b
test(codegen): all tests are now passing
Samyak2 May 7, 2023
d8f1f37
chore(codegen): remove unused code and add distinct check back
Samyak2 May 7, 2023
da47614
chore(codegen): remove more unused code
Samyak2 May 7, 2023
cc5e3c2
fix(codegen): move non-agg projects to final table too
Samyak2 May 7, 2023
76d72a3
fix(codegen): use pre-project table for non-aggs
Samyak2 May 7, 2023
ee82df5
test(codegen): fix order of non-aggs
Samyak2 May 7, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ permutation = "0.4.1"
ordered-float = "3.1.0"
tabled = { version = "0.10.0", optional = true }
fmt-derive = "0.0.5"
phf = { version = "0.11.1", features = ["macros"] }

[features]
default = ["terminal-output"]
Expand Down
1,579 changes: 1,462 additions & 117 deletions src/codegen.rs

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions src/expr/agg.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use super::ExprError;
use std::fmt::Display;

/// Functions that reduce an entire column to a single value.
///
/// Variants are looked up by name through [`AggregateFunction::from_name`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AggregateFunction {
    /// Selected by the name `count`.
    Count,
    /// Selected by the name `max`.
    Max,
    /// Selected by the name `sum`.
    Sum,
}

impl AggregateFunction {
    /// Resolve an aggregation function from its name.
    ///
    /// The name is lowercased before matching, so `COUNT`, `Count` and
    /// `count` all resolve to [`Self::Count`].
    ///
    /// # Errors
    ///
    /// Returns [`ExprError::UnknownAggregateFunction`], carrying the name
    /// exactly as it was passed in, when it matches no known function.
    pub fn from_name(name: &str) -> Result<Self, ExprError> {
        let normalized = name.to_lowercase();
        let function = match normalized.as_str() {
            "count" => Self::Count,
            "max" => Self::Max,
            "sum" => Self::Sum,
            // Report the caller's original spelling, not the lowercased one.
            _ => return Err(ExprError::UnknownAggregateFunction(name.to_owned())),
        };
        Ok(function)
    }
}

impl Display for AggregateFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Agg({})",
match self {
AggregateFunction::Count => "count",
AggregateFunction::Max => "max",
AggregateFunction::Sum => "sum",
}
)
}
}
332 changes: 0 additions & 332 deletions src/expr/eval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,335 +153,3 @@ impl Display for ExprExecError {
}

// Empty impl: `ExprExecError` relies entirely on the default methods of the
// `Error` trait.
impl Error for ExprExecError {}

#[cfg(test)]
mod test {
use sqlparser::{
ast::{ColumnOption, ColumnOptionDef, DataType},
dialect::GenericDialect,
parser::Parser,
tokenizer::Tokenizer,
};

use crate::{
column::Column,
expr::{BinOp, Expr, UnOp},
table::{Row, Table},
value::{Value, ValueBinaryOpError, ValueUnaryOpError},
};

use super::ExprExecError;

/// Parse a SQL expression string into an [`Expr`] using `GenericDialect`.
///
/// Test helper: panics (via `unwrap`) if tokenizing, parsing, or the
/// conversion into `Expr` fails.
fn str_to_expr(s: &str) -> Expr {
    let generic = GenericDialect {};
    let token_stream = Tokenizer::new(&generic, s).tokenize().unwrap();
    let ast = Parser::new(token_stream, &generic).parse_expr().unwrap();
    ast.try_into().unwrap()
}

/// Evaluate `expr` against a temporary table that holds a single empty row.
///
/// Used for expressions that do not reference any column.
fn exec_expr_no_context(expr: Expr) -> Result<Value, ExprExecError> {
    let mut scratch = Table::new_temp(0);
    scratch.new_row(vec![]);

    // The row inserted above is the only one, so index 0 always exists.
    let rows = scratch.all_data();
    Expr::execute(&expr, &scratch, rows[0].to_shared())
}

/// Parse `s` as an expression and evaluate it without any column context.
fn exec_str_no_context(s: &str) -> Result<Value, ExprExecError> {
    exec_expr_no_context(str_to_expr(s))
}

/// Parse `s` as an expression and evaluate it against `row` of `table`.
fn exec_str_with_context(s: &str, table: &Table, row: &Row) -> Result<Value, ExprExecError> {
    let parsed = str_to_expr(s);
    let shared_row = row.to_shared();
    Expr::execute(&parsed, table, shared_row)
}

/// Literal expressions evaluate to the matching `Value` variant.
#[test]
fn exec_value() {
    let literals = [
        ("NULL", Value::Null),
        ("true", Value::Bool(true)),
        ("1", Value::Int64(1)),
        ("1.1", Value::Float64(1.1.into())),
        (".1", Value::Float64(0.1.into())),
        ("'str'", Value::String("str".to_owned())),
    ];

    for (sql, expected) in literals {
        assert_eq!(exec_str_no_context(sql), Ok(expected), "input: {}", sql);
    }
}

/// `and` / `or` over booleans, plus the error raised when one operand is not
/// a boolean.
#[test]
fn exec_logical() {
    // Expected successful boolean result.
    let boolean = |b: bool| -> Result<Value, ExprExecError> { Ok(Value::Bool(b)) };
    // Expected binary-operator error for the given operator and operand pair.
    let mismatch = |operator: BinOp, lhs: Value, rhs: Value| -> Result<Value, ExprExecError> {
        Err(ValueBinaryOpError {
            operator,
            values: (lhs, rhs),
        }
        .into())
    };

    let cases = [
        ("true and true", boolean(true)),
        ("true and false", boolean(false)),
        ("false and true", boolean(false)),
        ("false and false", boolean(false)),
        (
            "false and 10",
            mismatch(BinOp::And, Value::Bool(false), Value::Int64(10)),
        ),
        (
            "10 and false",
            mismatch(BinOp::And, Value::Int64(10), Value::Bool(false)),
        ),
        ("true or true", boolean(true)),
        ("true or false", boolean(true)),
        ("false or true", boolean(true)),
        ("false or false", boolean(false)),
        (
            "true or 10",
            mismatch(BinOp::Or, Value::Bool(true), Value::Int64(10)),
        ),
        (
            "10 or true",
            mismatch(BinOp::Or, Value::Int64(10), Value::Bool(true)),
        ),
    ];

    for (sql, expected) in cases {
        assert_eq!(exec_str_no_context(sql), expected, "input: {}", sql);
    }
}

/// Arithmetic operators on integers and floats, including the error for
/// mixing an integer with a float operand.
#[test]
fn exec_arithmetic() {
    let check = |sql: &str, expected: Result<Value, ExprExecError>| {
        assert_eq!(exec_str_no_context(sql), expected, "input: {}", sql);
    };

    // Addition.
    check("1 + 1", Ok(Value::Int64(2)));
    check("1.1 + 1.1", Ok(Value::Float64(2.2.into())));

    // Mixed int/float operands are rejected; this applies to all binary ops.
    check(
        "1 + 1.1",
        Err(ValueBinaryOpError {
            operator: BinOp::Plus,
            values: (Value::Int64(1), Value::Float64(1.1.into())),
        }
        .into()),
    );

    // Subtraction.
    check("4 - 2", Ok(Value::Int64(2)));
    check("4 - 6", Ok(Value::Int64(-2)));
    check("4.5 - 2.2", Ok(Value::Float64(2.3.into())));

    // Multiplication.
    check("4 * 2", Ok(Value::Int64(8)));
    check("0.5 * 2.2", Ok(Value::Float64(1.1.into())));

    // Division: integer operands give an integer result.
    check("4 / 2", Ok(Value::Int64(2)));
    check("4 / 3", Ok(Value::Int64(1)));
    check("4.0 / 2.0", Ok(Value::Float64(2.0.into())));
    check("5.1 / 2.5", Ok(Value::Float64(2.04.into())));

    // Remainder.
    check("5 % 2", Ok(Value::Int64(1)));
    check("5.5 % 2.5", Ok(Value::Float64(0.5.into())));
}

/// Equality and ordering comparisons evaluate to booleans.
#[test]
fn exec_comparison() {
    let cases = [
        // Equality / inequality.
        ("1 = 1", true),
        ("1 = 2", false),
        ("1 != 2", true),
        ("1.1 = 1.1", true),
        ("1.2 = 1.22", false),
        ("1.2 != 1.22", true),
        // Ordering.
        ("1 < 2", true),
        ("1 < 1", false),
        ("1 <= 2", true),
        ("1 <= 1", true),
        ("3 > 2", true),
        ("3 > 3", false),
        ("3 >= 2", true),
        ("3 >= 3", true),
    ];

    for (sql, expected) in cases {
        assert_eq!(
            exec_str_no_context(sql),
            Ok(Value::Bool(expected)),
            "input: {}",
            sql
        );
    }
}

/// `LIKE` is case-sensitive and `ILIKE` is case-insensitive; per the cases
/// below, a plain pattern matches when it occurs inside the subject string.
#[test]
fn exec_pattern_match() {
    let cases = [
        ("'my name is yoshikage kira' LIKE 'kira'", true),
        ("'my name is yoshikage kira' LIKE 'KIRA'", false),
        ("'my name is yoshikage kira' LIKE 'kira yoshikage'", false),
        ("'my name is Yoshikage Kira' ILIKE 'kira'", true),
        ("'my name is Yoshikage Kira' ILIKE 'KIRA'", true),
        ("'my name is Yoshikage Kira' ILIKE 'KIRAA'", false),
    ];

    for (sql, expected) in cases {
        assert_eq!(
            exec_str_no_context(sql),
            Ok(Value::Bool(expected)),
            "input: {}",
            sql
        );
    }
}

/// Unary plus/minus, `not`, `is true` / `is false`, and `is [not] NULL`.
#[test]
fn exec_unary() {
    let check = |sql: &str, expected: Result<Value, ExprExecError>| {
        assert_eq!(exec_str_no_context(sql), expected, "input: {}", sql);
    };

    // Sign operators.
    check("+1", Ok(Value::Int64(1)));
    check("+ -1", Ok(Value::Int64(-1)));
    check("-1", Ok(Value::Int64(-1)));
    check("- -1", Ok(Value::Int64(1)));

    // Logical negation.
    check("not true", Ok(Value::Bool(false)));
    check("not false", Ok(Value::Bool(true)));

    // `is true` / `is false` on booleans.
    check("true is true", Ok(Value::Bool(true)));
    check("false is false", Ok(Value::Bool(true)));
    check("false is true", Ok(Value::Bool(false)));
    check("true is false", Ok(Value::Bool(false)));

    // A non-boolean operand is an error rather than `false`.
    check(
        "1 is true",
        Err(ValueUnaryOpError {
            operator: UnOp::IsTrue,
            value: Value::Int64(1),
        }
        .into()),
    );

    // NULL checks.
    check("NULL is NULL", Ok(Value::Bool(true)));
    check("NULL is not NULL", Ok(Value::Bool(false)));
    check("1 is NULL", Ok(Value::Bool(false)));
    check("1 is not NULL", Ok(Value::Bool(true)));
    check("0 is not NULL", Ok(Value::Bool(true)));
    check("'' is not NULL", Ok(Value::Bool(true)));
}

/// A bare wildcard cannot be evaluated and is reported as `CannotExecute`.
#[test]
fn exec_wildcard() {
    let outcome = exec_expr_no_context(Expr::Wildcard);
    let expected = Err(ExprExecError::CannotExecute(Expr::Wildcard));
    assert_eq!(outcome, expected);
}

/// Column references resolve against the supplied row, alone and inside
/// larger expressions.
#[test]
fn exec_column_ref() {
    // Builds an integer column carrying a single `Unique` option.
    let unique_int_column = |column_name: &'static str, is_primary: bool| {
        Column::new(
            column_name.into(),
            DataType::Int(None),
            vec![ColumnOptionDef {
                name: None,
                option: ColumnOption::Unique { is_primary },
            }],
            false,
        )
    };
    // Values of the single row stored in the table.
    let sample_values = || {
        vec![
            Value::Int64(4),
            Value::Int64(10),
            Value::String("brr".to_owned()),
        ]
    };

    let mut table = Table::new(
        "table1".into(),
        vec![
            unique_int_column("col1", true),
            unique_int_column("col2", false),
            Column::new("col3".into(), DataType::String, vec![], false),
        ],
    );
    table.new_row(sample_values());

    // The inserted row must be stored unchanged.
    assert_eq!(table.all_data(), vec![Row::new(sample_values())]);

    let cases = [
        ("col1", Value::Int64(4)),
        ("col3", Value::String("brr".to_owned())),
        ("col1 = 4", Value::Bool(true)),
        ("col1 + 1", Value::Int64(5)),
        ("col1 + col2", Value::Int64(14)),
        ("col1 + col2 = 10 or col1 * col2 = 40", Value::Bool(true)),
    ];

    for (sql, expected) in cases {
        let rows = table.all_data();
        assert_eq!(
            exec_str_with_context(sql, &table, &rows[0]),
            Ok(expected),
            "input: {}",
            sql
        );
    }
}
}
Loading