diff --git a/Cargo.toml b/Cargo.toml index a3c02fda..4b7a49f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,3 @@ - - # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [package] @@ -10,7 +8,6 @@ edition = "2021" [lib] doctest = false - [dependencies] log = "^0.4" sqlparser = "0.34.0" @@ -33,6 +30,7 @@ tokio-process = "0.2.5" serde = { version = "1", features = ["derive", "rc"] } serde_json = "1" async-trait = "0.1.68" +integer-encoding = "3.0.4" [dev-dependencies] ctor = "0.2.0" diff --git a/src/binder/mod.rs b/src/binder/mod.rs index 0ad74c2a..38e66420 100644 --- a/src/binder/mod.rs +++ b/src/binder/mod.rs @@ -6,17 +6,18 @@ mod select; use std::collections::HashMap; use crate::{ - catalog::{CatalogRef, TableRefId}, + catalog::CatalogRef, expression::ScalarExpression, planner::LogicalPlan, }; use anyhow::Result; use sqlparser::ast::Statement; +use crate::types::TableId; pub struct BinderContext { catalog: CatalogRef, - bind_table: HashMap, + bind_table: HashMap, aliases: HashMap, group_by_exprs: Vec, agg_calls: Vec, diff --git a/src/binder/select.rs b/src/binder/select.rs index df7ae730..efec0f6d 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -1,7 +1,7 @@ use std::{borrow::Borrow, sync::Arc}; use crate::{ - catalog::{ColumnRefId, DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME}, + catalog::ColumnRefId, expression::ScalarExpression, planner::{ logical_select_plan::LogicalSelectPlan, @@ -22,6 +22,7 @@ use sqlparser::ast::{ Expr, Ident, Join, JoinConstraint, JoinOperator, Offset, OrderByExpr, Query, Select, SelectItem, SetExpr, TableFactor, TableWithJoins, }; +use crate::catalog::{DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME}; impl Binder { pub(super) fn bind_query(&mut self, query: &Query) -> Result { @@ -127,7 +128,7 @@ impl Binder { .map(|ident| Ident::new(ident.value.to_lowercase())) .collect_vec(); - let (database, schema, mut table): (&str, &str, &str) = match obj_name.as_slice() { + let (_database, 
_schema, mut table): (&str, &str, &str) = match obj_name.as_slice() { [table] => (DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME, &table.value), [schema, table] => (DEFAULT_DATABASE_NAME, &schema.value, &table.value), [database, schema, table] => (&database.value, &schema.value, &table.value), @@ -147,7 +148,7 @@ impl Binder { let table_ref_id = self .context .catalog - .get_table_id_by_name(database, schema, table) + .get_table_id_by_name(table) .ok_or_else(|| anyhow::Error::msg(format!("bind table {}", table)))?; self.context.bind_table.insert(table.into(), table_ref_id); @@ -198,7 +199,7 @@ impl Binder { fn bind_all_column_refs(&mut self) -> Result> { let mut exprs = vec![]; for ref_id in self.context.bind_table.values().cloned().collect_vec() { - let table = self.context.catalog.get_table(&ref_id).unwrap(); + let table = self.context.catalog.get_table(ref_id).unwrap(); for (col_id, col) in &table.get_all_columns() { let column_ref_id = ColumnRefId::from_table(ref_id, *col_id); // self.record_regular_table_column( @@ -209,8 +210,8 @@ impl Binder { // ); let expr = ScalarExpression::ColumnRef { column_ref_id, - primary_key: col.is_primary(), - desc: col.desc().clone(), + primary_key: col.desc.is_primary(), + desc: col.desc.clone(), }; exprs.push(expr); } diff --git a/src/catalog/column.rs b/src/catalog/column.rs index cb4388ce..c94f0a51 100644 --- a/src/catalog/column.rs +++ b/src/catalog/column.rs @@ -1,34 +1,26 @@ -use crate::types::{ColumnIdT, DataType}; +use crate::types::{ColumnId, DataType, IdGenerator}; -/// The descriptor of a column. 
-#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ColumnDesc { - column_datatype: DataType, - is_primary: bool, +#[derive(Clone)] +pub struct Column { + pub id: ColumnId, + pub name: String, + pub desc: ColumnDesc, } -impl ColumnDesc { - pub(crate) const fn new(column_datatype: DataType, is_primary: bool) -> ColumnDesc { - ColumnDesc { - column_datatype, - is_primary, +impl Column { + pub(crate) fn new( + column_name: String, + column_desc: ColumnDesc, + ) -> Column { + Column { + id: IdGenerator::build(), + name: column_name, + desc: column_desc, } } - pub(crate) fn is_primary(&self) -> bool { - self.is_primary - } - - pub(crate) fn set_primary(&mut self, is_primary: bool) { - self.is_primary = is_primary; - } - - pub(crate) fn is_nullable(&self) -> bool { - self.column_datatype.is_nullable() - } - - pub(crate) fn get_datatype(&self) -> DataType { - self.column_datatype.clone() + pub(crate) fn datatype(&self) -> &DataType { + &self.desc.column_datatype } } @@ -43,56 +35,31 @@ impl DataType { } } -/// Column catalog +/// The descriptor of a column. 
#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Column { - id: ColumnIdT, - name: String, - desc: ColumnDesc, +pub struct ColumnDesc { + column_datatype: DataType, + is_primary: bool, } -impl Column { - pub(crate) fn new( - column_id: ColumnIdT, - column_name: String, - column_desc: ColumnDesc, - ) -> Column { - Column { - id: column_id, - name: column_name, - desc: column_desc, +impl ColumnDesc { + pub(crate) const fn new(column_datatype: DataType, is_primary: bool) -> ColumnDesc { + ColumnDesc { + column_datatype, + is_primary, } } - pub(crate) fn id(&self) -> ColumnIdT { - self.id - } - pub fn set_id(&mut self, column_id: ColumnIdT) { - self.id = column_id - } - - pub(crate) fn name(&self) -> &str { - &self.name - } - - pub fn desc(&self) -> &ColumnDesc { - &self.desc - } - - pub fn datatype(&self) -> DataType { - self.desc.column_datatype.clone() - } - - pub(crate) fn set_primary(&mut self, is_primary: bool) { - self.desc.set_primary(is_primary) - } - pub(crate) fn is_primary(&self) -> bool { - self.desc.is_primary() + self.is_primary } pub(crate) fn is_nullable(&self) -> bool { - self.desc.is_nullable() + self.column_datatype.is_nullable() + } + + pub(crate) fn get_datatype(&self) -> DataType { + self.column_datatype.clone() } } @@ -104,16 +71,12 @@ mod tests { #[test] fn test_column_catalog() { let mut col_catalog = Column::new( - 0, "test".to_string(), DataTypeKind::Int(None).not_null().to_column(), ); - assert_eq!(col_catalog.id(), 0); - assert_eq!(col_catalog.is_primary(), false); - assert_eq!(col_catalog.is_nullable(), false); - assert_eq!(col_catalog.name(), "test"); - col_catalog.set_primary(true); - assert_eq!(col_catalog.is_primary(), true); + assert_eq!(col_catalog.desc.is_primary(), false); + assert_eq!(col_catalog.desc.is_nullable(), false); + assert_eq!(col_catalog.name, "test"); } } diff --git a/src/catalog/database.rs b/src/catalog/database.rs deleted file mode 100644 index a519847f..00000000 --- a/src/catalog/database.rs +++ /dev/null @@ 
-1,121 +0,0 @@ -use crate::catalog::{CatalogError, Schema, SchemaCatalogRef, DEFAULT_SCHEMA_NAME}; -use crate::types::{DatabaseIdT, SchemaIdT}; -use parking_lot::Mutex; -use std::collections::{BTreeMap, HashMap}; -use std::sync::Arc; - -pub(crate) struct Database { - database_id: DatabaseIdT, - inner: Mutex, -} - -struct Inner { - database_name: String, - /// schema_name -> schema_id - schema_idxs: HashMap, - /// schema_id -> schema_catalog - schemas: BTreeMap>, - next_schema_id: SchemaIdT, -} - -impl Database { - pub(crate) fn new(database_id: DatabaseIdT, database_name: String) -> Self { - let db_catalog = Database { - database_id, - inner: Mutex::new(Inner { - database_name, - schema_idxs: HashMap::new(), - schemas: BTreeMap::new(), - next_schema_id: 0, - }), - }; - let _ = db_catalog.add_schema(DEFAULT_SCHEMA_NAME.into()).is_ok(); - db_catalog - } - - pub(crate) fn add_schema(&self, schema_name: String) -> Result { - let mut inner = self.inner.lock(); - if inner.schema_idxs.contains_key(&schema_name) { - return Err(CatalogError::Duplicated("schema", schema_name)); - } - let schema_id = inner.next_schema_id; - inner.next_schema_id += 1; - let schema_catalog = Arc::new(Schema::new(schema_id, schema_name.clone())); - inner.schema_idxs.insert(schema_name, schema_id); - inner.schemas.insert(schema_id, schema_catalog); - Ok(schema_id) - } - - pub(crate) fn delete_schema(&self, schema_name: &str) -> Result<(), CatalogError> { - let mut inner = self.inner.lock(); - let id = inner - .schema_idxs - .remove(schema_name) - .ok_or_else(|| CatalogError::NotFound("schema", schema_name.into()))?; - inner.schemas.remove(&id); - Ok(()) - } - - pub(crate) fn get_all_schemas(&self) -> BTreeMap { - let inner = self.inner.lock(); - inner.schemas.clone() - } - - pub(crate) fn get_schema_id_by_name(&self, name: &str) -> Option { - let inner = self.inner.lock(); - inner.schema_idxs.get(name).cloned() - } - - pub(crate) fn get_schema_by_id(&self, schema_id: SchemaIdT) -> Option> { - let 
inner = self.inner.lock(); - inner.schemas.get(&schema_id).cloned() - } - - pub(crate) fn get_schema_by_name(&self, name: &str) -> Option> { - let inner = self.inner.lock(); - inner - .schema_idxs - .get(name) - .and_then(|schema_id| inner.schemas.get(schema_id)) - .cloned() - } - - pub(crate) fn name(&self) -> String { - let inner = self.inner.lock(); - inner.database_name.clone() - } - - pub(crate) fn id(&self) -> DatabaseIdT { - self.database_id - } -} - -#[cfg(test)] -mod test { - use crate::catalog::{Column, Database, Schema, Table}; - use crate::types::{DataTypeExt, DataTypeKind}; - - #[test] - fn test_database_catalog() { - let col0 = Column::new( - 0, - "a".to_string(), - DataTypeKind::Int(Some(32)).not_null().to_column(), - ); - let col1 = Column::new( - 1, - "b".to_string(), - DataTypeKind::Boolean.not_null().to_column(), - ); - let col_catalogs = vec![col0, col1]; - let mut _schema_catalog = Schema::new(0, "test_scheme".to_string()); - let _table_catalog = Table::new(0, "test_table".to_string(), col_catalogs, false); - - let database_catalog = Database::new(0, "test_database".to_string()); - let schema_id = database_catalog.add_schema("test_schema".into()).unwrap(); - assert_eq!(schema_id, 1); - - let schema_catalog = database_catalog.get_schema_by_id(schema_id).unwrap(); - assert_eq!(schema_catalog.name(), "test_schema"); - } -} diff --git a/src/catalog/mod.rs b/src/catalog/mod.rs index 6f671ffc..b1d7443a 100644 --- a/src/catalog/mod.rs +++ b/src/catalog/mod.rs @@ -1,69 +1,33 @@ // Module: catalog pub(crate) use self::column::*; -pub(crate) use self::database::*; pub(crate) use self::root::*; -pub(crate) use self::schema::*; pub(crate) use self::table::*; -use crate::types::DatabaseIdT; -use crate::types::{ColumnIdT, SchemaIdT, TableIdT}; +use crate::types::{ColumnId, TableId}; use std::sync::Arc; -pub(crate) type ColumnCatalogRef = Arc; -pub(crate) type TableCatalogRef = Arc; -pub(crate) type SchemaCatalogRef = Arc; -pub(crate) type DatabaseCatalogRef 
= Arc; -pub(crate) type RootCatalogRef = Arc; /// The type of catalog reference. -pub type CatalogRef = Arc; +pub type CatalogRef = Arc; +pub(crate) type TableRef = Arc
; +pub(crate) type ColumnRef = Arc; pub(crate) static DEFAULT_DATABASE_NAME: &str = "kipsql"; pub(crate) static DEFAULT_SCHEMA_NAME: &str = "kipsql"; mod column; -mod database; mod root; -mod schema; mod table; -/// The reference ID of a table. -#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] -pub struct TableRefId { - pub database_id: DatabaseIdT, - pub schema_id: SchemaIdT, - pub table_id: TableIdT, -} - -impl TableRefId { - pub const fn new(database_id: DatabaseIdT, schema_id: SchemaIdT, table_id: TableIdT) -> Self { - TableRefId { - schema_id, - table_id, - database_id, - } - } -} - /// The reference ID of a column. #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] pub struct ColumnRefId { - pub schema_id: SchemaIdT, - pub table_id: TableIdT, - pub column_id: ColumnIdT, + pub table_id: TableId, + pub column_id: ColumnId, } impl ColumnRefId { - pub const fn from_table(table: TableRefId, column_id: ColumnIdT) -> Self { - ColumnRefId { - schema_id: table.schema_id, - table_id: table.table_id, - column_id, - } - } - - pub const fn new(schema_id: SchemaIdT, table_id: TableIdT, column_id: ColumnIdT) -> Self { + pub const fn from_table(table_id: TableId, column_id: ColumnId) -> Self { ColumnRefId { - schema_id, table_id, column_id, } diff --git a/src/catalog/root.rs b/src/catalog/root.rs index 9f2abedc..f139df64 100644 --- a/src/catalog/root.rs +++ b/src/catalog/root.rs @@ -1,147 +1,68 @@ -use crate::catalog::{CatalogError, Database, DatabaseCatalogRef, DEFAULT_DATABASE_NAME}; -use crate::types::DatabaseIdT; -use parking_lot::Mutex; -use std::collections::{BTreeMap, HashMap}; -use std::sync::Arc; +use crate::catalog::{CatalogError, Column, Table}; +use crate::types::TableId; +use std::collections::BTreeMap; -use super::{Table, TableRefId}; - -pub struct RootCatalog { - inner: Mutex, -} - -#[derive(Default)] -pub struct Inner { - /// Database name to database id mapping - database_idxs: HashMap, - /// Database id to database catalog mapping - databases: BTreeMap>, 
- next_database_id: DatabaseIdT, -} - -impl Default for RootCatalog { - fn default() -> Self { - Self::new() - } +pub struct Root { + table_idxs: BTreeMap, + tables: BTreeMap, } -impl RootCatalog { - pub(crate) fn new() -> RootCatalog { - let root_catalog = RootCatalog { - inner: Mutex::new(Inner::default()), - }; - let _ = root_catalog - .add_database(DEFAULT_DATABASE_NAME.into()) - .is_ok(); - root_catalog - } - - pub(crate) fn add_database(&self, database_name: String) -> Result { - let mut inner = self.inner.lock(); - if inner.database_idxs.contains_key(&database_name) { - return Err(CatalogError::Duplicated("database", database_name)); - } - let database_id = inner.next_database_id; - inner.next_database_id += 1; - let database_catalog = Arc::new(Database::new(database_id, database_name.clone())); - inner.database_idxs.insert(database_name, database_id); - inner.databases.insert(database_id, database_catalog); - Ok(database_id) - } - - pub(crate) fn delete_database(&mut self, database_name: &str) -> Result<(), CatalogError> { - let mut inner = self.inner.lock(); - let id = inner - .database_idxs - .remove(database_name) - .ok_or_else(|| CatalogError::NotFound("database", database_name.into()))?; - inner.databases.remove(&id); - Ok(()) +impl Root { + #[allow(dead_code)] + pub(crate) fn new() -> Self { + Root { table_idxs: Default::default(), tables: Default::default() } } - pub(crate) fn get_all_databases(&self) -> BTreeMap { - let inner = self.inner.lock(); - inner.databases.clone() + pub(crate) fn get_table_id_by_name(&self, name: &str) -> Option { + self.table_idxs.get(name).cloned() } - pub fn get_database_id_by_name(&self, name: &str) -> Option { - let inner = self.inner.lock(); - inner.database_idxs.get(name).cloned() + pub(crate) fn get_table(&self, table_id: TableId) -> Option<&Table> { + self.tables.get(&table_id) } - pub(crate) fn get_database_by_id(&self, database_id: DatabaseIdT) -> Option> { - let inner = self.inner.lock(); - 
inner.databases.get(&database_id).cloned() - } - - pub(crate) fn get_database_by_name(&self, name: &str) -> Option> { - let inner = self.inner.lock(); - inner - .database_idxs - .get(name) - .and_then(|id| inner.databases.get(id)) - .cloned() - } - - pub fn get_table(&self, table_ref_id: &TableRefId) -> Option> { - let db = self.get_database_by_id(table_ref_id.database_id)?; - let schema = db.get_schema_by_id(table_ref_id.schema_id)?; - schema.get_table_by_id(table_ref_id.table_id) - } + pub(crate) fn add_table(&mut self, table_name: String, columns: Vec) -> Result { + if self.table_idxs.contains_key(&table_name) { + return Err(CatalogError::Duplicated("column", table_name)); + } + let table = Table::new(table_name.to_owned(), columns)?; + let table_id = table.id; - pub fn get_table_id_by_name( - &self, - database_name: &str, - schema_name: &str, - table_name: &str, - ) -> Option { - let db = self.get_database_by_name(database_name)?; - let schema = db.get_schema_by_name(schema_name)?; - let table = schema.get_table_by_name(table_name)?; + self.table_idxs.insert(table_name, table_id); + self.tables.insert(table_id, table); - Some(TableRefId { - schema_id: schema.id(), - table_id: table.id(), - database_id: db.id(), - }) + Ok(table_id) } } #[cfg(test)] mod tests { use super::*; - use crate::catalog::{Column, DEFAULT_SCHEMA_NAME}; + use crate::catalog::Column; use crate::types::{DataTypeExt, DataTypeKind}; #[test] fn test_root_catalog() { - let root_catalog = RootCatalog::new(); - let database_id = root_catalog - .get_database_id_by_name(DEFAULT_DATABASE_NAME) - .unwrap(); - let database_catalog = root_catalog.get_database_by_id(database_id).unwrap(); - let schema_catalog = database_catalog - .get_schema_by_name(DEFAULT_SCHEMA_NAME) - .unwrap(); + let mut root_catalog = Root::new(); let col0 = Column::new( - 0, "a".to_string(), DataTypeKind::Int(None).not_null().to_column(), ); let col1 = Column::new( - 1, "b".to_string(), 
DataTypeKind::Boolean.not_null().to_column(), ); let col_catalogs = vec![col0, col1]; - let table_id = schema_catalog - .add_table("test_table".into(), col_catalogs, false) + let table_id_1 = root_catalog + .add_table("test_table_1".into(), col_catalogs.clone()) + .unwrap(); + + let table_id_2 = root_catalog + .add_table("test_table_2".into(), col_catalogs) .unwrap(); - assert_eq!(table_id, 0); - assert_eq!(database_catalog.name(), DEFAULT_DATABASE_NAME); - assert_eq!(schema_catalog.name(), DEFAULT_SCHEMA_NAME); + assert_ne!(table_id_1, table_id_2); } } diff --git a/src/catalog/schema.rs b/src/catalog/schema.rs deleted file mode 100644 index 0cad15b3..00000000 --- a/src/catalog/schema.rs +++ /dev/null @@ -1,139 +0,0 @@ -use crate::catalog::{CatalogError, Column, Table}; -use crate::types::{SchemaIdT, TableIdT}; -use parking_lot::Mutex; -use std::collections::{BTreeMap, HashMap}; -use std::sync::Arc; - -pub(crate) struct Schema { - schema_id: SchemaIdT, - inner: Mutex, -} - -struct Inner { - schema_name: String, - table_idxs: HashMap, - tables: BTreeMap>, - next_table_id: TableIdT, -} - -impl Schema { - pub(crate) fn new(schema_id: SchemaIdT, schema_name: String) -> Schema { - Schema { - schema_id, - inner: Mutex::new(Inner { - schema_name, - table_idxs: HashMap::new(), - tables: BTreeMap::new(), - next_table_id: 0, - }), - } - } - - pub(crate) fn add_table( - &self, - table_name: String, - columns: Vec, - is_materialized_view: bool, - ) -> Result { - let mut inner = self.inner.lock(); - if inner.table_idxs.contains_key(&table_name) { - return Err(CatalogError::Duplicated("column", table_name)); - } - let table_id = inner.next_table_id; - inner.next_table_id += 1; - let table_catalog = Arc::new(Table::new( - table_id, - table_name.clone(), - columns, - is_materialized_view, - )); - inner.table_idxs.insert(table_name, table_id); - inner.tables.insert(table_id, table_catalog); - Ok(table_id) - } - - pub(crate) fn delete_table(&mut self, table_name: &str) -> Result<(), 
CatalogError> { - let mut inner = self.inner.lock(); - - let id = inner - .table_idxs - .remove(table_name) - .ok_or_else(|| CatalogError::NotFound("table", table_name.into()))?; - inner.tables.remove(&id); - Ok(()) - } - - pub(crate) fn get_all_tables(&self) -> BTreeMap> { - let inner = self.inner.lock(); - inner.tables.clone() - } - - pub(crate) fn get_table_id_by_name(&self, name: &str) -> Option { - let inner = self.inner.lock(); - inner.table_idxs.get(name).cloned() - } - - pub(crate) fn get_table_by_id(&self, table_id: TableIdT) -> Option> { - let inner = self.inner.lock(); - inner.tables.get(&table_id).cloned() - } - - pub(crate) fn get_table_by_name(&self, name: &str) -> Option> { - let inner = self.inner.lock(); - inner - .table_idxs - .get(name) - .and_then(|id| inner.tables.get(id)) - .cloned() - } - - pub(crate) fn id(&self) -> SchemaIdT { - self.schema_id - } - pub fn name(&self) -> String { - let inner = self.inner.lock(); - inner.schema_name.clone() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use crate::types::{DataTypeExt, DataTypeKind}; - - #[test] - fn test_schema_catalog() { - let col0 = Column::new( - 0, - "a".into(), - DataTypeKind::Int(None).not_null().to_column(), - ); - let col1 = Column::new(1, "b".into(), DataTypeKind::Boolean.not_null().to_column()); - let col_catalogs = vec![col0, col1]; - let mut schema_catalog = Schema::new(0, "test_scheme".to_string()); - let table_id = schema_catalog - .add_table("test_table".to_string(), col_catalogs, false) - .unwrap(); - assert_eq!(table_id, 0); - - let table_catalog = schema_catalog.get_table_by_id(table_id).unwrap(); - assert_eq!(table_catalog.name(), "test_table"); - - let table_catalog = schema_catalog - .get_table_by_name(&String::from("test_table")) - .unwrap(); - assert_eq!(table_catalog.name(), "test_table"); - - let table_catalog = schema_catalog - .delete_table(&String::from("test_table")) - .unwrap(); - assert_eq!(table_catalog, ()); - - let table_catalog = 
schema_catalog.delete_table(&String::from("test_table")); - assert_eq!( - table_catalog, - Err(CatalogError::NotFound("table", "test_table".into())) - ); - } -} diff --git a/src/catalog/table.rs b/src/catalog/table.rs index 546bfa46..38987a2e 100644 --- a/src/catalog/table.rs +++ b/src/catalog/table.rs @@ -1,114 +1,69 @@ -use crate::catalog::{CatalogError, Column}; -use crate::types::{ColumnIdT, TableIdT}; -use parking_lot::Mutex; use std::collections::{BTreeMap, HashMap}; +use itertools::Itertools; +use crate::catalog::{CatalogError, Column}; +use crate::types::{ColumnId, IdGenerator, TableId}; pub struct Table { - table_id: TableIdT, - inner: Mutex, -} - -struct Inner { - name: String, + pub id: TableId, + pub name: String, /// Mapping from column names to column ids - column_idxs: HashMap, - /// Mapping from column ids to column catalogs - columns: BTreeMap, - - #[allow(dead_code)] - /// The next column id to be assigned - is_materialized_view: bool, - /// Whether the table is a materialized view - next_column_id: ColumnIdT, + column_idxs: HashMap, + columns: BTreeMap, } impl Table { - /// Create a new table catalog with the given table id and table name. - pub(crate) fn new( - table_id: TableIdT, - table_name: String, - columns: Vec, - is_materialized_view: bool, - ) -> Table { - let table_catalog = Table { - table_id, - inner: Mutex::new(Inner { - name: table_name, - column_idxs: HashMap::new(), - columns: BTreeMap::new(), - is_materialized_view, - next_column_id: 0, - }), - }; - for col_catalog in columns.into_iter() { - let _ = table_catalog.add_column(col_catalog).is_ok(); - } - table_catalog + pub(crate) fn get_column_by_id(&self, id: ColumnId) -> Option<&Column> { + self.columns.get(&id) } - /// Add a column to the table catalog. 
- pub(crate) fn add_column(&self, col_catalog: Column) -> Result { - let mut inner = self.inner.lock(); - - if inner.column_idxs.contains_key(col_catalog.name()) { - return Err(CatalogError::Duplicated( - "column", - col_catalog.name().into(), - )); - } - inner.next_column_id += 1; - let id = col_catalog.id(); - - inner - .column_idxs - .insert(col_catalog.name().to_owned(), col_catalog.id()); - inner.columns.insert(id, col_catalog); - Ok(id) + pub(crate) fn get_column_id_by_name(&self, name: &str) -> Option { + self.column_idxs.get(name).cloned() } - /// Check if the table catalog contains a column with the given name. pub(crate) fn contains_column(&self, name: &str) -> bool { - let inner = self.inner.lock(); - inner.column_idxs.contains_key(name) + self.column_idxs.contains_key(name) } - /// Get all columns in the table catalog. - pub fn get_all_columns(&self) -> BTreeMap { - let inner = self.inner.lock(); - inner.columns.clone() + pub(crate) fn get_all_columns(&self) -> Vec<(ColumnId, &Column)> { + self.columns.iter() + .map(|(col_id, col)| (*col_id, col)) + .collect_vec() } - /// Get the column id of the column with the given name. - pub(crate) fn get_column_id_by_name(&self, name: &str) -> Option { - let inner = self.inner.lock(); - inner.column_idxs.get(name).cloned() - } + /// Add a column to the table catalog. + pub(crate) fn add_column(&mut self, col_catalog: Column) -> Result { + if self.column_idxs.contains_key(&col_catalog.name) { + return Err(CatalogError::Duplicated( + "column", + col_catalog.name.into(), + )); + } - /// Get the column catalog of the column with the given id. - pub(crate) fn get_column_by_id(&self, column_id: ColumnIdT) -> Option { - let inner = self.inner.lock(); - inner.columns.get(&column_id).cloned() - } + let col_id = col_catalog.id; - /// Get the column catalog of the column with the given name. 
- pub(crate) fn get_column_by_name(&self, name: &String) -> Option { - let inner = self.inner.lock(); - let column_id = inner.column_idxs.get(name)?; - inner.columns.get(column_id).cloned() - } + self.column_idxs.insert(col_catalog.name.to_owned(), col_id); + self.columns.insert(col_id, col_catalog); - /// Get the table id of the table. - pub(crate) fn id(&self) -> TableIdT { - self.table_id + Ok(col_id) } - /// Get the table name of the table. - pub(crate) fn name(&self) -> String { - let inner = self.inner.lock(); - inner.name.clone() + pub(crate) fn new(table_name: String, columns: Vec) -> Result { + let mut table_catalog = Table { + id: IdGenerator::build(), + name: table_name, + column_idxs: HashMap::new(), + columns: BTreeMap::new(), + }; + + for col_catalog in columns.into_iter() { + let _ = table_catalog.add_column(col_catalog)?; + } + + Ok(table_catalog) } } +#[cfg(test)] mod tests { use super::*; use crate::types::{DataType, DataTypeExt, DataTypeKind}; @@ -120,33 +75,39 @@ mod tests { // | 2 | false | fn test_table_catalog() { let col0 = Column::new( - 0, "a".into(), DataTypeKind::Int(None).not_null().to_column(), ); - let col1 = Column::new(1, "b".into(), DataTypeKind::Boolean.not_null().to_column()); + let col1 = Column::new( + "b".into(), + DataTypeKind::Boolean.not_null().to_column() + ); let col_catalogs = vec![col0, col1]; - let table_catalog = Table::new(0, "test".to_string(), col_catalogs, false); + let table_catalog = Table::new( + "test".to_string(), + col_catalogs + ).unwrap(); assert_eq!(table_catalog.contains_column("a"), true); assert_eq!(table_catalog.contains_column("b"), true); assert_eq!(table_catalog.contains_column("c"), false); - assert_eq!(table_catalog.get_column_id_by_name("a"), Some(0)); - assert_eq!(table_catalog.get_column_id_by_name("b"), Some(1)); + let col_a_id = table_catalog.get_column_id_by_name("a").unwrap(); + let col_b_id = table_catalog.get_column_id_by_name("b").unwrap(); + assert!(col_a_id < col_b_id); - let 
column_catalog = table_catalog.get_column_by_id(0).unwrap(); - assert_eq!(column_catalog.name(), "a"); + let column_catalog = table_catalog.get_column_by_id(col_a_id).unwrap(); + assert_eq!(column_catalog.name, "a"); assert_eq!( column_catalog.datatype(), - DataType::new(DataTypeKind::Int(None), false) + &DataType::new(DataTypeKind::Int(None), false) ); - let column_catalog = table_catalog.get_column_by_id(1).unwrap(); - assert_eq!(column_catalog.name(), "b"); + let column_catalog = table_catalog.get_column_by_id(col_b_id).unwrap(); + assert_eq!(column_catalog.name, "b"); assert_eq!( column_catalog.datatype(), - DataType::new(DataTypeKind::Boolean, false) + &DataType::new(DataTypeKind::Boolean, false) ); } } diff --git a/src/executor/physical/mod.rs b/src/executor/physical/mod.rs index d3f7d3dc..79bad656 100644 --- a/src/executor/physical/mod.rs +++ b/src/executor/physical/mod.rs @@ -5,8 +5,6 @@ pub mod physical_project; pub mod physical_scan; pub mod physical_sort; -use serde::{Deserialize, Serialize}; - use self::{ physical_filter::PhysicalFilter, physical_limit::PhysicalLimit, physical_project::PhysicalProject, physical_scan::PhysicalTableScan, diff --git a/src/executor/physical/physical_project.rs b/src/executor/physical/physical_project.rs index 85d6624a..dce4524d 100644 --- a/src/executor/physical/physical_project.rs +++ b/src/executor/physical/physical_project.rs @@ -1,8 +1,6 @@ -use serde::{Deserialize, Serialize}; - use super::PhysicalPlanBoxed; + pub struct PhysicalProject { pub plan_id: u32, - pub input: PhysicalPlanBoxed, } diff --git a/src/executor/physical/physical_scan.rs b/src/executor/physical/physical_scan.rs index 41d6bb37..01f22704 100644 --- a/src/executor/physical/physical_scan.rs +++ b/src/executor/physical/physical_scan.rs @@ -1,6 +1,3 @@ -use serde::{Deserialize, Serialize}; -use std::sync::Arc; - use crate::planner::operator::scan::ScanOperator; pub struct PhysicalTableScan { diff --git a/src/planner/logical_plan_builder.rs 
b/src/planner/logical_plan_builder.rs index 5e3fcd7f..1c2c6673 100644 --- a/src/planner/logical_plan_builder.rs +++ b/src/planner/logical_plan_builder.rs @@ -1,5 +1,3 @@ -use std::sync::Arc; - use crate::parser; use anyhow::Result; diff --git a/src/planner/operator/join.rs b/src/planner/operator/join.rs index e0439d66..9a7ac592 100644 --- a/src/planner/operator/join.rs +++ b/src/planner/operator/join.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use crate::{ expression::ScalarExpression, - planner::{logical_select_plan::LogicalSelectPlan, LogicalPlan}, + planner::{logical_select_plan::LogicalSelectPlan}, }; use super::Operator; diff --git a/src/planner/operator/scan.rs b/src/planner/operator/scan.rs index a704599c..78ee03ad 100644 --- a/src/planner/operator/scan.rs +++ b/src/planner/operator/scan.rs @@ -1,16 +1,17 @@ use std::sync::Arc; use crate::{ - catalog::{ColumnRefId, TableRefId}, + catalog::ColumnRefId, expression::ScalarExpression, planner::logical_select_plan::LogicalSelectPlan, }; +use crate::types::TableId; use super::{sort::SortField, Operator}; #[derive(Debug, Clone)] pub struct ScanOperator { - pub table_ref_id: TableRefId, + pub table_ref_id: TableId, pub columns: Vec, pub sort_fields: Vec, // Support push down predicate. 
@@ -20,7 +21,7 @@ pub struct ScanOperator { pub limit: Option, } impl ScanOperator { - pub fn new(table_ref_id: TableRefId) -> LogicalSelectPlan { + pub fn new(table_ref_id: TableId) -> LogicalSelectPlan { LogicalSelectPlan { operator: Arc::new(Operator::Scan(ScanOperator { table_ref_id, diff --git a/src/types/mod.rs b/src/types/mod.rs index c92f8980..aca39275 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,6 +1,12 @@ pub mod value; +use std::sync::atomic::AtomicU32; +use std::sync::atomic::Ordering::{Acquire, Release}; +use integer_encoding::FixedInt; pub use sqlparser::ast::DataType as DataTypeKind; + +static ID_BUF: AtomicU32 = AtomicU32::new(0); + /// Inner data type #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DataType { @@ -39,7 +45,56 @@ impl DataTypeExt for DataTypeKind { } } -pub type DatabaseIdT = u32; -pub type SchemaIdT = u32; -pub type TableIdT = u32; -pub type ColumnIdT = u32; +pub(crate) struct IdGenerator { } + +impl IdGenerator { + pub(crate) fn encode_to_raw() -> Vec { + ID_BUF + .load(Acquire) + .encode_fixed_vec() + } + + pub(crate) fn from_raw(buf: &[u8]) { + Self::init(u32::decode_fixed(buf)) + } + + pub(crate) fn init(init_value: u32) { + ID_BUF.store(init_value, Release) + } + + pub(crate) fn build() -> u32 { + ID_BUF.fetch_add(1, Release) + } +} + +pub type TableId = u32; +pub type ColumnId = u32; + + +#[cfg(test)] +mod test { + use std::sync::atomic::Ordering::Release; + use crate::types::{ID_BUF, IdGenerator}; + + /// Tips: since IdGenerator issues ids from static (global) state, this test must run in isolation (hence #[ignore]) to avoid interference from other test methods + #[test] + #[ignore] + fn test_id_generator() { + assert_eq!(IdGenerator::build(), 0); + assert_eq!(IdGenerator::build(), 1); + + let buf = IdGenerator::encode_to_raw(); + test_id_generator_reset(); + + assert_eq!(IdGenerator::build(), 0); + + IdGenerator::from_raw(&buf); + + assert_eq!(IdGenerator::build(), 2); + assert_eq!(IdGenerator::build(), 3); + } + + fn test_id_generator_reset() { + ID_BUF.store(0, Release) + } +} \ No newline at end of file