From e4559e65f3e6917699e368a9328dcacf854384b7 Mon Sep 17 00:00:00 2001 From: Fletcher555 Date: Tue, 9 Sep 2025 00:13:06 -0400 Subject: [PATCH 1/2] Implement DISTINCT with SELECT statements --- src/db/table/delete/mod.rs | 2 +- src/db/table/helpers/common.rs | 36 +++++++++++- src/db/table/select/mod.rs | 8 ++- src/db/table/select/select_statement.rs | 57 +++++++++++++++++-- src/db/table/select/set_operator_evaluator.rs | 5 +- src/db/table/update/mod.rs | 2 +- .../ast/helpers/select_statement.rs | 41 ++++++++++++- src/interpreter/ast/mod.rs | 9 +++ src/interpreter/ast/parser.rs | 4 +- src/interpreter/ast/select_statement_stack.rs | 4 ++ 10 files changed, 154 insertions(+), 14 deletions(-) diff --git a/src/db/table/delete/mod.rs b/src/db/table/delete/mod.rs index ecfa914..a75910c 100644 --- a/src/db/table/delete/mod.rs +++ b/src/db/table/delete/mod.rs @@ -6,7 +6,7 @@ use crate::db::table::helpers::common::get_row_indicies_matching_clauses; pub fn delete(table: &mut Table, statement: DeleteStatement) -> Result<(), String> { - let row_indicies_to_delete = get_row_indicies_matching_clauses(table, &statement.where_clause, &statement.order_by_clause, &statement.limit_clause)?; + let row_indicies_to_delete = get_row_indicies_matching_clauses(table, None, &statement.where_clause, &statement.order_by_clause, &statement.limit_clause)?; swap_remove_bulk(table, row_indicies_to_delete)?; Ok(()) } diff --git a/src/db/table/helpers/common.rs b/src/db/table/helpers/common.rs index 8486735..e549c61 100644 --- a/src/db/table/helpers/common.rs +++ b/src/db/table/helpers/common.rs @@ -1,8 +1,14 @@ +use std::collections::HashSet; + use crate::db::table::{Table, Value, DataType}; use crate::interpreter::ast::{SelectStatementColumns, WhereStackElement, OrderByClause, LimitClause}; use crate::db::table::helpers::where_stack::matches_where_stack; use crate::db::table::helpers::{order_by_clause::get_ordered_row_indicies, limit_clause::get_limited_rows}; +pub struct DistinctOn<'a> { + pub columns: &'a SelectStatementColumns, +} + pub fn validate_and_clone_row(table: &Table, row: &Vec) -> Result, String> { if row.len() != table.width() { return Err(format!("Rows have incorrect width")); @@ -62,9 +68,14 @@ pub fn get_columns_from_row(table: &Table, row: &Vec, selected_columns: & return Ok(row_values); } -pub fn get_row_indicies_matching_clauses(table: &Table, where_clause: &Option>, order_by_clause: &Option>, limit_clause: &Option) -> Result, String> { +pub fn get_row_indicies_matching_clauses(table: &Table, mode: Option, where_clause: &Option>, order_by_clause: &Option>, limit_clause: &Option) -> Result, String> { let mut row_indicies = get_row_indicies_matching_where_clause(table, where_clause)?; + if let Some(mode) = mode { + row_indicies = remove_duplicate_rows_from_indicies(table, row_indicies, &mode.columns)?; + + } + if let Some(order_by_clause) = order_by_clause { row_indicies = get_ordered_row_indicies(table, row_indicies, &order_by_clause)?; } @@ -74,5 +85,28 @@ pub fn get_row_indicies_matching_clauses(table: &Table, where_clause: &Option>) -> Vec> { + let set = rows.into_iter().collect::>>(); + let result = set.into_iter().collect::>>(); + return result; +} + +pub fn remove_duplicate_rows_from_indicies(table: &Table, mut row_indicies: Vec, columns: &SelectStatementColumns) -> Result, String> { + let mut set = HashSet::new(); + let mut index = row_indicies.len(); + while index > 0 { + index -= 1; + let row = get_columns_from_row(table, &table.rows[row_indicies[index]], columns)?; + if set.contains(&row) { + row_indicies.swap_remove(index); + } + else { + set.insert(row); + } + + } return Ok(row_indicies); } \ No newline at end of file diff --git a/src/db/table/select/mod.rs b/src/db/table/select/mod.rs index 8f60009..46680fc 100644 --- a/src/db/table/select/mod.rs +++ b/src/db/table/select/mod.rs @@ -75,7 +75,7 @@ pub fn select_statement_stack(database: &Database, statement: SelectStatementSta mod tests { use super::*; use crate::db::table::test_utils::default_database; - use crate::interpreter::ast::{SelectStatement, SelectStatementColumns, WhereStackElement, WhereCondition, Operand, Operator, LogicalOperator}; + use crate::interpreter::ast::{SelectStatement, SelectStatementColumns, WhereStackElement, WhereCondition, Operand, Operator, LogicalOperator, SelectMode}; #[test] @@ -85,6 +85,7 @@ mod tests { columns: SelectStatementColumns::All, elements: vec![SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: None, order_by_clause: None, @@ -112,6 +113,7 @@ mod tests { elements: vec![ SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: Some(vec![WhereStackElement::Condition(WhereCondition { l_side: Operand::Identifier("id".to_string()), @@ -123,6 +125,7 @@ mod tests { }), SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: None, order_by_clause: None, @@ -148,6 +151,7 @@ mod tests { columns: SelectStatementColumns::All, elements: vec![SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: None, order_by_clause: None, @@ -155,6 +159,7 @@ mod tests { }), SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: Some(vec![WhereStackElement::Condition(WhereCondition { l_side: Operand::Identifier("id".to_string()), @@ -174,6 +179,7 @@ mod tests { SelectStatementStackElement::SetOperator(SetOperator::Intersect), SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: Some(vec![WhereStackElement::Condition(WhereCondition { l_side: Operand::Identifier("id".to_string()), diff --git a/src/db/table/select/select_statement.rs b/src/db/table/select/select_statement.rs index 9612992..7bd5f4d 100644 --- a/src/db/table/select/select_statement.rs +++ b/src/db/table/select/select_statement.rs @@ -1,12 +1,16 @@ use crate::db::table::{Table, Value}; -use crate::interpreter::ast::{SelectStatement}; -use crate::db::table::helpers::common::{get_row_indicies_matching_clauses, get_row_columns_from_indicies}; +use crate::interpreter::ast::{SelectStatement, SelectMode}; +use crate::db::table::helpers::common::{get_row_indicies_matching_clauses, get_row_columns_from_indicies, DistinctOn}; + pub fn select_statement(table: &Table, statement: &SelectStatement) -> Result>, String> { - let row_indicies = get_row_indicies_matching_clauses(table, &statement.where_clause, &statement.order_by_clause, &statement.limit_clause)?; - + let mode = match statement.mode { + SelectMode::All => None, + SelectMode::Distinct => Some(DistinctOn { columns: &statement.columns }), + }; + let row_indicies = get_row_indicies_matching_clauses(table, mode, &statement.where_clause, &statement.order_by_clause, &statement.limit_clause)?; return Ok(get_row_columns_from_indicies(table, row_indicies, Some(&statement.columns))?); } @@ -14,17 +18,21 @@ pub fn select_statement(table: &Table, statement: &SelectStatement) -> Result>>, @@ -32,8 +32,7 @@ impl SetOperatorEvaluator { let second = self.pop()?; let mut first = self.pop()?; first.extend(second.into_iter()); - let set = first.into_iter().collect::>>(); - let result = set.into_iter().collect::>>(); + let result = remove_duplicate_rows(first); self.push(result); Ok(()) } diff --git a/src/db/table/update/mod.rs b/src/db/table/update/mod.rs index c3428df..1546c47 100644 --- a/src/db/table/update/mod.rs +++ b/src/db/table/update/mod.rs @@ -4,7 +4,7 @@ use crate::db::table::helpers::common::get_row_indicies_matching_clauses; use crate::db::table::DataType; pub fn update(table: &mut Table, statement: UpdateStatement) -> Result<(), String> { - let row_indicies = get_row_indicies_matching_clauses(table, &statement.where_clause, &statement.order_by_clause, &statement.limit_clause)?; + let row_indicies = get_row_indicies_matching_clauses(table, None, &statement.where_clause, &statement.order_by_clause, &statement.limit_clause)?; update_rows_from_indicies(table, row_indicies, statement.update_values)?; Ok(()) } diff --git a/src/interpreter/ast/helpers/select_statement.rs b/src/interpreter/ast/helpers/select_statement.rs index 37f656e..c0c7955 100644 --- a/src/interpreter/ast/helpers/select_statement.rs +++ b/src/interpreter/ast/helpers/select_statement.rs @@ -1,6 +1,6 @@ use crate::{interpreter::{ ast::{ - parser::Parser, SelectStatement, SelectStatementColumns, WhereStackElement, + parser::Parser, SelectStatement, SelectStatementColumns, WhereStackElement, SelectMode, helpers::{ common::{tokens_to_identifier_list, get_table_name, expect_token_type}, order_by_clause::get_order_by, where_stack::get_where_clause, limit_clause::get_limit @@ -11,6 +11,13 @@ use crate::{interpreter::{ pub fn get_statement(parser: &mut Parser) -> Result { parser.advance()?; + let mode = match parser.current_token()?.token_type { + TokenTypes::Distinct => { + parser.advance()?; + SelectMode::Distinct + } + _ => SelectMode::All + }; let columns = get_columns(parser)?; expect_token_type(parser, TokenTypes::From)?; parser.advance()?; @@ -21,6 +28,7 @@ pub fn get_statement(parser: &mut Parser) -> Result { return Ok(SelectStatement { table_name: table_name, + mode: mode, columns: columns, where_clause: where_clause, order_by_clause: order_by_clause, @@ -69,6 +77,7 @@ mod tests { let statement = result.unwrap(); assert_eq!(statement, SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: None, order_by_clause: None, @@ -92,6 +101,7 @@ mod tests { let statement = result.unwrap(); assert_eq!(statement, SelectStatement { table_name: "guests".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::Specific(vec![ "id".to_string(), ]), @@ -119,6 +129,7 @@ mod tests { let statement = result.unwrap(); assert_eq!(statement, SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::Specific(vec![ "id".to_string(), "name".to_string(), @@ -162,6 +173,7 @@ mod tests { let statement = result.unwrap(); let expected = SelectStatement { table_name: "guests".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::Specific(vec![ "id".to_string(), ]), @@ -193,4 +205,31 @@ mod tests { }; assert_eq!(expected, statement); } + + #[test] + fn select_statement_with_distinct_mode_is_generated_correctly() { + // SELECT DISTINCT id FROM guests; + let tokens = vec![ + token(TokenTypes::Select, "SELECT"), + token(TokenTypes::Distinct, "DISTINCT"), + token(TokenTypes::Identifier, "id"), + token(TokenTypes::From, "FROM"), + token(TokenTypes::Identifier, "guests"), + token(TokenTypes::SemiColon, ";"), + ]; + let mut parser = Parser::new(tokens); + let result = get_statement(&mut parser); + assert!(result.is_ok()); + let statement = result.unwrap(); + assert_eq!(statement, SelectStatement { + table_name: "guests".to_string(), + mode: SelectMode::Distinct, + columns: SelectStatementColumns::Specific(vec![ + "id".to_string(), + ]), + where_clause: None, + order_by_clause: None, + limit_clause: None, + }); + } } \ No newline at end of file diff --git a/src/interpreter/ast/mod.rs b/src/interpreter/ast/mod.rs index 69b17cc..57cf16d 100644 --- a/src/interpreter/ast/mod.rs +++ b/src/interpreter/ast/mod.rs @@ -98,6 +98,7 @@ impl SetOperator { #[derive(Debug, PartialEq)] pub struct SelectStatement { pub table_name: String, + pub mode: SelectMode, pub columns: SelectStatementColumns, pub where_clause: Option>, pub order_by_clause: Option>, @@ -141,6 +142,12 @@ pub struct ColumnValue { pub value: Value, } +#[derive(Debug, PartialEq)] +pub enum SelectMode { + All, + Distinct, +} + #[derive(Debug, PartialEq, Clone)] pub enum SelectStatementColumns { All, @@ -400,6 +407,7 @@ mod tests { columns: SelectStatementColumns::All, elements: vec![SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: None, order_by_clause: None, @@ -493,6 +501,7 @@ mod tests { columns: SelectStatementColumns::All, elements: vec![SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: None, order_by_clause: None, diff --git a/src/interpreter/ast/parser.rs b/src/interpreter/ast/parser.rs index 38cf953..41a6b64 100644 --- a/src/interpreter/ast/parser.rs +++ b/src/interpreter/ast/parser.rs @@ -105,7 +105,7 @@ impl<'a> Parser<'a> { #[cfg(test)] mod tests { use super::*; - use crate::interpreter::ast::{CreateTableStatement, InsertIntoStatement, SelectStatement, SelectStatementColumns, SelectStatementStack, SelectStatementStackElement}; + use crate::interpreter::ast::{CreateTableStatement, InsertIntoStatement, SelectStatement, SelectStatementColumns, SelectStatementStack, SelectStatementStackElement, SelectMode}; use crate::interpreter::ast::test_utils::{token_with_location, token}; #[test] @@ -154,6 +154,7 @@ mod tests { columns: SelectStatementColumns::All, elements: vec![SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: None, order_by_clause: None, @@ -218,6 +219,7 @@ mod tests { columns: SelectStatementColumns::All, elements: vec![SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: None, order_by_clause: None, diff --git a/src/interpreter/ast/select_statement_stack.rs b/src/interpreter/ast/select_statement_stack.rs index 70d83cc..9231060 100644 --- a/src/interpreter/ast/select_statement_stack.rs +++ b/src/interpreter/ast/select_statement_stack.rs @@ -162,6 +162,7 @@ mod tests { use crate::interpreter::ast::OrderByClause; use crate::interpreter::ast::OrderByDirection; use crate::interpreter::ast::LimitClause; + use crate::interpreter::ast::SelectMode; fn simple_select_statement_tokens(id: &'static str) -> Vec> { vec![ @@ -179,6 +180,7 @@ mod tests { fn expected_simple_select_statement(id: i64) -> SelectStatementStackElement { SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "users".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::All, where_clause: Some(vec![WhereStackElement::Condition(WhereCondition { l_side: Operand::Identifier("id".to_string()), @@ -342,6 +344,7 @@ mod tests { elements: vec![ SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "employees".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::Specific(vec!["name".to_string()]), where_clause: Some(vec![WhereStackElement::Condition(WhereCondition { l_side: Operand::Identifier("name".to_string()), @@ -353,6 +356,7 @@ mod tests { }), SelectStatementStackElement::SelectStatement(SelectStatement { table_name: "employees".to_string(), + mode: SelectMode::All, columns: SelectStatementColumns::Specific(vec!["name".to_string()]), where_clause: Some(vec![WhereStackElement::Condition(WhereCondition { l_side: Operand::Identifier("name".to_string()), From 969f399d0b8d7eb443b4af0e37623486913b4255 Mon Sep 17 00:00:00 2001 From: Fletcher555 Date: Tue, 9 Sep 2025 00:17:38 -0400 Subject: [PATCH 2/2] Integration test for DISTINCT clause --- tests/crud_test.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/crud_test.rs b/tests/crud_test.rs index fd0039b..4dfc3e1 100644 --- a/tests/crud_test.rs +++ b/tests/crud_test.rs @@ -167,4 +167,29 @@ fn test_alter_table() { assert!(result[7..=10].iter().all(|result| result.is_err())); assert_eq!(expected_errors, result[7..=10].iter().map(|result| result.as_ref().err().unwrap()).collect::>()); +} + +#[test] +fn test_distinct_mode() { + let mut database = Database::new(); + let sql = " + CREATE TABLE users ( + id INTEGER, + name TEXT + ); + INSERT INTO users (id, name) VALUES (1, 'John'); + INSERT INTO users (id, name) VALUES (2, 'Jane'); + INSERT INTO users (id, name) VALUES (3, 'Jim'); + INSERT INTO users (id, name) VALUES (4, 'John'); + SELECT DISTINCT name FROM users ORDER BY name ASC; + "; + let result = run_sql(&mut database, sql); + assert!(result.iter().all(|result| result.is_ok())); + let expected = vec![ + vec![Value::Text("Jane".to_string())], + vec![Value::Text("Jim".to_string())], + vec![Value::Text("John".to_string())], + ]; + let row = result[5].as_ref().unwrap().as_ref().unwrap(); + assert_eq!(expected, *row); } \ No newline at end of file