From 8179ab08480287d854d97d2be8cf4ad78dd896e5 Mon Sep 17 00:00:00 2001 From: Fletcher555 Date: Thu, 28 Aug 2025 01:12:56 -0400 Subject: [PATCH 1/5] Fixed passing the interpreter by reference. --- src/cli/ast/create_statement.rs | 26 ++++++++++++++++++++++++-- src/cli/ast/interpreter.rs | 8 ++++---- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/cli/ast/create_statement.rs b/src/cli/ast/create_statement.rs index 9b63303..f18b5c5 100644 --- a/src/cli/ast/create_statement.rs +++ b/src/cli/ast/create_statement.rs @@ -1,5 +1,27 @@ -use crate::cli::{ast::SqlStatement, ast::interpreter::Interpreter}; +use crate::cli::{ast::{interpreter::Interpreter, CreateTableStatement, SqlStatement, SqlStatement::CreateTable}, tokenizer::token::TokenTypes}; -pub fn build(interpreter: &Interpreter) -> Result { +pub fn build(interpreter: &mut Interpreter) -> Result { + interpreter.advance(); + match interpreter.current_token() { + Some(token) => { + match token.token_type { + TokenTypes::Table => { + return table_statement(interpreter); + }, + TokenTypes::Index => { + return index_statement(interpreter); + }, + _ => return Err(interpreter.format_error()), + } + }, + None => return Err(interpreter.format_error()), + } +} + +fn table_statement(interpreter: &mut Interpreter) -> Result { todo!() } + +fn index_statement(interpreter: &mut Interpreter) -> Result { + todo!() +} \ No newline at end of file diff --git a/src/cli/ast/interpreter.rs b/src/cli/ast/interpreter.rs index c256f9f..e7437d2 100644 --- a/src/cli/ast/interpreter.rs +++ b/src/cli/ast/interpreter.rs @@ -24,7 +24,7 @@ impl<'a> Interpreter<'a> { self.current += 1; } - fn format_error(&self) -> String { + pub fn format_error(&self) -> String { if let Some(token) = self.current_token() { return format!( "Error at line {:?}, column {:?}: Unexpected type: {:?}", @@ -40,9 +40,9 @@ impl<'a> Interpreter<'a> { return Some(Err("No tokens to parse".to_string())); } return match self.current_token()?.token_type { - TokenTypes::Create => Some(create_statement::build(&self)), - TokenTypes::Insert => Some(insert_statement::build(&self)), - TokenTypes::Select => Some(select_statement::build(&self)), + TokenTypes::Create => Some(create_statement::build(self)), + TokenTypes::Insert => Some(insert_statement::build(self)), + TokenTypes::Select => Some(select_statement::build(self)), _ => { self.advance(); Some(Err(self.format_error())) From 903a52b00359c5db9bddf11fa7e023a02c7d6041 Mon Sep 17 00:00:00 2001 From: Fletcher555 Date: Thu, 28 Aug 2025 16:38:30 -0400 Subject: [PATCH 2/5] Implement correct datatypes for SQLite parity. --- src/cli/tokenizer/mod.rs | 21 +++++++++------------ src/cli/tokenizer/scanner.rs | 9 +++------ src/cli/tokenizer/token.rs | 4 ++-- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/cli/tokenizer/mod.rs b/src/cli/tokenizer/mod.rs index 6f1a425..db6dd69 100644 --- a/src/cli/tokenizer/mod.rs +++ b/src/cli/tokenizer/mod.rs @@ -78,8 +78,8 @@ mod tests { let statement = r#" CREATE SELECT INSERT TABLE FROM INTO VALUES WHERE UPDATE DELETE DROP INDEX - INTEGER TEXT VARCHAR CHAR FLOAT BOOLEAN TIMESTAMP - PRIMARY KEY NOT NULL UNIQUE DEFAULT AUTOINCREMENT + INTEGER REAL TEXT BLOB NULL + PRIMARY KEY NOT UNIQUE DEFAULT AUTOINCREMENT ORDER BY GROUP HAVING DISTINCT ALL AS INNER LEFT RIGHT FULL OUTER JOIN ON UNION LIMIT OFFSET @@ -107,19 +107,16 @@ mod tests { token(TokenTypes::Drop, "DROP", 22, 3), token(TokenTypes::Index, "INDEX", 27, 3), token(TokenTypes::Integer, "INTEGER", 8, 4), - token(TokenTypes::Text, "TEXT", 16, 4), - token(TokenTypes::Varchar, "VARCHAR", 21, 4), - token(TokenTypes::Char, "CHAR", 29, 4), - token(TokenTypes::Float, "FLOAT", 34, 4), - token(TokenTypes::Boolean, "BOOLEAN", 40, 4), - token(TokenTypes::Timestamp, "TIMESTAMP", 48, 4), + token(TokenTypes::Real, "REAL", 16, 4), + token(TokenTypes::Text, "TEXT", 21, 4), + token(TokenTypes::Blob, "BLOB", 26, 4), + token(TokenTypes::Null, "NULL", 31, 4), token(TokenTypes::Primary, "PRIMARY", 8, 5), token(TokenTypes::Key, "KEY", 16, 5), token(TokenTypes::Not, "NOT", 20, 5), - token(TokenTypes::Null, "NULL", 24, 5), - token(TokenTypes::Unique, "UNIQUE", 29, 5), - token(TokenTypes::Default, "DEFAULT", 36, 5), - token(TokenTypes::AutoIncrement, "AUTOINCREMENT", 44, 5), + token(TokenTypes::Unique, "UNIQUE", 24, 5), + token(TokenTypes::Default, "DEFAULT", 31, 5), + token(TokenTypes::AutoIncrement, "AUTOINCREMENT", 39, 5), token(TokenTypes::Order, "ORDER", 8, 6), token(TokenTypes::By, "BY", 14, 6), token(TokenTypes::Group, "GROUP", 17, 6), diff --git a/src/cli/tokenizer/scanner.rs b/src/cli/tokenizer/scanner.rs index 3786e73..cd7efe2 100644 --- a/src/cli/tokenizer/scanner.rs +++ b/src/cli/tokenizer/scanner.rs @@ -90,16 +90,13 @@ impl<'a> Scanner<'a> { slice if slice.eq_ignore_ascii_case("DROP") => TokenTypes::Drop, slice if slice.eq_ignore_ascii_case("INDEX") => TokenTypes::Index, slice if slice.eq_ignore_ascii_case("INTEGER") => TokenTypes::Integer, + slice if slice.eq_ignore_ascii_case("REAL") => TokenTypes::Real, slice if slice.eq_ignore_ascii_case("TEXT") => TokenTypes::Text, - slice if slice.eq_ignore_ascii_case("VARCHAR") => TokenTypes::Varchar, - slice if slice.eq_ignore_ascii_case("CHAR") => TokenTypes::Char, - slice if slice.eq_ignore_ascii_case("FLOAT") => TokenTypes::Float, - slice if slice.eq_ignore_ascii_case("BOOLEAN") => TokenTypes::Boolean, - slice if slice.eq_ignore_ascii_case("TIMESTAMP") => TokenTypes::Timestamp, + slice if slice.eq_ignore_ascii_case("BLOB") => TokenTypes::Blob, + slice if slice.eq_ignore_ascii_case("NULL") => TokenTypes::Null, slice if slice.eq_ignore_ascii_case("PRIMARY") => TokenTypes::Primary, slice if slice.eq_ignore_ascii_case("KEY") => TokenTypes::Key, slice if slice.eq_ignore_ascii_case("NOT") => TokenTypes::Not, - slice if slice.eq_ignore_ascii_case("NULL") => TokenTypes::Null, slice if slice.eq_ignore_ascii_case("UNIQUE") => TokenTypes::Unique, slice if slice.eq_ignore_ascii_case("DEFAULT") => TokenTypes::Default, slice if slice.eq_ignore_ascii_case("AUTOINCREMENT") => TokenTypes::AutoIncrement, diff --git a/src/cli/tokenizer/token.rs b/src/cli/tokenizer/token.rs index 34125dd..f4b815f 100644 --- a/src/cli/tokenizer/token.rs +++ b/src/cli/tokenizer/token.rs @@ -4,9 +4,9 @@ pub enum TokenTypes { Create, Select, Insert, Table, From, Into, Values, Where, Update, Delete, Drop, Index, // Data Types - Integer, Text, Varchar, Char, Float, Boolean, Timestamp, + Integer, Real, Text, Blob, Null, // Constraints - Primary, Key, Not, Null, Unique, Default, AutoIncrement, + Primary, Key, Not, Unique, Default, AutoIncrement, // Clauses Order, By, Group, Having, Distinct, All, As, Inner, Left, Right, Full, Outer, Join, On, Union, From 693d9531a545b2d4153610ad619f0fc0f0a40307 Mon Sep 17 00:00:00 2001 From: Fletcher555 Date: Thu, 28 Aug 2025 17:12:48 -0400 Subject: [PATCH 3/5] Implement Create Table with datatypes. --- src/cli/ast/create_statement.rs | 107 +++++++++++++++++++++++++++++++- src/cli/table.rs | 19 ++++-- 2 files changed, 118 insertions(+), 8 deletions(-) diff --git a/src/cli/ast/create_statement.rs b/src/cli/ast/create_statement.rs index f18b5c5..a4fbd4f 100644 --- a/src/cli/ast/create_statement.rs +++ b/src/cli/ast/create_statement.rs @@ -1,4 +1,6 @@ -use crate::cli::{ast::{interpreter::Interpreter, CreateTableStatement, SqlStatement, SqlStatement::CreateTable}, tokenizer::token::TokenTypes}; +use std::num::Saturating; + +use crate::cli::{ast::{interpreter::Interpreter, CreateTableStatement, SqlStatement::{self, CreateTable}}, table::{ColumnDefinition, DataType}, tokenizer::{scanner::Token, token::TokenTypes}}; pub fn build(interpreter: &mut Interpreter) -> Result { interpreter.advance(); @@ -19,7 +21,108 @@ pub fn build(interpreter: &mut Interpreter) -> Result { } fn table_statement(interpreter: &mut Interpreter) -> Result { - todo!() + interpreter.advance(); + let table_name = match interpreter.current_token() { + Some(token) => { + if token.token_type != TokenTypes::Identifier { + return Err(interpreter.format_error()); + } + let name = token.value.to_string(); + interpreter.advance(); + name + }, + None => return Err(interpreter.format_error()), + }; + let column_definitions = column_definitions(interpreter)?; + return Ok(CreateTable(CreateTableStatement { + table_name, + columns: column_definitions, + })); +} + +fn column_definitions(interpreter: &mut Interpreter) -> Result, String> { + let mut columns: Vec = vec![]; + if let Some(token) = interpreter.current_token() { + if token.token_type != TokenTypes::LeftParen { + return Err(interpreter.format_error()); + } + else { + interpreter.advance(); + loop { + let column_name = match interpreter.current_token() { + Some(token) => { + if token.token_type != TokenTypes::Identifier { + return Err(interpreter.format_error()); + } + token.value.to_string() + }, + None => return Err(interpreter.format_error()), + }; + interpreter.advance(); + + // Grab the column data type + let column_data_type = token_to_data_type(interpreter)?; + interpreter.advance(); + + // TODO: Modifiers and Constraints + + // Ensure we have a comma or right paren + if let Some(token) = interpreter.current_token() { + match &token.token_type { + TokenTypes::Comma => { + columns.push(crate::cli::table::ColumnDefinition { + name: column_name, + data_type: column_data_type, + constraints: vec![] // TODO, + }); + } + TokenTypes::RightParen => { + columns.push(crate::cli::table::ColumnDefinition { + name: column_name, + data_type: column_data_type, + constraints: vec![] // TODO, + }); + break; + }, + _ => return Err(interpreter.format_error()), + } + } else { + return Err(interpreter.format_error()); + } + interpreter.advance(); + } + return Ok(columns); + } + } else { + return Err(interpreter.format_error()); + } +} + +fn token_to_data_type(interpreter: &mut Interpreter) -> Result { + if let Some(token) = interpreter.current_token() { + match token.token_type { + TokenTypes::Integer => { + return Ok(DataType::Integer); + }, + TokenTypes::Real => { + return Ok(DataType::Real); + }, + TokenTypes::Text => { + return Ok(DataType::Text); + }, + TokenTypes::Blob => { + return Ok(DataType::Blob); + }, + TokenTypes::Null => { + return Ok(DataType::Null); + }, + _ => { + return Err(interpreter.format_error()); + } + } + } else { + return Err(interpreter.format_error()); + } } fn index_statement(interpreter: &mut Interpreter) -> Result { diff --git a/src/cli/table.rs b/src/cli/table.rs index 0b536b0..f42f976 100644 --- a/src/cli/table.rs +++ b/src/cli/table.rs @@ -1,8 +1,9 @@ pub enum DataType { Integer, - Float, + Real, Text, - Boolean, + Blob, + Null, } pub struct Table { @@ -13,8 +14,13 @@ pub struct Table { } pub struct ColumnDefinition { - name: String, - data_type: DataType, + pub name: String, + pub data_type: DataType, + pub constraints: Vec, +} + +pub struct ColumnConstraint { + pub constraint_type: String, } struct Row { @@ -24,7 +30,8 @@ struct Row { pub enum Value { Integer(i64), - Float(f64), + Real(f64), Text(String), - Bool(bool), + Blob(Vec), + Null } From c72feb4a519bbffb417c5ffbe2553b507ea1ab6e Mon Sep 17 00:00:00 2001 From: Fletcher555 Date: Thu, 28 Aug 2025 17:55:03 -0400 Subject: [PATCH 4/5] Update table and sql statement definitions for testing --- src/cli/ast/mod.rs | 9 +++++++-- src/cli/table.rs | 5 +++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/cli/ast/mod.rs b/src/cli/ast/mod.rs index b178fdd..f3e9f36 100644 --- a/src/cli/ast/mod.rs +++ b/src/cli/ast/mod.rs @@ -1,33 +1,38 @@ -use crate::cli::{self, tokenizer::scanner::Token}; +use crate::cli::{self, tokenizer::scanner::Token, table::Value}; mod create_statement; mod insert_statement; mod interpreter; mod select_statement; +#[derive(Debug, PartialEq)] pub enum SqlStatement { CreateTable(CreateTableStatement), Insert(InsertStatement), Select(SelectStatement), } +#[derive(Debug, PartialEq)] pub struct CreateTableStatement { pub table_name: String, pub columns: Vec, } +#[derive(Debug, PartialEq)] pub struct InsertStatement { pub table_name: String, pub columns: Vec, - pub values: Vec, + pub values: Vec, } +#[derive(Debug, PartialEq)] pub struct SelectStatement { pub table_name: String, pub columns: Vec, pub where_clause: Option, } +#[derive(Debug, PartialEq)] pub struct WhereClause { pub column: String, pub value: cli::table::Value, diff --git a/src/cli/table.rs b/src/cli/table.rs index f42f976..7a4631e 100644 --- a/src/cli/table.rs +++ b/src/cli/table.rs @@ -1,3 +1,5 @@ + +#[derive(Debug, PartialEq)] pub enum DataType { Integer, Real, @@ -13,12 +15,14 @@ pub struct Table { length: usize, } +#[derive(Debug, PartialEq)] pub struct ColumnDefinition { pub name: String, pub data_type: DataType, pub constraints: Vec, } +#[derive(Debug, PartialEq)] pub struct ColumnConstraint { pub constraint_type: String, } @@ -28,6 +32,7 @@ struct Row { values: Vec, } +#[derive(Debug, PartialEq)] pub enum Value { Integer(i64), Real(f64), From 5145e0ef12272dda789666cee772f3eb95097506 Mon Sep 17 00:00:00 2001 From: Fletcher555 Date: Thu, 28 Aug 2025 17:55:26 -0400 Subject: [PATCH 5/5] Add tests for create table statement generation --- src/cli/ast/create_statement.rs | 160 ++++++++++++++++++++++++++++++-- 1 file changed, 151 insertions(+), 9 deletions(-) diff --git a/src/cli/ast/create_statement.rs b/src/cli/ast/create_statement.rs index a4fbd4f..47b8634 100644 --- a/src/cli/ast/create_statement.rs +++ b/src/cli/ast/create_statement.rs @@ -1,23 +1,34 @@ -use std::num::Saturating; - -use crate::cli::{ast::{interpreter::Interpreter, CreateTableStatement, SqlStatement::{self, CreateTable}}, table::{ColumnDefinition, DataType}, tokenizer::{scanner::Token, token::TokenTypes}}; +use crate::cli::{ast::{interpreter::Interpreter, CreateTableStatement, SqlStatement::{self, CreateTable}}, table::{ColumnDefinition, DataType}, tokenizer::token::TokenTypes}; pub fn build(interpreter: &mut Interpreter) -> Result { interpreter.advance(); + let statement: Result; match interpreter.current_token() { Some(token) => { match token.token_type { TokenTypes::Table => { - return table_statement(interpreter); + statement = table_statement(interpreter); }, TokenTypes::Index => { - return index_statement(interpreter); + statement = index_statement(interpreter); }, _ => return Err(interpreter.format_error()), } }, None => return Err(interpreter.format_error()), } + // Ensure SemiColon + interpreter.advance(); + match interpreter.current_token() { + Some(token) => { + if token.token_type != TokenTypes::SemiColon { + return Err(interpreter.format_error()); + } + }, + None => return Err(interpreter.format_error()), + } + + return statement; } fn table_statement(interpreter: &mut Interpreter) -> Result { @@ -70,14 +81,14 @@ fn column_definitions(interpreter: &mut Interpreter) -> Result { - columns.push(crate::cli::table::ColumnDefinition { + columns.push(ColumnDefinition { name: column_name, data_type: column_data_type, constraints: vec![] // TODO, }); } TokenTypes::RightParen => { - columns.push(crate::cli::table::ColumnDefinition { + columns.push(ColumnDefinition { name: column_name, data_type: column_data_type, constraints: vec![] // TODO, @@ -125,6 +136,137 @@ fn token_to_data_type(interpreter: &mut Interpreter) -> Result } } -fn index_statement(interpreter: &mut Interpreter) -> Result { - todo!() +fn index_statement(_interpreter: &mut Interpreter) -> Result { + return Err("Index statements not yet implemented".to_string()); +} + + +#[cfg(test)] +mod tests { + use super::*; + use crate::cli::tokenizer::scanner::Token; + + fn token(tt: TokenTypes, val: &'static str, col_num: usize) -> Token<'static> { + Token { + token_type: tt, + value: val, + col_num: col_num, + line_num: 1, + } + } + + #[test] + fn create_table_generates_proper_statement(){ + let tokens = vec![ + token(TokenTypes::Create, "CREATE", 0), + token(TokenTypes::Table, "TABLE", 7), + token(TokenTypes::Identifier, "users", 13), + token(TokenTypes::LeftParen, "(", 18), + token(TokenTypes::Identifier, "id", 19), + token(TokenTypes::Integer, "INTEGER", 22), + token(TokenTypes::Comma, ",", 29), + token(TokenTypes::Identifier, "name", 31), + token(TokenTypes::Text, "TEXT", 36), + token(TokenTypes::RightParen, ")", 40), + token(TokenTypes::SemiColon, ";", 41), + token(TokenTypes::EOF, "", 0), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut interpreter); + let expected = SqlStatement::CreateTable(CreateTableStatement { + table_name: "users".to_string(), + columns: vec![ + ColumnDefinition { + name: "id".to_string(), + data_type: DataType::Integer, + constraints: vec![], + }, + ColumnDefinition { + name: "name".to_string(), + data_type: DataType::Text, + constraints: vec![], + }, + ], + }); + assert_eq!(result.unwrap(), expected); + } + + #[test] + fn create_table_statement_missing_semicolon() { + let tokens = vec![ + token(TokenTypes::Create, "CREATE", 0), + token(TokenTypes::Table, "TABLE", 7), + token(TokenTypes::Identifier, "users", 13), + token(TokenTypes::LeftParen, "(", 18), + token(TokenTypes::Identifier, "num", 19), + token(TokenTypes::Integer, "REAL", 22), + token(TokenTypes::Comma, ",", 29), + token(TokenTypes::Identifier, "my_blob", 31), + token(TokenTypes::Blob, "BLOB", 36), + token(TokenTypes::Comma, ",", 29), + token(TokenTypes::Identifier, "my_null", 31), + token(TokenTypes::Null, "Null", 36), + token(TokenTypes::RightParen, ")", 40), + // Missing SemiColon + token(TokenTypes::EOF, "", 0), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut interpreter); + assert!(result.is_err()); + } + + #[test] + fn create_table_with_bad_data_type() { + let tokens = vec![ + token(TokenTypes::Create, "CREATE", 0), + token(TokenTypes::Table, "TABLE", 7), + token(TokenTypes::Identifier, "users", 13), + token(TokenTypes::LeftParen, "(", 18), + token(TokenTypes::Identifier, "id", 19), + token(TokenTypes::Asterisk, "*", 22), // Bad Data Type + token(TokenTypes::Comma, ",", 23), + token(TokenTypes::Identifier, "name", 25), + token(TokenTypes::Text, "TEXT", 30), + token(TokenTypes::RightParen, ")", 34), + token(TokenTypes::SemiColon, ";", 35), + token(TokenTypes::EOF, "", 0), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut interpreter); + assert!(result.is_err()); + } + + #[test] + fn create_table_missing_comma() { + let tokens = vec![ + token(TokenTypes::Create, "CREATE", 0), + token(TokenTypes::Table, "TABLE", 7), + token(TokenTypes::Identifier, "users", 13), + token(TokenTypes::LeftParen, "(", 18), + token(TokenTypes::Identifier, "id", 19), + token(TokenTypes::Integer, "INTEGER", 22), // Missing Comma + token(TokenTypes::Identifier, "name", 31), + token(TokenTypes::Text, "TEXT", 36), + token(TokenTypes::RightParen, ")", 40), + token(TokenTypes::SemiColon, ";", 41), + token(TokenTypes::EOF, "", 0), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut interpreter); + assert!(result.is_err()); + } + + #[test] + fn index_statement_not_implemented() { + let tokens = vec![ + token(TokenTypes::Create, "CREATE", 0), + token(TokenTypes::Index, "INDEX", 7), + token(TokenTypes::Identifier, "my_index", 13), + token(TokenTypes::SemiColon, ";", 22), + token(TokenTypes::EOF, "", 0), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut interpreter); + assert!(result.is_err()); + } } \ No newline at end of file