diff --git a/Cargo.lock b/Cargo.lock index 590c55b..fad6bdb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "mollydb" version = "0.1.0" +dependencies = [ + "hex", +] diff --git a/Cargo.toml b/Cargo.toml index 2b3a20b..02ea815 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,4 @@ version = "0.1.0" edition = "2024" [dependencies] +hex = "0.4.3" \ No newline at end of file diff --git a/src/cli/ast/create_statement.rs b/src/cli/ast/create_statement.rs index 47b8634..1535a1a 100644 --- a/src/cli/ast/create_statement.rs +++ b/src/cli/ast/create_statement.rs @@ -18,7 +18,6 @@ pub fn build(interpreter: &mut Interpreter) -> Result { None => return Err(interpreter.format_error()), } // Ensure SemiColon - interpreter.advance(); match interpreter.current_token() { Some(token) => { if token.token_type != TokenTypes::SemiColon { @@ -38,12 +37,13 @@ fn table_statement(interpreter: &mut Interpreter) -> Result return Err(interpreter.format_error()), }; + interpreter.advance(); + let column_definitions = column_definitions(interpreter)?; return Ok(CreateTable(CreateTableStatement { table_name, @@ -93,6 +93,7 @@ fn column_definitions(interpreter: &mut Interpreter) -> Result return Err(interpreter.format_error()), @@ -146,30 +147,31 @@ mod tests { use super::*; use crate::cli::tokenizer::scanner::Token; - fn token(tt: TokenTypes, val: &'static str, col_num: usize) -> Token<'static> { + fn token(tt: TokenTypes, val: &'static str) -> Token<'static> { Token { token_type: tt, value: val, - col_num: col_num, + col_num: 0, line_num: 1, } } #[test] fn create_table_generates_proper_statement(){ + // CREATE TABLE users (id INTEGER, name TEXT); let tokens = vec![ - token(TokenTypes::Create, "CREATE", 0), - 
token(TokenTypes::Table, "TABLE", 7), - token(TokenTypes::Identifier, "users", 13), - token(TokenTypes::LeftParen, "(", 18), - token(TokenTypes::Identifier, "id", 19), - token(TokenTypes::Integer, "INTEGER", 22), - token(TokenTypes::Comma, ",", 29), - token(TokenTypes::Identifier, "name", 31), - token(TokenTypes::Text, "TEXT", 36), - token(TokenTypes::RightParen, ")", 40), - token(TokenTypes::SemiColon, ";", 41), - token(TokenTypes::EOF, "", 0), + token(TokenTypes::Create, "CREATE"), + token(TokenTypes::Table, "TABLE"), + token(TokenTypes::Identifier, "users"), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::Identifier, "id"), + token(TokenTypes::Integer, "INTEGER"), + token(TokenTypes::Comma, ","), + token(TokenTypes::Identifier, "name"), + token(TokenTypes::Text, "TEXT"), + token(TokenTypes::RightParen, ")"), + token(TokenTypes::SemiColon, ";"), + token(TokenTypes::EOF, ""), ]; let mut interpreter = Interpreter::new(tokens); let result = build(&mut interpreter); @@ -193,22 +195,23 @@ mod tests { #[test] fn create_table_statement_missing_semicolon() { + // CREATE TABLE users (num REAL, my_blob BLOB, my_null NULL) let tokens = vec![ - token(TokenTypes::Create, "CREATE", 0), - token(TokenTypes::Table, "TABLE", 7), - token(TokenTypes::Identifier, "users", 13), - token(TokenTypes::LeftParen, "(", 18), - token(TokenTypes::Identifier, "num", 19), - token(TokenTypes::Integer, "REAL", 22), - token(TokenTypes::Comma, ",", 29), - token(TokenTypes::Identifier, "my_blob", 31), - token(TokenTypes::Blob, "BLOB", 36), - token(TokenTypes::Comma, ",", 29), - token(TokenTypes::Identifier, "my_null", 31), - token(TokenTypes::Null, "Null", 36), - token(TokenTypes::RightParen, ")", 40), + token(TokenTypes::Create, "CREATE"), + token(TokenTypes::Table, "TABLE"), + token(TokenTypes::Identifier, "users"), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::Identifier, "num"), + token(TokenTypes::Integer, "REAL"), + token(TokenTypes::Comma, ","), + 
token(TokenTypes::Identifier, "my_blob"), + token(TokenTypes::Blob, "BLOB"), + token(TokenTypes::Comma, ","), + token(TokenTypes::Identifier, "my_null"), + token(TokenTypes::Null, "Null"), + token(TokenTypes::RightParen, ")"), // Missing SemiColon - token(TokenTypes::EOF, "", 0), + token(TokenTypes::EOF, ""), ]; let mut interpreter = Interpreter::new(tokens); let result = build(&mut interpreter); @@ -217,19 +220,20 @@ mod tests { #[test] fn create_table_with_bad_data_type() { + // CREATE TABLE users (id *, name TEXT); let tokens = vec![ - token(TokenTypes::Create, "CREATE", 0), - token(TokenTypes::Table, "TABLE", 7), - token(TokenTypes::Identifier, "users", 13), - token(TokenTypes::LeftParen, "(", 18), - token(TokenTypes::Identifier, "id", 19), - token(TokenTypes::Asterisk, "*", 22), // Bad Data Type - token(TokenTypes::Comma, ",", 23), - token(TokenTypes::Identifier, "name", 25), - token(TokenTypes::Text, "TEXT", 30), - token(TokenTypes::RightParen, ")", 34), - token(TokenTypes::SemiColon, ";", 35), - token(TokenTypes::EOF, "", 0), + token(TokenTypes::Create, "CREATE"), + token(TokenTypes::Table, "TABLE"), + token(TokenTypes::Identifier, "users"), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::Identifier, "id"), + token(TokenTypes::Asterisk, "*"), // Bad Data Type + token(TokenTypes::Comma, ","), + token(TokenTypes::Identifier, "name"), + token(TokenTypes::Text, "TEXT"), + token(TokenTypes::RightParen, ")"), + token(TokenTypes::SemiColon, ";"), + token(TokenTypes::EOF, ""), ]; let mut interpreter = Interpreter::new(tokens); let result = build(&mut interpreter); @@ -238,18 +242,19 @@ mod tests { #[test] fn create_table_missing_comma() { + // CREATE TABLE users (id INTEGER name TEXT); let tokens = vec![ - token(TokenTypes::Create, "CREATE", 0), - token(TokenTypes::Table, "TABLE", 7), - token(TokenTypes::Identifier, "users", 13), - token(TokenTypes::LeftParen, "(", 18), - token(TokenTypes::Identifier, "id", 19), - token(TokenTypes::Integer, "INTEGER", 22), 
// Missing Comma - token(TokenTypes::Identifier, "name", 31), - token(TokenTypes::Text, "TEXT", 36), - token(TokenTypes::RightParen, ")", 40), - token(TokenTypes::SemiColon, ";", 41), - token(TokenTypes::EOF, "", 0), + token(TokenTypes::Create, "CREATE"), + token(TokenTypes::Table, "TABLE"), + token(TokenTypes::Identifier, "users"), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::Identifier, "id"), + token(TokenTypes::Integer, "INTEGER"), // Missing Comma + token(TokenTypes::Identifier, "name"), + token(TokenTypes::Text, "TEXT"), + token(TokenTypes::RightParen, ")"), + token(TokenTypes::SemiColon, ";"), + token(TokenTypes::EOF, ""), ]; let mut interpreter = Interpreter::new(tokens); let result = build(&mut interpreter); @@ -258,12 +263,13 @@ mod tests { #[test] fn index_statement_not_implemented() { + // CREATE INDEX my_index; let tokens = vec![ - token(TokenTypes::Create, "CREATE", 0), - token(TokenTypes::Index, "INDEX", 7), - token(TokenTypes::Identifier, "my_index", 13), - token(TokenTypes::SemiColon, ";", 22), - token(TokenTypes::EOF, "", 0), + token(TokenTypes::Create, "CREATE"), + token(TokenTypes::Index, "INDEX"), + token(TokenTypes::Identifier, "my_index"), + token(TokenTypes::SemiColon, ";"), + token(TokenTypes::EOF, ""), ]; let mut interpreter = Interpreter::new(tokens); let result = build(&mut interpreter); diff --git a/src/cli/ast/insert_statement.rs b/src/cli/ast/insert_statement.rs index 9b63303..1e68cd3 100644 --- a/src/cli/ast/insert_statement.rs +++ b/src/cli/ast/insert_statement.rs @@ -1,5 +1,357 @@ -use crate::cli::{ast::SqlStatement, ast::interpreter::Interpreter}; +use crate::cli::{ast::{interpreter::{self, Interpreter}, InsertIntoStatement, SqlStatement::{self, InsertInto}}, table::Value, tokenizer::token::TokenTypes}; +use hex::decode; -pub fn build(interpreter: &Interpreter) -> Result { - todo!() +pub fn build(interpreter: &mut Interpreter) -> Result { + interpreter.advance(); + let statement: Result; + match 
interpreter.current_token() { + Some(token) => { + match token.token_type { + TokenTypes::Into => { + statement = into_statement(interpreter); + }, + TokenTypes::Or => { + statement = or_statement(interpreter); + }, + _ => return Err(interpreter.format_error()), + } + }, + None => return Err(interpreter.format_error()), + } + // Ensure SemiColon + match interpreter.current_token() { + Some(token) => { + if token.token_type != TokenTypes::SemiColon { + return Err(interpreter.format_error()); + } + }, + None => return Err(interpreter.format_error()), + } + + return statement; +} + +fn into_statement(interpreter: &mut Interpreter) -> Result { + interpreter.advance(); + let table_name = match interpreter.current_token() { + Some(token) => { + if token.token_type != TokenTypes::Identifier { + return Err(interpreter.format_error()); + } + let name = token.value.to_string(); + interpreter.advance(); + name + }, + None => return Err(interpreter.format_error()), + }; + let columns = match interpreter.current_token() { + Some(token) => { + if token.token_type == TokenTypes::LeftParen { + Some(get_columns(interpreter)?) 
+ } + else { + None + } + }, + None => return Err(interpreter.format_error()), + }; + let mut values = vec![]; + match interpreter.current_token() { + Some(token) => { + if token.token_type == TokenTypes::Values { + interpreter.advance(); + loop { + values.push(get_values(interpreter)?); + match interpreter.current_token() { + Some(token) if token.token_type == TokenTypes::Comma => { + interpreter.advance(); + }, + Some(token) if token.token_type == TokenTypes::SemiColon => break, + _ => break, + } + } + } + else { + return Err(interpreter.format_error()); + } + }, + None => return Err(interpreter.format_error()), + }; + return Ok(InsertInto(InsertIntoStatement { + table_name: table_name, + columns: columns, + values: values, + })); +} + +fn get_values(interpreter: &mut Interpreter) -> Result, String> { + // Check for LeftParen + match interpreter.current_token() { + Some(token) if token.token_type == TokenTypes::LeftParen => { + interpreter.advance(); + let mut values: Vec = vec![]; + loop { + match interpreter.current_token() { + Some(token) => { + match token.token_type { + TokenTypes::IntLiteral => { + match token.value.parse::() { + Ok(num) => values.push(Value::Integer(num)), + Err(_) => return Err(interpreter.format_error()), + } + interpreter.advance(); + }, + TokenTypes::RealLiteral => { + match token.value.parse::() { + Ok(num) => values.push(Value::Real(num)), + Err(_) => return Err(interpreter.format_error()), + } + interpreter.advance(); + }, + TokenTypes::String => { + values.push(Value::Text(token.value.to_string())); + interpreter.advance(); + }, + TokenTypes::Blob => { + match decode(token.value) { + Ok(bytes) => values.push(Value::Blob(bytes)), + Err(_) => return Err(interpreter.format_error()), + } + interpreter.advance(); + }, + TokenTypes::Null => { + values.push(Value::Null); + interpreter.advance(); + }, + _ => return Err(interpreter.format_error()), + } + match interpreter.current_token() { + Some(token) if token.token_type == 
TokenTypes::Comma => { + interpreter.advance(); + }, + Some(token) if token.token_type == TokenTypes::RightParen => { + interpreter.advance(); + return Ok(values); + }, + _ => return Err(interpreter.format_error()), + } + }, + None => return Err(interpreter.format_error()), + } + } + } + _ => return Err(interpreter.format_error()), + } +} + +fn get_columns(interpreter: &mut Interpreter) -> Result, String> { + interpreter.advance(); + let mut columns: Vec = vec![]; + loop { + match interpreter.current_token() { + Some(token) => { + if token.token_type != TokenTypes::Identifier{ + return Err(interpreter.format_error()); + } + columns.push(token.value.to_string()); + } + None => { + return Err(interpreter.format_error()) + } + } + interpreter.advance(); + + match interpreter.current_token() { + Some(token) => { + match token.token_type { + TokenTypes::Comma => { + interpreter.advance(); + } + TokenTypes::RightParen => { + interpreter.advance(); + break; + } + _ => { + return Err(interpreter.format_error()) + } + } + } + None => { + return Err(interpreter.format_error()); + } + } + } + return Ok(columns); +} + +fn or_statement(_interpreter: &mut Interpreter) -> Result { + return Err("INSERT OR ... 
not yet implemented".to_string()); } + + +#[cfg(test)] +mod tests { + use super::*; + use crate::cli::tokenizer::scanner::Token; + + fn token(tt: TokenTypes, val: &'static str) -> Token<'static> { + Token { + token_type: tt, + value: val, + col_num: 0, + line_num: 1, + } + } + + #[test] + fn single_row_insert_statement_is_generated_correctly() { + // INSERT INTO users VALUES (1, "Alice"); + let tokens = vec![ + token(TokenTypes::Insert, "INSERT"), + token(TokenTypes::Into, "INTO"), + token(TokenTypes::Identifier, "users"), + token(TokenTypes::Values, "VALUES"), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::IntLiteral, "1"), + token(TokenTypes::Comma, ","), + token(TokenTypes::String, "Alice"), + token(TokenTypes::RightParen, ")"), + token(TokenTypes::SemiColon, ";"), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut interpreter); + assert!(result.is_ok()); + let statement = result.unwrap(); + assert_eq!(statement, SqlStatement::InsertInto(InsertIntoStatement { + table_name: "users".to_string(), + columns: None, + values: vec![ + vec![ + Value::Integer(1), + Value::Text("Alice".to_string()), + ] + ], + })); + } + + #[test] + fn multi_row_insert_statement_is_generated_correctly() { + // INSERT INTO guests VALUES (1, "Alice"), (2, "Bob"); + let tokens = vec![ + token(TokenTypes::Insert, "INSERT"), + token(TokenTypes::Into, "INTO"), + token(TokenTypes::Identifier, "guests"), + token(TokenTypes::Values, "VALUES"), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::IntLiteral, "1"), + token(TokenTypes::Comma, ","), + token(TokenTypes::String, "Alice"), + token(TokenTypes::RightParen, ")"), + token(TokenTypes::Comma, ","), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::IntLiteral, "2"), + token(TokenTypes::Comma, ","), + token(TokenTypes::String, "Bob"), + token(TokenTypes::RightParen, ")"), + token(TokenTypes::SemiColon, ";"), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut 
interpreter); + assert!(result.is_ok()); + let statement = result.unwrap(); + assert_eq!(statement, SqlStatement::InsertInto(InsertIntoStatement { + table_name: "guests".to_string(), + columns: None, + values: vec![ + vec![ + Value::Integer(1), + Value::Text("Alice".to_string()), + ], + vec![ + Value::Integer(2), + Value::Text("Bob".to_string()), + ] + ], + })); + } + + #[test] + fn single_row_insert_with_column_specifiers_is_generated_correctly() { + // INSERT INTO users (id, name, email) VALUES (1.1, <blob AAB000>, NULL); + let tokens = vec![ + token(TokenTypes::Insert, "INSERT"), + token(TokenTypes::Into, "INTO"), + token(TokenTypes::Identifier, "users"), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::Identifier, "id"), + token(TokenTypes::Comma, ","), + token(TokenTypes::Identifier, "name"), + token(TokenTypes::Comma, ","), + token(TokenTypes::Identifier, "email"), + token(TokenTypes::RightParen, ")"), + token(TokenTypes::Values, "VALUES"), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::RealLiteral, "1.1"), + token(TokenTypes::Comma, ","), + token(TokenTypes::Blob, "AAB000"), + token(TokenTypes::Comma, ","), + token(TokenTypes::Null, "NULL"), + token(TokenTypes::RightParen, ")"), + token(TokenTypes::SemiColon, ";"), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut interpreter); + assert!(result.is_ok()); + let statement = result.unwrap(); + assert_eq!(statement, SqlStatement::InsertInto(InsertIntoStatement { + table_name: "users".to_string(), + columns: Some(vec![ + "id".to_string(), + "name".to_string(), + "email".to_string(), + ]), + values: vec![ + vec![ + Value::Real(1.1), + Value::Blob(vec![0xAA, 0xB0, 0x00]), + Value::Null, + ] + ], + })); + } + + #[test] + fn insert_into_without_table_name_is_error() { + // INSERT INTO VALUES (1, "Alice"); + let tokens = vec![ + token(TokenTypes::Insert, "INSERT"), + token(TokenTypes::Into, "INTO"), + token(TokenTypes::Values, "VALUES"), + token(TokenTypes::LeftParen, "("), 
+ token(TokenTypes::IntLiteral, "1"), + token(TokenTypes::Comma, ","), + token(TokenTypes::String, "Alice"), + token(TokenTypes::RightParen, ")"), + token(TokenTypes::SemiColon, ";"), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut interpreter); + assert!(result.is_err()); + } + + #[test] + fn insert_or_is_not_implemented_error() { + // INSERT OR users VALUES (1, "Alice"); + let tokens = vec![ + token(TokenTypes::Insert, "INSERT"), + token(TokenTypes::Or, "OR"), + token(TokenTypes::Identifier, "users"), + token(TokenTypes::Values, "VALUES"), + token(TokenTypes::LeftParen, "("), + token(TokenTypes::IntLiteral, "1"), + ]; + let mut interpreter = Interpreter::new(tokens); + let result = build(&mut interpreter); + assert!(result.is_err()); + } +} \ No newline at end of file diff --git a/src/cli/ast/mod.rs b/src/cli/ast/mod.rs index f3e9f36..70295bb 100644 --- a/src/cli/ast/mod.rs +++ b/src/cli/ast/mod.rs @@ -1,4 +1,4 @@ -use crate::cli::{self, tokenizer::scanner::Token, table::Value}; +use crate::cli::{self, tokenizer::scanner::Token, table::{Value, ColumnDefinition}}; mod create_statement; mod insert_statement; @@ -8,21 +8,21 @@ mod select_statement; #[derive(Debug, PartialEq)] pub enum SqlStatement { CreateTable(CreateTableStatement), - Insert(InsertStatement), + InsertInto(InsertIntoStatement), Select(SelectStatement), } #[derive(Debug, PartialEq)] pub struct CreateTableStatement { pub table_name: String, - pub columns: Vec, + pub columns: Vec, } #[derive(Debug, PartialEq)] -pub struct InsertStatement { +pub struct InsertIntoStatement { pub table_name: String, - pub columns: Vec, - pub values: Vec, + pub columns: Option>, + pub values: Vec>, } #[derive(Debug, PartialEq)] @@ -35,7 +35,7 @@ pub struct SelectStatement { #[derive(Debug, PartialEq)] pub struct WhereClause { pub column: String, - pub value: cli::table::Value, + pub value: Value, } pub fn generate(tokens: Vec) -> Vec> {