From f72608b6169c1dc5f5a7012dc5693ac55e871e4c Mon Sep 17 00:00:00 2001 From: Tomasz Okon Date: Mon, 1 Sep 2025 19:15:19 +0000 Subject: [PATCH 1/3] Code Generator interface --- include/minic/CodeGenerator.hpp | 112 ++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 include/minic/CodeGenerator.hpp diff --git a/include/minic/CodeGenerator.hpp b/include/minic/CodeGenerator.hpp new file mode 100644 index 0000000..1a68a84 --- /dev/null +++ b/include/minic/CodeGenerator.hpp @@ -0,0 +1,112 @@ +#ifndef MINIC_CODE_GENERATOR_HPP +#define MINIC_CODE_GENERATOR_HPP + +#include "minic/IR.hpp" +#include +#include +#include + +namespace minic { + +/** + * @namespace minic + * @brief Contains components for the miniC language, including the CodeGenerator that + * emits assembly/text output from the intermediate representation (IR). + */ + +/** + * @class CodeGenerator + * @brief Generates target assembly (or textual) output from a miniC IRProgram. + * + * The CodeGenerator traverses an IRProgram and emits textual output to a file stream. + * It handles function and block emission, instruction lowering, simple temporary-to-register + * mapping, and local stack allocation bookkeeping. + */ +class CodeGenerator { +public: + /** + * @brief Generate output for the given IR program into the specified file. + * @param ir_program The IR program to generate code for. + * @param output_file Path to the output file to write the generated code. + */ + void generate(const IRProgram& ir_program, const std::string& output_file); + +private: + /** + * @brief Output stream used to write generated code. + */ + std::ofstream out_; + + /** + * @brief Mapping from miniC token types (used for declarations) to target directives. + * + * For example, integer types may map to a data allocation directive ("dq" for 64-bit). + * TokenType values come from the IR/Token definitions. 
+ */ + std::map type_map_ = { + {TokenType::KEYWORD_INT, "dq"}, // 64-bit int + {TokenType::KEYWORD_VOID, ""} // No allocation + }; + + /** + * @brief Current stack offset (in bytes) used when allocating locals. + * + * This is a running total used to compute per-variable offsets on the stack. + */ + int stack_offset_ = 0; // Track stack for locals + + /** + * @brief Maps variable names to their stack offsets (negative offsets relative to base/frame). + */ + std::map var_offsets_; // Var to stack offset + + /** + * @brief Emit an entire IR program (top-level traversal). + * @param program The IRProgram to emit. + */ + void emit_program(const IRProgram& program); + + /** + * @brief Emit code for a single function IR node. + * @param func The IRFunction to emit. + * + * Responsible for function prologue/epilogue, stack allocation, and emitting blocks. + */ + void emit_function(const IRFunction& func); + + /** + * @brief Emit code for a basic block. + * @param block The BasicBlock to emit. + * + * Handles block labels and emits contained instructions in order. + */ + void emit_block(const BasicBlock& block); + + /** + * @brief Lower a single IR instruction to target output. + * @param instr The IRInstruction to lower/emit. + */ + void emit_instruction(const IRInstruction& instr); + + /** + * @brief Map a temporary name (SSA/temp) to a target register or spill location string. + * @param temp The temporary identifier from IR. + * @return A string representing the register or memory operand to use in emitted code. + * + * This provides a simple mapping for temps to textual registers or stack locations. + */ + std::string reg_for_temp(const std::string& temp); + + /** + * @brief Allocate stack space for locals used by a function and populate var_offsets_. + * @param func The function whose locals are being allocated. + * + * Computes stack_offset_ and var_offsets_ to be used when emitting instructions that + * reference local variables. 
+ */ + void allocate_stack(const IRFunction& func); +}; + +} // namespace minic + +#endif // MINIC_CODE_GENERATOR_HPP \ No newline at end of file From a0fa6fdac65471cb201ec4a09e8f817b19c237cf Mon Sep 17 00:00:00 2001 From: Tomasz Okon Date: Sun, 7 Sep 2025 21:25:26 +0000 Subject: [PATCH 2/3] Basic working code generator without tests --- docs/dev.md | 1 + include/minic/CodeGenerator.hpp | 159 ++++++----- src/CodeGenerator.cpp | 468 ++++++++++++++++++++++++++++++++ src/IRGenerator.cpp | 8 +- src/main.cpp | 44 ++- tests/CMakeLists.txt | 3 +- tests/TestIRGenerator.cpp | 15 +- tests/TestLexer.cpp | 104 +++++++ tests/TestParser.cpp | 17 ++ 9 files changed, 743 insertions(+), 76 deletions(-) create mode 100644 src/CodeGenerator.cpp diff --git a/docs/dev.md b/docs/dev.md index 3a166e0..f33d862 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -1,6 +1,7 @@ # Compile - cmake -DPRODUCTION=ON -DBUILD_TESTS=ON for prod - cmake -DPROCUCTION=OFF -DBUILD_TESTS=ON for dev +- make -j$(nproc) # Format code - clang-format -i -style=file $(find . -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.c" -o -name "*.hpp" \)) \ No newline at end of file diff --git a/include/minic/CodeGenerator.hpp b/include/minic/CodeGenerator.hpp index 1a68a84..db4c9d4 100644 --- a/include/minic/CodeGenerator.hpp +++ b/include/minic/CodeGenerator.hpp @@ -1,112 +1,143 @@ -#ifndef MINIC_CODE_GENERATOR_HPP -#define MINIC_CODE_GENERATOR_HPP +#ifndef MINIC_CODEGENERATOR_HPP +#define MINIC_CODEGENERATOR_HPP -#include "minic/IR.hpp" -#include -#include +#include "minic/IRGenerator.hpp" +#include #include +#include +#include +#include +#include -namespace minic { - -/** - * @namespace minic - * @brief Contains components for the miniC language, including the CodeGenerator that - * emits assembly/text output from the intermediate representation (IR). - */ +namespace minic +{ /** * @class CodeGenerator - * @brief Generates target assembly (or textual) output from a miniC IRProgram. 
+ * @brief Generate target code (textual assembly) from IRProgram. * - * The CodeGenerator traverses an IRProgram and emits textual output to a file stream. - * It handles function and block emission, instruction lowering, simple temporary-to-register - * mapping, and local stack allocation bookkeeping. + * CodeGenerator takes an IRProgram produced by the IRGenerator and emits a + * textual representation (or writes to a file). It maintains per-function + * state such as stack allocation offsets, label mapping and the current + * output stream. */ -class CodeGenerator { +class CodeGenerator +{ public: /** - * @brief Generate output for the given IR program into the specified file. - * @param ir_program The IR program to generate code for. - * @param output_file Path to the output file to write the generated code. - */ - void generate(const IRProgram& ir_program, const std::string& output_file); - -private: - /** - * @brief Output stream used to write generated code. + * @brief Construct a CodeGenerator with the given output stream. + * + * The provided stream is used as the default place to emit generated + * code. The constructor initializes internal mappings and state. + * + * @param out Output stream to write generated code to (defaults to std::cout). */ - std::ofstream out_; + explicit CodeGenerator(std::ostream& out = std::cout); /** - * @brief Mapping from miniC token types (used for declarations) to target directives. + * @brief Generate code for the given IR program. + * + * Traverses the IRProgram and emits code for each function and block. + * Optionally writes the result to an output file if output_file is not empty. * - * For example, integer types may map to a data allocation directive ("dq" for 64-bit). - * TokenType values come from the IR/Token definitions. + * @param ir_program IR representation to generate code from. + * @param output_file Optional path to write the emitted code into. 
*/ - std::map type_map_ = { - {TokenType::KEYWORD_INT, "dq"}, // 64-bit int - {TokenType::KEYWORD_VOID, ""} // No allocation - }; + void generate(const IRProgram& ir_program, const std::string& output_file = ""); +private: /** - * @brief Current stack offset (in bytes) used when allocating locals. + * @brief Emit the entire program (all functions and global data). + * + * Called by generate() to handle program-level emission. * - * This is a running total used to compute per-variable offsets on the stack. + * @param program IRProgram to emit. */ - int stack_offset_ = 0; // Track stack for locals + void emit_program(const IRProgram& program); /** - * @brief Maps variable names to their stack offsets (negative offsets relative to base/frame). + * @brief Emit a single IRFunction. + * + * Outputs function prologue/epilogue and emits its basic blocks. + * + * @param func Function IR to emit. */ - std::map var_offsets_; // Var to stack offset + void emit_function(const IRFunction& func); /** - * @brief Emit an entire IR program (top-level traversal). - * @param program The IRProgram to emit. + * @brief Emit a basic block. + * + * Writes the block label (if needed) and emits contained instructions in order. + * + * @param block BasicBlock to emit. */ - void emit_program(const IRProgram& program); + void emit_block(const BasicBlock& block); /** - * @brief Emit code for a single function IR node. - * @param func The IRFunction to emit. + * @brief Emit a single IR instruction. * - * Responsible for function prologue/epilogue, stack allocation, and emitting blocks. + * Translates an IRInstruction into one or more target assembly/text lines + * and writes them to the output stream. + * + * @param instr Instruction to emit. */ - void emit_function(const IRFunction& func); + void emit_instruction(const IRInstruction& instr); /** - * @brief Emit code for a basic block. - * @param block The BasicBlock to emit. + * @brief Allocate stack space for function-local variables. 
+ * + * Computes offsets for locals, updates stack_offset_ and var_offsets_ + * so subsequent instructions refer to correct stack locations. * - * Handles block labels and emits contained instructions in order. + * @param func Function whose stack frame to allocate. */ - void emit_block(const BasicBlock& block); + void allocate_stack(const IRFunction& func); /** - * @brief Lower a single IR instruction to target output. - * @param instr The IRInstruction to lower/emit. + * @brief Get the textual location for a variable name. + * + * Returns a string describing where the named variable is stored + * (e.g., a stack reference or register name) based on current offsets. + * + * @param name Variable name or temporary. + * @return Textual location used in emitted code. */ - void emit_instruction(const IRInstruction& instr); + std::string get_loc(const std::string& name); /** - * @brief Map a temporary name (SSA/temp) to a target register or spill location string. - * @param temp The temporary identifier from IR. - * @return A string representing the register or memory operand to use in emitted code. + * @brief Find a label that contains the provided substring. + * + * Useful for heuristics when mapping control-flow targets back to labels. * - * This provides a simple mapping for temps to textual registers or stack locations. + * @param substr Substring to search for inside known labels. + * @return Matching label name, or empty string if none found. */ - std::string reg_for_temp(const std::string& temp); + std::string find_label_with_substr(const std::string& substr) const; /** - * @brief Allocate stack space for locals used by a function and populate var_offsets_. - * @param func The function whose locals are being allocated. + * @brief Infer the branch target label for the current block. * - * Computes stack_offset_ and var_offsets_ to be used when emitting instructions that - * reference local variables. 
+ * Uses block ordering and label maps to determine the most reasonable + * fall-through or explicit target for branches emitted from the current block. + * + * @return Inferred label name for the current block's primary target. */ - void allocate_stack(const IRFunction& func); + std::string infer_target_label_for_current_block() const; + + std::ostream* out_; ///< Output stream used for emitted code. + std::unordered_map type_map_; ///< Mapping IR types to textual types. + std::string current_function_; ///< Name of the function currently being emitted. + std::string current_block_label_; ///< Label of the current basic block. + int stack_offset_; ///< Current stack offset for locals within the active function. + std::unordered_map var_offsets_; ///< Map from variable name to stack offset. + std::vector block_labels_; ///< Ordered list of block labels for the current function. + std::unordered_map block_index_; ///< Mapping block label -> index in block_labels_. + std::unordered_set labels_; ///< Set of labels already emitted/known. + std::string last_written_loc_; ///< Last emitted location string (to avoid redundant moves). 
+ + friend class PublicCodeGenerator; }; } // namespace minic -#endif // MINIC_CODE_GENERATOR_HPP \ No newline at end of file +#endif // MINIC_CODEGENERATOR_HPP \ No newline at end of file diff --git a/src/CodeGenerator.cpp b/src/CodeGenerator.cpp new file mode 100644 index 0000000..67066de --- /dev/null +++ b/src/CodeGenerator.cpp @@ -0,0 +1,468 @@ +// minic/CodeGenerator.cpp +#include "minic/CodeGenerator.hpp" +#include +#include +#include +#include +#include + +namespace minic +{ + +CodeGenerator::CodeGenerator(std::ostream& out) + : out_(&out) + , type_map_({ { TokenType::KEYWORD_INT, "dq" }, + { TokenType::KEYWORD_VOID, "" }, + { TokenType::KEYWORD_STR, "db" } }) + , stack_offset_(0) + , last_written_loc_("") +{ +} + +void CodeGenerator::generate(const IRProgram& ir_program, const std::string& output_file) +{ + std::ofstream file; + std::ostream* previous_out = out_; + std::cout << "[CodeGen] generate: output_file='" << output_file << "'\n"; + if (!output_file.empty()) + { + file.open(output_file, std::ios::trunc); + if (!file.is_open()) + { + throw std::runtime_error("Could not open output file: " + output_file); + } + out_ = &file; + std::cout << "[CodeGen] Writing to file: " << output_file << "\n"; + } + else + { + std::cout << "[CodeGen] Writing to provided ostream\n"; + } + + (*out_) << "section .data\n"; + (*out_) << "section .text\n"; + (*out_) << "global _start\n"; + (*out_) << "_start:\n"; + (*out_) << " call main\n"; + (*out_) << " mov rdi, rax\n"; + (*out_) << " mov rax, 60\n"; + (*out_) << " syscall\n\n"; + + std::cout << "[CodeGen] Emitting program\n"; + emit_program(ir_program); + std::cout << "[CodeGen] Emission complete\n"; + + out_->flush(); + if (!(*out_)) + { + out_ = previous_out; + throw std::runtime_error("Failed while writing to output stream/file."); + } + + out_ = previous_out; +} + +void CodeGenerator::emit_program(const IRProgram& program) +{ + std::cout << "[CodeGen] emit_program: function_count=" << program.functions.size() << 
"\n"; + for (const auto& func : program.functions) + { + emit_function(*func); + } +} + +void CodeGenerator::emit_function(const IRFunction& func) +{ + std::cout << "[CodeGen] emit_function: " << func.name << " params=" << func.parameters.size() << " blocks=" << func.blocks.size() << "\n"; + current_function_ = func.name; + stack_offset_ = 0; + var_offsets_.clear(); + block_labels_.clear(); + block_index_.clear(); + labels_.clear(); + last_written_loc_.clear(); + + for (size_t i = 0; i < func.blocks.size(); ++i) + { + const std::string& lbl = func.blocks[i]->label; + block_labels_.push_back(lbl); + block_index_[lbl] = i; + labels_.insert(lbl); + } + + allocate_stack(func); + + std::cout << "[CodeGen] Function '" << func.name << "' stack_offset=" << stack_offset_ << " var_count=" << var_offsets_.size() << "\n"; + + (*out_) << func.name << ":\n"; + (*out_) << " push rbp\n"; + (*out_) << " mov rbp, rsp\n"; + if (stack_offset_ > 0) + { + (*out_) << " sub rsp, " << stack_offset_ << "\n"; + } + + const std::string param_regs[] = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" }; + size_t param_idx = 0; + for (const auto& param : func.parameters) + { + if (param_idx < 6) + { + (*out_) << " mov [rbp - " << var_offsets_[param.name] << "], " << param_regs[param_idx] << "\n"; + std::cout << "[CodeGen] Param move: " << param.name << " <- " << param_regs[param_idx] << " offset=" << var_offsets_[param.name] << "\n"; + } + param_idx++; + } + + for (const auto& block : func.blocks) + { + emit_block(*block); + } + + (*out_) << current_function_ << "_epilogue:\n"; + (*out_) << " leave\n"; + (*out_) << " ret\n\n"; + std::cout << "[CodeGen] Finished function: " << func.name << "\n"; +} + +void CodeGenerator::emit_block(const BasicBlock& block) +{ + current_block_label_ = block.label; + std::cout << "[CodeGen] emit_block: " << block.label << " instructions=" << block.instructions.size() << "\n"; + (*out_) << block.label << ":\n"; + for (const auto& instr : block.instructions) + { + 
emit_instruction(instr); + } + + if (!block.instructions.empty()) + { + const IRInstruction& last = block.instructions.back(); + if (last.opcode != IROpcode::JUMP && last.opcode != IROpcode::JUMPIF && last.opcode != IROpcode::JUMPIFNOT && last.opcode != IROpcode::RETURN) + { + size_t idx = block_index_.at(current_block_label_); + if (idx + 1 < block_labels_.size()) + { + (*out_) << " jmp " << block_labels_[idx + 1] << "\n"; + std::cout << "[CodeGen] Auto-jmp to " << block_labels_[idx + 1] << " from " << current_block_label_ << "\n"; + } + } + } + else + { + size_t idx = block_index_.at(current_block_label_); + if (idx + 1 < block_labels_.size()) + { + (*out_) << " jmp " << block_labels_[idx + 1] << "\n"; + std::cout << "[CodeGen] Empty block auto-jmp to " << block_labels_[idx + 1] << "\n"; + } + } +} + +void CodeGenerator::emit_instruction(const IRInstruction& instr) +{ + std::string res_loc = get_loc(instr.result); + std::string op1_loc = get_loc(instr.operand1); + std::string op2_loc = get_loc(instr.operand2); + + std::cout << "[CodeGen] emit_instruction: opcode=" << static_cast(instr.opcode) + << " result='" << instr.result << "' operand1='" << instr.operand1 << "' operand2='" << instr.operand2 << "'\n"; + std::cout << "[CodeGen] locations: res=" << res_loc << " op1=" << op1_loc << " op2=" << op2_loc << "\n"; + + // For control flow instructions, handle specially if no explicit condition + if ((instr.opcode == IROpcode::JUMPIF || instr.opcode == IROpcode::JUMPIFNOT) && (instr.operand1.empty() || op1_loc == "0")) + { + if (!last_written_loc_.empty()) + { + op1_loc = last_written_loc_; + std::cout << "[CodeGen] Using last_written_loc for condition: " << last_written_loc_ << "\n"; + } + } + + switch (instr.opcode) + { + case IROpcode::ASSIGN: + if (instr.operand1.find_first_not_of("0123456789") == std::string::npos) + { + // Literal assignment + if (res_loc.find("[rbp") != std::string::npos) + (*out_) << " mov qword " << res_loc << ", " << instr.operand1 << "\n"; + 
else + (*out_) << " mov " << res_loc << ", " << instr.operand1 << "\n"; + std::cout << "[CodeGen] ASSIGN literal: " << instr.operand1 << " -> " << res_loc << "\n"; + } + else + { + // Variable assignment + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + std::cout << "[CodeGen] ASSIGN var: " << op1_loc << " -> " << res_loc << "\n"; + } + break; + case IROpcode::ADD: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " add rax, " << op2_loc << "\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::SUB: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " sub rax, " << op2_loc << "\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::MUL: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " imul rax, " << op2_loc << "\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::DIV: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cqo\n"; + if (op2_loc.find_first_not_of("0123456789") == std::string::npos) + { + (*out_) << " mov rbx, " << op2_loc << "\n"; + (*out_) << " idiv rbx\n"; + } + else + { + (*out_) << " mov rbx, " << op2_loc << "\n"; + (*out_) << " idiv rbx\n"; + } + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::NEG: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " neg rax\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::NOT: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " test rax, rax\n"; + (*out_) << " setz al\n"; + (*out_) << " movzx rax, al\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::EQ: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, " << op2_loc << "\n"; + (*out_) << " sete al\n"; + (*out_) << " movzx rax, al\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::NEQ: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, " << op2_loc << "\n"; + 
(*out_) << " setne al\n"; + (*out_) << " movzx rax, al\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::LT: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, " << op2_loc << "\n"; + (*out_) << " setl al\n"; + (*out_) << " movzx rax, al\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::GT: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, " << op2_loc << "\n"; + (*out_) << " setg al\n"; + (*out_) << " movzx rax, al\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::LE: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, " << op2_loc << "\n"; + (*out_) << " setle al\n"; + (*out_) << " movzx rax, al\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::GE: + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, " << op2_loc << "\n"; + (*out_) << " setge al\n"; + (*out_) << " movzx rax, al\n"; + (*out_) << " mov " << res_loc << ", rax\n"; + break; + case IROpcode::JUMP: + { + std::string target = instr.operand1; + if (target.empty()) + target = infer_target_label_for_current_block(); + if (target.empty()) + { + (*out_) << " ; missing jump target in " << current_function_ << " " << current_block_label_ << "\n"; + std::cout << "[CodeGen] JUMP: missing target in " << current_function_ << " " << current_block_label_ << "\n"; + } + else + { + (*out_) << " jmp " << target << "\n"; + std::cout << "[CodeGen] JUMP -> " << target << "\n"; + } + break; + } + case IROpcode::JUMPIF: + { + std::string target = instr.operand2; + if (target.empty()) + target = infer_target_label_for_current_block(); + if (target.empty()) + { + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, 0\n"; + (*out_) << " ; missing jump target (JUMPIF) in " << current_function_ << " " << current_block_label_ << "\n"; + std::cout << "[CodeGen] JUMPIF: missing target, condition=" << op1_loc << "\n"; + } + else + { + (*out_) 
<< " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, 0\n"; + (*out_) << " jne " << target << "\n"; + std::cout << "[CodeGen] JUMPIF -> " << target << " if " << op1_loc << " != 0\n"; + } + break; + } + case IROpcode::JUMPIFNOT: + { + std::string target = instr.operand2; + if (target.empty()) + target = infer_target_label_for_current_block(); + if (target.empty()) + { + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, 0\n"; + (*out_) << " ; missing jump target (JUMPIFNOT) in " << current_function_ << " " << current_block_label_ << "\n"; + std::cout << "[CodeGen] JUMPIFNOT: missing target, condition=" << op1_loc << "\n"; + } + else + { + (*out_) << " mov rax, " << op1_loc << "\n"; + (*out_) << " cmp rax, 0\n"; + (*out_) << " je " << target << "\n"; + std::cout << "[CodeGen] JUMPIFNOT -> " << target << " if " << op1_loc << " == 0\n"; + } + break; + } + case IROpcode::RETURN: + if (!instr.operand1.empty()) + { + (*out_) << " mov rax, " << op1_loc << "\n"; + std::cout << "[CodeGen] RETURN value moved to rax: " << op1_loc << "\n"; + } + (*out_) << " jmp " << current_function_ << "_epilogue\n"; + std::cout << "[CodeGen] RETURN -> epilogue\n"; + break; + default: + throw std::runtime_error("Unsupported IR opcode in NASM codegen"); + } + + if (!instr.result.empty() && instr.opcode != IROpcode::JUMP && instr.opcode != IROpcode::JUMPIF && instr.opcode != IROpcode::JUMPIFNOT && instr.opcode != IROpcode::RETURN) + { + last_written_loc_ = res_loc; + std::cout << "[CodeGen] last_written_loc updated to " << last_written_loc_ << "\n"; + } +} + +std::string CodeGenerator::get_loc(const std::string& name) +{ + if (name.empty()) + return "0"; + if (name.find_first_not_of("0123456789") == std::string::npos) + return name; + if (labels_.count(name)) + return name; + auto it = var_offsets_.find(name); + if (it != var_offsets_.end()) + return "[rbp - " + std::to_string(it->second) + "]"; + // if unknown, allocate a slot for it now (ensures consistency) + int 
newOff = stack_offset_ + 8; + stack_offset_ = newOff; + var_offsets_[name] = newOff; + std::cout << "[CodeGen] get_loc: allocated new var '" << name << "' offset=" << newOff << " new stack_offset=" << stack_offset_ << "\n"; + return "[rbp - " + std::to_string(newOff) + "]"; +} + +std::string CodeGenerator::find_label_with_substr(const std::string& substr) const +{ + for (const auto& lbl : block_labels_) + { + if (lbl.find(substr) != std::string::npos) + return lbl; + } + return ""; +} + +std::string CodeGenerator::infer_target_label_for_current_block() const +{ + if (current_block_label_.find("body") != std::string::npos) + { + std::string found = find_label_with_substr("cond"); + if (!found.empty()) + { + std::cout << "[CodeGen] infer_target: body -> cond -> " << found << "\n"; + return found; + } + } + size_t idx = block_index_.at(current_block_label_); + if (idx + 1 < block_labels_.size()) + { + std::cout << "[CodeGen] infer_target: next block -> " << block_labels_[idx + 1] << "\n"; + return block_labels_[idx + 1]; + } + std::cout << "[CodeGen] infer_target: none found for block " << current_block_label_ << "\n"; + return ""; +} + +void CodeGenerator::allocate_stack(const IRFunction& func) +{ + std::cout << "[CodeGen] allocate_stack for " << func.name << "\n"; + std::unordered_set all_vars; + for (const auto& p : func.parameters) + all_vars.insert(p.name); + for (const auto& block : func.blocks) + { + for (const auto& instr : block->instructions) + { + if (!instr.result.empty() && labels_.count(instr.result) == 0) + all_vars.insert(instr.result); + if (!instr.operand1.empty() && instr.operand1.find_first_not_of("0123456789") != std::string::npos && labels_.count(instr.operand1) == 0) + all_vars.insert(instr.operand1); + if (!instr.operand2.empty() && instr.operand2.find_first_not_of("0123456789") != std::string::npos && labels_.count(instr.operand2) == 0) + all_vars.insert(instr.operand2); + } + } + + std::vector params; + for (const auto& p : func.parameters) + 
params.push_back(p.name); + + std::vector locals; + for (const auto& v : all_vars) + { + if (std::find(params.begin(), params.end(), v) == params.end()) + locals.push_back(v); + } + + std::sort(locals.begin(), locals.end()); + + int offset = 0; + for (const auto& p : params) + { + std::cout << "Param: " << p << " Offset: " << (offset + 8) << "\n"; + offset += 8; + var_offsets_[p] = offset; + } + for (const auto& v : locals) + { + std::cout << "Local: " << v << " Offset: " << (offset + 8) << "\n"; + offset += 8; + var_offsets_[v] = offset; + } + + stack_offset_ = offset; + if (stack_offset_ % 16 != 0) + stack_offset_ = ((stack_offset_ + 15) / 16) * 16; + + std::cout << "[CodeGen] allocate_stack done: final_stack_offset=" << stack_offset_ << " var_count=" << var_offsets_.size() << "\n"; +} + +} // namespace minic \ No newline at end of file diff --git a/src/IRGenerator.cpp b/src/IRGenerator.cpp index afb058e..150013e 100644 --- a/src/IRGenerator.cpp +++ b/src/IRGenerator.cpp @@ -82,7 +82,7 @@ void IRGenerator::visit(const Stmt& stmt) std::string else_label = new_label("if_else"); std::string end_label = new_label("if_end"); - emit(IROpcode::JUMPIFNOT, cond_temp, else_label); // Jump if false + emit(IROpcode::JUMPIFNOT, "", cond_temp, else_label); // Then branch auto then_block = std::make_unique(then_label); @@ -90,7 +90,7 @@ void IRGenerator::visit(const Stmt& stmt) current_function_->blocks.push_back(std::move(then_block)); for (const auto& s : if_stmt->then_branch) visit(*s); - emit(IROpcode::JUMP, end_label); + emit(IROpcode::JUMP, "", end_label); // Else branch auto else_block = std::make_unique(else_label); @@ -98,7 +98,7 @@ void IRGenerator::visit(const Stmt& stmt) current_function_->blocks.push_back(std::move(else_block)); for (const auto& s : if_stmt->else_branch) visit(*s); - emit(IROpcode::JUMP, end_label); + emit(IROpcode::JUMP, "", end_label); // End auto end_block = std::make_unique(end_label); @@ -118,7 +118,7 @@ void IRGenerator::visit(const Stmt& 
stmt) current_block_ = cond_block.get(); current_function_->blocks.push_back(std::move(cond_block)); std::string cond_temp = generate_expr(*while_stmt->condition); - emit(IROpcode::JUMPIFNOT, cond_temp, end_label); // Jump if false + emit(IROpcode::JUMPIFNOT, "", cond_temp, end_label); // Jump if false // Body block auto body_block = std::make_unique(body_label); diff --git a/src/main.cpp b/src/main.cpp index baeba8c..17d5181 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,28 +1,36 @@ +#include "minic/CodeGenerator.hpp" +#include "minic/IRGenerator.hpp" #include "minic/Lexer.hpp" #include "minic/Parser.hpp" #include "minic/SemanticAnalyzer.hpp" #include #include +#include + +int compile_file(const std::string& filename, const std::string& source); int main(int argc, char** argv) { if (argc < 2) { - std::cerr << "Usage: minic \n"; + std::cerr << "Usage: cminusminus \n"; return 1; } std::ifstream input_file(argv[1]); if (!input_file) { - std::cerr << "Error: Could not open input file.\n"; + std::cerr << "Error: Could not open input file '" << argv[1] << "'.\n"; return 1; } - std::cout << "Compiling: " << argv[1] << "\n"; + std::string source((std::istreambuf_iterator(input_file)), std::istreambuf_iterator()); + return compile_file(argv[1], source); +} - std::string source((std::istreambuf_iterator(input_file)), - std::istreambuf_iterator()); +int compile_file(const std::string& filename, const std::string& source) +{ + std::cout << "Compiling: " << filename << "\n"; std::vector tokens; try @@ -55,7 +63,31 @@ int main(int argc, char** argv) } catch (const std::exception& e) { - std::cerr << "Error while analyzing semantics: " << e.what() << "\n"; + std::cerr << "Error during semantic analysis: " << e.what() << "\n"; + return 1; + } + + std::unique_ptr ir_program; + try + { + minic::IRGenerator ir_gen; + ir_program = ir_gen.generate(*program); + } + catch (const std::exception& e) + { + std::cerr << "Error during IR generation: " << e.what() << "\n"; + return 1; + } 
+ + try + { + minic::CodeGenerator code_gen; + code_gen.generate(*ir_program, "output.asm"); + std::cout << "Assembly generated to output.asm\n"; + } + catch (const std::exception& e) + { + std::cerr << "Error during code generation: " << e.what() << "\n"; return 1; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 72e7e4a..b3a3ce3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -19,7 +19,8 @@ add_executable(minic_tests ${TEST_SOURCES} ${CMAKE_SOURCE_DIR}/src/Lexer.cpp ${CMAKE_SOURCE_DIR}/src/Parser.cpp ${CMAKE_SOURCE_DIR}/src/SemanticAnalyzer.cpp - ${CMAKE_SOURCE_DIR}/src/IRGenerator.cpp) + ${CMAKE_SOURCE_DIR}/src/IRGenerator.cpp + ${CMAKE_SOURCE_DIR}/src/CodeGenerator.cpp) # Link against Google Test and compiler sources target_link_libraries(minic_tests PRIVATE gtest gtest_main) diff --git a/tests/TestIRGenerator.cpp b/tests/TestIRGenerator.cpp index 1639248..ed46a91 100644 --- a/tests/TestIRGenerator.cpp +++ b/tests/TestIRGenerator.cpp @@ -741,4 +741,17 @@ TEST_F(IRGeneratorTest, PrivateCurrentPointers) EXPECT_EQ(generator_.current_function_->name, "test"); } -} \ No newline at end of file +TEST_F(IRGeneratorTest, GenerateIRForFullProgram) +{ + std::string source = "int main() {\n" + " int x = 5;\n" + " if (x > 0) {\n" + " while (x < 10) {\n" + " x = x - 1;\n" + " }\n" + " }\n" + " return x;\n" + "}\n"; +} + +} // namespace minic \ No newline at end of file diff --git a/tests/TestLexer.cpp b/tests/TestLexer.cpp index 8bc6101..39daa5c 100644 --- a/tests/TestLexer.cpp +++ b/tests/TestLexer.cpp @@ -564,3 +564,107 @@ TEST_F(LexerTest, AnotherComplexProgram) ASSERT_EQ(tokens[41].type, minic::TokenType::RBRACE); ASSERT_EQ(tokens[42].type, minic::TokenType::END_OF_FILE); } + +TEST_F(LexerTest, ComplexProgram3) +{ + std::string source = "\n" + "int main() {\n" + " int x = 5 + 3;\n" + " if (x > 0) {\n" + " while (x < 10) {\n" + " x = x - 1;\n" + " }\n" + " }\n" + " return x;\n" + "}\n"; + + lexer.source_ = source; + lexer.pos_ = 0; + 
lexer.column_ = 1; + lexer.line_ = 1; + + std::vector tokens = lexer.Lex(); + + // Total tokens: leading NEWLINE, int, main, (, ), {, NEWLINE, + // int, x, =, 5, +, 3, ;, NEWLINE, + // if, (, x, >, 0, ), {, NEWLINE, + // while, (, x, <, 10, ), {, NEWLINE, + // x, =, x, -, 1, ;, NEWLINE, + // }, NEWLINE, + // }, NEWLINE, + // return, x, ;, NEWLINE, + // }, NEWLINE, EOF + ASSERT_EQ(tokens.size(), 49); + + ASSERT_EQ(tokens[0].type, minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[1].type, minic::TokenType::KEYWORD_INT); + ASSERT_EQ(tokens[2].type, minic::TokenType::IDENTIFIER); + ASSERT_EQ(std::get(tokens[2].value), "main"); + ASSERT_EQ(tokens[3].type, minic::TokenType::LPAREN); + ASSERT_EQ(tokens[4].type, minic::TokenType::RPAREN); + ASSERT_EQ(tokens[5].type, minic::TokenType::LBRACE); + ASSERT_EQ(tokens[6].type, minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[7].type, minic::TokenType::KEYWORD_INT); + ASSERT_EQ(tokens[8].type, minic::TokenType::IDENTIFIER); + ASSERT_EQ(std::get(tokens[8].value), "x"); + ASSERT_EQ(tokens[9].type, minic::TokenType::OP_ASSIGN); + ASSERT_EQ(tokens[10].type, minic::TokenType::LITERAL_INT); + ASSERT_EQ(std::get(tokens[10].value), 5); + ASSERT_EQ(tokens[11].type, minic::TokenType::OP_PLUS); + ASSERT_EQ(tokens[12].type, minic::TokenType::LITERAL_INT); + ASSERT_EQ(std::get(tokens[12].value), 3); + ASSERT_EQ(tokens[13].type, minic::TokenType::SEMICOLON); + ASSERT_EQ(tokens[14].type, minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[15].type, minic::TokenType::KEYWORD_IF); + ASSERT_EQ(tokens[16].type, minic::TokenType::LPAREN); + ASSERT_EQ(tokens[17].type, minic::TokenType::IDENTIFIER); + ASSERT_EQ(std::get(tokens[17].value), "x"); + ASSERT_EQ(tokens[18].type, minic::TokenType::OP_GREATER); + ASSERT_EQ(tokens[19].type, minic::TokenType::LITERAL_INT); + ASSERT_EQ(std::get(tokens[19].value), 0); + ASSERT_EQ(tokens[20].type, minic::TokenType::RPAREN); + ASSERT_EQ(tokens[21].type, minic::TokenType::LBRACE); + ASSERT_EQ(tokens[22].type, 
minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[23].type, minic::TokenType::KEYWORD_WHILE); + ASSERT_EQ(tokens[24].type, minic::TokenType::LPAREN); + ASSERT_EQ(tokens[25].type, minic::TokenType::IDENTIFIER); + ASSERT_EQ(std::get(tokens[25].value), "x"); + ASSERT_EQ(tokens[26].type, minic::TokenType::OP_LESS); + ASSERT_EQ(tokens[27].type, minic::TokenType::LITERAL_INT); + ASSERT_EQ(std::get(tokens[27].value), 10); + ASSERT_EQ(tokens[28].type, minic::TokenType::RPAREN); + ASSERT_EQ(tokens[29].type, minic::TokenType::LBRACE); + ASSERT_EQ(tokens[30].type, minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[31].type, minic::TokenType::IDENTIFIER); + ASSERT_EQ(std::get(tokens[31].value), "x"); + ASSERT_EQ(tokens[32].type, minic::TokenType::OP_ASSIGN); + ASSERT_EQ(tokens[33].type, minic::TokenType::IDENTIFIER); + ASSERT_EQ(std::get(tokens[33].value), "x"); + ASSERT_EQ(tokens[34].type, minic::TokenType::OP_MINUS); + ASSERT_EQ(tokens[35].type, minic::TokenType::LITERAL_INT); + ASSERT_EQ(std::get(tokens[35].value), 1); + ASSERT_EQ(tokens[36].type, minic::TokenType::SEMICOLON); + ASSERT_EQ(tokens[37].type, minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[38].type, minic::TokenType::RBRACE); + ASSERT_EQ(tokens[39].type, minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[40].type, minic::TokenType::RBRACE); + ASSERT_EQ(tokens[41].type, minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[42].type, minic::TokenType::KEYWORD_RETURN); + ASSERT_EQ(tokens[43].type, minic::TokenType::IDENTIFIER); + ASSERT_EQ(std::get(tokens[43].value), "x"); + ASSERT_EQ(tokens[44].type, minic::TokenType::SEMICOLON); + ASSERT_EQ(tokens[45].type, minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[46].type, minic::TokenType::RBRACE); + ASSERT_EQ(tokens[47].type, minic::TokenType::NEWLINE); + + ASSERT_EQ(tokens[48].type, minic::TokenType::END_OF_FILE); +} \ No newline at end of file diff --git a/tests/TestParser.cpp b/tests/TestParser.cpp index 2bec72c..5693e50 100644 --- a/tests/TestParser.cpp +++ 
b/tests/TestParser.cpp @@ -541,4 +541,21 @@ TEST_F(ParserTest, ParseFullProgram) minic::Lexer lexer(source); tokens_ = lexer.Lex(); EXPECT_NO_THROW(parser_.parse()); +} + +TEST_F(ParserTest, ParseComplexProgram) +{ + std::string source = "\n" + "int main() {\n" + " int x = 5 + 3;\n" + " if (x > 0) {\n" + " while (x < 10) {\n" + " x = x - 1;\n" + " }\n" + " }\n" + " return x;\n" + "}\n"; + minic::Lexer lexer(source); + tokens_ = lexer.Lex(); + EXPECT_NO_THROW(parser_.parse()); } \ No newline at end of file From 01a4c80c7245f5ffa151936704f91468f258cfca Mon Sep 17 00:00:00 2001 From: Tomasz Okon Date: Sun, 7 Sep 2025 21:26:53 +0000 Subject: [PATCH 3/3] format --- include/minic/CodeGenerator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/minic/CodeGenerator.hpp b/include/minic/CodeGenerator.hpp index db4c9d4..ea585c1 100644 --- a/include/minic/CodeGenerator.hpp +++ b/include/minic/CodeGenerator.hpp @@ -2,12 +2,12 @@ #define MINIC_CODEGENERATOR_HPP #include "minic/IRGenerator.hpp" +#include #include #include #include #include #include -#include namespace minic {