From 0d413650769539ec0c10f5f911471d1935d97548 Mon Sep 17 00:00:00 2001 From: Lens Date: Sat, 21 Jan 2023 15:48:04 -0800 Subject: [PATCH 01/97] Initial Mess --- TODO.md | 1 + examples/byte.un | 4 + src/codegen.c | 15 +- src/codegen/intermediate_representation.c | 55 ++++--- src/codegen/intermediate_representation.h | 14 +- src/codegen/x86_64/arch_x86_64.c | 170 +++++++++++++++++++--- src/ir_parser.c | 17 ++- 7 files changed, 217 insertions(+), 59 deletions(-) create mode 100644 examples/byte.un diff --git a/TODO.md b/TODO.md index f9dd4c26c..8a960b949 100644 --- a/TODO.md +++ b/TODO.md @@ -38,6 +38,7 @@ - [ ] Types in the IR - [ ] Byte type - [ ] Type info in codegen/IR. + - [ ] Sema needs to "lower" integer_literal to "integer" if no other makes sense - [ ] Backend: Handle size/alignment requirements - [ ] Use eax, ax, al, etc. - [ ] Actually implementing casts. diff --git a/examples/byte.un b/examples/byte.un new file mode 100644 index 000000000..3a4563cf3 --- /dev/null +++ b/examples/byte.un @@ -0,0 +1,4 @@ +a : byte = 34 +b : byte = 35 +c : byte = a + b +c diff --git a/src/codegen.c b/src/codegen.c index 88a408bb7..c4a61e0a3 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -153,7 +153,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { case NODE_DECLARATION: expr->ir = expr->declaration.static_ ? ir_create_static(ctx, expr->type, as_span(expr->declaration.name)) - : ir_stack_allocate(ctx, type_sizeof(expr->type)); + : ir_stack_allocate(ctx, expr->type); /// Emit the initialiser if there is one. if (expr->declaration.init) { @@ -215,7 +215,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { /// Insert a phi node for the result of the if in the join block. 
if (!type_is_void(expr->type)) { - IRInstruction *phi = ir_phi(ctx); + IRInstruction *phi = ir_phi(ctx, expr->type); ir_phi_argument(phi, last_then_block, expr->if_.then->ir); ir_phi_argument(phi, last_else_block, expr->if_.else_->ir); expr->ir = phi; @@ -423,7 +423,8 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { /// Literal expression. Only integer literals are supported for now. case NODE_LITERAL: if (expr->literal.type != TK_NUMBER) DIAG(DIAG_SORRY, expr->source_location, "Emitting non-integer literals not supported"); - expr->ir = ir_immediate(ctx, expr->literal.integer); + // TODO: SEMA should probably have already lowered integer_literal type, so we *should* have a type already available on the literal node... + expr->ir = ir_immediate(ctx, expr->type, expr->literal.integer); return; /// Variable reference. @@ -456,13 +457,11 @@ void codegen_function(CodegenContext *ctx, Node *node) { /// Emit the function body. codegen_expr(ctx, node->function.body); - /// If the we can return from here, and this function doesn’t return void, + /// If we can return from here, and this function doesn’t return void, /// then return the return value; otherwise, just return nothing. - if (!ir_is_closed(ctx->block) && !type_is_void(node->type->function.return_type)) { + if (!ir_is_closed(ctx->block) && !type_is_void(node->type->function.return_type)) ir_return(ctx, node->function.body->ir); - } else { - ir_return(ctx, NULL); - } + else ir_return(ctx, NULL); } /// =========================================================================== diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 590b37f93..1a3042a99 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -104,11 +104,9 @@ void ir_remove(IRInstruction* instruction) { vector_delete(instruction->users); ir_unmark_usees(instruction); /// Parameters / static refs should not be freed here. 
- if (instruction->kind != IR_PARAMETER && instruction->kind != IR_STATIC_REF) { + if (instruction->kind != IR_PARAMETER && instruction->kind != IR_STATIC_REF) free(instruction); - } else { - vector_push(instruction->parent_block->function->context->removed_instructions, instruction); - } + else vector_push(instruction->parent_block->function->context->removed_instructions, instruction); } void ir_remove_and_free_block(IRBlock *block) { @@ -263,6 +261,9 @@ void ir_femit_instruction ICE("Invalid IRType %d\n", inst->kind); } + // Print type that the value this instruction returns is of. + if (inst->type) fprint(file, " %31| %T", inst->type); + #ifdef DEBUG_USES /// Print users fprint(file, "%m\033[60GUsers: "); @@ -359,10 +360,11 @@ IRInstruction *ir_parameter /// Add a parameter to a function. This alters the number of /// parameters the function takes, so use it with caution. -void ir_add_parameter_to_function(IRFunction *f) { +void ir_add_parameter_to_function(IRFunction *f, Type *type) { INSTRUCTION(parameter, IR_PARAMETER); parameter->imm = f->parameters.size; parameter->id = (u32) f->parameters.size; + parameter->type = type; ir_insert(f->context, parameter); vector_push(f->parameters, parameter); } @@ -384,9 +386,7 @@ void ir_phi_argument IRPhiArgument *arg = calloc(1, sizeof *arg); arg->block = phi_predecessor; arg->value = argument; - - vector_push(phi->phi_args, arg); - mark_used(argument, phi); + ir_phi_add_argument(phi, arg); } void ir_phi_remove_argument(IRInstruction *phi, IRBlock *block) { @@ -399,8 +399,9 @@ void ir_phi_remove_argument(IRInstruction *phi, IRBlock *block) { } } -IRInstruction *ir_phi(CodegenContext *context) { +IRInstruction *ir_phi(CodegenContext *context, Type *type) { INSTRUCTION(phi, IR_PHI); + phi->type = type; INSERT(phi); return phi; } @@ -442,10 +443,11 @@ IRFunction *ir_function(CodegenContext *context, span name, Type *function_type) vector_push(context->functions, function); /// Generate param refs. 
- for (u64 i = 1; i <= function_type->function.parameters.size; i++) { + for (u64 i = 1; i <= function_type->function.parameters.size; ++i) { INSTRUCTION(param, IR_PARAMETER); param->imm = i - 1; param->id = (u32) i; + param->type = function_type->function.parameters.data[i - 1].type; vector_push(function->parameters, param); INSERT(param); } @@ -455,17 +457,20 @@ IRFunction *ir_function(CodegenContext *context, span name, Type *function_type) IRInstruction *ir_funcref(CodegenContext *context, IRFunction *function) { INSTRUCTION(funcref, IR_FUNC_REF); funcref->function_ref = function; + funcref->type = function->type; INSERT(funcref); return funcref; } IRInstruction *ir_immediate (CodegenContext *context, + Type *type, u64 immediate ) { INSTRUCTION(imm, IR_IMMEDIATE); imm->imm = immediate; + imm->type = type; INSERT(imm); return imm; } @@ -478,6 +483,9 @@ IRInstruction *ir_load INSTRUCTION(load, IR_LOAD); load->operand = address; + // TODO: Is this right? + load->type = address->type; + mark_used(address, load); INSERT(load); @@ -490,9 +498,9 @@ IRInstruction *ir_direct_call ) { ASSERT(callee, "Cannot create direct call to NULL function"); - (void) context; INSTRUCTION(call, IR_CALL); call->call.callee_function = callee; + call->type = callee->type->function.return_type; return call; } @@ -501,10 +509,10 @@ IRInstruction *ir_indirect_call IRInstruction *function ) { - (void) context; INSTRUCTION(call, IR_CALL); call->call.callee_instruction = function; call->call.is_indirect = true; + call->type = function->type->function.return_type; mark_used(function, call); return call; } @@ -518,6 +526,7 @@ IRInstruction *ir_store INSTRUCTION(store, IR_STORE); store->store.addr = address; store->store.value = data; + store->type = t_void; //> A store instruction does not return anything. 
mark_used(address, store); mark_used(data, store); INSERT(store); @@ -538,6 +547,8 @@ IRInstruction *ir_branch_conditional branch->cond_br.then = then_block; branch->cond_br.else_ = otherwise_block; + + branch->type = t_void; INSERT(branch); return branch; } @@ -552,6 +563,7 @@ IRInstruction *ir_branch_into_block branch->destination_block = destination; branch->parent_block = block; list_push_back(block->instructions, branch); + branch->type = t_void; return branch; } @@ -566,6 +578,7 @@ IRInstruction *ir_branch IRInstruction *ir_return(CodegenContext *context, IRInstruction* return_value) { INSTRUCTION(branch, IR_RETURN); branch->operand = return_value; + branch->type = t_void; INSERT(branch); if (return_value) mark_used(return_value, branch); return branch; @@ -580,6 +593,7 @@ IRInstruction *ir_copy_unused (void) context; INSTRUCTION(copy, IR_COPY); copy->operand = source; + copy->type = source->type; return copy; } @@ -601,6 +615,8 @@ IRInstruction *ir_not { INSTRUCTION(x, IR_NOT); x->operand = source; + // TODO: Is this right? Should we use source->type? + x->type = t_integer; mark_used(source, x); INSERT(x); return x; @@ -608,7 +624,8 @@ IRInstruction *ir_not #define CREATE_BINARY_INSTRUCTION(enumerator, name) \ IRInstruction *ir_##name(CodegenContext *context, IRInstruction *lhs, IRInstruction *rhs) { \ - INSTRUCTION(x, IR_##enumerator); \ + INSTRUCTION(x, IR_##enumerator); \ + x->type = t_integer; \ set_pair_and_mark(x, lhs, rhs); \ INSERT(x); \ return x; \ @@ -618,19 +635,20 @@ ALL_BINARY_INSTRUCTION_TYPES(CREATE_BINARY_INSTRUCTION) IRInstruction *ir_create_static (CodegenContext *context, - Type *ty, + Type *type, span name) { /// Create the variable. IRStaticVariable *v = calloc(1, sizeof *v); v->name = string_dup(name); - v->type = ty; - v->cached_size = type_sizeof(ty); + v->type = type; + v->cached_size = type_sizeof(type); v->cached_alignment = 8; /// TODO. vector_push(context->static_vars, v); /// Create an instruction to reference it and return it. 
INSTRUCTION(ref, IR_STATIC_REF); ref->static_ref = v; + ref->type = v->type; v->reference = ref; INSERT(ref); return ref; @@ -638,11 +656,12 @@ IRInstruction *ir_create_static IRInstruction *ir_stack_allocate (CodegenContext *context, - usz size + Type *type ) { INSTRUCTION(alloca, IR_ALLOCA); - alloca->alloca.size = size; + alloca->alloca.size = type_sizeof(type); + alloca->type = type; INSERT(alloca); return alloca; } diff --git a/src/codegen/intermediate_representation.h b/src/codegen/intermediate_representation.h index 5a3c0d08d..2baa88edd 100644 --- a/src/codegen/intermediate_representation.h +++ b/src/codegen/intermediate_representation.h @@ -9,7 +9,8 @@ #define INSTRUCTION(name, given_type) \ IRInstruction *(name) = calloc(1, sizeof(IRInstruction)); \ - (name)->kind = (given_type) + (name)->kind = (given_type); \ + (name)->type = t_void #define FOREACH_INSTRUCTION_N(context, function, block, instruction) \ foreach_ptr (IRFunction *, function, context->functions) \ @@ -129,6 +130,8 @@ typedef struct IRInstruction { enum IRType kind; Register result; + Type *type; + /// TODO: do we really need both of these? 
u32 id; u32 index; @@ -249,9 +252,9 @@ void insert_instruction_before(IRInstruction *i, IRInstruction *before); void insert_instruction_after(IRInstruction *i, IRInstruction *after); IRInstruction *ir_parameter(CodegenContext *context, size_t index); -void ir_add_parameter_to_function(IRFunction *); +void ir_add_parameter_to_function(IRFunction *, Type *); -IRInstruction *ir_phi(CodegenContext *context); +IRInstruction *ir_phi(CodegenContext *context, Type *type); void ir_phi_add_argument(IRInstruction *phi, IRPhiArgument *argument); void ir_phi_remove_argument(IRInstruction *phi, IRBlock *block); void ir_phi_argument @@ -271,6 +274,7 @@ IRInstruction *ir_indirect_call IRInstruction *ir_immediate (CodegenContext *context, + Type *type, u64 immediate); IRInstruction *ir_load @@ -320,12 +324,12 @@ ALL_BINARY_INSTRUCTION_TYPES(DECLARE_BINARY_INSTRUCTION) /// Create a variable with static storage duration. IRInstruction *ir_create_static (CodegenContext *context, - Type *ty, + Type *type, span name); IRInstruction *ir_stack_allocate (CodegenContext *context, - usz size); + Type *type); /// Check if an instruction returns a value. bool ir_is_value(IRInstruction *instruction); diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 8cab6ff91..ba3c96b43 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -186,13 +186,36 @@ enum Instruction { I_COUNT }; +enum RegSize { + r128, + r64, + r32, + r16, + r8, +}; + +/// Return the corresponding RegSize enum value to the given amount of +/// bytes (smallest fit). ICE if can not contain. 
+static enum RegSize regsize_from_bytes(u64 bytes) { + switch (bytes) { + case 1: return r8; + case 2: return r16; + case 4: return r32; + case 8: return r64; + case 16: return r128; + default: + ICE("Byte size can not be converted into register size on x86_64: %"PRIu64, bytes); + break; + } +} + enum InstructionOperands_x86_64 { IMMEDIATE, ///< int64_t imm MEMORY, ///< Reg reg, int64_t offset REGISTER, ///< Reg reg NAME, ///< const char* name - IMMEDIATE_TO_REGISTER, ///< int64_t imm, Reg dest + IMMEDIATE_TO_REGISTER, ///< int64_t imm, Reg dest, RegSize size IMMEDIATE_TO_MEMORY, ///< int64_t imm, Reg address, int64_t offset MEMORY_TO_REGISTER, ///< Reg address, int64_t offset, Reg dest NAME_TO_REGISTER, ///< Reg address, const char* name, Reg dest @@ -286,9 +309,22 @@ static enum IndirectJumpType negate_jump(enum IndirectJumpType j) { static void femit_imm_to_reg(CodegenContext *context, enum Instruction inst, va_list args) { int64_t immediate = va_arg(args, int64_t); RegisterDescriptor destination_register = va_arg(args, RegisterDescriptor); + enum RegSize size = va_arg(args, enum RegSize); const char *mnemonic = instruction_mnemonic(context, inst); - const char *destination = register_name(destination_register); + const char *destination = NULL; + switch (size) { + case r128: + TODO("Support 128 bit registers on x86_64..."); + break; + case r64: destination = register_name(destination_register); break; + case r32: destination = register_name_32(destination_register); break; + case r16: destination = register_name_16(destination_register); break; + case r8: destination = register_name_8(destination_register); break; + default: + UNREACHABLE(); + break; + } switch (context->dialect) { case CG_ASM_DIALECT_ATT: @@ -328,10 +364,23 @@ static void femit_mem_to_reg(CodegenContext *context, enum Instruction inst, va_ RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); int64_t offset = va_arg(args, int64_t); RegisterDescriptor destination_register = 
va_arg(args, RegisterDescriptor); + enum RegSize size = va_arg(args, enum RegSize); const char *mnemonic = instruction_mnemonic(context, inst); const char *address = register_name(address_register); - const char *destination = register_name(destination_register); + const char *destination = NULL; + switch (size) { + case r128: + TODO("Support 128 bit registers on x86_64..."); + break; + case r64: destination = register_name(destination_register); break; + case r32: destination = register_name_32(destination_register); break; + case r16: destination = register_name_16(destination_register); break; + case r8: destination = register_name_8(destination_register); break; + default: + UNREACHABLE(); + break; + } switch (context->dialect) { case CG_ASM_DIALECT_ATT: @@ -350,10 +399,23 @@ static void femit_name_to_reg(CodegenContext *context, enum Instruction inst, va RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); char *name = va_arg(args, char *); RegisterDescriptor destination_register = va_arg(args, RegisterDescriptor); + enum RegSize size = va_arg(args, enum RegSize); const char *mnemonic = instruction_mnemonic(context, inst); const char *address = register_name(address_register); - const char *destination = register_name(destination_register); + const char *destination = NULL; + switch (size) { + case r128: + TODO("Support 128 bit registers on x86_64..."); + break; + case r64: destination = register_name(destination_register); break; + case r32: destination = register_name_32(destination_register); break; + case r16: destination = register_name_16(destination_register); break; + case r8: destination = register_name_8(destination_register); break; + default: + UNREACHABLE(); + break; + } switch (context->dialect) { case CG_ASM_DIALECT_ATT: @@ -370,11 +432,24 @@ static void femit_name_to_reg(CodegenContext *context, enum Instruction inst, va static void femit_reg_to_mem(CodegenContext *context, enum Instruction inst, va_list args) { 
RegisterDescriptor source_register = va_arg(args, RegisterDescriptor); + enum RegSize size = va_arg(args, enum RegSize); RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); int64_t offset = va_arg(args, int64_t); const char *mnemonic = instruction_mnemonic(context, inst); - const char *source = register_name(source_register); + const char *source = NULL; + switch (size) { + case r128: + TODO("Support 128 bit registers on x86_64..."); + break; + case r64: source = register_name(source_register); break; + case r32: source = register_name_32(source_register); break; + case r16: source = register_name_16(source_register); break; + case r8: source = register_name_8(source_register); break; + default: + UNREACHABLE(); + break; + } const char *address = register_name(address_register); switch (context->dialect) { @@ -425,12 +500,25 @@ static void femit_reg_to_reg(CodegenContext *context, enum Instruction inst, va_ } static void femit_reg_to_name(CodegenContext *context, enum Instruction inst, va_list args) { - RegisterDescriptor source_register = va_arg(args, RegisterDescriptor); + RegisterDescriptor source_register = va_arg(args, RegisterDescriptor); + enum RegSize size = va_arg(args, enum RegSize); RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); char *name = va_arg(args, char *); const char *mnemonic = instruction_mnemonic(context, inst); - const char *source = register_name(source_register); + const char *source = NULL; + switch (size) { + case r128: + TODO("Support 128 bit registers on x86_64..."); + break; + case r64: source = register_name(source_register); break; + case r32: source = register_name_32(source_register); break; + case r16: source = register_name_16(source_register); break; + case r8: source = register_name_8(source_register); break; + default: + UNREACHABLE(); + break; + } const char *address = register_name(address_register); switch (context->dialect) { @@ -808,7 +896,7 @@ static RegisterDescriptor 
codegen_comparison // Perform the comparison. femit(cg_context, I_CMP, REGISTER_TO_REGISTER, rhs, lhs); - femit(cg_context, I_MOV, IMMEDIATE_TO_REGISTER, (int64_t)0, result); + femit(cg_context, I_MOV, IMMEDIATE_TO_REGISTER, (int64_t)0, result, r64); femit(cg_context, I_SETCC, type, result); return result; @@ -860,14 +948,14 @@ static void codegen_prologue(CodegenContext *cg_context, IRFunction *f) { case CG_CALL_CONV_LINUX: break; default: ICE("Unknown calling convention"); } - femit(cg_context, I_SUB, IMMEDIATE_TO_REGISTER, locals_offset, REG_RSP); + femit(cg_context, I_SUB, IMMEDIATE_TO_REGISTER, locals_offset, REG_RSP, r64); } break; case FRAME_MINIMAL: { switch (cg_context->call_convention) { /// See comment above. case CG_CALL_CONV_MSWIN: - femit(cg_context, I_SUB, IMMEDIATE_TO_REGISTER, (int64_t)(4 * 8 + 8), REG_RSP); + femit(cg_context, I_SUB, IMMEDIATE_TO_REGISTER, (int64_t)(4 * 8 + 8), REG_RSP, r64); break; case CG_CALL_CONV_LINUX: femit(cg_context, I_PUSH, REGISTER, REG_RBP); @@ -893,7 +981,7 @@ static void codegen_epilogue(CodegenContext *cg_context, IRFunction *f) { switch (cg_context->call_convention) { /// See comment above. case CG_CALL_CONV_MSWIN: - femit(cg_context, I_ADD, IMMEDIATE_TO_REGISTER, (int64_t)(4 * 8 + 8), REG_RSP); + femit(cg_context, I_ADD, IMMEDIATE_TO_REGISTER, (int64_t)(4 * 8 + 8), REG_RSP, r64); break; case CG_CALL_CONV_LINUX: femit(cg_context, I_POP, REGISTER, REG_RBP); @@ -912,7 +1000,34 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { case IR_UNREACHABLE: break; case IR_IMMEDIATE: - femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result); + // TODO: This probably shouldn't be done here. Do this in a pass before-hand or something. + if (inst->type == t_integer_literal) { + // TODO: I don't think this is the best way of doing things. 
+ //femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); + if (inst->imm <= UINT8_MAX) { + femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r8); + } else if (inst->imm <= UINT16_MAX) { + femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r16); + } else if (inst->imm <= UINT32_MAX) { + femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); + } else if (inst->imm <= UINT64_MAX) { + femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); + } else { + ICE("Unsupported integer literal immediate on x86_64 (out of range)"); + } + } else { + if (type_sizeof(inst->type) == 1) { + femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r8); + } else if (type_sizeof(inst->type) == 2) { + femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r16); + } else if (type_sizeof(inst->type) <= 4) { + femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); + } else if (type_sizeof(inst->type) <= 8) { + femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); + } else { + ICE("Unsupported immediate size on x86_64: %d", type_sizeof(inst->type)); + } + } break; case IR_NOT: femit(context, I_NOT, REGISTER, inst->operand->result); @@ -1091,56 +1206,65 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { break; case IR_LOAD: + // TODO: Handle size of type and stuff /// Load from a static variable. if (inst->operand->kind == IR_STATIC_REF) { + enum RegSize size = regsize_from_bytes(type_sizeof(inst->operand->type)); femit(context, I_MOV, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, - inst->result); + inst->result, size); } /// Load from a local. 
else if (inst->operand->kind == IR_ALLOCA) { + //enum RegSize size = regsize_from_bytes(type_sizeof(inst->operand->type)); + enum RegSize size = regsize_from_bytes(inst->operand->alloca.size); femit(context, I_MOV, MEMORY_TO_REGISTER, - REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result); + REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); } /// Load from a pointer else { + // TODO: Is this right? Do we need to get size of pointed to type? + enum RegSize size = regsize_from_bytes(type_sizeof(inst->operand->type)); femit(context, I_MOV, MEMORY_TO_REGISTER, inst->operand->result, (int64_t)0, - inst->result); + inst->result, size); } break; case IR_STORE: /// Store to a static variable. if (inst->store.addr->kind == IR_STATIC_REF) { - femit(context, I_MOV, REGISTER_TO_NAME, inst->store.value->result, + enum RegSize size = regsize_from_bytes(type_sizeof(inst->store.value->type)); + femit(context, I_MOV, REGISTER_TO_NAME, inst->store.value->result, size, REG_RIP, inst->store.addr->static_ref->name.data); } /// Store to a local. else if (inst->store.addr->kind == IR_ALLOCA) { - femit(context, I_MOV, REGISTER_TO_MEMORY, inst->store.value->result, + enum RegSize size = regsize_from_bytes(type_sizeof(inst->store.value->type)); + femit(context, I_MOV, REGISTER_TO_MEMORY, + inst->store.value->result, size, REG_RBP, (int64_t)-inst->store.addr->alloca.offset); break; } /// Store to a pointer. 
else { - femit(context, I_MOV, REGISTER_TO_MEMORY, inst->store.value->result, + enum RegSize size = regsize_from_bytes(type_sizeof(inst->store.value->type)); + femit(context, I_MOV, REGISTER_TO_MEMORY, + inst->store.value->result, size, inst->store.addr->result, (int64_t)0); } break; case IR_STATIC_REF: - if (inst->result) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->static_ref->name.data, inst->result); + if (inst->result) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->static_ref->name.data, inst->result, r64); break; case IR_FUNC_REF: - if (inst->result) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->function_ref->name.data, inst->result); + if (inst->result) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->function_ref->name.data, inst->result, r64); break; case IR_ALLOCA: - femit(context, I_LEA, MEMORY_TO_REGISTER, - REG_RBP, - (int64_t)-inst->alloca.offset, inst->result); + femit(context, I_LEA, MEMORY_TO_REGISTER, REG_RBP, (int64_t)-inst->alloca.offset, inst->result, r64); break; default: diff --git a/src/ir_parser.c b/src/ir_parser.c index 42d87267c..5892971ba 100644 --- a/src/ir_parser.c +++ b/src/ir_parser.c @@ -572,7 +572,8 @@ static bool parse_instruction_or_branch(IRParser *p) { /// An instruction may be a number. /// ::= NUMBER if (p->tok_type == tk_number) { - i = ir_immediate(p->context, p->integer); + // TODO: Better type recognition here. + i = ir_immediate(p->context, t_integer, p->integer); next_token(p); } @@ -655,7 +656,8 @@ static bool parse_instruction_or_branch(IRParser *p) { /// PHI { "[" ":" "]" } case PHI: { next_token(p); - i = ir_phi(p->context); + // TODO: Set ir_phi type... + i = ir_phi(p->context, t_void); /// Parse the phi arguments. 
while (p->tok_type == tk_lbrack) { @@ -695,7 +697,8 @@ static bool parse_instruction_or_branch(IRParser *p) { case IMMEDIATE: { next_token(p); if (p->tok_type != tk_number) ERR("Expected number after 'imm'"); - i = ir_immediate(p->context, p->integer); + // TODO: Better type recognition + i = ir_immediate(p->context, t_integer, p->integer); next_token(p); } break; @@ -780,6 +783,8 @@ static bool parse_instruction_or_branch(IRParser *p) { if (p->tok_type != tk_number) ERR_AT(i_loc, "Expected physical register after REGISTER"); INSTRUCTION(reg, IR_REGISTER); ir_insert(p->context, reg); + // TODO: Type of IR_REGISTER? + reg->type = t_void; reg->result = (Register) p->integer; i = reg; next_token(p); @@ -788,7 +793,8 @@ static bool parse_instruction_or_branch(IRParser *p) { /// ALLOCA case ALLOCA: { next_token(p); - i = ir_stack_allocate(p->context, 8); + // TODO: Not everything is an 8 byte signed integer + i = ir_stack_allocate(p->context, t_integer); } break; /// ::= UNREACHABLE "\n" | ... @@ -996,7 +1002,8 @@ static void parse_parameters(IRParser *p) { /// Create a parameter reference. if (p->tok_type != tk_temp) ERR("Expected temporary after '(' or ','"); if (p->tok.data[0] == '#') ERR("Function parameter must be a temporary register"); - ir_add_parameter_to_function(vector_back(p->context->functions)); + // TODO: Parse param type... + ir_add_parameter_to_function(vector_back(p->context->functions), t_void); IRInstruction *param = ir_parameter(p->context, param_count++); make_temp(p, here(p), p->tok, param); next_token(p); From fe4610d1adebb8c2a97f1c4cac5d9c7adeefb198 Mon Sep 17 00:00:00 2001 From: Lens Date: Sat, 21 Jan 2023 16:03:45 -0800 Subject: [PATCH 02/97] Make it work more betterer --- TODO.md | 8 ++++---- src/codegen/x86_64/arch_x86_64.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/TODO.md b/TODO.md index 8a960b949..40dba8ce4 100644 --- a/TODO.md +++ b/TODO.md @@ -36,12 +36,12 @@ - [ ] Disallow overloading on the return value. 
- [ ] Proper checking for incomplete types in the parser. - [ ] Types in the IR - - [ ] Byte type - - [ ] Type info in codegen/IR. - - [ ] Sema needs to "lower" integer_literal to "integer" if no other makes sense + - [x] Byte type + - [x] Type info in codegen/IR. - [ ] Backend: Handle size/alignment requirements - - [ ] Use eax, ax, al, etc. + - [x] Use eax, ax, al, etc. - [ ] Actually implementing casts. + - [ ] During codegen, we should actually output `zext`/`sext` if needed. Otherwise truncation is automatic. - [ ] Update IR parser - [ ] Arrays - [ ] Semantic analysis for static arrays. diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index ba3c96b43..7c6ff0121 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1210,14 +1210,15 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { /// Load from a static variable. if (inst->operand->kind == IR_STATIC_REF) { enum RegSize size = regsize_from_bytes(type_sizeof(inst->operand->type)); + if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); femit(context, I_MOV, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); } /// Load from a local. else if (inst->operand->kind == IR_ALLOCA) { - //enum RegSize size = regsize_from_bytes(type_sizeof(inst->operand->type)); enum RegSize size = regsize_from_bytes(inst->operand->alloca.size); + if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); femit(context, I_MOV, MEMORY_TO_REGISTER, REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); } @@ -1234,7 +1235,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { case IR_STORE: /// Store to a static variable. 
if (inst->store.addr->kind == IR_STATIC_REF) { - enum RegSize size = regsize_from_bytes(type_sizeof(inst->store.value->type)); + enum RegSize size = regsize_from_bytes(type_sizeof(inst->store.addr->static_ref->type)); femit(context, I_MOV, REGISTER_TO_NAME, inst->store.value->result, size, REG_RIP, inst->store.addr->static_ref->name.data); } From 2f4d462bf3c2882ac50d19105ebc4493e69683db Mon Sep 17 00:00:00 2001 From: Lens Date: Sat, 21 Jan 2023 17:40:04 -0800 Subject: [PATCH 03/97] [Bugfix] Zero extending loads fixes *a lot* --- src/codegen/x86_64/arch_x86_64.c | 27 ++++++++++++--------------- tst/tests/byte.un | 4 ++-- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 7c6ff0121..85cd5d530 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1005,8 +1005,10 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // TODO: I don't think this is the best way of doing things. 
//femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); if (inst->imm <= UINT8_MAX) { + femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r8); } else if (inst->imm <= UINT16_MAX) { + femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r16); } else if (inst->imm <= UINT32_MAX) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); @@ -1017,15 +1019,17 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { } } else { if (type_sizeof(inst->type) == 1) { + femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r8); } else if (type_sizeof(inst->type) == 2) { + femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r16); } else if (type_sizeof(inst->type) <= 4) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); } else if (type_sizeof(inst->type) <= 8) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); } else { - ICE("Unsupported immediate size on x86_64: %d", type_sizeof(inst->type)); + ICE("Unsupported immediate size on x86_64: %Z", type_sizeof(inst->type)); } } break; @@ -1159,40 +1163,33 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { case IR_DIV: ASSERT(inst->rhs->result != REG_RAX, "Register allocation must not allocate RAX to divisor."); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, - REG_RAX); + femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, REG_RAX); femit(context, I_CQO); femit(context, I_IDIV, REGISTER, inst->rhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, - REG_RAX, inst->result); + femit(context, I_MOV, 
REGISTER_TO_REGISTER, REG_RAX, inst->result); break; case IR_MOD: ASSERT(inst->rhs->result != REG_RAX, "Register allocation must not allocate RAX to divisor."); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, - REG_RAX); + femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, REG_RAX); femit(context, I_CQO); femit(context, I_IDIV, REGISTER, inst->rhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, - REG_RDX, inst->result); + femit(context, I_MOV, REGISTER_TO_REGISTER, REG_RDX, inst->result); break; case IR_SHL: ASSERT(inst->lhs->result != REG_RCX, "Register allocation must not allocate RCX to result of lhs of shift."); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, - REG_RCX); + femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, REG_RCX); femit(context, I_SHL, REGISTER, inst->lhs->result); femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, inst->result); break; case IR_SHR: - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, - REG_RCX); + femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, REG_RCX); femit(context, I_SHR, REGISTER, inst->lhs->result); femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, inst->result); break; case IR_SAR: - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, - REG_RCX); + femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, REG_RCX); femit(context, I_SAR, REGISTER, inst->lhs->result); femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, inst->result); break; diff --git a/tst/tests/byte.un b/tst/tests/byte.un index 8d8aab31e..c5a7c9f48 100644 --- a/tst/tests/byte.un +++ b/tst/tests/byte.un @@ -1,5 +1,5 @@ -; SKIP ; 144 a : byte = 200 -a + a \ No newline at end of file +a := a + a +a \ No newline at end of file From 9a013f3ea86de4ce802ff173632ea1e6bda86bad Mon Sep 17 00:00:00 2001 From: Lens Date: Sat, 21 Jan 2023 17:49:40 -0800 Subject: [PATCH 04/97] [Bug] Identify bug with binary operator type 
--- TODO.md | 1 + tst/tests/byte.un | 3 +-- tst/tests/byte2.un | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 tst/tests/byte2.un diff --git a/TODO.md b/TODO.md index 40dba8ce4..b6cd1e89c 100644 --- a/TODO.md +++ b/TODO.md @@ -43,6 +43,7 @@ - [ ] Actually implementing casts. - [ ] During codegen, we should actually output `zext`/`sext` if needed. Otherwise truncation is automatic. - [ ] Update IR parser + - [ ] Binary operators need to pick return type instead of strictly returning `integer` - [ ] Arrays - [ ] Semantic analysis for static arrays. - [ ] Codegen diff --git a/tst/tests/byte.un b/tst/tests/byte.un index c5a7c9f48..6cf42ec0e 100644 --- a/tst/tests/byte.un +++ b/tst/tests/byte.un @@ -1,5 +1,4 @@ ; 144 a : byte = 200 -a := a + a -a \ No newline at end of file +a + a diff --git a/tst/tests/byte2.un b/tst/tests/byte2.un new file mode 100644 index 000000000..c5a7c9f48 --- /dev/null +++ b/tst/tests/byte2.un @@ -0,0 +1,5 @@ +; 144 + +a : byte = 200 +a := a + a +a \ No newline at end of file From 583109493081d0323ba631eb365580e1d1b454d1 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 23 Jan 2023 11:15:30 -0800 Subject: [PATCH 05/97] [Bugfix] Fix error in format string I forgot we have special ways to do that now --- src/codegen.c | 3 ++- src/codegen/x86_64/arch_x86_64.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index c4a61e0a3..82201906c 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -322,7 +322,8 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { /// Binary expression. case NODE_BINARY: { - Node * const lhs = expr->binary.lhs, * const rhs = expr->binary.rhs; + Node *const lhs = expr->binary.lhs; + Node *const rhs = expr->binary.rhs; /// Assignment needs to be handled separately. 
if (expr->binary.op == TK_COLON_EQ) { diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 85cd5d530..207fc53a0 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -204,7 +204,7 @@ static enum RegSize regsize_from_bytes(u64 bytes) { case 8: return r64; case 16: return r128; default: - ICE("Byte size can not be converted into register size on x86_64: %"PRIu64, bytes); + ICE("Byte size can not be converted into register size on x86_64: %U", bytes); break; } } From 055931c9c28e96086997be444b9e1a07c406f431 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 27 Jan 2023 11:47:43 -0800 Subject: [PATCH 06/97] [Codegen] Handle size of type when calculating address offset --- src/codegen.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index 82201906c..0ea38c11e 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -351,9 +351,24 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { return; } + // TODO: Just use lhs operand of subscript operator when right hand + // side is a compile-time-known zero value. + /// Emit the operands. codegen_expr(ctx, lhs); codegen_expr(ctx, rhs); + if (expr->binary.op == TK_LBRACK) { + // An array subscript needs multiplied by the sizeof the array's base type. + if (lhs->type->kind == TYPE_ARRAY) { + IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->array.of)); + rhs->ir = ir_mul(ctx, rhs->ir, immediate); + } + // A pointer subscript needs multiplied by the sizeof the pointer's base type. + else if (lhs->type->kind == TYPE_ARRAY) { + IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->pointer.to)); + rhs->ir = ir_mul(ctx, rhs->ir, immediate); + } + } /// Emit the binary instruction. 
switch (expr->binary.op) { @@ -401,9 +416,9 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { case TK_AT: /// TODO: This check for a function pointer is a bit sus. We shouldn’t /// even get here if this is actually a function pointer... - if (expr->unary.value->type->pointer.to->kind == TYPE_FUNCTION) { + if (expr->unary.value->type->pointer.to->kind == TYPE_FUNCTION) expr->ir = expr->unary.value->ir; - } else { + else { expr->ir = ir_load(ctx, expr->unary.value->ir); } return; From 08f8d36c510a1d89aa06b5c04ec2ad14dd6ea689 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 27 Jan 2023 11:50:58 -0800 Subject: [PATCH 07/97] [Codegen] Make IR_LOAD great again (arrays! (again)) --- TODO.md | 3 ++ src/ast.h | 8 +++- src/codegen.c | 4 ++ src/codegen/intermediate_representation.c | 5 ++- src/codegen/x86_64/arch_x86_64.c | 45 ++++++++++++++++++----- tst/tests/arrays.un | 1 - tst/tests/byte.un | 3 ++ tst/tests/local_arrays.un | 29 +++++++++++++++ 8 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 tst/tests/local_arrays.un diff --git a/TODO.md b/TODO.md index b6cd1e89c..2fa7042e0 100644 --- a/TODO.md +++ b/TODO.md @@ -10,6 +10,9 @@ - [ ] Function bodies must be blocks or preceded by `=`. - [ ] `ext` functions may have a body. - [ ] Implicit cast sema pass: `a + b` where `a` is a `byte` and `b` an `integer` -> `(a as integer) + b` +- [ ] Optimisation + - [ ] Zero subscript still does add/multiply when it doesn't need to + - [ ] Eliminate unused parameters (they are currently allocated registers) - [ ] Attributes - [ ] Parsing - [ ] `[[noreturn]]` diff --git a/src/ast.h b/src/ast.h index c95cec30d..fc82bfcfe 100644 --- a/src/ast.h +++ b/src/ast.h @@ -514,9 +514,11 @@ Type *ast_make_type_function( /// =========================================================================== /// Get a string representation of a type. /// \return The string representation of the type. The string is allocated -/// as if with `malloc` and must be freed by the caller. 
+/// with malloc() and must be freed by the caller. string typename(Type *type, bool colour); +// FIXME: I don't know what canonical means, and this docstring doesn't +// help me :P /// Get the canonical type of a type. /// \return NULL if the type is incomplete. Type *type_canonical(Type *type); @@ -526,13 +528,15 @@ Type *type_canonical(Type *type); /// This function strips nested named types until there is only one left. Type *type_last_alias(Type *type); +// FIXME: What makes a type complete vs incomplete, in the eyes of this +// function? /// Check if a type is incomplete. bool type_is_incomplete(Type *type); /// Check if a canonical type is incomplete. bool type_is_incomplete_canon(Type *type); -/// Get the size of a type. +/// Get the size of a type, in bytes. usz type_sizeof(Type *type); /// Check if a type is void. diff --git a/src/codegen.c b/src/codegen.c index 0ea38c11e..fac78ad87 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -446,6 +446,10 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { /// Variable reference. case NODE_VARIABLE_REFERENCE: expr->ir = ir_load(ctx, expr->var->val.node->ir); + // TODO: Be smarter about when an array should decay to a pointer or not. + // Maybe it never should, and this should be implemented per backend? + if (expr->ir->type->kind == TYPE_ARRAY) + expr->ir->type = ast_make_type_pointer(ctx->ast, expr->type->source_location, expr->type->array.of); return; /// Function reference. These should have all been removed by the semantic analyser. diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 1a3042a99..8d81f0ad2 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -1,5 +1,7 @@ -#include #include + +#include +#include #include #include @@ -483,7 +485,6 @@ IRInstruction *ir_load INSTRUCTION(load, IR_LOAD); load->operand = address; - // TODO: Is this right? 
load->type = address->type; mark_used(address, load); diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 207fc53a0..842a8d2f5 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1206,26 +1206,53 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // TODO: Handle size of type and stuff /// Load from a static variable. if (inst->operand->kind == IR_STATIC_REF) { - enum RegSize size = regsize_from_bytes(type_sizeof(inst->operand->type)); + enum RegSize size = -1; + // TODO: Should this array to pointer decay happen here? Or higher up in codegen? + // TODO: type_sizeof(t_pointer) or something to load a pointer sized thing. + // WE SHOULD NOT USE t_integer here!! + if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_integer)); + else size = regsize_from_bytes(type_sizeof(inst->operand->type)); if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); - femit(context, I_MOV, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, - inst->result, size); + if (inst->operand->type->kind == TYPE_ARRAY) + femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, + inst->result, size); + else + femit(context, I_MOV, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, + inst->result, size); } /// Load from a local. else if (inst->operand->kind == IR_ALLOCA) { - enum RegSize size = regsize_from_bytes(inst->operand->alloca.size); + enum RegSize size = -1; + // TODO: Should this array to pointer decay happen here? Or higher up in codegen? + // TODO: type_sizeof(t_pointer) or something to load a pointer sized thing. + // WE SHOULD NOT USE t_integer here!! 
+ if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_integer)); + else size = regsize_from_bytes(inst->operand->alloca.size); if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); - femit(context, I_MOV, MEMORY_TO_REGISTER, - REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); + if (inst->operand->type->kind == TYPE_ARRAY) + femit(context, I_LEA, MEMORY_TO_REGISTER, + REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); + else + femit(context, I_MOV, MEMORY_TO_REGISTER, + REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); } /// Load from a pointer else { + enum RegSize size = -1; + // TODO: Should this array to pointer decay happen here? Or higher up in codegen? + // TODO: type_sizeof(t_pointer) or something to load a pointer sized thing. + // WE SHOULD NOT USE t_integer here!! + if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_integer)); // TODO: Is this right? Do we need to get size of pointed to type? - enum RegSize size = regsize_from_bytes(type_sizeof(inst->operand->type)); - femit(context, I_MOV, MEMORY_TO_REGISTER, inst->operand->result, (int64_t)0, - inst->result, size); + else size = regsize_from_bytes(type_sizeof(inst->operand->type)); + if (inst->operand->type->kind == TYPE_ARRAY) + femit(context, I_LEA, MEMORY_TO_REGISTER, inst->operand->result, (int64_t)0, + inst->result, size); + else + femit(context, I_MOV, MEMORY_TO_REGISTER, inst->operand->result, (int64_t)0, + inst->result, size); } break; diff --git a/tst/tests/arrays.un b/tst/tests/arrays.un index 54d65518f..6ad920079 100644 --- a/tst/tests/arrays.un +++ b/tst/tests/arrays.un @@ -1,4 +1,3 @@ -; SKIP ; 69 ;; This is an array declaration. 
diff --git a/tst/tests/byte.un b/tst/tests/byte.un index 6cf42ec0e..9f4999c8d 100644 --- a/tst/tests/byte.un +++ b/tst/tests/byte.un @@ -2,3 +2,6 @@ a : byte = 200 a + a + +; NOTE: Currently fails because `+` returns a t_integer, which is 8 bytes. +; Return status is (errantly?) 400 diff --git a/tst/tests/local_arrays.un b/tst/tests/local_arrays.un new file mode 100644 index 000000000..e8d4c6dbd --- /dev/null +++ b/tst/tests/local_arrays.un @@ -0,0 +1,29 @@ +; 69 + +foo : integer() { + ;; This is an array declaration. + ;; It has a base type of "integer" and a capacity of "4". + ;; Memory is allocated in the executable for the entire array, either + ;; in stack space, or in the `.data` section. + int_array : integer[4] + + ;; To get the memory address of any specific item in the array, use the + ;; square bracket array index operator. + first_int_pointer : @integer = int_array[0] + + ;; To access the value of an item in the array, and not the address, + ;; dereference the address returned by array index operator. + first_int : integer = @int_array[0] + + ;; To reassign, use the dereference operator on the memory address + ;; you'd like to write to. + @int_array[0] := 69 + @int_array[1] := 420 + @int_array[2] := 69 + @int_array[3] := 420 + + ;; Return the third integer in the array. + @int_array[2] +} + +foo() From 2578acc1f677a20ff1eb93f4c83475076f710fde Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 27 Jan 2023 11:51:34 -0800 Subject: [PATCH 08/97] [x86_64/Bugfix] Missing register size parameter in femit --- src/codegen/x86_64/arch_x86_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 842a8d2f5..b507a4898 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1065,7 +1065,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { } // Align stack pointer before call, if necessary. 
if (regs_pushed_count & 0b1) { - femit(context, I_SUB, IMMEDIATE_TO_REGISTER, (int64_t)8, REG_RSP); + femit(context, I_SUB, IMMEDIATE_TO_REGISTER, (int64_t)8, REG_RSP, r64); } for (Register i = REG_RAX + 1; i < sizeof(func_regs) * 8; ++i) { if (func_regs & (1 << i) && is_caller_saved(i)) { From 89337aea5dbff45614adbe460a299c4348b28aac Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 27 Jan 2023 11:51:59 -0800 Subject: [PATCH 09/97] [MSWIN/Bugfix] Handle error of `SymFromAddr()` --- src/platform.c | 73 +++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/src/platform.c b/src/platform.c index 090dd1366..75b56f252 100644 --- a/src/platform.c +++ b/src/platform.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -187,37 +188,45 @@ void platform_print_backtrace(int ignore) { #else typedef BOOL SymInitializeFunc( - _In_ HANDLE hProcess, - _In_ PCSTR UserSearchPath, - _In_ BOOL fInvadeProcess + _In_ HANDLE hProcess, + _In_ PCSTR UserSearchPath, + _In_ BOOL fInvadeProcess ); typedef BOOL SymFromAddrFunc( - _In_ HANDLE hProcess, - _In_ DWORD64 Address, - _Out_ PDWORD64 Displacement, - _Inout_ PSYMBOL_INFO Symbol + _In_ HANDLE hProcess, + _In_ DWORD64 Address, + _Out_ PDWORD64 Displacement, + _Inout_ PSYMBOL_INFO Symbol ); typedef BOOL SymGetLineFromAddr64Func( - _In_ HANDLE hProcess, - _In_ DWORD64 qwAddr, - _Out_ PDWORD pdwDisplacement, - _Out_ PIMAGEHLP_LINE64 Line64 + _In_ HANDLE hProcess, + _In_ DWORD64 qwAddr, + _Out_ PDWORD pdwDisplacement, + _Out_ PIMAGEHLP_LINE64 Line64 + ); + + typedef DWORD SymSetOptionsFunc( + _In_ DWORD SymOptions ); /// Get the stacktrace. - void* stack[100]; + void *stack[100] = { 0 }; WORD frames = CaptureStackBackTrace(ignore, 100, stack, NULL); + if (!frames) { + eprint(" Could not capture backtrace\n"); + return; + } /// Load DbgHelp.dll. HMODULE dbghelp = LoadLibrary(TEXT("DbgHelp.dll")); if (!dbghelp) { - /// Loading failed. Print just the addresses. 
print_raw: + /// Loading failed. Print just the addresses. eprint(" Cannot obtain symbols from backtrace: Could not load DbgHelp.dll\n"); for (WORD i = 0; i < frames; i++) - eprint(" at address %p", stack[i]); + eprint(" at address %p\n", stack[i]); return; } @@ -227,19 +236,30 @@ void platform_print_backtrace(int ignore) { SymFromAddrFunc *SymFromAddr = (SymFromAddrFunc*)GetProcAddress(dbghelp, "SymFromAddr"); if (!SymFromAddr) goto print_raw; + SymSetOptionsFunc *SymSetOptions = (SymSetOptionsFunc*)GetProcAddress(dbghelp, "SymSetOptions"); + if (!SymSetOptions) goto print_raw; + SymGetLineFromAddr64Func *SymGetLineFromAddr64 = (SymGetLineFromAddr64Func*)GetProcAddress(dbghelp, "SymGetLineFromAddr64"); HANDLE process = GetCurrentProcess(); - SymInitialize(process, NULL, TRUE); + if (!process) goto print_raw; + SymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES); + if (!SymInitialize(process, NULL, TRUE)) goto print_raw; - - char* symbol_info_data[sizeof(SYMBOL_INFO) + 256 * sizeof(char)]; - SYMBOL_INFO* symbol = (SYMBOL_INFO*)symbol_info_data; - symbol->MaxNameLen = 255; + char symbol_info_data[sizeof(SYMBOL_INFO) + 256 * sizeof(char)]; + memset(symbol_info_data, 0, sizeof symbol_info_data); + PSYMBOL_INFO symbol = (PSYMBOL_INFO)symbol_info_data; symbol->SizeOfStruct = sizeof(SYMBOL_INFO); - - for (int i = 0; i < frames; i++) { - SymFromAddr(process, (DWORD64)(stack[i]), 0, symbol); + symbol->MaxNameLen = 255; + + DWORD64 displacement64 = 0; + for (WORD i = 0; i < frames; ++i) { + DWORD64 frame_addr = (DWORD64)stack[i]; + if (!SymFromAddr(process, frame_addr, &displacement64, symbol)) { + fprintf(stderr, "Could not print stackframe: SymFromAddr returned false (err code %lu)!\n", GetLastError()); + //break; + continue; + } /// Attempt to get the line from the address. 
IMAGEHLP_LINE64 line = { 0 }; @@ -247,16 +267,15 @@ void platform_print_backtrace(int ignore) { DWORD displacement = 0; bool have_line = false; if (SymGetLineFromAddr64) { - have_line = SymGetLineFromAddr64(process, (DWORD64)(stack[i]), &displacement, &line); + have_line = SymGetLineFromAddr64(process, frame_addr, &displacement, &line); } - if (have_line) { - eprint(" in function %s%s():%D%s\n", + fprintf(stderr, " in function %s%s():%ld%s\n", term ? "\033[m\033[1;38m" : "", symbol->Name, line.LineNumber, term ? "\033[m" : ""); } else { - eprint(" in function %s%s()%s at offset %s%X%s\n", + fprintf(stderr, " in function %s%s()%s at offset %s%"PRIx64"%s\n", term ? "\033[m\033[1;38m" : "", symbol->Name, term ? "\033[m" : "", - term ? "\033[m\033[1;38m" : "", (u64)symbol->Address, term ? "\033[m" : ""); + term ? "\033[m\033[1;38m" : "", symbol->Address, term ? "\033[m" : ""); } if (strcmp(symbol->Name, "main") == 0) break; From 9fa948596619cd603db6c27e564b5abeecaf9e3b Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 27 Jan 2023 12:16:42 -0800 Subject: [PATCH 10/97] [Sema] Handle some basic type casting cases in typechecker --- src/codegen.c | 11 ++++++++++- src/typechecker.c | 31 +++++++++++++++++++++++++++++-- tst/tests/cast.un | 7 ++----- tst/tests/cast_from_incomplete.un | 5 +++++ tst/tests/cast_to_incomplete.un | 5 +++++ 5 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 tst/tests/cast_from_incomplete.un create mode 100644 tst/tests/cast_to_incomplete.un diff --git a/src/codegen.c b/src/codegen.c index fac78ad87..123292349 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -318,7 +319,15 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { } /// Typecast. 
- case NODE_CAST: { TODO(); } + case NODE_CAST: { + Type *t_to = expr->type; + Type *t_from = expr->cast.value->type; + + usz to_sz = type_sizeof(t_to); + usz from_sz = type_sizeof(t_from); + + TODO("Codegen cast from %T to %T", t_from, t_to); + } /// Binary expression. case NODE_BINARY: { diff --git a/src/typechecker.c b/src/typechecker.c index 460b3564c..514858ee8 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -102,7 +102,6 @@ NODISCARD static bool types_equal(Type *a, Type *b) { /// Check if a canonical type is an integer type. NODISCARD static bool is_integer_canon(Type *t) { - /// Currently, all primitive types are integers. return t == t_integer || t == t_integer_literal || t == t_byte; } @@ -890,7 +889,35 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { } break; /// Make sure a cast is even possible. - case NODE_CAST: TODO(); + case NODE_CAST: { + Type *t_to = expr->type; + if (type_is_incomplete(t_to)) + ERR(t_to->source_location, "Can not cast to incomplete type %T", t_to); + + if (!typecheck_expression(ast, expr->cast.value)) + return false; + + Type *t_from = expr->cast.value->type; + + // TODO: What do we do when from and to types are equal? + + if (type_is_incomplete(t_from)) + ERR(expr->cast.value->source_location, "Can not cast from an incomplete type %T", t_from); + + // FROM any integer type TO any integer type is ALLOWED + if (is_integer(t_from) && is_integer(t_to)) break; + + // FROM any pointer type TO any integer type is ALLOWED + if (is_pointer(t_from) && is_integer(t_to)) break; + + // FROM any integer type TO any pointer type is DISALLOWED + if (is_integer(t_from) && is_pointer(t_to)) + ERR(expr->source_location, "Can not cast from integer type %T to pointer type %T", t_from, t_to); + + // TODO: arrays, functions, function pointers... + + TODO(); + } /// Binary expression. This is a complicated one. 
case NODE_BINARY: { diff --git a/tst/tests/cast.un b/tst/tests/cast.un index 57de4db79..880035951 100644 --- a/tst/tests/cast.un +++ b/tst/tests/cast.un @@ -1,5 +1,2 @@ -; SKIP -; 1 - -a : integer -1 < &a as integer \ No newline at end of file +; 144 +400 as byte diff --git a/tst/tests/cast_from_incomplete.un b/tst/tests/cast_from_incomplete.un new file mode 100644 index 000000000..d9bd8fb88 --- /dev/null +++ b/tst/tests/cast_from_incomplete.un @@ -0,0 +1,5 @@ +; ERROR + +a : void + +a as byte diff --git a/tst/tests/cast_to_incomplete.un b/tst/tests/cast_to_incomplete.un new file mode 100644 index 000000000..f4b526c9b --- /dev/null +++ b/tst/tests/cast_to_incomplete.un @@ -0,0 +1,5 @@ +; ERROR + +a : byte + +a as void From 3457d53f65798b8df6f824d181b7ab712a836aa7 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 27 Jan 2023 12:23:14 -0800 Subject: [PATCH 11/97] [Typo] Forgetting a newline is bad, mmkay? --- tst/tests/cast.un | 1 + 1 file changed, 1 insertion(+) diff --git a/tst/tests/cast.un b/tst/tests/cast.un index 880035951..4ffc20a83 100644 --- a/tst/tests/cast.un +++ b/tst/tests/cast.un @@ -1,2 +1,3 @@ ; 144 + 400 as byte From b5b3183fd40408e865507602fb900b48e2320a82 Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:02:54 -0800 Subject: [PATCH 12/97] [Minor] Fix horribly phrased comment --- src/codegen/intermediate_representation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 8d81f0ad2..78c013ace 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -263,7 +263,7 @@ void ir_femit_instruction ICE("Invalid IRType %d\n", inst->kind); } - // Print type that the value this instruction returns is of. + // Print type of instruction. 
if (inst->type) fprint(file, " %31| %T", inst->type); #ifdef DEBUG_USES From 8a940372635bb3cbc4a7d1f6dc2f838d731cfddd Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:06:28 -0800 Subject: [PATCH 13/97] [Docs] Better comments regarding type helpers in `ast.h` --- src/ast.h | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/ast.h b/src/ast.h index fc82bfcfe..090d7a70d 100644 --- a/src/ast.h +++ b/src/ast.h @@ -517,10 +517,19 @@ Type *ast_make_type_function( /// with malloc() and must be freed by the caller. string typename(Type *type, bool colour); -// FIXME: I don't know what canonical means, and this docstring doesn't -// help me :P -/// Get the canonical type of a type. -/// \return NULL if the type is incomplete. +/** + * Get the canonical type of a type. + * + * The ‘canonical’ type of a type T is T stripped of any aliases. E.g. in C + * typedef int foo; + * typedef foo bar + * The canonical type of bar would be int. We need this because, + * currently, builtin types are just named types (i.e. typedefs) that + * refer to the actual primitive types, and if we ever introduce + * something like typedef, it will just work out of the box. + * + * \return NULL if the type is incomplete. + */ Type *type_canonical(Type *type); /// Get the last alias of a type. @@ -528,9 +537,21 @@ Type *type_canonical(Type *type); /// This function strips nested named types until there is only one left. Type *type_last_alias(Type *type); -// FIXME: What makes a type complete vs incomplete, in the eyes of this -// function? -/// Check if a type is incomplete. +/** Check if a type is incomplete. + * + * A type T is incomplete, iff + * - the canonical type of T is void, or + * - T has no canonical type. (e.g. if we have @foo, but foo is never + * defined, then foo (which is parsed as a named type) is incomplete + * because it has no canonical type. 
+ * + * Basically, ‘incomplete’ means that we don’t know its size/alignment + * and therefore, we can’t allocate a variable of that type. + * + * void is a special case because it is purposefully incomplete. + * + * \return true iff the type is incomplete. + */ bool type_is_incomplete(Type *type); /// Check if a canonical type is incomplete. From 61f28a8db73f2f37cfa590e2e3e8b33dbcfcde39 Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:07:34 -0800 Subject: [PATCH 14/97] [Minor] Add signedness reminder to casting codegen todo --- src/codegen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/codegen.c b/src/codegen.c index 123292349..6264ab590 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -326,6 +326,8 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { usz to_sz = type_sizeof(t_to); usz from_sz = type_sizeof(t_from); + // TODO: Take signedness into account (zext/sext). + TODO("Codegen cast from %T to %T", t_from, t_to); } From 98ae71e51bce7e273411a302e0a1bf2c049d72ea Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:08:17 -0800 Subject: [PATCH 15/97] [Bugfix] Copy and paste strikes again :I --- src/codegen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen.c b/src/codegen.c index 6264ab590..efc2eb935 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -375,7 +375,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { rhs->ir = ir_mul(ctx, rhs->ir, immediate); } // A pointer subscript needs multiplied by the sizeof the pointer's base type. 
- else if (lhs->type->kind == TYPE_ARRAY) { + else if (lhs->type->kind == TYPE_POINTER) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->pointer.to)); rhs->ir = ir_mul(ctx, rhs->ir, immediate); } From f06972ac0ae138bae06bd8468d6f99ec0c6fd855 Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:09:35 -0800 Subject: [PATCH 16/97] [Codegen] Handle subscript operator all at once, instead of split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'd say it's much easier to see how subscript is handled when it isn't split between two different places :Þ --- src/codegen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/codegen.c b/src/codegen.c index efc2eb935..a0a99d1f1 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -379,12 +379,14 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->pointer.to)); rhs->ir = ir_mul(ctx, rhs->ir, immediate); } + expr->ir = ir_add(ctx, lhs->ir, rhs->ir); + return; } /// Emit the binary instruction. 
switch (expr->binary.op) { default: ICE("Cannot emit binary expression of type %d", expr->binary.op); - case TK_LBRACK: expr->ir = ir_add(ctx, lhs->ir, rhs->ir); return; + case TK_LBRACK: UNREACHABLE(); case TK_LT: expr->ir = ir_lt(ctx, lhs->ir, rhs->ir); return; case TK_LE: expr->ir = ir_le(ctx, lhs->ir, rhs->ir); return; case TK_GT: expr->ir = ir_gt(ctx, lhs->ir, rhs->ir); return; From aa23eaebf64caaca3cd31b9157afce07c5c5a4e3 Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:11:50 -0800 Subject: [PATCH 17/97] [Minor] Add commentary on array pointer decay codegen --- src/codegen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/codegen.c b/src/codegen.c index a0a99d1f1..d1f7eb0ea 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -461,6 +461,8 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { expr->ir = ir_load(ctx, expr->var->val.node->ir); // TODO: Be smarter about when an array should decay to a pointer or not. // Maybe it never should, and this should be implemented per backend? + // "I’d just emit a load of the array and have the backend + // deal w/ copying 1000 ints." 
~ Sirraide if (expr->ir->type->kind == TYPE_ARRAY) expr->ir->type = ast_make_type_pointer(ctx->ast, expr->type->source_location, expr->type->array.of); return; From c9be95c8a209c31f2150e44b71cc1be4e97e8bb0 Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:15:02 -0800 Subject: [PATCH 18/97] [Minor] Remove unnecessary `t_void` assignments from IR helpers --- src/codegen/intermediate_representation.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 78c013ace..60d016448 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -527,7 +527,6 @@ IRInstruction *ir_store INSTRUCTION(store, IR_STORE); store->store.addr = address; store->store.value = data; - store->type = t_void; //> A store instruction does not return anything. mark_used(address, store); mark_used(data, store); INSERT(store); @@ -549,7 +548,6 @@ IRInstruction *ir_branch_conditional branch->cond_br.then = then_block; branch->cond_br.else_ = otherwise_block; - branch->type = t_void; INSERT(branch); return branch; } @@ -564,7 +562,6 @@ IRInstruction *ir_branch_into_block branch->destination_block = destination; branch->parent_block = block; list_push_back(block->instructions, branch); - branch->type = t_void; return branch; } @@ -579,7 +576,6 @@ IRInstruction *ir_branch IRInstruction *ir_return(CodegenContext *context, IRInstruction* return_value) { INSTRUCTION(branch, IR_RETURN); branch->operand = return_value; - branch->type = t_void; INSERT(branch); if (return_value) mark_used(return_value, branch); return branch; From 61d343d309d765c70347bc5ff9381bbf73847303 Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:15:24 -0800 Subject: [PATCH 19/97] [Bugfix] IR type of bitwise not was incorrectly always `t_integer` --- src/codegen/intermediate_representation.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 60d016448..a1ea3abe8 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -612,8 +612,7 @@ IRInstruction *ir_not { INSTRUCTION(x, IR_NOT); x->operand = source; - // TODO: Is this right? Should we use source->type? - x->type = t_integer; + x->type = source->type; mark_used(source, x); INSERT(x); return x; From 1ad6dbf4c3b5d9a0c438654067df1b1cc07acc3e Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:16:31 -0800 Subject: [PATCH 20/97] [Codegen/x86_64] Remove useless `r128` RegSize --- src/codegen/x86_64/arch_x86_64.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index b507a4898..6698fcf58 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -187,7 +187,6 @@ enum Instruction { }; enum RegSize { - r128, r64, r32, r16, @@ -202,7 +201,6 @@ static enum RegSize regsize_from_bytes(u64 bytes) { case 2: return r16; case 4: return r32; case 8: return r64; - case 16: return r128; default: ICE("Byte size can not be converted into register size on x86_64: %U", bytes); break; @@ -314,9 +312,6 @@ static void femit_imm_to_reg(CodegenContext *context, enum Instruction inst, va_ const char *mnemonic = instruction_mnemonic(context, inst); const char *destination = NULL; switch (size) { - case r128: - TODO("Support 128 bit registers on x86_64..."); - break; case r64: destination = register_name(destination_register); break; case r32: destination = register_name_32(destination_register); break; case r16: destination = register_name_16(destination_register); break; @@ -370,9 +365,6 @@ static void femit_mem_to_reg(CodegenContext *context, enum Instruction inst, va_ const char *address = register_name(address_register); const char *destination = NULL; switch (size) { - case r128: - TODO("Support 128 bit 
registers on x86_64..."); - break; case r64: destination = register_name(destination_register); break; case r32: destination = register_name_32(destination_register); break; case r16: destination = register_name_16(destination_register); break; @@ -405,9 +397,6 @@ static void femit_name_to_reg(CodegenContext *context, enum Instruction inst, va const char *address = register_name(address_register); const char *destination = NULL; switch (size) { - case r128: - TODO("Support 128 bit registers on x86_64..."); - break; case r64: destination = register_name(destination_register); break; case r32: destination = register_name_32(destination_register); break; case r16: destination = register_name_16(destination_register); break; @@ -439,9 +428,6 @@ static void femit_reg_to_mem(CodegenContext *context, enum Instruction inst, va_ const char *mnemonic = instruction_mnemonic(context, inst); const char *source = NULL; switch (size) { - case r128: - TODO("Support 128 bit registers on x86_64..."); - break; case r64: source = register_name(source_register); break; case r32: source = register_name_32(source_register); break; case r16: source = register_name_16(source_register); break; @@ -508,9 +494,6 @@ static void femit_reg_to_name(CodegenContext *context, enum Instruction inst, va const char *mnemonic = instruction_mnemonic(context, inst); const char *source = NULL; switch (size) { - case r128: - TODO("Support 128 bit registers on x86_64..."); - break; case r64: source = register_name(source_register); break; case r32: source = register_name_32(source_register); break; case r16: source = register_name_16(source_register); break; From d82cd3def10647e005630bb3987bd53c7c4f901c Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 13:19:38 -0800 Subject: [PATCH 21/97] [Codegen/x86_64] Don't emit xor + mov when a 32-bit mov is usable --- src/codegen/x86_64/arch_x86_64.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git 
a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 6698fcf58..54f8ea160 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -879,7 +879,7 @@ static RegisterDescriptor codegen_comparison // Perform the comparison. femit(cg_context, I_CMP, REGISTER_TO_REGISTER, rhs, lhs); - femit(cg_context, I_MOV, IMMEDIATE_TO_REGISTER, (int64_t)0, result, r64); + femit(cg_context, I_MOV, IMMEDIATE_TO_REGISTER, (int64_t)0, result, r32); femit(cg_context, I_SETCC, type, result); return result; @@ -983,17 +983,10 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { case IR_UNREACHABLE: break; case IR_IMMEDIATE: - // TODO: This probably shouldn't be done here. Do this in a pass before-hand or something. if (inst->type == t_integer_literal) { - // TODO: I don't think this is the best way of doing things. - //femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); - if (inst->imm <= UINT8_MAX) { - femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); - femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r8); - } else if (inst->imm <= UINT16_MAX) { - femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); - femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r16); - } else if (inst->imm <= UINT32_MAX) { + // TODO: integer_literal probably shouldn't be handled here. + // Do this in a pass before-hand or something. 
+ if (inst->imm <= UINT32_MAX) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); } else if (inst->imm <= UINT64_MAX) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); @@ -1001,13 +994,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { ICE("Unsupported integer literal immediate on x86_64 (out of range)"); } } else { - if (type_sizeof(inst->type) == 1) { - femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); - femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r8); - } else if (type_sizeof(inst->type) == 2) { - femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); - femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r16); - } else if (type_sizeof(inst->type) <= 4) { + if (type_sizeof(inst->type) <= 4) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); } else if (type_sizeof(inst->type) <= 8) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); From 9b30942185582a6b56bff0d537b716e7130e6b02 Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 14:39:21 -0800 Subject: [PATCH 22/97] [Codegen/IR] Make variable allocation instructions pointers to type This feels like too much bodge; I'm sure there is some refactor that will make this more elegant, but this gets the same amount of tests passing and failing, as well as the same tests. --- TODO.md | 2 ++ src/codegen/intermediate_representation.c | 8 +++++--- src/codegen/x86_64/arch_x86_64.c | 15 +++++++++------ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/TODO.md b/TODO.md index 2fa7042e0..7a2cd19a9 100644 --- a/TODO.md +++ b/TODO.md @@ -10,6 +10,7 @@ - [ ] Function bodies must be blocks or preceded by `=`. - [ ] `ext` functions may have a body. 
- [ ] Implicit cast sema pass: `a + b` where `a` is a `byte` and `b` an `integer` -> `(a as integer) + b` + - [ ] Testing Framework: Add ability to only show failing tests - [ ] Optimisation - [ ] Zero subscript still does add/multiply when it doesn't need to - [ ] Eliminate unused parameters (they are currently allocated registers) @@ -74,6 +75,7 @@ - [ ] `type` keyword in the parser/grammar. - [ ] Make sure nested structs work. - [ ] Arbitrary compile-time struct literals. + - [ ] Syntax? - [ ] Structs as parameters. - [ ] Structs as return values. - [ ] Modules. diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index a1ea3abe8..60b2d68eb 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -638,13 +638,15 @@ IRInstruction *ir_create_static v->name = string_dup(name); v->type = type; v->cached_size = type_sizeof(type); - v->cached_alignment = 8; /// TODO. + // TODO: Don't just use 8 byte alignment for every static. + // Should we just use the nearest larger (or equal) power of two, in the generic case? + v->cached_alignment = 8; vector_push(context->static_vars, v); /// Create an instruction to reference it and return it. 
INSTRUCTION(ref, IR_STATIC_REF); ref->static_ref = v; - ref->type = v->type; + ref->type = ast_make_type_pointer(context->ast, v->type->source_location, v->type); v->reference = ref; INSERT(ref); return ref; @@ -657,7 +659,7 @@ IRInstruction *ir_stack_allocate { INSTRUCTION(alloca, IR_ALLOCA); alloca->alloca.size = type_sizeof(type); - alloca->type = type; + alloca->type = ast_make_type_pointer(context->ast, type->source_location, type); INSERT(alloca); return alloca; } diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 54f8ea160..d7b8f9c12 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -1180,10 +1181,11 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // TODO: Should this array to pointer decay happen here? Or higher up in codegen? // TODO: type_sizeof(t_pointer) or something to load a pointer sized thing. // WE SHOULD NOT USE t_integer here!! - if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_integer)); + if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) + size = regsize_from_bytes(type_sizeof(t_integer)); else size = regsize_from_bytes(type_sizeof(inst->operand->type)); - if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); - if (inst->operand->type->kind == TYPE_ARRAY) + if (size == r8 || size == r16) size = r32; + if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); else @@ -1197,10 +1199,11 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // TODO: Should this array to pointer decay happen here? Or higher up in codegen? 
// TODO: type_sizeof(t_pointer) or something to load a pointer sized thing. // WE SHOULD NOT USE t_integer here!! - if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_integer)); + if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) + size = regsize_from_bytes(type_sizeof(t_integer)); else size = regsize_from_bytes(inst->operand->alloca.size); - if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); - if (inst->operand->type->kind == TYPE_ARRAY) + if (size == r8 || size == r16) size = r32; + if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) femit(context, I_LEA, MEMORY_TO_REGISTER, REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); else From 20e42a0904f05d7e8e277a140f017feff568907f Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 14:55:51 -0800 Subject: [PATCH 23/97] [Sema/Minor] Some updates to typecasting --- src/typechecker.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/typechecker.c b/src/typechecker.c index 514858ee8..d13e53203 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -899,24 +899,34 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { Type *t_from = expr->cast.value->type; - // TODO: What do we do when from and to types are equal? - - if (type_is_incomplete(t_from)) - ERR(expr->cast.value->source_location, "Can not cast from an incomplete type %T", t_from); - - // FROM any integer type TO any integer type is ALLOWED - if (is_integer(t_from) && is_integer(t_to)) break; - - // FROM any pointer type TO any integer type is ALLOWED + // TODO: Is complete to incomplete allowed? 
+ + // ALLOWED + // FROM any type T TO type T + if (types_equal(t_to, t_from)) break; + // FROM any pointer type TO any pointer type + if (is_pointer(t_from) && is_pointer(t_to)) break; + // FROM any pointer type TO any integer type if (is_pointer(t_from) && is_integer(t_to)) break; + // FROM any integer type TO any integer type + if (is_integer(t_from) && is_integer(t_to)) break; - // FROM any integer type TO any pointer type is DISALLOWED + // DISALLOWED + // FROM any integer type TO any pointer type is currently DISALLOWED, but very well may change if (is_integer(t_from) && is_pointer(t_to)) - ERR(expr->source_location, "Can not cast from integer type %T to pointer type %T", t_from, t_to); + ERR(expr->cast.value->source_location, + "Can not cast from an integer type %T to pointer type %T", + t_from, t_to); + + // FROM any incomplete type TO any complete type is DISALLOWED + if (type_is_incomplete(t_from) && !type_is_incomplete(t_to)) + ERR(expr->cast.value->source_location, + "Can not cast from an incomplete type %T to a complete type %T", + t_from, t_to); // TODO: arrays, functions, function pointers... - TODO(); + TODO("Casting from %T to %T is currently not supported by the typechecker, sorry", t_from, t_to); } /// Binary expression. This is a complicated one. From 3ad239267d59b0e2bdba9634286c45f68ead5b78 Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 14:57:44 -0800 Subject: [PATCH 24/97] [TODO] `as void` discard cast --- TODO.md | 1 + 1 file changed, 1 insertion(+) diff --git a/TODO.md b/TODO.md index 7a2cd19a9..8f68ee3ae 100644 --- a/TODO.md +++ b/TODO.md @@ -11,6 +11,7 @@ - [ ] `ext` functions may have a body. 
- [ ] Implicit cast sema pass: `a + b` where `a` is a `byte` and `b` an `integer` -> `(a as integer) + b` - [ ] Testing Framework: Add ability to only show failing tests + - [ ] `x as void` used to discard `x` - [ ] Optimisation - [ ] Zero subscript still does add/multiply when it doesn't need to - [ ] Eliminate unused parameters (they are currently allocated registers) From e7d77a708122b03762c40e9960c5acb8d649f09d Mon Sep 17 00:00:00 2001 From: Lens Date: Sun, 29 Jan 2023 14:58:05 -0800 Subject: [PATCH 25/97] [Tests] Lines with trailing whitespace keep me up at night --- tst/main.a68 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tst/main.a68 b/tst/main.a68 index 2d76f8351..b04e6437a 100755 --- a/tst/main.a68 +++ b/tst/main.a68 @@ -1,4 +1,4 @@ -#!/usr/bin/a68g --script +#!/usr/bin/a68g --script PROC platform is windows = BOOL: get env ("WINDIR") NE ""; STRING platform dir sep = (platform is windows | "\" | "/" ); @@ -246,7 +246,7 @@ BEGIN get (f, (str, new line)); str := str[3..] CO If we have labels, and this test doesn’t, skip it CO - ELIF labels NE "" THEN + ELIF labels NE "" THEN GO TO return FI; From b4af7c94512537ab69e2efcf780765249fa3afaf Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 30 Jan 2023 10:21:12 -0800 Subject: [PATCH 26/97] [Codegen/x86_64] Update incorrect comments regarding var. 
args --- src/codegen/x86_64/arch_x86_64.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index d7b8f9c12..ecaac5eb9 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -208,6 +208,7 @@ static enum RegSize regsize_from_bytes(u64 bytes) { } } +// TODO: Pass necessary RegSize in more of these cases enum InstructionOperands_x86_64 { IMMEDIATE, ///< int64_t imm MEMORY, ///< Reg reg, int64_t offset @@ -216,11 +217,11 @@ enum InstructionOperands_x86_64 { IMMEDIATE_TO_REGISTER, ///< int64_t imm, Reg dest, RegSize size IMMEDIATE_TO_MEMORY, ///< int64_t imm, Reg address, int64_t offset - MEMORY_TO_REGISTER, ///< Reg address, int64_t offset, Reg dest - NAME_TO_REGISTER, ///< Reg address, const char* name, Reg dest - REGISTER_TO_MEMORY, ///< Reg src, Reg address, int64_t offset + MEMORY_TO_REGISTER, ///< Reg address, int64_t offset, Reg dest, RegSize size + NAME_TO_REGISTER, ///< Reg address, const char* name, Reg dest, RegSize size + REGISTER_TO_MEMORY, ///< Reg src, RegSize size, Reg address, int64_t offset REGISTER_TO_REGISTER, ///< Reg src, Reg dest - REGISTER_TO_NAME, ///< Reg src, Reg address, const char* name + REGISTER_TO_NAME, ///< Reg src, RegSize size, Reg address, const char* name }; const char *setcc_suffixes_x86_64[COMPARE_COUNT] = { From 8013c43522b629642cfec30e7a5815d0b42c5f4a Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 30 Jan 2023 10:27:53 -0800 Subject: [PATCH 27/97] [Codegen/x86_64] Mention `movzx` instruction where applicable --- src/codegen/x86_64/arch_x86_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index ecaac5eb9..92eaf2c39 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1185,7 +1185,8 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { if 
(inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_integer)); else size = regsize_from_bytes(type_sizeof(inst->operand->type)); - if (size == r8 || size == r16) size = r32; + // TODO: Use `movzx`/`movzbl` + if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); @@ -1203,7 +1204,8 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_integer)); else size = regsize_from_bytes(inst->operand->alloca.size); - if (size == r8 || size == r16) size = r32; + // TODO: Use `movzx`/`movzbl` + if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) femit(context, I_LEA, MEMORY_TO_REGISTER, REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); @@ -1221,6 +1223,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_integer)); // TODO: Is this right? Do we need to get size of pointed to type? 
else size = regsize_from_bytes(type_sizeof(inst->operand->type)); + if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY) femit(context, I_LEA, MEMORY_TO_REGISTER, inst->operand->result, (int64_t)0, inst->result, size); @@ -1483,6 +1486,8 @@ static size_t interfering_regs(IRInstruction *instruction) { return mask >> 1; } +// TODO: This should probably be used by every backend, so it should +// move "up" somewhere. static void mangle_type_to(string_buffer *buf, Type *t) { ASSERT(t); switch (t->kind) { From a836f0be1fab9f6f2ba412dc86b07425296fba95 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 30 Jan 2023 10:36:21 -0800 Subject: [PATCH 28/97] [Codegen/x86_64] Add `t_pointer` primitive type This is most useful when you just need the sizeof a pointer, for example. --- src/ast.c | 62 +++++++++++++++++++------------- src/ast.h | 9 ++--- src/codegen/x86_64/arch_x86_64.c | 7 ++-- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/src/ast.c b/src/ast.c index 77184859d..52991777e 100644 --- a/src/ast.c +++ b/src/ast.c @@ -12,14 +12,25 @@ uint8_t id; } TypePrimitive;*/ static Type t_void_def = { - .kind = TYPE_PRIMITIVE, - .source_location = {0}, - .primitive = { - .size = 0, - .alignment = 0, - .name = literal_span_raw("void"), - .id = 0, - }, + .kind = TYPE_PRIMITIVE, + .source_location = {0}, + .primitive = { + .size = 0, + .alignment = 0, + .name = literal_span_raw("void"), + .id = 0, + }, +}; + +static Type t_pointer_def = { + .kind = TYPE_POINTER, + .source_location = {0}, + .primitive = { + .size = sizeof(void*), + .alignment = sizeof(void*), //> FIXME + .name = literal_span_raw(""), + .id = 2, // FIXME: I have no idea what ID should be set to. 
+ }, }; static Type t_integer_literal_def = { @@ -35,29 +46,30 @@ static Type t_integer_literal_def = { }; static Type t_integer_def = { - .kind = TYPE_PRIMITIVE, - .source_location = {0}, - .primitive = { - .size = 8, - .alignment = 8, - .name = literal_span_raw("integer"), - .is_signed = true, - .id = 1, - }, + .kind = TYPE_PRIMITIVE, + .source_location = {0}, + .primitive = { + .size = 8, + .alignment = 8, + .name = literal_span_raw("integer"), + .is_signed = true, + .id = 1, + }, }; static Type t_byte_def = { - .kind = TYPE_PRIMITIVE, - .source_location = {0}, - .primitive = { - .size = 1, - .alignment = 1, - .name = literal_span_raw("byte"), - .id = 3, - }, + .kind = TYPE_PRIMITIVE, + .source_location = {0}, + .primitive = { + .size = 1, + .alignment = 1, + .name = literal_span_raw("byte"), + .id = 3, + }, }; Type * const t_void = &t_void_def; +Type * const t_pointer = &t_pointer_def; Type * const t_integer_literal = &t_integer_literal_def; Type * const t_integer = &t_integer_def; Type * const t_byte = &t_byte_def; diff --git a/src/ast.h b/src/ast.h index 090d7a70d..c1f2845a0 100644 --- a/src/ast.h +++ b/src/ast.h @@ -587,9 +587,10 @@ void ast_replace_node(AST *ast, Node *old, Node *new); /// =========================================================================== /// Builtin types. 
/// =========================================================================== -extern Type * const t_void; -extern Type * const t_integer_literal; -extern Type * const t_integer; -extern Type * const t_byte; +extern Type *const t_void; +extern Type *const t_pointer; +extern Type *const t_integer_literal; +extern Type *const t_integer; +extern Type *const t_byte; #endif // FUNCOMPILER_AST_H diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 92eaf2c39..9be476369 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1218,10 +1218,9 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { else { enum RegSize size = -1; // TODO: Should this array to pointer decay happen here? Or higher up in codegen? - // TODO: type_sizeof(t_pointer) or something to load a pointer sized thing. - // WE SHOULD NOT USE t_integer here!! - if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_integer)); - // TODO: Is this right? Do we need to get size of pointed to type? + if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_pointer)); + // TODO: We are "supposed" to be loading sizeof pointed to type + // here, but that causes segfaults when handling arrays. 
else size = regsize_from_bytes(type_sizeof(inst->operand->type)); if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY) From 9d23bd22b55e691958be5787916103df58d09fe0 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 30 Jan 2023 10:47:04 -0800 Subject: [PATCH 29/97] [Sema] Fix `t_pointer` definition --- src/ast.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/ast.c b/src/ast.c index 52991777e..ca9f2efa5 100644 --- a/src/ast.c +++ b/src/ast.c @@ -25,12 +25,7 @@ static Type t_void_def = { static Type t_pointer_def = { .kind = TYPE_POINTER, .source_location = {0}, - .primitive = { - .size = sizeof(void*), - .alignment = sizeof(void*), //> FIXME - .name = literal_span_raw(""), - .id = 2, // FIXME: I have no idea what ID should be set to. - }, + .pointer = { .to = &t_void_def }, }; static Type t_integer_literal_def = { From 55e1ec04efb90937cfd59c01ada0ec351574cb6f Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 31 Jan 2023 10:23:58 -0800 Subject: [PATCH 30/97] [Sema] Disallow typecasting between arrays --- src/ast.c | 1 + src/typechecker.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/ast.c b/src/ast.c index ca9f2efa5..e1e0e471b 100644 --- a/src/ast.c +++ b/src/ast.c @@ -59,6 +59,7 @@ static Type t_byte_def = { .size = 1, .alignment = 1, .name = literal_span_raw("byte"), + .is_signed = false, .id = 3, }, }; diff --git a/src/typechecker.c b/src/typechecker.c index d13e53203..946ad54da 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -918,13 +918,19 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { "Can not cast from an integer type %T to pointer type %T", t_from, t_to); - // FROM any incomplete type TO any complete type is DISALLOWED + // FROM any incomplete type TO any complete type if (type_is_incomplete(t_from) && !type_is_incomplete(t_to)) ERR(expr->cast.value->source_location, "Can not cast 
from an incomplete type %T to a complete type %T", t_from, t_to); - // TODO: arrays, functions, function pointers... + // FROM any array type TO any array type + if (is_array(t_from) && is_array(t_to)) { + ERR(expr->cast.value->source_location, + "Can not cast between arrays."); + } + + // TODO: functions? TODO("Casting from %T to %T is currently not supported by the typechecker, sorry", t_from, t_to); } From e339100c981a846a832bcd203a7f4d89c4cbaec3 Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 31 Jan 2023 10:25:51 -0800 Subject: [PATCH 31/97] [Minor] TODO comment in Sema --- src/typechecker.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/typechecker.c b/src/typechecker.c index 946ad54da..41079a718 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -905,6 +905,7 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { // FROM any type T TO type T if (types_equal(t_to, t_from)) break; // FROM any pointer type TO any pointer type + // TODO: Check base type size + alignment... 
if (is_pointer(t_from) && is_pointer(t_to)) break; // FROM any pointer type TO any integer type if (is_pointer(t_from) && is_integer(t_to)) break; From 6c4570dddf7ee76c23f06a8114228018021c2c7d Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 31 Jan 2023 10:42:08 -0800 Subject: [PATCH 32/97] [Examples] Using a byte array as a C string --- examples/bytearray.un | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 examples/bytearray.un diff --git a/examples/bytearray.un b/examples/bytearray.un new file mode 100644 index 000000000..93785fe4a --- /dev/null +++ b/examples/bytearray.un @@ -0,0 +1,15 @@ +; 0 +; Eyyy + +puts : ext integer(str : @byte) + +foo : byte[69] +@foo[0] := 69 +@foo[1] := 121 +@foo[2] := 121 +@foo[3] := 121 +@foo[4] := 0 + +puts(foo[0]) + +0 From a94f836f9b27b10f13a730cfe84d8432169e17ab Mon Sep 17 00:00:00 2001 From: Lens Date: Wed, 1 Feb 2023 10:10:09 -0800 Subject: [PATCH 33/97] [Bugfix] Lexer no longer relies on null terminator --- src/parser.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 5eaaf9bdf..a04c73bdd 100644 --- a/src/parser.c +++ b/src/parser.c @@ -96,6 +96,7 @@ static void next_char(Parser *p) { /// Read the next character. 
p->lastc = *p->curr++; + if (p->lastc == 0) ERR("Lexer can not handle null bytes"); if (p->lastc == '\r') p->lastc = '\n'; } @@ -1083,7 +1084,7 @@ AST *parse(span source, const char *filename) { p.source = source; p.filename = filename; p.curr = source.data; - p.end = source.data + source.size; + p.end = source.data + source.size - 1; p.lastc = ' '; p.ast = ast_create(); p.ast->filename = string_create(filename); From 933ed59c84fc75e79e725a02c8699e4e91d6de6b Mon Sep 17 00:00:00 2001 From: Lens Date: Wed, 1 Feb 2023 10:20:54 -0800 Subject: [PATCH 34/97] [Frontend] Basic string literal support :O --- src/parser.c | 19 +++++++++++++++++++ src/typechecker.c | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/src/parser.c b/src/parser.c index a04c73bdd..4686bbfaa 100644 --- a/src/parser.c +++ b/src/parser.c @@ -392,6 +392,25 @@ static void next_token(Parser *p) { break; } + // String. + // TODO: Decide on delimiters. Should single/double quote be equal? Raw vs escaped? etc. + if (p->lastc == '"') { + p->tok.type = TK_STRING; + p->tok.text.data = p->curr; + p->tok.text.size = 0; + + next_char(p); + while (p->lastc != '"' && p->lastc != 0) { + // TODO: Handle escapes + p->tok.text.size += 1; + next_char(p); + } + if (p->lastc == 0) ERR("Got EOF before end of string literal..."); + next_char(p); + + break; + } + /// Anything else is invalid. ERR("Invalid token"); } diff --git a/src/typechecker.c b/src/typechecker.c index 41079a718..3251f3a82 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -1069,6 +1069,10 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { /// Just set the type. 
case NODE_LITERAL: if (expr->literal.type == TK_NUMBER) expr->type = t_integer_literal; + else if (expr->literal.type == TK_STRING) { + string s = ast->strings.data[expr->literal.string_index]; + expr->type = ast_make_type_array(ast, expr->source_location, t_byte, s.size + 1); + } else TODO("Literal type '%s'.", token_type_to_string(expr->literal.type)); break; From 312d4e7b1f0392262e63394c03f71dd95769d675 Mon Sep 17 00:00:00 2001 From: Lens Date: Wed, 1 Feb 2023 10:22:05 -0800 Subject: [PATCH 35/97] [Codegen] Very basic codegen of string literals They're still all zeroes, but we're closer!! --- src/ast.c | 1 + src/codegen.c | 18 ++++++++++++++++-- src/codegen/x86_64/arch_x86_64.c | 4 ++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/ast.c b/src/ast.c index e1e0e471b..98217b883 100644 --- a/src/ast.c +++ b/src/ast.c @@ -9,6 +9,7 @@ usz alignment; span name; bool is_signed; + /// A unique ID that is used to compare primitives. uint8_t id; } TypePrimitive;*/ static Type t_void_def = { diff --git a/src/codegen.c b/src/codegen.c index d1f7eb0ea..a61174ccd 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -8,6 +8,8 @@ #include #include #include +#include + #include #include #include @@ -451,9 +453,21 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { /// Literal expression. Only integer literals are supported for now. case NODE_LITERAL: - if (expr->literal.type != TK_NUMBER) DIAG(DIAG_SORRY, expr->source_location, "Emitting non-integer literals not supported"); + if (expr->literal.type == TK_NUMBER) expr->ir = ir_immediate(ctx, expr->type, expr->literal.integer); + else if (expr->literal.type == TK_STRING) { + char buf[48] = {0}; + static size_t string_literal_count = 0; + int len = snprintf(buf, 48, "__str_lit%zu", string_literal_count++); + // TODO: Two options. Since we can't currently assign to static + // variables with compile-time known constants, it means that + // strings go uninitialised... 
So what would really be ideal + // is if we could just, ya know, do that. (i.e. `.string` as + // directive or asciz or whathaveyou). Our other option is to + // emit (imm+store) pairs for every byte... + expr->ir = ir_create_static(ctx, expr->type, as_span(string_create(buf))); + } + else DIAG(DIAG_SORRY, expr->source_location, "Emitting non-integer literals not supported"); // TODO: SEMA should probably have already lowered integer_literal type, so we *should* have a type already available on the literal node... - expr->ir = ir_immediate(ctx, expr->type, expr->literal.integer); return; /// Variable reference. diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 9be476369..76e49b351 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1040,6 +1040,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { femit(context, I_SUB, IMMEDIATE_TO_REGISTER, (int64_t)8, REG_RSP, r64); } for (Register i = REG_RAX + 1; i < sizeof(func_regs) * 8; ++i) { + // TODO: Don't push registers that are used for arguments. if (func_regs & (1 << i) && is_caller_saved(i)) { femit(context, I_PUSH, REGISTER, i); } @@ -1567,6 +1568,9 @@ void codegen_emit_x86_64(CodegenContext *context) { fprint(context->code, ".section .data\n"); } + // TODO: Do compile-time known static assignment (i.e. of string + // literals) using assembler directives. + /// Allocate space for the variable. 
usz sz = type_sizeof(var->type); fprint(context->code, "%S: .space %zu\n", var->name, sz); From ac386a808ea0e3acfbfc23f500d9e1266ada172f Mon Sep 17 00:00:00 2001 From: Lens Date: Wed, 1 Feb 2023 10:22:54 -0800 Subject: [PATCH 36/97] [Examples] Basic string literal example --- examples/string_literals.un | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 examples/string_literals.un diff --git a/examples/string_literals.un b/examples/string_literals.un new file mode 100644 index 000000000..8f4f09924 --- /dev/null +++ b/examples/string_literals.un @@ -0,0 +1,5 @@ +puts : ext void(s : @byte) + +str : @byte = "Hello, LISP!"[0] + +puts(str) From ab4eac20c180d1ab582b4b16c5b1f9ff9b45bd3d Mon Sep 17 00:00:00 2001 From: Lens Date: Wed, 1 Feb 2023 10:23:22 -0800 Subject: [PATCH 37/97] [TODO] Arrays have two kinds of loading we need to be able to emit --- TODO.md | 1 + 1 file changed, 1 insertion(+) diff --git a/TODO.md b/TODO.md index 8f68ee3ae..c23a2f8d9 100644 --- a/TODO.md +++ b/TODO.md @@ -12,6 +12,7 @@ - [ ] Implicit cast sema pass: `a + b` where `a` is a `byte` and `b` an `integer` -> `(a as integer) + b` - [ ] Testing Framework: Add ability to only show failing tests - [ ] `x as void` used to discard `x` + - [ ] Resolve the two different kinds of array-type loading: entire copy vs pointer decay. Basically, `foo[2]` needs `foo` to be loaded as a pointer. `a : foo[2] = b` requires loading entire copy of `b` into `a`. - [ ] Optimisation - [ ] Zero subscript still does add/multiply when it doesn't need to - [ ] Eliminate unused parameters (they are currently allocated registers) From 4423fee3252f3dcc01b9143cc0eb1adc0c3ba6fb Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:01:37 -0800 Subject: [PATCH 38/97] [AST] Make `ast_print_node` publicly available Also remove unused bitfield; we can add one if we end up adding a lot of bools there, I guess. 
--- src/ast.c | 2 +- src/ast.h | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ast.c b/src/ast.c index 98217b883..d03f7ad27 100644 --- a/src/ast.c +++ b/src/ast.c @@ -606,7 +606,7 @@ static void ast_print_children( ); /// Print a node. -static void ast_print_node( +void ast_print_node( FILE *file, const Node *logical_parent, const Node *node, diff --git a/src/ast.h b/src/ast.h index c1f2845a0..41e9156ea 100644 --- a/src/ast.h +++ b/src/ast.h @@ -152,7 +152,7 @@ typedef struct NodeFunction { Node *body; string name; IRFunction *ir; - bool global : 1; + bool global; } NodeFunction; /// Variable declaration. @@ -578,6 +578,12 @@ void ast_print(FILE *file, const AST *ast); /// Print the scope tree of an AST. void ast_print_scope_tree(FILE *file, const AST *ast); +/// Print a node and all of it's children. +/// Use like so: +/// string_buffer buf = {0}; +/// ast_print_node(file, NULL, node, &buf); +void ast_print_node(FILE *file, const Node *logical_parent, const Node *node, string_buffer *leading_text); + /// Intern a string. size_t ast_intern_string(AST *ast, span string); From c57b3881f88f9752a1d26842ad223a8e1445b91b Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:02:38 -0800 Subject: [PATCH 39/97] [Minor] Formatting Move an `if` into the surrounding switch with `case`. Make needless multiline declarations a single line. 
--- src/codegen/intermediate_representation.c | 11 ++----- src/parser.c | 38 +++++++++++------------ 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 60b2d68eb..6c9463857 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -629,10 +629,7 @@ IRInstruction *ir_not ALL_BINARY_INSTRUCTION_TYPES(CREATE_BINARY_INSTRUCTION) #undef CREATE_BINARY_INSTRUCTION -IRInstruction *ir_create_static -(CodegenContext *context, - Type *type, - span name) { +IRInstruction *ir_create_static(CodegenContext *context, Type *type, span name) { /// Create the variable. IRStaticVariable *v = calloc(1, sizeof *v); v->name = string_dup(name); @@ -652,11 +649,7 @@ IRInstruction *ir_create_static return ref; } -IRInstruction *ir_stack_allocate -(CodegenContext *context, - Type *type - ) -{ +IRInstruction *ir_stack_allocate(CodegenContext *context, Type *type) { INSTRUCTION(alloca, IR_ALLOCA); alloca->alloca.size = type_sizeof(type); alloca->type = ast_make_type_pointer(context->ast, type->source_location, type); diff --git a/src/parser.c b/src/parser.c index 4686bbfaa..55b825e0c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -367,6 +367,25 @@ static void next_token(Parser *p) { } break; + // String. + case '"': + // TODO: Decide on delimiters. Should single/double quote be equal? Raw vs escaped? etc. + if (p->lastc == '"') { + p->tok.type = TK_STRING; + p->tok.text.data = p->curr; + p->tok.text.size = 0; + next_char(p); //> Eat beginning delimiter + while (p->lastc != '"' && p->lastc != 0) { + // TODO: Handle escapes + p->tok.text.size += 1; + next_char(p); + } + if (p->lastc == 0) ERR("Got EOF before end of string literal..."); + next_char(p); //> Eat ending delimiter + break; + } + break; + /// Number or identifier. default: /// Identifier. @@ -392,25 +411,6 @@ static void next_token(Parser *p) { break; } - // String. 
- // TODO: Decide on delimiters. Should single/double quote be equal? Raw vs escaped? etc. - if (p->lastc == '"') { - p->tok.type = TK_STRING; - p->tok.text.data = p->curr; - p->tok.text.size = 0; - - next_char(p); - while (p->lastc != '"' && p->lastc != 0) { - // TODO: Handle escapes - p->tok.text.size += 1; - next_char(p); - } - if (p->lastc == 0) ERR("Got EOF before end of string literal..."); - next_char(p); - - break; - } - /// Anything else is invalid. ERR("Invalid token"); } From ab7dcf8774e466fa698e8afebda18c08812fa14d Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:04:31 -0800 Subject: [PATCH 40/97] [Sema] Emit error over previous todo as we now handle (some) casting --- src/typechecker.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/typechecker.c b/src/typechecker.c index 3251f3a82..0c3ba65d6 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -933,7 +933,9 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { // TODO: functions? - TODO("Casting from %T to %T is currently not supported by the typechecker, sorry", t_from, t_to); + ERR(expr->cast.value->source_location, + "Casting from %T to %T is not supported by the typechecker\n" + " Open an issue with the current maintainers if you feel like this is not the proper behaviour.", t_from, t_to); } /// Binary expression. This is a complicated one. From db2d432d89f2c8e3e736b05dd91dd12b8c316cd6 Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:05:37 -0800 Subject: [PATCH 41/97] [Sema] Disallow non-equal incomplete types entirely --- src/typechecker.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/typechecker.c b/src/typechecker.c index 0c3ba65d6..4b698ad86 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -901,31 +901,31 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { // TODO: Is complete to incomplete allowed? 
- // ALLOWED - // FROM any type T TO type T + // FROM any type T TO type T is ALLOWED if (types_equal(t_to, t_from)) break; - // FROM any pointer type TO any pointer type + + // FROM any incomplete type is DISALLOWED + if (type_is_incomplete(t_from) || type_is_incomplete(t_to)) + ERR(expr->cast.value->source_location, "Can not cast from an incomplete type %T", t_from); + // TO any complete type is DISALLOWED + if (type_is_incomplete(t_from) || type_is_incomplete(t_to)) + ERR(expr->cast.value->source_location, "Can not cast to an incomplete type %T", t_to); + + // FROM any pointer type TO any pointer type is ALLOWED // TODO: Check base type size + alignment... if (is_pointer(t_from) && is_pointer(t_to)) break; - // FROM any pointer type TO any integer type + // FROM any pointer type TO any integer type is ALLOWED if (is_pointer(t_from) && is_integer(t_to)) break; - // FROM any integer type TO any integer type + // FROM any integer type TO any integer type is ALLOWED if (is_integer(t_from) && is_integer(t_to)) break; - // DISALLOWED // FROM any integer type TO any pointer type is currently DISALLOWED, but very well may change if (is_integer(t_from) && is_pointer(t_to)) ERR(expr->cast.value->source_location, "Can not cast from an integer type %T to pointer type %T", t_from, t_to); - // FROM any incomplete type TO any complete type - if (type_is_incomplete(t_from) && !type_is_incomplete(t_to)) - ERR(expr->cast.value->source_location, - "Can not cast from an incomplete type %T to a complete type %T", - t_from, t_to); - - // FROM any array type TO any array type + // FROM any array type TO any array type is DISALLOWED if (is_array(t_from) && is_array(t_to)) { ERR(expr->cast.value->source_location, "Can not cast between arrays."); From 1bd10b303714cbd2cc4e1fc92bd53387241f43f1 Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:13:59 -0800 Subject: [PATCH 42/97] [Minor/AST] Add meaningful docstring to `type_is_incomplete_canon` --- src/ast.h | 10 ++++++++-- 1 file 
changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ast.h b/src/ast.h index 41e9156ea..c1181e275 100644 --- a/src/ast.h +++ b/src/ast.h @@ -548,13 +548,19 @@ Type *type_last_alias(Type *type); * Basically, ‘incomplete’ means that we don’t know its size/alignment * and therefore, we can’t allocate a variable of that type. * - * void is a special case because it is purposefully incomplete. + * `void` is a special case because it is purposefully incomplete. * * \return true iff the type is incomplete. */ bool type_is_incomplete(Type *type); -/// Check if a canonical type is incomplete. +/** Same as type_is_incomplete() but must be given a canonical type. + * + * \see type_canonical() + * \see type_is_incomplete() + * + * \return true iff the given canonical type is incomplete. + */ bool type_is_incomplete_canon(Type *type); /// Get the size of a type, in bytes. From 66f7289da6bd8b6f5fceacd6265cd903dcc3a87a Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:15:53 -0800 Subject: [PATCH 43/97] [Minor/AST] Better formatting of docstring for `type_canonical` --- src/ast.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/ast.h b/src/ast.h index c1181e275..8a3782a41 100644 --- a/src/ast.h +++ b/src/ast.h @@ -517,16 +517,17 @@ Type *ast_make_type_function( /// with malloc() and must be freed by the caller. string typename(Type *type, bool colour); -/** - * Get the canonical type of a type. +/** Get the canonical type of any given type. * - * The ‘canonical’ type of a type T is T stripped of any aliases. E.g. in C - * typedef int foo; - * typedef foo bar - * The canonical type of bar would be int. We need this because, - * currently, builtin types are just named types (i.e. typedefs) that - * refer to the actual primitive types, and if we ever introduce - * something like typedef, it will just work out of the box. + * The ‘canonical’ type of a type T is T stripped of any aliases. E.g. 
in C, given + * typedef int foo; + * typedef foo bar + * the canonical type of `bar` would be `int`. + * + * Currently, builtin types are just named types (i.e. typedefs in C + * terminology) that refer to the actual primitive types, and if we + * ever introduce something like typedef, it will just work out of the + * box. * * \return NULL if the type is incomplete. */ From b125c170058b68fcedf706614f0dac6f9ada7167 Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:19:38 -0800 Subject: [PATCH 44/97] [Minor/TODO] Update unnecessarily narrowing statement :P --- TODO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index c23a2f8d9..addb84a20 100644 --- a/TODO.md +++ b/TODO.md @@ -49,7 +49,7 @@ - [ ] Actually implementing casts. - [ ] During codegen, we should actually output `zext`/`sext` if needed. Otherwise truncation is automatic. - [ ] Update IR parser - - [ ] Binary operators need to pick return type instead of strictly returning `integer` + - [ ] Operators (unary and binary) need to pick return type instead of strictly returning `integer` - [ ] Arrays - [ ] Semantic analysis for static arrays. - [ ] Codegen From bee81364b7dfbdb64e04b781bd1cc173eddc035e Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:23:37 -0800 Subject: [PATCH 45/97] [AST] Remove `id` system entirely With primitives now being implemented as the address of global definitions, we can compare pointers instead of needing a unique id. Plus, the one case that ids were handling (t_integer_literal and t_integer being implicitly equal) was already specifically handled in the one place that used the id system. 
--- src/ast.c | 4 ---- src/ast.h | 1 - src/typechecker.c | 3 ++- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ast.c b/src/ast.c index d03f7ad27..d68a57b02 100644 --- a/src/ast.c +++ b/src/ast.c @@ -19,7 +19,6 @@ static Type t_void_def = { .size = 0, .alignment = 0, .name = literal_span_raw("void"), - .id = 0, }, }; @@ -37,7 +36,6 @@ static Type t_integer_literal_def = { .alignment = 8, .name = literal_span_raw(""), .is_signed = true, - .id = 1, }, }; @@ -49,7 +47,6 @@ static Type t_integer_def = { .alignment = 8, .name = literal_span_raw("integer"), .is_signed = true, - .id = 1, }, }; @@ -61,7 +58,6 @@ static Type t_byte_def = { .alignment = 1, .name = literal_span_raw("byte"), .is_signed = false, - .id = 3, }, }; diff --git a/src/ast.h b/src/ast.h index 8a3782a41..7f2d4b8b9 100644 --- a/src/ast.h +++ b/src/ast.h @@ -233,7 +233,6 @@ typedef struct TypePrimitive { usz alignment; span name; bool is_signed; - uint8_t id; } TypePrimitive; /// Pointer type. diff --git a/src/typechecker.c b/src/typechecker.c index 4b698ad86..95dbbd7e3 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -42,9 +42,10 @@ NODISCARD static bool types_equal_canon(Type *a, Type *b) { default: ICE("Invalid type kind %d", a->kind); case TYPE_NAMED: UNREACHABLE(); case TYPE_PRIMITIVE: + // t_integer_literal is implicitly equal to t_integer if (a == t_integer_literal) return b == t_integer_literal || b == t_integer; if (b == t_integer_literal) return a == t_integer_literal || a == t_integer; - return a->primitive.id == b->primitive.id; + return a == b; case TYPE_POINTER: return types_equal(a->pointer.to, b->pointer.to); case TYPE_ARRAY: return a->array.size == b->array.size && types_equal(a->array.of, b->array.of); case TYPE_FUNCTION: { From afebae2fd10fcdc31af2d52633f2d7ae2969ffaf Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:30:47 -0800 Subject: [PATCH 46/97] [Tests] Two tests---one passes, one fails This is hopefully some motivation to get codegen of 
static variable initialisation done. The passing one will ensure we can manipulate arrays --- examples/string_literals.un | 3 ++- tst/tests/bytearray.un | 15 +++++++++++++++ tst/tests/string_literals.un | 9 +++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tst/tests/bytearray.un create mode 100644 tst/tests/string_literals.un diff --git a/examples/string_literals.un b/examples/string_literals.un index 8f4f09924..ac519167e 100644 --- a/examples/string_literals.un +++ b/examples/string_literals.un @@ -1,5 +1,6 @@ puts : ext void(s : @byte) -str : @byte = "Hello, LISP!"[0] +str : @byte = "Hello, LISP programmer!"[0] puts(str) +0 diff --git a/tst/tests/bytearray.un b/tst/tests/bytearray.un new file mode 100644 index 000000000..93785fe4a --- /dev/null +++ b/tst/tests/bytearray.un @@ -0,0 +1,15 @@ +; 0 +; Eyyy + +puts : ext integer(str : @byte) + +foo : byte[69] +@foo[0] := 69 +@foo[1] := 121 +@foo[2] := 121 +@foo[3] := 121 +@foo[4] := 0 + +puts(foo[0]) + +0 diff --git a/tst/tests/string_literals.un b/tst/tests/string_literals.un new file mode 100644 index 000000000..9fe27d61e --- /dev/null +++ b/tst/tests/string_literals.un @@ -0,0 +1,9 @@ +; 0 +; Lorem ipsum dolor sit amet consectetuer + +puts : ext void(s : @byte) + +str : @byte = "Lorem ipsum dolor sit amet consectetuer"[0] + +puts(str) +0 From e669db3fcc90afccd46961ddd7c835745f93f78d Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:39:54 -0800 Subject: [PATCH 47/97] [Codegen/IR] Remove `cached_*` members from `IRStaticVariable` --- src/codegen.h | 2 -- src/codegen/intermediate_representation.c | 4 ---- 2 files changed, 6 deletions(-) diff --git a/src/codegen.h b/src/codegen.h index d8a735863..ed3dcc982 100644 --- a/src/codegen.h +++ b/src/codegen.h @@ -40,8 +40,6 @@ struct RegisterPool { typedef struct IRStaticVariable { string name; Type *type; - usz cached_size; - usz cached_alignment; IRInstruction *reference; } IRStaticVariable; diff --git 
a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 6c9463857..234700cec 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -634,10 +634,6 @@ IRInstruction *ir_create_static(CodegenContext *context, Type *type, span name) IRStaticVariable *v = calloc(1, sizeof *v); v->name = string_dup(name); v->type = type; - v->cached_size = type_sizeof(type); - // TODO: Don't just use 8 byte alignment for every static. - // Should we just use the nearest larger (or equal) power of two, in the generic case? - v->cached_alignment = 8; vector_push(context->static_vars, v); /// Create an instruction to reference it and return it. From c9ccbb21fb41ed25d4b739eeeeba5da82b39093a Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:43:12 -0800 Subject: [PATCH 48/97] [Minor/AST] Stub implementation of `type_alignof` --- src/ast.c | 5 +++++ src/ast.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/src/ast.c b/src/ast.c index d68a57b02..efe0d06f4 100644 --- a/src/ast.c +++ b/src/ast.c @@ -502,6 +502,11 @@ usz type_sizeof(Type *type) { } } +usz type_alignof(Type *type) { + TODO("Unimplemented, sorry"); + return 69; +} + bool type_is_void(Type *type) { return type_canonical(type) == t_void; } diff --git a/src/ast.h b/src/ast.h index 7f2d4b8b9..339a3ed6a 100644 --- a/src/ast.h +++ b/src/ast.h @@ -566,6 +566,9 @@ bool type_is_incomplete_canon(Type *type); /// Get the size of a type, in bytes. usz type_sizeof(Type *type); +/// Get the aligmnent of a type, in bytes. +usz type_alignof(Type *type); + /// Check if a type is void. 
bool type_is_void(Type *type); From 6615957756a69fd791bd3c3dd316eb4289146fcd Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:45:10 -0800 Subject: [PATCH 49/97] [Codegen/x86_64] Use `type_sizeof(t_pointer)` where applicable --- src/codegen/x86_64/arch_x86_64.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 76e49b351..4e2d11d19 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1181,10 +1181,8 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { if (inst->operand->kind == IR_STATIC_REF) { enum RegSize size = -1; // TODO: Should this array to pointer decay happen here? Or higher up in codegen? - // TODO: type_sizeof(t_pointer) or something to load a pointer sized thing. - // WE SHOULD NOT USE t_integer here!! if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) - size = regsize_from_bytes(type_sizeof(t_integer)); + size = regsize_from_bytes(type_sizeof(t_pointer)); else size = regsize_from_bytes(type_sizeof(inst->operand->type)); // TODO: Use `movzx`/`movzbl` if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); @@ -1200,10 +1198,8 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { else if (inst->operand->kind == IR_ALLOCA) { enum RegSize size = -1; // TODO: Should this array to pointer decay happen here? Or higher up in codegen? - // TODO: type_sizeof(t_pointer) or something to load a pointer sized thing. - // WE SHOULD NOT USE t_integer here!! 
if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) - size = regsize_from_bytes(type_sizeof(t_integer)); + size = regsize_from_bytes(type_sizeof(t_pointer)); else size = regsize_from_bytes(inst->operand->alloca.size); // TODO: Use `movzx`/`movzbl` if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); From 9b719820e918f2c58df280958b51132a42e43810 Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:51:10 -0800 Subject: [PATCH 50/97] [Codegen/IR] Fix overwrite of `rhs` of subscript operator This was bad due to optimisation expecting an unscaled value in the subscript's `rhs`. --- src/codegen.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index a61174ccd..5be3194fb 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -372,16 +372,17 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { codegen_expr(ctx, rhs); if (expr->binary.op == TK_LBRACK) { // An array subscript needs multiplied by the sizeof the array's base type. + IRInstruction *scaled_rhs = NULL; if (lhs->type->kind == TYPE_ARRAY) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->array.of)); - rhs->ir = ir_mul(ctx, rhs->ir, immediate); + scaled_rhs = ir_mul(ctx, rhs->ir, immediate); } // A pointer subscript needs multiplied by the sizeof the pointer's base type.
else if (lhs->type->kind == TYPE_POINTER) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->pointer.to)); - rhs->ir = ir_mul(ctx, rhs->ir, immediate); + scaled_rhs = ir_mul(ctx, rhs->ir, immediate); } - expr->ir = ir_add(ctx, lhs->ir, rhs->ir); + expr->ir = ir_add(ctx, lhs->ir, scaled_rhs); return; } From dc49514856d6b26ff8862643552f9efebd68eec5 Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 10:55:02 -0800 Subject: [PATCH 51/97] [Minor/Codegen] Update diagnostic message --- src/codegen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index 5be3194fb..5253ceca6 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -467,8 +467,8 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { // emit (imm+store) pairs for every byte... expr->ir = ir_create_static(ctx, expr->type, as_span(string_create(buf))); } - else DIAG(DIAG_SORRY, expr->source_location, "Emitting non-integer literals not supported"); - // TODO: SEMA should probably have already lowered integer_literal type, so we *should* have a type already available on the literal node... + // TODO: This diagnostic could be better. + else DIAG(DIAG_SORRY, expr->source_location, "Emitting literals of type %u not supported", expr->literal.type); return; /// Variable reference. 
From 4e3e0f8c6ff0b0432bdde8adb20cb16c84f5586d Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 11:05:23 -0800 Subject: [PATCH 52/97] [Tests] Add `negative-numbers.un` --- tst/tests/negative-numbers.un | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tst/tests/negative-numbers.un diff --git a/tst/tests/negative-numbers.un b/tst/tests/negative-numbers.un new file mode 100644 index 000000000..409d73a82 --- /dev/null +++ b/tst/tests/negative-numbers.un @@ -0,0 +1,5 @@ +; 0 + +a : integer = -2 + +a + 2 From ec7b772f4fd54939191c334be7b844aed1bb8847 Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 11:06:26 -0800 Subject: [PATCH 53/97] [Minor] Be slightly more explicit about primitive type defined name --- src/ast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ast.c b/src/ast.c index efe0d06f4..22b50e410 100644 --- a/src/ast.c +++ b/src/ast.c @@ -22,7 +22,7 @@ static Type t_void_def = { }, }; -static Type t_pointer_def = { +static Type t_void_pointer_def = { .kind = TYPE_POINTER, .source_location = {0}, .pointer = { .to = &t_void_def }, @@ -62,7 +62,7 @@ static Type t_byte_def = { }; Type * const t_void = &t_void_def; -Type * const t_pointer = &t_pointer_def; +Type * const t_pointer = &t_void_pointer_def; Type * const t_integer_literal = &t_integer_literal_def; Type * const t_integer = &t_integer_def; Type * const t_byte = &t_byte_def; From 974bdff6f8a86e336a4c723f70c67a59867fa206 Mon Sep 17 00:00:00 2001 From: Lens Date: Thu, 2 Feb 2023 11:09:33 -0800 Subject: [PATCH 54/97] [Minor/Codegen] Fix scuffed diagnostic message --- src/codegen.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index 5253ceca6..b425ef33f 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -467,8 +467,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { // emit (imm+store) pairs for every byte... 
expr->ir = ir_create_static(ctx, expr->type, as_span(string_create(buf))); } - // TODO: This diagnostic could be better. - else DIAG(DIAG_SORRY, expr->source_location, "Emitting literals of type %u not supported", expr->literal.type); + else DIAG(DIAG_SORRY, expr->source_location, "Emitting literals of type %T not supported", expr->type); return; /// Variable reference. From 32f6a44230b93a550dedabf457e6bb4dbba74b8f Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 10:24:16 -0800 Subject: [PATCH 55/97] [IR] Create `ir_static_reference()` to reference an existing static --- src/codegen/intermediate_representation.c | 16 ++++++++++++++++ src/codegen/intermediate_representation.h | 3 +++ 2 files changed, 19 insertions(+) diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 234700cec..76be58de8 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -645,6 +645,22 @@ IRInstruction *ir_create_static(CodegenContext *context, Type *type, span name) return ref; } +IRInstruction *ir_static_reference(CodegenContext *context, span name) { + foreach_ptr(IRStaticVariable *, v, context->static_vars) { + if (string_eq(v->name, name)) { + INSTRUCTION(ref, IR_STATIC_REF); + ref->static_ref = v; + ref->type = ast_make_type_pointer(context->ast, v->type->source_location, v->type); + // TODO: `v->reference` may need to become list of references? I think this is why + // optimisation is broken. 
+ INSERT(ref); + return ref; + } + } + + ICE("Can not create static reference to %S as it does not exist in the codegen context.", name); +} + IRInstruction *ir_stack_allocate(CodegenContext *context, Type *type) { INSTRUCTION(alloca, IR_ALLOCA); alloca->alloca.size = type_sizeof(type); diff --git a/src/codegen/intermediate_representation.h b/src/codegen/intermediate_representation.h index 2baa88edd..95ecb9e33 100644 --- a/src/codegen/intermediate_representation.h +++ b/src/codegen/intermediate_representation.h @@ -327,6 +327,9 @@ IRInstruction *ir_create_static Type *type, span name); +/// Create a reference to a variable with static storage duration. +IRInstruction *ir_static_reference(CodegenContext *context, span name); + IRInstruction *ir_stack_allocate (CodegenContext *context, Type *type); From b2d090dee4d5d50c4e8f08bea8e849182ffdd027 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 10:24:44 -0800 Subject: [PATCH 56/97] [Codegen] Use `ir_static_reference()` to create many references --- src/codegen.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index b425ef33f..aa9dd3d2a 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -357,35 +357,39 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { return; } - /// Anything else is an error (I think?). - /// Otherwise, actually emit the LHS and load from that. - /*codegen_expr(ctx, lhs); - expr->ir = ir_store(ctx, lhs->ir, rhs->ir);*/ - return; + ICE("Invalid type of lhs of assignment (should have been caught by sema): %T", lhs->type); } - // TODO: Just use lhs operand of subscript operator when right hand - // side is a compile-time-known zero value. - - /// Emit the operands. - codegen_expr(ctx, lhs); - codegen_expr(ctx, rhs); if (expr->binary.op == TK_LBRACK) { - // An array subscript needs multiplied by the sizeof the array's base type. 
+ codegen_expr(ctx, rhs); + + IRInstruction *subs_lhs = NULL; + if (lhs->kind == NODE_VARIABLE_REFERENCE) { + subs_lhs = ir_static_reference(ctx, as_span(lhs->var->name)); + } else ERR("LHS of subscript operator has invalid kind %d", lhs->kind); + + // TODO: Just use lhs operand of subscript operator when right hand + // side is a compile-time-known zero value. + IRInstruction *scaled_rhs = NULL; + // An array subscript needs multiplied by the sizeof the array's base type. if (lhs->type->kind == TYPE_ARRAY) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->array.of)); scaled_rhs = ir_mul(ctx, rhs->ir, immediate); } - // A pointer subscript needs multiplied by the sizeof the pointer's base type. + // A pointer subscript needs multiplied by the sizeof the pointer's base type. else if (lhs->type->kind == TYPE_POINTER) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->pointer.to)); scaled_rhs = ir_mul(ctx, rhs->ir, immediate); } - expr->ir = ir_add(ctx, lhs->ir, scaled_rhs); + expr->ir = ir_add(ctx, subs_lhs, scaled_rhs); return; } + /// Emit the operands. + codegen_expr(ctx, lhs); + codegen_expr(ctx, rhs); + /// Emit the binary instruction. switch (expr->binary.op) { default: ICE("Cannot emit binary expression of type %d", expr->binary.op); From b14fa1c89626fb7ae543f64201a0812212fdc614 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 10:34:10 -0800 Subject: [PATCH 57/97] [Bugfix/x86_64] Add missing parameter... --- src/codegen/x86_64/arch_x86_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 4e2d11d19..098e9b5f2 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1057,7 +1057,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { } // Restore stack pointer from stack alignment, if necessary. 
if (regs_pushed_count & 0b1) { - femit(context, I_ADD, IMMEDIATE_TO_REGISTER, (int64_t)8, REG_RSP); + femit(context, I_ADD, IMMEDIATE_TO_REGISTER, (int64_t)8, REG_RSP, r64); } femit(context, I_MOV, REGISTER_TO_REGISTER, REG_RAX, inst->result); From 8281a441b44a9b2f6d5912f276e901f71ec9ca94 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 11:45:36 -0800 Subject: [PATCH 58/97] [Minor/Codegen] Add TODO comment --- src/codegen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/codegen.c b/src/codegen.c index aa9dd3d2a..e940b23a7 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -365,6 +365,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { IRInstruction *subs_lhs = NULL; if (lhs->kind == NODE_VARIABLE_REFERENCE) { + // TODO: Handle local variable references, somehow. How can we tell if it's local/static? subs_lhs = ir_static_reference(ctx, as_span(lhs->var->name)); } else ERR("LHS of subscript operator has invalid kind %d", lhs->kind); From ec35e50a119b39edae9607c3c8b2570611d2c013 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 11:45:53 -0800 Subject: [PATCH 59/97] [Codegen/x86_64] Begin move away from `va_list` towards typed params --- src/codegen/x86_64/arch_x86_64.c | 205 ++++++++++++++----------------- 1 file changed, 91 insertions(+), 114 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 098e9b5f2..c3a0a8eed 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -463,15 +463,12 @@ static void femit_reg_to_mem(CodegenContext *context, enum Instruction inst, va_ } } -static void femit_reg_to_reg(CodegenContext *context, enum Instruction inst, va_list args) { - RegisterDescriptor source_register = va_arg(args, RegisterDescriptor); - RegisterDescriptor destination_register = va_arg(args, RegisterDescriptor); - +static void femit_reg_to_reg(CodegenContext *context, enum Instruction inst, RegisterDescriptor source_register, RegisterDescriptor 
destination_register) { const char *mnemonic = instruction_mnemonic(context, inst); const char *source = register_name(source_register); const char *destination = register_name(destination_register); - // Optimise away moves from a register to itself + // Always optimise away moves from a register to itself if (inst == I_MOV && source_register == destination_register) return; switch (context->dialect) { @@ -519,13 +516,9 @@ static void femit_reg_to_name(CodegenContext *context, enum Instruction inst, va } } -static void femit_mem(CodegenContext *context, enum Instruction inst, va_list args) { - int64_t offset = va_arg(args, int64_t); - RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); - +static void femit_mem(CodegenContext *context, enum Instruction inst, int64_t offset, RegisterDescriptor address_register) { const char *mnemonic = instruction_mnemonic(context, inst); const char *address = register_name(address_register); - switch (context->dialect) { case CG_ASM_DIALECT_ATT: fprint(context->code, " %s %D(%%%s)\n", @@ -539,12 +532,10 @@ static void femit_mem(CodegenContext *context, enum Instruction inst, va_list ar } } -static void femit_reg(CodegenContext *context, enum Instruction inst, va_list args) { - RegisterDescriptor source_register = va_arg(args, RegisterDescriptor); - +static void femit_reg(CodegenContext *context, enum Instruction inst, RegisterDescriptor reg) { + if (inst == I_JMP || inst == I_CALL) ICE("Use femit_indirect_branch() to emit an indirect branch"); const char *mnemonic = instruction_mnemonic(context, inst); - const char *source = register_name(source_register); - + const char *source = register_name(reg); switch (context->dialect) { case CG_ASM_DIALECT_ATT: fprint(context->code, " %s %%%s\n", @@ -558,11 +549,8 @@ static void femit_reg(CodegenContext *context, enum Instruction inst, va_list ar } } -static void femit_imm(CodegenContext *context, enum Instruction inst, va_list args) { - int64_t immediate = va_arg(args, 
int64_t); - +static void femit_imm(CodegenContext *context, enum Instruction inst, int64_t immediate) { const char *mnemonic = instruction_mnemonic(context, inst); - switch (context->dialect) { case CG_ASM_DIALECT_ATT: fprint(context->code, " %s $%D\n", @@ -576,12 +564,9 @@ static void femit_imm(CodegenContext *context, enum Instruction inst, va_list ar } } -static void femit_indirect_branch(CodegenContext *context, enum Instruction inst, va_list args) { - RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); - +static void femit_indirect_branch(CodegenContext *context, enum Instruction inst, RegisterDescriptor address_register) { const char *mnemonic = instruction_mnemonic(context, inst); const char *address = register_name(address_register); - switch (context->dialect) { case CG_ASM_DIALECT_ATT: fprint(context->code, " %s *%%%s\n", @@ -622,7 +607,6 @@ static void femit case IMMEDIATE_TO_MEMORY: femit_imm_to_mem(context, instruction, args); break; case MEMORY_TO_REGISTER: femit_mem_to_reg(context, instruction, args); break; case REGISTER_TO_MEMORY: femit_reg_to_mem(context, instruction, args); break; - case REGISTER_TO_REGISTER: femit_reg_to_reg(context, instruction, args); break; case REGISTER_TO_NAME: femit_reg_to_name(context, instruction, args); break; case NAME_TO_REGISTER: femit_name_to_reg(context, instruction, args); break; } @@ -642,16 +626,6 @@ static void femit switch (operands) { default: ICE("femit() only accepts MEMORY_TO_REGISTER or REGISTER_TO_REGISTER operand type with IMUL instruction."); case MEMORY_TO_REGISTER: femit_mem_to_reg(context, instruction, args); break; - case REGISTER_TO_REGISTER: femit_reg_to_reg(context, instruction, args); break; - } - } break; - - case I_IDIV: { - enum InstructionOperands_x86_64 operand = va_arg(args, enum InstructionOperands_x86_64); - switch (operand) { - default: ICE("femit() only accepts MEMORY or REGISTER operand type with IDIV instruction."); - case MEMORY: femit_mem(context, instruction, 
args); break; - case REGISTER: femit_reg(context, instruction, args); break; } } break; @@ -687,7 +661,6 @@ static void femit enum InstructionOperands_x86_64 operand = va_arg(args, enum InstructionOperands_x86_64); switch (operand) { default: ICE("femit() only accepts REGISTER or NAME operand type with CALL/JMP instruction."); - case REGISTER: femit_indirect_branch(context, instruction, args); break; case NAME: { char *label = va_arg(args, char *); const char *mnemonic = instruction_mnemonic(context, instruction); @@ -710,19 +683,11 @@ static void femit enum InstructionOperands_x86_64 operand = va_arg(args, enum InstructionOperands_x86_64); switch (operand) { default: ICE("femit() only accepts REGISTER, MEMORY, or IMMEDIATE operand type with PUSH instruction."); - case REGISTER: femit_reg(context, instruction, args); break; - case MEMORY: femit_mem(context, instruction, args); break; - case IMMEDIATE: femit_imm(context, instruction, args); break; - } - } break; - - case I_NOT: - case I_POP: { - enum InstructionOperands_x86_64 operand = va_arg(args, enum InstructionOperands_x86_64); - switch (operand) { - default: ICE("femit() only accepts REGISTER or MEMORY operand type with POP instruction."); - case REGISTER: femit_reg(context, instruction, args); break; - case MEMORY: femit_mem(context, instruction, args); break; + case MEMORY: { + int64_t offset = va_arg(args, int64_t); + RegisterDescriptor r = va_arg(args, RegisterDescriptor); + femit_mem(context, instruction, offset, r); + } break; } } break; @@ -730,7 +695,6 @@ static void femit enum InstructionOperands_x86_64 operands = va_arg(args, enum InstructionOperands_x86_64); switch (operands) { default: ICE("femit(): invalid operands for XCHG instruction: %d", operands); - case REGISTER_TO_REGISTER: femit_reg_to_reg(context, instruction, args); break; case MEMORY_TO_REGISTER: femit_mem_to_reg(context, instruction, args); break; } } break; @@ -880,8 +844,9 @@ static RegisterDescriptor codegen_comparison // Zero out 
result register. // Perform the comparison. - femit(cg_context, I_CMP, REGISTER_TO_REGISTER, rhs, lhs); + femit_reg_to_reg(cg_context, I_CMP, rhs, lhs); femit(cg_context, I_MOV, IMMEDIATE_TO_REGISTER, (int64_t)0, result, r32); + // femit_imm_to_reg(cg_context, I_MOV, 0, result, r32); femit(cg_context, I_SETCC, type, result); return result; @@ -919,8 +884,9 @@ static void codegen_prologue(CodegenContext *cg_context, IRFunction *f) { case FRAME_FULL: { size_t locals_offset = f->locals_total_size; - femit(cg_context, I_PUSH, REGISTER, REG_RBP); - femit(cg_context, I_MOV, REGISTER_TO_REGISTER, REG_RSP, REG_RBP); + femit_reg(cg_context, I_PUSH, REG_RBP); + femit_reg_to_reg(cg_context, I_MOV, REG_RSP, REG_RBP); + // femit_reg_to_reg(context, I_MOV, REG_RSP, REG_RBP); switch (cg_context->call_convention) { ///> Even if the called function has fewer than 4 parameters, these 4 ///> stack locations are effectively owned by the called function, and @@ -934,6 +900,7 @@ static void codegen_prologue(CodegenContext *cg_context, IRFunction *f) { default: ICE("Unknown calling convention"); } femit(cg_context, I_SUB, IMMEDIATE_TO_REGISTER, locals_offset, REG_RSP, r64); + // femit_imm_to_reg(context, I_SUB, locals_offset, REG_RSP, r64); } break; case FRAME_MINIMAL: { @@ -941,9 +908,10 @@ static void codegen_prologue(CodegenContext *cg_context, IRFunction *f) { /// See comment above. 
case CG_CALL_CONV_MSWIN: femit(cg_context, I_SUB, IMMEDIATE_TO_REGISTER, (int64_t)(4 * 8 + 8), REG_RSP, r64); + // femit_imm_to_reg(context, I_SUB, 4 * 8 + 8, REG_RSP, r64); break; case CG_CALL_CONV_LINUX: - femit(cg_context, I_PUSH, REGISTER, REG_RBP); + femit_reg(cg_context, I_PUSH, REG_RBP); break; default: ICE("Unknown calling convention"); } @@ -958,8 +926,8 @@ static void codegen_epilogue(CodegenContext *cg_context, IRFunction *f) { case FRAME_NONE: break; case FRAME_FULL: { - femit(cg_context, I_MOV, REGISTER_TO_REGISTER, REG_RBP, REG_RSP); - femit(cg_context, I_POP, REGISTER, REG_RBP); + femit_reg_to_reg(cg_context, I_MOV, REG_RBP, REG_RSP); + femit_reg(cg_context, I_POP, REG_RBP); } break; case FRAME_MINIMAL: { @@ -967,9 +935,10 @@ static void codegen_epilogue(CodegenContext *cg_context, IRFunction *f) { /// See comment above. case CG_CALL_CONV_MSWIN: femit(cg_context, I_ADD, IMMEDIATE_TO_REGISTER, (int64_t)(4 * 8 + 8), REG_RSP, r64); + // femit_imm_to_reg(context, I_ADD, 4 * 8 + 8, REG_RSP, r64); break; case CG_CALL_CONV_LINUX: - femit(cg_context, I_POP, REGISTER, REG_RBP); + femit_reg(cg_context, I_POP, REG_RBP); break; default: ICE("Unknown calling convention"); } @@ -990,27 +959,31 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // Do this in a pass before-hand or something. 
if (inst->imm <= UINT32_MAX) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); + // femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r32); } else if (inst->imm <= UINT64_MAX) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); + // femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r64); } else { ICE("Unsupported integer literal immediate on x86_64 (out of range)"); } } else { if (type_sizeof(inst->type) <= 4) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); + // femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r32); } else if (type_sizeof(inst->type) <= 8) { femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); + // femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r64); } else { ICE("Unsupported immediate size on x86_64: %Z", type_sizeof(inst->type)); } } break; case IR_NOT: - femit(context, I_NOT, REGISTER, inst->operand->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->operand->result, inst->result); + femit_reg(context, I_NOT, inst->operand->result); + femit_reg_to_reg(context, I_MOV, inst->operand->result, inst->result); break; case IR_COPY: - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->operand->result, inst->result); + femit_reg_to_reg(context, I_MOV, inst->operand->result, inst->result); break; case IR_CALL: { // Save caller saved registers used in caller function. @@ -1021,7 +994,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { if (inst->call.tail_call) { // Restore the frame pointer if we have one. 
codegen_epilogue(context, inst->parent_block->function); - if (inst->call.is_indirect) femit(context, I_JMP, REGISTER, inst->call.callee_instruction->result); + if (inst->call.is_indirect) femit_indirect_branch(context, I_JMP, inst->call.callee_instruction->result); else femit(context, I_JMP, NAME, inst->call.callee_function->name.data); if (inst->parent_block) inst->parent_block->done = true; break; @@ -1038,36 +1011,38 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // Align stack pointer before call, if necessary. if (regs_pushed_count & 0b1) { femit(context, I_SUB, IMMEDIATE_TO_REGISTER, (int64_t)8, REG_RSP, r64); + // femit_imm_to_reg(context, I_SUB, 8, REG_RSP, r64); } for (Register i = REG_RAX + 1; i < sizeof(func_regs) * 8; ++i) { - // TODO: Don't push registers that are used for arguments. if (func_regs & (1 << i) && is_caller_saved(i)) { - femit(context, I_PUSH, REGISTER, i); + // TODO: Don't push registers that are used for arguments. + femit_reg(context, I_PUSH, i); } } - if (inst->call.is_indirect) femit(context, I_CALL, REGISTER, inst->call.callee_instruction->result); + if (inst->call.is_indirect) femit_indirect_branch(context, I_CALL, inst->call.callee_instruction->result); else femit(context, I_CALL, NAME, inst->call.callee_function->name.data); + // femit_name(context, I_CALL, inst->call.callee_function->name.data); // Restore caller saved registers used in called function. for (Register i = sizeof(func_regs) * 8 - 1; i > REG_RAX; --i) { if (func_regs & (1 << i) && is_caller_saved(i)) { - femit(context, I_POP, REGISTER, i); + femit_reg(context, I_POP, i); } } // Restore stack pointer from stack alignment, if necessary. 
if (regs_pushed_count & 0b1) { femit(context, I_ADD, IMMEDIATE_TO_REGISTER, (int64_t)8, REG_RSP, r64); + // femit_imm_to_reg(context, I_MOV, 8, inst->result, r64); } - femit(context, I_MOV, REGISTER_TO_REGISTER, - REG_RAX, inst->result); + femit_reg_to_reg(context, I_MOV, REG_RAX, inst->result); } break; case IR_RETURN: // Restore callee-saved registers used in the function. for (Register i = sizeof(inst->parent_block->function->registers_in_use) * 8 - 1; i > 0; --i) { if (inst->parent_block->function->registers_in_use & ((size_t)1 << i) && is_callee_saved(i)) { - femit(context, I_POP, REGISTER, i); + femit_reg(context, I_POP, i); } } codegen_epilogue(context, inst->parent_block->function); @@ -1086,9 +1061,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { case IR_BRANCH_CONDITIONAL: { IRBranchConditional *branch = &inst->cond_br; - femit(context, I_TEST, REGISTER_TO_REGISTER, - branch->condition->result, - branch->condition->result); + femit_reg_to_reg(context, I_TEST, branch->condition->result, branch->condition->result); /// If either target is the next block, arrange the jumps in such a way /// that we can save one and simply fallthrough to the next block. 
@@ -1103,76 +1076,67 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { if (optimise && inst->parent_block) inst->parent_block->done = true; } break; - case IR_LE: - codegen_comparison(context, COMPARE_LE, inst->lhs->result, inst->rhs->result, inst->result); - break; - case IR_LT: - codegen_comparison(context, COMPARE_LT, inst->lhs->result, inst->rhs->result, inst->result); - break; - case IR_GE: - codegen_comparison(context, COMPARE_GE, inst->lhs->result, inst->rhs->result, inst->result); - break; - case IR_GT: - codegen_comparison(context, COMPARE_GT, inst->lhs->result, inst->rhs->result, inst->result); - break; - case IR_EQ: - codegen_comparison(context, COMPARE_EQ, inst->lhs->result, inst->rhs->result, inst->result); - break; - case IR_NE: - codegen_comparison(context, COMPARE_NE, inst->lhs->result, inst->rhs->result, inst->result); - break; + case IR_LE: codegen_comparison(context, COMPARE_LE, inst->lhs->result, inst->rhs->result, inst->result); break; + case IR_LT: codegen_comparison(context, COMPARE_LT, inst->lhs->result, inst->rhs->result, inst->result); break; + case IR_GE: codegen_comparison(context, COMPARE_GE, inst->lhs->result, inst->rhs->result, inst->result); break; + case IR_GT: codegen_comparison(context, COMPARE_GT, inst->lhs->result, inst->rhs->result, inst->result); break; + case IR_EQ: codegen_comparison(context, COMPARE_EQ, inst->lhs->result, inst->rhs->result, inst->result); break; + case IR_NE: codegen_comparison(context, COMPARE_NE, inst->lhs->result, inst->rhs->result, inst->result); break; case IR_ADD: - femit(context, I_ADD, REGISTER_TO_REGISTER, inst->rhs->result, inst->lhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, inst->result); + femit_reg_to_reg(context, I_ADD, inst->rhs->result, inst->lhs->result); + femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); break; case IR_SUB: - femit(context, I_SUB, REGISTER_TO_REGISTER, inst->rhs->result, inst->lhs->result); - 
femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, inst->result); + femit_reg_to_reg(context, I_SUB, inst->rhs->result, inst->lhs->result); + femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); break; case IR_MUL: - femit(context, I_IMUL, REGISTER_TO_REGISTER, inst->lhs->result, inst->rhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, inst->result); + femit_reg_to_reg(context, I_IMUL, inst->lhs->result, inst->rhs->result); + femit_reg_to_reg(context, I_MOV, inst->rhs->result, inst->result); break; case IR_DIV: ASSERT(inst->rhs->result != REG_RAX, "Register allocation must not allocate RAX to divisor."); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, REG_RAX); + femit_reg_to_reg(context, I_MOV, inst->lhs->result, REG_RAX); femit(context, I_CQO); - femit(context, I_IDIV, REGISTER, inst->rhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, REG_RAX, inst->result); + femit_reg(context, I_IDIV, inst->rhs->result); + femit_reg_to_reg(context, I_MOV, REG_RAX, inst->result); break; case IR_MOD: ASSERT(inst->rhs->result != REG_RAX, "Register allocation must not allocate RAX to divisor."); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, REG_RAX); + femit_reg_to_reg(context, I_MOV, inst->rhs->result, REG_RAX); femit(context, I_CQO); - femit(context, I_IDIV, REGISTER, inst->rhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, REG_RDX, inst->result); + femit_reg(context, I_IDIV, inst->rhs->result); + femit_reg_to_reg(context, I_MOV, REG_RDX, inst->result); break; case IR_SHL: ASSERT(inst->lhs->result != REG_RCX, "Register allocation must not allocate RCX to result of lhs of shift."); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, REG_RCX); + femit_reg_to_reg(context, I_MOV, inst->rhs->result, REG_RCX); femit(context, I_SHL, REGISTER, inst->lhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, inst->result); + // TODO: femit_shift or 
something + femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); break; case IR_SHR: - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, REG_RCX); + femit_reg_to_reg(context, I_MOV, inst->rhs->result, REG_RCX); femit(context, I_SHR, REGISTER, inst->lhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, inst->result); + // TODO: femit_shift or something + femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); break; case IR_SAR: - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, REG_RCX); + femit_reg_to_reg(context, I_MOV, inst->rhs->result, REG_RCX); femit(context, I_SAR, REGISTER, inst->lhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->lhs->result, inst->result); + // TODO: femit_shift or something + femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); break; case IR_AND: - femit(context, I_AND, REGISTER_TO_REGISTER, inst->lhs->result, inst->rhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, inst->result); + femit_reg_to_reg(context, I_AND, inst->lhs->result, inst->rhs->result); + femit_reg_to_reg(context, I_MOV, inst->rhs->result, inst->result); break; case IR_OR: - femit(context, I_OR, REGISTER_TO_REGISTER, inst->lhs->result, inst->rhs->result); - femit(context, I_MOV, REGISTER_TO_REGISTER, inst->rhs->result, inst->result); + femit_reg_to_reg(context, I_OR, inst->lhs->result, inst->rhs->result); + femit_reg_to_reg(context, I_MOV, inst->rhs->result, inst->result); break; case IR_LOAD: @@ -1185,13 +1149,16 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { size = regsize_from_bytes(type_sizeof(t_pointer)); else size = regsize_from_bytes(type_sizeof(inst->operand->type)); // TODO: Use `movzx`/`movzbl` - if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); + if (size == r8 || size == r16) femit_reg_to_reg(context, I_XOR, inst->result, inst->result); + // 
femit_reg_to_reg(context, I_XOR, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); + // femit_name_to_reg(context, I_LEA, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); else femit(context, I_MOV, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); + // femit_name_to_reg(context, I_MOV, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); } /// Load from a local. @@ -1202,13 +1169,15 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { size = regsize_from_bytes(type_sizeof(t_pointer)); else size = regsize_from_bytes(inst->operand->alloca.size); // TODO: Use `movzx`/`movzbl` - if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); + if (size == r8 || size == r16) femit_reg_to_reg(context, I_XOR, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) femit(context, I_LEA, MEMORY_TO_REGISTER, REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); + // femit_mem_to_reg(context, I_LEA, REG_RBP, -inst->operand->alloca.offset, inst->result, size); else femit(context, I_MOV, MEMORY_TO_REGISTER, REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); + // femit_mem_to_reg(context, I_MOV, REG_RBP, -inst->operand->alloca.offset, inst->result, size); } /// Load from a pointer @@ -1219,13 +1188,15 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // TODO: We are "supposed" to be loading sizeof pointed to type // here, but that causes segfaults when handling arrays. 
else size = regsize_from_bytes(type_sizeof(inst->operand->type)); - if (size == r8 || size == r16) femit(context, I_XOR, REGISTER_TO_REGISTER, inst->result, inst->result); + if (size == r8 || size == r16) femit_reg_to_reg(context, I_XOR, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY) femit(context, I_LEA, MEMORY_TO_REGISTER, inst->operand->result, (int64_t)0, inst->result, size); + // femit_mem_to_reg(context, I_LEA, inst->operand->result, 0, inst->result, size); else femit(context, I_MOV, MEMORY_TO_REGISTER, inst->operand->result, (int64_t)0, inst->result, size); + // femit_mem_to_reg(context, I_MOV, inst->operand->result, 0, inst->result, size); } break; @@ -1235,6 +1206,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { enum RegSize size = regsize_from_bytes(type_sizeof(inst->store.addr->static_ref->type)); femit(context, I_MOV, REGISTER_TO_NAME, inst->store.value->result, size, REG_RIP, inst->store.addr->static_ref->name.data); + // femit_reg_to_name(context, I_MOV, inst->store.value->result, size, REG_RIP, inst->store.addr->static_ref->name.data); } /// Store to a local. 
@@ -1243,6 +1215,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { femit(context, I_MOV, REGISTER_TO_MEMORY, inst->store.value->result, size, REG_RBP, (int64_t)-inst->store.addr->alloca.offset); + // femit_reg_to_mem(context, I_MOV, inst->store.value->result, size, REG_RBP, -inst->store.addr->alloca.offset); break; } @@ -1252,17 +1225,21 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { femit(context, I_MOV, REGISTER_TO_MEMORY, inst->store.value->result, size, inst->store.addr->result, (int64_t)0); + // femit_reg_to_mem(context, I_MOV, inst->store.value->result, size, inst->store.addr->result, 0); } break; case IR_STATIC_REF: if (inst->result) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->static_ref->name.data, inst->result, r64); + // femit_name_to_reg(context, I_LEA, REG_RIP, inst->static_ref->name.data, inst->result, r64); break; case IR_FUNC_REF: if (inst->result) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->function_ref->name.data, inst->result, r64); + // femit_name_to_reg(context, I_LEA, REG_RIP, inst->function_ref->name.data, inst->result, r64); break; case IR_ALLOCA: femit(context, I_LEA, MEMORY_TO_REGISTER, REG_RBP, (int64_t)-inst->alloca.offset, inst->result, r64); + // femit_mem_to_reg(context, I_LEA, REG_RBP, (int64_t)-inst->alloca.offset, inst->result, r64); break; default: @@ -1294,7 +1271,7 @@ void emit_function(CodegenContext *context, IRFunction *function) { // Save all callee-saved registers in use in the function. 
for (Register i = 1; i < sizeof(function->registers_in_use) * 8; ++i) { if ((size_t)function->registers_in_use & ((size_t)1 << i) && is_callee_saved(i)) { - femit(context, I_PUSH, REGISTER, i); + femit_reg(context, I_PUSH, i); } } list_foreach (IRBlock*, block, function->blocks) { emit_block(context, block); } From 6574a1db12b800168f620d178a80a0cc0b72433a Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 11:58:17 -0800 Subject: [PATCH 60/97] [Codegen/x86_64] Move further away from variable args... Still more todo; it's mostly memory interaction stuff --- src/codegen/x86_64/arch_x86_64.c | 137 +++++++++++++------------------ 1 file changed, 58 insertions(+), 79 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index c3a0a8eed..6064ad56e 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -306,11 +306,7 @@ static enum IndirectJumpType negate_jump(enum IndirectJumpType j) { } } -static void femit_imm_to_reg(CodegenContext *context, enum Instruction inst, va_list args) { - int64_t immediate = va_arg(args, int64_t); - RegisterDescriptor destination_register = va_arg(args, RegisterDescriptor); - enum RegSize size = va_arg(args, enum RegSize); - +static void femit_imm_to_reg(CodegenContext *context, enum Instruction inst, int64_t immediate, RegisterDescriptor destination_register, enum RegSize size) { const char *mnemonic = instruction_mnemonic(context, inst); const char *destination = NULL; switch (size) { @@ -532,8 +528,48 @@ static void femit_mem(CodegenContext *context, enum Instruction inst, int64_t of } } +static void femit_reg_shift(CodegenContext *context, enum Instruction inst, RegisterDescriptor register_to_shift) { + const char *mnemonic = instruction_mnemonic(context, inst); + const char *cl = register_name_8(REG_RCX); + switch (context->dialect) { + case CG_ASM_DIALECT_ATT: + fprint(context->code, " %s %%%s, %%%s\n", + mnemonic, cl, register_name(register_to_shift)); + 
break; + case CG_ASM_DIALECT_INTEL: + fprint(context->code, " %s %s, %s\n", + mnemonic, register_name(register_to_shift), cl); + break; + default: ICE("ERROR: femit(): Unsupported dialect %d for shift instruction", context->dialect); + } +} + +/// You should probably use `femit_reg` +static void femit_indirect_branch(CodegenContext *context, enum Instruction inst, RegisterDescriptor address_register) { + const char *mnemonic = instruction_mnemonic(context, inst); + const char *address = register_name(address_register); + switch (context->dialect) { + case CG_ASM_DIALECT_ATT: + fprint(context->code, " %s *%%%s\n", + mnemonic, address); + break; + case CG_ASM_DIALECT_INTEL: + fprint(context->code, " %s %s\n", + mnemonic, address); + break; + default: ICE("ERROR: femit_indirect_branch(): Unsupported dialect %d", context->dialect); + } +} + static void femit_reg(CodegenContext *context, enum Instruction inst, RegisterDescriptor reg) { - if (inst == I_JMP || inst == I_CALL) ICE("Use femit_indirect_branch() to emit an indirect branch"); + if (inst == I_JMP || inst == I_CALL) { + femit_indirect_branch(context, inst, reg); + return; + } + if (inst == I_SAL || inst == I_SAR || inst == I_SHL || inst == I_SHR) { + femit_reg_shift(context, inst, reg); + return; + } const char *mnemonic = instruction_mnemonic(context, inst); const char *source = register_name(reg); switch (context->dialect) { @@ -564,22 +600,6 @@ static void femit_imm(CodegenContext *context, enum Instruction inst, int64_t im } } -static void femit_indirect_branch(CodegenContext *context, enum Instruction inst, RegisterDescriptor address_register) { - const char *mnemonic = instruction_mnemonic(context, inst); - const char *address = register_name(address_register); - switch (context->dialect) { - case CG_ASM_DIALECT_ATT: - fprint(context->code, " %s *%%%s\n", - mnemonic, address); - break; - case CG_ASM_DIALECT_INTEL: - fprint(context->code, " %s %s\n", - mnemonic, address); - break; - default: ICE("ERROR: 
femit_indirect_branch(): Unsupported dialect %d", context->dialect); - } -} - static void femit (CodegenContext *context, enum Instruction instruction, @@ -603,7 +623,6 @@ static void femit enum InstructionOperands_x86_64 operands = va_arg(args, enum InstructionOperands_x86_64); switch (operands) { default: ICE("Unhandled operand type %d in x86_64 code generation for %d.", operands, instruction); - case IMMEDIATE_TO_REGISTER: femit_imm_to_reg(context, instruction, args); break; case IMMEDIATE_TO_MEMORY: femit_imm_to_mem(context, instruction, args); break; case MEMORY_TO_REGISTER: femit_mem_to_reg(context, instruction, args); break; case REGISTER_TO_MEMORY: femit_reg_to_mem(context, instruction, args); break; @@ -629,33 +648,6 @@ static void femit } } break; - case I_SAL: - case I_SAR: - case I_SHR: { - enum InstructionOperands_x86_64 operand = va_arg(args, enum InstructionOperands_x86_64); - switch (operand) { - default: ICE("femit() only accepts REGISTER OR IMMEDIATE_TO_REGISTER operand type with shift instructions."); - case IMMEDIATE_TO_REGISTER: femit_imm_to_reg(context, instruction, args); break; - case REGISTER: { - RegisterDescriptor register_to_shift = va_arg(args, RegisterDescriptor); - const char *mnemonic = instruction_mnemonic(context, instruction); - const char *cl = register_name_8(REG_RCX); - - switch (context->dialect) { - case CG_ASM_DIALECT_ATT: - fprint(context->code, " %s %%%s, %%%s\n", - mnemonic, cl, register_name(register_to_shift)); - break; - case CG_ASM_DIALECT_INTEL: - fprint(context->code, " %s %s, %s\n", - mnemonic, register_name(register_to_shift), cl); - break; - default: ICE("ERROR: femit(): Unsupported dialect %d for shift instruction", context->dialect); - } - } break; - } - } break; - case I_JMP: case I_CALL: { enum InstructionOperands_x86_64 operand = va_arg(args, enum InstructionOperands_x86_64); @@ -845,8 +837,7 @@ static RegisterDescriptor codegen_comparison // Perform the comparison. 
femit_reg_to_reg(cg_context, I_CMP, rhs, lhs); - femit(cg_context, I_MOV, IMMEDIATE_TO_REGISTER, (int64_t)0, result, r32); - // femit_imm_to_reg(cg_context, I_MOV, 0, result, r32); + femit_imm_to_reg(cg_context, I_MOV, 0, result, r32); femit(cg_context, I_SETCC, type, result); return result; @@ -899,16 +890,14 @@ static void codegen_prologue(CodegenContext *cg_context, IRFunction *f) { case CG_CALL_CONV_LINUX: break; default: ICE("Unknown calling convention"); } - femit(cg_context, I_SUB, IMMEDIATE_TO_REGISTER, locals_offset, REG_RSP, r64); - // femit_imm_to_reg(context, I_SUB, locals_offset, REG_RSP, r64); + femit_imm_to_reg(cg_context, I_SUB, locals_offset, REG_RSP, r64); } break; case FRAME_MINIMAL: { switch (cg_context->call_convention) { /// See comment above. case CG_CALL_CONV_MSWIN: - femit(cg_context, I_SUB, IMMEDIATE_TO_REGISTER, (int64_t)(4 * 8 + 8), REG_RSP, r64); - // femit_imm_to_reg(context, I_SUB, 4 * 8 + 8, REG_RSP, r64); + femit_imm_to_reg(cg_context, I_SUB, 4 * 8 + 8, REG_RSP, r64); break; case CG_CALL_CONV_LINUX: femit_reg(cg_context, I_PUSH, REG_RBP); @@ -934,8 +923,7 @@ static void codegen_epilogue(CodegenContext *cg_context, IRFunction *f) { switch (cg_context->call_convention) { /// See comment above. case CG_CALL_CONV_MSWIN: - femit(cg_context, I_ADD, IMMEDIATE_TO_REGISTER, (int64_t)(4 * 8 + 8), REG_RSP, r64); - // femit_imm_to_reg(context, I_ADD, 4 * 8 + 8, REG_RSP, r64); + femit_imm_to_reg(cg_context, I_ADD, 4 * 8 + 8, REG_RSP, r64); break; case CG_CALL_CONV_LINUX: femit_reg(cg_context, I_POP, REG_RBP); @@ -958,21 +946,17 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // TODO: integer_literal probably shouldn't be handled here. // Do this in a pass before-hand or something. 
if (inst->imm <= UINT32_MAX) { - femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); - // femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r32); + femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r32); } else if (inst->imm <= UINT64_MAX) { - femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); - // femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r64); + femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r64); } else { ICE("Unsupported integer literal immediate on x86_64 (out of range)"); } } else { if (type_sizeof(inst->type) <= 4) { - femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r32); - // femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r32); + femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r32); } else if (type_sizeof(inst->type) <= 8) { - femit(context, I_MOV, IMMEDIATE_TO_REGISTER, inst->imm, inst->result, r64); - // femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r64); + femit_imm_to_reg(context, I_MOV, inst->imm, inst->result, r64); } else { ICE("Unsupported immediate size on x86_64: %Z", type_sizeof(inst->type)); } @@ -994,7 +978,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { if (inst->call.tail_call) { // Restore the frame pointer if we have one. codegen_epilogue(context, inst->parent_block->function); - if (inst->call.is_indirect) femit_indirect_branch(context, I_JMP, inst->call.callee_instruction->result); + if (inst->call.is_indirect) femit_reg(context, I_JMP, inst->call.callee_instruction->result); else femit(context, I_JMP, NAME, inst->call.callee_function->name.data); if (inst->parent_block) inst->parent_block->done = true; break; @@ -1010,8 +994,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { } // Align stack pointer before call, if necessary. 
if (regs_pushed_count & 0b1) { - femit(context, I_SUB, IMMEDIATE_TO_REGISTER, (int64_t)8, REG_RSP, r64); - // femit_imm_to_reg(context, I_SUB, 8, REG_RSP, r64); + femit_imm_to_reg(context, I_SUB, 8, REG_RSP, r64); } for (Register i = REG_RAX + 1; i < sizeof(func_regs) * 8; ++i) { if (func_regs & (1 << i) && is_caller_saved(i)) { @@ -1020,7 +1003,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { } } - if (inst->call.is_indirect) femit_indirect_branch(context, I_CALL, inst->call.callee_instruction->result); + if (inst->call.is_indirect) femit_reg(context, I_CALL, inst->call.callee_instruction->result); else femit(context, I_CALL, NAME, inst->call.callee_function->name.data); // femit_name(context, I_CALL, inst->call.callee_function->name.data); @@ -1032,8 +1015,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { } // Restore stack pointer from stack alignment, if necessary. if (regs_pushed_count & 0b1) { - femit(context, I_ADD, IMMEDIATE_TO_REGISTER, (int64_t)8, REG_RSP, r64); - // femit_imm_to_reg(context, I_MOV, 8, inst->result, r64); + femit_imm_to_reg(context, I_ADD, 8, REG_RSP, r64); } femit_reg_to_reg(context, I_MOV, REG_RAX, inst->result); } break; @@ -1114,25 +1096,22 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { ASSERT(inst->lhs->result != REG_RCX, "Register allocation must not allocate RCX to result of lhs of shift."); femit_reg_to_reg(context, I_MOV, inst->rhs->result, REG_RCX); - femit(context, I_SHL, REGISTER, inst->lhs->result); - // TODO: femit_shift or something + femit_reg(context, I_SHL, inst->lhs->result); femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); break; case IR_SHR: femit_reg_to_reg(context, I_MOV, inst->rhs->result, REG_RCX); - femit(context, I_SHR, REGISTER, inst->lhs->result); - // TODO: femit_shift or something + femit_reg(context, I_SHR, inst->lhs->result); femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); 
break; case IR_SAR: femit_reg_to_reg(context, I_MOV, inst->rhs->result, REG_RCX); - femit(context, I_SAR, REGISTER, inst->lhs->result); - // TODO: femit_shift or something + femit_reg(context, I_SAR, inst->lhs->result); femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); break; case IR_AND: femit_reg_to_reg(context, I_AND, inst->lhs->result, inst->rhs->result); - femit_reg_to_reg(context, I_MOV, inst->rhs->result, inst->result); + femit_reg_to_reg(context, I_MOV, inst->rhs->result, inst->result); break; case IR_OR: femit_reg_to_reg(context, I_OR, inst->lhs->result, inst->rhs->result); From e830a9f71e643b9b48c4e1a7f72903fb1f18e548 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 12:09:26 -0800 Subject: [PATCH 61/97] [Codegen/x86_64] Moving even further away from variable args... --- src/codegen/x86_64/arch_x86_64.c | 119 ++++++++----------------------- 1 file changed, 28 insertions(+), 91 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 6064ad56e..dc7083c45 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -208,6 +208,22 @@ static enum RegSize regsize_from_bytes(u64 bytes) { } } +static const char * regname(RegisterDescriptor reg, enum RegSize size) { + switch (size) { + case r64: return register_name(reg); + case r32: return register_name_32(reg); + case r16: return register_name_16(reg); + case r8: return register_name_8(reg); + default: + UNREACHABLE(); + break; + } +} + +static const char * regname_from_bytes(RegisterDescriptor reg, u64 bytes) { + return regname(reg, regsize_from_bytes(bytes)); +} + // TODO: Pass necessary RegSize in more of these cases enum InstructionOperands_x86_64 { IMMEDIATE, ///< int64_t imm @@ -215,9 +231,6 @@ enum InstructionOperands_x86_64 { REGISTER, ///< Reg reg NAME, ///< const char* name - IMMEDIATE_TO_REGISTER, ///< int64_t imm, Reg dest, RegSize size - IMMEDIATE_TO_MEMORY, ///< int64_t imm, Reg address, int64_t offset - 
MEMORY_TO_REGISTER, ///< Reg address, int64_t offset, Reg dest, RegSize size NAME_TO_REGISTER, ///< Reg address, const char* name, Reg dest, RegSize size REGISTER_TO_MEMORY, ///< Reg src, RegSize size, Reg address, int64_t offset REGISTER_TO_REGISTER, ///< Reg src, Reg dest @@ -308,17 +321,7 @@ static enum IndirectJumpType negate_jump(enum IndirectJumpType j) { static void femit_imm_to_reg(CodegenContext *context, enum Instruction inst, int64_t immediate, RegisterDescriptor destination_register, enum RegSize size) { const char *mnemonic = instruction_mnemonic(context, inst); - const char *destination = NULL; - switch (size) { - case r64: destination = register_name(destination_register); break; - case r32: destination = register_name_32(destination_register); break; - case r16: destination = register_name_16(destination_register); break; - case r8: destination = register_name_8(destination_register); break; - default: - UNREACHABLE(); - break; - } - + const char *destination = regname(destination_register, size); switch (context->dialect) { case CG_ASM_DIALECT_ATT: fprint(context->code, " %s $%D, %%%s\n", @@ -332,14 +335,9 @@ static void femit_imm_to_reg(CodegenContext *context, enum Instruction inst, int } } -static void femit_imm_to_mem(CodegenContext *context, enum Instruction inst, va_list args) { - int64_t immediate = va_arg(args, int64_t); - RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); - int64_t offset = va_arg(args, int64_t); - +static void femit_imm_to_mem(CodegenContext *context, enum Instruction inst, int64_t immediate, RegisterDescriptor address_register, int64_t offset) { const char *mnemonic = instruction_mnemonic(context, inst); const char *address = register_name(address_register); - switch (context->dialect) { case CG_ASM_DIALECT_ATT: fprint(context->code, " %s $%D, %D(%%%s)\n", @@ -353,25 +351,10 @@ static void femit_imm_to_mem(CodegenContext *context, enum Instruction inst, va_ } } -static void 
femit_mem_to_reg(CodegenContext *context, enum Instruction inst, va_list args) { - RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); - int64_t offset = va_arg(args, int64_t); - RegisterDescriptor destination_register = va_arg(args, RegisterDescriptor); - enum RegSize size = va_arg(args, enum RegSize); - +static void femit_mem_to_reg(CodegenContext *context, enum Instruction inst, RegisterDescriptor address_register, int64_t offset, RegisterDescriptor destination_register, enum RegSize size) { const char *mnemonic = instruction_mnemonic(context, inst); const char *address = register_name(address_register); - const char *destination = NULL; - switch (size) { - case r64: destination = register_name(destination_register); break; - case r32: destination = register_name_32(destination_register); break; - case r16: destination = register_name_16(destination_register); break; - case r8: destination = register_name_8(destination_register); break; - default: - UNREACHABLE(); - break; - } - + const char *destination = regname(destination_register, size); switch (context->dialect) { case CG_ASM_DIALECT_ATT: fprint(context->code, " %s %D(%%%s), %%%s\n", @@ -424,16 +407,7 @@ static void femit_reg_to_mem(CodegenContext *context, enum Instruction inst, va_ int64_t offset = va_arg(args, int64_t); const char *mnemonic = instruction_mnemonic(context, inst); - const char *source = NULL; - switch (size) { - case r64: source = register_name(source_register); break; - case r32: source = register_name_32(source_register); break; - case r16: source = register_name_16(source_register); break; - case r8: source = register_name_8(source_register); break; - default: - UNREACHABLE(); - break; - } + const char *source = regname(source_register, size); const char *address = register_name(address_register); switch (context->dialect) { @@ -487,16 +461,7 @@ static void femit_reg_to_name(CodegenContext *context, enum Instruction inst, va char *name = va_arg(args, char *); const 
char *mnemonic = instruction_mnemonic(context, inst); - const char *source = NULL; - switch (size) { - case r64: source = register_name(source_register); break; - case r32: source = register_name_32(source_register); break; - case r16: source = register_name_16(source_register); break; - case r8: source = register_name_8(source_register); break; - default: - UNREACHABLE(); - break; - } + const char *source = regname(source_register, size); const char *address = register_name(address_register); switch (context->dialect) { @@ -623,8 +588,6 @@ static void femit enum InstructionOperands_x86_64 operands = va_arg(args, enum InstructionOperands_x86_64); switch (operands) { default: ICE("Unhandled operand type %d in x86_64 code generation for %d.", operands, instruction); - case IMMEDIATE_TO_MEMORY: femit_imm_to_mem(context, instruction, args); break; - case MEMORY_TO_REGISTER: femit_mem_to_reg(context, instruction, args); break; case REGISTER_TO_MEMORY: femit_reg_to_mem(context, instruction, args); break; case REGISTER_TO_NAME: femit_reg_to_name(context, instruction, args); break; case NAME_TO_REGISTER: femit_name_to_reg(context, instruction, args); break; @@ -634,20 +597,11 @@ static void femit case I_LEA: { enum InstructionOperands_x86_64 operands = va_arg(args, enum InstructionOperands_x86_64); switch (operands) { - default: ICE("femit() only accepts MEMORY_TO_REGISTER or NAME_TO_REGISTER operand type with LEA instruction."); - case MEMORY_TO_REGISTER: femit_mem_to_reg(context, instruction, args); break; + default: ICE("femit() only accepts NAME_TO_REGISTER operand type with LEA instruction."); case NAME_TO_REGISTER: femit_name_to_reg(context, instruction, args); break; } } break; - case I_IMUL: { - enum InstructionOperands_x86_64 operands = va_arg(args, enum InstructionOperands_x86_64); - switch (operands) { - default: ICE("femit() only accepts MEMORY_TO_REGISTER or REGISTER_TO_REGISTER operand type with IMUL instruction."); - case MEMORY_TO_REGISTER: 
femit_mem_to_reg(context, instruction, args); break; - } - } break; - case I_JMP: case I_CALL: { enum InstructionOperands_x86_64 operand = va_arg(args, enum InstructionOperands_x86_64); @@ -683,14 +637,6 @@ static void femit } } break; - case I_XCHG: { - enum InstructionOperands_x86_64 operands = va_arg(args, enum InstructionOperands_x86_64); - switch (operands) { - default: ICE("femit(): invalid operands for XCHG instruction: %d", operands); - case MEMORY_TO_REGISTER: femit_mem_to_reg(context, instruction, args); break; - } - } break; - case I_SETCC: { enum ComparisonType comparison_type = va_arg(args, enum ComparisonType); RegisterDescriptor value_register = va_arg(args, RegisterDescriptor); @@ -1150,13 +1096,9 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // TODO: Use `movzx`/`movzbl` if (size == r8 || size == r16) femit_reg_to_reg(context, I_XOR, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) - femit(context, I_LEA, MEMORY_TO_REGISTER, - REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); - // femit_mem_to_reg(context, I_LEA, REG_RBP, -inst->operand->alloca.offset, inst->result, size); + femit_mem_to_reg(context, I_LEA, REG_RBP, -inst->operand->alloca.offset, inst->result, size); else - femit(context, I_MOV, MEMORY_TO_REGISTER, - REG_RBP, (int64_t)-inst->operand->alloca.offset, inst->result, size); - // femit_mem_to_reg(context, I_MOV, REG_RBP, -inst->operand->alloca.offset, inst->result, size); + femit_mem_to_reg(context, I_MOV, REG_RBP, -inst->operand->alloca.offset, inst->result, size); } /// Load from a pointer @@ -1169,13 +1111,9 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { else size = regsize_from_bytes(type_sizeof(inst->operand->type)); if (size == r8 || size == r16) femit_reg_to_reg(context, I_XOR, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY) - femit(context, 
I_LEA, MEMORY_TO_REGISTER, inst->operand->result, (int64_t)0, - inst->result, size); - // femit_mem_to_reg(context, I_LEA, inst->operand->result, 0, inst->result, size); + femit_mem_to_reg(context, I_LEA, inst->operand->result, 0, inst->result, size); else - femit(context, I_MOV, MEMORY_TO_REGISTER, inst->operand->result, (int64_t)0, - inst->result, size); - // femit_mem_to_reg(context, I_MOV, inst->operand->result, 0, inst->result, size); + femit_mem_to_reg(context, I_MOV, inst->operand->result, 0, inst->result, size); } break; @@ -1217,8 +1155,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // femit_name_to_reg(context, I_LEA, REG_RIP, inst->function_ref->name.data, inst->result, r64); break; case IR_ALLOCA: - femit(context, I_LEA, MEMORY_TO_REGISTER, REG_RBP, (int64_t)-inst->alloca.offset, inst->result, r64); - // femit_mem_to_reg(context, I_LEA, REG_RBP, (int64_t)-inst->alloca.offset, inst->result, r64); + femit_mem_to_reg(context, I_LEA, REG_RBP, -inst->alloca.offset, inst->result, r64); break; default: From 339e07cc4587545b23e8becc4feff5beb2b8fedf Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 12:12:11 -0800 Subject: [PATCH 62/97] [Codegen/x86_64] `femit_name_to_reg()` typed parameters --- src/codegen/x86_64/arch_x86_64.c | 43 +++++--------------------------- 1 file changed, 6 insertions(+), 37 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index dc7083c45..bae4b0cfa 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -231,7 +231,6 @@ enum InstructionOperands_x86_64 { REGISTER, ///< Reg reg NAME, ///< const char* name - NAME_TO_REGISTER, ///< Reg address, const char* name, Reg dest, RegSize size REGISTER_TO_MEMORY, ///< Reg src, RegSize size, Reg address, int64_t offset REGISTER_TO_REGISTER, ///< Reg src, Reg dest REGISTER_TO_NAME, ///< Reg src, RegSize size, Reg address, const char* name @@ -368,25 +367,10 @@ static void 
femit_mem_to_reg(CodegenContext *context, enum Instruction inst, Reg } } -static void femit_name_to_reg(CodegenContext *context, enum Instruction inst, va_list args) { - RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); - char *name = va_arg(args, char *); - RegisterDescriptor destination_register = va_arg(args, RegisterDescriptor); - enum RegSize size = va_arg(args, enum RegSize); - +static void femit_name_to_reg(CodegenContext *context, enum Instruction inst, RegisterDescriptor address_register, const char *name, RegisterDescriptor destination_register, enum RegSize size) { const char *mnemonic = instruction_mnemonic(context, inst); const char *address = register_name(address_register); - const char *destination = NULL; - switch (size) { - case r64: destination = register_name(destination_register); break; - case r32: destination = register_name_32(destination_register); break; - case r16: destination = register_name_16(destination_register); break; - case r8: destination = register_name_8(destination_register); break; - default: - UNREACHABLE(); - break; - } - + const char *destination = regname(destination_register, size); switch (context->dialect) { case CG_ASM_DIALECT_ATT: fprint(context->code, " %s %s(%%%s), %%%s\n", @@ -590,15 +574,6 @@ static void femit default: ICE("Unhandled operand type %d in x86_64 code generation for %d.", operands, instruction); case REGISTER_TO_MEMORY: femit_reg_to_mem(context, instruction, args); break; case REGISTER_TO_NAME: femit_reg_to_name(context, instruction, args); break; - case NAME_TO_REGISTER: femit_name_to_reg(context, instruction, args); break; - } - } break; - - case I_LEA: { - enum InstructionOperands_x86_64 operands = va_arg(args, enum InstructionOperands_x86_64); - switch (operands) { - default: ICE("femit() only accepts NAME_TO_REGISTER operand type with LEA instruction."); - case NAME_TO_REGISTER: femit_name_to_reg(context, instruction, args); break; } } break; @@ -1077,13 +1052,9 @@ static 
void emit_instruction(CodegenContext *context, IRInstruction *inst) { if (size == r8 || size == r16) femit_reg_to_reg(context, I_XOR, inst->result, inst->result); // femit_reg_to_reg(context, I_XOR, inst->result, inst->result); if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) - femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, - inst->result, size); - // femit_name_to_reg(context, I_LEA, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); + femit_name_to_reg(context, I_LEA, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); else - femit(context, I_MOV, NAME_TO_REGISTER, REG_RIP, inst->operand->static_ref->name.data, - inst->result, size); - // femit_name_to_reg(context, I_MOV, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); + femit_name_to_reg(context, I_MOV, REG_RIP, inst->operand->static_ref->name.data, inst->result, size); } /// Load from a local. 
@@ -1147,12 +1118,10 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { break; case IR_STATIC_REF: - if (inst->result) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->static_ref->name.data, inst->result, r64); - // femit_name_to_reg(context, I_LEA, REG_RIP, inst->static_ref->name.data, inst->result, r64); + if (inst->result) femit_name_to_reg(context, I_LEA, REG_RIP, inst->static_ref->name.data, inst->result, r64); break; case IR_FUNC_REF: - if (inst->result) femit(context, I_LEA, NAME_TO_REGISTER, REG_RIP, inst->function_ref->name.data, inst->result, r64); - // femit_name_to_reg(context, I_LEA, REG_RIP, inst->function_ref->name.data, inst->result, r64); + if (inst->result) femit_name_to_reg(context, I_LEA, REG_RIP, inst->function_ref->name.data, inst->result, r64); break; case IR_ALLOCA: femit_mem_to_reg(context, I_LEA, REG_RBP, -inst->alloca.offset, inst->result, r64); From 84f1e104bcacd58cde9221ed9303ec6069275004 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 12:15:58 -0800 Subject: [PATCH 63/97] [Codegen/x86_64] `femit_reg_to_mem()` typed parameters --- src/codegen/x86_64/arch_x86_64.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index bae4b0cfa..ee74cfa37 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -231,7 +231,6 @@ enum InstructionOperands_x86_64 { REGISTER, ///< Reg reg NAME, ///< const char* name - REGISTER_TO_MEMORY, ///< Reg src, RegSize size, Reg address, int64_t offset REGISTER_TO_REGISTER, ///< Reg src, Reg dest REGISTER_TO_NAME, ///< Reg src, RegSize size, Reg address, const char* name }; @@ -384,16 +383,10 @@ static void femit_name_to_reg(CodegenContext *context, enum Instruction inst, Re } } -static void femit_reg_to_mem(CodegenContext *context, enum Instruction inst, va_list args) { - RegisterDescriptor source_register = va_arg(args, 
RegisterDescriptor); - enum RegSize size = va_arg(args, enum RegSize); - RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); - int64_t offset = va_arg(args, int64_t); - +static void femit_reg_to_mem(CodegenContext *context, enum Instruction inst, RegisterDescriptor source_register, enum RegSize size, RegisterDescriptor address_register, int64_t offset) { const char *mnemonic = instruction_mnemonic(context, inst); const char *source = regname(source_register, size); const char *address = register_name(address_register); - switch (context->dialect) { case CG_ASM_DIALECT_ATT: if (offset) { @@ -572,7 +565,6 @@ static void femit enum InstructionOperands_x86_64 operands = va_arg(args, enum InstructionOperands_x86_64); switch (operands) { default: ICE("Unhandled operand type %d in x86_64 code generation for %d.", operands, instruction); - case REGISTER_TO_MEMORY: femit_reg_to_mem(context, instruction, args); break; case REGISTER_TO_NAME: femit_reg_to_name(context, instruction, args); break; } } break; @@ -1100,20 +1092,14 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { /// Store to a local. else if (inst->store.addr->kind == IR_ALLOCA) { enum RegSize size = regsize_from_bytes(type_sizeof(inst->store.value->type)); - femit(context, I_MOV, REGISTER_TO_MEMORY, - inst->store.value->result, size, - REG_RBP, (int64_t)-inst->store.addr->alloca.offset); - // femit_reg_to_mem(context, I_MOV, inst->store.value->result, size, REG_RBP, -inst->store.addr->alloca.offset); + femit_reg_to_mem(context, I_MOV, inst->store.value->result, size, REG_RBP, -inst->store.addr->alloca.offset); break; } /// Store to a pointer. 
else { enum RegSize size = regsize_from_bytes(type_sizeof(inst->store.value->type)); - femit(context, I_MOV, REGISTER_TO_MEMORY, - inst->store.value->result, size, - inst->store.addr->result, (int64_t)0); - // femit_reg_to_mem(context, I_MOV, inst->store.value->result, size, inst->store.addr->result, 0); + femit_reg_to_mem(context, I_MOV, inst->store.value->result, size, inst->store.addr->result, 0); } break; From 80749cf564fb29f211dd52775ebfcb43e9de9548 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 12:22:55 -0800 Subject: [PATCH 64/97] [Codegen/x86_64] Nearly completely moved away from variable args... --- src/codegen/x86_64/arch_x86_64.c | 94 +++++++------------------------- 1 file changed, 19 insertions(+), 75 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index ee74cfa37..37cc09fd1 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -224,17 +224,6 @@ static const char * regname_from_bytes(RegisterDescriptor reg, u64 bytes) { return regname(reg, regsize_from_bytes(bytes)); } -// TODO: Pass necessary RegSize in more of these cases -enum InstructionOperands_x86_64 { - IMMEDIATE, ///< int64_t imm - MEMORY, ///< Reg reg, int64_t offset - REGISTER, ///< Reg reg - NAME, ///< const char* name - - REGISTER_TO_REGISTER, ///< Reg src, Reg dest - REGISTER_TO_NAME, ///< Reg src, RegSize size, Reg address, const char* name -}; - const char *setcc_suffixes_x86_64[COMPARE_COUNT] = { "e", "ne", @@ -431,16 +420,10 @@ static void femit_reg_to_reg(CodegenContext *context, enum Instruction inst, Reg } } -static void femit_reg_to_name(CodegenContext *context, enum Instruction inst, va_list args) { - RegisterDescriptor source_register = va_arg(args, RegisterDescriptor); - enum RegSize size = va_arg(args, enum RegSize); - RegisterDescriptor address_register = va_arg(args, RegisterDescriptor); - char *name = va_arg(args, char *); - +static void femit_reg_to_name(CodegenContext *context, enum 
Instruction inst, RegisterDescriptor source_register, enum RegSize size, RegisterDescriptor address_register, const char *name) { const char *mnemonic = instruction_mnemonic(context, inst); const char *source = regname(source_register, size); const char *address = register_name(address_register); - switch (context->dialect) { case CG_ASM_DIALECT_ATT: fprint(context->code, " %s %%%s, %s(%%%s)\n", @@ -542,6 +525,19 @@ static void femit_imm(CodegenContext *context, enum Instruction inst, int64_t im } } +static void femit_name(CodegenContext *context, enum Instruction inst, const char *name) { + ASSERT(name, "NAME must not be NULL."); + const char *mnemonic = instruction_mnemonic(context, inst); + switch (context->dialect) { + case CG_ASM_DIALECT_ATT: + case CG_ASM_DIALECT_INTEL: + fprint(context->code, " %s %s\n", + mnemonic, name); + break; + default: ICE("ERROR: femit(): Unsupported dialect %d for CALL/JMP instruction", context->dialect); + } +} + static void femit (CodegenContext *context, enum Instruction instruction, @@ -554,56 +550,6 @@ static void femit STATIC_ASSERT(I_COUNT == 24, "femit() must exhaustively handle all x86_64 instructions."); switch (instruction) { - case I_ADD: - case I_SUB: - case I_AND: - case I_OR: - case I_TEST: - case I_XOR: - case I_CMP: - case I_MOV: { - enum InstructionOperands_x86_64 operands = va_arg(args, enum InstructionOperands_x86_64); - switch (operands) { - default: ICE("Unhandled operand type %d in x86_64 code generation for %d.", operands, instruction); - case REGISTER_TO_NAME: femit_reg_to_name(context, instruction, args); break; - } - } break; - - case I_JMP: - case I_CALL: { - enum InstructionOperands_x86_64 operand = va_arg(args, enum InstructionOperands_x86_64); - switch (operand) { - default: ICE("femit() only accepts REGISTER or NAME operand type with CALL/JMP instruction."); - case NAME: { - char *label = va_arg(args, char *); - const char *mnemonic = instruction_mnemonic(context, instruction); - - ASSERT(label, 
"JMP/CALL label must not be NULL."); - - switch (context->dialect) { - case CG_ASM_DIALECT_ATT: - case CG_ASM_DIALECT_INTEL: - fprint(context->code, " %s %s\n", - mnemonic, label); - break; - default: ICE("ERROR: femit(): Unsupported dialect %d for CALL/JMP instruction", context->dialect); - } - } break; - } - } break; - - case I_PUSH: { - enum InstructionOperands_x86_64 operand = va_arg(args, enum InstructionOperands_x86_64); - switch (operand) { - default: ICE("femit() only accepts REGISTER, MEMORY, or IMMEDIATE operand type with PUSH instruction."); - case MEMORY: { - int64_t offset = va_arg(args, int64_t); - RegisterDescriptor r = va_arg(args, RegisterDescriptor); - femit_mem(context, instruction, offset, r); - } break; - } - } break; - case I_SETCC: { enum ComparisonType comparison_type = va_arg(args, enum ComparisonType); RegisterDescriptor value_register = va_arg(args, RegisterDescriptor); @@ -892,7 +838,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { // Restore the frame pointer if we have one. codegen_epilogue(context, inst->parent_block->function); if (inst->call.is_indirect) femit_reg(context, I_JMP, inst->call.callee_instruction->result); - else femit(context, I_JMP, NAME, inst->call.callee_function->name.data); + else femit_name(context, I_JMP, inst->call.callee_function->name.data); if (inst->parent_block) inst->parent_block->done = true; break; } @@ -917,7 +863,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { } if (inst->call.is_indirect) femit_reg(context, I_CALL, inst->call.callee_instruction->result); - else femit(context, I_CALL, NAME, inst->call.callee_function->name.data); + else femit_name(context, I_CALL, inst->call.callee_function->name.data); // femit_name(context, I_CALL, inst->call.callee_function->name.data); // Restore caller saved registers used in called function. 
@@ -949,7 +895,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { /// Only emit a jump if the target isn’t the next block. if (!optimise || (inst->parent_block && inst->destination_block != inst->parent_block->next && !inst->parent_block->done)) { - femit(context, I_JMP, NAME, inst->destination_block->name.data); + femit_name(context, I_JMP, inst->destination_block->name.data); } if (optimise && inst->parent_block) inst->parent_block->done = true; break; @@ -966,7 +912,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { femit(context, I_JCC, JUMP_TYPE_NZ, branch->then->name.data); } else { femit(context, I_JCC, JUMP_TYPE_Z, branch->else_->name.data); - femit(context, I_JMP, NAME, branch->then->name.data); + femit_name(context, I_JMP, branch->then->name.data); } if (optimise && inst->parent_block) inst->parent_block->done = true; @@ -1084,9 +1030,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { /// Store to a static variable. if (inst->store.addr->kind == IR_STATIC_REF) { enum RegSize size = regsize_from_bytes(type_sizeof(inst->store.addr->static_ref->type)); - femit(context, I_MOV, REGISTER_TO_NAME, inst->store.value->result, size, - REG_RIP, inst->store.addr->static_ref->name.data); - // femit_reg_to_name(context, I_MOV, inst->store.value->result, size, REG_RIP, inst->store.addr->static_ref->name.data); + femit_reg_to_name(context, I_MOV, inst->store.value->result, size, REG_RIP, inst->store.addr->static_ref->name.data); } /// Store to a local. 
From 81aaf8e34d2cae0902195ee1d6c98d7af64c1f9a Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 12:28:17 -0800 Subject: [PATCH 65/97] [Codegen/x86_64] More comprehensive ICE error message in `femit()` --- src/codegen/x86_64/arch_x86_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 37cc09fd1..22f9d285f 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -596,7 +596,9 @@ static void femit fprint(context->code, " %s\n", mnemonic); } break; - default: ICE("Unhandled instruction in x86_64 code generation: %d.", instruction); + default: ICE("Unhandled instruction in femit(): %d (%s)\n" + " Consider using femit_x() or femit_x_to_x()", + instruction, instruction_mnemonic(context, instruction)); } va_end(args); From a400f576452055128ddea97ab01c8fa3c959b294 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 14:15:18 -0800 Subject: [PATCH 66/97] [Minor/x86_64] Comments, error message --- src/codegen/x86_64/arch_x86_64.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 22f9d285f..8abe6e58a 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -549,6 +549,7 @@ static void femit ASSERT(context); STATIC_ASSERT(I_COUNT == 24, "femit() must exhaustively handle all x86_64 instructions."); + // TODO: Extract setcc and jcc to their own functions, get rid of varargs switch (instruction) { case I_SETCC: { enum ComparisonType comparison_type = va_arg(args, enum ComparisonType); @@ -576,7 +577,7 @@ static void femit enum IndirectJumpType type = va_arg(args, enum IndirectJumpType); ASSERT(type < JUMP_TYPE_COUNT, "femit_direct_branch(): Invalid jump type %d", type); char *label = va_arg(args, char *); - //ASSERT(label, "JCC label must not be NULL."); + ASSERT(label, "JCC label must not be NULL."); const char 
*mnemonic = instruction_mnemonic(context, I_JCC); @@ -596,9 +597,10 @@ static void femit fprint(context->code, " %s\n", mnemonic); } break; - default: ICE("Unhandled instruction in femit(): %d (%s)\n" - " Consider using femit_x() or femit_x_to_x()", - instruction, instruction_mnemonic(context, instruction)); + default: + ICE("Unhandled instruction in femit(): %d (%s)\n" + " Consider using femit_x() or femit_x_to_x()", + instruction, instruction_mnemonic(context, instruction)); } va_end(args); @@ -1267,7 +1269,7 @@ static size_t interfering_regs(IRInstruction *instruction) { mask |= (1 << REG_RAX); mask |= (1 << REG_RDX); break; - case IR_CALL: + case IR_CALL: // FIXME: This seems specific to calling convention... mask |= (1 << REG_RAX); default: break; From cd2381a1975038795f8c715ae8bb3f8dc1ffbc94 Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 14:26:55 -0800 Subject: [PATCH 67/97] [Minor/Codegen] Outline places where string literals need handled Always more todo... --- src/codegen.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/codegen.c b/src/codegen.c index e940b23a7..c28c505f4 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -364,10 +364,16 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { codegen_expr(ctx, rhs); IRInstruction *subs_lhs = NULL; + if (lhs->type->kind != TYPE_ARRAY && lhs->type->kind == TYPE_POINTER) { + ERR("Subscript operator may only operate on arrays and pointers, which type %T is not", lhs->type); + } if (lhs->kind == NODE_VARIABLE_REFERENCE) { // TODO: Handle local variable references, somehow. How can we tell if it's local/static? 
subs_lhs = ir_static_reference(ctx, as_span(lhs->var->name)); - } else ERR("LHS of subscript operator has invalid kind %d", lhs->kind); + } else if (lhs->kind == NODE_LITERAL && lhs->literal.type == TK_STRING) { + TODO("IR generation for subscript of string literal"); + } + else ERR("LHS of subscript operator has invalid kind %d", lhs->kind); // TODO: Just use lhs operand of subscript operator when right hand // side is a compile-time-known zero value. @@ -420,6 +426,11 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { switch (expr->unary.value->kind) { case NODE_DECLARATION: expr->ir = expr->unary.value->ir; return; case NODE_VARIABLE_REFERENCE: expr->ir = expr->unary.value->var->val.node->ir; return; + case NODE_LITERAL: { + if (expr->literal.type == TK_STRING) { + TODO("IR code generation of addressof string literal"); + } + } return; default: ICE("Cannot take address of expression of type %d", expr->unary.value->kind); } } From f922e21bb716f0c00591dc9faf0a05e239184e6a Mon Sep 17 00:00:00 2001 From: Lens Date: Fri, 3 Feb 2023 14:31:37 -0800 Subject: [PATCH 68/97] [Minor/Codegen] Yet still always more to do due soon --- src/codegen.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index c28c505f4..b6bfe68c4 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -431,7 +431,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { TODO("IR code generation of addressof string literal"); } } return; - default: ICE("Cannot take address of expression of type %d", expr->unary.value->kind); + default: ICE("Cannot take address of expression of kind %d", expr->unary.value->kind); } } @@ -441,7 +441,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { /// Prefix expressions. 
if (!expr->unary.postfix) { switch (expr->unary.op) { - default: ICE("Cannot emit unary prefix expression of type %d", expr->unary.op); + default: ICE("Cannot emit unary prefix expression of token type %d", expr->unary.op); /// Load a value from an lvalue. /// Emitting an lvalue loads it, so we don’t need to do anything here. @@ -472,6 +472,9 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { case NODE_LITERAL: if (expr->literal.type == TK_NUMBER) expr->ir = ir_immediate(ctx, expr->type, expr->literal.integer); else if (expr->literal.type == TK_STRING) { + // TODO: We should probably set this name earlier, or have some + // way of getting this name from jujst a string index. Static + // variable is big bad. Valve, pls fix. Literally unplayable. char buf[48] = {0}; static size_t string_literal_count = 0; int len = snprintf(buf, 48, "__str_lit%zu", string_literal_count++); From 13b82101f2182a0192957bd032297be361916f3a Mon Sep 17 00:00:00 2001 From: Lens Date: Sat, 4 Feb 2023 11:13:32 -0800 Subject: [PATCH 69/97] [Bugfix] Add missing zero initialiser (needed by MSVC) --- src/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index 3057488d5..fedc54b72 100644 --- a/src/utils.c +++ b/src/utils.c @@ -299,7 +299,7 @@ void format_to(string_buffer *buf, const char *fmt, ...) { /// Format a string. 
string vformat(const char *fmt, va_list args) { - string_buffer buf = {}; + string_buffer buf = {0}; vformat_to(&buf, fmt, args); return (string) { .data = buf.data, .size = buf.size }; } From 33c195c563a700315e6e5932ddfea2b98f14bcc6 Mon Sep 17 00:00:00 2001 From: Lens Date: Sat, 4 Feb 2023 11:50:35 -0800 Subject: [PATCH 70/97] [Minor] Remove accidentally tracked file --- .clang-format | 59 --------------------------------------------------- 1 file changed, 59 deletions(-) delete mode 100644 .clang-format diff --git a/.clang-format b/.clang-format deleted file mode 100644 index 960af11c0..000000000 --- a/.clang-format +++ /dev/null @@ -1,59 +0,0 @@ ---- -BasedOnStyle: LLVM -AccessModifierOffset: -4 -AlignAfterOpenBracket: BlockIndent -AlignConsecutiveMacros: Consecutive -AlignConsecutiveAssignments: None -AlignConsecutiveDeclarations: None -AlignEscapedNewlines: Left -AlignOperands: true -AlignTrailingComments: true -AllowAllArgumentsOnNextLine: true -AllowShortBlocksOnASingleLine: Always -AllowShortCaseLabelsOnASingleLine: true -AllowShortFunctionsOnASingleLine: All -AllowShortIfStatementsOnASingleLine: AllIfsAndElse -AllowShortLambdasOnASingleLine: All -AllowShortLoopsOnASingleLine: true -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: 'No' -BinPackArguments: true -BinPackParameters: false -BreakBeforeBinaryOperators: NonAssignment -BreakBeforeTernaryOperators: true -BreakBeforeBraces: Attach -BreakStringLiterals: true -ColumnLimit: 0 -CompactNamespaces: false -DerivePointerAlignment: false -DisableFormat: false -FixNamespaceComments: true -ForEachMacros: ["foreach", "foreach_ptr", "foreach_index", "foreach_if", "foreach_ptr_if", "DLIST_FOREACH"] -IncludeBlocks: Regroup -IndentCaseLabels: true -IndentPPDirectives: AfterHash -IndentRequiresClause: false -IndentWidth: 2 -IndentWrappedFunctionNames: false -JavaScriptQuotes: Single 
-KeepEmptyLinesAtTheStartOfBlocks: false -Language: Cpp -MaxEmptyLinesToKeep: 1 -MacroBlockBegin: '^subroutine' -MacroBlockEnd: '^endsubroutine' -PointerAlignment: Right -ReflowComments: true -SortIncludes: CaseInsensitive -SortUsingDeclarations: true -SpaceAfterCStyleCast: true -SpaceAfterLogicalNot: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpacesBeforeTrailingComments: 1 -SpacesInCStyleCastParentheses: false -TabWidth: 2 -TypenameMacros: ["Vector", "DLIST"] -UseTab: Never -... From 49c9970609854d8df692afdca587833e0e204ed0 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 15:25:31 -0800 Subject: [PATCH 71/97] [Sema] Fix copy-and-paste error --- src/typechecker.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/typechecker.c b/src/typechecker.c index 95dbbd7e3..d246a2bce 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -906,10 +906,10 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { if (types_equal(t_to, t_from)) break; // FROM any incomplete type is DISALLOWED - if (type_is_incomplete(t_from) || type_is_incomplete(t_to)) + if (type_is_incomplete(t_from)) ERR(expr->cast.value->source_location, "Can not cast from an incomplete type %T", t_from); // TO any complete type is DISALLOWED - if (type_is_incomplete(t_from) || type_is_incomplete(t_to)) + if (type_is_incomplete(t_to)) ERR(expr->cast.value->source_location, "Can not cast to an incomplete type %T", t_to); // FROM any pointer type TO any pointer type is ALLOWED From e594ee5745e9088c2e2f9707e97dd5a1b61ca8b1 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 20:35:04 -0800 Subject: [PATCH 72/97] [UX] Allow `colors` as well as `colours` in CLI --- src/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.c b/src/main.c index 79b5afae4..19147bbc4 100644 --- a/src/main.c +++ b/src/main.c @@ -153,7 +153,7 @@ int handle_command_line_arguments(int argc, char **argv) { 
print_acceptable_formats(); return 1; } - } else if (strcmp(argument, "--colours") == 0) { + } else if (strcmp(argument, "--colours") == 0 || strcmp(argument, "--colors") == 0) { i++; if (i >= argc) { fprint(stderr, "Error: Expected option value after `--colours`\n"); From b95d8790bb506f079af4f80f7a986ce3c13a07b9 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 20:35:39 -0800 Subject: [PATCH 73/97] [TODO] String literals are parsed, now --- TODO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index addb84a20..03e9a53e9 100644 --- a/TODO.md +++ b/TODO.md @@ -65,7 +65,7 @@ - [ ] Length operator: `#` - [ ] Subscripting - [ ] Strings - - [ ] Parsing string literals. + - [x] Parsing string literals. - [ ] Codegen. - [ ] Backend. - [ ] Structs From b7be43862b1daa8fe400021b900eb85b1fd4c7e5 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 20:40:45 -0800 Subject: [PATCH 74/97] Disable IR intake as it is severely lacking in development right now --- src/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main.c b/src/main.c index 19147bbc4..6ec1e2517 100644 --- a/src/main.c +++ b/src/main.c @@ -269,6 +269,8 @@ int main(int argc, char **argv) { if (len >= 3 && memcmp(infile + len - 3, ".ir", 3) == 0) { ASSERT(s.data); + TODO("Development of IR parser and codegen is severely behind right now."); + if (!codegen( LANG_IR, output_format, From 49f6762494e15de2dc44fd97987537daecd6422c Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:06:39 -0800 Subject: [PATCH 75/97] [Bugfix] Fix types in wrong order in error message Fixes #44 --- src/typechecker.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/typechecker.c b/src/typechecker.c index d246a2bce..448491177 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -790,9 +790,9 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { Type *ret = expr->type->function.return_type; Type *body = expr->function.body->type; if 
(!convertible(ret, body)) - ERR(expr->source_location, - "Type '%T' of function body is not convertible to return type '%T'.", - ret, body); + ERR(expr->function.body->block.children.data[expr->function.body->block.children.size - 1]->source_location, + "Type '%T' of function body is not convertible to return type '%T'.", + body, ret); } break; /// Typecheck declarations. From 6c49cacc82be9ee0220b66fa0a4f6e39f37072c6 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:21:45 -0800 Subject: [PATCH 76/97] [Sema] Move some `type_is_*` functions up to `ast.h` from `typechecker.c` There are more to come, I'm sure. --- src/ast.c | 10 ++++++++++ src/ast.h | 7 +++++++ src/typechecker.c | 4 ++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/ast.c b/src/ast.c index 22b50e410..bb6f9bc88 100644 --- a/src/ast.c +++ b/src/ast.c @@ -511,6 +511,16 @@ bool type_is_void(Type *type) { return type_canonical(type) == t_void; } +bool type_is_pointer(Type *type) { + Type * t = type_canonical(type); + return t && t->kind == TYPE_POINTER; +} + +bool type_is_array(Type *type) { + Type * t = type_canonical(type); + return t && t->kind == TYPE_ARRAY; +} + /// =========================================================================== /// Miscellaneous AST functions. /// =========================================================================== diff --git a/src/ast.h b/src/ast.h index 339a3ed6a..da8fe7334 100644 --- a/src/ast.h +++ b/src/ast.h @@ -572,6 +572,13 @@ usz type_alignof(Type *type); /// Check if a type is void. bool type_is_void(Type *type); +/// Check if a type is of pointer type. +bool type_is_pointer(Type *type); + +/// Check if a type is of array type. +bool type_is_array(Type *type); + + /// =========================================================================== /// Miscellaneous AST functions. 
/// =========================================================================== diff --git a/src/typechecker.c b/src/typechecker.c index 448491177..c171db616 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -200,10 +200,10 @@ NODISCARD static Type *common_type(Type *a, Type *b) { } /// Check if a type is a pointer type. -NODISCARD static bool is_pointer(Type *type) { return type->kind == TYPE_POINTER; } +NODISCARD static bool is_pointer(Type *type) { return type_is_pointer(type); } /// Check if a type is an array type. -NODISCARD static bool is_array(Type *type) { return type->kind == TYPE_ARRAY; } +NODISCARD static bool is_array(Type *type) { return type_is_array(type); } /// Check if an expression is an lvalue. NODISCARD static bool is_lvalue(Node *expr) { From 7253339d335cdfa8372f578bcecd3f73b65c3a84 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:24:15 -0800 Subject: [PATCH 77/97] [Minor/Sema] Use `vector_back` instead of manual calculation --- src/typechecker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/typechecker.c b/src/typechecker.c index c171db616..59d19d442 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -790,7 +790,7 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { Type *ret = expr->type->function.return_type; Type *body = expr->function.body->type; if (!convertible(ret, body)) - ERR(expr->function.body->block.children.data[expr->function.body->block.children.size - 1]->source_location, + ERR(vector_back(expr->function.body->block.children)->source_location, "Type '%T' of function body is not convertible to return type '%T'.", body, ret); } break; From 7b84f667dd8ba17689fd4670295e1ac01f20742a Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:25:17 -0800 Subject: [PATCH 78/97] [Minor/Sema] Actually use `vector_back_or` in case of no children --- src/typechecker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/typechecker.c b/src/typechecker.c index 
59d19d442..874cc8ca0 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -790,7 +790,7 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { Type *ret = expr->type->function.return_type; Type *body = expr->function.body->type; if (!convertible(ret, body)) - ERR(vector_back(expr->function.body->block.children)->source_location, + ERR(vector_back_or(expr->function.body->block.children, expr)->source_location, "Type '%T' of function body is not convertible to return type '%T'.", body, ret); } break; From 771b9b4f6661dd7d5aa3738a47e8d75219728d55 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:29:06 -0800 Subject: [PATCH 79/97] [Minor/Sema] This should now work pretty much as expected Promise, last one lmao. Still have to fix assignment source location, eventually --- src/typechecker.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/typechecker.c b/src/typechecker.c index 874cc8ca0..c86988cab 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -789,10 +789,17 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { /// Make sure the return type of the body is convertible to that of the function. Type *ret = expr->type->function.return_type; Type *body = expr->function.body->type; - if (!convertible(ret, body)) - ERR(vector_back_or(expr->function.body->block.children, expr)->source_location, + if (!convertible(ret, body)) { + loc l = {0}; + if (expr->function.body->kind == NODE_BLOCK) { + l = vector_back_or(expr->function.body->block.children, expr)->source_location; + } else { + l = expr->function.body->source_location; + } + ERR(l, "Type '%T' of function body is not convertible to return type '%T'.", body, ret); + } } break; /// Typecheck declarations. From df8ffb09f2c415d9d10c4e241265d5cd90491bb8 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:30:42 -0800 Subject: [PATCH 80/97] [Minor/Sema] Very minor formatting Okay I lied this is the last one, lmao. 
--- src/typechecker.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/typechecker.c b/src/typechecker.c index c86988cab..dbd06575b 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -791,11 +791,9 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { Type *body = expr->function.body->type; if (!convertible(ret, body)) { loc l = {0}; - if (expr->function.body->kind == NODE_BLOCK) { + if (expr->function.body->kind == NODE_BLOCK) l = vector_back_or(expr->function.body->block.children, expr)->source_location; - } else { - l = expr->function.body->source_location; - } + else l = expr->function.body->source_location; ERR(l, "Type '%T' of function body is not convertible to return type '%T'.", body, ret); From 5a60f48b902ee50b919e269a2ce0ab516a3a10fd Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:37:23 -0800 Subject: [PATCH 81/97] [Bugfix] Source spans of assignments were off; this fixes them --- src/parser.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/parser.c b/src/parser.c index 55b825e0c..9ec7dec6e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -392,7 +392,6 @@ static void next_token(Parser *p) { if (isstart(p->lastc)) { next_identifier(p); - /// Check if the identifier is a keyword. for (size_t i = 0; i < sizeof keywords / sizeof *keywords; i++) { if (string_eq(keywords[i].kw, p->tok.text)) { p->tok.type = keywords[i].type; @@ -1074,14 +1073,13 @@ static Node *parse_expr_with_precedence(Parser *p, isz current_precedence) { if (prec == current_precedence && !is_right_associative(p, p->tok)) return lhs; /// Otherwise, we need to parse the RHS. - u32 start = p->tok.source_location.start; enum TokenType tt = p->tok.type; next_token(p); /// The `as` operator is special because its RHS is a type. 
if (tt == TK_AS) { Type *type = parse_type(p); - lhs = ast_make_cast(p->ast, (loc){.start = start, .end = type->source_location.end}, type, lhs); + lhs = ast_make_cast(p->ast, (loc){.start = lhs->source_location.start, .end = type->source_location.end}, type, lhs); continue; } @@ -1090,7 +1088,7 @@ static Node *parse_expr_with_precedence(Parser *p, isz current_precedence) { Node *rhs = parse_expr_with_precedence(p, prec); /// Combine the LHS and RHS into a binary expression. - lhs = ast_make_binary(p->ast, (loc){.start = start, .end = rhs->source_location.end}, tt, lhs, rhs); + lhs = ast_make_binary(p->ast, (loc){.start = lhs->source_location.start, .end = rhs->source_location.end}, tt, lhs, rhs); } } } From cc09b8bb062015aef1db80578cdc49f9619145ca Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:50:35 -0800 Subject: [PATCH 82/97] [Sema] Use `type_is_*` instead of direct comparisons This will eventually allow for typedefs to be a drop-in-place feature, rather than requiring extensive rewriting --- src/codegen.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index b6bfe68c4..ee70c6c2d 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -364,9 +364,9 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { codegen_expr(ctx, rhs); IRInstruction *subs_lhs = NULL; - if (lhs->type->kind != TYPE_ARRAY && lhs->type->kind == TYPE_POINTER) { + if (!type_is_array(lhs->type) && !type_is_pointer(lhs->type)) ERR("Subscript operator may only operate on arrays and pointers, which type %T is not", lhs->type); - } + if (lhs->kind == NODE_VARIABLE_REFERENCE) { // TODO: Handle local variable references, somehow. How can we tell if it's local/static? subs_lhs = ir_static_reference(ctx, as_span(lhs->var->name)); @@ -380,12 +380,12 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { IRInstruction *scaled_rhs = NULL; // An array subscript needs multiplied by the sizeof the array's base type. 
- if (lhs->type->kind == TYPE_ARRAY) { + if (type_is_array(lhs->type)) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->array.of)); scaled_rhs = ir_mul(ctx, rhs->ir, immediate); } - // A pointer subscript needs multiplied by the sizeof the pointer's base type. - else if (lhs->type->kind == TYPE_POINTER) { + // A pointer subscript needs multiplied by the sizeof the pointer's base type. + else if (type_is_pointer(lhs->type)) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->pointer.to)); scaled_rhs = ir_mul(ctx, rhs->ir, immediate); } From 836f99ba41896a893898875e7cf0ec41042da5b1 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:51:15 -0800 Subject: [PATCH 83/97] [Codegen] Improve codegen of subscript variable reference This will now allow `ir_static_reference` to take in a direct `IRStaticVariable`, when necessary, instead of just a name span. Still not sure how to go about handling the local version of this, to be honest. Maybe a `copy`? --- src/codegen.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/codegen.c b/src/codegen.c index ee70c6c2d..f31d6d71f 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -369,7 +369,13 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { if (lhs->kind == NODE_VARIABLE_REFERENCE) { // TODO: Handle local variable references, somehow. How can we tell if it's local/static? 
- subs_lhs = ir_static_reference(ctx, as_span(lhs->var->name)); + IRInstruction *var = lhs->var->val.node->ir; + // ASSERT(var); + if (var->kind == IR_STATIC_REF) + subs_lhs = ir_static_reference(ctx, as_span(lhs->var->name)); + else if (var->kind == IR_ALLOCA) + //subs_lhs = ir_(ctx, as_span(lhs->var->name)); + TODO("Codegen local variable reference"); } else if (lhs->kind == NODE_LITERAL && lhs->literal.type == TK_STRING) { TODO("IR generation for subscript of string literal"); } From acd725cd2ca7392e6871a70255c836c7135c3653 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 21:58:18 -0800 Subject: [PATCH 84/97] [AST] Update `ast_print_node` with easier to use signature --- src/ast.c | 12 +++++++++--- src/ast.h | 5 +---- src/codegen.c | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/ast.c b/src/ast.c index bb6f9bc88..931cae0ea 100644 --- a/src/ast.c +++ b/src/ast.c @@ -617,7 +617,7 @@ static void ast_print_children( ); /// Print a node. -void ast_print_node( +void ast_print_node_internal( FILE *file, const Node *logical_parent, const Node *node, @@ -765,6 +765,12 @@ void ast_print_node( } } +void ast_print_node(const Node *node) { + string_buffer buf = {0}; + ast_print_node_internal(stdout, NULL, node, &buf); + vector_delete(buf); +} + /// Scope tree for printing scopes. typedef struct scope_tree_node { const Scope *scope; @@ -846,7 +852,7 @@ void ast_print(FILE *file, const AST *ast) { string_buffer buf = {0}; /// Print the root node. - ast_print_node(file, NULL, ast->root, &buf); + ast_print_node_internal(file, NULL, ast->root, &buf); } /// Print the children of a node. @@ -871,7 +877,7 @@ static void ast_print_children( format_to(buf, "%s", node == vector_back(*nodes) ? " " : "│ "); /// Print the node. - ast_print_node(file, logical_parent, node, buf); + ast_print_node_internal(file, logical_parent, node, buf); /// Restore the leading text. 
buf->size = sz; diff --git a/src/ast.h b/src/ast.h index da8fe7334..98d10bcd7 100644 --- a/src/ast.h +++ b/src/ast.h @@ -595,10 +595,7 @@ void ast_print(FILE *file, const AST *ast); void ast_print_scope_tree(FILE *file, const AST *ast); /// Print a node and all of it's children. -/// Use like so: -/// string_buffer buf = {0}; -/// ast_print_node(file, NULL, node, &buf); -void ast_print_node(FILE *file, const Node *logical_parent, const Node *node, string_buffer *leading_text); +void ast_print_node(const Node *node); /// Intern a string. size_t ast_intern_string(AST *ast, span string); diff --git a/src/codegen.c b/src/codegen.c index f31d6d71f..4df61b018 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -375,7 +375,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { subs_lhs = ir_static_reference(ctx, as_span(lhs->var->name)); else if (var->kind == IR_ALLOCA) //subs_lhs = ir_(ctx, as_span(lhs->var->name)); - TODO("Codegen local variable reference"); + TODO("IR generation for subscript of local variable reference"); } else if (lhs->kind == NODE_LITERAL && lhs->literal.type == TK_STRING) { TODO("IR generation for subscript of string literal"); } From f3fab2d983e4caeba7e9a47c39292bb54508e78c Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 22:11:17 -0800 Subject: [PATCH 85/97] [Codegen] Remove now-unnecessary array bodge in var. ref. codegen --- src/codegen.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index 4df61b018..2637f1175 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -361,6 +361,9 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { } if (expr->binary.op == TK_LBRACK) { + // TODO: Just use lhs operand of subscript operator when right hand + // side is a compile-time-known zero value. 
+ codegen_expr(ctx, rhs); IRInstruction *subs_lhs = NULL; @@ -381,9 +384,6 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { } else ERR("LHS of subscript operator has invalid kind %d", lhs->kind); - // TODO: Just use lhs operand of subscript operator when right hand - // side is a compile-time-known zero value. - IRInstruction *scaled_rhs = NULL; // An array subscript needs multiplied by the sizeof the array's base type. if (type_is_array(lhs->type)) { @@ -498,12 +498,6 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { /// Variable reference. case NODE_VARIABLE_REFERENCE: expr->ir = ir_load(ctx, expr->var->val.node->ir); - // TODO: Be smarter about when an array should decay to a pointer or not. - // Maybe it never should, and this should be implemented per backend? - // "I’d just emit a load of the array and have the backend - // deal w/ copying 1000 ints." ~ Sirraide - if (expr->ir->type->kind == TYPE_ARRAY) - expr->ir->type = ast_make_type_pointer(ctx->ast, expr->type->source_location, expr->type->array.of); return; /// Function reference. These should have all been removed by the semantic analyser. From a8241fa4a48c6a63bf09a570e82cbbe95f61994f Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 22:26:50 -0800 Subject: [PATCH 86/97] =?UTF-8?q?[Codegen]=20=C2=A1Subscript=20of=20local?= =?UTF-8?q?=20variable!?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/codegen.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index 2637f1175..82567230f 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -371,14 +371,12 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { ERR("Subscript operator may only operate on arrays and pointers, which type %T is not", lhs->type); if (lhs->kind == NODE_VARIABLE_REFERENCE) { - // TODO: Handle local variable references, somehow. How can we tell if it's local/static? 
IRInstruction *var = lhs->var->val.node->ir; // ASSERT(var); if (var->kind == IR_STATIC_REF) subs_lhs = ir_static_reference(ctx, as_span(lhs->var->name)); else if (var->kind == IR_ALLOCA) - //subs_lhs = ir_(ctx, as_span(lhs->var->name)); - TODO("IR generation for subscript of local variable reference"); + subs_lhs = var; } else if (lhs->kind == NODE_LITERAL && lhs->literal.type == TK_STRING) { TODO("IR generation for subscript of string literal"); } @@ -395,7 +393,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->pointer.to)); scaled_rhs = ir_mul(ctx, rhs->ir, immediate); } - expr->ir = ir_add(ctx, subs_lhs, scaled_rhs); + expr->ir = ir_add(ctx, scaled_rhs, subs_lhs); return; } From 6c027c7e7928ddc406bb8440ac9a5e1270cdff58 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 22:31:58 -0800 Subject: [PATCH 87/97] =?UTF-8?q?[Codegen]=20Apparently=20better=20=C2=A1C?= =?UTF-8?q?odegen=20of=20Subscript=20of=20local=20variable!?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/codegen.c | 2 +- src/codegen/x86_64/arch_x86_64.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index 82567230f..c5feb02ec 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -393,7 +393,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->pointer.to)); scaled_rhs = ir_mul(ctx, rhs->ir, immediate); } - expr->ir = ir_add(ctx, scaled_rhs, subs_lhs); + expr->ir = ir_add(ctx, subs_lhs, scaled_rhs); return; } diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 8abe6e58a..cd39f29f5 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -928,8 +928,8 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { case IR_EQ: 
codegen_comparison(context, COMPARE_EQ, inst->lhs->result, inst->rhs->result, inst->result); break; case IR_NE: codegen_comparison(context, COMPARE_NE, inst->lhs->result, inst->rhs->result, inst->result); break; case IR_ADD: - femit_reg_to_reg(context, I_ADD, inst->rhs->result, inst->lhs->result); - femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); + femit_reg_to_reg(context, I_ADD, inst->lhs->result, inst->rhs->result); + femit_reg_to_reg(context, I_MOV, inst->rhs->result, inst->result); break; case IR_SUB: femit_reg_to_reg(context, I_SUB, inst->rhs->result, inst->lhs->result); From c7f87aac0c465ef339c5ec36519ef210ecd53767 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 22:40:55 -0800 Subject: [PATCH 88/97] [Codegen/x86_64] Fix clobber values of `does_clobber` for shift instructions ``` femit_reg_to_reg(context, I_MOV, inst->rhs->result, REG_RCX); femit_reg(context, I_SAR, inst->lhs->result); femit_reg_to_reg(context, I_MOV, inst->lhs->result, inst->result); ``` This clobbers left, not the right. 
--- src/codegen/x86_64/arch_x86_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index cd39f29f5..07f6ca954 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -1165,14 +1165,14 @@ Clobbers does_clobber(IRInstruction *instruction) { case IR_DIV: case IR_MUL: case IR_MOD: - case IR_SHL: - case IR_SHR: - case IR_SAR: case IR_AND: case IR_OR: return CLOBBERS_RIGHT; case IR_SUB: + case IR_SHL: + case IR_SHR: + case IR_SAR: return CLOBBERS_LEFT; case IR_NOT: From 88715b460c346f0e006e8b6870578f5ebd05aea5 Mon Sep 17 00:00:00 2001 From: Lens Date: Mon, 6 Feb 2023 22:42:22 -0800 Subject: [PATCH 89/97] [Tests] Add bit-shifting left and right tests --- tst/tests/shiftleft.un | 3 +++ tst/tests/shiftright.un | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 tst/tests/shiftleft.un create mode 100644 tst/tests/shiftright.un diff --git a/tst/tests/shiftleft.un b/tst/tests/shiftleft.un new file mode 100644 index 000000000..e34326eea --- /dev/null +++ b/tst/tests/shiftleft.un @@ -0,0 +1,3 @@ +; 8 + +2 << 2 diff --git a/tst/tests/shiftright.un b/tst/tests/shiftright.un new file mode 100644 index 000000000..798a3c2fc --- /dev/null +++ b/tst/tests/shiftright.un @@ -0,0 +1,3 @@ +; 2 + +8 >> 2 From 1104d937a70f065746b8b8e9a1773152e7811ed0 Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 7 Feb 2023 08:57:45 -0800 Subject: [PATCH 90/97] [AST] Rename `t_pointer` to `t_void_ptr` --- src/ast.c | 2 +- src/ast.h | 2 +- src/codegen/x86_64/arch_x86_64.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ast.c b/src/ast.c index 931cae0ea..cb333def8 100644 --- a/src/ast.c +++ b/src/ast.c @@ -62,7 +62,7 @@ static Type t_byte_def = { }; Type * const t_void = &t_void_def; -Type * const t_pointer = &t_void_pointer_def; +Type * const t_void_ptr = &t_void_pointer_def; Type * const t_integer_literal = &t_integer_literal_def; Type * const 
t_integer = &t_integer_def; Type * const t_byte = &t_byte_def; diff --git a/src/ast.h b/src/ast.h index 98d10bcd7..eac714065 100644 --- a/src/ast.h +++ b/src/ast.h @@ -607,7 +607,7 @@ void ast_replace_node(AST *ast, Node *old, Node *new); /// Builtin types. /// =========================================================================== extern Type *const t_void; -extern Type *const t_pointer; +extern Type *const t_void_ptr; extern Type *const t_integer_literal; extern Type *const t_integer; extern Type *const t_byte; diff --git a/src/codegen/x86_64/arch_x86_64.c b/src/codegen/x86_64/arch_x86_64.c index 07f6ca954..5caaf6c18 100644 --- a/src/codegen/x86_64/arch_x86_64.c +++ b/src/codegen/x86_64/arch_x86_64.c @@ -988,7 +988,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { enum RegSize size = -1; // TODO: Should this array to pointer decay happen here? Or higher up in codegen? if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) - size = regsize_from_bytes(type_sizeof(t_pointer)); + size = regsize_from_bytes(type_sizeof(t_void_ptr)); else size = regsize_from_bytes(type_sizeof(inst->operand->type)); // TODO: Use `movzx`/`movzbl` if (size == r8 || size == r16) femit_reg_to_reg(context, I_XOR, inst->result, inst->result); @@ -1004,7 +1004,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { enum RegSize size = -1; // TODO: Should this array to pointer decay happen here? Or higher up in codegen? 
if (inst->operand->type->kind == TYPE_ARRAY || inst->operand->type->pointer.to->kind == TYPE_ARRAY) - size = regsize_from_bytes(type_sizeof(t_pointer)); + size = regsize_from_bytes(type_sizeof(t_void_ptr)); else size = regsize_from_bytes(inst->operand->alloca.size); // TODO: Use `movzx`/`movzbl` if (size == r8 || size == r16) femit_reg_to_reg(context, I_XOR, inst->result, inst->result); @@ -1018,7 +1018,7 @@ static void emit_instruction(CodegenContext *context, IRInstruction *inst) { else { enum RegSize size = -1; // TODO: Should this array to pointer decay happen here? Or higher up in codegen? - if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_pointer)); + if (inst->operand->type->kind == TYPE_ARRAY) size = regsize_from_bytes(type_sizeof(t_void_ptr)); // TODO: We are "supposed" to be loading sizeof pointed to type // here, but that causes segfaults when handling arrays. else size = regsize_from_bytes(type_sizeof(inst->operand->type)); From b222c7f5dcc896dd0ace51eab36d131992af55fb Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 7 Feb 2023 09:10:52 -0800 Subject: [PATCH 91/97] [Codegen/IR] Remove use of `ir_static_reference` --- src/codegen.c | 2 +- src/codegen/intermediate_representation.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/codegen.c b/src/codegen.c index c5feb02ec..af2be9c7e 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -374,7 +374,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { IRInstruction *var = lhs->var->val.node->ir; // ASSERT(var); if (var->kind == IR_STATIC_REF) - subs_lhs = ir_static_reference(ctx, as_span(lhs->var->name)); + subs_lhs = var; else if (var->kind == IR_ALLOCA) subs_lhs = var; } else if (lhs->kind == NODE_LITERAL && lhs->literal.type == TK_STRING) { diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 76be58de8..3a4947c85 100644 --- a/src/codegen/intermediate_representation.c +++ 
b/src/codegen/intermediate_representation.c @@ -645,6 +645,9 @@ IRInstruction *ir_create_static(CodegenContext *context, Type *type, span name) return ref; } +/// NOTE: Currently unused, but can be used to load a static reference +/// a second time in a basic block, if need be, without generating +/// duplicate static variables. IRInstruction *ir_static_reference(CodegenContext *context, span name) { foreach_ptr(IRStaticVariable *, v, context->static_vars) { if (string_eq(v->name, name)) { From 6411d364fb55d1f81fff0a5f33ee85b3188a2976 Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 7 Feb 2023 09:11:47 -0800 Subject: [PATCH 92/97] [Minor/IR] Simplify wording of comment --- src/codegen/intermediate_representation.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 3a4947c85..70196b677 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -646,8 +646,7 @@ IRInstruction *ir_create_static(CodegenContext *context, Type *type, span name) } /// NOTE: Currently unused, but can be used to load a static reference -/// a second time in a basic block, if need be, without generating -/// duplicate static variables. +/// more than once without generating duplicate static variables. 
IRInstruction *ir_static_reference(CodegenContext *context, span name) { foreach_ptr(IRStaticVariable *, v, context->static_vars) { if (string_eq(v->name, name)) { From 40b4430e710e58b6999e9e7a3b8cb6a619da1c59 Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 7 Feb 2023 09:47:04 -0800 Subject: [PATCH 93/97] [Minor/Codegen] Comments; fixed a typo --- src/codegen.c | 7 ++++--- src/codegen/intermediate_representation.c | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index af2be9c7e..b00b050ac 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -364,8 +364,6 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { // TODO: Just use lhs operand of subscript operator when right hand // side is a compile-time-known zero value. - codegen_expr(ctx, rhs); - IRInstruction *subs_lhs = NULL; if (!type_is_array(lhs->type) && !type_is_pointer(lhs->type)) ERR("Subscript operator may only operate on arrays and pointers, which type %T is not", lhs->type); @@ -378,10 +376,13 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { else if (var->kind == IR_ALLOCA) subs_lhs = var; } else if (lhs->kind == NODE_LITERAL && lhs->literal.type == TK_STRING) { + // ctx->ast->strings.data[lhs->literal.string_index]; TODO("IR generation for subscript of string literal"); } else ERR("LHS of subscript operator has invalid kind %d", lhs->kind); + codegen_expr(ctx, rhs); + IRInstruction *scaled_rhs = NULL; // An array subscript needs multiplied by the sizeof the array's base type. if (type_is_array(lhs->type)) { @@ -477,7 +478,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { if (expr->literal.type == TK_NUMBER) expr->ir = ir_immediate(ctx, expr->type, expr->literal.integer); else if (expr->literal.type == TK_STRING) { // TODO: We should probably set this name earlier, or have some - // way of getting this name from jujst a string index. Static + // way of getting this name from just a string index. 
Static // variable is big bad. Valve, pls fix. Literally unplayable. char buf[48] = {0}; static size_t string_literal_count = 0; diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 70196b677..82bebc533 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -653,8 +653,7 @@ IRInstruction *ir_static_reference(CodegenContext *context, span name) { INSTRUCTION(ref, IR_STATIC_REF); ref->static_ref = v; ref->type = ast_make_type_pointer(context->ast, v->type->source_location, v->type); - // TODO: `v->reference` may need to become list of references? I think this is why - // optimisation is broken. + // TODO: `v->reference` may need to become list of references? INSERT(ref); return ref; } From f1f876c1d6facb6629b6e21501baf982331181fc Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 7 Feb 2023 12:39:26 -0800 Subject: [PATCH 94/97] [Codegen] Rename `var` to the more correct `var_decl` Also add error for unhandled IR instruction types, just in case. 
--- src/codegen.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index b00b050ac..45c021b40 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -369,12 +369,15 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) { ERR("Subscript operator may only operate on arrays and pointers, which type %T is not", lhs->type); if (lhs->kind == NODE_VARIABLE_REFERENCE) { - IRInstruction *var = lhs->var->val.node->ir; + IRInstruction *var_decl = lhs->var->val.node->ir; // ASSERT(var); - if (var->kind == IR_STATIC_REF) - subs_lhs = var; - else if (var->kind == IR_ALLOCA) - subs_lhs = var; + if (var_decl->kind == IR_STATIC_REF || var_decl->kind == IR_ALLOCA) + subs_lhs = var_decl; + else { + ir_femit_instruction(stdout, var_decl); + ERR("Unhandled variable reference IR instruction kind %i", var_decl->kind); + } + } else if (lhs->kind == NODE_LITERAL && lhs->literal.type == TK_STRING) { // ctx->ast->strings.data[lhs->literal.string_index]; TODO("IR generation for subscript of string literal"); From 8282edc50843fc5ec882469198e1aedb149dd46e Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 7 Feb 2023 12:45:31 -0800 Subject: [PATCH 95/97] [Sema] Get rid of `is_*` wrappers, where possible There are still some we need to transfer over, like `is_integer` and such... We'll get there. --- src/ast.h | 22 +++++++++++----------- src/typechecker.c | 22 ++++++++-------------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/src/ast.h b/src/ast.h index eac714065..06eab650e 100644 --- a/src/ast.h +++ b/src/ast.h @@ -514,7 +514,7 @@ Type *ast_make_type_function( /// Get a string representation of a type. /// \return The string representation of the type. The string is allocated /// with malloc() and must be freed by the caller. -string typename(Type *type, bool colour); +NODISCARD string typename(Type *type, bool colour); /** Get the canonical type of any given type. 
* @@ -530,12 +530,12 @@ string typename(Type *type, bool colour); * * \return NULL if the type is incomplete. */ -Type *type_canonical(Type *type); +NODISCARD Type *type_canonical(Type *type); /// Get the last alias of a type. /// /// This function strips nested named types until there is only one left. -Type *type_last_alias(Type *type); +NODISCARD Type *type_last_alias(Type *type); /** Check if a type is incomplete. * @@ -552,7 +552,7 @@ Type *type_last_alias(Type *type); * * \return true iff the type is incomplete. */ -bool type_is_incomplete(Type *type); +NODISCARD bool type_is_incomplete(Type *type); /** Same as type_is_incomplete() but must be given a canonical type. * @@ -561,29 +561,29 @@ bool type_is_incomplete(Type *type); * * \return true iff the given canonical type is incomplete. */ -bool type_is_incomplete_canon(Type *type); +NODISCARD bool type_is_incomplete_canon(Type *type); /// Get the size of a type, in bytes. -usz type_sizeof(Type *type); +NODISCARD usz type_sizeof(Type *type); /// Get the aligmnent of a type, in bytes. -usz type_alignof(Type *type); +NODISCARD usz type_alignof(Type *type); /// Check if a type is void. -bool type_is_void(Type *type); +NODISCARD bool type_is_void(Type *type); /// Check if a type is of pointer type. -bool type_is_pointer(Type *type); +NODISCARD bool type_is_pointer(Type *type); /// Check if a type is of array type. -bool type_is_array(Type *type); +NODISCARD bool type_is_array(Type *type); /// =========================================================================== /// Miscellaneous AST functions. /// =========================================================================== /// Create a new AST. -AST *ast_create(); +NODISCARD AST *ast_create(); /// Free an AST. 
void ast_free(AST *ast); diff --git a/src/typechecker.c b/src/typechecker.c index dbd06575b..1a46537e8 100644 --- a/src/typechecker.c +++ b/src/typechecker.c @@ -199,12 +199,6 @@ NODISCARD static Type *common_type(Type *a, Type *b) { return NULL; } -/// Check if a type is a pointer type. -NODISCARD static bool is_pointer(Type *type) { return type_is_pointer(type); } - -/// Check if a type is an array type. -NODISCARD static bool is_array(Type *type) { return type_is_array(type); } - /// Check if an expression is an lvalue. NODISCARD static bool is_lvalue(Node *expr) { switch (expr->kind) { @@ -919,20 +913,20 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { // FROM any pointer type TO any pointer type is ALLOWED // TODO: Check base type size + alignment... - if (is_pointer(t_from) && is_pointer(t_to)) break; + if (type_is_pointer(t_from) && type_is_pointer(t_to)) break; // FROM any pointer type TO any integer type is ALLOWED - if (is_pointer(t_from) && is_integer(t_to)) break; + if (type_is_pointer(t_from) && is_integer(t_to)) break; // FROM any integer type TO any integer type is ALLOWED if (is_integer(t_from) && is_integer(t_to)) break; // FROM any integer type TO any pointer type is currently DISALLOWED, but very well may change - if (is_integer(t_from) && is_pointer(t_to)) + if (is_integer(t_from) && type_is_pointer(t_to)) ERR(expr->cast.value->source_location, "Can not cast from an integer type %T to pointer type %T", t_from, t_to); // FROM any array type TO any array type is DISALLOWED - if (is_array(t_from) && is_array(t_to)) { + if (type_is_array(t_from) && type_is_array(t_to)) { ERR(expr->cast.value->source_location, "Can not cast between arrays."); } @@ -958,7 +952,7 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { /// The subscript operator is basically pointer arithmetic. case TK_LBRACK: /// We can only subscript pointers and arrays. 
- if (!is_pointer(lhs->type) && !is_array(lhs->type)) + if (!type_is_pointer(lhs->type) && !type_is_array(lhs->type)) ERR(lhs->source_location, "Cannot subscript non-pointer, non-array type '%T'.", lhs->type); @@ -1046,7 +1040,7 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { /// We can only deference pointers. case TK_AT: - if (!is_pointer(expr->unary.value->type)) + if (!type_is_pointer(expr->unary.value->type)) ERR(expr->unary.value->source_location, "Argument of '@' must be a pointer."); @@ -1100,8 +1094,8 @@ NODISCARD bool typecheck_expression(AST *ast, Node *expr) { /// If this is a pointer type, make sure it doesn’t point to an incomplete type. Type *base = expr->type; - while (base && is_pointer(base)) base = base->pointer.to; - if (base && is_pointer(expr->type /** (!) **/) && type_is_incomplete(base)) + while (base && type_is_pointer(base)) base = base->pointer.to; + if (base && type_is_pointer(expr->type /** (!) **/) && type_is_incomplete(base)) ERR(expr->source_location, "Cannot use pointer to incomplete type '%T'.", expr->type->pointer.to); From 3a9c115c3eaaed8aaf91043eefb249e1f6cb28a1 Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 7 Feb 2023 13:02:03 -0800 Subject: [PATCH 96/97] [Codegen/IR] Fix assigned type of `IR_LOAD` instructions This is sort of a bodge, but is work towards the final destination. --- src/codegen/intermediate_representation.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/codegen/intermediate_representation.c b/src/codegen/intermediate_representation.c index 82bebc533..b46d01aaf 100644 --- a/src/codegen/intermediate_representation.c +++ b/src/codegen/intermediate_representation.c @@ -485,7 +485,18 @@ IRInstruction *ir_load INSTRUCTION(load, IR_LOAD); load->operand = address; - load->type = address->type; + + Type *t = type_canonical(address->type); + // TODO: Use !(t && type_is_pointer(t)), once binary operators can + // properly select their return types (not just integer). 
+ if (!t) { + //print("address type: %T\n", address->type); + ir_femit_instruction(stdout, address); + if (t) ICE("Can not emit IR_LOAD from type %T as it is not a pointer", t); + else ICE("Can not emit IR_LOAD to NULL canonical type!"); + } + if (type_is_pointer(t)) load->type = t->pointer.to; + else load->type = t; mark_used(address, load); From 79c22b1572c3e787dc8262ed56d494de38df3ecb Mon Sep 17 00:00:00 2001 From: Lens Date: Tue, 7 Feb 2023 13:05:42 -0800 Subject: [PATCH 97/97] [TODO] NODISCARD should be default for non-void return types --- TODO.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/TODO.md b/TODO.md index 03e9a53e9..858e8dcae 100644 --- a/TODO.md +++ b/TODO.md @@ -15,12 +15,11 @@ - [ ] Resolve the two different kinds of array-type loading: entire copy vs pointer decay. Basically, `foo[2]` needs `foo` to be loaded as a pointer. `a : foo[2] = b` requires loading entire copy of `b` into `a`. - [ ] Optimisation - [ ] Zero subscript still does add/multiply when it doesn't need to - - [ ] Eliminate unused parameters (they are currently allocated registers) - [ ] Attributes - [ ] Parsing - [ ] `[[noreturn]]` - - [ ] `[[nodiscard]]` - - [ ] `[[nodiscard]]` on functions that return `void` should be an error. + - [ ] `[[discardable]]` + - [ ] Make "no discard" the default for all non-void returning functions. - [ ] `[[maybe_unused]]` - [ ] `[[deprecated]]` - [ ] `[[deprecated("reason")]]`