LensPlaysGames · LensPlaysGames · Feb 7, 2023 · Jan 21, 2023 · Jan 22, 2023 · Jan 22, 2023
diff --git a/TODO.md b/TODO.md
@@ -10,6 +10,9 @@
   - [ ] Function bodies must be blocks or preceded by `=`.
   - [ ] `ext` functions may have a body.
   - [ ] Implicit cast sema pass: `a + b` where `a` is a `byte` and `b` an `integer` -> `(a as integer) + b`
+- [ ] Optimisation
+  - [ ] Subscripting with a constant zero index still emits an add/multiply when it doesn't need to
+  - [ ] Eliminate unused parameters (they are currently allocated registers)
 - [ ] Attributes
   - [ ] Parsing
   - [ ] `[[noreturn]]`
@@ -36,12 +39,14 @@
     - [ ] Disallow overloading on the return value.
   - [ ] Proper checking for incomplete types in the parser.
 - [ ] Types in the IR
-  - [ ] Byte type
-  - [ ] Type info in codegen/IR.
+  - [x] Byte type
+  - [x] Type info in codegen/IR.
   - [ ] Backend: Handle size/alignment requirements
-  - [ ] Use eax, ax, al, etc.
+  - [x] Use eax, ax, al, etc.
   - [ ] Actually implementing casts.
+    - [ ] During codegen, we should actually output `zext`/`sext` if needed. Otherwise truncation is automatic.
   - [ ] Update IR parser
+  - [ ] Binary operators need to pick return type instead of strictly returning `integer`
 - [ ] Arrays
   - [ ] Semantic analysis for static arrays.
   - [ ] Codegen

diff --git a/examples/byte.un b/examples/byte.un
@@ -0,0 +1,4 @@
+a : byte = 34
+b : byte = 35
+c : byte = a + b
+c
diff --git a/src/ast.h b/src/ast.h
@@ -514,9 +514,11 @@ Type *ast_make_type_function(
 /// ===========================================================================
 /// Get a string representation of a type.
 /// \return The string representation of the type. The string is allocated
-///         as if with `malloc` and must be freed by the caller.
+///         with malloc() and must be freed by the caller.
 string typename(Type *type, bool colour);
 
+// FIXME: I don't know what canonical means, and this docstring doesn't
+// help me :P
 /// Get the canonical type of a type.
 /// \return NULL if the type is incomplete.
 Type *type_canonical(Type *type);
@@ -526,13 +528,15 @@ Type *type_canonical(Type *type);
 /// This function strips nested named types until there is only one left.
 Type *type_last_alias(Type *type);
 
+// FIXME: What makes a type complete vs incomplete, in the eyes of this
+// function?
 /// Check if a type is incomplete.
 bool type_is_incomplete(Type *type);
 
 /// Check if a canonical type is incomplete.
 bool type_is_incomplete_canon(Type *type);
 
-/// Get the size of a type.
+/// Get the size of a type, in bytes.
 usz type_sizeof(Type *type);
 
 /// Check if a type is void.

diff --git a/src/codegen.c b/src/codegen.c
@@ -1,5 +1,6 @@
 #include <codegen.h>
 
+#include <ast.h>
 #include <codegen/codegen_forward.h>
 #include <codegen/intermediate_representation.h>
 #include <codegen/x86_64/arch_x86_64.h>
@@ -153,7 +154,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) {
   case NODE_DECLARATION:
       expr->ir = expr->declaration.static_
         ? ir_create_static(ctx, expr->type, as_span(expr->declaration.name))
-        : ir_stack_allocate(ctx, type_sizeof(expr->type));
+        : ir_stack_allocate(ctx, expr->type);
 
       /// Emit the initialiser if there is one.
       if (expr->declaration.init) {
@@ -215,7 +216,7 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) {
 
     /// Insert a phi node for the result of the if in the join block.
     if (!type_is_void(expr->type)) {
-      IRInstruction *phi = ir_phi(ctx);
+      IRInstruction *phi = ir_phi(ctx, expr->type);
       ir_phi_argument(phi, last_then_block, expr->if_.then->ir);
       ir_phi_argument(phi, last_else_block, expr->if_.else_->ir);
       expr->ir = phi;
@@ -318,11 +319,20 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) {
   }
 
   /// Typecast.
-  case NODE_CAST: { TODO(); }
+  case NODE_CAST: {
+    Type *t_to = expr->type;
+    Type *t_from = expr->cast.value->type;
+
+    usz to_sz = type_sizeof(t_to);
+    usz from_sz = type_sizeof(t_from);
+
+    TODO("Codegen cast from %T to %T", t_from, t_to);
+  }
 
   /// Binary expression.
   case NODE_BINARY: {
-    Node * const lhs = expr->binary.lhs, * const rhs = expr->binary.rhs;
+    Node *const lhs = expr->binary.lhs;
+    Node *const rhs = expr->binary.rhs;
 
     /// Assignment needs to be handled separately.
     if (expr->binary.op == TK_COLON_EQ) {
@@ -350,9 +360,24 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) {
       return;
     }
 
+    // TODO: Just use lhs operand of subscript operator when right hand
+    // side is a compile-time-known zero value.
+
     /// Emit the operands.
     codegen_expr(ctx, lhs);
     codegen_expr(ctx, rhs);
+    if (expr->binary.op == TK_LBRACK) {
+      // Subscripts index by element, so scale the index by sizeof the array's element type to get a byte offset.
+      if (lhs->type->kind == TYPE_ARRAY) {
+        IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->array.of));
+        rhs->ir = ir_mul(ctx, rhs->ir, immediate);
+      }
+      // Likewise, a pointer subscript needs to be multiplied by sizeof the pointee type (must test TYPE_POINTER, not TYPE_ARRAY).
+      else if (lhs->type->kind == TYPE_POINTER) {
+        IRInstruction *immediate = ir_immediate(ctx, t_integer, type_sizeof(lhs->type->pointer.to));
+        rhs->ir = ir_mul(ctx, rhs->ir, immediate);
+      }
+    }
 
     /// Emit the binary instruction.
     switch (expr->binary.op) {
@@ -400,9 +425,9 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) {
         case TK_AT:
           /// TODO: This check for a function pointer is a bit sus. We shouldn’t
           ///       even get here if this is actually a function pointer...
-          if (expr->unary.value->type->pointer.to->kind == TYPE_FUNCTION) {
+          if (expr->unary.value->type->pointer.to->kind == TYPE_FUNCTION)
             expr->ir = expr->unary.value->ir;
-          } else {
+          else {
             expr->ir = ir_load(ctx, expr->unary.value->ir);
           }
           return;
@@ -423,12 +448,17 @@ static void codegen_expr(CodegenContext *ctx, Node *expr) {
   /// Literal expression. Only integer literals are supported for now.
   case NODE_LITERAL:
     if (expr->literal.type != TK_NUMBER) DIAG(DIAG_SORRY, expr->source_location, "Emitting non-integer literals not supported");
-    expr->ir = ir_immediate(ctx, expr->literal.integer);
+    // TODO: SEMA should probably have already lowered integer_literal type, so we *should* have a type already available on the literal node...
+    expr->ir = ir_immediate(ctx, expr->type, expr->literal.integer);
     return;
 
   /// Variable reference.
   case NODE_VARIABLE_REFERENCE:
     expr->ir = ir_load(ctx, expr->var->val.node->ir);
+    // TODO: Be smarter about when an array should decay to a pointer or not.
+    //       Maybe it never should, and this should be implemented per backend?
+    if (expr->ir->type->kind == TYPE_ARRAY)
+      expr->ir->type = ast_make_type_pointer(ctx->ast, expr->type->source_location, expr->type->array.of);
     return;
 
   /// Function reference. These should have all been removed by the semantic analyser.
@@ -456,13 +486,11 @@ void codegen_function(CodegenContext *ctx, Node *node) {
   /// Emit the function body.
   codegen_expr(ctx, node->function.body);
 
-  /// If the we can return from here, and this function doesn’t return void,
+  /// If we can return from here, and this function doesn’t return void,
   /// then return the return value; otherwise, just return nothing.
-  if (!ir_is_closed(ctx->block) && !type_is_void(node->type->function.return_type)) {
+  if (!ir_is_closed(ctx->block) && !type_is_void(node->type->function.return_type))
     ir_return(ctx, node->function.body->ir);
-  } else {
-    ir_return(ctx, NULL);
-  }
+  else ir_return(ctx, NULL);
 }
 
 /// ===========================================================================