diff --git a/parser/README.md b/parser/README.md index 523cd096a0c..12a154eb605 100644 --- a/parser/README.md +++ b/parser/README.md @@ -1,2 +1,5 @@ # fuel-vm-hll High Level Language (Name Subject to Change) for the FuelVM + +# Minimum supported Rust version +As of now, this code was developed on and is guaranteed to run on Rust 1.50 stable. diff --git a/parser/src/control_flow_analysis/analyze_return_paths.rs b/parser/src/control_flow_analysis/analyze_return_paths.rs new file mode 100644 index 00000000000..65e32493849 --- /dev/null +++ b/parser/src/control_flow_analysis/analyze_return_paths.rs @@ -0,0 +1,294 @@ +//! This is the flow graph, a graph which contains edges that represent possible steps of program +//! execution. + +use super::*; +use super::{ControlFlowGraph, EntryPoint, ExitPoint, Graph}; +use crate::semantics::{ + ast_node::{ + TypedCodeBlock, TypedDeclaration, TypedExpression, TypedFunctionDeclaration, + TypedReassignment, TypedVariableDeclaration, TypedWhileLoop, + }, + TypedAstNode, TypedAstNodeContent, +}; +use crate::types::ResolvedType; +use crate::Ident; +use crate::{error::*, semantics::TypedParseTree}; +use pest::Span; +use petgraph::prelude::NodeIndex; + +impl<'sc> ControlFlowGraph<'sc> { + pub(crate) fn construct_return_path_graph(ast: &TypedParseTree<'sc>) -> Self { + let mut graph = ControlFlowGraph { + graph: Graph::new(), + entry_points: vec![], + namespace: Default::default(), + }; + // do a depth first traversal and cover individual inner ast nodes + let mut leaves = vec![]; + for ast_entrypoint in ast.root_nodes.iter() { + let l_leaves = connect_node(ast_entrypoint, &mut graph, &leaves); + + match l_leaves { + NodeConnection::NextStep(nodes) => leaves = nodes, + _ => (), + } + } + + graph + } + /// This function looks through the control flow graph and ensures that all paths that are + /// required to return a value do, indeed, return a value of the correct type. + /// It does this by checking every function declaration in both the methods namespace + /// and the functions namespace and validating that all paths leading to the function exit node + /// return the same type. Additionally, if a function has a return type, all paths must indeed + /// lead to the function exit node. + pub(crate) fn analyze_return_paths(&self) -> Vec> { + let mut errors = vec![]; + for ( + name, + FunctionNamespaceEntry { + entry_point, + exit_point, + return_type, + }, + ) in &self.namespace.function_namespace + { + // For every node connected to the entry point + errors.append(&mut self.ensure_all_paths_reach_exit( + *entry_point, + *exit_point, + name.primary_name, + return_type, + )); + } + errors + } + fn ensure_all_paths_reach_exit( + &self, + entry_point: EntryPoint, + exit_point: ExitPoint, + function_name: &'sc str, + return_ty: &ResolvedType<'sc>, + ) -> Vec> { + let mut rovers = vec![entry_point]; + let mut errors = vec![]; + let mut max_iterations = 50; + while rovers.len() >= 1 && rovers[0] != exit_point && max_iterations > 0 { + max_iterations -= 1; + /* + println!( + "{:?}", + rovers + .iter() + .map(|ix| self.graph[*ix].clone()) + .collect::>() + ); + */ + rovers = rovers + .into_iter() + .filter(|idx| *idx != exit_point) + .collect(); + let mut next_rovers = vec![]; + for rover in rovers { + let mut neighbors = self + .graph + .neighbors_directed(rover, petgraph::Direction::Outgoing) + .collect::>(); + if neighbors.is_empty() && *return_ty != ResolvedType::Unit { + errors.push(CompileError::PathDoesNotReturn { + // TODO: unwrap_to_node is a shortcut. In reality, the graph type should be + // different. To save some code duplication, + span: self.graph[rover].unwrap_to_node().span.clone(), + function_name, + ty: return_ty.friendly_type_str(), + }); + } + next_rovers.append(&mut neighbors); + } + rovers = next_rovers; + } + + errors + } +} + +/// The resulting edges from connecting a node to the graph. +enum NodeConnection { + /// This represents a node that steps on to the next node. + NextStep(Vec), + /// This represents a return or implicit return node, which aborts the stepwise flow. + Return(NodeIndex), +} + +fn connect_node<'sc>( + node: &TypedAstNode<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], +) -> NodeConnection { + let span = node.span.clone(); + match &node.content { + TypedAstNodeContent::ReturnStatement(_) + | TypedAstNodeContent::ImplicitReturnExpression(_) => { + let this_index = graph.add_node(node.into()); + for leaf_ix in leaves { + graph.add_edge(*leaf_ix, this_index, "".into()); + } + NodeConnection::Return(this_index) + } + TypedAstNodeContent::WhileLoop(TypedWhileLoop { .. }) => { + // An abridged version of the dead code analysis for a while loop + // since we don't really care about what the loop body contains when detecting + // divergent paths + NodeConnection::NextStep(vec![graph.add_node(node.into())]) + } + TypedAstNodeContent::Expression(TypedExpression { .. }) => { + let entry = graph.add_node(node.into()); + // insert organizational dominator node + // connected to all current leaves + for leaf in leaves { + graph.add_edge(*leaf, entry, "".into()); + } + NodeConnection::NextStep(vec![entry]) + } + TypedAstNodeContent::SideEffect => NodeConnection::NextStep(leaves.to_vec()), + TypedAstNodeContent::Declaration(decl) => { + NodeConnection::NextStep(connect_declaration(node, &decl, graph, span, leaves)) + } + } +} + +fn connect_declaration<'sc>( + node: &TypedAstNode<'sc>, + decl: &TypedDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + span: Span<'sc>, + leaves: &[NodeIndex], +) -> Vec { + use TypedDeclaration::*; + match decl { + TraitDeclaration(_) | StructDeclaration(_) | EnumDeclaration(_) => vec![], + VariableDeclaration(TypedVariableDeclaration { .. }) => { + let entry_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, entry_node, "".into()); + } + vec![entry_node] + } + FunctionDeclaration(fn_decl) => { + let entry_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, entry_node, "".into()); + } + connect_typed_fn_decl(fn_decl, graph, entry_node, span); + vec![] + } + Reassignment(TypedReassignment { .. }) => { + let entry_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, entry_node, "".into()); + } + vec![entry_node] + } + ImplTrait { + trait_name, + methods, + .. + } => { + let entry_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, entry_node, "".into()); + } + connect_impl_trait(trait_name, graph, methods, entry_node); + vec![] + } + SideEffect | ErrorRecovery => { + unreachable!("These are error cases and should be removed in the type checking stage. ") + } + } +} + +/// Implementations of traits are top-level things that are not conditional, so +/// we insert an edge from the function's starting point to the declaration to show +/// that the declaration was indeed at some point implemented. +/// Additionally, we insert the trait's methods into the method namespace in order to +/// track which exact methods are dead code. +fn connect_impl_trait<'sc>( + trait_name: &Ident<'sc>, + graph: &mut ControlFlowGraph<'sc>, + methods: &[TypedFunctionDeclaration<'sc>], + entry_node: NodeIndex, +) { + let mut methods_and_indexes = vec![]; + // insert method declarations into the graph + for fn_decl in methods { + let fn_decl_entry_node = graph.add_node(ControlFlowGraphNode::MethodDeclaration { + span: fn_decl.span.clone(), + method_name: fn_decl.name.clone(), + }); + graph.add_edge(entry_node, fn_decl_entry_node, "".into()); + // connect the impl declaration node to the functions themselves, as all trait functions are + // public if the trait is in scope + connect_typed_fn_decl(&fn_decl, graph, fn_decl_entry_node, fn_decl.span.clone()); + methods_and_indexes.push((fn_decl.name.clone(), fn_decl_entry_node)); + } + // Now, insert the methods into the trait method namespace. + graph + .namespace + .insert_trait_methods(trait_name.clone(), methods_and_indexes); +} + +/// The strategy here is to populate the trait namespace with just one singular trait +/// and if it is ever implemented, by virtue of type checking, we know all interface points +/// were met. +/// Upon implementation, we can populate the methods namespace and track dead functions that way. +/// TL;DR: At this point, we _only_ track the wholistic trait declaration and not the functions +/// contained within. +/// +/// The trait node itself has already been added (as `entry_node`), so we just need to insert that +/// node index into the namespace for the trait. + +/// When connecting a function declaration, we are inserting a new root node into the graph that +/// has no entry points, since it is just a declaration. +/// When something eventually calls it, it gets connected to the declaration. +fn connect_typed_fn_decl<'sc>( + fn_decl: &TypedFunctionDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, + _span: Span<'sc>, +) { + let fn_exit_node = graph.add_node(format!("\"{}\" fn exit", fn_decl.name.primary_name).into()); + let return_nodes = depth_first_insertion_code_block(&fn_decl.body, graph, &[entry_node]); + for node in return_nodes { + graph.add_edge(node, fn_exit_node, "return".into()); + } + + let namespace_entry = FunctionNamespaceEntry { + entry_point: entry_node, + exit_point: fn_exit_node, + return_type: fn_decl.return_type.clone(), + }; + graph + .namespace + .insert_function(fn_decl.name.clone(), namespace_entry); +} + +type ReturnStatementNodes = Vec; + +fn depth_first_insertion_code_block<'sc>( + node_content: &TypedCodeBlock<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], +) -> ReturnStatementNodes { + let mut leaves = leaves.to_vec(); + let mut return_nodes = vec![]; + for node in node_content.contents.iter() { + let this_node = connect_node(node, graph, &leaves); + match this_node { + NodeConnection::NextStep(nodes) => leaves = nodes, + NodeConnection::Return(node) => { + return_nodes.push(node); + } + } + } + return_nodes +} diff --git a/parser/src/control_flow_analysis/dead_code_analysis.rs b/parser/src/control_flow_analysis/dead_code_analysis.rs new file mode 100644 index 00000000000..926da79220a --- /dev/null +++ b/parser/src/control_flow_analysis/dead_code_analysis.rs @@ -0,0 +1,573 @@ +use super::*; +use crate::{ + parse_tree::Visibility, + semantics::ast_node::{TypedExpressionVariant, TypedTraitDeclaration}, + Ident, TreeType, +}; +use crate::{ + semantics::{ + ast_node::{ + TypedCodeBlock, TypedDeclaration, TypedEnumDeclaration, TypedExpression, + TypedFunctionDeclaration, TypedReassignment, TypedVariableDeclaration, TypedWhileLoop, + }, + TypedAstNode, TypedAstNodeContent, TypedParseTree, + }, + CompileWarning, Warning, +}; +use pest::Span; +use petgraph::algo::has_path_connecting; +use petgraph::prelude::NodeIndex; + +impl<'sc> ControlFlowGraph<'sc> { + pub(crate) fn find_dead_code(&self) -> Vec> { + // dead code is code that has no path to the entry point + let mut dead_nodes = vec![]; + for destination in self.graph.node_indices() { + let mut is_connected = false; + for entry in &self.entry_points { + if has_path_connecting(&self.graph, *entry, destination, None) { + is_connected = true; + break; + } + } + if !is_connected { + dead_nodes.push(destination); + } + } + let dead_enum_variant_warnings = dead_nodes + .iter() + .filter_map(|x| match &self.graph[*x] { + ControlFlowGraphNode::EnumVariant { span, variant_name } => Some(CompileWarning { + span: span.clone(), + warning_content: Warning::DeadEnumVariant { + variant_name: variant_name.to_string(), + }, + }), + _ => None, + }) + .collect::>(); + + let dead_ast_node_warnings = dead_nodes + .into_iter() + .filter_map(|x| match &self.graph[x] { + ControlFlowGraphNode::ProgramNode(node) => { + Some(construct_dead_code_warning_from_node(node)) + } + ControlFlowGraphNode::EnumVariant { span, variant_name } => Some(CompileWarning { + span: span.clone(), + warning_content: Warning::DeadEnumVariant { + variant_name: variant_name.to_string(), + }, + }), + ControlFlowGraphNode::MethodDeclaration { span, .. } => Some(CompileWarning { + span: span.clone(), + warning_content: Warning::DeadMethod, + }), + ControlFlowGraphNode::OrganizationalDominator(..) => None, + }) + .collect::>(); + + let all_warnings = [dead_enum_variant_warnings, dead_ast_node_warnings].concat(); + // filter out any overlapping spans -- if a span is contained within another one, + // remove it. + all_warnings + .clone() + .into_iter() + .filter(|CompileWarning { span, .. }| { + // if any other warnings contain a span which completely covers this one, filter + // out this one. + all_warnings + .iter() + .find( + |CompileWarning { + span: other_span, .. + }| { + other_span.end() > span.end() && other_span.start() < span.start() + }, + ) + .is_none() + }) + .collect() + } + /// Constructs a graph that is designed to identify unused declarations and sections of code. + pub(crate) fn construct_dead_code_graph( + ast: &TypedParseTree<'sc>, + tree_type: TreeType, + ) -> Self { + let mut graph = ControlFlowGraph { + graph: Graph::new(), + entry_points: vec![], + namespace: Default::default(), + }; + // do a depth first traversal and cover individual inner ast nodes + let mut leaves = vec![]; + let exit_node = Some(graph.add_node(("Program exit".to_string()).into())); + for ast_entrypoint in ast.root_nodes.iter() { + let (l_leaves, _new_exit_node) = + connect_node(ast_entrypoint, &mut graph, &leaves, exit_node); + + leaves = l_leaves; + } + + // calculate the entry points based on the tree type + graph.entry_points = match tree_type { + TreeType::Predicate | TreeType::Script => { + // a predicate or script have a main function as the only entry point + vec![ + graph + .graph + .node_indices() + .find(|i| match graph.graph[*i] { + ControlFlowGraphNode::OrganizationalDominator(_) => false, + ControlFlowGraphNode::ProgramNode(TypedAstNode { + content: + TypedAstNodeContent::Declaration( + TypedDeclaration::FunctionDeclaration( + TypedFunctionDeclaration { ref name, .. }, + ), + ), + .. + }) => name.primary_name == "main", + _ => false, + }) + .unwrap(), + ] + } + TreeType::Contract | TreeType::Library => graph + .graph + .node_indices() + .filter(|i| match graph.graph[*i] { + ControlFlowGraphNode::OrganizationalDominator(_) => false, + ControlFlowGraphNode::ProgramNode(TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::FunctionDeclaration( + TypedFunctionDeclaration { + visibility: Visibility::Public, + .. + }, + )), + .. + }) => true, + ControlFlowGraphNode::ProgramNode(TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::TraitDeclaration( + TypedTraitDeclaration { + visibility: Visibility::Public, + .. + }, + )), + .. + }) => true, + ControlFlowGraphNode::ProgramNode(TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::ImplTrait { .. }), + .. + }) => true, + _ => false, + }) + .collect(), + }; + graph + } +} +fn connect_node<'sc>( + node: &TypedAstNode<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], + exit_node: Option, +) -> (Vec, Option) { + // let mut graph = graph.clone(); + let span = node.span.clone(); + match &node.content { + TypedAstNodeContent::ReturnStatement(_) + | TypedAstNodeContent::ImplicitReturnExpression(_) => { + let this_index = graph.add_node(node.into()); + for leaf_ix in leaves { + graph.add_edge(*leaf_ix, this_index, "".into()); + } + // connect return to the exit node + if let Some(exit_node) = exit_node { + graph.add_edge(this_index, exit_node, "return".into()); + (vec![], None) + } else { + (vec![], None) + } + } + TypedAstNodeContent::WhileLoop(TypedWhileLoop { body, .. }) => { + // a while loop can loop back to the beginning, + // or it can terminate. + // so we connect the _end_ of the while loop _both_ to its beginning and the next node. + // the loop could also be entirely skipped + + let entry = graph.add_node(node.into()); + let while_loop_exit = graph.add_node("while loop exit".to_string().into()); + for leaf in leaves { + graph.add_edge(*leaf, entry, "".into()); + } + // it is possible for a whole while loop to be skipped so add edge from + // beginning of while loop straight to exit + graph.add_edge( + entry, + while_loop_exit, + "condition is initially false".into(), + ); + let mut leaves = vec![entry]; + let (l_leaves, _l_exit_node) = + depth_first_insertion_code_block(body, graph, &leaves, exit_node); + // insert edges from end of block back to beginning of it + for leaf in &l_leaves { + graph.add_edge(*leaf, entry, "loop repeats".into()); + } + + leaves = l_leaves; + for leaf in leaves { + graph.add_edge(leaf, while_loop_exit, "".into()); + } + (vec![while_loop_exit], exit_node) + } + TypedAstNodeContent::Expression(TypedExpression { + expression: expr_variant, + .. + }) => { + let entry = graph.add_node(node.into()); + // insert organizational dominator node + // connected to all current leaves + for leaf in leaves { + graph.add_edge(*leaf, entry, "".into()); + } + + ( + connect_expression(expr_variant, graph, &[entry], exit_node), + exit_node, + ) + } + TypedAstNodeContent::SideEffect => (leaves.to_vec(), exit_node), + TypedAstNodeContent::Declaration(decl) => { + // all leaves connect to this node, then this node is the singular leaf + let decl_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, decl_node, "".into()); + } + ( + connect_declaration(&decl, graph, decl_node, span, exit_node), + exit_node, + ) + } + } +} + +fn connect_declaration<'sc>( + decl: &TypedDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, + span: Span<'sc>, + exit_node: Option, +) -> Vec { + use TypedDeclaration::*; + match decl { + VariableDeclaration(TypedVariableDeclaration { body, .. }) => { + connect_expression(&body.expression, graph, &[entry_node], exit_node) + } + FunctionDeclaration(fn_decl) => { + connect_typed_fn_decl(fn_decl, graph, entry_node, span, exit_node); + vec![] + } + TraitDeclaration(trait_decl) => { + connect_trait_declaration(&trait_decl, graph, entry_node); + vec![] + } + StructDeclaration(_) => todo!("track each struct field's usage"), + EnumDeclaration(enum_decl) => { + connect_enum_declaration(&enum_decl, graph, entry_node); + vec![] + } + Reassignment(TypedReassignment { rhs, .. }) => { + connect_expression(&rhs.expression, graph, &[entry_node], exit_node) + } + ImplTrait { + trait_name, + methods, + .. + } => { + connect_impl_trait(trait_name, graph, methods, entry_node); + vec![] + } + SideEffect | ErrorRecovery => { + unreachable!("These are error cases and should be removed in the type checking stage. ") + } + } +} + +/// Implementations of traits are top-level things that are not conditional, so +/// we insert an edge from the function's starting point to the declaration to show +/// that the declaration was indeed at some point implemented. +/// Additionally, we insert the trait's methods into the method namespace in order to +/// track which exact methods are dead code. +fn connect_impl_trait<'sc>( + trait_name: &Ident<'sc>, + graph: &mut ControlFlowGraph<'sc>, + methods: &[TypedFunctionDeclaration<'sc>], + entry_node: NodeIndex, +) { + let graph_c = graph.clone(); + let trait_decl_node = graph_c.namespace.find_trait(trait_name); + match trait_decl_node { + None => { + let edge_ix = graph.add_node("External trait".into()); + graph.add_edge(entry_node, edge_ix, "".into()); + } + Some(trait_decl_node) => { + graph.add_edge_from_entry(entry_node, "".into()); + graph.add_edge(entry_node, *trait_decl_node, "".into()); + } + } + let mut methods_and_indexes = vec![]; + // insert method declarations into the graph + for fn_decl in methods { + let fn_decl_entry_node = graph.add_node(ControlFlowGraphNode::MethodDeclaration { + span: fn_decl.span.clone(), + method_name: fn_decl.name.clone(), + }); + graph.add_edge(entry_node, fn_decl_entry_node, "".into()); + // connect the impl declaration node to the functions themselves, as all trait functions are + // public if the trait is in scope + connect_typed_fn_decl( + &fn_decl, + graph, + fn_decl_entry_node, + fn_decl.span.clone(), + None, + ); + methods_and_indexes.push((fn_decl.name.clone(), fn_decl_entry_node)); + } + // Now, insert the methods into the trait method namespace. + graph + .namespace + .insert_trait_methods(trait_name.clone(), methods_and_indexes); +} + +/// The strategy here is to populate the trait namespace with just one singular trait +/// and if it is ever implemented, by virtue of type checking, we know all interface points +/// were met. +/// Upon implementation, we can populate the methods namespace and track dead functions that way. +/// TL;DR: At this point, we _only_ track the wholistic trait declaration and not the functions +/// contained within. +/// +/// The trait node itself has already been added (as `entry_node`), so we just need to insert that +/// node index into the namespace for the trait. +fn connect_trait_declaration<'sc>( + decl: &TypedTraitDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, +) { + graph.namespace.add_trait(decl.name.clone(), entry_node); +} + +/// For an enum declaration, we want to make a declaration node for every individual enum +/// variant. When a variant is constructed, we can point an edge at that variant. This way, +/// we can see clearly, and thusly warn, when individual variants are not ever constructed. +fn connect_enum_declaration<'sc>( + enum_decl: &TypedEnumDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, +) { + // keep a mapping of each variant + for variant in &enum_decl.variants { + let variant_index = graph.add_node(variant.into()); + + // graph.add_edge(entry_node, variant_index, "".into()); + graph.namespace.insert_enum( + enum_decl.name.clone(), + entry_node, + variant.name.clone(), + variant_index, + ); + } +} + +/// When connecting a function declaration, we are inserting a new root node into the graph that +/// has no entry points, since it is just a declaration. +/// When something eventually calls it, it gets connected to the declaration. +fn connect_typed_fn_decl<'sc>( + fn_decl: &TypedFunctionDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, + _span: Span<'sc>, + exit_node: Option, +) { + let fn_exit_node = graph.add_node(format!("\"{}\" fn exit", fn_decl.name.primary_name).into()); + let (_exit_nodes, _exit_node) = + depth_first_insertion_code_block(&fn_decl.body, graph, &[entry_node], Some(fn_exit_node)); + if let Some(exit_node) = exit_node { + graph.add_edge(fn_exit_node, exit_node, "".into()); + } + + let namespace_entry = FunctionNamespaceEntry { + entry_point: entry_node, + exit_point: fn_exit_node, + return_type: fn_decl.return_type.clone(), + }; + + graph + .namespace + .insert_function(fn_decl.name.clone(), namespace_entry); +} + +fn depth_first_insertion_code_block<'sc>( + node_content: &TypedCodeBlock<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], + exit_node: Option, +) -> (Vec, Option) { + let mut leaves = leaves.to_vec(); + let mut exit_node = exit_node.clone(); + for node in node_content.contents.iter() { + let (this_node, l_exit_node) = connect_node(node, graph, &leaves, exit_node); + leaves = this_node; + exit_node = l_exit_node; + } + (leaves, exit_node) +} + +/// connects any inner parts of an expression to the graph +/// note the main expression node has already been inserted +fn connect_expression<'sc>( + expr_variant: &TypedExpressionVariant<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], + exit_node: Option, +) -> Vec { + use TypedExpressionVariant::*; + match expr_variant { + FunctionApplication { name, .. } => { + let mut is_external = false; + // find the function in the namespace + let (fn_entrypoint, fn_exit_point) = graph + .namespace + .get_function(&name.suffix) + .cloned() + .map( + |FunctionNamespaceEntry { + entry_point, + exit_point, + .. + }| (entry_point, exit_point), + ) + .unwrap_or_else(|| { + let node_idx = + graph.add_node(format!("extern fn {}()", name.suffix.primary_name).into()); + is_external = true; + (node_idx, node_idx) + }); + for leaf in leaves { + graph.add_edge(*leaf, fn_entrypoint, "".into()); + } + // the exit points get connected to an exit node for the application + // if this is external, then we don't add the body to the graph so there's no point in + // an exit organizational dominator + if !is_external { + if let Some(exit_node) = exit_node { + graph.add_edge(fn_exit_point, exit_node, "".into()); + vec![exit_node] + } else { + vec![fn_exit_point] + } + } else { + vec![fn_entrypoint] + } + } + Literal(_lit) => leaves.to_vec(), + VariableExpression { .. } => leaves.to_vec(), + EnumInstantiation { + enum_name, + variant_name, + .. + } => { + // connect this particular instantiation to its variants declaration + connect_enum_instantiation(enum_name, variant_name, graph, leaves) + } + a => todo!("{:?}", a), + } +} + +fn connect_enum_instantiation<'sc>( + enum_name: &Ident<'sc>, + variant_name: &Ident<'sc>, + graph: &mut ControlFlowGraph, + leaves: &[NodeIndex], +) -> Vec { + let (decl_ix, variant_index) = graph + .namespace + .find_enum_variant_index(enum_name, variant_name) + .unwrap_or_else(|| { + let node_idx = graph.add_node( + format!( + "extern enum {}::{}", + enum_name.primary_name, variant_name.primary_name + ) + .into(), + ); + (node_idx, node_idx) + }); + + // insert organizational nodes for instantiation of enum + let enum_instantiation_entry_idx = graph.add_node("enum instantiation entry".into()); + let enum_instantiation_exit_idx = graph.add_node("enum instantiation exit".into()); + + // connect to declaration node itself to show that the declaration is used + graph.add_edge(enum_instantiation_entry_idx, decl_ix, "".into()); + for leaf in leaves { + graph.add_edge(*leaf, enum_instantiation_entry_idx, "".into()); + } + + graph.add_edge(decl_ix, variant_index, "".into()); + graph.add_edge(variant_index, enum_instantiation_exit_idx, "".into()); + + vec![enum_instantiation_exit_idx] +} + +fn construct_dead_code_warning_from_node<'sc>(node: &TypedAstNode<'sc>) -> CompileWarning<'sc> { + match node { + // if this is a function, struct, or trait declaration that is never called, then it is dead + // code. + TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::FunctionDeclaration( + TypedFunctionDeclaration { name, .. }, + )), + .. + } => CompileWarning { + span: name.span.clone(), + warning_content: Warning::DeadFunctionDeclaration, + }, + TypedAstNode { + content: TypedAstNodeContent::Declaration(TypedDeclaration::StructDeclaration { .. }), + span, + } => CompileWarning { + span: span.clone(), + warning_content: Warning::DeadDeclaration, + }, + TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::TraitDeclaration( + TypedTraitDeclaration { name, .. }, + )), + .. + } => CompileWarning { + span: name.span.clone(), + warning_content: Warning::DeadTrait, + }, + TypedAstNode { + content: TypedAstNodeContent::Declaration(TypedDeclaration::EnumDeclaration(..)), + span, + } => CompileWarning { + span: span.clone(), + warning_content: Warning::DeadDeclaration, + }, + // otherwise, this is unreachable. + TypedAstNode { span, .. } => CompileWarning { + span: span.clone(), + warning_content: Warning::UnreachableCode, + }, + } +} diff --git a/parser/src/control_flow_analysis/flow_graph/mod.rs b/parser/src/control_flow_analysis/flow_graph/mod.rs index 3ef90a4a68f..21b43fe15ac 100644 --- a/parser/src/control_flow_analysis/flow_graph/mod.rs +++ b/parser/src/control_flow_analysis/flow_graph/mod.rs @@ -1,38 +1,32 @@ //! This is the flow graph, a graph which contains edges that represent possible steps of program //! execution. -use crate::{ - parse_tree::Visibility, - semantics::ast_node::{TypedEnumVariant, TypedExpressionVariant, TypedTraitDeclaration}, - Ident, TreeType, -}; -use crate::{ - semantics::{ - ast_node::{ - TypedCodeBlock, TypedDeclaration, TypedEnumDeclaration, TypedExpression, - TypedFunctionDeclaration, TypedReassignment, TypedVariableDeclaration, TypedWhileLoop, - }, - TypedAstNode, TypedAstNodeContent, TypedParseTree, - }, - CompileWarning, Warning, -}; +use crate::semantics::TypedAstNode; +use crate::{semantics::ast_node::TypedEnumVariant, Ident}; use pest::Span; -use petgraph::algo::has_path_connecting; + use petgraph::{graph::EdgeIndex, prelude::NodeIndex}; mod namespace; use namespace::ControlFlowNamespace; +pub(crate) use namespace::FunctionNamespaceEntry; pub type EntryPoint = NodeIndex; pub type ExitPoint = NodeIndex; +#[derive(Clone)] +/// A graph that can be used to model the control flow of a fuel HLL program. +/// This graph is used as the basis for all of the algorithms in the control flow analysis portion +/// of the compiler. pub struct ControlFlowGraph<'sc> { - graph: Graph<'sc>, - entry_points: Vec, + pub(crate) graph: Graph<'sc>, + pub(crate) entry_points: Vec, + pub(crate) namespace: ControlFlowNamespace<'sc>, } -type Graph<'sc> = petgraph::Graph, ControlFlowGraphEdge>; +pub type Graph<'sc> = petgraph::Graph, ControlFlowGraphEdge>; +#[derive(Clone)] pub struct ControlFlowGraphEdge(String); impl std::fmt::Debug for ControlFlowGraphEdge { @@ -47,6 +41,7 @@ impl std::convert::From<&str> for ControlFlowGraphEdge { } } +#[derive(Clone)] pub enum ControlFlowGraphNode<'sc> { OrganizationalDominator(String), ProgramNode(TypedAstNode<'sc>), @@ -60,6 +55,15 @@ pub enum ControlFlowGraphNode<'sc> { }, } +impl<'sc> ControlFlowGraphNode<'sc> { + pub(crate) fn unwrap_to_node(&self) -> TypedAstNode<'sc> { + match self { + ControlFlowGraphNode::ProgramNode(node) => node.clone(), + _ => panic!("Called unwrap_to_node() on a non-program-node value."), + } + } +} + impl<'sc> std::fmt::Debug for ControlFlowGraphNode<'sc> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let text = match self { @@ -104,10 +108,15 @@ impl std::convert::From<&str> for ControlFlowGraphNode<'_> { } impl<'sc> ControlFlowGraph<'sc> { - fn add_node(&mut self, node: ControlFlowGraphNode<'sc>) -> NodeIndex { + pub(crate) fn add_edge_from_entry(&mut self, to: NodeIndex, label: ControlFlowGraphEdge) { + for entry in &self.entry_points { + self.graph.add_edge(*entry, to, label.clone()); + } + } + pub(crate) fn add_node(&mut self, node: ControlFlowGraphNode<'sc>) -> NodeIndex { self.graph.add_node(node) } - fn add_edge( + pub(crate) fn add_edge( &mut self, from: NodeIndex, to: NodeIndex, @@ -115,563 +124,11 @@ impl<'sc> ControlFlowGraph<'sc> { ) -> EdgeIndex { self.graph.add_edge(from, to, edge) } - pub(crate) fn from_tree(ast: &TypedParseTree<'sc>, tree_type: TreeType) -> Self { - let mut graph = ControlFlowGraph { - graph: Graph::new(), - entry_points: vec![], - }; - let mut namespace = Default::default(); - // do a depth first traversal and cover individual inner ast nodes - let mut leaves = vec![]; - let exit_node = Some(graph.add_node(("Program exit".to_string()).into())); - for ast_entrypoint in ast.root_nodes.iter() { - let (l_leaves, _new_exit_node) = connect_node( - ast_entrypoint, - &mut graph, - &leaves, - &mut namespace, - exit_node, - ); - - leaves = l_leaves; - } - - // calculate the entry points based on the tree type - graph.entry_points = match tree_type { - TreeType::Predicate | TreeType::Script => { - // a predicate or script have a main function as the only entry point - vec![ - graph - .graph - .node_indices() - .find(|i| match graph.graph[*i] { - ControlFlowGraphNode::OrganizationalDominator(_) => false, - ControlFlowGraphNode::ProgramNode(TypedAstNode { - content: - TypedAstNodeContent::Declaration( - TypedDeclaration::FunctionDeclaration( - TypedFunctionDeclaration { ref name, .. }, - ), - ), - .. - }) => name.primary_name == "main", - _ => false, - }) - .unwrap(), - ] - } - TreeType::Contract | TreeType::Library => graph - .graph - .node_indices() - .filter(|i| match graph.graph[*i] { - ControlFlowGraphNode::OrganizationalDominator(_) => false, - ControlFlowGraphNode::ProgramNode(TypedAstNode { - content: - TypedAstNodeContent::Declaration(TypedDeclaration::FunctionDeclaration( - TypedFunctionDeclaration { - visibility: Visibility::Public, - .. - }, - )), - .. - }) => true, - ControlFlowGraphNode::ProgramNode(TypedAstNode { - content: - TypedAstNodeContent::Declaration(TypedDeclaration::TraitDeclaration( - TypedTraitDeclaration { - visibility: Visibility::Public, - .. - }, - )), - .. - }) => true, - ControlFlowGraphNode::ProgramNode(TypedAstNode { - content: - TypedAstNodeContent::Declaration(TypedDeclaration::ImplTrait { .. }), - .. - }) => true, - _ => false, - }) - .collect(), - }; - graph.visualize(); - - graph - } - - pub(crate) fn find_dead_code(&self) -> Vec> { - // dead code is code that has no path to the entry point - let mut dead_nodes = vec![]; - for destination in self.graph.node_indices() { - let mut is_connected = false; - for entry in &self.entry_points { - if has_path_connecting(&self.graph, *entry, destination, None) { - is_connected = true; - break; - } - } - if !is_connected { - dead_nodes.push(destination); - } - } - let dead_enum_variant_warnings = dead_nodes - .iter() - .filter_map(|x| match &self.graph[*x] { - ControlFlowGraphNode::EnumVariant { span, variant_name } => Some(CompileWarning { - span: span.clone(), - warning_content: Warning::DeadEnumVariant { - variant_name: variant_name.to_string(), - }, - }), - _ => None, - }) - .collect::>(); - - let dead_ast_node_warnings = dead_nodes - .into_iter() - .filter_map(|x| match &self.graph[x] { - ControlFlowGraphNode::ProgramNode(node) => { - Some(construct_dead_code_warning_from_node(node)) - } - ControlFlowGraphNode::EnumVariant { span, variant_name } => Some(CompileWarning { - span: span.clone(), - warning_content: Warning::DeadEnumVariant { - variant_name: variant_name.to_string(), - }, - }), - ControlFlowGraphNode::MethodDeclaration { span, .. } => Some(CompileWarning { - span: span.clone(), - warning_content: Warning::DeadMethod, - }), - ControlFlowGraphNode::OrganizationalDominator(..) => None, - }) - .collect::>(); - - let all_warnings = [dead_enum_variant_warnings, dead_ast_node_warnings].concat(); - // filter out any overlapping spans -- if a span is contained within another one, - // remove it. - all_warnings - .clone() - .into_iter() - .filter(|CompileWarning { span, .. }| { - // if any other warnings contain a span which completely covers this one, filter - // out this one. - all_warnings - .iter() - .find( - |CompileWarning { - span: other_span, .. - }| { - other_span.end() > span.end() && other_span.start() < span.start() - }, - ) - .is_none() - }) - .collect() - } #[allow(dead_code)] /// Prints out graphviz for this graph - fn visualize(&self) { + pub(crate) fn visualize(&self) { use petgraph::dot::Dot; println!("{:?}", Dot::with_config(&self.graph, &[])); } } - -fn connect_node<'sc>( - node: &TypedAstNode<'sc>, - graph: &mut ControlFlowGraph<'sc>, - leaves: &[NodeIndex], - namespace: &mut ControlFlowNamespace<'sc>, - exit_node: Option, -) -> (Vec, Option) { - // let mut graph = graph.clone(); - let span = node.span.clone(); - match &node.content { - TypedAstNodeContent::ReturnStatement(_) - | TypedAstNodeContent::ImplicitReturnExpression(_) => { - let this_index = graph.add_node(node.into()); - for leaf_ix in leaves { - graph.add_edge(*leaf_ix, this_index, "".into()); - } - // connect return to the exit node - if let Some(exit_node) = exit_node { - graph.add_edge(this_index, exit_node, "return".into()); - (vec![], None) - } else { - (vec![], None) - } - } - TypedAstNodeContent::WhileLoop(TypedWhileLoop { body, .. }) => { - // a while loop can loop back to the beginning, - // or it can terminate. - // so we connect the _end_ of the while loop _both_ to its beginning and the next node. - // the loop could also be entirely skipped - - let entry = graph.add_node(node.into()); - let while_loop_exit = graph.add_node("while loop exit".to_string().into()); - for leaf in leaves { - graph.add_edge(*leaf, entry, "".into()); - } - // it is possible for a whole while loop to be skipped so add edge from - // beginning of while loop straight to exit - graph.add_edge( - entry, - while_loop_exit, - "condition is initially false".into(), - ); - let mut leaves = vec![entry]; - let (l_leaves, _l_exit_node) = - depth_first_insertion_code_block(body, graph, &leaves, namespace, exit_node); - // insert edges from end of block back to beginning of it - for leaf in &l_leaves { - graph.add_edge(*leaf, entry, "loop repeats".into()); - } - - leaves = l_leaves; - for leaf in leaves { - graph.add_edge(leaf, while_loop_exit, "".into()); - } - (vec![while_loop_exit], exit_node) - } - TypedAstNodeContent::Expression(TypedExpression { - expression: expr_variant, - .. - }) => { - let entry = graph.add_node(node.into()); - // insert organizational dominator node - // connected to all current leaves - for leaf in leaves { - graph.add_edge(*leaf, entry, "".into()); - } - - ( - connect_expression(expr_variant, graph, &[entry], namespace, exit_node), - exit_node, - ) - } - TypedAstNodeContent::SideEffect => (leaves.to_vec(), exit_node), - TypedAstNodeContent::Declaration(decl) => { - // all leaves connect to this node, then this node is the singular leaf - let decl_node = graph.add_node(node.into()); - for leaf in leaves { - graph.add_edge(*leaf, decl_node, "".into()); - } - ( - connect_declaration(&decl, graph, decl_node, namespace, span, exit_node), - exit_node, - ) - } - } -} - -fn connect_declaration<'sc>( - decl: &TypedDeclaration<'sc>, - graph: &mut ControlFlowGraph<'sc>, - entry_node: NodeIndex, - namespace: &mut ControlFlowNamespace<'sc>, - span: Span<'sc>, - exit_node: Option, -) -> Vec { - use TypedDeclaration::*; - match decl { - VariableDeclaration(TypedVariableDeclaration { body, .. }) => { - connect_expression(&body.expression, graph, &[entry_node], namespace, exit_node) - } - FunctionDeclaration(fn_decl) => { - connect_typed_fn_decl(fn_decl, graph, entry_node, namespace, span, exit_node); - vec![] - } - TraitDeclaration(trait_decl) => { - connect_trait_declaration(&trait_decl, entry_node, namespace); - vec![] - } - StructDeclaration(_) => todo!("track each struct field's usage"), - EnumDeclaration(enum_decl) => { - connect_enum_declaration(&enum_decl, graph, entry_node, namespace); - vec![] - } - Reassignment(TypedReassignment { rhs, .. }) => { - connect_expression(&rhs.expression, graph, &[entry_node], namespace, exit_node) - } - ImplTrait { - trait_name, - methods, - .. - } => { - connect_impl_trait(trait_name, graph, methods, namespace, entry_node); - vec![] - } - SideEffect | ErrorRecovery => { - unreachable!("These are error cases and should be removed in the type checking stage. ") - } - } -} - -/// Implementations of traits are top-level things that are not conditional, so -/// we insert an edge from the function's starting point to the declaration to show -/// that the declaration was indeed at some point implemented. -/// Additionally, we insert the trait's methods into the method namespace in order to -/// track which exact methods are dead code. -fn connect_impl_trait<'sc>( - trait_name: &Ident<'sc>, - graph: &mut ControlFlowGraph<'sc>, - methods: &[TypedFunctionDeclaration<'sc>], - namespace: &mut ControlFlowNamespace<'sc>, - entry_node: NodeIndex, -) { - let trait_decl_node = namespace.find_trait(trait_name); - match trait_decl_node { - None => { - let edge_ix = graph.add_node("External trait".into()); - graph.add_edge(entry_node, edge_ix, "".into()); - } - Some(trait_decl_node) => { - // This is sort of a shortcut -- a path from the program exit to the trait will be - // included in the main execution path, since we are guaranteed to hit the program - // exit node. - // Eventually we can introduce a program entry dominator or something like that. - // This is not a risky shortcut and works fine for the time being. - graph.add_edge(0.into(), entry_node, "".into()); - graph.add_edge(entry_node, *trait_decl_node, "".into()); - } - } - let mut methods_and_indexes = vec![]; - // insert method declarations into the graph - for fn_decl in methods { - let fn_decl_entry_node = graph.add_node(ControlFlowGraphNode::MethodDeclaration { - span: fn_decl.span.clone(), - method_name: fn_decl.name.clone(), - }); - graph.add_edge(entry_node, fn_decl_entry_node, "".into()); - // connect the impl declaration node to the functions themselves, as all trait functions are - // public if the trait is in scope - connect_typed_fn_decl( - &fn_decl, - graph, - fn_decl_entry_node, - namespace, - fn_decl.span.clone(), - None, - ); - methods_and_indexes.push((fn_decl.name.clone(), fn_decl_entry_node)); - } - // Now, insert the methods into the trait method namespace. - namespace.insert_trait_methods(trait_name.clone(), methods_and_indexes); -} - -/// The strategy here is to populate the trait namespace with just one singular trait -/// and if it is ever implemented, by virtue of type checking, we know all interface points -/// were met. -/// Upon implementation, we can populate the methods namespace and track dead functions that way. -/// TL;DR: At this point, we _only_ track the wholistic trait declaration and not the functions -/// contained within. -/// -/// The trait node itself has already been added (as `entry_node`), so we just need to insert that -/// node index into the namespace for the trait. -fn connect_trait_declaration<'sc>( - decl: &TypedTraitDeclaration<'sc>, - entry_node: NodeIndex, - namespace: &mut ControlFlowNamespace<'sc>, -) { - namespace.add_trait(decl.name.clone(), entry_node); -} - -/// For an enum declaration, we want to make a declaration node for every individual enum -/// variant. When a variant is constructed, we can point an edge at that variant. This way, -/// we can see clearly, and thusly warn, when individual variants are not ever constructed. -fn connect_enum_declaration<'sc>( - enum_decl: &TypedEnumDeclaration<'sc>, - graph: &mut ControlFlowGraph<'sc>, - entry_node: NodeIndex, - namespace: &mut ControlFlowNamespace<'sc>, -) { - // keep a mapping of each variant - for variant in &enum_decl.variants { - let variant_index = graph.add_node(variant.into()); - - // graph.add_edge(entry_node, variant_index, "".into()); - namespace.insert_enum( - enum_decl.name.clone(), - entry_node, - variant.name.clone(), - variant_index, - ); - } -} - -/// When connecting a function declaration, we are inserting a new root node into the graph that -/// has no entry points, since it is just a declaration. -/// When something eventually calls it, it gets connected to the declaration. -fn connect_typed_fn_decl<'sc>( - fn_decl: &TypedFunctionDeclaration<'sc>, - graph: &mut ControlFlowGraph<'sc>, - entry_node: NodeIndex, - namespace: &mut ControlFlowNamespace<'sc>, - _span: Span<'sc>, - exit_node: Option, -) { - let fn_exit_node = graph.add_node(format!("\"{}\" fn exit", fn_decl.name.primary_name).into()); - let (_exit_nodes, _exit_node) = depth_first_insertion_code_block( - &fn_decl.body, - graph, - &[entry_node], - namespace, - Some(fn_exit_node), - ); - if let Some(exit_node) = exit_node { - graph.add_edge(fn_exit_node, exit_node, "".into()); - } - - namespace.insert_function(fn_decl.name.clone(), (entry_node, fn_exit_node)); -} - -fn depth_first_insertion_code_block<'sc>( - node_content: &TypedCodeBlock<'sc>, - graph: &mut ControlFlowGraph<'sc>, - leaves: &[NodeIndex], - namespace: &mut ControlFlowNamespace<'sc>, - exit_node: Option, -) -> (Vec, Option) { - let mut leaves = leaves.to_vec(); - let mut exit_node = exit_node.clone(); - for node in node_content.contents.iter() { - let (this_node, l_exit_node) = connect_node(node, graph, &leaves, namespace, exit_node); - leaves = this_node; - exit_node = l_exit_node; - } - (leaves, exit_node) -} - -/// connects any inner parts of an expression to the graph -/// note the main expression node has already been inserted -fn connect_expression<'sc>( - expr_variant: &TypedExpressionVariant<'sc>, - graph: &mut ControlFlowGraph<'sc>, - leaves: &[NodeIndex], - namespace: &mut ControlFlowNamespace<'sc>, - exit_node: Option, -) -> Vec { - use TypedExpressionVariant::*; - match expr_variant { - FunctionApplication { name, .. } => { - let mut is_external = false; - // find the function in the namespace - let (fn_entrypoint, fn_exit_point) = namespace - .get_function(&name.suffix) - .cloned() - .unwrap_or_else(|| { - let node_idx = - graph.add_node(format!("extern fn {}()", name.suffix.primary_name).into()); - is_external = true; - (node_idx, node_idx) - }); - for leaf in leaves { - graph.add_edge(*leaf, fn_entrypoint, "".into()); - } - // the exit points get connected to an exit node for the application - // if this is external, then we don't add the body to the graph so there's no point in - // an exit organizational dominator - if !is_external { - if let Some(exit_node) = exit_node { - graph.add_edge(fn_exit_point, exit_node, "".into()); - vec![exit_node] - } else { - vec![fn_exit_point] - } - } else { - vec![fn_entrypoint] - } - } - Literal(_lit) => leaves.to_vec(), - VariableExpression { .. } => leaves.to_vec(), - EnumInstantiation { - enum_name, - variant_name, - .. - } => { - // connect this particular instantiation to its variants declaration - connect_enum_instantiation(enum_name, variant_name, graph, namespace, leaves) - } - a => todo!("{:?}", a), - } -} - -fn connect_enum_instantiation<'sc>( - enum_name: &Ident<'sc>, - variant_name: &Ident<'sc>, - graph: &mut ControlFlowGraph, - namespace: &ControlFlowNamespace, - leaves: &[NodeIndex], -) -> Vec { - let (decl_ix, variant_index) = namespace - .find_enum_variant_index(enum_name, variant_name) - .unwrap_or_else(|| { - let node_idx = graph.add_node( - format!( - "extern enum {}::{}", - enum_name.primary_name, variant_name.primary_name - ) - .into(), - ); - (node_idx, node_idx) - }); - - // insert organizational nodes for instantiation of enum - let enum_instantiation_entry_idx = graph.add_node("enum instantiation entry".into()); - let enum_instantiation_exit_idx = graph.add_node("enum instantiation exit".into()); - - // connect to declaration node itself to show that the declaration is used - graph.add_edge(enum_instantiation_entry_idx, decl_ix, "".into()); - for leaf in leaves { - graph.add_edge(*leaf, enum_instantiation_entry_idx, "".into()); - } - - graph.add_edge(decl_ix, variant_index, "".into()); - graph.add_edge(variant_index, enum_instantiation_exit_idx, "".into()); - - vec![enum_instantiation_exit_idx] -} - -fn construct_dead_code_warning_from_node<'sc>(node: &TypedAstNode<'sc>) -> CompileWarning<'sc> { - match node { - // if this is a function, struct, or trait declaration that is never called, then it is dead - // code. - TypedAstNode { - content: TypedAstNodeContent::Declaration(TypedDeclaration::FunctionDeclaration { .. }), - span, - } => CompileWarning { - span: span.clone(), - warning_content: Warning::DeadDeclaration, - }, - TypedAstNode { - content: TypedAstNodeContent::Declaration(TypedDeclaration::StructDeclaration { .. }), - span, - } => CompileWarning { - span: span.clone(), - warning_content: Warning::DeadDeclaration, - }, - TypedAstNode { - content: - TypedAstNodeContent::Declaration(TypedDeclaration::TraitDeclaration( - TypedTraitDeclaration { name, .. }, - )), - .. - } => CompileWarning { - span: name.span.clone(), - warning_content: Warning::DeadTrait, - }, - TypedAstNode { - content: TypedAstNodeContent::Declaration(TypedDeclaration::EnumDeclaration(..)), - span, - } => CompileWarning { - span: span.clone(), - warning_content: Warning::DeadDeclaration, - }, - // otherwise, this is unreachable. - TypedAstNode { span, .. } => CompileWarning { - span: span.clone(), - warning_content: Warning::UnreachableCode, - }, - } -} diff --git a/parser/src/control_flow_analysis/flow_graph/namespace.rs b/parser/src/control_flow_analysis/flow_graph/namespace.rs index a100f872c61..08ae7e27866 100644 --- a/parser/src/control_flow_analysis/flow_graph/namespace.rs +++ b/parser/src/control_flow_analysis/flow_graph/namespace.rs @@ -1,9 +1,19 @@ use super::{EntryPoint, ExitPoint}; -use crate::Ident; +use crate::{types::ResolvedType, Ident}; use petgraph::prelude::NodeIndex; use std::collections::HashMap; -#[derive(Default)] +#[derive(Default, Clone)] +/// Represents a single entry in the [ControlFlowNamespace]'s function namespace. Contains various +/// metadata about a function including its node indexes in the graph, its return type, and more. +/// Used to both perform control flow analysis on functions as well as produce good error messages. +pub(crate) struct FunctionNamespaceEntry<'sc> { + pub(crate) entry_point: EntryPoint, + pub(crate) exit_point: ExitPoint, + pub(crate) return_type: ResolvedType<'sc>, +} + +#[derive(Default, Clone)] /// This namespace holds mappings from various declarations to their indexes in the graph. This is /// used for connecting those vertices when the declarations are instantiated. /// @@ -11,19 +21,23 @@ use std::collections::HashMap; /// of scope at this point, as that would have been caught earlier and aborted the compilation /// process. pub struct ControlFlowNamespace<'sc> { - function_namespace: HashMap, (EntryPoint, ExitPoint)>, - enum_namespace: HashMap, (NodeIndex, HashMap, NodeIndex>)>, - trait_namespace: HashMap, NodeIndex>, + pub(crate) function_namespace: HashMap, FunctionNamespaceEntry<'sc>>, + pub(crate) enum_namespace: HashMap, (NodeIndex, HashMap, NodeIndex>)>, + pub(crate) trait_namespace: HashMap, NodeIndex>, /// This is a mapping from trait name to method names and their node indexes - trait_method_namespace: HashMap, HashMap, NodeIndex>>, + pub(crate) trait_method_namespace: HashMap, HashMap, NodeIndex>>, } impl<'sc> ControlFlowNamespace<'sc> { - pub(crate) fn get_function(&self, ident: &Ident<'sc>) -> Option<&(EntryPoint, ExitPoint)> { + pub(crate) fn get_function(&self, ident: &Ident<'sc>) -> Option<&FunctionNamespaceEntry<'sc>> { self.function_namespace.get(ident) } - pub(crate) fn insert_function(&mut self, ident: Ident<'sc>, points: (EntryPoint, ExitPoint)) { - self.function_namespace.insert(ident, points); + pub(crate) fn insert_function( + &mut self, + ident: Ident<'sc>, + entry: FunctionNamespaceEntry<'sc>, + ) { + self.function_namespace.insert(ident, entry); } pub(crate) fn insert_enum( &mut self, diff --git a/parser/src/control_flow_analysis/mod.rs b/parser/src/control_flow_analysis/mod.rs index 7ae50f14023..9617eb6bb7f 100644 --- a/parser/src/control_flow_analysis/mod.rs +++ b/parser/src/control_flow_analysis/mod.rs @@ -1,33 +1,24 @@ //! //! This module contains all of the logic related to control flow analysis. //! -//! # Synopsis of Algorithm +//! # Synopsis of Dead-Code Analysis Algorithm +//! The dead code analysis algorithm constructs a node for every declaration, expression, and +//! statement. Then, from the entry points of the AST, we begin drawing edges along the control +//! flow path. If a declaration is instantiated, we draw an edge to it. If an expression or +//! statement is executed, an edge is drawn to it. Finally, we trace the edges from the entry +//! points of the AST. If there are no paths from any entry point to a node, then it is either a +//! dead declaration or an unreachable expression or statement. //! -//! The graph construction algorithm is as follows: +//! See the Terms section for details on how entry points are determined. //! -//! ```ignore -//! For every node in the syntax tree: -//! if it is non-branching: -//! push it onto all current not-yet-terminated tree leaves, thus adding it to the end of every path -//! else, if it is branching: -//! fork all not-yet-terminated leaves to have two paths coming off of them -//! in one path, put one of the node branches. in the other path, put the other node branch. -//! else if it is a termination point (i.e. aborting of this path): -//! mark the leaf node as terminated, preventing more nodes from being added. +//! # Synopsis of Return-Path Analysis Algorithm +//! The graph constructed for this algorithm does not go into the details of the contents of any +//! declaration except for function declarations. Inside of every function, it traces the execution +//! path along to ensure that all reachable paths do indeed return a value. We don't need to type +//! check the value that is returned, since type checking of return statements happens in the type +//! checking stage. Here, we know all present return statements have the right type, and we just +//! need to verify that all paths do indeed contain a return statement. //! -//! ``` -//! -//! After the graph which models control flow is constructed, certain relationships are examined: -//! 1. exhaustive returns from functions -//! - TODO - ensure all terminating nodes from a function have the right type, and that no path -//! makes it to the end of the block without terminating -//! 1. dead code -//! - TODO -- boolean "reached" flag for every ast node -//! -//! -//! Using this dominator tree, it analyzes these qualities of the program: -//! 1. Node reachability -//! 1. Type correctness on all paths //! //! # # Terms //! # # # Node @@ -37,6 +28,15 @@ //! concerned about in control flow analysis. More formally, //! A node _M_ dominates a node _N_ if every path from the entry that reaches node _N_ has to pass through node _M_. //! # # # Reachability - +//! A node _N_ is reachable if there is a path to it from any one of the tree's entry points. +//! # # # Entry Points +//! The entry points to an AST depend on what type of AST it is. If it is a predicate or script, +//! then the main function is the sole entry point. If it is a library or contract, then public +//! functions or declarations are entry points. +//! +mod analyze_return_paths; +mod dead_code_analysis; mod flow_graph; +pub use analyze_return_paths::*; +pub use dead_code_analysis::*; pub use flow_graph::*; diff --git a/parser/src/error.rs b/parser/src/error.rs index fd40c40a608..63000d0154c 100644 --- a/parser/src/error.rs +++ b/parser/src/error.rs @@ -173,6 +173,7 @@ pub enum Warning<'sc> { }, OverridingTraitImplementation, DeadDeclaration, + DeadFunctionDeclaration, DeadTrait, UnreachableCode, DeadEnumVariant { @@ -197,6 +198,7 @@ impl<'sc> Warning<'sc> { OverridesOtherSymbol { name } => format!("This import would override another symbol with the same name \"{}\" in this namespace.", name), OverridingTraitImplementation => format!("This trait implementation overrides another one that was previously defined."), DeadDeclaration => "This declaration is never used.".into(), + DeadFunctionDeclaration => "This function is never called.".into(), UnreachableCode => "This code is unreachable.".into(), DeadEnumVariant { variant_name } => format!("Enum variant {} is never constructed.", variant_name), DeadTrait => "This trait is never implemented.".into(), @@ -384,6 +386,12 @@ pub enum CompileError<'sc> { }, #[error("This enum variant requires an instantiation expression. Try initializing it with arguments in parentheses.")] MissingEnumInstantiator { span: Span<'sc> }, + #[error("This path must return a value of type \"{ty}\" from function \"{function_name}\", but it does not.")] + PathDoesNotReturn { + span: Span<'sc>, + ty: String, + function_name: &'sc str, + }, } impl<'sc> std::convert::From> for CompileError<'sc> { @@ -509,6 +517,7 @@ impl<'sc> CompileError<'sc> { UnqualifiedSelfType { span, .. } => (span.start(), span.end()), NotAType { span, .. } => (span.start(), span.end()), MissingEnumInstantiator { span, .. } => (span.start(), span.end()), + PathDoesNotReturn { span, .. } => (span.start(), span.end()), } } } diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 47f39b5746b..0ce79e40a76 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -320,11 +320,13 @@ fn perform_control_flow_analysis<'sc>( ) -> (Vec>, Vec>) { match tree { Some(tree) => { - let graph = ControlFlowGraph::from_tree(tree, tree_type); + let graph = ControlFlowGraph::construct_dead_code_graph(tree, tree_type); let mut warnings = vec![]; - let mut dead_code_warnings = graph.find_dead_code(); - warnings.append(&mut dead_code_warnings); - (warnings, vec![]) + let mut errors = vec![]; + warnings.append(&mut graph.find_dead_code()); + let graph = ControlFlowGraph::construct_return_path_graph(tree); + errors.append(&mut graph.analyze_return_paths()); + (warnings, errors) } None => (vec![], vec![]), } @@ -333,11 +335,14 @@ fn perform_control_flow_analysis_on_library_exports<'sc>( lib: &LibraryExports<'sc>, ) -> (Vec>, Vec>) { let mut warnings = vec![]; + let mut errors = vec![]; for tree in &lib.trees { - let graph = ControlFlowGraph::from_tree(tree, TreeType::Library); + let graph = ControlFlowGraph::construct_dead_code_graph(tree, TreeType::Library); warnings.append(&mut graph.find_dead_code()); + let graph = ControlFlowGraph::construct_return_path_graph(tree); + errors.append(&mut graph.analyze_return_paths()); } - (warnings, vec![]) + (warnings, errors) } // strategy: parse top level things diff --git a/parser/src/semantics/ast_node/code_block.rs b/parser/src/semantics/ast_node/code_block.rs index ddd0800fffc..4832afb6fe1 100644 --- a/parser/src/semantics/ast_node/code_block.rs +++ b/parser/src/semantics/ast_node/code_block.rs @@ -8,13 +8,13 @@ pub(crate) struct TypedCodeBlock<'sc> { } impl<'sc> TypedCodeBlock<'sc> { - pub(crate) fn type_check<'manifest>( + pub(crate) fn type_check( other: CodeBlock<'sc>, namespace: &Namespace<'sc>, // this is for the return or implicit return type_annotation: Option>, help_text: impl Into + Clone, - ) -> CompileResult<'sc, (Self, ResolvedType<'sc>)> { + ) -> CompileResult<'sc, (Self, Option>)> { let mut warnings = Vec::new(); let mut errors = Vec::new(); let mut evaluated_contents = Vec::new(); @@ -57,37 +57,36 @@ impl<'sc> TypedCodeBlock<'sc> { } // find the implicit return, if any, and use it as the code block's return type. // The fact that there is at most one implicit return is an invariant held by the parser. - let return_type = evaluated_contents - .iter() - .find_map(|x| match x { - TypedAstNode { - content: - TypedAstNodeContent::ImplicitReturnExpression(TypedExpression { - ref return_type, - .. - }), - .. - } => Some(return_type.clone()), - _ => None, - }) - .unwrap_or(ResolvedType::Unit); - if let Some(type_annotation) = type_annotation { - let convertability = return_type.is_convertable( - &type_annotation, - implicit_return_span.unwrap_or(other.whole_block_span.clone()), - help_text, - ); - match convertability { - Ok(warning) => { - if let Some(warning) = warning { - warnings.push(CompileWarning { - warning_content: warning, - span: other.whole_block_span, - }); + let return_type = evaluated_contents.iter().find_map(|x| match x { + TypedAstNode { + content: + TypedAstNodeContent::ImplicitReturnExpression(TypedExpression { + ref return_type, + .. + }), + .. + } => Some(return_type.clone()), + _ => None, + }); + if let Some(ref return_type) = return_type { + if let Some(type_annotation) = type_annotation { + let convertability = return_type.is_convertable( + &type_annotation, + implicit_return_span.unwrap_or(other.whole_block_span.clone()), + help_text, + ); + match convertability { + Ok(warning) => { + if let Some(warning) = warning { + warnings.push(CompileWarning { + warning_content: warning, + span: other.whole_block_span, + }); + } + } + Err(err) => { + errors.push(err.into()); } - } - Err(err) => { - errors.push(err.into()); } } } diff --git a/parser/src/semantics/ast_node/declaration.rs b/parser/src/semantics/ast_node/declaration.rs index 920627fa204..8a702518758 100644 --- a/parser/src/semantics/ast_node/declaration.rs +++ b/parser/src/semantics/ast_node/declaration.rs @@ -270,18 +270,20 @@ impl<'sc> TypedFunctionDeclaration<'sc> { } else { return_type }; + // If there are no implicit block returns, then we do not want to type check them, so we + // stifle the errors. If there _are_ implicit block returns, we want to type_check them. let (body, _implicit_block_return) = type_check!( - TypedCodeBlock::type_check( - body, - &namespace, - Some(return_type.clone()), - "Function body's return type does not match up with its return type annotation." - ), - (TypedCodeBlock { contents: vec![] }, ResolvedType::ErrorRecovery), - warnings, - errors - ); + TypedCodeBlock::type_check( + body, + &namespace, + Some(return_type.clone()), + "Function body's return type does not match up with its return type annotation." + ), + (TypedCodeBlock { contents: vec![] }, Some(ResolvedType::ErrorRecovery)), + warnings, + errors + ); // check the generic types in the arguments, make sure they are in the type // scope diff --git a/parser/src/semantics/ast_node/expression/typed_expression.rs b/parser/src/semantics/ast_node/expression/typed_expression.rs index e9fefb54b8e..877d503ae1b 100644 --- a/parser/src/semantics/ast_node/expression/typed_expression.rs +++ b/parser/src/semantics/ast_node/expression/typed_expression.rs @@ -213,10 +213,14 @@ impl<'sc> TypedExpression<'sc> { type_annotation.clone(), help_text.clone() ), - (TypedCodeBlock { contents: vec![] }, ResolvedType::Unit), + (TypedCodeBlock { contents: vec![] }, Some(ResolvedType::Unit)), warnings, errors ); + let block_return_type = match block_return_type { + Some(ty) => ty, + None => todo!("Expected code block to have implicit return error") + }; TypedExpression { expression: TypedExpressionVariant::CodeBlock(TypedCodeBlock { contents: typed_block.contents, diff --git a/parser/src/semantics/ast_node/mod.rs b/parser/src/semantics/ast_node/mod.rs index f5940cdc83f..573e57f4baa 100644 --- a/parser/src/semantics/ast_node/mod.rs +++ b/parser/src/semantics/ast_node/mod.rs @@ -473,7 +473,7 @@ impl<'sc> TypedAstNode<'sc> { Some(ResolvedType::Unit), "A while loop's loop body cannot implicitly return a value.\ Try assigning it to a mutable variable declared outside of the loop instead."), - (TypedCodeBlock { contents: vec![] }, ResolvedType::Unit), + (TypedCodeBlock { contents: vec![] }, Some(ResolvedType::Unit)), warnings, errors ); diff --git a/parser/src/types/resolved_type.rs b/parser/src/types/resolved_type.rs index 2befaace125..27c17ad0d6b 100644 --- a/parser/src/types/resolved_type.rs +++ b/parser/src/types/resolved_type.rs @@ -33,6 +33,12 @@ pub enum ResolvedType<'sc> { ErrorRecovery, } +impl Default for ResolvedType<'_> { + fn default() -> Self { + ResolvedType::Unit + } +} + impl<'sc> ResolvedType<'sc> { pub(crate) fn friendly_type_str(&self) -> String { use ResolvedType::*;