Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return path analysis #30

Merged
merged 5 commits into from
Apr 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions parser/README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
# fuel-vm-hll
High Level Language (Name Subject to Change) for the FuelVM

# Minimum supported Rust version
As of now, this code was developed on and is guaranteed to run on Rust 1.50 stable.
294 changes: 294 additions & 0 deletions parser/src/control_flow_analysis/analyze_return_paths.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
//! This is the flow graph, a graph which contains edges that represent possible steps of program
//! execution.

use super::*;
use super::{ControlFlowGraph, EntryPoint, ExitPoint, Graph};
use crate::semantics::{
ast_node::{
TypedCodeBlock, TypedDeclaration, TypedExpression, TypedFunctionDeclaration,
TypedReassignment, TypedVariableDeclaration, TypedWhileLoop,
},
TypedAstNode, TypedAstNodeContent,
};
use crate::types::ResolvedType;
use crate::Ident;
use crate::{error::*, semantics::TypedParseTree};
use pest::Span;
use petgraph::prelude::NodeIndex;

impl<'sc> ControlFlowGraph<'sc> {
    /// Builds the graph used for return-path analysis from a typed parse tree.
    ///
    /// Each root node of `ast` is handed to `connect_node` in order, together with the
    /// current set of leaf nodes (the nodes the next statement should be connected from).
    /// Function declarations encountered on the way register themselves in the graph's
    /// namespace, which is what `analyze_return_paths` later iterates over.
    pub(crate) fn construct_return_path_graph(ast: &TypedParseTree<'sc>) -> Self {
        let mut graph = ControlFlowGraph {
            graph: Graph::new(),
            entry_points: vec![],
            namespace: Default::default(),
        };
        // do a depth first traversal and cover individual inner ast nodes
        let mut leaves = vec![];
        for ast_entrypoint in ast.root_nodes.iter() {
            // Only a `NextStep` connection advances the leaves; a `Return` connection leaves
            // them untouched.
            if let NodeConnection::NextStep(nodes) =
                connect_node(ast_entrypoint, &mut graph, &leaves)
            {
                leaves = nodes;
            }
        }

        graph
    }

    /// Checks every function registered in the function namespace and reports the paths that
    /// fail to reach the function's exit node.
    ///
    /// For each namespace entry, the graph is walked from the function's entry point. A node
    /// with no outgoing edges that is not the exit node yields a
    /// `CompileError::PathDoesNotReturn`, unless the function's return type is
    /// `ResolvedType::Unit`.
    ///
    /// Returns all errors found, across all functions.
    pub(crate) fn analyze_return_paths(&self) -> Vec<CompileError<'sc>> {
        let mut errors = vec![];
        for (
            name,
            FunctionNamespaceEntry {
                entry_point,
                exit_point,
                return_type,
            },
        ) in &self.namespace.function_namespace
        {
            // For every node connected to the entry point
            errors.extend(self.ensure_all_paths_reach_exit(
                *entry_point,
                *exit_point,
                name.primary_name,
                return_type,
            ));
        }
        errors
    }

    /// Walks the graph breadth-first from `entry_point` and reports every dead end that is
    /// not `exit_point`.
    ///
    /// * `entry_point` / `exit_point` - the function's entry and exit nodes.
    /// * `function_name` - name placed into the reported errors.
    /// * `return_ty` - the function's return type; dead ends are only errors when this is
    ///   not `ResolvedType::Unit`.
    ///
    /// Every reachable node is expanded exactly once. The walk continues until no rover is
    /// left, so it is bounded by the number of nodes in the graph rather than by a fixed
    /// iteration count, and a rover that has already arrived at the exit does not stop the
    /// remaining rovers from being followed.
    fn ensure_all_paths_reach_exit(
        &self,
        entry_point: EntryPoint,
        exit_point: ExitPoint,
        function_name: &'sc str,
        return_ty: &ResolvedType<'sc>,
    ) -> Vec<CompileError<'sc>> {
        let mut errors = vec![];
        // Nodes that were already expanded. Guarantees termination and prevents reporting
        // the same dead end more than once when several paths converge on it.
        let mut visited = std::collections::HashSet::new();
        let mut rovers = vec![entry_point];
        while !rovers.is_empty() {
            let mut next_rovers = vec![];
            for rover in rovers {
                // A rover that reached the exit is done; a node seen before was already
                // expanded by another rover.
                if rover == exit_point || !visited.insert(rover) {
                    continue;
                }
                let mut neighbors = self
                    .graph
                    .neighbors_directed(rover, petgraph::Direction::Outgoing)
                    .collect::<Vec<_>>();
                if neighbors.is_empty() && *return_ty != ResolvedType::Unit {
                    errors.push(CompileError::PathDoesNotReturn {
                        // TODO: unwrap_to_node is a shortcut. In reality, the graph type should be
                        // different. To save some code duplication,
                        span: self.graph[rover].unwrap_to_node().span.clone(),
                        function_name,
                        ty: return_ty.friendly_type_str(),
                    });
                }
                next_rovers.append(&mut neighbors);
            }
            rovers = next_rovers;
        }

        errors
    }
}

/// The resulting edges from connecting a node to the graph.
///
/// Returned by `connect_node` so that the caller knows whether its set of leaf nodes should
/// advance or stay as it is.
enum NodeConnection {
    /// This represents a node that steps on to the next node.
    /// The payload is the set of leaf nodes the following node is connected from.
    NextStep(Vec<NodeIndex>),
    /// This represents a return or implicit return node, which aborts the stepwise flow.
    /// The payload is the graph index of the return node itself.
    Return(NodeIndex),
}

/// Adds `node` to the graph and wires it to the current leaves.
///
/// * `node` - the typed AST node to insert.
/// * `graph` - the graph being built.
/// * `leaves` - the nodes from which control flows into `node`.
///
/// Returns `NodeConnection::Return` (carrying the new node's index) for explicit and implicit
/// returns, and `NodeConnection::NextStep` (carrying the leaves for the following node) for
/// everything else.
fn connect_node<'sc>(
    node: &TypedAstNode<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    leaves: &[NodeIndex],
) -> NodeConnection {
    match &node.content {
        TypedAstNodeContent::ReturnStatement(_)
        | TypedAstNodeContent::ImplicitReturnExpression(_) => {
            let this_index = graph.add_node(node.into());
            for leaf_ix in leaves {
                graph.add_edge(*leaf_ix, this_index, "".into());
            }
            NodeConnection::Return(this_index)
        }
        TypedAstNodeContent::WhileLoop(TypedWhileLoop { .. }) => {
            // An abridged version of the dead code analysis for a while loop
            // since we don't really care about what the loop body contains when detecting
            // divergent paths
            let loop_node = graph.add_node(node.into());
            // The loop still has to be reachable from whatever precedes it; without these
            // edges the preceding node would look like a dead end.
            for leaf in leaves {
                graph.add_edge(*leaf, loop_node, "while loop entry".into());
            }
            NodeConnection::NextStep(vec![loop_node])
        }
        TypedAstNodeContent::Expression(TypedExpression { .. }) => {
            let entry = graph.add_node(node.into());
            // insert organizational dominator node
            // connected to all current leaves
            for leaf in leaves {
                graph.add_edge(*leaf, entry, "".into());
            }
            NodeConnection::NextStep(vec![entry])
        }
        // Side effects add nothing to the graph; the leaves pass through unchanged.
        TypedAstNodeContent::SideEffect => NodeConnection::NextStep(leaves.to_vec()),
        TypedAstNodeContent::Declaration(decl) => NodeConnection::NextStep(connect_declaration(
            node,
            decl,
            graph,
            node.span.clone(),
            leaves,
        )),
    }
}

/// Adds a declaration to the graph and returns the leaves for the node that follows it.
///
/// * `node` - the AST node wrapping the declaration; used to create the graph node.
/// * `decl` - the declaration itself.
/// * `graph` - the graph being built.
/// * `span` - forwarded to `connect_typed_fn_decl` for function declarations.
/// * `leaves` - the nodes from which control flows into this declaration.
///
/// Variable declarations and reassignments are steps of execution: they get a node connected
/// from every leaf and become the sole new leaf. All other declarations are not steps of
/// execution, so the incoming leaves are handed back unchanged and whatever follows the
/// declaration stays connected to what preceded it.
///
/// # Panics
///
/// Panics on `SideEffect` and `ErrorRecovery` declarations, which are expected to have been
/// removed before this stage.
fn connect_declaration<'sc>(
    node: &TypedAstNode<'sc>,
    decl: &TypedDeclaration<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    span: Span<'sc>,
    leaves: &[NodeIndex],
) -> Vec<NodeIndex> {
    use TypedDeclaration::*;
    match decl {
        // Type-level declarations add nothing to the graph.
        TraitDeclaration(_) | StructDeclaration(_) | EnumDeclaration(_) => leaves.to_vec(),
        VariableDeclaration(TypedVariableDeclaration { .. })
        | Reassignment(TypedReassignment { .. }) => {
            let entry_node = graph.add_node(node.into());
            for leaf in leaves {
                graph.add_edge(*leaf, entry_node, "".into());
            }
            vec![entry_node]
        }
        FunctionDeclaration(fn_decl) => {
            // A function declaration is a root node: control does not flow from the
            // preceding statements into it, so no edges from `leaves` are added.
            let entry_node = graph.add_node(node.into());
            connect_typed_fn_decl(fn_decl, graph, entry_node, span);
            leaves.to_vec()
        }
        ImplTrait {
            trait_name,
            methods,
            ..
        } => {
            // Same reasoning as for function declarations: the impl node is a root whose
            // only edges lead to its methods.
            let entry_node = graph.add_node(node.into());
            connect_impl_trait(trait_name, graph, methods, entry_node);
            leaves.to_vec()
        }
        SideEffect | ErrorRecovery => {
            unreachable!("These are error cases and should be removed in the type checking stage. ")
        }
    }
}

/// Inserts the methods of a trait implementation into the graph.
///
/// For every method in `methods`, a `MethodDeclaration` node is created, linked from
/// `entry_node` (the node of the impl itself) and then populated with the method body via
/// `connect_typed_fn_decl`. Once all methods are in the graph, their names and node indexes
/// are recorded in the namespace under `trait_name`.
fn connect_impl_trait<'sc>(
    trait_name: &Ident<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    methods: &[TypedFunctionDeclaration<'sc>],
    entry_node: NodeIndex,
) {
    let registered: Vec<_> = methods
        .iter()
        .map(|method| {
            // node representing the method declaration itself
            let method_entry = graph.add_node(ControlFlowGraphNode::MethodDeclaration {
                span: method.span.clone(),
                method_name: method.name.clone(),
            });
            // the impl node leads to each of its methods
            graph.add_edge(entry_node, method_entry, "".into());
            // build the method body underneath its declaration node
            connect_typed_fn_decl(method, graph, method_entry, method.span.clone());
            (method.name.clone(), method_entry)
        })
        .collect();

    // Finally, record the methods in the trait method namespace.
    graph
        .namespace
        .insert_trait_methods(trait_name.clone(), registered);
}

// NOTE: The notes below describe trait declarations. No function in this file consumes them,
// so they are written as plain comments; as `///` doc comments they would attach to the next
// item in the file.
//
// The strategy here is to populate the trait namespace with just one singular trait
// and if it is ever implemented, by virtue of type checking, we know all interface points
// were met.
// Upon implementation, we can populate the methods namespace and track dead functions that way.
// TL;DR: At this point, we _only_ track the holistic trait declaration and not the functions
// contained within.
//
// The trait node itself has already been added (as `entry_node`), so we just need to insert that
// node index into the namespace for the trait.

/// Builds the sub-graph of a function declaration and registers it in the namespace.
///
/// * `fn_decl` - the function whose body is inserted.
/// * `graph` - the graph being built.
/// * `entry_node` - the already-inserted node that acts as the function's entry point.
/// * `_span` - currently unused.
///
/// A dedicated exit node is created first, then the body is inserted starting from
/// `entry_node`. Every return node found in the body gets a `"return"` edge to the exit
/// node. The entry node, exit node and return type are finally stored in the function
/// namespace under the function's name.
fn connect_typed_fn_decl<'sc>(
    fn_decl: &TypedFunctionDeclaration<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    entry_node: NodeIndex,
    _span: Span<'sc>,
) {
    // The exit node is added before the body so that node creation order is unchanged.
    let exit_label = format!("\"{}\" fn exit", fn_decl.name.primary_name);
    let exit_ix = graph.add_node(exit_label.into());

    for return_ix in depth_first_insertion_code_block(&fn_decl.body, graph, &[entry_node]) {
        graph.add_edge(return_ix, exit_ix, "return".into());
    }

    graph.namespace.insert_function(
        fn_decl.name.clone(),
        FunctionNamespaceEntry {
            entry_point: entry_node,
            exit_point: exit_ix,
            return_type: fn_decl.return_type.clone(),
        },
    );
}

/// Graph indexes of the return nodes found while inserting a code block.
type ReturnStatementNodes = Vec<NodeIndex>;

/// Inserts the contents of a code block into the graph, one node after another.
///
/// * `node_content` - the code block to insert.
/// * `graph` - the graph being built.
/// * `leaves` - the nodes from which control flows into the first node of the block.
///
/// Each node is connected from the current frontier. A `NextStep` result replaces the
/// frontier; a `Return` result is collected and leaves the frontier as it was.
///
/// Returns the indexes of all return nodes encountered, in block order.
fn depth_first_insertion_code_block<'sc>(
    node_content: &TypedCodeBlock<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    leaves: &[NodeIndex],
) -> ReturnStatementNodes {
    let mut frontier: Vec<NodeIndex> = leaves.to_vec();
    let mut collected_returns = ReturnStatementNodes::new();
    for statement in node_content.contents.iter() {
        match connect_node(statement, graph, &frontier) {
            NodeConnection::Return(return_ix) => collected_returns.push(return_ix),
            NodeConnection::NextStep(next_frontier) => frontier = next_frontier,
        }
    }
    collected_returns
}