###   Hello UFO Dataflow
![Hello UFO](https://docs.google.com/drawings/d/16SLY1EuIB7TMhaTl--MoDxQYDmz78BZvDB3A2Axjh20/pub?w=2404&h=1165)

In [1]:
import pandas as pd
import dataflowkit.datasets as D
import dataflowkit.recipes as R
from dataflowkit.graphs import BaseGraph

### Implement the logic of recipes

In [2]:
# Skipped: let all datasets be InMemory and all recipes be BaseRecipe.
# In this demonstration `formatted` is Local rather than InMemory; the reason is explained later.

### Declare the graph

In [3]:
class Graph(BaseGraph):
    """Dataflow graph for the "Hello UFO" example.

    Declares seven datasets and six recipes and wires them together:
    ``formatter`` turns ``ufo_observation`` into ``formatted``; four parsers
    each read ``formatted``; ``type_classifier`` combines ``speed``,
    ``color`` and ``shape`` into ``type``.
    """

    def _declare_datasets(self, d):
        """Attach every dataset of the graph to the container ``d`` and return it."""
        d.ufo_observation = D.InMemory()
        # `formatted` is the only dataset stored with D.Local instead of in memory.
        d.formatted = D.Local('/path')
        d.speed = D.InMemory()
        d.color = D.InMemory()
        d.shape = D.InMemory()
        d.general_info = D.InMemory()
        d.type = D.InMemory()
        return d

    def _declare_recipes(self, r):
        """Attach every recipe of the graph to the container ``r`` and return it."""
        r.formatter = R.BaseRecipe()
        r.speed_parser = R.BaseRecipe()
        r.color_parser = R.BaseRecipe()
        r.shape_parser = R.BaseRecipe()
        r.general_parser = R.BaseRecipe()
        r.type_classifier = R.BaseRecipe()
        return r

    def _declare_graph(self, R, D):
        """Return the edges of the graph as ``(recipe, inputs, outputs)`` tuples.

        ``R`` and ``D`` are the recipe and dataset containers; inside this
        method they shadow the module-level ``R``/``D`` import aliases.
        """
        # The entries do not need to be listed in execution order; ordering
        # is handled automatically.
        graph = [
            # (Recipe, [In-Dataset], [Out-Dataset])
            (R.formatter, [D.ufo_observation], [D.formatted]),
            (R.speed_parser, [D.formatted], [D.speed]),
            (R.color_parser, [D.formatted], [D.color]),
            (R.shape_parser, [D.formatted], [D.shape]),
            (R.general_parser, [D.formatted], [D.general_info]),
            (R.type_classifier, [D.speed, D.color, D.shape], [D.type]),
        ]
        return graph

    def update_datasets(self):
        """Re-declare the datasets and pass them to ``_update_datasets``.

        Implementing this method allows the graph to be reused with new
        locations for its datasets. In this example almost all datasets are
        in-memory, so the update is trivial.
        """
        from types import SimpleNamespace

        # `_declare_datasets` requires a container argument; calling it with
        # no argument (as before) raised TypeError.
        # NOTE(review): a plain attribute namespace is assumed to be an
        # acceptable container for `_update_datasets` -- confirm against
        # BaseGraph and swap in the framework's own container type if needed.
        d = self._declare_datasets(SimpleNamespace())
        self._update_datasets(d)

# Instantiate the example graph; the cells below call execute() / execute_related() on it.
graph = Graph()

### Execute the graph

In [4]:
graph.execute(desc=True) # with desc=True, it only describes the execution sequence of the graph without executing it

-> ufo_observation
-> formatter
-> formatted
-> shape_parser
-> shape
-> general_parser
-> general_info
-> speed_parser
-> speed
-> color_parser
-> color
-> type_classifier
-> type


### Force only the related recipes to be executed
- For example, the logic of the shape_parser is updated, only the related recipes should be re-executed
- It will execute all the related children, and the parents if necessary
- In this case, formatted is stored in Local, whose checkpoint is True
- This means it can be reloaded by itself, so formatter does not need to be re-executed

In [5]:
# With desc=True, only describe the sequence that would run for shape_parser and its related recipes.
graph.execute_related([graph.R.shape_parser], desc=True)

-> formatted
-> color_parser
-> color
-> shape_parser
-> shape
-> speed_parser
-> speed
-> type_classifier
-> type
