In [1]:
import pandas as pd
from graphviz import Digraph
from enum import Enum
from dataclasses import dataclass

In [2]:
# Name of the table whose dependency graph is drawn. It is also used as the
# base name of the input CSV (`<table>.csv`) and of the rendered output
# (`graphs/<table>`).
table = "datapipeline.hist_cp_fill"

In [3]:
class Shape(Enum):
    """Graphviz node styling for each node category.

    Every member's value is a dict of node attributes (``shape``,
    ``fillcolor``, ``style``) that is merged into the attributes passed to
    ``Digraph.node``. Members are selected by name from the ``type`` column
    of the input data.
    """

    source = dict(shape='record', fillcolor='#90EE90', style='filled')
    derived = dict(shape='oval', fillcolor='#E6F5FF', style='filled')
    UNK = dict(shape='hexagon', fillcolor='#FFB6C1', style='filled')


In [4]:
def dictionary_factory():
    """Return a new, empty dictionary on every call."""
    fresh = dict()
    return fresh

class DependencyGraph:
    """Build and render a Graphviz dependency graph for one table.

    Edge data is read from ``<table>.csv``. Every row is expected to provide
    the columns ``source``, ``target``, ``type`` and ``URL``; ``type`` must be
    the name of a ``Shape`` member.
    """

    def __init__(self, table):
        """Set up an empty left-to-right SVG digraph for ``table``.

        Args:
            table: Table name; used as the CSV base name and as the output
                file name under ``graphs/``.
        """
        self.table = table
        # Registry of node names already added to ``self.dot`` (name -> True).
        self.nodes = dictionary_factory()
        self.dot = Digraph(format='svg', graph_attr={'rankdir':'LR'})

    def draw_dot(self) -> None:
        """Load the edge list, add every node and edge, then render the graph.

        The graph is rendered with the file name ``graphs/<table>``.

        Raises:
            FileNotFoundError: If ``<table>.csv`` does not exist.
            KeyError: If a row's ``type`` is not the name of a ``Shape``
                member.
        """
        node_df = self._load_graph_data()
        for _, row in node_df.iterrows():
            shape = Shape[row['type']].value
            self.create_node(row, shape)
            self.dot.edge(row['source'], row['target'])
        self.dot.render(filename=f"graphs/{self.table}")

    def _load_graph_data(self) -> pd.DataFrame:
        """Read ``<table>.csv`` into a DataFrame.

        Returns:
            The parsed CSV contents.

        Raises:
            FileNotFoundError: If the CSV file is missing. The original error
                raised by pandas is attached as ``__cause__``.
        """
        csv_path = f'{self.table}.csv'
        try:
            return pd.read_csv(csv_path)
        except FileNotFoundError as err:
            # Chain explicitly so the traceback shows the underlying error as
            # the cause instead of an unrelated failure during handling.
            raise FileNotFoundError(f"{csv_path} not found.") from err

    def create_node(self, row: pd.Series, shape: dict) -> None:
        """Register both endpoints of an edge row as graph nodes.

        The row's ``URL`` and the given ``shape`` are applied to the source
        and the target alike.

        Args:
            row: Edge record providing ``source``, ``target`` and ``URL``.
            shape: Node attributes merged into each node's attributes.
        """
        for endpoint in ('source', 'target'):
            self._create_node(row[endpoint], row['URL'], shape)

    def _create_node(self, node_name: str, url: str, shape: dict) -> None:
        """Add ``node_name`` to the graph unless it was added before.

        The first call for a given name fixes that node's attributes; later
        calls with the same name are ignored.

        Args:
            node_name: Node identifier, also used as its label.
            url: Value of the node's ``URL`` attribute.
            shape: Extra node attributes; not modified by this method.
        """
        if node_name in self.nodes:
            return
        # Later keys win, so entries in ``shape`` may override the base ones.
        node_attrs = {'name': node_name, 'label': node_name, 'URL': url, **shape}
        self.dot.node(**node_attrs)
        self.nodes[node_name] = True


In [5]:
# Build the dependency graph for the configured table and render it.
graph = DependencyGraph(table=table)
graph.draw_dot()