In [1]:
!pip install graphviz



In [2]:
import pandas as pd
from graphviz import Digraph

In [3]:
# Node definition
class QDTreeNode:
    """A single qd-tree node: a value interval plus the rows stored under it.

    Attributes (all assigned in ``__init__``):
      range     -- the ``(min_val, max_val)`` pair passed by the caller
      customers -- the rows held by this node; only ``len()`` is used here
      left      -- left child, ``None`` until a caller assigns it
      right     -- right child, ``None`` until a caller assigns it
      depth     -- level of this node, 0 unless the caller says otherwise
    """

    def __init__(self, data_range, customers, depth=0):
        self.range = data_range  # (min_val, max_val)
        self.customers = customers
        # Children start out unset; whoever builds the tree attaches them.
        self.left = self.right = None
        self.depth = depth

    def label(self):
        """Return the two-line caption for this node: its range, then its row count."""
        low = self.range[0]
        high = self.range[1]
        count = len(self.customers)
        return "[{:.1f}, {:.1f}]\nCount: {}".format(low, high, count)


In [4]:
# Load customer.tbl
def load_customer_table(filepath):
    """Read a pipe-delimited, header-less customer table into a DataFrame.

    Only the first eight fields of each line are kept and given the
    ``c_*`` names listed below. ``c_acctbal`` is passed through
    ``pd.to_numeric(..., errors='coerce')``, so values that cannot be
    parsed as numbers become NaN.

    ``filepath`` is handed straight to ``pd.read_csv``.
    """
    field_names = [
        "c_custkey",
        "c_name",
        "c_address",
        "c_nationkey",
        "c_phone",
        "c_acctbal",
        "c_mktsegment",
        "c_comment",
    ]
    table = pd.read_csv(
        filepath,
        sep='|',
        header=None,
        names=field_names,
        usecols=range(8),
    )
    # Force the balance column to numeric; unparsable entries turn into NaN.
    balance = pd.to_numeric(table["c_acctbal"], errors='coerce')
    table["c_acctbal"] = balance
    return table

In [5]:
# Build the qd-tree recursively
def build_qd_tree(data, attr, depth=0, max_depth=4, min_size=10):
    """Recursively split ``data`` at the median of column ``attr``.

    Parameters:
      data      -- rows for this subtree (a pandas DataFrame in this notebook)
      attr      -- name of the column to split on
      depth     -- level of the node being built (0 for the root)
      max_depth -- a node at this depth or deeper is never split
      min_size  -- a node holding at most this many rows is never split

    Returns:
      A QDTreeNode whose ``range`` is the (min, max) of ``attr`` over
      ``data``. ``left`` receives the rows strictly below the median and
      ``right`` the rows at or above it; both stay ``None`` on a leaf.

    Note: rows whose ``attr`` is NaN compare False against the median on
    both sides, so they remain in this node's ``customers`` but are not
    passed down to either child.
    """
    values = data[attr]
    node = QDTreeNode((values.min(), values.max()), data, depth)

    # `>=` instead of `==`: a call that starts past max_depth must still stop.
    if len(data) <= min_size or depth >= max_depth:
        return node

    median = values.median()
    left_data = data[values < median]
    right_data = data[values >= median]

    # Degenerate split: heavy ties at the median (or an all-NaN column) leave
    # one side empty. Recursing would only add empty "[nan, nan]" children and
    # repeat the same split down to max_depth, so keep this node as a leaf.
    if len(left_data) == 0 or len(right_data) == 0:
        return node

    node.left = build_qd_tree(left_data, attr, depth + 1, max_depth, min_size)
    node.right = build_qd_tree(right_data, attr, depth + 1, max_depth, min_size)
    return node

In [6]:
# Visualize the QD-tree using Graphviz
def visualize_tree(root, filename="qd_tree"):
    """Draw the tree rooted at ``root`` and render it as a PNG.

    Each node is emitted with its ``label()`` text and keyed by
    ``str(id(node))``; an edge is added from every parent to its child.
    The graph is rendered to ``filename`` with ``format="png"`` and
    ``cleanup=True``, then a confirmation line is printed.
    """
    graph = Digraph()

    # Explicit stack of (node, parent key) pairs. Nodes are emitted in
    # pre-order with the left subtree before the right one.
    pending = [(root, None)]
    while pending:
        current, parent_key = pending.pop()
        if current is None:
            continue
        key = str(id(current))
        graph.node(key, current.label())
        if parent_key:
            graph.edge(parent_key, key)
        # Push right first so the left child is popped, and drawn, first.
        pending.append((current.right, key))
        pending.append((current.left, key))

    graph.render(filename, format="png", cleanup=True)
    print(f"Tree visual saved to {filename}.png")

In [7]:
# === Main Execution ===
if __name__ == "__main__":
    import os

    # Location of the pipe-delimited customer table. Set the CUSTOMER_TBL
    # environment variable to point at a different copy; when it is unset,
    # the original hard-coded location below is used.
    filepath = os.environ.get(
        "CUSTOMER_TBL", "C:\\Users\\admin\\Desktop\\customer.tbl"
    )
    df = load_customer_table(filepath)

    # Split on account balance: at most 4 levels, and nodes with 50 rows or
    # fewer are not split further.
    root = build_qd_tree(df, "c_acctbal", max_depth=4, min_size=50)
    visualize_tree(root, filename="qd_tree_acctbal")

Tree visual saved to qd_tree_acctbal.png
