In [2]:
import xgboost as xgb

# Instantiate the regressor, then populate it from the saved JSON model file.
model = xgb.XGBRegressor(
    max_depth=10,
    n_estimators=1000,
)
model.load_model('xgb_hop1_miss2_len8_large.json')

In [5]:
import json

# Fetch the booster from the loaded model and dump its trees in JSON format;
# each entry of `model_dump` is a JSON string that is parsed further below.
booster = model.get_booster()
model_dump = booster.get_dump(dump_format="json")

def count_nodes(tree):
    """Return the number of split (internal) nodes in a parsed tree dict.

    A node counts as a split when it carries a 'children' key; any node
    without that key is treated as a leaf and contributes nothing.
    """
    splits = 0
    pending = [tree]  # explicit stack instead of recursion
    while pending:
        node = pending.pop()
        if 'children' not in node:
            # Leaf: nothing to count, nothing to descend into.
            continue
        splits += 1
        pending.extend(node['children'])
    return splits

def count_leaves(tree):
    """Return the number of leaf nodes in a parsed tree dict.

    Any node lacking a 'children' key is a leaf; otherwise the leaf counts
    of all of its children are added together.
    """
    if 'children' not in tree:
        return 1

    leaf_total = 0
    for subtree in tree['children']:
        leaf_total += count_leaves(subtree)
    return leaf_total

# Accumulate split and leaf counts over every dumped tree, parsing one tree
# at a time so only a single parsed tree is held in memory.
total_internal_nodes, total_leaves = 0, 0

for raw_tree in model_dump:
    parsed_tree = json.loads(raw_tree)
    total_internal_nodes += count_nodes(parsed_tree)
    total_leaves += count_leaves(parsed_tree)

# Parameter accounting used here: each split contributes two values
# (feature, threshold) and each leaf contributes one (weight).
total_parameters = 2 * total_internal_nodes + total_leaves

print(f"Total internal nodes (splits): {total_internal_nodes}")
print(f"Total leaves: {total_leaves}")
print(f"Total parameters: {total_parameters}")

Total internal nodes (splits): 687782
Total leaves: 688782
Total parameters: 2064346


In [6]:
import json

# Recompute the parameter total in a single pass over the JSON tree dump.
trees = model.get_booster().get_dump(dump_format='json')


# NOTE: this rebinds the notebook-level name `count_nodes`. The earlier cell
# uses the same name for a function that counts splits, not parameters, so
# cell execution order determines which definition is live.
def count_nodes(node):
    """Return the parameter count of the subtree rooted at `node`.

    A node containing a 'leaf' key counts as 1 parameter (its value).
    Any other node counts as 2 parameters (feature + threshold) plus the
    parameter counts of every entry in its 'children' list.
    """
    if 'leaf' in node:
        return 1  # Leaf node has 1 parameter (value)
    # Internal node has 2 parameters (feature + threshold); iterate over all
    # children rather than hard-coding indices 0 and 1.
    return 2 + sum(count_nodes(child) for child in node['children'])


# The helper is defined once above instead of being re-created per tree.
total_params = 0
for tree_str in trees:
    tree = json.loads(tree_str)
    total_params += count_nodes(tree)

print(f"Total parameters: {total_params}")

Total parameters: 2064346
