In [36]:
import json
import pandas as pd

In [37]:
with open('figmaTree.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

In [38]:

def extract_features(node, depth=0, parent_tag=None, sibling_count=0):
    """
    Recursively extracts features from a Figma node, including hierarchy, 
    layout, size, style, and text features.

    Hierarchy Features:
      - depth: How deep the node is in the hierarchy.
      - num_children: Number of direct child nodes.
      - parent_tag: The tag of the parent node.
      - sibling_count: Number of siblings.
      - is_leaf: Whether this node is a leaf (has no children).

    Style Features:
      - font_size: Text font size (if available).
      - font_weight: Derived from fontName.style (if available).
      - color: Extracted from the first SOLID fill (as an RGB string).
      - background_color: Same as color if applicable.
      - border_radius: Average of the four corner radii (if available).
      - visibility: Defaults to "visible".

    Text Features:
      - text_length: Length of inner text (0 if no text).
      - word_count: Number of words in the text.
      - contains_number: Whether the text contains any digits.
      - contains_special_chars: Whether the text contains special symbols.
      - text_type: "short" for brief texts (e.g., labels, buttons) and "long" for extended texts (e.g., paragraphs).
    """
    features = []
    
    # Basic node properties from the "node" object.
    tag = node.get("tag", "")
    node_data = node.get("node", {})
    
    # Extract basic text information
    text = node_data.get("characters", "") or ""
    text_length = len(text)
    word_count = len(text.split())
    contains_number = any(ch.isdigit() for ch in text)
    contains_special_chars = any(not ch.isalnum() and not ch.isspace() for ch in text)
    # Categorize text as "long" if word_count is greater than 10, otherwise "short".
    text_type = "long" if word_count > 10 else "short"
    
    # Compute hierarchy features
    children = node.get("children", [])
    num_children = len(children)
    is_leaf = (num_children == 0)
    
    # Create the feature dictionary
    feature = {
        "tag": tag,
        "type": node_data.get("type", ""),
        "x": node_data.get("x", None),
        "y": node_data.get("y", None),
        "width": node_data.get("width", None),
        "height": node_data.get("height", None),
        "characters": text,
        # Hierarchy features
        "depth": depth,
        "num_children": num_children,
        "parent_tag": parent_tag,
        "sibling_count": sibling_count,
        "is_leaf": is_leaf,
        # Style features
        "font_size": node_data.get("fontSize", None),
        "font_weight": node_data.get("fontName", {}).get("style", None) if node_data.get("fontName") else None,
        "color": None,
        "background_color": None,
        "border_radius": None,
        "visibility": "visible",  # Default to visible
        # Text features
        "text_length": text_length,
        "word_count": word_count,
        "contains_number": contains_number,
        "contains_special_chars": contains_special_chars,
        "text_type": text_type
    }
    
    # Extract color information from fills (if available)
    fills = node_data.get("fills", [])
    for fill in fills:
        if fill.get("type") == "SOLID" and "color" in fill:
            r = int(fill["color"].get("r", 0) * 255)
            g = int(fill["color"].get("g", 0) * 255)
            b = int(fill["color"].get("b", 0) * 255)
            rgb_str = f"rgb({r}, {g}, {b})"
            feature["color"] = rgb_str
            feature["background_color"] = rgb_str
            break  # Use the first SOLID fill found
    
    # Compute border_radius as the average of the four corner radii if available
    br_top_left = node_data.get("topLeftRadius", None)
    br_top_right = node_data.get("topRightRadius", None)
    br_bottom_left = node_data.get("bottomLeftRadius", None)
    br_bottom_right = node_data.get("bottomRightRadius", None)
    if all(v is not None for v in [br_top_left, br_top_right, br_bottom_left, br_bottom_right]):
        feature["border_radius"] = (br_top_left + br_top_right + br_bottom_left + br_bottom_right) / 4
    
    features.append(feature)
    
    # Recursively process children, updating hierarchy features.
    for child in children:
        features.extend(extract_features(child, depth=depth+1, parent_tag=tag, sibling_count=len(children)-1))
    
    return features

In [39]:

# Extract features using the recursive function starting at the root (depth 0, no parent, no siblings)
features_list = extract_features(data, depth=0, parent_tag=None, sibling_count=0)

# Create a Pandas DataFrame from the extracted features
df = pd.DataFrame(features_list)

# Display the DataFrame before any new calculations
print("Initial DataFrame:")
display(df.head())

# --- Normalization of Coordinates ---
# Compute the minimum x and y values (ignoring None values)
min_x = df['x'].min() if df['x'].notnull().any() else 0
min_y = df['y'].min() if df['y'].notnull().any() else 0

# Create new columns for normalized coordinates
df['x_normalized'] = df['x'] - min_x
df['y_normalized'] = df['y'] - min_y

# --- Relative Positioning ---
# Compute the center coordinates for each node
df['x_center'] = df['x'] + df['width'] / 2
df['y_center'] = df['y'] + df['height'] / 2

# Determine total_width and total_height from the BODY node, if available.
body_node = df[df['tag'] == 'BODY']
if not body_node.empty:
    total_width = body_node.iloc[0]['width']
    total_height = body_node.iloc[0]['height']
    print(f"Using BODY node dimensions: total_width={total_width}, total_height={total_height}")
else:
    # Fallback: compute total width/height as maximum x+width and y+height
    total_width = (df['x'] + df['width']).max()
    total_height = (df['y'] + df['height']).max()
    print(f"Computed dimensions: total_width={total_width}, total_height={total_height}")

# Compute quarter values as relative partitions of the body dimensions
df['x_quarter'] = df['x_center'] / total_width
df['y_quarter'] = df['y_center'] / total_height



# --- Additional Size Features ---
# Aspect ratio: width / height (avoid division by zero)
df['aspect_ratio'] = df.apply(lambda row: row['width'] / row['height'] if row['height'] and row['height'] != 0 else None, axis=1)
# Area: width * height
df['area'] = df['width'] * df['height']
# Normalized width and height relative to the body dimensions
df['normalized_width'] = df['width'] / total_width
df['normalized_height'] = df['height'] / total_height



# Display the DataFrame after adding new columns
print("DataFrame with Normalized and Relative Coordinates:")
display(df.head())

# Save the DataFrame to an Excel file
output_excel_file = "figma_features_with_relative_positions.xlsx"
df.to_excel(output_excel_file, index=False)
print(f"Extracted features with normalized and relative positions have been saved to {output_excel_file}")

Initial DataFrame:


Unnamed: 0,tag,type,x,y,width,height,characters,depth,num_children,parent_tag,...,font_weight,color,background_color,border_radius,visibility,text_length,word_count,contains_number,contains_special_chars,text_type
0,BODY,FRAME,20,21,760.0,797,,0,12,,...,,"rgb(250, 250, 250)","rgb(250, 250, 250)",0.0,visible,0,0,False,False,short
1,H1,TEXT,20,21,760.0,38,Figma Node Extractor Test,1,0,BODY,...,normal,"rgb(0, 0, 0)","rgb(0, 0, 0)",,visible,25,4,False,False,short
2,P,TEXT,20,80,760.0,19,This is a sample text node contained within a ...,1,0,BODY,...,normal,"rgb(0, 0, 0)","rgb(0, 0, 0)",,visible,64,11,False,True,long
3,IMG,RECTANGLE,20,116,300.0,168,,1,0,BODY,...,,,,0.0,visible,0,0,False,False,short
4,PICTURE,RECTANGLE,325,268,300.0,18,,1,0,BODY,...,,,,0.0,visible,0,0,False,False,short


Using BODY node dimensions: total_width=760.0, total_height=797
DataFrame with Normalized and Relative Coordinates:


Unnamed: 0,tag,type,x,y,width,height,characters,depth,num_children,parent_tag,...,x_normalized,y_normalized,x_center,y_center,x_quarter,y_quarter,aspect_ratio,area,normalized_width,normalized_height
0,BODY,FRAME,20,21,760.0,797,,0,12,,...,0,0,400.0,419.5,0.526316,0.526349,0.953576,605720.0,1.0,1.0
1,H1,TEXT,20,21,760.0,38,Figma Node Extractor Test,1,0,BODY,...,0,0,400.0,40.0,0.526316,0.050188,20.0,28880.0,1.0,0.047679
2,P,TEXT,20,80,760.0,19,This is a sample text node contained within a ...,1,0,BODY,...,0,59,400.0,89.5,0.526316,0.112296,40.0,14440.0,1.0,0.023839
3,IMG,RECTANGLE,20,116,300.0,168,,1,0,BODY,...,0,95,170.0,200.0,0.223684,0.250941,1.785714,50400.0,0.394737,0.21079
4,PICTURE,RECTANGLE,325,268,300.0,18,,1,0,BODY,...,305,247,475.0,277.0,0.625,0.347553,16.666667,5400.0,0.394737,0.022585


Extracted features with normalized and relative positions have been saved to figma_features_with_relative_positions.xlsx
