In [1]:
import os
import json
import pandas as pd

In [2]:
def extract_features(node, depth=0, parent_tag=None, sibling_count=0):
    """
    Recursively extracts features from a Figma node, including hierarchy, 
    layout, size, style, and text features.

    Hierarchy Features:
      - depth: How deep the node is in the hierarchy.
      - num_children: Number of direct child nodes.
      - parent_tag: The tag of the parent node.
      - sibling_count: Number of siblings.
      - is_leaf: Whether this node is a leaf (has no children).

    Style Features:
      - font_size: Text font size (if available).
      - font_weight: Derived from fontName.style (if available).
      - color: Extracted from the first SOLID fill (as an RGB string).
      - background_color: Same as color if applicable.
      - border_radius: Average of the four corner radii (if available).
      - visibility: Defaults to "visible".
      - border_type: Extracted from strokes.type.
      - border_color: Extracted from strokes.color.
      - border_opacity: Extracted from strokes.opacity.
      - border_weight: Extracted from strokeWeight.
      - border_pattern: Extracted from dashPattern.
      - shadow_type: Extracts the type of shadow effect.
      - shadow_color: Retrieves the color of the shadow.
      - shadow_offset: Captures the shadowâ€™s x and y offset.
      - shadow_radius: Represents the blur radius of the shadow.

    Text Features:
      - text_length: Length of inner text (0 if no text).
      - word_count: Number of words in the text.
      - contains_number: Whether the text contains any digits.
      - contains_special_chars: Whether the text contains special symbols.
      - text_type: "short" for brief texts (e.g., labels, buttons) and "long" for extended texts (e.g., paragraphs).
    """
    features = []
    
    # Basic node properties from the "node" object.
    tag = node.get("tag", "")
    node_data = node.get("node", {})
    
    # Extract basic text information
    text = node_data.get("characters", "") or ""
    text_length = len(text)
    word_count = len(text.split())
    contains_number = any(ch.isdigit() for ch in text)
    contains_special_chars = any(not ch.isalnum() and not ch.isspace() for ch in text)
    # Categorize text as "long" if word_count is greater than 10, otherwise "short".
    text_type = "long" if word_count > 10 else "short"
    
    # Compute hierarchy features
    children = node.get("children", [])
    num_children = len(children)
    is_leaf = (num_children == 0)
    
    # Create the feature dictionary
    feature = {
        "tag": tag,
        "type": node_data.get("type", ""),
        "x": node_data.get("x", None),
        "y": node_data.get("y", None),
        "width": node_data.get("width", None),
        "height": node_data.get("height", None),
        "characters": text,
        # Hierarchy features
        "depth": depth,
        "num_children": num_children,
        "parent_tag": parent_tag,
        "sibling_count": sibling_count,
        "is_leaf": is_leaf,
        # Style features
        "font_size": node_data.get("fontSize", None),
        "font_weight": node_data.get("fontName", {}).get("style", None) if node_data.get("fontName") else None,
        "color": None,
        "background_color": None,
        "border_radius": None,
        "visibility": "visible",  # Default to visible
        "border_type": None,
        "border_color": None,
        "border_opacity": None,
        "border_weight": node_data.get("strokeWeight", None),
        "border_pattern": node_data.get("dashPattern", []),
        "shadow_type": None,
        "shadow_color": None,
        "shadow_offset": None,
        "shadow_radius": None,
        # Text features
        "text_length": text_length,
        "word_count": word_count,
        "contains_number": contains_number,
        "contains_special_chars": contains_special_chars,
        "text_type": text_type
    }
    
    # Extract color information from fills (if available)
    fills = node_data.get("fills", [])
    for fill in fills:
        if fill.get("type") == "SOLID" and "color" in fill:
            r = int(fill["color"].get("r", 0) * 255)
            g = int(fill["color"].get("g", 0) * 255)
            b = int(fill["color"].get("b", 0) * 255)
            rgb_str = f"rgb({r}, {g}, {b})"
            feature["color"] = rgb_str
            feature["background_color"] = rgb_str
            break  # Use the first SOLID fill found
    
    # Compute border information
    strokes = node_data.get("strokes", [])
    if strokes:
        stroke = strokes[0]
        feature["border_type"] = stroke.get("type", None)
        if "color" in stroke:
            r = int(stroke["color"].get("r", 0) * 255)
            g = int(stroke["color"].get("g", 0) * 255)
            b = int(stroke["color"].get("b", 0) * 255)
            feature["border_color"] = f"rgb({r}, {g}, {b})"
        feature["border_opacity"] = stroke.get("opacity", None)
    
    # Compute border_radius as the average of the four corner radii if available
    br_top_left = node_data.get("topLeftRadius", None)
    br_top_right = node_data.get("topRightRadius", None)
    br_bottom_left = node_data.get("bottomLeftRadius", None)
    br_bottom_right = node_data.get("bottomRightRadius", None)
    if all(v is not None for v in [br_top_left, br_top_right, br_bottom_left, br_bottom_right]):
        feature["border_radius"] = (br_top_left + br_top_right + br_bottom_left + br_bottom_right) / 4
    
    # Extract shadow properties
    effects = node_data.get("effects", [])
    for effect in effects:
        if effect.get("type") == "DROP_SHADOW":
            feature["shadow_type"] = "DROP_SHADOW"
            if "color" in effect:
                r = int(effect["color"].get("r", 0) * 255)
                g = int(effect["color"].get("g", 0) * 255)
                b = int(effect["color"].get("b", 0) * 255)
                feature["shadow_color"] = f"rgb({r}, {g}, {b})"
            feature["shadow_offset"] = effect.get("offset", None)
            feature["shadow_radius"] = effect.get("radius", None)
            break  # Only consider the first drop shadow
    
    features.append(feature)
    
    # Recursively process children, updating hierarchy features.
    for child in children:
        features.extend(extract_features(child, depth=depth+1, parent_tag=tag, sibling_count=len(children)-1))
    
    return features

In [3]:
# Folder containing JSON files
data_folder = "data"

# Initialize an empty DataFrame to store all extracted features
df_all = pd.DataFrame()

# Iterate over all JSON files in the data folder
for filename in os.listdir(data_folder):
    if filename.endswith(".json"):  # Ensure we only process JSON files
        file_path = os.path.join(data_folder, filename)
        print(f"Processing {file_path}...")
        
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)
        
        # Extract features using the recursive function starting at the root
        features_list = extract_features(data, depth=0, parent_tag=None, sibling_count=0)
        
        # Create a DataFrame from the extracted features
        df = pd.DataFrame(features_list)
        
        # Compute the minimum x and y values (ignoring None values)
        min_x = df['x'].min() if df['x'].notnull().any() else 0
        min_y = df['y'].min() if df['y'].notnull().any() else 0

        # Normalize coordinates
        df['x_normalized'] = df['x'] - min_x
        df['y_normalized'] = df['y'] - min_y

        # Compute the center coordinates for each node
        df['x_center'] = df['x'] + df['width'] / 2
        df['y_center'] = df['y'] + df['height'] / 2

        # Determine total width and height from the BODY node, if available
        body_node = df[df['tag'] == 'BODY']
        if not body_node.empty:
            total_width = body_node.iloc[0]['width']
            total_height = body_node.iloc[0]['height']
        else:
            total_width = (df['x'] + df['width']).max()
            total_height = (df['y'] + df['height']).max()

        # Compute relative positioning
        df['x_quarter'] = df['x_center'] / total_width
        df['y_quarter'] = df['y_center'] / total_height

        # Additional size features
        df['aspect_ratio'] = df.apply(lambda row: row['width'] / row['height'] if row['height'] and row['height'] != 0 else None, axis=1)
        df['area'] = df['width'] * df['height']
        df['normalized_width'] = df['width'] / total_width
        df['normalized_height'] = df['height'] / total_height

        # Append to the main DataFrame
        df_all = pd.concat([df_all, df], ignore_index=True)

# Save the aggregated DataFrame to an Excel file
output_excel_file = "figma_features_with_relative_positions.xlsx"
df_all.to_excel(output_excel_file, index=False)
print(f"Extracted features from all JSON files have been saved to {output_excel_file}")


Processing data\figmaTree_1.json...
Processing data\figmaTree_10.json...
Processing data\figmaTree_100.json...
Processing data\figmaTree_11.json...
Processing data\figmaTree_12.json...


  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_13.json...
Processing data\figmaTree_14.json...
Processing data\figmaTree_15.json...


  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_16.json...
Processing data\figmaTree_17.json...
Processing data\figmaTree_18.json...
Processing data\figmaTree_19.json...
Processing data\figmaTree_2.json...


  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_20.json...
Processing data\figmaTree_21.json...
Processing data\figmaTree_22.json...
Processing data\figmaTree_23.json...
Processing data\figmaTree_24.json...
Processing data\figmaTree_25.json...
Processing data\figmaTree_27.json...
Processing data\figmaTree_28.json...
Processing data\figmaTree_29.json...
Processing data\figmaTree_3.json...
Processing data\figmaTree_30.json...


  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_31.json...
Processing data\figmaTree_32.json...
Processing data\figmaTree_33.json...
Processing data\figmaTree_34.json...
Processing data\figmaTree_35.json...


  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_36.json...
Processing data\figmaTree_37.json...
Processing data\figmaTree_38.json...
Processing data\figmaTree_39.json...
Processing data\figmaTree_4.json...
Processing data\figmaTree_40.json...
Processing data\figmaTree_41.json...
Processing data\figmaTree_42.json...
Processing data\figmaTree_43.json...


  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_44.json...
Processing data\figmaTree_45.json...
Processing data\figmaTree_47.json...
Processing data\figmaTree_48.json...


  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_49.json...
Processing data\figmaTree_5.json...
Processing data\figmaTree_50.json...
Processing data\figmaTree_51.json...
Processing data\figmaTree_52.json...


  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_53.json...
Processing data\figmaTree_54.json...
Processing data\figmaTree_55.json...
Processing data\figmaTree_57.json...


  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_58.json...
Processing data\figmaTree_59.json...
Processing data\figmaTree_6.json...
Processing data\figmaTree_60.json...
Processing data\figmaTree_61.json...
Processing data\figmaTree_62.json...
Processing data\figmaTree_63.json...
Processing data\figmaTree_64.json...


  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_65.json...
Processing data\figmaTree_66.json...
Processing data\figmaTree_67.json...
Processing data\figmaTree_68.json...


  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_69.json...
Processing data\figmaTree_70.json...
Processing data\figmaTree_71.json...
Processing data\figmaTree_72.json...
Processing data\figmaTree_73.json...
Processing data\figmaTree_74.json...
Processing data\figmaTree_75.json...
Processing data\figmaTree_76.json...
Processing data\figmaTree_77.json...


  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_78.json...
Processing data\figmaTree_79.json...
Processing data\figmaTree_8.json...
Processing data\figmaTree_80.json...
Processing data\figmaTree_81.json...
Processing data\figmaTree_82.json...
Processing data\figmaTree_83.json...
Processing data\figmaTree_84.json...
Processing data\figmaTree_85.json...
Processing data\figmaTree_86.json...
Processing data\figmaTree_88.json...
Processing data\figmaTree_89.json...


  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_9.json...
Processing data\figmaTree_90.json...
Processing data\figmaTree_91.json...


  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)
  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_92.json...
Processing data\figmaTree_93.json...
Processing data\figmaTree_94.json...
Processing data\figmaTree_95.json...
Processing data\figmaTree_96.json...
Processing data\figmaTree_97.json...
Processing data\figmaTree_98.json...


  df_all = pd.concat([df_all, df], ignore_index=True)


Processing data\figmaTree_99.json...
Extracted features from all JSON files have been saved to figma_features_with_relative_positions.xlsx
