In [None]:
import os
import json
import xml.etree.ElementTree as ET
from tqdm import tqdm
# Set your input dataset directory and output JSON file path here
dataset_dir = "dataset/cubicasa5k"  # <- adjust to your actual dataset location
output_file = "floorplan_metadata.json"

def parse_svg(file_path):
    namespaces = {
        'svg': 'http://www.w3.org/2000/svg',
        'xlink': 'http://www.w3.org/1999/xlink'
    }

    try:
        tree = ET.parse(file_path)
        root = tree.getroot()

        def parse_dimension(attr):
            val = root.attrib.get(attr, '0').replace('px', '')
            try:
                return float(val)
            except ValueError:
                return 0.0

        width = parse_dimension('width')
        height = parse_dimension('height')
        viewBox = root.attrib.get('viewBox', None)

        metadata = {
            'width': width,
            'height': height,
            'viewBox': viewBox,
            'spaces': [],
            'walls': [],
            'doors': [],
            'windows': [],
            'furniture': []
        }

        def extract_elements(xpath, element_list, tag_name):
            for element in root.findall(xpath, namespaces):
                element_class = element.attrib.get('class', '')
                if tag_name not in element_class:
                    continue

                polygon = element.find(".//svg:polygon", namespaces)
                if polygon is None:
                    continue

                raw_points = polygon.attrib.get('points', '')
                if not raw_points.strip():
                    continue

                # Parse points
                try:
                    pts = [[float(x), float(y)] for x, y in 
                        (pt.split(',') for pt in raw_points.strip().split())]
                except:
                    continue  # skip malformed

                # Try to get room type from class or id
                element_id = element.attrib.get('id', '')
                room_type = element_class.replace('Space ', '').split()[-1] if tag_name == 'Space' else tag_name

                element_list.append({
                    'id': element_id,
                    'type': room_type,
                    'points': pts
                })

        extract_elements(".//svg:g", metadata['spaces'], 'Space')
        extract_elements(".//svg:g", metadata['walls'], 'Wall')
        extract_elements(".//svg:g", metadata['doors'], 'Door')
        extract_elements(".//svg:g", metadata['windows'], 'Window')
        extract_elements(".//svg:g", metadata['furniture'], 'FixedFurniture')

        return metadata

    except ET.ParseError as e:
        print(f"Parse error in {file_path}: {e}")
        return None
    except Exception as e:
        print(f"General error in {file_path}: {e}")
        return None


def extract_metadata_from_dataset(dataset_dir, output_file):
    all_metadata = []

    for root_dir, _, files in os.walk(dataset_dir):
        for file in tqdm(files, desc="Extracting Metadata"):
            if file == "model.svg":
                file_path = os.path.join(root_dir, file)
                metadata = parse_svg(file_path)
                if metadata:
                    all_metadata.append({
                        'file_path': file_path,
                        'width': metadata['width'],
                        'height': metadata['height'],
                        'rooms': metadata['spaces'],
                        'walls': metadata['walls'],
                        'doors': metadata['doors'],
                        'windows': metadata['windows'],
                        'furniture': metadata['furniture']
                    })

    with open(output_file, 'w') as f:
        json.dump(all_metadata, f, indent=2)
    print(f"✅ Saved metadata to {output_file}")
extract_metadata_from_dataset(dataset_dir, output_file)

Extracting Metadata: 100%|███████████████████████████████████████████████████████████████████████| 3/3 [00:00<?, ?it/s]
Extracting Metadata: 0it [00:00, ?it/s]
Extracting Metadata: 100%|██████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 125.07it/s]
Extracting Metadata: 100%|██████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 175.14it/s]
Extracting Metadata: 100%|██████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 174.72it/s]
Extracting Metadata: 100%|██████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 174.23it/s]
Extracting Metadata: 100%|██████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 196.39it/s]
Extracting Metadata: 100%|██████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 171.00it/s]
Extracting Metadata: 100%|██████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 155.73it/s]


In [5]:
import json

# Load metadata
with open('floorplan_metadata.json', 'r') as f:
    metadata = json.load(f)

In [4]:
import os
from tqdm import tqdm
import cairosvg

def convert_svg_to_png(input_dir, output_dir, out_size=(512, 512)):
    os.makedirs(output_dir, exist_ok=True)

    count = 0
    for root, _, files in os.walk(input_dir):
        for file in files:
            if file == "model.svg":
                svg_path = os.path.join(root, file)
                rel_path = os.path.relpath(svg_path, input_dir)
                out_name = rel_path.replace("\\", "_").replace("/", "_").replace(".svg", ".png")
                png_path = os.path.join(output_dir, out_name)

                try:
                    cairosvg.svg2png(url=svg_path, write_to=png_path, output_width=out_size[0], output_height=out_size[1])
                    count += 1
                except Exception as e:
                    print(f"⚠️ Failed to convert {svg_path}: {e}")

    print(f"✅ Converted {count} SVGs to PNGs in {output_dir}")

# Example usage:
convert_svg_to_png(
    input_dir="dataset/cubicasa5k",
    output_dir="dataset/rendered_pngs",
    out_size=(512, 512)
)


⚠️ Failed to convert dataset/cubicasa5k\high_quality\1692\model.svg: must be real number, not NoneType
⚠️ Failed to convert dataset/cubicasa5k\high_quality\7092\model.svg: must be real number, not NoneType
⚠️ Failed to convert dataset/cubicasa5k\high_quality_architectural\10074\model.svg: not enough values to unpack (expected 2, got 1)
⚠️ Failed to convert dataset/cubicasa5k\high_quality_architectural\10754\model.svg: must be real number, not NoneType
⚠️ Failed to convert dataset/cubicasa5k\high_quality_architectural\10769\model.svg: must be real number, not NoneType
⚠️ Failed to convert dataset/cubicasa5k\high_quality_architectural\14611\model.svg: must be real number, not NoneType
⚠️ Failed to convert dataset/cubicasa5k\high_quality_architectural\2003\model.svg: must be real number, not NoneType
⚠️ Failed to convert dataset/cubicasa5k\high_quality_architectural\2565\model.svg: must be real number, not NoneType
⚠️ Failed to convert dataset/cubicasa5k\high_quality_architectural\6143\mo

In [16]:
import json
import os
from collections import defaultdict, Counter

def polygon_area(pts):
    x, y = zip(*pts)
    return 0.5 * abs(sum(x[i]*y[i+1] - x[i+1]*y[i] for i in range(-1, len(pts)-1)))

def extract_room_counts_and_areas(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)

    vectors = []

    for item in data:
        room_counter = Counter()
        room_areas = defaultdict(float)

        for room in item['rooms']:
            rtype = room['type']
            area = polygon_area(room['points'])
            room_counter[rtype] += 1
            room_areas[rtype] += area

        # Generate a proper name like colorful_30_img.png
        svg_path = item['file_path'].replace("\\", "/")
        parts = svg_path.split('/')
        if 'colorful' in parts:
            base_name = f"colorful_{parts[parts.index('colorful')+1]}_img.png"
        elif 'high_quality_architectural' in parts:
            base_name = f"high_quality_architectural_{parts[parts.index('high_quality_architectural')+1]}_img.png"
        elif 'high_quality' in parts:
            base_name = f"high_quality_{parts[parts.index('high_quality')+1]}_img.png"
        else:
            base_name = os.path.basename(svg_path).replace('model.svg', 'model.png')

        vectors.append({
            "input": {
                "counts": dict(room_counter),
                "areas": {k: round(v, 2) for k, v in room_areas.items()}
            },
            "image": base_name
        })

    return vectors

# Example usage
INPUT_JSON = 'dataset/floorplan_metadata.json'
OUTPUT_JSON = 'room_vectors_with_area.json'

vectors = extract_room_counts_and_areas(INPUT_JSON)

with open(OUTPUT_JSON, 'w') as f:
    json.dump(vectors, f, indent=2)

print(f"✅ Saved updated vectors with image names to {OUTPUT_JSON}")


✅ Saved updated vectors with image names to room_vectors_with_area.json
