# In [2]:
import fitz  # PyMuPDF
import re

# Function to extract measurements (lengths, widths, etc.) from text
def extract_measurements(text):
    """Find metric length measurements (m, cm, mm) in a piece of text.

    Args:
        text: The string to scan, e.g. the text of one PDF page.

    Returns:
        A list of ``(value, unit)`` string tuples in order of appearance,
        e.g. ``[("10.5", "m"), ("15", "mm")]``. Empty if nothing matches.
    """
    # Units are listed longest-first: regex alternation takes the first
    # alternative that matches, so with "m" first "15 mm" came back as
    # ("15", "m").
    # The trailing \b rejects words that merely start with a unit letter
    # ("5 min", "10 meters") and area/volume suffixes such as "m2".
    measurement_pattern = r"(\d+\.?\d*)\s*(mm|cm|m)\b"
    return re.findall(measurement_pattern, text)

# Function to process a PDF and extract data
def extract_data_from_pdf(pdf_path):
    """Extract the plain text and length measurements of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A list with one dict per page, in page order, with the keys
        ``"page"`` (1-based page number), ``"text"`` (the page's plain text)
        and ``"measurements"`` (the result of ``extract_measurements`` on
        that text).
    """
    extracted_data = []

    # Open the document in a context manager so it is closed even when a
    # page fails to parse; previously the handle was never released.
    with fitz.open(pdf_path) as pdf_document:
        # Iterating the document yields its pages in order; start=1 gives
        # the human-facing page number directly.
        for page_num, page in enumerate(pdf_document, start=1):
            text = page.get_text("text")  # Plain-text extraction
            extracted_data.append({
                "page": page_num,
                "text": text,
                "measurements": extract_measurements(text),
            })

    return extracted_data

# Function to save extracted data to a CSV file
def save_to_csv(data, output_file):
    """Write per-page extraction results to a UTF-8 CSV file.

    Args:
        data: Iterable of dicts with the keys ``"page"``, ``"text"`` and
            ``"measurements"`` (an iterable of ``(value, unit)`` pairs).
        output_file: Path of the CSV file to create or overwrite.

    The file gets a ``Page, Text, Measurements`` header row followed by one
    row per entry; an entry's measurements are rendered as ``"value unit"``
    and joined with ``"; "``.
    """
    import csv

    def _as_row(record):
        # Collapse the (value, unit) pairs into a single CSV cell.
        joined = "; ".join(
            f"{value} {unit}" for value, unit in record["measurements"]
        )
        return [record["page"], record["text"], joined]

    # newline="" lets the csv module control line endings itself.
    with open(output_file, mode="w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(["Page", "Text", "Measurements"])
        out.writerows(_as_row(record) for record in data)

# Main script
if __name__ == "__main__":
    # Input: the architect drawings PDF inside the data lake.
    pdf_path = "data_lake/blueprints/bygg/6.4 Ritningar/6.4.1 Arkitekt ritningar.pdf"

    # Step 1: pull the text and measurements out of every page.
    extracted_data = extract_data_from_pdf(pdf_path)

    # Step 2: persist the per-page results as CSV in the working directory.
    save_to_csv(extracted_data, "extracted_data.csv")

    # Step 3: report completion to the user.
    print("Data extraction complete. Results saved to 'extracted_data.csv'.")

# Output: Data extraction complete. Results saved to 'extracted_data.csv'.


# In [None]:
from ultralytics import YOLO
import cv2
import os

# Load the YOLO model
model = YOLO("yolov8n.pt")  # Replace with your YOLO model file

# Directory containing PNG images, and the directory the annotated copies go to
image_dir = "path/to/png/images"
output_dir = "path/to/save/results"
os.makedirs(output_dir, exist_ok=True)

# Process each PNG image. os.listdir returns entries in arbitrary order, so
# sort them to make runs reproducible.
for image_name in sorted(os.listdir(image_dir)):
    # Compare the extension case-insensitively so "PLAN.PNG" is not skipped.
    if not image_name.lower().endswith(".png"):
        continue

    # Load the image
    image_path = os.path.join(image_dir, image_name)
    image = cv2.imread(image_path)

    # cv2.imread signals failure by returning None rather than raising.
    # Passing None on to the model either errors or, depending on the
    # Ultralytics version, may run on a default source instead (TODO confirm),
    # so skip the file explicitly.
    if image is None:
        print("Skipping unreadable image:", image_path)
        continue

    # Run YOLO on the image
    results = model(image)

    for result in results:
        # Display the image with detections drawn on it
        result.show()

        # Save the annotated image under the original file name
        output_path = os.path.join(output_dir, image_name)
        result.save(filename=output_path)

print("Processing complete. Results saved to:", output_dir)