In [None]:
# Convert .pts and .seg to .txt
import os

def convert_files_to_txt(folder, old_ext, new_ext=".txt"):
    """
    Write a copy of every file in `folder` whose name ends with `old_ext`,
    with that trailing extension swapped for `new_ext`.

    The original files are left in place; only new files are created
    (or overwritten if a file with the target name already exists).

    Args:
        folder: Directory to scan (not recursive).
        old_ext: Suffix to match, including the dot (e.g. ".pts").
        new_ext: Suffix for the copies. Defaults to ".txt".

    Raises:
        OSError: If `folder` cannot be listed or a file cannot be read/written.
    """
    for filename in os.listdir(folder):
        if not filename.endswith(old_ext):
            continue

        old_path = os.path.join(folder, filename)
        # Swap only the trailing extension. str.replace() would rewrite every
        # occurrence of old_ext, mangling names such as "scan.pts_v2.pts".
        stem = filename[:len(filename) - len(old_ext)]
        new_path = os.path.join(folder, stem + new_ext)

        # Read everything before opening the target for writing, so the case
        # old_ext == new_ext (same path) does not truncate the data first.
        with open(old_path, "r") as src:
            content = src.read()
        with open(new_path, "w") as dst:
            dst.write(content)

        print(f"Converted: {old_path} -> {new_path}")

# Folders scanned below: points_folder for ".pts" files, labels_folder for ".seg" files.
# Paths are relative to the notebook's working directory.
points_folder = "data/points"
labels_folder = "data/labels"

# Write a .txt copy of every .pts file found in points_folder
convert_files_to_txt(points_folder, ".pts")

# Write a .txt copy of every .seg file found in labels_folder
convert_files_to_txt(labels_folder, ".seg")

In [None]:
def find_max_pointcloud_length(folder, extension=".txt"):
    """
    Return the largest line count among the files in `folder` whose names
    end with `extension` (one line is treated as one point).

    Prints the count for each matching file and the overall maximum.
    Returns 0 when no file matches.
    """
    longest = 0
    for entry in os.listdir(folder):
        if not entry.endswith(extension):
            continue

        # Stream the file so large clouds are never held in memory.
        point_count = 0
        with open(os.path.join(folder, entry), "r") as handle:
            for _ in handle:
                point_count += 1

        if point_count > longest:
            longest = point_count
        print(f"{entry}: {point_count} points")

    print(f"Maximum point cloud length: {longest}")
    return longest

# Largest line count among the ".txt" files (the function's default extension)
# in points_folder. Relies on points_folder having been set by an earlier cell.
max_points = find_max_pointcloud_length(points_folder)

In [None]:
import os
import random
import shutil

def count_files_in_folder(folder, extension=".txt"):
    """
    Return how many entries in `folder` have names ending with `extension`.

    The count is also printed. The scan is not recursive.
    """
    matching = [name for name in os.listdir(folder) if name.endswith(extension)]
    num_files = len(matching)
    print(f"Number of {extension} files in {folder}: {num_files}")
    return num_files

def pick_random_related_files(points_folder, labels_folder, sample_points_folder, sample_labels_folder, num_files=100, extension=".txt"):
    """
    Copy a random sample of files that exist under the same name in both
    `points_folder` and `labels_folder` into the two sample folders.

    Args:
        points_folder: Source directory of point files.
        labels_folder: Source directory of label files.
        sample_points_folder: Destination for the sampled point files (created if missing).
        sample_labels_folder: Destination for the sampled label files (created if missing).
        num_files: Maximum number of file pairs to copy; capped at the number
            of names common to both source folders.
        extension: Only names ending with this suffix are considered.

    The sample is drawn with the module-level `random` generator, so calling
    `random.seed(...)` beforehand makes the selection reproducible.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(sample_points_folder, exist_ok=True)
    os.makedirs(sample_labels_folder, exist_ok=True)

    points_files = {f for f in os.listdir(points_folder) if f.endswith(extension)}
    labels_files = {f for f in os.listdir(labels_folder) if f.endswith(extension)}

    # Sort the intersection: set iteration order depends on string hashing,
    # which varies between interpreter runs, so an unsorted population would
    # give a different sample even under a fixed random seed.
    common_files = sorted(points_files & labels_files)

    selected_files = random.sample(common_files, min(num_files, len(common_files)))

    for file in selected_files:
        shutil.copy(os.path.join(points_folder, file), os.path.join(sample_points_folder, file))
        shutil.copy(os.path.join(labels_folder, file), os.path.join(sample_labels_folder, file))

    print(f"Copied {len(selected_files)} related files to {sample_points_folder} and {sample_labels_folder}")

# Source folders and the destination folders that receive the sampled copies
points_folder = "data/points"
labels_folder = "data/labels"
sample_points_folder = "data/sample_points_50"
sample_labels_folder = "data/sample_labels_50"

# Report how many ".txt" files each source folder holds
num_points_files = count_files_in_folder(points_folder)
num_labels_files = count_files_in_folder(labels_folder)

# Copy up to 50 randomly chosen files that exist under the same name in both folders
pick_random_related_files(points_folder, labels_folder, sample_points_folder, sample_labels_folder, num_files=50, extension=".txt")

In [2]:
import plotly.graph_objs as go
import numpy as np

# Function to read and extract coordinates and labels from a txt file
def read_data(file_path):
    """
    Read a whitespace-separated point file into coordinate and label arrays.

    For each usable line, the first three fields are parsed as floats
    (x, y, z) and the second-to-last field as an integer label. Lines with
    fewer than four fields (including blank lines) are skipped, matching the
    `len(values) >= 4` guard used by `process_files` in this notebook.

    Args:
        file_path: Path of the text file to read.

    Returns:
        Tuple `(x, y, z, labels)` of 1-D NumPy arrays of equal length;
        the coordinates are float arrays and `labels` is an integer array.

    Raises:
        ValueError: If a field on a usable line cannot be parsed as a number.
    """
    x, y, z, labels = [], [], [], []

    with open(file_path, 'r') as file:
        for line in file:
            values = line.split()
            if len(values) < 4:
                # A blank or truncated line has no separate label column;
                # indexing it would raise or misread a coordinate as the label.
                continue
            x.append(float(values[0]))   # X coordinate
            y.append(float(values[1]))   # Y coordinate
            z.append(float(values[2]))   # Z coordinate
            labels.append(int(values[-2]))  # Label (2nd last value)

    # Explicit dtypes keep the result types stable even when no line was usable.
    return (
        np.array(x, dtype=float),
        np.array(y, dtype=float),
        np.array(z, dtype=float),
        np.array(labels, dtype=int),
    )

# Load one example cloud; point file_path at any other file to inspect it
file_path = 'data/roofNTNU/roof_clouds_normed/10457866.txt'  # Replace with your file path
x, y, z, labels = read_data(file_path)

# Distinct label values present in this cloud
unique_labels = np.unique(labels)

# Palette cycled over the labels (colours repeat after eight labels)
color_map = ['red', 'blue', 'green', 'purple', 'orange', 'cyan', 'magenta', 'yellow']

# One Scatter3d trace per label, so each label gets its own colour and legend entry
traces = []
for i, label in enumerate(unique_labels):
    mask = labels == label  # boolean selector for the points carrying this label
    marker_style = {
        'size': 2,
        'color': color_map[i % len(color_map)],
        'opacity': 0.8,
    }
    trace = go.Scatter3d(
        x=x[mask],
        y=y[mask],
        z=z[mask],
        mode='markers',
        marker=marker_style,
        name=f'Label {label}',
    )
    traces.append(trace)

# Axis titles and figure-level settings
scene_axes = {'xaxis_title': 'X', 'yaxis_title': 'Y', 'zaxis_title': 'Z'}
layout = go.Layout(
    scene=scene_axes,
    title='3D Scatter Plot of RoofNTNU Data (Colored by Labels)',
    showlegend=True,
)

# Assemble the figure and render it
fig = go.Figure(data=traces, layout=layout)
fig.show()

In [1]:
import os
import numpy as np

# Folder scanned for ".txt" files
folder_path = "data/roofNTNU/roof_clouds_normed/"  # Replace with your actual path

# Collect the names of all ".txt" entries in the folder
xyz_files = []
for entry in os.listdir(folder_path):
    if entry.endswith(".txt"):
        xyz_files.append(entry)

# How many files were found
num_files = len(xyz_files)

# Line count of every file, in listing order
line_counts = []
for file_name in xyz_files:
    file_path = os.path.join(folder_path, file_name)
    num_lines = 0
    with open(file_path, 'r') as file:
        for _ in file:
            num_lines += 1
    line_counts.append(num_lines)

# Summary statistics; all fall back to 0 when no file was found
if line_counts:
    max_lines = np.max(line_counts)
    min_lines = np.min(line_counts)
    avg_lines = np.mean(line_counts)
    median_lines = np.median(line_counts)
    std_dev_lines = np.std(line_counts)
else:
    max_lines = min_lines = avg_lines = median_lines = std_dev_lines = 0

# Report
print(f"Number of .txt files: {num_files}")
print(f"Max lines in a file: {max_lines}")
print(f"Min lines in a file: {min_lines}")
print(f"Average lines per file: {avg_lines:.2f}")
print(f"Median lines per file: {median_lines}")
print(f"Standard deviation of lines: {std_dev_lines:.2f}")

Number of .txt files: 1032
Max lines in a file: 39172
Min lines in a file: 31
Average lines per file: 2177.60
Median lines per file: 1789.0
Standard deviation of lines: 2012.50


In [3]:
import os
import json
import random
import numpy as np
# Folder scanned for ".txt" files
folder_path = "data/roofNTNU/roof_clouds_normed/"  # Replace with your actual path

# Selection parameters (the output file names below encode these values)
MIN_LINES = 512    # a file must have at least this many lines to be eligible
NUM_TRAIN = 200    # number of files in the training list
NUM_TEST = 20      # number of files in the testing list
SPLIT_SEED = 42    # fixed seed so re-running the cell reproduces the same split

# os.listdir returns names in arbitrary order; sort so the shuffle below
# always starts from the same sequence.
xyz_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".txt"))

# Keep only the files with at least MIN_LINES lines
valid_files = []
for file_name in xyz_files:
    file_path = os.path.join(folder_path, file_name)
    with open(file_path, 'r') as file:
        num_lines = sum(1 for _ in file)  # Count lines
    if num_lines >= MIN_LINES:
        valid_files.append(file_name)

# Shuffle with a private, seeded generator: deterministic, and it leaves the
# global `random` state untouched for other cells.
random.Random(SPLIT_SEED).shuffle(valid_files)

# Disjoint slices of the shuffled list. If fewer than NUM_TRAIN + NUM_TEST
# files are eligible the lists are simply shorter (the prints show the sizes).
train_files = valid_files[:NUM_TRAIN]
test_files = valid_files[NUM_TRAIN:NUM_TRAIN + NUM_TEST]

# Save the training files to a JSON file
train_output_file = "data/roofNTNU/train/files_200_512_train.json"
os.makedirs(os.path.dirname(train_output_file), exist_ok=True)  # open() does not create folders
with open(train_output_file, 'w') as json_file:
    json.dump(train_files, json_file, indent=4)

# Save the testing files to a JSON file
test_output_file = "data/roofNTNU/train/files_20_512_test.json"
os.makedirs(os.path.dirname(test_output_file), exist_ok=True)
with open(test_output_file, 'w') as json_file:
    json.dump(test_files, json_file, indent=4)

print(f"Selected {len(train_files)} training files saved to {train_output_file}")
print(f"Selected {len(test_files)} testing files saved to {test_output_file}")

Selected 200 training files saved to data/roofNTNU/train/files_200_512_train.json
Selected 20 testing files saved to data/roofNTNU/train/files_20_512_test.json


In [4]:
import os
import json

# Input folder, output folder and the JSON file lists read later in this cell
base_folder = "data/roofNTNU/"
source_folder = os.path.join(base_folder, "roof_clouds_normed/")
output_folder = os.path.join(base_folder, "train/")
train_json = os.path.join(output_folder, "files_200_512_train.json")
test_json = os.path.join(output_folder, "files_20_512_test.json")

# Make sure every output sub-folder exists (no error if it already does)
for subfolder in ("sample_points", "sample_labels", "sample_points_test", "sample_labels_test"):
    os.makedirs(os.path.join(output_folder, subfolder), exist_ok=True)

def process_files(file_list, points_folder, labels_folder, source_dir=None):
    """
    Split each listed source file into a points file and a labels file.

    For every line with at least four whitespace-separated fields, the first
    three fields are written (space-joined) to the points file and the
    second-to-last field is written to the labels file. Shorter lines,
    including blank ones, are skipped. Output files keep the input file name
    and are overwritten if they already exist.

    Args:
        file_list: Iterable of file names, used as-is for input and output.
        points_folder: Existing directory that receives the points files.
        labels_folder: Existing directory that receives the labels files.
        source_dir: Directory holding the input files. Defaults to the
            notebook-level `source_folder`, which is what this function
            always used before the parameter existed.
    """
    if source_dir is None:
        # Backward-compatible fallback to the global set up earlier in the cell.
        source_dir = source_folder

    for file_name in file_list:
        input_file = os.path.join(source_dir, file_name)

        # Output file paths
        points_file = os.path.join(points_folder, file_name)
        labels_file = os.path.join(labels_folder, file_name)

        with open(input_file, 'r') as infile, \
             open(points_file, 'w') as points_out, \
             open(labels_file, 'w') as labels_out:

            for line in infile:
                values = line.strip().split()
                if len(values) >= 4:  # Ensure the line has enough values
                    # Write the first 3 values to the points file
                    points_out.write(" ".join(values[:3]) + "\n")
                    # Write the second last value to the labels file
                    labels_out.write(values[-2] + "\n")

# Load the two file lists; each json.load result is passed to process_files as its file_list
with open(train_json, 'r') as f:
    train_files = json.load(f)

with open(test_json, 'r') as f:
    test_files = json.load(f)

# Training files -> <output_folder>/sample_points and <output_folder>/sample_labels
process_files(train_files, 
              os.path.join(output_folder, "sample_points"), 
              os.path.join(output_folder, "sample_labels"))

# Testing files -> <output_folder>/sample_points_test and <output_folder>/sample_labels_test
process_files(test_files, 
              os.path.join(output_folder, "sample_points_test"), 
              os.path.join(output_folder, "sample_labels_test"))

print("Processing complete. Files saved in the respective folders.")

Processing complete. Files saved in the respective folders.


In [3]:
import os
import json

# Input folder, output folder and the JSON file lists read later in this cell
base_folder = "data/roofNTNU/"
source_folder = os.path.join(base_folder, "roof_clouds_normed/")
output_folder = os.path.join(base_folder, "train_test_split/")
train_json = os.path.join(output_folder, "shuffled_train_file_list.json")
test_json = os.path.join(output_folder, "shuffled_test_file_list.json")

# Make sure every output sub-folder exists (no error if it already does)
for subfolder in ("points_train_n", "labels_train_n", "points_test_n", "labels_test_n"):
    os.makedirs(os.path.join(output_folder, subfolder), exist_ok=True)

def process_files(file_list, points_folder, labels_folder, source_dir=None, label_offset=1):
    """
    Split each listed source file into a points file and a shifted-labels file.

    Each entry of `file_list` is a name without extension; ".txt" is appended
    for the input and both outputs. For every line with at least four
    whitespace-separated fields, the first three fields are written
    (space-joined) to the points file, and the second-to-last field is parsed
    as an integer, increased by `label_offset`, and written to the labels
    file. Shorter lines, including blank ones, are skipped. Existing output
    files are overwritten.

    Args:
        file_list: Iterable of file names without the ".txt" extension.
        points_folder: Existing directory that receives the points files.
        labels_folder: Existing directory that receives the labels files.
        source_dir: Directory holding the input files. Defaults to the
            notebook-level `source_folder`, which is what this function
            always used before the parameter existed.
        label_offset: Integer added to every label. Defaults to 1, the shift
            that was previously hard-coded.

    Raises:
        ValueError: If a label field is not an integer literal.
    """
    if source_dir is None:
        # Backward-compatible fallback to the global set up earlier in the cell.
        source_dir = source_folder

    for file_name in file_list:
        txt_name = file_name + '.txt'
        input_file = os.path.join(source_dir, txt_name)

        # Output file paths
        points_file = os.path.join(points_folder, txt_name)
        labels_file = os.path.join(labels_folder, txt_name)

        with open(input_file, 'r') as infile, \
             open(points_file, 'w') as points_out, \
             open(labels_file, 'w') as labels_out:

            for line in infile:
                values = line.strip().split()
                if len(values) >= 4:  # Ensure the line has enough values
                    # Write the first 3 values to the points file
                    points_out.write(" ".join(values[:3]) + "\n")
                    # Shift the second last value and write it to the labels file
                    label = int(values[-2]) + label_offset
                    labels_out.write(str(label) + "\n")

# Load the two file lists; each json.load result is passed to process_files as its file_list.
# NOTE(review): process_files appends '.txt' to every entry, so the JSON entries
# are presumably names without extension — confirm against the JSON files.
with open(train_json, 'r') as f:
    train_files = json.load(f)

with open(test_json, 'r') as f:
    test_files = json.load(f)

# Training files -> <output_folder>/points_train_n and <output_folder>/labels_train_n
process_files(train_files, 
              os.path.join(output_folder, "points_train_n"), 
              os.path.join(output_folder, "labels_train_n"))

# Testing files -> <output_folder>/points_test_n and <output_folder>/labels_test_n
process_files(test_files, 
              os.path.join(output_folder, "points_test_n"), 
              os.path.join(output_folder, "labels_test_n"))

print("Processing complete. Files saved in the respective folders.")

Processing complete. Files saved in the respective folders.
