In [1]:
import os
import json

# Input/output locations, all relative to the dataset root
base_folder = "data/roofNTNU/"
source_folder = os.path.join(base_folder, "roof_clouds_normed/")
output_folder = os.path.join(base_folder, "train_test_split/")

# JSON lists naming the files that belong to each split
train_json, test_json, val_json = [
    os.path.join(output_folder, list_name)
    for list_name in (
        "shuffled_train_file_list.json",
        "shuffled_test_file_list.json",
        "shuffled_val_file_list.json",
    )
]

# Make sure one points folder and one labels folder exists per split
for _subfolder in (
    "points_train_n",
    "labels_train_n",
    "points_test_n",
    "labels_test_n",
    "points_val_n",
    "labels_val_n",
):
    os.makedirs(os.path.join(output_folder, _subfolder), exist_ok=True)

def process_files(file_list, points_folder, labels_folder, source_dir=None):
    """
    Split each listed input file into a points file and a labels file.

    For every name in ``file_list`` the file ``<source_dir>/<name>.txt`` is read
    line by line. Each line is split on whitespace; a line with at least four
    values contributes
      * its first three values (space-joined) to ``<points_folder>/<name>.txt``
      * its second-to-last value, parsed as int and incremented by one, to
        ``<labels_folder>/<name>.txt``.
    Lines with fewer than four values (including blank lines) are skipped.

    Args:
        file_list: Iterable of file names without the ``.txt`` extension.
        points_folder: Folder receiving the per-file points output.
        labels_folder: Folder receiving the per-file labels output.
        source_dir: Folder holding the input ``.txt`` files. When ``None``
            (the default) the module-level ``source_folder`` is used, which
            keeps existing three-argument calls working unchanged.

    Raises:
        OSError: If an input file cannot be opened or an output cannot be written.
        ValueError: If the second-to-last value of a kept line is not an
            integer literal.
    """
    if source_dir is None:
        # Resolved at call time so the notebook-level setting is still honoured.
        source_dir = source_folder

    # Create the output folders here so the function does not depend on
    # another cell having prepared them.
    os.makedirs(points_folder, exist_ok=True)
    os.makedirs(labels_folder, exist_ok=True)

    for file_name in file_list:
        input_file = os.path.join(source_dir, file_name + '.txt')

        # Output file paths
        points_file = os.path.join(points_folder, file_name + '.txt')
        labels_file = os.path.join(labels_folder, file_name + '.txt')

        # The input is opened first, so a missing input raises before any
        # output file for this name is created.
        with open(input_file, 'r') as infile, \
             open(points_file, 'w') as points_out, \
             open(labels_file, 'w') as labels_out:

            for line in infile:
                values = line.strip().split()
                if len(values) < 4:
                    # Too short to hold three coordinates plus a label column.
                    continue

                # First three values go to the points file.
                points_out.write(" ".join(values[:3]) + "\n")

                # Second-to-last value is the label; it is stored shifted by +1.
                # NOTE(review): presumably the shift maps the raw label range
                # onto the one expected downstream - confirm.
                label = int(values[-2]) + 1
                labels_out.write(str(label) + "\n")

# Read all three split lists up front, before any file is processed
split_lists = []
for list_path in (train_json, test_json, val_json):
    with open(list_path, 'r') as handle:
        split_lists.append(json.load(handle))
train_files, test_files, val_files = split_lists

# Process the training, testing and validation files, in that order
for split_names, points_dir, labels_dir in (
    (train_files, "points_train_n", "labels_train_n"),
    (test_files, "points_test_n", "labels_test_n"),
    (val_files, "points_val_n", "labels_val_n"),
):
    process_files(split_names,
                  os.path.join(output_folder, points_dir),
                  os.path.join(output_folder, labels_dir))

print("Processing complete. Files saved in the respective folders.")

Processing complete. Files saved in the respective folders.


In [2]:
import os
import numpy as np

# Folder whose .txt files are summarised
folder_path = "data/roofNTNU/roof_clouds_normed/"

# Keep only the directory entries ending in .txt
xyz_files = [entry for entry in os.listdir(folder_path) if entry.endswith(".txt")]
num_files = len(xyz_files)

# Number of lines in each file, in listing order
line_counts = []
for txt_name in xyz_files:
    with open(os.path.join(folder_path, txt_name), 'r') as handle:
        line_counts.append(sum(1 for _ in handle))

# Summary statistics; every value falls back to 0 when no file was found
if line_counts:
    max_lines = np.max(line_counts)
    min_lines = np.min(line_counts)
    avg_lines = np.mean(line_counts)
    median_lines = np.median(line_counts)
    std_dev_lines = np.std(line_counts)
else:
    max_lines = min_lines = avg_lines = median_lines = std_dev_lines = 0

# Print the summary, one figure per line
print(
    f"Number of .txt files: {num_files}",
    f"Max lines in a file: {max_lines}",
    f"Min lines in a file: {min_lines}",
    f"Average lines per file: {avg_lines:.2f}",
    f"Median lines per file: {median_lines}",
    f"Standard deviation of lines: {std_dev_lines:.2f}",
    sep="\n",
)

Number of .txt files: 1032
Max lines in a file: 39172
Min lines in a file: 31
Average lines per file: 2177.60
Median lines per file: 1789.0
Standard deviation of lines: 2012.50


In [None]:
import os
import numpy as np

# Folder whose .txt files are summarised
folder_path = "data/roofNTNU/train_test_split/points_train_n"

# Keep only the directory entries ending in .txt
xyz_files = [entry for entry in os.listdir(folder_path) if entry.endswith(".txt")]
num_files = len(xyz_files)

# Number of lines in each file, in listing order
line_counts = []
for txt_name in xyz_files:
    with open(os.path.join(folder_path, txt_name), 'r') as handle:
        line_counts.append(sum(1 for _ in handle))

# Summary statistics; every value falls back to 0 when no file was found
if line_counts:
    max_lines = np.max(line_counts)
    min_lines = np.min(line_counts)
    avg_lines = np.mean(line_counts)
    median_lines = np.median(line_counts)
    std_dev_lines = np.std(line_counts)
else:
    max_lines = min_lines = avg_lines = median_lines = std_dev_lines = 0

# Print the summary, one figure per line
print(
    f"Number of .txt files: {num_files}",
    f"Max lines in a file: {max_lines}",
    f"Min lines in a file: {min_lines}",
    f"Average lines per file: {avg_lines:.2f}",
    f"Median lines per file: {median_lines}",
    f"Standard deviation of lines: {std_dev_lines:.2f}",
    sep="\n",
)

In [None]:
import os
import numpy as np

# Folder whose .txt files are summarised
folder_path = "data/roofNTNU/train_test_split/points_test_n"

# Keep only the directory entries ending in .txt
xyz_files = [entry for entry in os.listdir(folder_path) if entry.endswith(".txt")]
num_files = len(xyz_files)

# Number of lines in each file, in listing order
line_counts = []
for txt_name in xyz_files:
    with open(os.path.join(folder_path, txt_name), 'r') as handle:
        line_counts.append(sum(1 for _ in handle))

# Summary statistics; every value falls back to 0 when no file was found
if line_counts:
    max_lines = np.max(line_counts)
    min_lines = np.min(line_counts)
    avg_lines = np.mean(line_counts)
    median_lines = np.median(line_counts)
    std_dev_lines = np.std(line_counts)
else:
    max_lines = min_lines = avg_lines = median_lines = std_dev_lines = 0

# Print the summary, one figure per line
print(
    f"Number of .txt files: {num_files}",
    f"Max lines in a file: {max_lines}",
    f"Min lines in a file: {min_lines}",
    f"Average lines per file: {avg_lines:.2f}",
    f"Median lines per file: {median_lines}",
    f"Standard deviation of lines: {std_dev_lines:.2f}",
    sep="\n",
)

In [None]:
import os
import numpy as np

# Folder whose .txt files are summarised
folder_path = "data/roofNTNU/train_test_split/points_val_n"

# Keep only the directory entries ending in .txt
xyz_files = [entry for entry in os.listdir(folder_path) if entry.endswith(".txt")]
num_files = len(xyz_files)

# Number of lines in each file, in listing order
line_counts = []
for txt_name in xyz_files:
    with open(os.path.join(folder_path, txt_name), 'r') as handle:
        line_counts.append(sum(1 for _ in handle))

# Summary statistics; every value falls back to 0 when no file was found
if line_counts:
    max_lines = np.max(line_counts)
    min_lines = np.min(line_counts)
    avg_lines = np.mean(line_counts)
    median_lines = np.median(line_counts)
    std_dev_lines = np.std(line_counts)
else:
    max_lines = min_lines = avg_lines = median_lines = std_dev_lines = 0

# Print the summary, one figure per line
print(
    f"Number of .txt files: {num_files}",
    f"Max lines in a file: {max_lines}",
    f"Min lines in a file: {min_lines}",
    f"Average lines per file: {avg_lines:.2f}",
    f"Median lines per file: {median_lines}",
    f"Standard deviation of lines: {std_dev_lines:.2f}",
    sep="\n",
)