In [17]:
# AUTO-SET PROJECT ROOT (contains 'analysis/') – ONE CELL, NO FILES CREATED
import os
from pathlib import Path

# Locate the project root: the nearest directory, starting at the kernel's
# current working directory and moving upwards, that has an 'analysis/'
# subfolder. The kernel's working directory is then switched to it.

# Remember where we started so we can fall back to it if the search fails.
original = Path.cwd()

# Check the starting directory and then each ancestor in turn. `Path.parents`
# ends with the filesystem root, so the root itself is tested as well (a
# `while current != current.parent` walk would stop just before testing it).
for current in (original, *original.parents):
    if (current / "analysis").is_dir():
        break
else:
    # for/else: this branch runs only when no candidate triggered `break`.
    print("Warning: 'analysis/' folder not found in any parent. Staying in:", original)
    current = original

# Change to the root (the folder *containing* analysis/)
os.chdir(current)
print(f"Working directory set to: {os.getcwd()}")

# Sanity check against the directory we actually ended up in.
if (Path.cwd() / "analysis").is_dir():
    print("Success: 'analysis/' directory is ready!")
else:
    print("Warning: Still no 'analysis/' folder. You may need to create it.")

Working directory set to: c:\Users\HP\Desktop\Projects\navigation\9-daniel-cremers-random-motion-collect
Success: 'analysis/' directory is ready!


In [18]:
# analysis/run_analysis.py
import os
import numpy as np
from analysis import (
    load_parquet, load_embeddings, prepare_data, compute_pca_rgb,
    compute_features, align_arrays, plot_embeddings_on_map,
    plot_oriented_embeddings, plot_random_cluster, plot_correlations
)

# ================== CONFIG ==================
# Every input and output of this analysis lives under one run directory.
# Derive the individual paths from it so they cannot drift apart when the
# notebook is pointed at a different run. Forward slashes are kept on purpose
# so the resulting strings are identical to the previously hard-coded ones.
RUN_DIR = "output/2025-11-14-213949_random_walk_10k"
TRAIN_DIR = f"{RUN_DIR}/train_2025-11-14-215407"

PARQUET_PATH = f"{RUN_DIR}/merged_100.parquet"
EMBEDDINGS_PATH = f"{TRAIN_DIR}/final_embeddings.npy"
SAVE_DIR = f"{TRAIN_DIR}/analysis_images"
os.makedirs(SAVE_DIR, exist_ok=True)

# Seed NumPy's global RNG so that re-running this cell reproduces the same
# figures. NOTE(review): this only helps for helpers that draw from NumPy's
# global generator (presumably the point subsampling and the random K-means
# cluster plot) - confirm against the code in analysis/.
SEED = 42
np.random.seed(SEED)

# ================== LOAD & PREPARE ==================
df = load_parquet(PARQUET_PATH)
embeddings = load_embeddings(EMBEDDINGS_PATH)

# 1. Pose and lidar columns extracted from the dataframe, given the embeddings
x, y, theta, lidar_data = prepare_data(df, embeddings)

# 2. PCA -> RGB, computed from the embeddings
rgb = compute_pca_rgb(embeddings)

# 3. Hand-crafted features
dist_to_wall, openness, turn_intensity = compute_features(lidar_data, theta)

# ------------------------------------------------------------------
# 4. ALIGN *EVERY* array that will ever be indexed together
# ------------------------------------------------------------------
arrays_to_align = (
    x, y, theta,                              # pose
    dist_to_wall, openness, turn_intensity,   # features
    embeddings, rgb,                          # learned representation
)

aligned = align_arrays(*arrays_to_align)

# Rebind the working names to their aligned versions (same order as above).
x, y, theta, dist_to_wall, openness, turn_intensity, embeddings, rgb = aligned

# ------------------------------------------------------------------
# 5. DEBUG: print the length of every aligned array
# ------------------------------------------------------------------
# Labels follow the order of `arrays_to_align`; the report is built straight
# from `aligned`, i.e. from exactly the objects the check below validates.
aligned_names = (
    "x", "y", "theta",
    "dist_to_wall", "openness", "turn_intensity",
    "embeddings", "rgb",
)
print("\n=== ARRAY LENGTHS AFTER ALIGNMENT ===")
for name, arr in zip(aligned_names, aligned):
    print(f"{name:>15}: {len(arr)}")

# ------------------------------------------------------------------
# 6. Sanity check: stop here if the lengths differ, before any plot is made
# ------------------------------------------------------------------
assert len({len(a) for a in aligned}) == 1, "Not all arrays have the same length!"

# ================== PLOTS ==================
max_points = 100_000          # feel free to lower for faster previews
map_size   = 40               # size of the hand-drawn map (see analysis/plotting.py)

# 1. All embeddings
plot_embeddings_on_map(
    x, y, rgb,
    max_points=max_points, map_size=map_size,
    save_path=os.path.join(SAVE_DIR, "1_all_embeddings.pdf")
)

# 2. Oriented subset (180° ±10°)
plot_oriented_embeddings(
    x, y, theta, rgb,
    target_orientation=180, tolerance=10, map_size=map_size,
    save_path=os.path.join(SAVE_DIR, "2_oriented_180.pdf")
)

# 3. Random K-means cluster
plot_random_cluster(
    x, y, embeddings,
    n_clusters=50, map_size=map_size,
    save_path=os.path.join(SAVE_DIR, "3_random_cluster.pdf")
)

# 4. Correlation bar-charts
# NOTE(review): turn_intensity is computed and aligned above but is not part
# of this dict - confirm whether leaving it out is intentional.
features = {
    "Distance to Wall": dist_to_wall,
    "Openness": openness
}
plot_correlations(
    features, rgb,
    save_path=os.path.join(SAVE_DIR, "4_correlations.pdf")
)

print("\nAll analysis complete! Check:", SAVE_DIR)

[INFO] Loading output/2025-11-14-213949_random_walk_10k/merged_100.parquet ...
[INFO] Total columns: 105
[INFO] Loaded embeddings: (9998, 64)

=== ARRAY LENGTHS AFTER ALIGNMENT ===
              x: 9998
              y: 9998
          theta: 9998
   dist_to_wall: 9998
       openness: 9998
 turn_intensity: 9998
     embeddings: 9998
            rgb: 9998
[INFO] Subsampling to 100000 points for plotting... len=9998
[INFO] Saved to output/2025-11-14-213949_random_walk_10k/train_2025-11-14-215407/analysis_images\1_all_embeddings.pdf
[INFO] Saved to output/2025-11-14-213949_random_walk_10k/train_2025-11-14-215407/analysis_images\2_oriented_180.pdf


  plt.tight_layout()


[INFO] Saved to output/2025-11-14-213949_random_walk_10k/train_2025-11-14-215407/analysis_images\3_random_cluster.pdf
[INFO] Saved to output/2025-11-14-213949_random_walk_10k/train_2025-11-14-215407/analysis_images\4_correlations.pdf

All analysis complete! Check: output/2025-11-14-213949_random_walk_10k/train_2025-11-14-215407/analysis_images
