In [None]:
"""
Just to get a bit off track here, can we try the following: 

Check whether the data contains sequences like
* View 1 -> Action 1 -> View 2 -> Action 2 -> View 3
where View 1 and View 3 are very similar.

Write a script that finds these sequences and lists the action pairs involved.
"""

In [2]:
import glob
import os
import re
from datetime import datetime

import numpy as np
from scipy.spatial.distance import cosine

try:
    from tqdm import tqdm
    TQDM_AVAILABLE = True
except ImportError:
    TQDM_AVAILABLE = False
    print("tqdm not installed. Progress bars disabled. Install with 'pip install tqdm'.")

# Configuration
# NOTE(review): absolute, machine-specific Windows path -- must be edited before
# running on any other machine; consider making it relative/configurable.
output_dir = r"C:\Users\HP\Desktop\DanielCremers2-RandomMotion\output\20250825-221040"
# Results are written to a new subfolder of output_dir named after the current
# time (second resolution).
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")  # New timestamp for output
result_dir = os.path.join(output_dir, timestamp)
# Side effect: the result folder is created as soon as this cell runs.
os.makedirs(result_dir, exist_ok=True)
similarity_threshold = 0.95  # Cosine similarity threshold for "very similar"
# ray_length is used as the distance for rays with no intersection and as the
# cap on measured distances; num_rays is the length of each distance vector.
ray_length = 200  # From robot_control.py
num_rays = 100  # From robot_control.py

def _step_sort_key(path):
    """Sort key that orders data files chronologically instead of alphabetically.

    Numbers embedded in the file name are compared as integers (so
    ``lidar_200`` precedes ``lidar_1000``) and ``*_final_*`` files sort last.
    """
    name = os.path.basename(path)
    numbers = tuple(int(token) for token in re.findall(r"\d+", name))
    return ("_final_" in name, numbers, name)


def _find_step_files(base_dir, prefix):
    """Return each '<prefix>_*.npy' file in base_dir exactly once, in step order."""
    # "<prefix>_*.npy" already matches "<prefix>_final_*.npy", so a single glob
    # is enough; globbing both patterns would list every final file twice.
    return sorted(glob.glob(os.path.join(base_dir, f"{prefix}_*.npy")),
                  key=_step_sort_key)


def load_data(base_dir):
    """Load all lidar and action .npy files, sorted by step number.

    Args:
        base_dir: Directory that directly contains the ``lidar_*.npy``,
            ``actions_*.npy`` and ``path_*.npy`` files (no subdirectories).

    Returns:
        Tuple ``(all_lidars, all_actions, step_counts, path_files)``:
        the concatenated lidar scans, the concatenated actions, the number of
        actions in each loaded file, and the ordered list of path file names.
        Path files are returned unloaded; when a lidar/action pair could not be
        loaded and there is one path file per lidar file, the path file at the
        same position is dropped so the three streams stay aligned.

    Raises:
        FileNotFoundError: If any of the three file groups is empty.
        ValueError: If no lidar/action pair could be loaded.
    """
    # Look for files directly in base_dir, not subdirectories
    lidar_files = _find_step_files(base_dir, "lidar")
    action_files = _find_step_files(base_dir, "actions")
    path_files = _find_step_files(base_dir, "path")

    # Debugging: Print found files
    print(f"Found {len(lidar_files)} lidar files")
    print(f"Found {len(action_files)} action files")
    print(f"Found {len(path_files)} path files")

    if not lidar_files or not action_files or not path_files:
        raise FileNotFoundError("No matching .npy files found in " + base_dir)

    # zip() stops at the shorter list; make that visible instead of silent.
    n_pairs = min(len(lidar_files), len(action_files))
    if len(lidar_files) != len(action_files):
        print(f"Warning: {len(lidar_files)} lidar files vs {len(action_files)} action files; "
              f"only the first {n_pairs} pairs are loaded and pairing may be misaligned")

    lidar_data = []
    action_data = []
    step_counts = []
    failed_indices = set()

    # Load files with progress bar
    iterator = zip(lidar_files, action_files)
    if TQDM_AVAILABLE:
        iterator = tqdm(iterator, total=n_pairs, desc="Loading data files")

    for index, (lidar_file, action_file) in enumerate(iterator):
        try:
            # SECURITY: allow_pickle=True can execute arbitrary code while
            # unpickling; only point this at files you produced yourself.
            lidars = np.load(lidar_file, allow_pickle=True)
            actions = np.load(action_file, allow_pickle=True)
            n_steps = len(actions)
        except Exception as e:
            print(f"Error loading {lidar_file} or {action_file}: {e}")
            failed_indices.add(index)
            continue
        # Append only after everything succeeded so the three lists stay in sync.
        lidar_data.append(lidars)
        action_data.append(actions)
        step_counts.append(n_steps)

    # Concatenate data
    if not lidar_data or not action_data:
        raise ValueError("No data loaded from files")

    # Keep poses aligned with scans: skip the path chunk of every failed pair.
    if failed_indices and len(path_files) == len(lidar_files):
        path_files = [p for k, p in enumerate(path_files) if k not in failed_indices]

    all_lidars = np.concatenate(lidar_data)
    all_actions = np.concatenate(action_data)
    return all_lidars, all_actions, step_counts, path_files

def lidar_to_distances(lidar_scan, robot_x, robot_y):
    """Turn one lidar scan into a fixed-length vector of ranges.

    Args:
        lidar_scan: Sequence with one entry per ray; each entry is either
            ``None`` (no intersection) or an indexable whose first two items
            are the x and y coordinates of the intersection.
        robot_x: x coordinate the ranges are measured from.
        robot_y: y coordinate the ranges are measured from.

    Returns:
        Float array of length ``num_rays``. Rays without an intersection (and
        any ray index the scan does not cover) hold ``ray_length``; measured
        ranges are clipped to ``ray_length``.
    """
    # Start from "nothing hit anywhere": every ray at maximum range.
    distances = np.full(num_rays, ray_length, dtype=float)
    for ray_index, hit in enumerate(lidar_scan):
        if hit is None:
            continue
        dx = hit[0] - robot_x
        dy = hit[1] - robot_y
        measured = np.sqrt(dx ** 2 + dy ** 2)
        distances[ray_index] = min(measured, ray_length)  # never exceed max range
    return distances

def find_similar_sequences(lidar_data, action_data, path_data, threshold):
    """Find sequences where View 1 and View 3 are similar, return action pairs.

    For every index ``i`` the triple View ``i`` -> Action ``i`` -> View ``i+1``
    -> Action ``i+1`` -> View ``i+2`` is examined; Views ``i`` and ``i+2`` are
    converted to distance vectors and compared with cosine similarity.

    Args:
        lidar_data: Indexable of lidar scans, one per step.
        action_data: Indexable of actions, one per step.
        path_data: Indexable of robot poses; the first two items of each entry
            are used as the robot's x and y position.
        threshold: A triple is reported when its similarity is strictly
            greater than this value.

    Returns:
        List of ``(i, action_i, action_i_plus_1, similarity)`` tuples in
        ascending ``i``. Empty when there are not enough steps for one triple.
    """
    # A triple starting at i reads lidar/path index i + 2 and action index
    # i + 1, so the shortest of the three streams bounds the loop. Without this
    # a shorter action or path array raises IndexError part-way through.
    full_count = max(0, len(lidar_data) - 2)
    n_sequences = max(0, min(len(lidar_data) - 2,
                             len(action_data) - 1,
                             len(path_data) - 2))
    if n_sequences < full_count:
        print(f"Warning: lidar/action/path lengths differ "
              f"({len(lidar_data)}/{len(action_data)}/{len(path_data)}); "
              f"analyzing only the first {n_sequences} sequences")

    results = []
    # Iterate with progress bar
    iterator = range(n_sequences)
    if TQDM_AVAILABLE:
        iterator = tqdm(iterator, total=n_sequences, desc="Analyzing sequences")

    for i in iterator:
        view1 = lidar_data[i]
        view3 = lidar_data[i + 2]
        action1 = action_data[i]
        action2 = action_data[i + 1]

        # Get robot positions for distance calculations
        robot_x1, robot_y1 = path_data[i][:2]
        robot_x3, robot_y3 = path_data[i + 2][:2]

        # Convert lidar scans to distance vectors
        dist1 = lidar_to_distances(view1, robot_x1, robot_y1)
        dist3 = lidar_to_distances(view3, robot_x3, robot_y3)

        # Compute cosine similarity (1 - cosine distance for similarity)
        try:
            similarity = 1 - cosine(dist1, dist3)
            if similarity > threshold:
                results.append((i, action1, action2, similarity))
        except Exception as e:
            print(f"Error computing similarity at step {i}: {e}")

    return results

def main():
    """Run the full analysis: load the recordings, find triples whose first and
    third views are similar, then write the hits to a text report in
    ``result_dir`` and echo each one to stdout.

    Loading problems are reported with a message and end the run early rather
    than raising.
    """
    # Stage 1: lidar scans and actions (path files are only listed here)
    try:
        all_lidars, all_actions, step_counts, path_files = load_data(output_dir)
    except Exception as e:
        print(f"Failed to load data: {e}")
        return

    # Stage 2: robot poses, optionally wrapped in a progress bar
    try:
        path_source = tqdm(path_files, desc="Loading path files") if TQDM_AVAILABLE else path_files
        path_chunks = [np.load(path_file, allow_pickle=True) for path_file in path_source]
        path_data = np.concatenate(path_chunks)
    except Exception as e:
        print(f"Error loading path files: {e}")
        return

    # Stage 3: search for View 1 ~ View 3 triples
    matches = find_similar_sequences(all_lidars, all_actions, path_data, similarity_threshold)

    # Stage 4: report -- every row goes to the file and, stripped, to stdout
    output_file = os.path.join(result_dir, "similar_view_sequences.txt")
    with open(output_file, 'w') as report:
        report.write(f"Similar View Sequences (threshold: {similarity_threshold})\n")
        report.write("Step | Action 1 | Action 2 | Similarity\n")
        report.write("-" * 50 + "\n")
        for step, first_action, second_action, score in matches:
            row = f"{step:6d} | {first_action:8s} | {second_action:8s} | {score:.4f}\n"
            report.write(row)
            print(row.strip())

    print(f"\nFound {len(matches)} sequences. Results saved to {output_file}")

# Run the analysis when this cell/script is executed directly; importing the
# code as a module does not trigger it.
if __name__ == "__main__":
    main()

Found 376 lidar files
Found 376 action files
Found 376 path files


Loading data files: 100%|██████████| 376/376 [00:14<00:00, 26.43it/s]
Loading path files: 100%|██████████| 376/376 [00:05<00:00, 69.61it/s]
Analyzing sequences: 100%|██████████| 37598/37598 [00:26<00:00, 1431.25it/s]


2 | right    | back     | 1.0000
4 | back     | left     | 1.0000
6 | right    | right    | 1.0000
10 | right    | forward  | 0.9999
15 | left     | left     | 0.9521
16 | left     | forward  | 0.9676
17 | forward  | left     | 1.0000
18 | left     | right    | 0.9676
19 | right    | back     | 1.0000
20 | back     | forward  | 1.0000
21 | forward  | back     | 1.0000
22 | back     | forward  | 0.9521
27 | left     | forward  | 1.0000
28 | forward  | back     | 0.9879
32 | right    | forward  | 1.0000
35 | right    | back     | 1.0000
36 | back     | forward  | 0.9938
37 | forward  | forward  | 1.0000
42 | left     | back     | 0.9943
47 | left     | back     | 0.9613
48 | back     | left     | 0.9635
56 | forward  | right    | 1.0000
62 | left     | left     | 0.9919
63 | left     | back     | 0.9994
64 | back     | back     | 1.0000
65 | back     | forward  | 0.9994
66 | forward  | forward  | 0.9991
67 | forward  | forward  | 0.9930
68 | forward  | left     | 0.9739
69 | left     | l