In [None]:
import re
import matplotlib.pyplot as plt
import numpy as np

def extract_times_from_file(file_path):
    """
    Reads a file and extracts timing data from lines containing 'times:'.

    Only the first 'times: [...]' occurrence on each line is used. Lines whose
    brackets are empty (e.g. 'times: []') carry no measurements and are
    skipped instead of aborting the whole parse.

    Args:
        file_path (str): Path to the file containing log data.

    Returns:
        list: A list of lists of floats, one inner list per matching line, in
            file order. The number of components per line is whatever the log
            contains; it is not validated here.

    Raises:
        ValueError: If a non-empty bracketed entry contains a value that
            cannot be parsed as a float.
    """
    # Compile once; the same pattern is applied to every line of the file.
    pattern = re.compile(r'times: \[(.*?)\]')
    times_data = []
    with open(file_path, 'r') as file:
        for line in file:
            match = pattern.search(line)
            if match is None:
                continue
            payload = match.group(1).strip()
            if not payload:
                # ''.split(',') yields [''] and float('') raises ValueError,
                # so an empty bracket pair must be filtered out explicitly.
                continue
            # float() tolerates surrounding whitespace, so '1.0, 2.0' is fine.
            times_data.append([float(token) for token in payload.split(',')])
    return times_data

def smooth(data, window_size=5):
    """
    Applies a moving average to smooth the data.

    Only positions where the window fully overlaps the data are returned, so
    the result has ``len(data) - window + 1`` points. If the data is shorter
    than ``window_size`` the window is reduced to ``len(data)``, which yields
    a single point (the mean of all data). Without that clamp,
    ``np.convolve(..., mode='valid')`` would swap the roles of data and
    kernel and return more points than there are samples.

    Args:
        data (list): List of data points to smooth.
        window_size (int): Size of the moving window; must be >= 1.

    Returns:
        numpy.ndarray: Smoothed data (empty if ``data`` is empty).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    values = np.asarray(data, dtype=float)
    if values.size == 0:
        # np.convolve rejects empty input; an empty series smooths to empty.
        return values

    window = min(int(window_size), values.size)
    kernel = np.ones(window) / window
    return np.convolve(values, kernel, mode='valid')

def plot_times(times_data, smooth_window=5):
    """
    Plots the total times and individual components from the timing data.

    Two figures are created: one with the smoothed per-batch total and one
    with each smoothed component, then both are shown with ``plt.show()``.

    Args:
        times_data (list): List of lists containing timing components, one
            inner list per batch.
        smooth_window (int): Window size for smoothing. It is capped at the
            number of batches so that short logs can still be plotted.

    Raises:
        ValueError: If ``times_data`` is empty.
    """
    if not times_data:
        raise ValueError("times_data is empty; nothing to plot")

    # Compute total times and separate components
    batch_indices = list(range(len(times_data)))
    total_times = [sum(times) for times in times_data]
    # Transpose to get each component; zip stops at the shortest row.
    time_components = list(zip(*times_data))

    # A window longer than the data makes a 'valid' convolution return MORE
    # points than there are batches, which then fails in plt.plot because the
    # x and y lengths differ. Cap the window at the number of batches.
    window = min(smooth_window, len(times_data))

    # Smooth data
    smoothed_total_times = smooth(total_times, window_size=window)
    smoothed_components = [smooth(component, window_size=window) for component in time_components]

    # The smoothed series is shorter than the raw one; pair it with the
    # leading batch indices so x and y have equal length.
    smoothed_indices = batch_indices[:len(smoothed_total_times)]

    # Plot total times
    plt.figure(figsize=(10, 6))
    plt.plot(smoothed_indices, smoothed_total_times, label="Smoothed Total Time", color="blue")
    plt.xlabel("Batch Index")
    plt.ylabel("Total Time (s)")
    plt.title("Smoothed Total Time per Batch")
    plt.legend()
    plt.grid()

    # Plot individual components
    plt.figure(figsize=(10, 6))
    for i, smoothed_component in enumerate(smoothed_components):
        plt.plot(smoothed_indices, smoothed_component, label=f"Smoothed Component {i+1}")
    plt.xlabel("Batch Index")
    plt.ylabel("Time (s)")
    plt.title("Smoothed Time Components per Batch")
    plt.legend()
    plt.grid()

    plt.show()

# Example usage: parse the log file, then plot it or report that it held no timing lines.
file_path = "/home/albert/Desktop/times"  # Replace with your file path
times_data = extract_times_from_file(file_path)
if not times_data:
    print("No timing data found in the file.")
else:
    plot_times(times_data, smooth_window=10)  # Adjust smoothing window as needed


In [None]:
# Package-style import of the same class, currently disabled:
# from berkeley_humanoid.scripts.rsl_rl.dataset import ThreadSafeDict
import sys
# Add the rsl_rl scripts directory to the module search path so that the
# top-level `dataset` module imported below can be resolved.
sys.path.append('/home/albert/github/embodiment-scaling-law-sim2real/scripts/rsl_rl')
from dataset import ThreadSafeDict

In [None]:
import random
import time


def test_operation_speed(num_operations=int(1e6), max_size=1, array_size=(100, 100)):
    """
    Tests whether the speed of 'put' and 'get' operations decays after a large number of operations.

    Randomly interleaves ``put`` and ``get`` calls on a ``ThreadSafeDict`` and
    records how long each individual call takes.

    Args:
        num_operations (int): Total number of operations to perform.
        max_size (int): Value passed as ``max_size`` to ``ThreadSafeDict``;
            keys are drawn uniformly from ``0 .. max_size - 1``.
        array_size (tuple): Shape of the random NumPy array stored by 'put'.

    Returns:
        dict: ``{"put": [...], "get": [...]}`` with the duration in seconds of
            every operation of each kind, in execution order.
    """
    print("Starting operation speed test...")
    cache = ThreadSafeDict(max_size=max_size)
    operation_times = {"put": [], "get": []}  # Record times for each operation

    # Perform random 'put' and 'get' operations
    for i in range(num_operations):
        operation = random.choice(["put", "get"])  # Randomly select operation
        key = random.randint(0, max_size - 1)  # Random key within range

        if operation == "put":
            # Build the payload before starting the clock so that only the
            # cache call itself is measured.
            payload = np.random.rand(*array_size)
            # perf_counter is monotonic and high-resolution; time.time() can
            # be too coarse (or jump) for intervals this short.
            start_time = time.perf_counter()
            cache.put(key, payload)  # Perform 'put'
            operation_times["put"].append(time.perf_counter() - start_time)
        elif operation == "get":
            start_time = time.perf_counter()
            _ = cache.get(key)  # Perform 'get'
            operation_times["get"].append(time.perf_counter() - start_time)

        # Print progress every 100,000 iterations
        if i % 100_000 == 0 and i > 0:
            print(f"Completed {i} operations...")

    # Compute average times for 'put' and 'get'. With a small num_operations
    # one kind may never be chosen; report NaN rather than dividing by zero.
    put_times = operation_times["put"]
    get_times = operation_times["get"]
    avg_put_time = sum(put_times) / len(put_times) if put_times else float("nan")
    avg_get_time = sum(get_times) / len(get_times) if get_times else float("nan")

    print(f"Average 'put' time: {avg_put_time:.6f} seconds")
    print(f"Average 'get' time: {avg_get_time:.6f} seconds")

    # Optional: Return recorded times for further analysis
    return operation_times


if __name__ == "__main__":
    # Run the benchmark, then visualize how the per-call durations evolve.
    operation_times = test_operation_speed()

    import matplotlib.pyplot as plt

    # One figure, one line per operation kind ('put' first, then 'get').
    plt.figure(figsize=(12, 6))
    for op_name, series_label in (("put", "Put Times"), ("get", "Get Times")):
        plt.plot(operation_times[op_name], label=series_label, alpha=0.7)

    plt.xlabel("Operation Index")
    plt.ylabel("Time (seconds)")
    plt.title("Operation Times for 'put' and 'get'")
    plt.legend()
    plt.grid()
    plt.show()
