In [1]:
# generate_data.py
import numpy as np

def generate_branch_history(num_iterations=200):
    """Generate a synthetic branch-outcome history (1 = Taken, 0 = Not Taken).

    The first (up to) five outcomes are drawn uniformly at random from the
    global NumPy RNG to seed the history. Every later outcome at index ``i``
    is Not Taken (0) when either

    * ``i`` is a multiple of 4 (the "loop" pattern), or
    * the two preceding outcomes were both Not Taken (the "correlation"
      pattern);

    otherwise it is Taken (1).

    Args:
        num_iterations: Total number of outcomes to produce; must be >= 0.

    Returns:
        A 1-D integer NumPy array containing exactly ``num_iterations``
        outcomes.

    Raises:
        ValueError: If ``num_iterations`` is negative.
    """
    if num_iterations < 0:
        raise ValueError("num_iterations must be non-negative")

    # Never emit more random warm-up outcomes than were requested, so the
    # result length always equals num_iterations (even when it is below 5).
    warmup_length = min(5, num_iterations)
    history = np.random.randint(0, 2, size=warmup_length).tolist()

    # This loop only runs when num_iterations > 5, so at least two previous
    # outcomes are always available for the correlation check.
    for i in range(warmup_length, num_iterations):
        loop_pattern = (i % 4 == 0)  # True if multiple of 4
        # True if the last two outcomes were both Not Taken.
        correlation_pattern = (history[-1] == 0 and history[-2] == 0)

        # Branch logic: usually Taken (1), unless EITHER pattern is met.
        outcome = 0 if (loop_pattern or correlation_pattern) else 1
        history.append(outcome)

    # An explicit dtype keeps the empty result an integer array as well.
    return np.array(history, dtype=int)

def create_windows(data, history_window_size):
    """Build (window, next-value) pairs by sliding over ``data``.

    For every index ``end`` from ``history_window_size`` up to
    ``len(data) - 1``, the window is ``data[end - history_window_size:end]``
    and its target is ``data[end]``.

    Args:
        data: Indexable, sliceable sequence of values.
        history_window_size: Number of consecutive values in each window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays holding the windows and their
        targets. If ``data`` is not longer than ``history_window_size``, an
        error message is printed and two empty arrays are returned.
    """
    total = len(data)

    if total > history_window_size:
        target_positions = range(history_window_size, total)
        windows = [data[end - history_window_size:end] for end in target_positions]
        targets = [data[end] for end in target_positions]
        return np.array(windows), np.array(targets)

    # Not enough data to form even a single window.
    print("Error: Data length must be greater than history_window_size")
    return np.array([]), np.array([])

if __name__ == "__main__":
    # Script configuration.
    NUM_SAMPLES = 500          # Total length of the branch history to generate
    HISTORY_WINDOW_SIZE = 16   # Number of past branch outcomes in each sample

    # Step 1: synthesize the raw branch-outcome sequence.
    print(f"Generating {NUM_SAMPLES} branch outcomes...")
    full_history = generate_branch_history(num_iterations=NUM_SAMPLES)

    # Step 2: slice it into fixed-size history windows and next-outcome targets.
    print(f"Creating training windows (size {HISTORY_WINDOW_SIZE})...")
    X, y = create_windows(
        data=full_history,
        history_window_size=HISTORY_WINDOW_SIZE,
    )

    # Step 3: add a trailing feature axis so X has the
    # (num_samples, timesteps, features) layout used for CNN/LSTM input;
    # there is a single feature per timestep (the outcome itself).
    X = X[:, :, np.newaxis]

    # Expected: (NUM_SAMPLES - HISTORY_WINDOW_SIZE, HISTORY_WINDOW_SIZE, 1)
    print(f"Generated X shape: {X.shape}")
    # Expected: (NUM_SAMPLES - HISTORY_WINDOW_SIZE,)
    print(f"Generated y shape: {y.shape}")

    # Step 4: persist the windows, targets and raw history in one archive.
    np.savez(
        'branchnet_training_data.npz',
        X=X,
        y=y,
        full_history=full_history,
    )
    print("Data saved to branchnet_training_data.npz")

Generating 500 branch outcomes...
Creating training windows (size 16)...
Generated X shape: (484, 16, 1)
Generated y shape: (484,)
Data saved to branchnet_training_data.npz
