In [None]:
#Required: https://youtu.be/IDxrMbXPVTA?si=PHfGry-HQj__3Xne
# https://ngrok.com/

In [None]:
# Install required packages
!pip install ipywidgets optuna tensorboard pyngrok watchdog


In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import logging

# Notebook-wide logger; the root logger is configured to emit DEBUG and above
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

# Global state: one completion flag per workflow stage, all initially unset
state = dict.fromkeys(
    ('setup_complete', 'optimization_complete', 'training_complete', 'analysis_complete'),
    False,
)

logger.debug("Initial imports and setup complete.")

In [None]:

# Import necessary libraries
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import os
import json
import subprocess
import time
from datetime import datetime
import logging

# Set up logging
# NOTE(review): this repeats the basicConfig/getLogger setup already performed in the
# first setup cell of this notebook; `logger` is simply rebound to the same-named logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Debug statement: marks that this cell's imports ran without error
logger.debug("Environment setup complete. Libraries imported successfully.")



In [None]:
# ---- Run-level option widgets ----
dev_mode = widgets.Checkbox(description='Development Mode', value=False)
ngrok_auth_token = widgets.Text(description='Ngrok Auth Token', value="YOUR_NGROK_AUTH_TOKEN")
use_best_params = widgets.Checkbox(description='Use Best Parameters', value=True)
perform_hyperparameter_tuning = widgets.Checkbox(description='Perform Hyperparameter Tuning', value=True)
n_trials = widgets.IntText(description='Number of Trials', value=100)


def _labelled(widget_cls, label, initial):
    """Build one input widget of ``widget_cls`` showing ``label`` and starting at ``initial``."""
    return widget_cls(value=initial, description=label)


def _min_max_pair(widget_cls, low, high):
    """Build the (Min, Max) widget pair used for one hyperparameter search range."""
    return (_labelled(widget_cls, 'Min', low), _labelled(widget_cls, 'Max', high))


# ---- Manual hyperparameters: one widget per parameter, labelled with its own name ----
manual_params = {
    param: _labelled(widget_cls, param, initial)
    for param, widget_cls, initial in (
        ("n_embd", widgets.IntText, 26),
        ("n_head", widgets.IntText, 2),
        ("n_layer", widgets.IntText, 5),
        ("batch_size", widgets.IntText, 13),
        ("learning_rate", widgets.FloatText, 0.005936106784234055),
        ("max_epochs", widgets.IntText, 50),
    )
}

# ---- Hyperparameter search ranges: (Min widget, Max widget) per parameter ----
hp_ranges = {
    "n_embd": _min_max_pair(widgets.IntText, 1, 1024),
    "n_head": _min_max_pair(widgets.IntText, 2, 16),
    "n_layer": _min_max_pair(widgets.IntText, 2, 16),
    "batch_size": _min_max_pair(widgets.IntText, 1, 40),
    "learning_rate": _min_max_pair(widgets.FloatText, 1e-4, 1e-1),
    "max_epochs": _min_max_pair(widgets.IntText, 15, 40),
    "n_head_exp": _min_max_pair(widgets.IntText, 1, 10),
    "n_embd_multiplier": _min_max_pair(widgets.IntText, 1, 1),
}

# Debug statement
logger.debug("Widgets created successfully.")

# Function to update config based on widget values
def update_config():
    """Snapshot the current user settings into a plain, JSON-serializable dict.

    Reads the module-level settings ``dev_mode``, ``ngrok_auth_token``,
    ``use_best_params``, ``perform_hyperparameter_tuning``, ``n_trials``,
    ``manual_params`` and ``hp_ranges``. Each setting may be an ipywidgets
    control (its ``.value`` is read) or an already-plain Python value: a later
    cell of this notebook rebinds several of these names to plain bools, ints
    and dicts, and reading ``.value`` unconditionally would then raise
    AttributeError.

    Returns:
        dict with keys ``dev_mode``, ``ngrok_auth_token``, ``use_best_params``,
        ``perform_hyperparameter_tuning``, ``n_trials``, ``manual_params``
        (name -> value) and ``hp_ranges`` (name -> (min, max)).
    """
    def _current(setting):
        # Widgets expose their state through `.value`; anything else is used as-is.
        return getattr(setting, "value", setting)

    config = {
        "dev_mode": _current(dev_mode),
        "ngrok_auth_token": _current(ngrok_auth_token),
        "use_best_params": _current(use_best_params),
        "perform_hyperparameter_tuning": _current(perform_hyperparameter_tuning),
        "n_trials": _current(n_trials),
        "manual_params": {k: _current(v) for k, v in manual_params.items()},
        "hp_ranges": {k: (_current(v[0]), _current(v[1])) for k, v in hp_ranges.items()}
    }
    # Keep the ngrok credential out of the debug log; the returned config still carries it.
    loggable = dict(config, ngrok_auth_token="***" if config["ngrok_auth_token"] else "")
    logger.debug(f"Config updated: {json.dumps(loggable, indent=2)}")
    return config



In [None]:
# One Output pane per workflow stage, titled in display order
tab_titles = ['Setup', 'Hyperparameter Optimization', 'Model Training', 'Result Analysis']
tabs = widgets.Tab()
tabs.children = [widgets.Output() for _title in tab_titles]
for position in range(len(tab_titles)):
    tabs.set_title(position, tab_titles[position])


In [None]:
# Setup tab
def setup_environment(b):
    """Click handler for the setup button: snapshot the config and echo it into ``setup_output``.

    ``b`` is the clicked button supplied by ipywidgets; it is not used.
    """
    with setup_output:
        clear_output()
        current_config = update_config()
        logger.debug("Starting environment setup...")
        # Add your existing setup code here, using config values instead of global variables
        print("Setting up environment...")
        # Example: Print the configuration
        print(json.dumps(current_config, indent=2))
        # Add more of your setup code here
        logger.debug("Environment setup completed.")


# Everything displayed inside this context is rendered in the first tab's Output pane
with tabs.children[0]:
    setup_button = widgets.Button(description="Setup Environment")
    setup_output = widgets.Output()
    setup_button.on_click(setup_environment)
    display(setup_button, setup_output)

# Optimization tab
def start_optimization(b):
    """Click handler for the optimization button: report the trial count and search ranges.

    ``b`` is the clicked button supplied by ipywidgets; it is not used.
    """
    with optimize_output:
        clear_output()
        current_config = update_config()
        logger.debug("Starting hyperparameter optimization...")
        # Add your existing optimization code here, using config values
        print("Starting hyperparameter optimization...")
        # Example: Print optimization parameters
        trial_count = current_config['n_trials']
        ranges_json = json.dumps(current_config['hp_ranges'], indent=2)
        print(f"Number of trials: {trial_count}")
        print(f"Hyperparameter ranges: {ranges_json}")
        # Add more of your optimization code here
        logger.debug("Hyperparameter optimization completed.")


# Everything displayed inside this context is rendered in the second tab's Output pane
with tabs.children[1]:
    optimize_button = widgets.Button(description="Start Optimization")
    optimize_output = widgets.Output()
    optimize_button.on_click(start_optimization)
    display(optimize_button, optimize_output)

logger.debug("Setup and Optimization tabs populated with buttons and outputs.")

In [None]:
# Model Training tab
def extract_best_params(b):
    """Click handler: print a mock set of best hyperparameters into ``training_output``.

    ``b`` is the clicked button supplied by ipywidgets; it is not used.
    """
    with training_output:
        clear_output()
        config = update_config()  # result currently unused by the mock below
        logger.debug("Extracting best parameters...")
        # Add your existing code for extracting best parameters
        print("Extracting best parameters...")
        # Example: Print a mock result
        best_params = dict(
            n_embd=128,
            n_head=4,
            n_layer=6,
            batch_size=32,
            learning_rate=0.001,
        )
        print(f"Best parameters: {json.dumps(best_params, indent=2)}")
        # Add more of your extraction code here
        logger.debug("Best parameters extracted.")


def start_training(b):
    """Click handler: print the manual training parameters into ``training_output``.

    ``b`` is the clicked button supplied by ipywidgets; it is not used.
    """
    with training_output:
        clear_output()
        current_config = update_config()
        logger.debug("Starting model training...")
        # Add your existing training code here, using config values
        print("Starting model training...")
        # Example: Print training configuration
        training_json = json.dumps(current_config['manual_params'], indent=2)
        print(f"Training configuration: {training_json}")
        # Add more of your training code here
        logger.debug("Model training completed.")


# Everything displayed inside this context is rendered in the third tab's Output pane
with tabs.children[2]:
    extract_button = widgets.Button(description="Extract Best Parameters")
    train_button = widgets.Button(description="Start Training")
    training_output = widgets.Output()
    extract_button.on_click(extract_best_params)
    train_button.on_click(start_training)
    display(extract_button, train_button, training_output)

logger.debug("Model Training tab populated with buttons and output.")

In [None]:
import matplotlib.pyplot as plt

# Result Analysis tab
def analyze_results(b):
    """Click handler: draw a mock accuracy-over-epochs curve into ``plot_output``.

    The data are random placeholders, not real results. ``b`` is the clicked
    button supplied by ipywidgets; it is not used.
    """
    with plot_output:
        clear_output()
        logger.debug("Analyzing results...")
        # Add your existing analysis code here
        print("Analyzing results...")
        # Example: Generate mock data
        import numpy as np
        epochs = np.arange(1, 51)
        accuracy = np.random.rand(50) * 0.5 + 0.5  # uniform placeholder values in [0.5, 1.0)
        plt.figure(figsize=(10, 6))
        plt.plot(epochs, accuracy)
        plt.title('Mock Test Accuracy Over Epochs')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.show()
        logger.debug("Results analysis completed.")


def update_plot(change):
    """Dropdown observer: announce which plot was selected.

    ``change.new`` holds the newly selected dropdown value. Each known plot
    kind currently only prints a placeholder line; hook the real plotting
    code in per kind.
    """
    known_plots = (
        'Test Accuracy',
        'Test Diff Accuracy',
        'Complete Task Accuracy',
        'Task Accuracy Heatmap',
    )
    with plot_output:
        clear_output()
        selected = change.new
        logger.debug(f"Updating plot to {selected}")
        if selected in known_plots:
            # Add the plotting code for the selected kind here
            print(f"Plotting {selected}")
        logger.debug("Plot updated successfully.")


# Everything displayed inside this context is rendered in the fourth tab's Output pane
with tabs.children[3]:
    analyze_button = widgets.Button(description="Analyze Results")
    plot_dropdown = widgets.Dropdown(
        description='Plot:',
        options=['Test Accuracy', 'Test Diff Accuracy', 'Complete Task Accuracy', 'Task Accuracy Heatmap'],
    )
    plot_output = widgets.Output()
    analyze_button.on_click(analyze_results)
    plot_dropdown.observe(update_plot, names='value')
    display(analyze_button, plot_dropdown, plot_output)

logger.debug("Result Analysis tab populated with buttons, dropdown, and output.")

In [None]:
def display_external_link(url, name):
    """Render a green button-styled hyperlink that opens ``url`` in a new browser tab.

    Both arguments are HTML-escaped before being placed in the markup, so a URL
    containing ``&`` or quotes (or a label containing ``<``) cannot break out of
    the ``href`` attribute or inject tags.

    Args:
        url: Link target.
        name: Visible link text.
    """
    from html import escape  # local import: keeps the cell self-contained

    safe_url = escape(str(url), quote=True)
    safe_name = escape(str(name), quote=True)
    link_html = f'<a href="{safe_url}" target="_blank" style="font-size: 16px; padding: 10px; background-color: #4CAF50; color: white; text-decoration: none; border-radius: 5px;">{safe_name}</a>'
    display(HTML(link_html))
    logger.debug(f"External link displayed: {name} - {url}")

# Example usage (uncomment when you have actual URLs)
# display_external_link("http://localhost:6006", "Open TensorBoard")
# display_external_link("http://localhost:8081", "Open Optuna Dashboard")

logger.debug("External link display function created.")

In [None]:
def start_background_processes(b):
    """Button click handler placeholder for launching background processes.

    Currently only logs and prints a confirmation; no process is actually
    started here. ``b`` is the clicked button passed by the widget callback
    and is not used.
    """
    logger.debug("Starting background processes...")
    # Your existing code for starting background processes
    print("Background processes started.")
    # Add your background process code here
    logger.debug("Background processes initiated successfully.")



In [None]:
# Button that triggers the background-process handler
background_button = widgets.Button(description="Start Background Processes")
background_button.on_click(start_background_processes)
display(background_button)

logger.debug("Background processes button created and displayed.")

# Run-level option widgets, shown in declaration order
display(dev_mode, ngrok_auth_token, use_best_params, perform_hyperparameter_tuning, n_trials)

# Manual parameters stacked vertically
display(widgets.Label('Manual Parameters:'))
display(widgets.VBox(list(manual_params.values())))

# One row per hyperparameter: name label followed by its Min and Max inputs
display(widgets.Label('Hyperparameter Ranges:'))
for range_name, (min_widget, max_widget) in hp_ranges.items():
    display(widgets.HBox([widgets.Label(range_name), min_widget, max_widget]))

logger.debug("Widgets displayed successfully.")
display(tabs)

Button(description='Start Background Processes', style=ButtonStyle())

Checkbox(value=False, description='Development Mode')

Text(value='YOUR_NGROK_AUTH_TOKEN', description='Ngrok Auth Token')

Checkbox(value=True, description='Use Best Parameters')

Checkbox(value=True, description='Perform Hyperparameter Tuning')

IntText(value=100, description='Number of Trials')

Label(value='Manual Parameters:')

VBox(children=(IntText(value=26, description='n_embd'), IntText(value=2, description='n_head'), IntText(value=…

Label(value='Hyperparameter Ranges:')

HBox(children=(Label(value='n_embd'), IntText(value=1, description='Min'), IntText(value=1024, description='Ma…

HBox(children=(Label(value='n_head'), IntText(value=2, description='Min'), IntText(value=16, description='Max'…

HBox(children=(Label(value='n_layer'), IntText(value=2, description='Min'), IntText(value=16, description='Max…

HBox(children=(Label(value='batch_size'), IntText(value=1, description='Min'), IntText(value=40, description='…

HBox(children=(Label(value='learning_rate'), FloatText(value=0.0001, description='Min'), FloatText(value=0.1, …

HBox(children=(Label(value='max_epochs'), IntText(value=15, description='Min'), IntText(value=40, description=…

HBox(children=(Label(value='n_head_exp'), IntText(value=1, description='Min'), IntText(value=10, description='…

HBox(children=(Label(value='n_embd_multiplier'), IntText(value=1, description='Min'), IntText(value=1, descrip…

Tab(children=(Output(), Output(), Output(), Output()), _titles={'0': 'Setup', '1': 'Hyperparameter Optimizatio…

Background processes started.


# STUFF YOU CHANGE

In [None]:
# ========== User-Defined Parameters (Top of Notebook) ==========

dev_mode = False  # Set to True for development mode (Miguel's coding machine)


# ngrok authtoken: read from the NGROK_AUTH_TOKEN environment variable so the credential
# never lives in the notebook source or its saved outputs. Set it before running this
# cell, e.g. os.environ["NGROK_AUTH_TOKEN"] = getpass.getpass() in a scratch cell.
import os
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN", "")
if not ngrok_auth_token:
    print("Warning: NGROK_AUTH_TOKEN is not set; ngrok tunnels may not be able to start.")

# 1. Base directory for storing date-based experiment result folders.
import os

# Define the base directory for the arc-neural-reasoning-model
arc_model_dir = "/content/arc-neural-reasoning-model/"
parent_dir = "/content/drive/MyDrive/ArcGPT/"
#parent_dir = "/content/"
print(f"Parent Directory: {parent_dir}")
print(f"ARC Model Directory: {arc_model_dir}")

# 2. Boolean flag to choose between best hyperparameters from previous experiments (True) or manual parameters (False).
use_best_params = True  # Set to False to use manual parameters
perform_hyperparameter_tuning = True  # Set to True to perform hyperparameter tuning

# 3. Dictionary of manually set hyperparameters used when use_best_params is False.
#    Includes model architecture and training settings.
manual_params = {
    "n_embd": 2*13,     # Embedding dimension
    "n_head": 2**1,       # Number of attention heads
    "n_layer": 5,     # Number of transformer layers
    "batch_size": 13,  # Batch size for training
    "learning_rate": 0.005936106784234055,  # Learning rate
    "max_epochs": 50   # Maximum number of epochs for training
}

# Hyperparameter tuning search space settings
# 4. Number of Optuna trials for hyperparameter tuning.
#    More trials can lead to better optimization but increase computation time.
n_trials = 100

# 5. Range for embedding dimension in hyperparameter search space.
n_embd_min, n_embd_max = 1, 1024

# 6. Range for number of attention heads in transformer model during tuning.
n_head_min, n_head_max = 2, 16

# 7. Range for number of transformer layers in model architecture during optimization.
n_layer_min, n_layer_max = 2, 16

# 8. Range of batch sizes to explore. Larger batches can speed up training but may require more memory.
batch_size_min, batch_size_max = 1, 40

# 9. Range for learning rate in hyperparameter search space. Crucial for model convergence and performance.
learning_rate_min, learning_rate_max = 1e-4, 1e-1

# 10. Range for number of training epochs to consider during hyperparameter optimization.
max_epochs_min, max_epochs_max = 15, 40

# 11. Range for n_head exponent in hyperparameter search space.
n_head_exp_min, n_head_exp_max = 1, 10

# 12. Range for n_embd multiplier in hyperparameter search space.
n_embd_multiplier_min, n_embd_multiplier_max = 1, 1

# ========== End of User-Defined Parameters ==========

# These parameters allow flexible experimentation with different model configurations and training settings,
# enabling comprehensive exploration of the hyperparameter space for optimal model performance.

# Validate manual parameters
def validate_manual_params(params):
    """Check the architectural constraints on a manual hyperparameter dict.

    Args:
        params: Mapping with at least the integer entries ``n_embd``,
            ``n_head`` and ``n_layer``.

    Raises:
        AssertionError: if ``n_head`` is not positive, ``n_embd`` is not a
            multiple of ``n_head``, ``n_embd`` is smaller than ``n_head``, or
            ``n_layer`` is not positive.
        KeyError: if one of the required entries is missing.
    """
    # Checked first: a zero n_head would otherwise surface as ZeroDivisionError in the
    # modulo below, and a negative n_head would slip through both n_embd checks.
    assert params["n_head"] > 0, f"n_head ({params['n_head']}) must be positive"
    assert params["n_embd"] % params["n_head"] == 0, f"n_embd ({params['n_embd']}) must be divisible by n_head ({params['n_head']})"
    assert params["n_embd"] >= params["n_head"], f"n_embd ({params['n_embd']}) must be greater than or equal to n_head ({params['n_head']})"
    assert params["n_layer"] > 0, f"n_layer ({params['n_layer']}) must be positive"
    print("Manual parameters validated successfully")


# Validate the manual parameters
validate_manual_params(manual_params)

# Print configurations for verification (every range that is handed to the tuner is listed)
print("Current Configuration:")
print(f"Parent Directory: {parent_dir}")
print(f"Use Best Parameters: {use_best_params}")
print(f"Perform Hyperparameter Tuning: {perform_hyperparameter_tuning}")
print(f"Manual Parameters: {manual_params}")
print(f"Number of Optuna Trials: {n_trials}")
print("Hyperparameter Ranges:")
print(f"  n_embd: {n_embd_min} to {n_embd_max}")
print(f"  n_head: {n_head_min} to {n_head_max}")
print(f"  n_layer: {n_layer_min} to {n_layer_max}")
print(f"  batch_size: {batch_size_min} to {batch_size_max}")
print(f"  learning_rate: {learning_rate_min} to {learning_rate_max}")
print(f"  max_epochs: {max_epochs_min} to {max_epochs_max}")
# These two ranges are defined above as well but were missing from the printout
print(f"  n_head_exp: {n_head_exp_min} to {n_head_exp_max}")
print(f"  n_embd_multiplier: {n_embd_multiplier_min} to {n_embd_multiplier_max}")

# Save configuration
import json

def save_config(config, filename="config.json"):
    """Write ``config`` as 4-space-indented JSON to ``filename`` under the global ``parent_dir``.

    ``parent_dir`` is created first if it does not exist yet; an existing file
    of the same name is overwritten.
    """
    os.makedirs(parent_dir, exist_ok=True)
    destination = os.path.join(parent_dir, filename)
    with open(destination, 'w') as handle:
        json.dump(config, handle, indent=4)

# Settings snapshot written by save_config(). The ngrok token is deliberately not included.
config = {
    "parent_dir": parent_dir,
    "use_best_params": use_best_params,
    "perform_hyperparameter_tuning": perform_hyperparameter_tuning,
    "manual_params": manual_params,
    "tuning": {
        "n_trials": n_trials,
        "n_embd": (n_embd_min, n_embd_max),
        "n_head": (n_head_min, n_head_max),
        "n_layer": (n_layer_min, n_layer_max),
        "batch_size": (batch_size_min, batch_size_max),
        "learning_rate": (learning_rate_min, learning_rate_max),
        "max_epochs": (max_epochs_min, max_epochs_max),
        # Previously omitted although both ranges are passed to the tuning script
        "n_head_exp": (n_head_exp_min, n_head_exp_max),
        "n_embd_multiplier": (n_embd_multiplier_min, n_embd_multiplier_max)
    }
}

# Report whether CUDA is usable and, if so, which device sits at index 0
import torch
cuda_available = torch.cuda.is_available()
print("\nGPU Available:", cuda_available)
if cuda_available:
    print("GPU Name:", torch.cuda.get_device_name(0))

# CODE

### 1. Set up the Colab environment:

In [None]:
if dev_mode != True:
    print("Setting up Colab environment...")
    try:
        from google.colab import drive
        drive.mount('/content/drive')
        save_config(config)
        print(f"Configuration saved to config.json")

    #general error
    except:
        print("Google Colab not detected. Skipping drive mount.")

    %cd /content/
    !rm -rf /content/arc-neural-reasoning-model/
    !git clone https://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@github.com/ImmortalDemonGod/arc-neural-reasoning-model.git
    !pip install -r /content/arc-neural-reasoning-model/gpt2_arc/requirements.txt
    !pip install optuna
    !pip install torchsummary
    # Install TensorBoard if not already installed
    !pip install tensorboard

    !pip install jupyterlab jupyterlab-optuna
    # Install the required packages
    !pip install optuna-dashboard pyngrok
    #!ngrok config add-authtoken 2NCEuxuUBMj6zsdTokQHkYJ4AZz_3E7e2pyW87otgFg3UdSC3
    !pip install tensorboard
    !pip install watchdog
    !pip install numpy
    !pip install --upgrade jax jaxlib torch pytorch_lightning
    #!pip uninstall tensorflow -y
    #!find . -type d -name "__pycache__" -exec rm -r {} +
    !rm -rf /tmp/libtpu_lockfile
    !rm -rf /content/arc-neural-reasoning-model/arc_sat_solver
    !rm -rf /content/arc-neural-reasoning-model/benchmark_results
    !rm -rf /content/arc-neural-reasoning-model/checkpoints
    !rm -rf /content/arc-neural-reasoning-model/tmp
    %cd /content/arc-neural-reasoning-model/
    !pip install -e .
else:
    print("Development mode is enabled. Using Miguel's coding machine.")
    arc_model_dir = "/workspaces/arc-neural-reasoning-model/"
    parent_dir = "/workspaces/arc-neural-reasoning-model/EXPERIMENTAL/"
    %cd /workspaces/arc-neural-reasoning-model/EXPERIMENTAL/
    save_config(config)
    print(f"Configuration saved to config.json")

# Setup ngrok for remote access (ensure you have your authtoken configured)
from pyngrok import ngrok
ngrok.set_auth_token(ngrok_auth_token)


Setting up Colab environment...
Mounted at /content/drive
Configuration saved to config.json
/content
Cloning into 'arc-neural-reasoning-model'...
remote: Enumerating objects: 6572, done.[K
remote: Counting objects: 100% (694/694), done.[K
remote: Compressing objects: 100% (219/219), done.[K
remote: Total 6572 (delta 531), reused 628 (delta 475), pack-reused 5878 (from 1)[K
Receiving objects: 100% (6572/6572), 6.48 MiB | 24.58 MiB/s, done.
Resolving deltas: 100% (4773/4773), done.
/content/arc-neural-reasoning-model
Obtaining file:///content/arc-neural-reasoning-model
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: gpt2_arc
  Building editable for gpt2_arc (pyproject.toml) ... [?25l[?25hdone
  Created wheel for gpt2_arc: filename=gpt2_a

### 2. Run hyperparameter tuning in the background, then click the printed **links** to open the dashboards:

In [None]:
import os
from datetime import datetime

# Get the current date in YYYYMMDD format
current_date = datetime.now().strftime('%Y%m%d')


# Create a folder named after the current date
date_folder = os.path.join(parent_dir, current_date)
if not os.path.exists(date_folder):
    os.makedirs(date_folder)

# Change into the newly created folder
%cd {date_folder}

# Now all your operations will save to /content/YYYYMMDD

/content/drive/MyDrive/ArcGPT/20240928


In [None]:
import subprocess
import sys
# Function to start TensorBoard
def start_tensorboard(log_dir):
    """Spawn TensorBoard on port 6006 reading ``log_dir``; return the ``subprocess.Popen`` handle.

    The server is run as a module of the current interpreter (``sys.executable``).
    """
    print("Starting TensorBoard...")
    launcher = [sys.executable, "-m", "tensorboard.main"]
    options = ["--logdir", log_dir, "--port", "6006"]
    return subprocess.Popen(launcher + options)

# Start TensorBoard
# Logs live under <date_folder>/runs; the directory is created up front.
log_dir = os.path.join(date_folder, "runs")
os.makedirs(log_dir, exist_ok=True)
tensorboard_process = start_tensorboard(log_dir)
# Expose local port 6006 through an ngrok tunnel.
# NOTE(review): the saved output of this cell shows ngrok.connect failing with
# ERR_NGROK_108 (account limited to one simultaneous agent session) — presumably another
# ngrok agent was already running for this account; confirm before relying on this cell.
public_url_tb = ngrok.connect(6006)
print(f"TensorBoard ngrok tunnel is accessible at: {public_url_tb}")

Starting TensorBoard...


ERROR:pyngrok.process.ngrok:t=2024-09-28T20:59:09+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/tunnels/agents\r\n\r\nERR_NGROK_108\r\n"

ERROR:  authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.
ERROR:  You can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.
ERROR:  Read more about the agent configuration file: https://ngrok.com/doc

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/tunnels/agents\r\n\r\nERR_NGROK_108\r\n.

Make sure to set the directory in your Drive where the files should be saved.

In [None]:
import subprocess

# Function to kill processes by name
def kill_process_by_name(process_name):
    """Best-effort termination of every running process matching ``process_name``.

    Uses ``pkill -f`` on POSIX systems (matches against the full command line)
    and ``taskkill /IM ... /F`` on Windows. The outcome is reported on stdout;
    the function never raises for a missing tool or a non-zero exit status.

    Args:
        process_name: Pattern (POSIX) or image name (Windows) to terminate.
    """
    if os.name == "posix":  # For Unix-based systems
        command = ["pkill", "-f", process_name]
    elif os.name == "nt":  # For Windows systems
        command = ["taskkill", "/IM", process_name, "/F"]
    else:
        # Previously fell through and reported success without doing anything.
        print(f"Don't know how to terminate {process_name} processes on platform '{os.name}'.")
        return

    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        # The kill utility itself is not installed / not on PATH.
        print(f"Cannot terminate {process_name} processes: '{command[0]}' is not available.")
    except subprocess.CalledProcessError as err:
        # pkill uses exit status 1 for "no processes matched"; other statuses are real errors.
        if os.name == "posix" and err.returncode != 1:
            print(f"Failed to terminate {process_name} processes ({command[0]} exited with status {err.returncode}).")
        else:
            print(f"No {process_name} processes were found running.")
    else:
        print(f"Successfully terminated any running {process_name} processes.")



In [None]:
import os
import time
import subprocess
from pyngrok import ngrok
import sys

def kill_process_by_name(process_name):
    """Best-effort termination of every running process matching ``process_name``.

    Uses ``pkill -f`` on POSIX systems (matches against the full command line)
    and ``taskkill /IM ... /F`` on Windows. The outcome is reported on stdout;
    the function never raises for a missing tool or a non-zero exit status.

    Args:
        process_name: Pattern (POSIX) or image name (Windows) to terminate.
    """
    if os.name == "posix":  # For Unix-based systems
        command = ["pkill", "-f", process_name]
    elif os.name == "nt":  # For Windows systems
        command = ["taskkill", "/IM", process_name, "/F"]
    else:
        # Previously fell through and reported success without doing anything.
        print(f"Don't know how to terminate {process_name} processes on platform '{os.name}'.")
        return

    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        # The kill utility itself is not installed / not on PATH.
        print(f"Cannot terminate {process_name} processes: '{command[0]}' is not available.")
    except subprocess.CalledProcessError as err:
        # pkill uses exit status 1 for "no processes matched"; other statuses are real errors.
        if os.name == "posix" and err.returncode != 1:
            print(f"Failed to terminate {process_name} processes ({command[0]} exited with status {err.returncode}).")
        else:
            print(f"No {process_name} processes were found running.")
    else:
        print(f"Successfully terminated any running {process_name} processes.")

def start_ngrok_tunnel(port):
    """Open an ngrok tunnel to local ``port``.

    Returns whatever ``ngrok.connect`` returns, or ``None`` (after printing the
    error) if opening the tunnel raises.
    """
    try:
        tunnel = ngrok.connect(port)
    except Exception as problem:
        print(f"Error starting ngrok tunnel: {str(problem)}")
        return None
    else:
        return tunnel

def start_optuna_dashboard(storage_path):
    """Spawn ``optuna-dashboard`` on port 8081 against the SQLite file at ``storage_path``.

    Returns the ``subprocess.Popen`` handle of the dashboard process.
    """
    print("Starting Optuna dashboard...")
    storage_url = f"sqlite:///{storage_path}"
    return subprocess.Popen(["optuna-dashboard", "--port", "8081", storage_url])

# Function to start TensorBoard
def start_tensorboard(log_dir):
    """Launch TensorBoard (port 6006) over ``log_dir`` with the current interpreter.

    Returns the ``subprocess.Popen`` handle so the caller can terminate it later.
    """
    print("Starting TensorBoard...")
    argv = [sys.executable, "-m", "tensorboard.main"]
    argv += ["--logdir", log_dir]
    argv += ["--port", "6006"]
    return subprocess.Popen(argv)

def run_optimization(arc_model_dir, date_folder, n_trials, n_embd_min, n_embd_max, n_head_min, n_head_max,
                     n_layer_min, n_layer_max, batch_size_min, batch_size_max, learning_rate_min, learning_rate_max,
                     max_epochs_min, max_epochs_max, n_head_exp_min, n_head_exp_max, n_embd_multiplier_min, n_embd_multiplier_max):
    """Launch the monitoring dashboards, then run the hyperparameter tuning script to completion.

    Steps:
      1. Kill leftover optuna-dashboard / tensorboard / ngrok processes.
      2. Start the Optuna dashboard (port 8081) and TensorBoard (port 6006) and
         open one ngrok tunnel to each.
      3. Run ``gpt2_arc/src/optimize_hyperparameters.py`` from ``arc_model_dir``
         with the given trial count and search ranges, echoing its combined
         stdout/stderr line by line.

    The dashboard and TensorBoard processes and the ngrok agent are shut down
    on every exit path, including failed start-up and interruption.

    Args:
        arc_model_dir: Root of the arc-neural-reasoning-model checkout.
        date_folder: Experiment folder; holds ``optuna_results.db`` and ``runs/``.
        n_trials: Number of trials, forwarded as ``--n_trials``.
        *_min / *_max: Search-range bounds, forwarded verbatim as the
            identically named ``--<name>`` command-line options.

    Returns:
        The tuning script's exit status, or 1 if start-up failed or the run was
        interrupted from the keyboard.
    """
    storage_path = f"{date_folder}/optuna_results.db"
    log_dir = os.path.join(date_folder, "runs")
    os.makedirs(log_dir, exist_ok=True)

    helper_processes = []   # Popen handles that must be terminated before returning
    tuning_started = False  # controls the final status message only

    try:
        try:
            # Kill existing processes
            kill_process_by_name("optuna-dashboard")
            kill_process_by_name("tensorboard")
            kill_process_by_name("ngrok")

            # Start Optuna dashboard
            helper_processes.append(start_optuna_dashboard(storage_path))

            # Start TensorBoard
            helper_processes.append(start_tensorboard(log_dir))

            time.sleep(5)  # Give some time for the processes to start

            # Start ngrok tunnels
            public_url_optuna = start_ngrok_tunnel(8081)
            public_url_tb = start_ngrok_tunnel(6006)

            if public_url_optuna and public_url_tb:
                print(f"📈 Optuna Dashboard: {public_url_optuna}")
                print(f"📊 TensorBoard: {public_url_tb}")
            else:
                print("Failed to start ngrok tunnels.")
                return 1

            print("\nAll processes launched successfully.")

        except Exception as e:
            print(f"Error starting processes: {str(e)}")
            return 1

        # Run the optimization command with the same interpreter as this kernel, so the
        # script sees the packages installed into the kernel's environment.
        print("Starting hyperparameter tuning...")
        optimize_command = [
            sys.executable, os.path.join(arc_model_dir, "gpt2_arc/src/optimize_hyperparameters.py"),
            "--n_trials", str(n_trials),
            "--storage", f"sqlite:///{storage_path}",
            "--n_jobs", "1",
            "--n_embd_min", str(n_embd_min), "--n_embd_max", str(n_embd_max),
            "--n_head_min", str(n_head_min), "--n_head_max", str(n_head_max),
            "--n_layer_min", str(n_layer_min), "--n_layer_max", str(n_layer_max),
            "--batch_size_min", str(batch_size_min), "--batch_size_max", str(batch_size_max),
            "--learning_rate_min", str(learning_rate_min), "--learning_rate_max", str(learning_rate_max),
            "--max_epochs_min", str(max_epochs_min), "--max_epochs_max", str(max_epochs_max),
            "--n_head_exp_min", str(n_head_exp_min), "--n_head_exp_max", str(n_head_exp_max),
            "--n_embd_multiplier_min", str(n_embd_multiplier_min), "--n_embd_multiplier_max", str(n_embd_multiplier_max)
        ]

        tuning_started = True
        try:
            # Stream the script's output in real time (line-buffered text pipe)
            with subprocess.Popen(optimize_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1) as process:
                for line in process.stdout:
                    print(line, end='')  # Print each line as it's received
            # Leaving the `with` block waits for the child, so returncode is set here
            return process.returncode
        except KeyboardInterrupt:
            # NOTE(review): callers that retry on any non-zero status will also retry
            # after a deliberate interrupt, since it is reported as 1.
            print("Optimization process interrupted by user.")
            return 1
    finally:
        # Clean up helper processes on every exit path (previously skipped when
        # start-up failed, leaving the dashboard and TensorBoard running).
        for helper in helper_processes:
            helper.terminate()
        kill_process_by_name("ngrok")
        if tuning_started:
            print("Optimization process completed.")

# Main execution: run tuning, retrying after any failure until it succeeds or the
# retry budget is used up
if not perform_hyperparameter_tuning:
    print("Hyperparameter tuning not performed.")
else:
    max_retries = 100
    retry_count = 0
    while retry_count < max_retries:
        try:
            complete = run_optimization(
                arc_model_dir, date_folder, n_trials,
                n_embd_min, n_embd_max,
                n_head_min, n_head_max,
                n_layer_min, n_layer_max,
                batch_size_min, batch_size_max,
                learning_rate_min, learning_rate_max,
                max_epochs_min, max_epochs_max,
                n_head_exp_min, n_head_exp_max,
                n_embd_multiplier_min, n_embd_multiplier_max,
            )
        except Exception as e:
            print(f"An error occurred: {str(e)}")
            retry_count += 1
        else:
            # Exit status 0 is the only success signal
            if complete == 0:
                print("Hyperparameter tuning completed successfully.")
                break
            print("Hyperparameter tuning failed.")
            retry_count += 1

        if retry_count >= max_retries:
            print("Max retries reached. Exiting.")
        else:
            print(f"Retrying in 5 seconds... (Attempt {retry_count + 1}/{max_retries})")
            time.sleep(5)

### 3. Get the best hyperparameters:

In [None]:
import optuna
import numpy as np

# Optuna study location: the SQLite database inside the dated experiment folder
study_name = "gpt2_arc_optimization"
storage_name = f"sqlite:///{date_folder}/optuna_results.db"
print(f"Storage Name: {storage_name}")
print(f"Study Name: {study_name}")

if use_best_params:
    try:
        # Show every study stored in the database
        study_summaries = optuna.study.get_all_study_summaries(storage=storage_name)
        print("Available studies in the database:")
        for summary in study_summaries:
            print(f"- {summary.study_name}")

        # Load the named study and report its best trial's parameters
        study = optuna.load_study(study_name=study_name, storage=storage_name)
        best_params = study.best_params
        print("Best hyperparameters:")
        print(json.dumps(best_params, indent=2))

        # Persist the best parameters next to the study database
        best_params_path = f"{date_folder}/best_hyperparameters.json"
        with open(best_params_path, "w") as f:
            json.dump(best_params, f)

    except KeyError as e:
        print("Error: The specified study does not exist in the database. Please ensure that the study name and storage path are correct.")
        print(f"Details: {str(e)}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

Storage Name: sqlite:////content/drive/MyDrive/ArcGPT/20240928/optuna_results.db
Study Name: gpt2_arc_optimization
Available studies in the database:
- gpt2_arc_optimization
Best hyperparameters:
{
  "n_head_exp": 2,
  "n_embd_multiplier": 1,
  "n_layer": 10,
  "batch_size": 2,
  "learning_rate": 0.00012725715474658315,
  "max_epochs": 23
}


### 4. Setup Evaluation of the trained model in the background:

In [None]:
import os
import wandb
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import subprocess
import threading
from datetime import datetime

# W&B credentials. Never commit a real key to the notebook: use the value already
# present in the environment, otherwise prompt for it without echoing the input.
# NOTE(review): a literal key used to be hard-coded in this cell; treat that key as
# leaked and rotate it in the W&B account settings.
wandb_api_key = os.environ.get("WANDB_API_KEY")
if not wandb_api_key:
    from getpass import getpass  # local import: only needed for the interactive fallback
    wandb_api_key = getpass("Enter your W&B API key: ")
os.environ["WANDB_API_KEY"] = wandb_api_key

print(f"arc_model_dir is set to: {arc_model_dir}")

# Directory containing the model files
model_dir = os.path.join(date_folder, "checkpoints")
print(f"Watching for new models in directory: {model_dir}")

# Create the model_dir if it doesn't exist
os.makedirs(model_dir, exist_ok=True)

# Evaluation outputs go here (relative to the current working directory)
output_dir = "evaluation_results"
os.makedirs(output_dir, exist_ok=True)
wandb_project = "arc-evaluation"

# Set of evaluated models (checkpoint file names, not full paths)
evaluated_models = set()

# Load previously evaluated models from a file (one file name per line)
evaluated_models_file = os.path.join(output_dir, "evaluated_models.txt")
if os.path.exists(evaluated_models_file):
    with open(evaluated_models_file, "r") as f:
        # Ignore blank lines so an empty string is never recorded as a model name
        evaluated_models.update(line.strip() for line in f if line.strip())
    print(f"Loaded evaluated models from {evaluated_models_file}")
else:
    print("No previously evaluated models found. Starting fresh.")

class CheckpointHandler(FileSystemEventHandler):
    """Run ``evaluate.py`` once for every checkpoint file that appears.

    ``evaluate_model`` is called from two threads: the watchdog observer thread
    (through ``on_created``) and the polling loop in ``start_observer``. A lock
    therefore guards the "already handled?" check so that both threads cannot
    start an evaluation of the same checkpoint.
    """

    # Shared by all instances: serialises the check-and-claim step in evaluate_model.
    _claim_lock = threading.Lock()

    def on_created(self, event):
        """Watchdog callback: evaluate newly created ``.ckpt`` / ``.pth`` files."""
        if event.is_directory:
            return
        if event.src_path.endswith(('.ckpt', '.pth')):
            print(f"New checkpoint detected: {event.src_path}")
            self.evaluate_model(event.src_path)

    @staticmethod
    def _build_run_name(model_file):
        """Derive the W&B run name from ``epoch=``/``val_loss=`` parts of the file name.

        Falls back to a name based on the whole file name when either part is missing.
        """
        fallback = f"scaling-test-evaluation-{model_file}"
        try:
            parts = model_file.replace('.ckpt', '').replace('.pth', '').split('-')
            epoch = None
            val_loss = None
            for part in parts:
                if part.startswith('epoch='):
                    epoch = part.split('=')[1]
                elif part.startswith('val_loss='):
                    val_loss = part.split('=')[1]
            if epoch is not None and val_loss is not None:
                return f"scaling-test-evaluation-epoch{epoch}-val_loss{val_loss}"
            return fallback
        except Exception as e:
            print(f"Error parsing run name from filename {model_file}: {e}")
            return fallback

    def evaluate_model(self, model_path):
        """Evaluate the checkpoint at ``model_path`` unless it was already handled.

        The checkpoint's file name is recorded in ``evaluated_models`` and appended
        to ``evaluated_models_file`` whether or not the evaluation succeeded, so a
        failing checkpoint is not retried on every poll.
        """
        model_file = os.path.basename(model_path)

        with self._claim_lock:
            if model_file in evaluated_models:
                print(f"Skipping already evaluated model: {model_file}")
                return  # Skip if the model was already evaluated (or is being evaluated)
            # Claim the checkpoint *before* the slow subprocess runs; otherwise the
            # other thread would pass the check above and evaluate it a second time.
            evaluated_models.add(model_file)

        run_name = self._build_run_name(model_file)

        eval_command = [
            "python", os.path.join(arc_model_dir, "gpt2_arc/src/evaluate.py"),
            "--model_checkpoint", model_path,
            "--batch_size", "32",
            "--output_dir", output_dir,
            "--wandb_project", wandb_project,
            "--wandb_run_name", run_name
        ]
        print(f"Evaluating model: {model_file} with command: {' '.join(eval_command)}")

        try:
            # Run the evaluation command and capture stdout and stderr
            result = subprocess.run(
                eval_command,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True  # Automatically decode bytes to string
            )
            print(f"Successfully evaluated model: {model_file}")
            print("Evaluation Output:")
            print(result.stdout)  # Print the standard output from evaluate.py
            if result.stderr:
                print("Evaluation Errors/Warnings:")
                print(result.stderr)  # Print any errors or warnings from evaluate.py
        except subprocess.CalledProcessError as e:
            print(f"Error during evaluation of {model_file}: {e}")
            print("Standard Output:")
            print(e.stdout)
            print("Standard Error:")
            print(e.stderr)
        except Exception as ex:
            print(f"An unexpected error occurred while evaluating {model_file}: {ex}")

        # Persist the file name only once the attempt has finished, so an evaluation
        # that was cut short (e.g. kernel restart) is picked up again next session.
        with open(evaluated_models_file, "a") as f:
            f.write(model_file + "\n")

def get_all_checkpoint_files(directory):
    """Return the paths of all ``.ckpt`` / ``.pth`` files found anywhere under ``directory``.

    The search is recursive (``os.walk``); the directory being checked and the
    resulting list are both printed.
    """
    print(f"Checking directory for .ckpt and .pth files: {directory}")
    found = []
    for folder, _subdirs, filenames in os.walk(directory):
        for filename in filenames:
            if filename.endswith(('.ckpt', '.pth')):
                found.append(os.path.join(folder, filename))
    print(f"Found checkpoint files: {found}")
    return found

def start_observer():
    """Watch ``model_dir`` for checkpoints and evaluate each new one.

    A watchdog observer reacts to file-creation events, and in addition the
    directory is re-scanned every 10 seconds. The function blocks until it is
    interrupted or an error occurs; the observer is always stopped and joined
    before it returns.
    """
    # Set up and start the watchdog observer
    event_handler = CheckpointHandler()
    observer = Observer()
    observer.schedule(event_handler, model_dir, recursive=True)
    observer.start()

    print("Watching for new checkpoints and final models in all subdirectories...")
    print("This script will continue running in the background.")

    try:
        while True:
            time.sleep(10)
            # Check for any new models
            current_models = set(get_all_checkpoint_files(model_dir))
            # evaluated_models holds file names while current_models holds full
            # paths, so compare on the base name rather than subtracting the sets.
            new_models = {
                path for path in current_models
                if os.path.basename(path) not in evaluated_models
            }

            print(f"Current models: {current_models}")
            print(f"New models to evaluate: {new_models}")

            # Sorted so checkpoints are evaluated in a stable order
            for model_path in sorted(new_models):
                event_handler.evaluate_model(model_path)
    except KeyboardInterrupt:
        print("Observer stopped by user.")
    except FileNotFoundError as fnf_error:
        print(f"FileNotFoundError: {fnf_error}")
        print(f"Please ensure that the directory '{model_dir}' exists.")
    except Exception as e:
        print(f"An error occurred in the observer: {e}")
    finally:
        # Stop on every exit path: join() on an observer that is still running
        # would block forever.
        observer.stop()
        observer.join()
        print("Checkpoint and final model evaluation completed.")

# Launch the checkpoint watcher without blocking the notebook
def run_observer():
    """Run ``start_observer`` on a daemon thread so the calling cell returns at once."""
    # daemon=True: the thread ends with the main program instead of keeping it alive
    worker = threading.Thread(target=start_observer, daemon=True)
    worker.start()
    print("Background checkpoint observer started.")

# Kick off the background watcher
run_observer()


arc_model_dir is set to: /content/arc-neural-reasoning-model/
Watching for new models in directory: /content/drive/MyDrive/ArcGPT/20240928/checkpoints
No previously evaluated models found. Starting fresh.
Background checkpoint observer started.


### 5. Use the best hyperparameters for longer training (manually set max epochs!):

In [None]:
import json
import subprocess
import os
import math

# Decide which hyperparameters the long training run will use
if not use_best_params:
    # Use manually defined parameters
    params = manual_params
    print("Using manual hyperparameters.")
else:
    try:
        # Read the parameters written by the "best hyperparameters" step
        with open(f"{date_folder}/best_hyperparameters.json", "r") as f:
            best_params = json.load(f)
        print("Loaded best hyperparameters from JSON.")

        has_conversion_keys = 'n_head_exp' in best_params and 'n_embd_multiplier' in best_params
        if not has_conversion_keys:
            # No exponent/multiplier encoding: take n_head / n_embd directly, or the manual values
            n_head = best_params.get("n_head", manual_params["n_head"])
            n_embd = best_params.get("n_embd", manual_params["n_embd"])
            print("n_head_exp and/or n_embd_multiplier not found in best_params. Using existing n_head and n_embd.")
        else:
            # n_head is stored as an exponent of two
            n_head_exp = best_params['n_head_exp']
            n_head = 2 ** n_head_exp
            # n_embd is stored as a multiple of n_head ...
            n_embd_multiplier = best_params['n_embd_multiplier']
            n_embd = n_head * n_embd_multiplier
            # ... and is then rounded down to a power of two
            n_embd = 2 ** int(math.log2(n_embd))
            print(f"Converted n_head_exp: {n_head_exp} to n_head: {n_head}")
            print(f"Converted n_embd_multiplier: {n_embd_multiplier} to n_embd: {n_embd}")

        # Tuned values where available, manual values otherwise; max_epochs is
        # always taken from the manual setting.
        params = {
            "n_embd": n_embd,
            "n_head": n_head,
            **{
                key: best_params.get(key, manual_params[key])
                for key in ("n_layer", "batch_size", "learning_rate")
            },
            "max_epochs": manual_params["max_epochs"],
        }
        print(f"Final Parameters for Training: {params}")
    except FileNotFoundError:
        print(f"Warning: {date_folder}/best_hyperparameters.json not found. Using manual parameters.")
        params = manual_params
    except Exception as e:
        print(f"An error occurred while loading best hyperparameters: {str(e)}. Using manual parameters.")
        params = manual_params



Loaded best hyperparameters from JSON.
Converted n_head_exp: 2 to n_head: 4
Converted n_embd_multiplier: 1 to n_embd: 4
Final Parameters for Training: {'n_embd': 4, 'n_head': 4, 'n_layer': 10, 'batch_size': 2, 'learning_rate': 0.00012725715474658315, 'max_epochs': 50}


In [None]:

# Assemble the train.py command line: one flag per hyperparameter, in a fixed order
train_script = os.path.join(arc_model_dir, "gpt2_arc/src/training/train.py")
hyperparameter_flags = {
    "--n-embd": "n_embd",
    "--n-head": "n_head",
    "--n-layer": "n_layer",
    "--batch-size": "batch_size",
    "--learning-rate": "learning_rate",
    "--max-epochs": "max_epochs",
}
train_args = ["python", train_script]
for flag, param_key in hyperparameter_flags.items():
    train_args += [flag, str(params[param_key])]
train_args += ["--use-gpu", "--project", "arc-scaling-test"]

# Notify the user that training is starting
print("Starting training process...")

# Launch training with stderr merged into stdout so that a single text stream
# can be echoed into the notebook cell
process = subprocess.Popen(
    train_args,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
)

# Echo the child's output line by line as it arrives
try:
    for output_line in process.stdout:
        print(output_line, end='')
except KeyboardInterrupt:
    # Interrupting the cell also terminates the training process
    print("\nTraining interrupted by user.")
    process.terminate()
    process.wait()

# Collect the exit status and report the outcome
return_code = process.wait()
if return_code != 0:
    print(f"Training failed with return code {return_code}.")
else:
    print("Training completed successfully.")


Starting training process...
2024-09-28 21:00:05.887437: I tensorflow/core/tpu/tpu_api_dlsym_initializer.cc:95] Opening library: /usr/local/lib/python3.10/dist-packages/tensorflow/python/platform/../../libtensorflow_cc.so.2
2024-09-28 21:00:05.887635: I tensorflow/core/tpu/tpu_api_dlsym_initializer.cc:119] Libtpu path is: libtpu.so
2024-09-28 21:00:05.929597: I tensorflow/core/tpu/tpu_api_dlsym_initializer.cc:136] FindAndLoadTpuLibrary failed with ABORTED: Internal error when accessing libtpu multi-process lockfile. Run "$ sudo rm /tmp/libtpu_lockfile".. This is expected if TPU is not used.
2024-09-28 21:00:05.931745: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
DEBUG:h5py._conv:Creating converter from 7 to 5
DEBUG:h5py._conv:Creating converte

### 6. Analyze the results:

In [None]:
import json
import pandas as pd
from pathlib import Path
import glob
import re
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Folder (relative to the working directory) that holds the evaluation results
results_dir = Path("./evaluation_results/")

# 2. Collect the JSON files located directly inside that folder
json_files = [result_path for result_path in results_dir.glob("*.json")]
print(f"Found {len(json_files)} JSON files.")

# 3. Function to extract timestamp from filename
def extract_timestamp(filename):
    """
    Extract the trailing timestamp from a result file name.

    Expected format: ..._YYYYMMDD_HHMMSS.json

    Args:
        filename: File name (or path string) to inspect.

    Returns:
        A ``pandas.Timestamp`` when the name ends with a valid timestamp,
        otherwise ``pandas.NaT``. ``NaT`` is returned both when the pattern is
        absent and when the digits do not form a real date/time.
    """
    pattern = r"_(\d{8}_\d{6})\.json$"
    match = re.search(pattern, filename)
    if match is None:
        return pd.NaT  # Not a Time if pattern doesn't match
    # errors="coerce": digits that match the pattern but are not a valid date
    # (e.g. month 13) yield NaT instead of raising ValueError.
    return pd.to_datetime(match.group(1), format="%Y%m%d_%H%M%S", errors="coerce")

# 4. Load every result file once and collect aggregate and per-task records.
# The column lists are passed to the DataFrame constructors below so the frames
# have the expected columns even when no records were collected; without them
# the dropna(subset=["timestamp"]) calls raise KeyError on an empty frame.
AGGREGATE_COLUMNS = [
    "timestamp",
    "test_loss",
    "test_accuracy",
    "test_diff_accuracy",
    "complete_task_accuracy",
]
INDIVIDUAL_COLUMNS = ["timestamp", "metric_id", "test_accuracy", "test_diff_accuracy"]

data = []             # one record per result file (aggregate metrics)
individual_data = []  # one record per (result file, metric_id) pair

for file in json_files:
    try:
        with open(file, "r") as f:
            results = json.load(f)
    except (OSError, json.JSONDecodeError) as exc:
        # One unreadable file (for example a partially written one) should not
        # abort the whole analysis; report it and move on.
        print(f"Skipping unreadable results file {file}: {exc}")
        continue

    # Extract timestamp from filename
    timestamp = extract_timestamp(file.name)

    # Aggregate results: missing keys become None
    aggregate = results.get("aggregate_results", {})
    record = {"timestamp": timestamp}
    record.update({key: aggregate.get(key) for key in AGGREGATE_COLUMNS[1:]})
    data.append(record)

    # 6. (Optional) Individual metrics, keyed by metric id
    individual = results.get("individual_metrics", {})
    for metric_id, metrics in individual.items():
        individual_data.append({
            "timestamp": timestamp,
            "metric_id": metric_id,
            "test_accuracy": metrics.get("test_accuracy"),
            "test_diff_accuracy": metrics.get("test_diff_accuracy"),
        })

print(f"Compiled {len(data)} records.")

# 5. Create DataFrame for aggregate results
df = pd.DataFrame(data, columns=AGGREGATE_COLUMNS)

# Drop records with missing timestamps
df = df.dropna(subset=["timestamp"])

# Sort by timestamp
df = df.sort_values("timestamp").reset_index(drop=True)

print("Aggregate Results DataFrame:")
print(df.head())

individual_df = pd.DataFrame(individual_data, columns=INDIVIDUAL_COLUMNS)

# Drop records with missing timestamps
individual_df = individual_df.dropna(subset=["timestamp"])

# Convert timestamp to datetime if not already
if individual_df["timestamp"].dtype == object:
    individual_df["timestamp"] = pd.to_datetime(individual_df["timestamp"])

print("Individual Metrics DataFrame:")
print(individual_df.head())

# 7. Save DataFrames to CSV (Optional)
df.to_csv("aggregate_evaluation_results.csv", index=False)
individual_df.to_csv("individual_evaluation_metrics.csv", index=False)

# 8. Plot the aggregate metrics over time
def _plot_metric_over_time(frame, column, label, color=None):
    """Draw ``frame[column]`` against ``frame['timestamp']`` as a line plot.

    Args:
        frame: DataFrame with a ``timestamp`` column and the metric column.
        column: Name of the metric column to plot.
        label: Human-readable metric name, used for legend, title and y-axis.
        color: Optional matplotlib colour; ``None`` keeps matplotlib's default.
    """
    plt.figure(figsize=(12, 6))
    plt.plot(frame['timestamp'], frame[column], marker='o', linestyle='-', color=color, label=label)
    plt.title(f'{label} Over Time')
    plt.xlabel('Timestamp')
    plt.ylabel(label)
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()


if df.empty:
    print("No aggregate results to plot.")
else:
    # (column, label, colour) for each aggregate metric, in display order
    for metric_column, metric_label, metric_color in [
        ('test_accuracy', 'Test Accuracy', None),
        ('test_diff_accuracy', 'Test Diff Accuracy', 'green'),
        ('complete_task_accuracy', 'Complete Task Accuracy', 'red'),
    ]:
        _plot_metric_over_time(df, metric_column, metric_label, color=metric_color)

# Heatmap: Task Accuracy Over Time

# Pivot the individual_df to have 'metric_id' as rows and 'timestamp' as columns, with 'test_accuracy' as values
if individual_df.empty:
    heatmap_data = pd.DataFrame()
else:
    heatmap_data = individual_df.pivot_table(index='metric_id', columns='timestamp', values='test_accuracy')

if heatmap_data.empty:
    # seaborn cannot draw a heatmap of an empty table, so report and skip instead
    print("No individual task accuracies available; skipping heatmap.")
else:
    # Plot the heatmap
    plt.figure(figsize=(12, 10))
    sns.heatmap(heatmap_data, cmap="coolwarm", cbar_kws={'label': 'Test Accuracy'}, annot=False)
    plt.title('Task Accuracy Over Time')
    plt.xlabel('Timestamp')
    plt.ylabel('Task (Metric ID)')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
