In [2]:
## Helper functions

# Output
# This is needed to redirect the output to the notebook; otherwise it ends up in the log, where it is difficult to find.
import os


# Output widget behind the @output.capture() decorators on the callbacks below.
output = widgets.Output()

# Flipped to True by on_validate_button_click; submitJob returns early while False.
validation = False

# Default run name; the same string is used as the default output folder name.
uniquename = output_folder = "wxhz"


@output.capture()
def redirect_to_notebook(event):
    """    
    This function redirects the user to the next notebook, which displays the status of the job. It stores the hostname, username, and pkey values if provided, and opens the target notebook in a new window.
    """
    print("Going to next notebook")
    global hostname, username, job_id, pkey
    hostname = cluster_input.value
    username = username_input.value
    pkey = user_pkey.value
    if hostname:
        %store hostname
    if username:
        %store username
    if pkey:
        %store pkey
    target_url = "Show_job_status.ipynb"
    display(Javascript(f'window.open("{target_url}");'))

def auth_interactive_handler_callback(title, instructions, prompts):
    """
    Answer the prompts of a keyboard-interactive SSH login.

    This is passed as ``handler`` to ``transport.auth_interactive``. Every
    prompt is shown to the user; the answer is typed visibly when the server
    asks for it to be echoed and read hidden (via ``getpass``) otherwise.

    Parameters:
    title: title sent by the server (not used).
    instructions: instructions sent by the server (not used).
    prompts: sequence of ``(prompt_text, echo)`` pairs.

    Returns:
    list of str: one response per prompt, in the order the prompts were given.
    """
    responses = []
    for prompt in prompts:
        prompt_text = prompt[0]
        echo = prompt[1]
        if echo:
            # The server allows the answer to be visible while typing.
            response = input(prompt_text)
        else:
            # Secret answer: keep the input hidden but still show what is asked.
            response = getpass.getpass(prompt_text)
        responses.append(response)
    return responses

@output.capture()
def on_validate_button_click(button):
    """
    Check that every required input field has been filled in.

    The module-level ``validation`` flag is reset to False first and only set
    to True when none of the required widgets holds an empty string. A message
    describing the outcome is printed in both cases.

    Parameters:
    button: the argument passed by the widget callback; it is not used.

    Returns:
    None
    """
    global validation
    validation = False

    required_fields = (
        username_input,
        cluster_input,
        queue_input,
        job_name_input,
        num_nodes_input,
        runtime_input,
        work_folder_input,
    )
    if any(field.value == "" for field in required_fields):
        print("Please enter a username, cluster, queue, job name, number of nodes, runtime, and work folder")
        return

    validation = True
    print("Validation successful")
    
@output.capture()
def submitJob():
    """
    Submits a job for execution.

    This function submits a job for execution on a cluster or locally. It performs the following steps:
    1. Validates the input parameters.
    2. Executes the model locally if the hostname is 'localhost'.
    3. Sets up the necessary environment variables and commands for running the job on a cluster.
    4. Updates the ED2IN file with the specified variables.
    5. Creates a batch job script and transfers it to the cluster.
    6. Submits the batch job and extracts the job ID.
    7. Stores the job ID for future reference.

    Note: This function assumes that the necessary input parameters and variables have been set before calling it.

    Parameters:
    None

    Returns:
    None
    """
    if not validation:
        print("")
        return
    print("Executing model")
    hostname = cluster_input.value
    print(hostname)
    path_ED2IN = ED2IN_path_input.value
    command = ['ed2', '-f', path_ED2IN]
    cwd = "ed-demo"

    #### UPDATE ED2IN FILE
    ## 1. create subdirectory

    if hostname == 'localhost':
        print(f"Starting to run model with {path_ED2IN}")
        resource.setrlimit(resource.RLIMIT_STACK, (-1, -1))
        proc = subprocess.Popen(command, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        for line in proc.stdout:
            print(line.decode(), end="")
        #output = subprocess.check_output(command, cwd=cwd, stderr=subprocess.STDOUT)
        #print(output.decode())
        return

    username = username_input.value
    password = user_password.value
    pkey = user_pkey.value
    for cluster in clusters_data["clusters"]:
        if cluster["hostname"] == hostname:
            modules_to_load = cluster["modules_to_load"]
            pre_run_command = cluster["pre_run_command"]
            apptainer_binary_command = cluster["apptainer_binary_command"]
            post_run_command = cluster["post_run_command"]


    account = user_acc.value
    partition = queue_input.value
    job_name = job_name_input.value
    nodes = num_nodes_input.value
    time = runtime_input.value
    work_folder = work_folder_input.value
    path_singularity_image = ed_binary_singularity_input.children[1].value

    # vars to be replaced in ED2IN
    header_file_path = header_file_input.value
    met_driver = met_driver_input.value
    vars = {}
    for dropdown in var_dropdowns.children:
        var_option = dropdown.children[0].value
        var_value = dropdown.children[1].value
        vars[var_option] = var_value

    # update/fix ED2IN file

    # TOOD HACK
    uniquename=generate_random_string()
    work_folder = "${HOME}/ed-demo"
    output_folder = "${HOME}/" + uniquename
    job_name = "ED2IN-" + uniquename

    # Batch job details
    ntasks_per_node = 16                    # Number of task (cores/ppn) per node
    output = "openmp_" + job_name + ".o%j"  # Name of batch job output file
    error = "openmp_" + job_name + ".e%j"   # Name of batch job error file
    mail_user = username + "@illinois.edu"        # Send email notifications
    mail_type = "BEGIN,END"                 # Type of email notifications to send

    ssh_client = paramiko.SSHClient()
    ssh_client.load_system_host_keys()
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    connected = False

    if pkey != '':
        print("Using private key provided")
        key = paramiko.pkey.PKey.from_path(pkey)
        ssh_client.connect(hostname, username=username, pkey=key, allow_agent=True)
    else:
        #print("Using password provided")
        try:
            ssh_client.connect(hostname, username=username, password=password, allow_agent=True)
            connected = True
            print("successfully connected")
        except:
            pass
    transport = ssh_client.get_transport()
    if not connected:
        transport.auth_interactive(username=username, handler=auth_interactive_handler_callback)
    #transport.auth_password(username, getpass.getpass('Enter {0} Logon password :'.format(hostname)))
    sftp_client = paramiko.SFTPClient.from_transport(transport)

    #create the bat file
    with open(job_name + ".sbatch", 'w') as f:
        f.writelines("#!/bin/bash\n")
        if account != '':
            f.writelines("#SBATCH --account=" + str(account) + "\n")
        f.writelines("#SBATCH --time=" + str(time) + "\n")
        f.writelines("#SBATCH --nodes=" + str(nodes) + "\n")
        f.writelines("#SBATCH --ntasks-per-node=" + str(ntasks_per_node) + "\n")
        f.writelines("#SBATCH --job-name=" + job_name + "\n")
        f.writelines("#SBATCH --partition=" + partition + "\n")
        f.writelines("#SBATCH --output=" + output + "\n")
        f.writelines("#SBATCH --error=" + error + "\n")
        f.writelines("##SBATCH --mail-user=" + mail_user + "\n")
        f.writelines("##SBATCH --mail-type=" + mail_type + "\n")
        f.writelines("\n")
        # TODO check this, does not work on DELTA
        #if met_driver is not None and header_file_path is not None:
        #  f.writelines([f"sed -i /path_to/c{met_driver} {header_file_path}\n"])
        #for key, value in vars.items():
        #  if key is not None and value is not None:
        #    abs_ED2IN_path = work_folder + path_ED2IN
        #    f.writelines([f"sed -i /{key}/c{value} {abs_ED2IN_path}\n"])
        f.writelines("\n")
        # mkdir
        f.writelines(f"mkdir -p {output_folder}\n")
        if modules_to_load != "":
            f.writelines("# load modules\n")
            f.writelines(f"module load {modules_to_load}" + "\n")
        if pre_run_command != "":
            f.writelines("# pre run command\n")
            f.writelines(f"{pre_run_command}" + "\n")
        if apptainer_binary_command != "":
            f.writelines("# run apptainer\n")
            f.writelines(f"{apptainer_binary_command} --bind {work_folder}:/data --bind {output_folder}:/data/outputs --no-home --pwd /data {path_singularity_image} ed2 -f {path_ED2IN}")
        if post_run_command != "":
            f.writelines("# post run command\n")
            f.writelines(f"{post_run_command}" + "\n")
    f.close()

    #transfer .bat file to cluster and run it
    sftp_client.put(job_name + ".sbatch", f"{job_name}.sbatch")
    sftp_client.chmod(f"{job_name}.sbatch", stat.S_IRWXU)
    _, stdo, stde = ssh_client.exec_command("sbatch " + job_name + ".sbatch")
    print(stde.read().decode())

    # Extract the job ID from the sbatch output
    result = stdo.read().decode()
    print(result)
    submitted_job_id = result.split()[3]
    print(submitted_job_id)
    global job_id
    job_id=submitted_job_id
    if job_id:
        %store job_id

    sftp_client.close()
    ssh_client.close()
    transport.close()

@output.capture()
def showJobStatus():
    """
    Follow a submitted job over SSH and fetch its results.

    The hostname, username, password, private-key path, remote output folder
    and job id are read from the input widgets. The function then:

    1. Opens an SSH connection with the private key if one was given, else
       with the password. If the password login is rejected, it falls back to
       keyboard-interactive authentication.
    2. Prints the ``squeue`` output for the job every 10 seconds until the job
       id no longer appears in it.
    3. Prints the contents of the remote ``*.o<job id>`` and ``*.e<job id>``
       files.
    4. Downloads every file of the remote output folder into the local
       directory named by the module-level ``uniquename``.

    Nothing is done when the job id field is empty.

    Parameters:
    - None

    Returns:
    - None
    """
    import posixpath  # remote paths always use '/' regardless of the local OS

    hostname = cluster_input.value
    username = username_input.value
    password = user_password.value
    pkey = user_pkey.value
    output_folder = output_folder_input.value
    # Normalised to text so the membership test on the squeue output works
    # for numeric ids as well.
    job_id = str(job_id_input.value).strip()
    if not job_id:
        # An empty id is "contained" in every string, which would make the
        # polling loop below run forever.
        print("Please enter a job id")
        return

    ssh_client = paramiko.SSHClient()
    ssh_client.load_system_host_keys()
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    sftp_client = None
    try:
        connected = False
        if pkey != '':
            print("Using private key provided")
            key = paramiko.pkey.PKey.from_path(pkey)
            ssh_client.connect(hostname, username=username, pkey=key, allow_agent=True)
            # A successful key login must not be followed by another
            # authentication attempt on the same transport.
            connected = True
        else:
            try:
                ssh_client.connect(hostname, username=username, password=password, allow_agent=True)
                connected = True
                print("successfully connected")
            except paramiko.SSHException:
                # Password login rejected; fall back to keyboard-interactive
                # authentication on the transport that connect() opened.
                pass
        transport = ssh_client.get_transport()
        if not connected:
            transport.auth_interactive(username=username, handler=auth_interactive_handler_callback)
        sftp_client = paramiko.SFTPClient.from_transport(transport)

        # Check the job status periodically
        print("Job status")
        while True:
            _, stdo, stde = ssh_client.exec_command(f"squeue -u {username} -j {job_id}")
            job_status = stdo.read().decode()
            print(job_status)

            # Stop once the job is no longer listed (completed or failed)
            if job_id not in job_status:
                break

            # Wait for a few seconds before checking again
            timer.sleep(10)

        print("Output")
        # View output
        try:
            _, stdo, stde = ssh_client.exec_command(f"cat *.o{job_id}")
            print(stdo.read().decode())
            print(stde.read().decode())
        except Exception:
            print("No output file found")

        print("Error")
        # View error
        try:
            _, stdo, stde = ssh_client.exec_command(f"cat *.e{job_id}")
            print(stdo.read().decode())
            print(stde.read().decode())
        except Exception:
            print("No error file found")

        print("Copying output files here")
        files = sftp_client.listdir(output_folder)

        # Ensure local directory exists
        os.makedirs(uniquename, exist_ok=True)

        # Download each file; a failure is reported and the next file is tried
        for file in files:
            remote_filepath = posixpath.join(output_folder, file)
            local_filepath = os.path.join(uniquename, file)
            try:
                sftp_client.get(remote_filepath, local_filepath)
                print(f"Downloaded {file} to {local_filepath}")
            except Exception as e:
                print(f"Failed to download {file}: {e}")
    finally:
        # Always release the connection, also when a step above failed.
        if sftp_client is not None:
            sftp_client.close()
        # Closing the client also closes its underlying transport.
        ssh_client.close()

def handle_cluster_change(change):
    """
    React to a new selection in the cluster dropdown.

    Choosing ``localhost`` disables every input that only matters for a
    remote submission. Any other choice re-enables those inputs, clears the
    username, password and account fields, loads the batch-job and queue
    options of the chosen cluster, and resets the node count to 1 and the
    runtime to "00:15:00".

    Parameters:
    change: change notification; its ``new`` attribute is the selected cluster.

    Returns:
    None
    """
    cluster_name = change.new

    if cluster_name == "localhost":
        # A local run uses none of the remote-submission inputs.
        for remote_only_widget in (
            username_input,
            batch_job_input,
            user_password,
            user_acc,
            queue_input,
            ed_binary_singularity_input.children[1],
            job_name_input,
            num_nodes_input,
            runtime_input,
            work_folder_input,
            user_pkey,
        ):
            remote_only_widget.disabled = True
        return

    # Remote cluster: (widget, attribute, new value), applied top to bottom.
    updates = (
        (username_input, "disabled", False),
        (username_input, "value", ""),
        (user_password, "disabled", False),
        (user_password, "value", ""),
        (batch_job_input, "options", batch_jobs_dict.get(cluster_name, [])),
        (queue_input, "options", queues_dict.get(cluster_name, [])),
        (batch_job_input, "disabled", False),
        (user_acc, "disabled", False),
        (user_acc, "value", ""),
        (queue_input, "disabled", False),
        (ed_binary_singularity_input.children[1], "disabled", False),
        (job_name_input, "disabled", False),
        (num_nodes_input, "disabled", False),
        (num_nodes_input, "value", 1),
        (runtime_input, "disabled", False),
        (runtime_input, "value", "00:15:00"),
        (work_folder_input, "disabled", False),
        (user_pkey, "disabled", False),
    )
    for widget, attribute, new_value in updates:
        setattr(widget, attribute, new_value)


#<-------------------------UI related helper functions--------------------------------------------------->
def generate_random_string(length=4, chars="abcdefghijklmnopqrstuvwxyz0123456789"):
    """
    Build a short random name.

    Args:
        length (int): Number of characters to produce. Default is 4.
        chars (str): Alphabet the characters are drawn from. Default is the
            lowercase letters followed by the digits.

    Returns:
        str: ``length`` characters, each picked independently from ``chars``.
    """
    picked = []
    for _ in range(length):
        picked.append(random.choice(chars))
    return "".join(picked)

def add_dropdown(button):
    """
    Append one more replacement row to the list of dropdowns.

    Options currently selected in the existing rows are first taken out of
    ``var_options`` so that the new row does not offer them. The add button
    is disabled when exactly one option is left afterwards.

    Parameters:
    button: the argument passed by the widget callback; it is not used.

    Returns:
    None
    """
    # Drop every option that an existing row has already selected.
    for row in var_dropdowns.children:
        taken = row.children[0].value
        try:
            var_options.remove(taken)
        except ValueError:
            # Not in the list (any more): nothing to remove.
            pass

    # Append the new row built from the remaining options.
    var_dropdowns.children = var_dropdowns.children + (create_dropdown(),)

    if len(var_options) == 1:
        add_button.disabled = True

def create_dropdown():
    """
    Build one replacement row.

    Returns:
        ipywidgets.HBox: a box holding a dropdown over ``var_options``
        (labelled 'Replace:') followed by a text field for the new value.
    """
    option_picker = widgets.Dropdown(options=var_options, description='Replace:')
    value_field = widgets.Text(placeholder="Enter the path")
    return widgets.HBox([option_picker, value_field])

def remove_dropdown(button):
    """
    Remove the most recently added replacement row.

    After the last row is dropped, the option selected in the row that is now
    last is put back into ``var_options`` (unless it is already there), and
    the add button is re-enabled. Nothing happens when there is no row.

    Parameters:
    button: the argument passed by the widget callback; it is not used.

    Returns:
    None
    """
    if not var_dropdowns.children:
        return

    var_dropdowns.children = var_dropdowns.children[:-1]

    # Only look at the new last row if one is left; removing the only row
    # used to raise IndexError here and left the add button disabled.
    if var_dropdowns.children:
        var_option = var_dropdowns.children[-1].children[0].value
        if var_option not in var_options:
            var_options.append(var_option)

    add_button.disabled = False


NameError: name 'widgets' is not defined