## Welcome to the Profiling Tool for the RAPIDS Accelerator for Apache Spark

To run the profiling tool, enter the DBFS location of your Spark GPU event logs in the "Eventlog Path" widget. Then select "Run all" to execute the notebook. Once the notebook completes, various output tables will appear below. For more options on running the profiling tool, please refer to the [Profiling Tool User Guide](https://docs.nvidia.com/spark-rapids/user-guide/latest/profiling/quickstart.html#running-the-tool).

### Note
- Currently, only local or DBFS event log paths are supported.
- The DBFS path must use the File API format. Example: `/dbfs/<path-to-event-log>`.
- Multiple event logs must be comma-separated.

### Per-Job Profile

The profiler output includes information about the application, data sources, executors, SQL stages, Spark properties, and key application metrics at the job and stage levels.

In [0]:
# Version of the spark-rapids-user-tools package; the %pip install cell pins to this value.
TOOLS_VER = "24.06.0"
print(f"Using Tools Version: {TOOLS_VER}")

In [0]:
%pip install spark-rapids-user-tools==$TOOLS_VER > /dev/null

In [0]:
import os
import pandas as pd

# Initialize variables from widgets: each widget is declared with its default value
# and then read back into a module-level constant.
dbutils.widgets.dropdown("Cloud Provider", "aws", ["aws", "azure"])
CSP = dbutils.widgets.get("Cloud Provider")

dbutils.widgets.text("Eventlog Path", "/dbfs/user1/profiling_logs")
EVENTLOG_PATH = dbutils.widgets.get("Eventlog Path")

dbutils.widgets.text("Output Path", "/tmp")
OUTPUT_PATH = dbutils.widgets.get("Output Path")

# The console logs below are produced by shell redirection into OUTPUT_PATH, which
# fails if the directory does not exist, so create it before the tool is invoked.
# An empty value is left alone (the console logs then resolve to relative paths).
if OUTPUT_PATH:
    os.makedirs(OUTPUT_PATH, exist_ok=True)

# Setup environment variables so the %sh cells can read the widget values
os.environ["CSP"] = CSP
os.environ["EVENTLOG_PATH"] = EVENTLOG_PATH
os.environ["OUTPUT_PATH"] = OUTPUT_PATH

# Setup console output files: stdout and stderr of the tool are captured separately
CONSOLE_OUTPUT_PATH = os.path.join(OUTPUT_PATH, 'console_output.log')
CONSOLE_ERROR_PATH = os.path.join(OUTPUT_PATH, 'console_error.log')
os.environ['CONSOLE_OUTPUT_PATH'] = CONSOLE_OUTPUT_PATH
os.environ['CONSOLE_ERROR_PATH'] = CONSOLE_ERROR_PATH
print(f'Console output will be stored at {CONSOLE_OUTPUT_PATH} and errors will be stored at {CONSOLE_ERROR_PATH}')

In [0]:
%sh
# Run the profiling tool; stdout and stderr are captured to separate log files.
# Every expansion is quoted so paths containing spaces or glob characters are
# passed through as a single, literal argument.
spark_rapids profiling --platform "databricks-$CSP" --eventlogs "$EVENTLOG_PATH" -o "$OUTPUT_PATH" > "$CONSOLE_OUTPUT_PATH" 2> "$CONSOLE_ERROR_PATH"

## Console Output
The console output shows the recommended configurations for each application.

**Note**: Use the `--verbose` flag in the command above for more detailed output.


In [0]:
%sh
# Quoted so an output location containing spaces reaches cat as one argument.
cat "$CONSOLE_OUTPUT_PATH"

In [0]:
%sh
# Quoted so an output location containing spaces reaches cat as one argument.
cat "$CONSOLE_ERROR_PATH"

In [0]:
import re
import shutil
import os

def extract_file_info(console_output_path, output_base_path):
    """Find the run's log file and output folder from the captured console output.

    The console text is searched for the first ``Location: <path>`` line. The
    ``prof_*`` path component that directly precedes ``.log`` in that path is
    taken as the name of the run's output folder under ``output_base_path``.

    Args:
        console_output_path: Path of the text file holding the tool's stdout.
        output_base_path: Directory to which the output folder name is joined.

    Returns:
        A ``(output_folder, log_file_location)`` tuple of path strings.

    Raises:
        RuntimeError: If the file cannot be read or either pattern is missing.
            The underlying exception is chained as ``__cause__``.
    """
    try:
        with open(console_output_path, 'r') as file:
            stdout_text = file.read()

        # Extract log file location
        location_match = re.search(r"Location: (.+)", stdout_text)
        if not location_match:
            raise ValueError("Log file location not found in the provided text.")

        # `.+` also captures trailing blanks on the line; strip them so the
        # returned value is usable as a filesystem path.
        log_file_location = location_match.group(1).strip()

        # Extract profiling output folder (the file name without its .log suffix)
        prof_match = re.search(r"prof_[^/]+(?=\.log)", log_file_location)
        if not prof_match:
            raise ValueError("Output folder not found in the log file location.")

        output_folder_name = prof_match.group(0)
        output_folder = os.path.join(output_base_path, output_folder_name)
        return output_folder, log_file_location

    except Exception as e:
        # Chain explicitly so the root cause stays attached to the wrapper.
        raise RuntimeError(f"Cannot parse console output. Reason: {e}") from e

def copy_logs(destination_folder, *log_files):
    """Copy the given log files into ``<destination_folder>/logs``.

    The ``logs`` directory is created when absent. A source path that does not
    exist is reported on stdout and skipped; the remaining files are still
    copied.

    Args:
        destination_folder: Directory that will contain the ``logs`` sub-folder.
        *log_files: Paths of the files to copy.

    Raises:
        RuntimeError: If creating the directory or copying a file fails.
    """
    try:
        logs_dir = os.path.join(destination_folder, "logs")
        os.makedirs(logs_dir, exist_ok=True)

        for source_path in log_files:
            if not os.path.exists(source_path):
                print(f"Log file not found: {source_path}")
                continue
            shutil.copy2(source_path, logs_dir)
    except Exception as e:
        raise RuntimeError(f"Cannot copy logs to output. Reason: {e}")

# Derive the run's output folder from the console log, then copy the tool's log file
# and both console logs into <output_folder>/logs. Any failure is printed, not raised.
# NOTE(review): when extraction fails, `output_folder` is not assigned by this cell,
# yet later cells read it — confirm whether the run should stop here instead.
try:
    output_folder, log_file_location = extract_file_info(CONSOLE_OUTPUT_PATH, OUTPUT_PATH)
    print(f"Output folder detected {output_folder}")
    copy_logs(output_folder, log_file_location, CONSOLE_OUTPUT_PATH, CONSOLE_ERROR_PATH)
    print(f"Logs successfully copied to {output_folder}")
except Exception as e:
    print(e)


In [0]:
import shutil
import os
import re

# Captured once when this cell runs; create_download_link uses it to build the local zip path.
current_working_directory = os.getcwd()

def create_destination_folders(folder_name):
    """Create the local and FileStore destination folders for the download.

    Two directories are ensured to exist, in this order: ``folder_name`` itself
    (a relative name resolves against the current working directory) and
    ``/dbfs/FileStore/<folder_name>``.

    Args:
        folder_name: Name of the destination folder.

    Returns:
        The ``/dbfs/FileStore/<folder_name>`` path.
    """
    filestore_folder = os.path.join("/dbfs/FileStore/", folder_name)
    for directory in (folder_name, filestore_folder):
        os.makedirs(directory, exist_ok=True)
    return filestore_folder

def create_download_link(source_folder, destination_folder_name):
    """Zip ``source_folder`` and render an HTML download button for the archive.

    The archive is named ``<basename of source_folder>.zip``. It is copied to the
    folder returned by ``create_destination_folders`` and then moved to
    ``<current_working_directory>/<destination_folder_name>/``, replacing any
    file already at that path. On success a styled "Download Output" link that
    points at ``/files/<destination_folder_name>/<zip name>`` is displayed.

    Args:
        source_folder: Directory whose contents are archived.
        destination_folder_name: Folder name used for both the local copy and
            the download location.

    Returns:
        None. Results are shown through ``displayHTML``; any exception raised
        while archiving or copying is caught and rendered as a red error
        message instead of propagating.
    """
    folder_to_compress = os.path.basename(source_folder)
    zip_file_name = folder_to_compress + '.zip'
    local_zip_file_path = os.path.join(current_working_directory, destination_folder_name, zip_file_name)
    # Relative path placed after the "/files/" prefix in the link's href.
    # NOTE(review): presumably "/files/" serves the /dbfs/FileStore copy — confirm.
    download_folder_path = os.path.join(destination_folder_name, zip_file_name)
    try:
        base_download_folder_path = create_destination_folders(destination_folder_name)
        # The archive base name is relative, so the zip is written to the process's
        # current working directory.
        shutil.make_archive(folder_to_compress, 'zip', source_folder)
        # Copy to the download folder first; the same file is moved afterwards.
        shutil.copy2(zip_file_name, base_download_folder_path)
        # Drop a zip already at the local destination before moving the new one in.
        if os.path.exists(local_zip_file_path):
            os.remove(local_zip_file_path)
        shutil.move(zip_file_name, local_zip_file_path)
    
        download_button_html = f"""
        <style>
            .download-btn {{
                display: inline-block;
                padding: 10px 20px;
                font-size: 16px;
                color: white;
                background-color: #4CAF50;
                text-align: center;
                text-decoration: none;
                border-radius: 5px;
                border: none;
                cursor: pointer;
                margin: 15px auto;
            }}
            .download-btn:hover {{
                background-color: #45a049;
            }}
            .button-container {{
                display: flex;
                justify-content: center;
                align-items: center;
            }}
        </style>
        
        <div style="color: #444; font-size: 14px; text-align: center; margin: 10px;">
            Zipped output file created at {local_zip_file_path}
        </div>
        <div class='button-container'>
            <a href='/files/{download_folder_path}' class='download-btn'>Download Output</a>
        </div>
        """
        displayHTML(download_button_html)
    except Exception as e:
        # Best-effort: report the failure in the notebook output rather than raising.
        error_message_html = f"""
        <div style="color: red; text-align: center; margin: 20px;">
            <strong>Error:</strong> Cannot create download link for {source_folder}. Reason: {e}
        </div>
        """
        displayHTML(error_message_html)

# Folder name (under the working directory and under /dbfs/FileStore/) that receives the zip.
destination_folder_name = "Tools_Output"
create_download_link(output_folder, destination_folder_name)



%md

## GPU Job Tuning Recommendations
This section lists general suggestions for tuning your applications to run optimally on GPUs.


In [0]:
# Marker line in each application's profile.log; the recommendation text follows it.
RECOMMENDATIONS_MARKER = "### D. Recommended Configuration ###"


def _read_recommendations(profile_log_path):
    """Return everything after the first marker line of a profile log.

    Args:
        profile_log_path: Path of an application's ``profile.log``.

    Returns:
        The concatenated lines that follow the first line containing
        ``RECOMMENDATIONS_MARKER``, or an empty string if no line contains it.
    """
    # The context manager closes the handle even if reading fails.
    with open(profile_log_path) as profile_log:
        for line in profile_log:
            if RECOMMENDATIONS_MARKER in line:
                # The file iterator is now positioned just past the marker line,
                # so joining it yields every remaining line unchanged.
                return "".join(profile_log)
    return ""


jar_output_folder = os.path.join(output_folder, "rapids_4_spark_profile")

# Gather the (appId, appName) rows of every application sub-folder, then
# concatenate once instead of growing a DataFrame inside the loop.
app_frames = []
with os.scandir(jar_output_folder) as entries:
    for entry in entries:
        if not entry.is_dir():
            continue
        csv_path = os.path.join(entry.path, "application_information.csv")
        if os.path.exists(csv_path):
            app_frames.append(pd.read_csv(csv_path)[['appId', 'appName']])

# pd.concat rejects an empty list, so fall back to an empty frame with the same columns.
app_df = pd.concat(app_frames) if app_frames else pd.DataFrame(columns=['appId', 'appName'])

app_list = app_df["appId"].tolist()

# One row per application, built in a single DataFrame construction.
app_recommendations = pd.DataFrame(
    [
        {
            'app': app,
            'recommendations': _read_recommendations(
                os.path.join(jar_output_folder, app, "profile.log")
            ),
        }
        for app in app_list
    ],
    columns=['app', 'recommendations'],
)
display(app_recommendations)