In [22]:
import os

import pandas as pd

In [27]:
def convert_metrics_log(
    input_path: str,
    output_path: str = None,
    skip_footer: bool = True,
    pct_cols=None,
) -> pd.DataFrame:
    """
    Convert a comma-delimited system-metrics .log file into a cleaned .csv.

    Each column listed in `pct_cols` has a trailing '%' stripped, is parsed
    as a number (unparseable values become NaN) and is divided by 100.
    The 'timestamp' column is interpreted as seconds since the Unix epoch
    and converted to datetime. The result is written to `output_path`.

    Parameters
    ----------
    input_path : str
        Path to the .log file.
    output_path : str, optional
        Path for the output .csv file. If None, the extension of
        `input_path` is replaced with .csv (same directory).
    skip_footer : bool, default True
        If True, the last line of the file is dropped
        (`skipfooter=1`, which requires the python parser engine).
    pct_cols : sequence of str, optional
        Columns to strip and scale by 1/100. Defaults to
        ['cpu_percent', 'mem_percent', 'gpu_util_percent', 'gpu_mem_used_mb'].

    Returns
    -------
    pd.DataFrame
        The cleaned DataFrame (the same data that was written to disk).

    Raises
    ------
    KeyError
        If 'timestamp' or any column in `pct_cols` is missing from the file.
    """
    if pct_cols is None:
        # NOTE(review): 'gpu_mem_used_mb' is named as megabytes, yet it is
        # scaled by 1/100 like the percent columns. Kept in the default to
        # preserve existing output — confirm whether this is intended.
        pct_cols = ['cpu_percent', 'mem_percent', 'gpu_util_percent', 'gpu_mem_used_mb']

    # Determine output filename
    if output_path is None:
        base, _ = os.path.splitext(input_path)
        output_path = f"{base}.csv"

    # 1) Read the log, dropping the footer line if requested
    read_kwargs = dict(engine='python', skipfooter=1) if skip_footer else {}
    df = pd.read_csv(input_path, **read_kwargs)

    # 2) Strip '%', parse as float and convert to a fraction (7.22 → 0.0722).
    #    A column without any '%' is parsed by read_csv as numeric and has no
    #    `.str` accessor, so cast to str first instead of raising AttributeError.
    for col in pct_cols:
        as_text = df[col].astype(str).str.rstrip('%')
        df[col] = pd.to_numeric(as_text, errors='coerce') / 100.0

    # 3) Convert timestamp (epoch seconds) to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

    # 4) Write out with pure numeric columns (fractions, not "%")
    df.to_csv(output_path, index=False)
    print(f"Written cleaned CSV to: {output_path}")
    return df


In [28]:
import os

# Collect every .log file found in the current working directory
log_files = [entry for entry in os.listdir(".") if entry.endswith(".log")]
print(log_files)

['monitor_ace0_0.log', 'monitor_ace0_1.log', 'monitor_colmap_0.log', 'monitor_openmvg_0.log']


In [29]:
# Convert each discovered log; with no output_path given, the CSV is written
# next to the log with the extension swapped to .csv.
for log_path in log_files:
    convert_metrics_log(log_path)

Written cleaned CSV to: monitor_ace0_0.csv
Written cleaned CSV to: monitor_ace0_1.csv
Written cleaned CSV to: monitor_colmap_0.csv
Written cleaned CSV to: monitor_openmvg_0.csv


In [43]:
def adjust_mem_capacity(
    df: pd.DataFrame,
    original_capacity_gib: float = 39.0,
    new_capacity_gib: float = 31.0
) -> pd.DataFrame:
    """
    Re-express 'mem_percent' relative to a different memory capacity.

    'mem_percent' in `df` is taken to be a fraction of
    `original_capacity_gib`; the returned copy holds the equivalent
    fraction of `new_capacity_gib`:

        new_mem_percent = old_mem_percent * (original_capacity_gib / new_capacity_gib)

    Values that cannot be parsed as numbers become NaN. The input frame
    is left unmodified.
    """
    # Coerce to numeric first so string-typed or malformed values turn into NaN
    mem_numeric = pd.to_numeric(df['mem_percent'], errors='coerce')
    ratio = original_capacity_gib / new_capacity_gib
    # assign() works on a copy, leaving the caller's DataFrame untouched
    return df.assign(mem_percent=mem_numeric * ratio)

In [None]:
# Load the converted metrics explicitly so this cell does not depend on a
# `df` left behind by an out-of-order execution (none is defined above it).
# NOTE(review): the input file is inferred from the output name below — confirm.
ace_metrics = pd.read_csv('monitor_ace0_1.csv')
df_n = adjust_mem_capacity(ace_metrics)
df_n.to_csv('monitor_ace0_1_n.csv', index=False)

In [47]:
# Load the converted metrics from the CSV file
df = pd.read_csv("monitor_openmvg_0.csv")

# Preview the first five rows
print(df.head(5))

                       timestamp  cpu_percent  mem_percent  gpu_util_percent  \
0  2025-05-09 20:19:31.083668709       0.0016     0.072229              0.00   
1  2025-05-09 20:20:02.227812767       0.0009     0.071261              0.00   
2  2025-05-09 20:20:33.313343525       0.0028     0.070053              0.00   
3  2025-05-09 20:21:04.392442703       0.0160     0.088308              0.09   
4  2025-05-09 20:21:35.479997158       0.0366     0.117450              0.00   

   gpu_mem_used_mb  
0             0.01  
1             0.01  
2             0.01  
3             0.01  
4             0.01  


In [49]:
# Rescale mem_percent using the function's default capacities, then save
# the adjusted frame without the index column.
df_n = adjust_mem_capacity(df)
df_n.to_csv(path_or_buf="monitor_openmvg_0_n.csv", index=False)