In [8]:
import re
import pandas as pd


def parse_log_file(log_file_path):
    """
    Extract the Action, Object, Image Name and the KLD / SIM / NSS values of
    the 'only_ego Current' line for every record in a log file.

    A record starts at a header of the form
    ``Action : <action>, Object : <object> image_name : <name>`` and is paired
    with the first ``only_ego Current - KLD: .. | SIM: .. | NSS: ..`` line that
    appears before the next header. A record without such a line is skipped
    instead of borrowing the metrics of a later record.

    Args:
        log_file_path: Path of the UTF-8 encoded log file to read.

    Returns:
        A list of dicts, in file order, with the keys "Action", "Object",
        "Image_Name" (stripped strings) and "KLD", "SIM", "NSS" (floats).
        Returns None, after printing an error message, when the file does
        not exist.
    """
    try:
        with open(log_file_path, 'r', encoding='utf-8') as f:
            log_content = f.read()
    except FileNotFoundError:
        print(f"오류: '{log_file_path}' 파일을 찾을 수 없습니다.")
        return None

    # Header that opens each record: Action, Object and Image Name.
    header_pattern = re.compile(
        r"Action : (.*?),\s*Object : (.*?)\s*image_name : (.*?)\n",
        re.DOTALL
    )

    # Signed decimals, exponent notation and nan/inf; a plain character class
    # such as [\d.-]+ would cut "1e-05" down to "1".
    number = r"[-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf)"
    metrics_pattern = re.compile(
        r"only_ego Current - KLD: (" + number + r")\s*\|\s*SIM: (" + number
        + r")\s*\|\s*NSS: (" + number + r")"
    )

    headers = list(header_pattern.finditer(log_content))

    # Collected rows, one dict per record that has a metrics line.
    extracted_data = []

    for position, header in enumerate(headers):
        # Search only up to the next header so metrics can never be taken
        # from a different record.
        if position + 1 < len(headers):
            block_end = headers[position + 1].start()
        else:
            block_end = len(log_content)

        metrics = metrics_pattern.search(log_content, header.end(), block_end)
        if metrics is None:
            continue

        extracted_data.append({
            "Action": header.group(1).strip(),
            "Object": header.group(2).strip(),
            "Image_Name": header.group(3).strip(),
            # Metric values may be negative, so they are parsed as floats.
            "KLD": float(metrics.group(1)),
            "SIM": float(metrics.group(2)),
            "NSS": float(metrics.group(3))
        })

    return extracted_data

In [9]:
# Log file whose 'only_ego Current' metrics are analysed in this notebook.
log_file_path = '/home/bongo/porter_notebook/research/new_qwen_AG/32B_prompt5_real/ego_prompt5.log'

data = parse_log_file(log_file_path)

# Stop here when nothing was parsed (missing file or no matching records).
# Otherwise the cells below would either hit a NameError or silently keep
# using a `df` left over from an earlier run of the kernel.
if not data:
    raise ValueError(f"No records could be parsed from '{log_file_path}'.")

# Convert to a pandas DataFrame for readable display, sorted by ascending KLD.
df = pd.DataFrame(data)
df.sort_values("KLD")

Unnamed: 0,Action,Object,Image_Name,KLD,SIM,NSS
106,beat,drum,drum_002586.jpg,0.1601,0.7903,1.1019
63,kick,soccer_ball,soccer_ball_001588.jpg,0.1673,0.7904,2.3367
37,catch,soccer_ball,soccer_ball_003333.jpg,0.1695,0.7979,1.2597
115,eat,broccoli,broccoli_002796.jpg,0.1803,0.7848,1.6551
7,peel,apple,apple_001541.jpg,0.2171,0.7586,1.0968
...,...,...,...,...,...,...
2,jump,surfboard,surfboard_000658.jpg,5.2312,0.1595,-0.1963
36,sip,wine_glass,wine_glass_003343.jpg,6.7315,0.0159,-0.3904
48,open,suitcase,suitcase_000520.jpg,7.4001,0.0289,-0.5250
84,hold,golf_clubs,golf_clubs_000045.jpg,10.4803,0.0118,-0.4968


In [10]:
# Mean KLD over every parsed record.
df["KLD"].mean()

np.float64(1.6638669421487604)

In [11]:
# The ten records with the largest KLD, listed in ascending KLD order.
df.sort_values(by="KLD", ascending=True).tail(n=10)

Unnamed: 0,Action,Object,Image_Name,KLD,SIM,NSS
25,type_on,keyboard,keyboard_000439.jpg,3.6809,0.4701,0.8639
46,open,oven,oven_001370.jpg,4.1811,0.245,0.2718
38,catch,frisbee,frisbee_000598.jpg,4.3516,0.2653,-0.0774
86,hold,fork,fork_000804.jpg,4.8942,0.1647,0.1472
55,hit,hammer,hammer_001006.jpg,4.9984,0.1378,0.1827
2,jump,surfboard,surfboard_000658.jpg,5.2312,0.1595,-0.1963
36,sip,wine_glass,wine_glass_003343.jpg,6.7315,0.0159,-0.3904
48,open,suitcase,suitcase_000520.jpg,7.4001,0.0289,-0.525
84,hold,golf_clubs,golf_clubs_000045.jpg,10.4803,0.0118,-0.4968
109,push,bicycle,bicycle_002432.jpg,12.0479,0.0006,-0.6111


In [12]:
# Write the parsed metrics, including the row index, to a CSV in the
# current working directory.
df.to_csv(path_or_buf="prompt5_metrics.csv", index=True)