In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import json

# =========================
# 1) Load data
# =========================
df = pd.read_csv("all_labs_merged.csv")

# =========================
# 2) Action vs. observation column pairing
# =========================
# Each tuple is (action column, observation column). The row-wise difference
# of every pair is stored below as an error column.
pair_cols = [
    ("action/target_cartesian_position_col0", "observation/robot_state/cartesian_position_col0"),
    ("action/target_cartesian_position_col1", "observation/robot_state/cartesian_position_col1"),
    ("action/target_cartesian_position_col2", "observation/robot_state/cartesian_position_col2"),
    ("action/joint_velocity_col0", "observation/robot_state/joint_velocities_col0"),
    ("action/joint_velocity_col1", "observation/robot_state/joint_velocities_col1"),
    ("action/joint_velocity_col2", "observation/robot_state/joint_velocities_col2"),
]

# Create the error columns, named "err::<last path segment of the action column>".
for act, obs in pair_cols:
    df[f"err::{act.split('/')[-1]}"] = df[act] - df[obs]

# =========================
# 2-1) Single columns that get their own per-session statistics
# =========================
# The latency column was previously listed twice. The second entry only
# recomputed the same statistics and overwrote the same keys, so it is listed
# once here; the resulting statistics and documents are unchanged.
extra_cols = [
    "observation/robot_state/prev_command_successful",
    "action/robot_state/prev_controller_latency_ms",
]

# =========================
# 3) 세션별 요약 통계 생성
# =========================
def summarize_session(session_df):
    """Collapse the rows of one session into a flat Series of statistics.

    Reads the module-level ``pair_cols`` and ``extra_cols``. Every statistic
    is computed with NumPy's NaN-ignoring reducers.

    Args:
        session_df: Rows of a single session. Must contain the observation
            columns from ``pair_cols``, the matching ``err::*`` columns, and
            every column named in ``extra_cols``.

    Returns:
        pd.Series with ``obs_mean`` / ``obs_std`` / ``obs_range`` pooled over
        all observation columns, ``err_mean`` / ``err_std`` / ``err_max``
        pooled over all error columns, and ``<col>::mean|std|min|max`` for
        each column in ``extra_cols``.
    """
    observation_names = [obs for _, obs in pair_cols]
    error_names = [f"err::{act.split('/')[-1]}" for act, _ in pair_cols]

    # Pool every paired column into one flat array per kind.
    pooled_obs = session_df[observation_names].to_numpy().ravel()
    pooled_err = session_df[error_names].to_numpy().ravel()

    summary = {
        "obs_mean": float(np.nanmean(pooled_obs)),
        "obs_std": float(np.nanstd(pooled_obs)),
        "obs_range": float(np.nanmax(pooled_obs) - np.nanmin(pooled_obs)),
        "err_mean": float(np.nanmean(pooled_err)),
        "err_std": float(np.nanstd(pooled_err)),
        "err_max": float(np.nanmax(pooled_err)),
    }

    # Per-column statistics for the extra columns, keyed "<column>::<stat>".
    reducers = (
        ("mean", np.nanmean),
        ("std", np.nanstd),
        ("min", np.nanmin),
        ("max", np.nanmax),
    )
    for name in extra_cols:
        column = session_df[name]
        for label, fn in reducers:
            summary[f"{name}::{label}"] = float(fn(column))

    return pd.Series(summary)

# Hand summarize_session only the columns it reads. Applying over the whole
# frame passes the grouping column ("session_id") into the function, which
# pandas >= 2.2 reports with a DeprecationWarning; selecting the columns
# explicitly avoids that and behaves the same on older pandas versions.
# dict.fromkeys drops repeated labels (a repeated label would make
# session_df[col] a DataFrame instead of a Series) while keeping their order.
stat_cols = list(dict.fromkeys(
    [obs for _, obs in pair_cols]
    + [f"err::{act.split('/')[-1]}" for act, _ in pair_cols]
    + list(extra_cols)
))
stats = df.groupby("session_id")[stat_cols].apply(summarize_session).reset_index()

# =========================
# 4) Attach video_summary
# =========================
# One video_summary per session: the first entry of each session_id group.
session_summary = (
    df.groupby("session_id")["video_summary"]
    .first()
    .reset_index()
)
# Left join so every session present in stats is kept.
merged = stats.merge(session_summary, how="left", on="session_id")

# =========================
# 5) Build embeddings (KoE5 + DistilUSE)
# =========================
koe5 = SentenceTransformer("nlpai-lab/KoE5")
distiluse = SentenceTransformer("sentence-transformers/distiluse-base-multilingual-cased-v1")

# Sessions without a summary are encoded as the empty string.
summaries = merged["video_summary"].fillna("").tolist()

# Both models are called with the same options: NumPy output, normalized vectors.
# NOTE(review): E5-family models are commonly used with "query: " / "passage: "
# input prefixes — confirm against the KoE5 model card whether the summaries
# should be prefixed before encoding.
encode_opts = {"convert_to_numpy": True, "normalize_embeddings": True}
embeddings_koe5 = koe5.encode(summaries, **encode_opts)
embeddings_distiluse = distiluse.encode(summaries, **encode_opts)

# Store each vector as a plain Python list, one per session row.
merged["embedding_koe5"] = embeddings_koe5.tolist()
merged["embedding_distiluse"] = embeddings_distiluse.tolist()

# =========================
# 6) Convert rows to documents
# =========================
def _json_safe(value):
    """Return None for a float NaN, otherwise return the value unchanged.

    json.dumps writes a float NaN as the bare token ``NaN``, which is not
    valid JSON, so missing summaries / undefined statistics are stored as
    null instead.
    """
    # NaN is the only float that is not equal to itself.
    if isinstance(value, float) and value != value:
        return None
    return value


docs = []
for row in merged.to_dict(orient="records"):
    doc = {
        "session_id": row["session_id"],
        "video_summary": _json_safe(row["video_summary"]),
        "observation_stats": {
            "mean": _json_safe(row["obs_mean"]),
            "std": _json_safe(row["obs_std"]),
            "range": _json_safe(row["obs_range"]),
        },
        "error_stats": {
            "mean": _json_safe(row["err_mean"]),
            "std": _json_safe(row["err_std"]),
            "max": _json_safe(row["err_max"]),
        },
        "extra_stats": {
            # Group mean / std / min / max under the source column name.
            col: {
                "mean": _json_safe(row[f"{col}::mean"]),
                "std": _json_safe(row[f"{col}::std"]),
                "min": _json_safe(row[f"{col}::min"]),
                "max": _json_safe(row[f"{col}::max"]),
            }
            for col in extra_cols
        },
        "embedding_koe5": row["embedding_koe5"],
        "embedding_distiluse": row["embedding_distiluse"],
    }
    docs.append(doc)

# Print a sample document; skipped when there are no sessions so that an
# empty input does not raise IndexError.
if docs:
    print(json.dumps(docs[0], ensure_ascii=False, indent=2))


  from .autonotebook import tqdm as notebook_tqdm
  stats = df.groupby("session_id").apply(summarize_session).reset_index()


{
  "session_id": "Fri_Aug_18_12_06_27_2023",
  "video_summary": "박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음",
  "observation_stats": {
    "mean": 0.18160002207098427,
    "std": 0.349257623880365,
    "range": 2.0307194590568542
  },
  "error_stats": {
    "mean": -0.0071375970548648035,
    "std": 0.2607038239094778,
    "max": 0.8785010613501072
  },
  "extra_stats": {
    "observation/robot_state/prev_command_successful": {
      "mean": 1.0,
      "std": 0.0,
      "min": 1.0,
      "max": 1.0
    },
    "action/robot_state/prev_controller_latency_ms": {
      "mean": 0.22838087046616956,
      "std": 0.032017270897274214,
      "min": 0.1515810042619705,
      "max": 0.3353759944438934
    }
  },
  "embedding_koe5": [
    0.0508526973426342,
    0.004953642841428518,
    0.025171950459480286,
    0.04261196777224541,
    0.026671884581446648,
    0.057786572724580765,
    -0.04907063767313957,
    -0.0014286170480772853,
    0.012571543455123901,
    0.0006717714131809771,
    0.03041755221

In [8]:
import pandas as pd

In [10]:
# Load the merged CSV from its absolute path into df1.
df1 = pd.read_csv("/home/user2/문서/agentApp/backend/app/data/all_labs_merged.csv")


In [11]:
# Display the prev_controller_latency_ms column of df1 as the cell output.
df1['action/robot_state/prev_controller_latency_ms']

0        0.209516
1        0.214596
2        0.221335
3        0.212461
4        0.230750
           ...   
20011    0.210628
20012    0.258867
20013    0.207239
20014    0.212120
20015    0.212892
Name: action/robot_state/prev_controller_latency_ms, Length: 20016, dtype: float64

In [26]:
from pathlib import Path
import re

# Root directory that find_video_path searches recursively for .mp4 files.
BASE_DIR = Path("/home/user2/문서/agentApp")




def normalize_session_id(session_id: str) -> str:
    """Join the three trailing two-digit groups of a session ID with colons.

    Example: ``Fri_Aug_18_12_06_27_2023`` -> ``Fri_Aug_18_12:06:27_2023``.

    Only a suffix of the form ``_dd_dd_dd_dddd`` at the end of the string is
    rewritten; any other input is returned unchanged.
    """
    tail_pattern = r'_(\d{2})_(\d{2})_(\d{2})_(\d{4})$'
    # \g<n> refers to the n-th captured group of tail_pattern.
    tail_template = r"_\g<1>:\g<2>:\g<3>_\g<4>"
    return re.sub(tail_pattern, tail_template, session_id)
def find_video_path(session_id: str, camera_id: str) -> str | None:
    """Locate the recording ``<camera_id>.mp4`` that belongs to a session.

    The session ID is first converted with ``normalize_session_id``; then
    ``BASE_DIR`` is searched recursively for files named ``<camera_id>.mp4``
    and the first one whose path contains the converted ID is returned.
    Debug lines are printed along the way.

    Args:
        session_id: Session ID in its underscore-separated form.
        camera_id: Camera ID used as the file stem.

    Returns:
        The matching path as a string, or None when nothing matches.
    """
    real_session_id = normalize_session_id(session_id)
    print(f"[dbg] 입력 session_id: {session_id}")
    print(f"[dbg] 변환 session_id: {real_session_id}")

    filename = f"{camera_id}.mp4"
    for candidate in BASE_DIR.rglob(filename):
        candidate_str = str(candidate)
        # Skip files of the same camera that sit under a different session.
        if real_session_id not in candidate_str:
            continue
        print(f"✅ 후보 매칭: {candidate}")
        return candidate_str
    return None



# ===== Manual check =====
session_id = "Fri_Aug_18_12_06_27_2023"   # form stored in Elasticsearch
camera_id = "22008760"

video_path = find_video_path(session_id, camera_id)

# Report whether a recording was found for this session / camera.
if not video_path:
    print("❌ 파일 없음")
else:
    print("✅ 찾음:", video_path)


[dbg] 입력 session_id: Fri_Aug_18_12_06_27_2023
[dbg] 변환 session_id: Fri_Aug_18_12:06:27_2023
✅ 후보 매칭: /home/user2/문서/agentApp/AUTOLab_2gb_sessions/failure/2023-08-18/Fri_Aug_18_12:06:27_2023/Fri_Aug_18_12:06:27_2023/recordings/MP4/22008760.mp4
✅ 찾음: /home/user2/문서/agentApp/AUTOLab_2gb_sessions/failure/2023-08-18/Fri_Aug_18_12:06:27_2023/Fri_Aug_18_12:06:27_2023/recordings/MP4/22008760.mp4


In [14]:
# Display the rows of df1 whose session_id equals the module-level session_id.
df1[df1['session_id'] == session_id]

Unnamed: 0,action/cartesian_velocity_col0,action/cartesian_velocity_col1,action/cartesian_velocity_col2,action/cartesian_velocity_col3,action/cartesian_velocity_col4,action/cartesian_velocity_col5,action/gripper_velocity,action/joint_velocity_col0,action/joint_velocity_col1,action/joint_velocity_col2,...,observation/robot_state/prev_joint_torques_computed_safened_col3,observation/robot_state/prev_joint_torques_computed_safened_col4,observation/robot_state/prev_joint_torques_computed_safened_col5,observation/robot_state/prev_joint_torques_computed_safened_col6,camera_id,session_id,desc_major,object_text,video_summary,lab_name
323,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095618,0.024538,-0.095145,...,2.645140,-0.053135,0.808138,0.035792,22008760,Fri_Aug_18_12_06_27_2023,로봇이 자세/정렬을 조정하고 있습니다.,박스,박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음,AUTOLab
324,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095407,0.024527,-0.094815,...,1.996317,-0.035019,0.644145,0.038137,22008760,Fri_Aug_18_12_06_27_2023,로봇이 자세/정렬을 조정하고 있습니다.,박스,박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음,AUTOLab
325,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095237,0.024516,-0.094587,...,1.954741,-0.032078,0.623744,0.035265,22008760,Fri_Aug_18_12_06_27_2023,로봇이 자세/정렬을 조정하고 있습니다.,박스,박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음,AUTOLab
326,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095251,0.024519,-0.094590,...,1.993170,-0.030712,0.645497,0.040840,22008760,Fri_Aug_18_12_06_27_2023,로봇이 자세/정렬을 조정하고 있습니다.,박스,박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음,AUTOLab
327,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095240,0.024516,-0.094577,...,2.064182,-0.045970,0.656759,0.035965,22008760,Fri_Aug_18_12_06_27_2023,로봇이 자세/정렬을 조정하고 있습니다.,박스,박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음,AUTOLab
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,...,0.196622,0.004893,0.056496,0.005410,22008760,Fri_Aug_18_12_06_27_2023,로봇이 대상을 내려놓고 있습니다.,박스,박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음,AUTOLab
395,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,...,0.197159,-0.001055,0.055686,-0.000518,22008760,Fri_Aug_18_12_06_27_2023,로봇이 대상을 내려놓고 있습니다.,박스,박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음,AUTOLab
396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,...,0.219174,-0.005902,0.064883,-0.002370,22008760,Fri_Aug_18_12_06_27_2023,로봇이 대상을 내려놓고 있습니다.,박스,박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음,AUTOLab
397,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,...,0.214137,3.170736,5.403646,-0.346027,22008760,Fri_Aug_18_12_06_27_2023,로봇이 대상을 내려놓고 있습니다.,박스,박스에 접근하던 중 급격한 움직임과 함께 충돌이 있었음,AUTOLab
