In [None]:
import pandas as pd

# Load the original frame-level CSV.
df = pd.read_csv("train_data.csv")


def _video_id_from_frame_name(frame_name):
    """Return the text before "_frame_" with ".mp4" and ".jpg" removed."""
    stem = frame_name.split("_frame_")[0]
    for extension in (".mp4", ".jpg"):
        stem = stem.replace(extension, "")
    return stem


# Build the video_id column from each row's frame name.
df["video_id"] = df["video_name_with_frame"].apply(_video_id_from_frame_name)

# Columns carried into the per-video table. Each is aggregated with "first"
# (the values are assumed to be identical for every row of a video).
per_video_columns = [
    "accident_negligence_rateA",
    "accident_negligence_rateB",
    "accident_object",
    "accident_place",
    "accident_place_feature",
    "vehicle_a_progress_info",
    "vehicle_b_progress_info",
    "filming_way",
    "video_point_of_view",
]
first_value_spec = {column: "first" for column in per_video_columns}

# Group by video_id and turn the group key back into a regular column.
per_video = df.groupby("video_id").agg(first_value_spec)
grouped_df = per_video.reset_index()

# Save the grouped table without the index column.
grouped_df.to_csv("train_data_grouped_with_info.csv", index=False)
print("그룹화된 데이터 저장 완료: train_data_grouped_with_info.csv")


In [None]:
def extract_classes(txt_path):
    """Collect the distinct integer labels found in a label text file.

    Every non-blank line is split on whitespace and its second field is
    parsed as an integer. Blank or whitespace-only lines are skipped
    instead of raising.

    Args:
        txt_path: Path of the text file to read.

    Returns:
        A set containing each integer that appears as the second field of
        at least one line; an empty set if the file has no non-blank lines.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not an integer.
    """
    labels = set()
    with open(txt_path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. stray newlines at the end of the file)
                # carry no label, so ignore them rather than crash.
                continue
            labels.add(int(fields[1]))
    return labels


# Sorted union of the labels seen in train ∪ val ∪ test, per attribute.
feat_all = sorted(set().union(
    extract_classes("tsn_dataset/train_accident_place_feature.txt"),
    extract_classes("tsn_dataset/val_accident_place_feature.txt"),
    extract_classes("tsn_dataset/test_accident_place_feature.txt"),
))

a_all = sorted(set().union(
    extract_classes("tsn_dataset/train_vehicle_a_progress_info.txt"),
    extract_classes("tsn_dataset/val_vehicle_a_progress_info.txt"),
    extract_classes("tsn_dataset/test_vehicle_a_progress_info.txt"),
))

b_all = sorted(set().union(
    extract_classes("tsn_dataset/train_vehicle_b_progress_info.txt"),
    extract_classes("tsn_dataset/val_vehicle_b_progress_info.txt"),
    extract_classes("tsn_dataset/test_vehicle_b_progress_info.txt"),
))

# Report how many distinct labels each attribute has.
print("전체 feat 클래스 수:", len(feat_all))
print("전체 A 클래스 수:", len(a_all))
print("전체 B 클래스 수:", len(b_all))


# Map each original label to its position in the sorted list (0-based).
feat_to_new = dict(zip(feat_all, range(len(feat_all))))
a_to_new = dict(zip(a_all, range(len(a_all))))
b_to_new = dict(zip(b_all, range(len(b_all))))


In [None]:
def remap_labels(in_path, out_path, mapping):
    """Rewrite a label file, translating each label through ``mapping``.

    Every non-blank input line must consist of exactly two
    whitespace-separated fields: an identifier and an integer label. For
    each line whose label is a key of ``mapping``, a line
    ``"<identifier> <mapping[label]>"`` is written to ``out_path``.
    Lines whose label is not in ``mapping`` are dropped, and blank or
    whitespace-only lines are skipped instead of raising.

    Args:
        in_path: Path of the label file to read.
        out_path: Path of the file to write; it is opened in "w" mode, so
            any existing content is replaced.
        mapping: Mapping from original integer label to new label.

    Raises:
        ValueError: If a non-blank line does not have exactly two fields,
            or its second field is not an integer.
    """
    with open(in_path) as fin, open(out_path, "w") as fout:
        for line in fin:
            fields = line.split()
            if not fields:
                # Blank lines carry no record; skip them rather than fail
                # on the two-field unpack below.
                continue
            vid, orig = fields
            orig = int(orig)
            if orig not in mapping:
                # Labels without a mapping entry are intentionally omitted
                # from the output.
                continue
            new = mapping[orig]
            fout.write(f"{vid} {new}\n")

# One entry per file to remap: (input file, output file, label mapping).
# Entries are processed in the order listed.
remap_jobs = [
    # train
    ("tsn_dataset/train_accident_place_feature.txt",
     "tsn_dataset/train_accident_place_feature_mapped.txt",
     feat_to_new),
    ("tsn_dataset/train_vehicle_a_progress_info.txt",
     "tsn_dataset/train_vehicle_a_progress_info_mapped.txt",
     a_to_new),
    ("tsn_dataset/train_vehicle_b_progress_info.txt",
     "tsn_dataset/train_vehicle_b_progress_info_mapped.txt",
     b_to_new),
    # val
    ("tsn_dataset/val_accident_place_feature.txt",
     "tsn_dataset/val_accident_place_feature_mapped.txt",
     feat_to_new),
    ("tsn_dataset/val_vehicle_a_progress_info.txt",
     "tsn_dataset/val_vehicle_a_progress_info_mapped.txt",
     a_to_new),
    ("tsn_dataset/val_vehicle_b_progress_info.txt",
     "tsn_dataset/val_vehicle_b_progress_info_mapped.txt",
     b_to_new),
    # test
    ("tsn_dataset/test_accident_place_feature.txt",
     "tsn_dataset/test_accident_place_feature_mapped.txt",
     feat_to_new),
    ("tsn_dataset/test_vehicle_a_progress_info.txt",
     "tsn_dataset/test_vehicle_a_progress_info_mapped.txt",
     a_to_new),
    ("tsn_dataset/test_vehicle_b_progress_info.txt",
     "tsn_dataset/test_vehicle_b_progress_info_mapped.txt",
     b_to_new),
]

for source_file, mapped_file, label_map in remap_jobs:
    remap_labels(source_file, mapped_file, label_map)

