In [2]:
import pandas as pd
import json

# 1. Load the CSV file; header=None means the first line is read as data,
#    not as column names.
df = pd.read_csv("test.csv", header=None)

# 2. Assign names to the seven columns.
df.columns = [
    "상품ID",
    "문장ID",
    "원문",
    "무시",
    "aspect span",
    "opinion span",
    "sentiment",
]

# 3. Build uid by joining the stringified product ID and sentence ID
#    with an underscore.
df["uid"] = df["상품ID"].astype(str).str.cat(df["문장ID"].astype(str), sep="_")

# 4. Map a sentiment value to its polarity label
def sentiment_to_polarity(sent):
    """Return the polarity label for a sentiment value.

    A value equal to ``1`` maps to ``"POS"``, a value equal to ``0`` maps to
    ``"NEG"``, and every other value maps to ``"NEU"``.
    """
    # Checked in order: 1 first, then 0; anything unmatched falls through.
    for code, label in ((1, "POS"), (0, "NEG")):
        if sent == code:
            return label
    return "NEU"

# 5. Build the final nested dictionary:
#    {product ID (str): {uid: {"sentence": ..., "triples": [...]}}}
def _none_if_missing(value):
    """Return None for a missing value (NaN/None), otherwise the value itself.

    json.dump would write a float NaN as the bare token ``NaN``, which is not
    valid JSON; None is written as ``null`` instead.
    """
    return None if pd.isna(value) else value


final_dict = {}

for product_id, group_df in df.groupby("상품ID"):
    product_dict = {}

    for uid, sub_group in group_df.groupby("uid"):
        # The sentence text is taken from the first row of the uid group.
        # A missing cell becomes "" and a non-string cell is stringified, so
        # .strip() cannot fail on a float NaN or a numeric value.
        raw_sentence = sub_group["원문"].iloc[0]
        sentence = "" if pd.isna(raw_sentence) else str(raw_sentence).strip()

        # One triple per row of the group, in row order.
        triples = [
            {
                "aspect_span": _none_if_missing(aspect),
                "opinion_span": _none_if_missing(opinion),
                "polarity": sentiment_to_polarity(sentiment),
            }
            for aspect, opinion, sentiment in zip(
                sub_group["aspect span"],
                sub_group["opinion span"],
                sub_group["sentiment"],
            )
        ]

        product_dict[uid] = {
            "sentence": sentence,
            "triples": triples,
        }

    # JSON object keys must be strings, so the product ID is stringified.
    final_dict[str(product_id)] = product_dict

# 6. Save the result as UTF-8 JSON; ensure_ascii=False keeps non-ASCII text
#    readable instead of \u-escaped.
with open("train.txt", "w", encoding="utf-8") as f:
    json.dump(final_dict, f, ensure_ascii=False, indent=2)

print("✅ train.txt 저장 완료!")


✅ train.txt 저장 완료!
