In [None]:
import os
import re
import pandas as pd

# ==== Edit these file paths to match your own machine ====
input_csv = r"C:\Users\USER\Desktop\CIS533\Final\Regression\All\songs.csv"
output_txt_folder = r"C:\Users\USER\Desktop\CIS533\Final\Regression\All\lyrics_txt"
output_mapping_csv = r"C:\Users\USER\Desktop\CIS533\Final\Regression\All\lyrics_playcount.csv"
# ============================

# Load the original songs CSV into a DataFrame.
df = pd.read_csv(input_csv, encoding="utf-8")

# Make sure the lyrics output folder exists (no error if it already does).
os.makedirs(output_txt_folder, exist_ok=True)

mapping = []  # Collected rows for the FileName / PlayCount CSV written at the end.

# Device names that Windows reserves as file names, with or without an
# extension (for example "NUL.txt" is still treated as the NUL device).
_WINDOWS_RESERVED_NAMES = frozenset(
    ["CON", "PRN", "AUX", "NUL"]
    + [f"COM{i}" for i in range(1, 10)]
    + [f"LPT{i}" for i in range(1, 10)]
)


def make_safe_filename(name: str, max_length: int = 200) -> str:
    """Turn a song title into a base file name that is legal on Windows.

    The result carries no extension; the caller appends one.

    Args:
        name: The title to sanitize. Non-string values are converted with
            ``str()`` first.
        max_length: Maximum number of characters kept in the result. The
            default leaves headroom under the 255-character limit for a
            numeric suffix and an extension added by the caller.

    Returns:
        The sanitized name, or ``"untitled"`` when nothing is left after
        stripping surrounding whitespace.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    name = str(name).strip()
    # Replace \ / : * ? " < > | and ASCII control characters (tab, newline,
    # ...), none of which are permitted in a Windows file name.
    name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name)

    # Windows matches reserved device names on the part before the first dot,
    # ignoring case, so prefix those to turn them into ordinary names.
    stem = name.split(".", 1)[0].rstrip().upper()
    if stem in _WINDOWS_RESERVED_NAMES:
        name = "_" + name

    # Keep the name from getting too long for the file system.
    name = name[:max_length]

    if not name:
        name = "untitled"
    return name

# Case-folded file names already handed out in this run. Names are compared
# case-insensitively because Windows file systems normally treat "Hello.txt"
# and "hello.txt" as the same file, which would silently overwrite lyrics.
used_names = set()

for idx, row in df.iterrows():
    song_name = row.get("SongName")
    lyrics = row.get("Lyrics", "")
    playcount = row.get("PlayCount", 0)

    # The .get() defaults only cover a missing column. An empty CSV cell is
    # read as NaN, which str() would turn into the literal text "nan".
    if pd.isna(song_name):
        song_name = f"song_{idx}"
    if pd.isna(lyrics):
        lyrics = ""

    base_name = make_safe_filename(song_name)

    # If the file name is already taken, append _1, _2, ... until it is unique.
    filename = base_name + ".txt"
    counter = 1
    while filename.casefold() in used_names:
        filename = f"{base_name}_{counter}.txt"
        counter += 1
    used_names.add(filename.casefold())

    # Write the lyrics out to their own text file.
    txt_path = os.path.join(output_txt_folder, filename)
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(str(lyrics))

    # Record which file belongs to which play count.
    mapping.append({"FileName": filename, "PlayCount": playcount})

# Write the FileName / PlayCount mapping CSV.
mapping_df = pd.DataFrame(mapping)
mapping_df.to_csv(output_mapping_csv, index=False, encoding="utf-8")

print("完成！")
print("歌詞 txt 資料夾：", output_txt_folder)
print("對應表 CSV：", output_mapping_csv)