In [None]:
import os
import re
from pathlib import Path

import orjson
import pandas as pd
from deep_translator import GoogleTranslator


def extract_data(root_path: str, search_dict_path: str = "./data/search_dict.json") -> None:
    """Split the raw price CSVs into one CSV per game and write a name index.

    Every ``*.csv`` found recursively under ``root_path`` is read and the rows
    are concatenated. Rows are grouped by their ``Name`` column; each group is
    written to ``<parent of root_path>/final_data/<cleaned name>.csv`` after
    dropping rows that contain any missing value. A JSON object mapping each
    original name to its cleaned file stem is written to ``search_dict_path``.

    Args:
        root_path: Directory that is searched recursively for ``*.csv`` files.
        search_dict_path: Destination of the JSON name -> file-stem index.
            Defaults to the location this function has always written to.

    Raises:
        ValueError: If no CSV file is found under ``root_path``.
        KeyError: If the combined data has no ``Name`` column (raised by
            pandas ``groupby``).
    """
    root = Path(root_path)

    # Sorted so the row order inside each output file does not depend on the
    # order in which the filesystem happens to enumerate the inputs.
    csv_paths = sorted(root.rglob("*.csv"))
    if not csv_paths:
        raise ValueError(f"No CSV files found under {root_path!r}")

    # Build the output location with pathlib rather than string formatting so
    # an absolute root_path is not turned into a path relative to the CWD.
    output_dir = root.parent / "final_data"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Read every file exactly once and concatenate in a single call.
    data = pd.concat([pd.read_csv(csv_path) for csv_path in csv_paths], ignore_index=True)

    search_dict = {}
    # groupby skips rows whose Name is missing (pandas default dropna=True),
    # so no separate pre-filter on "Name" is required.
    for name, group in data.groupby("Name"):
        # Names read from CSV are normally strings; coerce so a numeric Name
        # column does not crash re.sub or the JSON encoder.
        name = str(name)
        # Strip punctuation, then collapse whitespace runs into underscores to
        # obtain a filesystem-friendly stem.
        cleaned_name = re.sub(r"[^\w\s]", "", name)
        cleaned_name = re.sub(r"\s+", "_", cleaned_name)
        # NOTE(review): two distinct names that differ only in punctuation or
        # whitespace clean to the same stem, and the later group overwrites
        # the earlier file. Confirm whether that can occur in the real data.
        search_dict[name] = cleaned_name
        group.dropna().to_csv(output_dir / f"{cleaned_name}.csv", index=False)

    index_path = Path(search_dict_path)
    index_path.parent.mkdir(parents=True, exist_ok=True)
    index_path.write_bytes(orjson.dumps(search_dict))


# Directory holding the raw price CSVs; per-game files and the search index
# are generated from everything found beneath it.
root_path = "./data/price_details"
extract_data(root_path=root_path)

In [None]:
import orjson

# Read back the JSON index stored at ./data/search_dict.json and show its keys.
with open("./data/search_dict.json", "rb") as index_file:
    raw_index = index_file.read()
search_dict = orjson.loads(raw_index)  # type: dict[str, str]
list(search_dict)

In [None]:
from deep_translator import GoogleTranslator

# Quick check of the translator: auto-detect the source language and
# translate the literal string "NA" into Traditional Chinese (zh-TW).
translator = GoogleTranslator(source="auto", target="zh-TW")
translated = translator.translate("NA")
translated