## Reminder
把所有annotations放到 ./b2c 資料夾下

In [2]:
import pandas as pd
import os

# Folder / file locations used by the labelling pipeline.
pair_folder = './b2c'
data_csv = 'data.csv'
output_folder = './output'


def build_pair_grid(search_data, search_term):
    """Build the A x B product grid for one search term.

    Every value of the '商品名稱A' column is paired with every value of the
    '商品名稱B' column (row-major: A varies slowest), then the page and price
    columns of each side are attached by product name.

    Args:
        search_data: Rows of the data CSV that belong to ``search_term``.
        search_term: The search term; written into the '搜尋詞' column.

    Returns:
        A DataFrame with len(A) * len(B) rows and the columns
        商品名稱A, 商品名稱B, 網頁A, 價格A, 網頁B, 價格B, 搜尋詞.
    """
    # NOTE(review): the names are NOT deduplicated here, so the grid has
    # (rows x rows) entries. Presumably the annotation matrix is laid out the
    # same way -- confirm against the annotation tool before changing this.
    names_a = search_data['商品名稱A']
    names_b = search_data['商品名稱B']

    grid = pd.DataFrame(
        [(a, b) for a in names_a for b in names_b],
        columns=['商品名稱A', '商品名稱B'],
    )

    # One metadata row per distinct product name keeps the left merges from
    # multiplying grid rows.
    meta_a = search_data.drop_duplicates(subset=['商品名稱A'])[['商品名稱A', '網頁A', '價格A']]
    meta_b = search_data.drop_duplicates(subset=['商品名稱B'])[['商品名稱B', '網頁B', '價格B']]
    grid = grid.merge(meta_a, on='商品名稱A', how='left')
    grid = grid.merge(meta_b, on='商品名稱B', how='left')

    grid['搜尋詞'] = search_term
    return grid


def fit_pair_matrix(pair_df, num_A, num_B, fill_value='x'):
    """Crop or pad an annotation matrix to exactly ``num_A`` x ``num_B``.

    Existing annotations are kept by position; cells that have to be added
    are filled with ``fill_value``.

    Args:
        pair_df: Annotation matrix as read from the annotations CSV.
        num_A: Required number of rows.
        num_B: Required number of columns.
        fill_value: Label used for padded cells.

    Returns:
        A new DataFrame of shape (num_A, num_B) with 0..n-1 integer labels
        on both axes.
    """
    fitted = pair_df.iloc[:num_A, :num_B].copy()
    # Relabel positionally before padding: read_csv produces string column
    # labels (and arbitrary index labels), so reindexing straight to integer
    # ranges would match nothing and overwrite every real annotation with
    # the fill value.
    fitted.index = range(fitted.shape[0])
    fitted.columns = range(fitted.shape[1])
    return fitted.reindex(
        index=range(num_A), columns=range(num_B), fill_value=fill_value
    )


if __name__ == "__main__":
    # Load the pair data.
    print("Reading data.csv...")
    data_df = pd.read_csv(data_csv)

    # Make sure the output folder exists (no-op when it already does).
    os.makedirs(output_folder, exist_ok=True)

    # All distinct search terms.
    search_terms = data_df['搜尋詞'].unique()

    # Process the annotation CSV in pair_folder for each search term.
    for search_term in search_terms:
        # Check for the annotation file first so that terms without one are
        # skipped before any grid is built.
        pair_csv_path = os.path.join(pair_folder, f'{search_term}_annotations.csv')
        if not os.path.exists(pair_csv_path):
            print(f"Pair CSV not found for search term '{search_term}', skipping...")
            continue

        # Rows of the data CSV that belong to the current search term.
        search_data = data_df[data_df['搜尋詞'] == search_term].reset_index(drop=True)
        num_A = len(search_data['商品名稱A'])
        num_B = len(search_data['商品名稱B'])

        # A x B grid with the original page/price columns attached.
        search_data_expanded = build_pair_grid(search_data, search_term)

        # Read the annotation matrix (first CSV column is the row index).
        print(f"Processing {pair_csv_path}...")
        pair_df = pd.read_csv(pair_csv_path, index_col=0)

        # If the matrix does not match the grid, crop/pad it so the flattened
        # labels line up one-to-one with the grid rows.
        if pair_df.shape != (num_A, num_B):
            print(f"Warning: Dimension mismatch for search term '{search_term}', expected ({num_A}, {num_B}) but got {pair_df.shape}")
            pair_df = fit_pair_matrix(pair_df, num_A, num_B)

        # Row-major flattening matches the (A outer, B inner) grid order.
        labels = pair_df.values.flatten()
        search_data_expanded['label'] = labels

        # Write the labelled grid for this search term.
        output_csv_path = os.path.join(output_folder, f'{search_term}_labeled.csv')
        search_data_expanded.to_csv(output_csv_path, index=False)
        print(f"Output saved to {output_csv_path}")

    print("Processing completed.")


Reading data.csv...
Processing ./b2c\前級擴大+dac_annotations.csv...
Output saved to ./output\前級擴大+dac_labeled.csv
Processing ./b2c\dyah93-a900a8ng7_annotations.csv...
Output saved to ./output\dyah93-a900a8ng7_labeled.csv
Processing ./b2c\子母扣_annotations.csv...
Output saved to ./output\子母扣_labeled.csv
Processing ./b2c\emount sony_annotations.csv...
Output saved to ./output\emount sony_labeled.csv
Processing ./b2c\電茶壺_annotations.csv...
Output saved to ./output\電茶壺_labeled.csv
Processing ./b2c\under armour_annotations.csv...
Output saved to ./output\under armour_labeled.csv
Processing ./b2c\iphone 13保護貼_annotations.csv...
Output saved to ./output\iphone 13保護貼_labeled.csv
Processing ./b2c\詹粉_annotations.csv...
Output saved to ./output\詹粉_labeled.csv
Processing ./b2c\手機包,斜背包_annotations.csv...
Output saved to ./output\手機包,斜背包_labeled.csv
Processing ./b2c\口水巾_annotations.csv...
Output saved to ./output\口水巾_labeled.csv
Pair CSV not found for search term '寵物衣服', skipping...
Pair CSV not found fo