In [None]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder
import os
import time
import csv
from tqdm import tqdm  

def export_to_file(data, output_file_name):
    """Write an iterable of rows to a comma-separated file.

    Args:
        data: Iterable of rows; each row is an iterable of cell values.
        output_file_name: Path of the file to create or overwrite.
    """
    # newline='' lets the csv module control line endings itself.
    with open(output_file_name, "w", newline='') as out:
        csv.writer(out, delimiter=',').writerows(data)

def _parse_indicator_list(raw):
    """Split a stringified list such as "['A', 'B']" into its item names."""
    tokens = raw.strip('[]').replace("'", "").split(", ")
    # An empty list ("[]") splits into [''], so drop empty tokens instead of
    # emitting a bogus empty-string item into the transaction.
    return [token for token in tokens if token]


def data_from_file(fname):
    """Read transactions from the 'Exceeded_Indicators' column of a CSV file.

    Each string cell is expected to look like a printed Python list
    (e.g. "['A', 'B']") and is split into a list of item names. Cells that
    are not strings (such as missing values) are skipped. A cell holding an
    empty list yields an empty transaction, so the number of transactions
    still matches the number of string cells.

    Args:
        fname: Path of the CSV file to read.

    Returns:
        A list of transactions, each a list of item-name strings.

    Raises:
        KeyError: If the CSV has no 'Exceeded_Indicators' column.
    """
    df = pd.read_csv(fname)
    # Iterate the single column directly; iterrows() would build a Series per
    # row only to read this one field.
    column = df['Exceeded_Indicators']
    transactions = []
    for raw in tqdm(column, total=len(column), desc="Processing Rows"):  # progress bar
        if isinstance(raw, str):
            transactions.append(_parse_indicator_list(raw))
    return transactions

def write_frequent_itemsets_to_file(frequent_itemsets, total_transactions, filename='result_file_1.txt'):
    """Write frequent itemsets to a tab-separated text file.

    The file starts with a header line, followed by one line per itemset
    holding the support as a percentage rounded to one decimal place and the
    itemset rendered as "{item1, item2, ...}".

    Args:
        frequent_itemsets: DataFrame with a 'support' column (fractions) and
            an 'itemsets' column (iterables of items).
        total_transactions: Not used by this function; kept so existing
            callers keep working.
        filename: Path of the file to create or overwrite.
    """
    lines = ["support_percentage\titem_set\n"]  # header row
    supports = frequent_itemsets['support']
    itemsets = frequent_itemsets['itemsets']
    for support, itemset in zip(supports, itemsets):
        # Itemsets are typically unordered sets; sort the item names so the
        # output is identical from run to run.
        item_set_str = "{" + ", ".join(sorted(map(str, itemset))) + "}"
        support_percentage = round(support * 100, 1)
        lines.append(f"{support_percentage}\t{item_set_str}\n")
    # Explicit encoding so non-ASCII item names do not depend on the locale.
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(lines)


# Parameters
input_filename = '../exceeded_indicators_data.csv'  # input CSV file
min_support = 0.08  # minimum support passed to fpgrowth
output_path = '.'  # directory that receives the result files

# Load the data and convert it to a list of transactions
transactions = data_from_file(input_filename)
total_transactions = len(transactions)

# Start timing. Only encoding, mining and sorting are timed; the CSV load
# above is excluded. perf_counter() is monotonic, so the measured interval
# cannot be distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()

# One-hot encode the transactions into a boolean DataFrame
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Mine frequent itemsets with mlxtend's fpgrowth
frequent_itemsets = fpgrowth(df, min_support=min_support, use_colnames=True)

# Highest support first
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)

# Stop timing
end_time = time.perf_counter()
elapsed_time = end_time - start_time

# Output locations
os.makedirs(output_path, exist_ok=True)
result_file = f"{output_path}/air_pollutants_{min_support}_result1.txt"
count_file = f"{output_path}/air_pollutants_{min_support}_result2.txt"

# Write the itemsets to one file and the number of itemsets to another
write_frequent_itemsets_to_file(frequent_itemsets, total_transactions, result_file)
with open(count_file, 'w') as f:
    f.write(f"{len(frequent_itemsets)}\n")

# Report the elapsed time and where the results were saved
print(f"FP-growth 計算時間: {elapsed_time:.2f} 秒")
print(f"FP-growth 計算完成，結果保存在 {result_file} 和 {count_file}")


Processing Rows: 100%|██████████| 347891/347891 [00:12<00:00, 26903.87it/s]


FP-growth 計算時間: 1.04 秒
FP-growth 計算完成，結果保存在 ./air_pollutants_0.08_result1.txt 和 ./air_pollutants_0.08_result2.txt
