### 计算每个股票的30min总成交额

In [1]:
import pandas as pd
import os
import threading
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor,as_completed

def process_stock_data(file_name, input_folder, output_folder, window_size=30):
    """Aggregate one CSV into fixed-size row windows of summed ``amount``.

    Reads ``input_folder/file_name``, splits its rows into consecutive
    windows of ``window_size`` rows (the last window may be shorter) and
    writes one row per window to ``output_folder/file_name``. Each output
    row holds the ``trade_time`` of the window's last row and the sum of the
    window's ``amount`` values. If the output file already exists the
    function returns without reading anything.

    Args:
        file_name: Name of the CSV file; used for both input and output.
        input_folder: Directory containing the source CSV.
        output_folder: Directory that receives the result; it is not
            created here.
        window_size: Number of consecutive rows per window (default 30).

    Raises:
        KeyError: If a non-empty CSV lacks the ``trade_time`` or ``amount``
            column.
    """
    # Skip work that a previous run has already completed.
    output_file_path = os.path.join(output_folder, file_name)
    if os.path.exists(output_file_path):
        return

    df = pd.read_csv(os.path.join(input_folder, file_name))
    n_rows = len(df)

    rows = []
    if n_rows:
        # Work on the two columns directly. Selecting a whole row first
        # (``batch.iloc[-1]``) upcasts an integer ``trade_time`` to float
        # when every column is numeric, and is much slower per window.
        times = df['trade_time']
        amounts = df['amount']
        rows = [
            [
                times.iloc[min(start + window_size, n_rows) - 1],
                amounts.iloc[start:start + window_size].sum(),
            ]
            for start in range(0, n_rows, window_size)
        ]

    processed_df = pd.DataFrame(rows, columns=['trade_time', 'total_amount'])

    # Write to a temporary file and rename it into place, so an interrupted
    # run never leaves a truncated file that the existence check above would
    # treat as finished.
    tmp_file_path = output_file_path + '.tmp'
    processed_df.to_csv(tmp_file_path, index=False)
    os.replace(tmp_file_path, output_file_path)

def main(input_folder, output_folder):
    """Run ``process_stock_data`` on every entry of ``input_folder``.

    Lists ``input_folder``, makes sure ``output_folder`` exists, submits one
    ``process_stock_data`` task per listed entry to a thread pool and shows
    a tqdm progress bar as tasks finish. An exception raised by any task is
    re-raised here through ``future.result()``.

    Args:
        input_folder: Directory whose entries are processed.
        output_folder: Directory that receives the results; created if it
            does not exist.
    """
    files = os.listdir(input_folder)

    # Make sure the output folder exists.
    os.makedirs(output_folder, exist_ok=True)

    # Leave a few cores free, but never request fewer than one worker:
    # os.cpu_count() can return None or a value <= 5, and
    # ThreadPoolExecutor rejects max_workers <= 0.
    max_workers = max(1, (os.cpu_count() or 1) - 5)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_stock_data, file_name, input_folder, output_folder)
            for file_name in files
        ]

        # Consume results as they complete so the progress bar advances and
        # worker exceptions surface.
        for future in tqdm(as_completed(futures), total=len(futures)):
            future.result()

if __name__ == "__main__":
    # Input folder path; change it to match your setup.
    input_folder = 'data/by_stock_merged'
    # Output folder path; change it to match your setup.
    output_folder = 'data/v30/by_stock_v30'
    main(input_folder=input_folder, output_folder=output_folder)


 28%|██▊       | 1427/5100 [1:58:24<7:40:53,  7.53s/it] 