# {{ cookiecutter.project_name }}

{{ cookiecutter.description }}

创建时间: {% now 'utc', '%Y-%m-%d %H:%M:%S' %}
作者: {{ cookiecutter.author_name }}

## 1. 导入依赖

In [None]:
from batch_processor import BatchProcessor
from config import DB_CONFIG, DATA_CONFIG, CACHE_CONFIG
import pandas as pd
import requests
import json
from typing import Dict, Any
from cachetools import cached, LRUCache
from collections import defaultdict

# Pick the decorator used for API calls depending on the cache switch
if not CACHE_CONFIG['enable']:
    def api_cache(func):
        """Pass-through decorator used when caching is switched off."""
        return func
else:
    # LRU cache sized from the configuration, wrapped as a decorator
    cache = LRUCache(maxsize=CACHE_CONFIG['size'])
    api_cache = cached(cache)

## 2. 定义处理器类

**需要实现以下3个方法:**
1. `get_data_source()` - 定义数据源
2. `define_schema()` - 定义表结构  
3. `process_business_logic()` - 实现批量业务处理逻辑

**可选实现:**
4. `fetch_external_data()` - 如需要调用外部API，可重写此方法

In [None]:
class {{ cookiecutter.project_slug.title().replace('_', '') }}Processor(BatchProcessor):
    
    def get_data_source(self):
        """
        定义数据源
        
        返回CSV文件路径或DataFrame对象
        """
        return DATA_CONFIG['data_file']
    
    def define_schema(self) -> Dict[str, list]:
        """
        定义表结构字段
        
        TODO: 根据你的需求修改以下字段定义
        """
        return {
            'control_fields': [
                'is_processed',    # 处理状态(系统字段，勿删)
                'retry_count'      # 重试次数(系统字段，勿删)
            ],
            'result_fields': [
                'result1',         # TODO: 替换为你的结果字段1
                'result2',         # TODO: 替换为你的结果字段2  
                'result3'          # TODO: 替换为你的结果字段3
            ]
        }
    
    {% if cookiecutter.enable_cache == 'y' %}@api_cache
    def _fetch_api_data_cached(self, query_params_tuple):
        """
        带缓存的API调用方法 - 内部方法
        
        Args:
            query_params_tuple: 查询参数元组(用于缓存)
            
        Returns:
            API响应数据
        """
        # 将元组转换回列表
        query_params = list(query_params_tuple)
        
        # TODO: 替换为你的实际API调用逻辑
        # api_url = 'https://api.example.com/your-endpoint'
        # headers = {'Content-Type': 'application/json'}
        # payload = json.dumps(query_params)
        # response = requests.post(api_url, headers=headers, data=payload, timeout=30)
        # 
        # if response.status_code == 200:
        #     return response.json()
        # else:
        #     self.logger.warning(f"API调用失败: {response.status_code}")
        #     return {}
        
        return {}{% endif %}
    
    def fetch_external_data(self, batch_data: pd.DataFrame) -> Dict[str, Any]:
        """
        获取外部数据(如API调用) - 可选实现
        
        Args:
            batch_data: 当前批次的数据
            
        Returns:
            外部数据字典
            
        TODO: 如果需要调用外部API，请实现此方法
        """
        try:
            # 示例: 批量API调用
            # 获取需要查询的参数(如订单号、用户ID等)
            # query_params = batch_data['your_query_field'].unique().tolist()
            
            {% if cookiecutter.enable_cache == 'y' %}# 使用缓存方法
            # return self._fetch_api_data_cached(tuple(query_params)){% else %}# 直接API调用
            # api_url = 'https://api.example.com/your-endpoint'
            # headers = {'Content-Type': 'application/json'}
            # payload = json.dumps(query_params)
            # response = requests.post(api_url, headers=headers, data=payload, timeout=30)
            # 
            # if response.status_code == 200:
            #     return response.json()
            # else:
            #     self.logger.warning(f"API调用失败: {response.status_code}")
            #     return {}{% endif %}
            
            # TODO: 替换为你的实际API调用逻辑
            return {}
            
        except Exception as e:
            self.logger.error(f"API调用异常: {str(e)}")
            return {}
    
    def process_business_logic(self, batch_data: pd.DataFrame) -> pd.DataFrame:
        """
        处理业务逻辑 - 批量处理
        
        Args:
            batch_data: 当前批次的数据DataFrame
            
        Returns:
            处理后的DataFrame，必须包含结果字段的值
            
        TODO: 实现你的核心业务处理逻辑
        """
        try:
            # 如果需要调用外部API，可以取消下面这行注释
            # external_data = self.fetch_external_data(batch_data)
            
            # 处理每一行或批量处理
            for idx, row in batch_data.iterrows():
                # TODO: 实现你的业务逻辑
                # 示例业务逻辑
                # user_id = row['user_id']
                # order_number = row['order_number']
                
                # 执行计算、比较、判断等业务逻辑
                # result1_value = some_calculation(row)
                # result2_value = some_comparison(row, external_data)
                # result3_value = some_other_logic(row)
                
                # 设置结果值 (替换为你的实际逻辑)
                batch_data.loc[idx, 'result1'] = ''  # TODO: 设置实际的结果值
                batch_data.loc[idx, 'result2'] = ''  # TODO: 设置实际的结果值
                batch_data.loc[idx, 'result3'] = ''  # TODO: 设置实际的结果值
            
            return batch_data
            
        except Exception as e:
            self.logger.error(f"业务逻辑处理异常: {str(e)}")
            # 返回原始数据，结果字段为空
            for field in self.define_schema()['result_fields']:
                batch_data[field] = ''
            return batch_data

## 3. 执行批处理

### 调试模式 (推荐)
在开发阶段，建议先使用调试模式测试业务逻辑，避免处理大量数据时出错（对应代码见本节末尾调用 `processor.run(debug_batch_times=2)` 的单元）。

### 生产模式
确认业务逻辑正确后，处理全部数据：

In [None]:
# Production mode: process all data
# processor = {{ cookiecutter.project_slug.title().replace('_', '') }}Processor(
#     batch_size=DB_CONFIG['batch_size'],
#     table_name=DB_CONFIG['table_name'], 
#     db_name=DB_CONFIG['db_name'],
#     max_retries=DB_CONFIG['max_retries']
# )
# processor.run()  # no arguments: process all data

In [None]:
# 创建处理器实例
processor = {{ cookiecutter.project_slug.title().replace('_', '') }}Processor(
    batch_size=DB_CONFIG['batch_size'],
    table_name=DB_CONFIG['table_name'], 
    db_name=DB_CONFIG['db_name'],
    max_retries=DB_CONFIG['max_retries']
)

# 🔧 调试模式: 只处理2个批次，用于测试业务逻辑
processor.run(debug_batch_times=2)

# ⚠️ 确认业务逻辑正确后，使用下面的命令处理全部数据:
# processor.run()

## 4. 查看处理结果

In [None]:
# Fetch the processing statistics from the processor and print each counter.
# Printed labels, in order: total records, processed, pending, failed.
stats = processor.get_statistics()
print("处理统计:")
print(f"总记录数: {stats['total']}")
print(f"已处理: {stats['processed']}")
print(f"待处理: {stats['pending']}")
print(f"处理失败: {stats['failed']}")

In [None]:
# Show a sample of up to 10 processed rows
import sqlite3
from contextlib import closing

# closing() releases the connection even when the query raises; sqlite3's own
# context manager only commits/rolls back and leaves the connection open.
with closing(sqlite3.connect(DB_CONFIG['db_name'])) as conn:
    # The table name is interpolated from DB_CONFIG because SQL placeholders
    # cannot stand in for identifiers.
    sample_df = pd.read_sql(f"SELECT * FROM {DB_CONFIG['table_name']} WHERE is_processed = 1 LIMIT 10", conn)

print("处理结果样本:")
display(sample_df)

## 5. 导出结果 (可选)

In [None]:
# Export the results to a CSV file named after the project slug
output_file = '{{ cookiecutter.project_slug }}_results.csv'
# NOTE(review): only_processed=True presumably limits the export to processed
# rows - confirm against BatchProcessor.export_results
processor.export_results(output_file, only_processed=True)
print(f"结果已导出到: {output_file}")