<font color="red">STEP 1: 文档提取测试</font>

In [None]:
# 初始化
import django_setup

In [2]:
# Import the names used below: get_user_model, Project, FileRecord, DocumentAnalysis, DocxExtractorStep and the pipeline types
import os
from pathlib import Path

from django.contrib.auth import get_user_model
from django.core.exceptions import ValidationError
from django.core.files.uploadedfile import SimpleUploadedFile

from apps.doc_analysis.models import DocumentAnalysis, InvalidStatusTransition
from apps.doc_analysis.pipeline.types import ModelData, DocxElements, OutlineAnalysisResult
from apps.doc_analysis.steps._01_extract_docx_elements import DocxExtractorStep
from apps.files.models import FileRecord
from apps.projects.models import Project

In [None]:
# Prepare the user, project and file_record the test needs
# (the project and file_record are expected to be linked to each other).
User = get_user_model()

# Fetch existing test data; these lookups assume the rows are already in the database.

# Existing user, looked up by phone number
# NOTE(review): a real-looking phone number is hard-coded here — confirm it is test data.
user = User.objects.get(phone='18501771516')
print(f"用户: {user.phone}")
        
# Existing project, looked up by project name
project = Project.objects.get(project_name='测试项目1')
print(f"项目: {project.project_name}")
        
# Existing file record, looked up by id
# NOTE(review): file_record is not referenced again in the visible cells — confirm it is still needed.
file_record = FileRecord.objects.get(id='3')
print(f"文件: {file_record.name}")

In [None]:
# Delete every existing analysis titled "测试分析A" so the following cells start from a clean state
DocumentAnalysis.objects.filter(title="测试分析A").delete()

In [None]:
# 1. Create a new document analysis row titled "测试分析A" for the fetched project and user
analysis_fields = dict(
    project=project,
    title="测试分析A",
    created_by=user,
    analysis_questions=["投标要求", "评分标准"],  # sample analysis questions
)
docx_analysis = DocumentAnalysis.objects.create(**analysis_fields)
print(f"创建文档分析: {docx_analysis.title} (ID: {docx_analysis.id})")

In [None]:
# 2. Upload a real DOCX file

# 2.1 Resolve the document path.
# The original machine-specific path stays as the default so existing runs are unchanged;
# set the BIDPILOT_TEST_DOCX environment variable to use a different file on another machine.
DEFAULT_DOC_PATH = "C:/Users/huiwa/Downloads/文本分析测试/CaseTest/case8：招标文件-第1包：一级压榨花生油.docx"
doc_path = os.environ.get("BIDPILOT_TEST_DOCX", DEFAULT_DOC_PATH)

# 2.2 Read the file content and wrap it in an in-memory upload object
upload_name = "test_doc.docx"
file_content = Path(doc_path).read_bytes()
test_file = SimpleUploadedFile(
    upload_name,
    file_content,
    content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)
print(f"文件大小: {test_file.size}")

# 2.3 Create a new file record that stores the uploaded file object.
# Every execution of this cell creates another FileRecord row.
new_file_record = FileRecord.objects.create(
    name=upload_name,
    file=test_file,  # the upload object prepared in step 2.2
    owner=user,
    size=test_file.size
)
print(f"创建文件记录: {new_file_record.name}")

In [7]:
# 3. Attach the newly created file record to the analysis.
# A failure is only reported on stdout; the cell itself does not raise.
try:
    docx_analysis.update_file_record(new_file_record)
except Exception as link_error:
    print(f"关联文件失败: {link_error}")

In [None]:
# 4. Trigger the analysis: extract the document elements and store them in the database
print("\n===== 最终状态检查 =====")
print(f"开始分析前-状态: {docx_analysis.status}")
# NOTE(review): start_analysis() is disabled, so no statement in this cell runs between the
# "before" and "after" status prints — confirm whether the call should be restored.
#docx_analysis.start_analysis()
print(f"开始分析后-状态: {docx_analysis.status}")

# Instantiate the extraction step
docx_extractor=DocxExtractorStep()

try:
    # Wrap the model class and instance as the step input (analogous to DocumentAnalysis.instance.data)
    input_data = ModelData(model= DocumentAnalysis, instance=docx_analysis)
    
    # Run the document extraction
    docx_elements = docx_extractor.process(input_data)
    
    # Print the extraction result
    # NOTE(review): assumes the returned object supports len() and integer indexing — TODO confirm.
    print("文档提取成功！提取到的元素数量:", len(docx_elements))
    print("第一个元素示例:", docx_elements[0])
    
    # Re-read the row to check what was saved to the database
    saved_analysis = DocumentAnalysis.objects.get(id=docx_analysis.id)
    print("保存的提取结果:", saved_analysis.extracted_elements)
    
except ValidationError as e:
    # Only ValidationError is handled here; any other exception propagates out of the cell.
    print("文档提取失败:", str(e))
    # Re-read the row to inspect the analysis status and error message
    failed_analysis = DocumentAnalysis.objects.get(id=docx_analysis.id)
    print("分析状态:", failed_analysis.status)
    print("错误信息:", failed_analysis.error_message)


<font color="red">STEP 2: outline 分析测试</font>

In [None]:
# 初始化
import django_setup

In [2]:
# Import the names used below: get_user_model, Project, FileRecord, DocumentAnalysis, the extraction and outline-analysis steps, the pipeline types and pprint
from django.contrib.auth import get_user_model
from apps.doc_analysis.models import DocumentAnalysis, InvalidStatusTransition
from apps.projects.models import Project
from apps.files.models import FileRecord
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.exceptions import ValidationError
from apps.doc_analysis.steps._01_extract_docx_elements import DocxExtractorStep
from apps.doc_analysis.pipeline.types import ModelData, DocxElements, OutlineAnalysisResult
from apps.doc_analysis.steps._02_outline_analysis import DocxOutlineAnalyzerStep
from pprint import pprint 

In [None]:
# Load the stored "测试分析A" analysis and read its extracted_elements

saved_analysis = DocumentAnalysis.objects.get(title="测试分析A")
extracted_elements = saved_analysis.extracted_elements
# Spot-check a single entry; assumes 'elements' holds a sequence with at least 11 items — TODO confirm.
pprint(extracted_elements['elements'][10])

In [4]:
# Build a DocxElements instance from the extracted_elements stored on the model
docx_elements = DocxElements.from_model(extracted_elements)
# 1. Table-of-contents title lists (chapters / sections / subsections)
toc_chapters = docx_elements.format_toc_chapters()
toc_sections = docx_elements.format_toc_sections()
toc_subsections = docx_elements.format_toc_subsections()

# 2. Body heading title lists (chapters / sections / subsections)
heading_chapters = docx_elements.format_heading_chapters()
heading_sections = docx_elements.format_heading_sections()
heading_subsections = docx_elements.format_heading_subsections()

In [None]:
# Print the TOC sections and the body-heading sections, separated by a dashed line
print(toc_sections, "--------------------------", heading_sections, sep="\n")

测试模型多线程并发任务

In [None]:
# Create the LLM service instance and set its worker (thread) count via max_workers
from apps.doc_analysis.services.llm_services import BidAnalysisService, LLMAnalysisInput
bid_llm_analyzer = BidAnalysisService(model_name="qwen-plus", max_workers=4)

In [8]:
# Build the request pieces: one context per outline level (chapter / section / subsection),
# plus the requirement and output format.
# NOTE(review): these are underscore-prefixed helpers of DocxOutlineAnalyzerStep, called directly here for testing.
transtool = DocxOutlineAnalyzerStep()
chapter_context = transtool._build_context(toc_chapters, heading_chapters)
section_context = transtool._build_context(toc_sections, heading_sections)
subsection_context = transtool._build_context(toc_subsections, heading_subsections)
requirement = transtool._build_requirement()
output_format = transtool._build_output_format()

In [None]:
# Build one analysis request per outline level (chapter, section, subsection);
# all three share the same requirement and output_format.
requests = [
    LLMAnalysisInput(context=level_context, requirement=requirement, output_format=output_format)
    for level_context in (chapter_context, section_context, subsection_context)
]

In [None]:
# Run the batch of requests as a test with qwen-plus
import asyncio
import nest_asyncio
# nest_asyncio.apply() is called before asyncio.run(); presumably this is what lets
# asyncio.run() work inside the notebook's already-running event loop — TODO confirm.
nest_asyncio.apply()
response = asyncio.run(bid_llm_analyzer.batch_outline_analysis(requests,stream=True))

In [None]:
# Show the type and contents of the batch response
print(type(response))
pprint(response)

测试outline_analyzer.py 并发处理

In [None]:
# Exercise the outline analysis step directly, using docx_elements as its input
outline_analyzer = DocxOutlineAnalyzerStep()
analysis_result = outline_analyzer.process(docx_elements)

In [None]:
# Print the type and contents of the result returned by the step
print(type(analysis_result))
pprint(analysis_result)


In [None]:
# Re-read "测试分析A" from the database and print its stored outline_analysis_result
saved_analysis = DocumentAnalysis.objects.get(title="测试分析A")
pprint(saved_analysis.outline_analysis_result)


<font color="red">STEP 3: 测试outline_improvement.py</font>


In [3]:
# 初始化
import django_setup

In [4]:
# 导入相关模型
from django.contrib.auth import get_user_model
from apps.doc_analysis.models import DocumentAnalysis, InvalidStatusTransition
from apps.projects.models import Project
from apps.files.models import FileRecord
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.exceptions import ValidationError
from apps.doc_analysis.steps._01_extract_docx_elements import DocxExtractorStep
from apps.doc_analysis.pipeline.types import ModelData, DocxElements, OutlineAnalysisResult
from apps.doc_analysis.steps._02_outline_analysis import DocxOutlineAnalyzerStep
from apps.doc_analysis.steps._03_outline_improvement import OutlineImprovementStep
from pprint import pprint 

In [5]:
# Load "测试分析A" and rebuild docx_elements and outline_analysis_result from its stored fields

saved_analysis = DocumentAnalysis.objects.get(title="测试分析A")
docx_elements = DocxElements.from_model(saved_analysis.extracted_elements)
outline_analysis_result = OutlineAnalysisResult.from_model(saved_analysis.outline_analysis_result)

In [None]:
# Print the type and contents of the rebuilt docx_elements
print(type(docx_elements))
pprint(docx_elements)

In [None]:
# Print the type and contents of the rebuilt outline_analysis_result
print(type(outline_analysis_result))
pprint(outline_analysis_result)


In [None]:
from typing import Tuple
def validate_data(data: Tuple[DocxElements, OutlineAnalysisResult]) -> bool:
    """Check whether a (docx_elements, outline_analysis_result) pair is valid input.

    Args:
        data: A two-item tuple ``(docx_elements, outline_analysis_result)``.

    Returns:
        True only when the first item is a DocxElements, the second is an
        OutlineAnalysisResult, and that result's ``user_confirm`` is truthy;
        False otherwise.
    """
    docx_elements, outline_analysis_result = data
    if not isinstance(docx_elements, DocxElements):
        return False
    if not isinstance(outline_analysis_result, OutlineAnalysisResult):
        return False
    # An `and` chain returns its last evaluated operand, so a non-bool user_confirm
    # (e.g. None) would leak out as-is; coerce it to honour the `-> bool` annotation.
    return bool(outline_analysis_result.user_confirm)

# NOTE(review): this call sits before the cell that sets user_confirm = True,
# so its result reflects whatever user_confirm value was loaded from the model.
validate_data((docx_elements, outline_analysis_result))


In [None]:
# Simulate the user's confirmation: flag the result itself, then every element
# in the heading-only list followed by every element in the TOC-only list.
outline_analysis_result.user_confirm = True
for element_list_name in ("heading_only_elements", "toc_only_elements"):
    for element in getattr(outline_analysis_result, element_list_name):
        element['user_confirm'] = True

pprint(outline_analysis_result)


In [10]:
# Run the outline improvement step on the (docx_elements, outline_analysis_result) pair
outline_improver = OutlineImprovementStep()
improved_docx_elements = outline_improver.process((docx_elements, outline_analysis_result))

In [None]:
# Print docx_elements (the object that was passed into the improvement step)
pprint(docx_elements)

In [None]:
# Print the result returned by the improvement step
pprint(improved_docx_elements)

<font color="red">STEP 4: 测试outline_further_analysis.py</font>


In [1]:
# 初始化
import django_setup

Development settings loaded
INSTALLED_APPS: ['django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'rest_framework', 'corsheaders', 'storages', 'apps.authentication', 'apps.files', 'apps.projects', 'apps.doc_analysis', 'apps.chat', 'django_filters', 'drf_spectacular', 'rest_framework_simplejwt.token_blacklist', 'django_celery_results', 'django_celery_beat']


INFO 2025-02-18 02:03:22,907 storage default_storage 的类型: COSStorage


Settings从哪里加载？: config.settings.development
项目根目录对么？: C:\Users\huiwa\Documents\_All_Projects\BidPilot_new\backend
文件存储settings对么？: apps.files.storage.COSStorage
文件default_storage对么？: COSStorage

已经安装的应用 Installed Apps 完整了么？:
- django.contrib.admin
- django.contrib.auth
- django.contrib.contenttypes
- django.contrib.sessions
- django.contrib.messages
- django.contrib.staticfiles
- rest_framework
- corsheaders
- storages
- apps.authentication
- apps.files
- apps.projects
- apps.doc_analysis
- apps.chat
- django_filters
- drf_spectacular
- rest_framework_simplejwt.token_blacklist
- django_celery_results
- django_celery_beat


In [2]:
# 导入相关模型
from django.contrib.auth import get_user_model
from apps.doc_analysis.models import DocumentAnalysis, InvalidStatusTransition
from apps.projects.models import Project
from apps.files.models import FileRecord
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.exceptions import ValidationError
from apps.doc_analysis.steps._01_extract_docx_elements import DocxExtractorStep
from apps.doc_analysis.pipeline.types import ModelData, DocxElements, OutlineAnalysisResult, ImprovedDocxElements
from apps.doc_analysis.steps._02_outline_analysis import DocxOutlineAnalyzerStep
from apps.doc_analysis.steps._03_outline_improvement import OutlineImprovementStep
from pprint import pprint 

In [3]:
# Reload "测试分析A" and rebuild the three pipeline objects from its stored fields
saved_analysis = DocumentAnalysis.objects.get(title="测试分析A")
docx_elements = DocxElements.from_model(saved_analysis.extracted_elements)
outline_analysis_result = OutlineAnalysisResult.from_model(saved_analysis.outline_analysis_result)
improved_docx_elements = ImprovedDocxElements.from_model(saved_analysis.improved_docx_elements)


In [4]:
# Print the type and contents of the rebuilt improved_docx_elements
print(type(improved_docx_elements))
pprint(improved_docx_elements)







<class 'apps.doc_analysis.pipeline.types.ImprovedDocxElements'>
ImprovedDocxElements(elements=[{'content': '北京京铁运恒采购供应站有限公司 2024 年端午节物资采购项目',
                                'position': 1,
                                'type': 'paragraph'},
                               {'content': '招标文件',
                                'position': 2,
                                'type': 'paragraph'},
                               {'content': '        项目名称：北京京铁运恒采购供应站有限公司 2024 '
                                           '年端午节 物资采购项目（第一包：一级压榨花生油）',
                                'position': 3,
                                'type': 'paragraph'},
                               {'content': '项目编号：DLXM-2024-148-01',
                                'position': 4,
                                'type': 'paragraph'},
                               {'content': '招 标 人：北京京铁运恒采购供应站有限公司 '
                                           '代理机构：北京中外建工程管理有限公司',
                                'position': 5,
    