# 代码测试

## 按照汉字拼音排序


In [None]:
import locale
from functools import cmp_to_key
import pandas as pd

In [None]:
locale.setlocale(locale.LC_COLLATE, 'zh_CN.utf8')

In [None]:
str_list: list[str] = ['硫', '氨', '氯', '碳', '氧', '氯化氢']
str_list

In [None]:
a = sorted(str_list, key=cmp_to_key(locale.strcoll))
a

In [None]:
sorted(str_list, key=lambda x: x.encode('gbk'))

In [None]:
df = pd.DataFrame(str_list, columns=['str'])
df['num'] = list(range(0, len(str_list)))

In [None]:
df.sort_values(by='str', key=lambda x: x.str.encode('gbk'), ignore_index=True) # type: ignore

## 向一个 Excel 文件循环写入数据


In [None]:
import pandas as pd

In [None]:
# Write three one-row sheets (sheet1..sheet3) into a single workbook; the
# writer is opened once so every sheet ends up in the same file.
with pd.ExcelWriter('test.xlsx') as writer:
    for i in range(1, 4):
        df = pd.DataFrame({'num': [i]})
        df.to_excel(writer, index=False, sheet_name=f'sheet{i}')  # type: ignore

## 动态变量名

### `globals()`方法

```python
for x in range(0, 7):
    globals()[f"variable{x}"] = f"Hello the variable number {x}!"

print(variable5)
```

### `locals()`方法

```python
for x in range(0, 7):
    locals()[f"variable{x}"] = f"Hello the variable number {x}!"

print(variable5)
```

### 新`locals()`方法


In [None]:
# Template from which every generated variable name is built.
variable_name_pattern = "variable_{}"

# Number of variables to create.
num_variables = 5

# Produce the names with a 1-based suffix and bind each one through locals().
# At cell/module level locals() is the live namespace dict, so the assignment
# really defines the variable; the value is the 0-based position plus 10.
for i, variable_name in enumerate(
    map(variable_name_pattern.format, range(1, num_variables + 1))
):
    locals()[variable_name] = 10 + i
    print(variable_name, locals()[variable_name])

## 采样工作脚本测试

### 职业卫生

#### 模板0测试


In [None]:
import io
import os
import openpyxl
import pandas as pd
from nptyping import DataFrame
# from pandas.api.types import CategoricalDtype
from my_modules.occupational_health import OccupationalHealthItemInfo#, refresh_engaged_num
# from my_modules.occupational_health import SingleDayOccupationalHealthItemInfo

In [None]:
company_name: str = 'MSCN'
project_name: str = '23ZXP0000'

file_path: str = r'./templates/项目信息试验模板.xlsx'
point_info_df: DataFrame = pd.read_excel(file_path, sheet_name='定点') # type: ignore
personnel_info_df: DataFrame = pd.read_excel(file_path, sheet_name='个体') # type: ignore

In [None]:
new_project = OccupationalHealthItemInfo(company_name, project_name, point_info_df, personnel_info_df)

In [None]:
new_project.point_info_df.head()

In [None]:
new_project.point_info_df.query('采样点编号 == 25')#.tail()  # type: ignore

In [None]:
new_project.personnel_info_df.head()

In [None]:
new_project.point_deleterious_substance_df.head()

In [None]:
new_project.personnel_deleterious_substance_df.head()

In [None]:
new_project.factor_reference_df.head()

In [None]:
current_blank_df = new_project.get_single_day_blank_df(0, 1)

current_blank_df.head()

In [None]:
current_blank_df['空白编号'].astype(int).max()

In [None]:
current_blank_df.dtypes

In [None]:
current_point_df = new_project.get_single_day_point_df(current_blank_df['空白编号'].max(), 1)

current_point_df.head()

In [None]:
current_point_df.dtypes

In [None]:
current_personnel_df = new_project.get_single_day_personnel_df(current_point_df['终止编号'].max(), 1)

current_personnel_df.head()

In [None]:
# current_point_df['检测因素'] = current_point_df['检测因素'].astype('str')

r_current_point_df = pd.merge(current_point_df, current_blank_df, how='left', on='标识检测因素').fillna(0)

r_current_point_df.head()

In [None]:
# Collapse the merged fixed-point rows to one row per 检测因素, keeping the
# largest 空白编号, the smallest 起始编号 and the largest 终止编号.
# The aggregations are named by string instead of passing the Python builtins
# max/min: recent pandas versions warn about builtin callables and will stop
# mapping them to the optimized groupby reductions; the string names keep the
# current behaviour explicitly.
pivoted_point_df: DataFrame = pd.pivot_table(
    r_current_point_df,
    index=['检测因素'],
    aggfunc={'空白编号': 'max', '起始编号': 'min', '终止编号': 'max'},
)

pivoted_point_df.head()

In [None]:
# One row per 检测因素 with the first and last 个体编号 used for it.
# 'min'/'max' are passed by name rather than as the Python builtins, which
# recent pandas versions deprecate as aggregation callables.
pivoted_personnel_df: DataFrame = (
    pd.pivot_table(
        current_personnel_df,
        index=['检测因素'],
        values='个体编号',
        aggfunc=['min', 'max'],
    )
    # The pivot yields (aggregation, value-column) column pairs; stacking moves
    # the value-column level into the index so only min/max remain as columns.
    .stack()
    .reset_index()
    .set_index('检测因素')
    # 'level_1' is the unnamed stacked level produced by reset_index().
    .drop('level_1', axis=1)
    .rename(columns={'min': '个体起始编号', 'max': '个体终止编号'})
)

pivoted_personnel_df.head()

In [None]:
pivoted_personnel_df.shape

In [None]:
pivoted_personnel_df = pd.DataFrame(index=pivoted_personnel_df.index, columns=['个体起始编号', '个体终止编号'])

pivoted_personnel_df.head()

In [None]:
def get_exploded_point_df(r_current_point_df: pd.Series) -> list[str]:
    """Build the sample-number labels for one row of the merged point table.

    Despite its name, the argument is treated as a single row: its
    '空白编号', '起始编号' and '终止编号' entries are read as scalars
    (presumably the function is applied row-wise -- confirm against callers).

    Args:
        r_current_point_df: Row holding '空白编号', '起始编号' and '终止编号'.
            A '空白编号' of 0 means the row has no blank sample.

    Returns:
        Two blank labels (``<project_number><NNNN>-1`` and ``-2``, or two
        single-space placeholders when '空白编号' is 0) followed by one
        ``<project_number><NNNN>-1`` label per number from '起始编号' to
        '终止编号' inclusive, padded with single-space placeholders up to four
        point entries. More than four point labels are returned unpadded.

    Raises:
        ValueError: If any of the three numbers cannot be converted to int
            (for example NaN).
    """
    # Convert into locals instead of writing the ints back into the row: the
    # caller's data stays untouched, and a float-dtype row (where assigned
    # ints would be cast back to float) can no longer break the 'd' format.
    blank_number = int(r_current_point_df['空白编号'])
    first_number = int(r_current_point_df['起始编号'])
    last_number = int(r_current_point_df['终止编号'])
    # new_project is the module-level project object created earlier in the
    # notebook; its project_number prefixes every label.
    prefix = new_project.project_number

    # Blank labels
    if blank_number != 0:
        blank_list: list[str] = [
            f'{prefix}{blank_number:0>4d}-1',
            f'{prefix}{blank_number:0>4d}-2',
        ]
    else:
        blank_list = [' ', ' ']

    # Fixed-point labels, padded to at least four entries
    point_str_list: list[str] = [
        f'{prefix}{number:0>4d}-1'
        for number in range(first_number, last_number + 1)
    ]
    # A non-positive multiplier yields an empty list, so longer lists are
    # left as they are.
    point_str_list.extend([' '] * (4 - len(point_str_list)))

    # Blanks first, then points
    return blank_list + point_str_list

In [None]:
def dfs_to_file(project: OccupationalHealthItemInfo, file_path: str):
    """Save the project's numbered sheets as an .xlsx file on the Desktop.

    Args:
        project: Project whose ``get_dfs_num`` is called for the
            '空白', '定点' and '个体' sheets; the result must expose
            ``getvalue()`` returning the workbook bytes.
        file_path: File name without extension; the file is written to
            ``~/Desktop/<file_path>.xlsx``.
    """
    workbook_buffer = project.get_dfs_num(['空白', '定点', '个体'])
    target = f"{os.path.expanduser('~/Desktop')}/{file_path}.xlsx"
    with open(target, 'wb') as out_file:
        out_file.write(workbook_buffer.getvalue())

In [None]:
dfs_to_file(new_project, 'test0')

#### 模板1测试


In [None]:
company_name: str = 'MSCN'
project_name: str = '23ZXP0000'

file_path: str = r'./templates/项目信息试验模板1.xlsx'
point_info_df: DataFrame = pd.read_excel(file_path, sheet_name='定点') # type: ignore
personnel_info_df: DataFrame = pd.read_excel(file_path, sheet_name='个体') # type: ignore

In [None]:
project_1 = OccupationalHealthItemInfo(company_name, project_name, point_info_df, personnel_info_df)

In [None]:
dfs_to_file(project_1, 'test1')

#### 模板2测试


In [None]:
company_name: str = 'MSCN'
project_name: str = '23ZXP0000'

file_path: str = r'./templates/项目信息试验模板2.xlsx'
point_info_df: DataFrame = pd.read_excel(file_path, sheet_name='定点') # type: ignore
personnel_info_df: DataFrame = pd.read_excel(file_path, sheet_name='个体') # type: ignore

In [None]:
project_2 = OccupationalHealthItemInfo(company_name, project_name, point_info_df, personnel_info_df)

In [None]:
dfs_to_file(project_2, 'test2')

#### 模板3测试


In [None]:
company_name: str = 'MSCN'
project_name: str = '23ZXP0000'

file_path: str = r'./templates/项目信息试验模板3.xlsx'
point_info_df: DataFrame = pd.read_excel(file_path, sheet_name='定点') # type: ignore
personnel_info_df: DataFrame = pd.read_excel(file_path, sheet_name='个体') # type: ignore

In [None]:
project_3 = OccupationalHealthItemInfo(company_name, project_name, point_info_df, personnel_info_df)

In [None]:
dfs_to_file(project_3, 'test3')

### 记录表模板写入测试

In [None]:
import math
import os
import re
from copy import deepcopy
import pandas as pd
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Pt

In [None]:
file0_path: str = f'{os.path.expanduser("~/Desktop")}/test0.xlsx'

In [None]:
point_df1 = pd.read_excel(file0_path, sheet_name='爆炸定点D1')

point_df1.head()

In [None]:
items = point_df1['检测因素'].drop_duplicates().tolist()

items

In [None]:
# The template and the output folder do not depend on the factor, so they are
# resolved once instead of on every iteration.
point_module_path: str = r'./templates/有害物质定点采样记录.docx'
output_dir: str = f'{os.path.expanduser("~/Desktop")}/test1'
# Saving a document does not create missing folders, so make sure it exists.
os.makedirs(output_dir, exist_ok=True)

for item in items:
    # Start every factor from a fresh copy of the fixed-point template
    point_document = Document(point_module_path)

    # Rows of the current factor, re-indexed from 0
    current_factor_df = point_df1[point_df1['检测因素'] == item].reset_index(drop=True)
    # Number of record pages needed: the first page takes 18 rows
    # (index 0-17) and every following page takes 24 rows
    table_pages: int = math.ceil((len(current_factor_df) - 42) / 24 + 2)

    # Grow or shrink the template to that many pages.
    # NOTE(review): the paragraph indices below assume the layout of this
    # particular template (info table, page table, page table) -- confirm
    # whenever the template changes.
    if table_pages == 1:
        # Drop the second page table ...
        rm_table = point_document.tables[2]
        t = rm_table._element
        t.getparent().remove(t)
        # ... and two paragraphs near the end. paragraphs[-2] is looked up
        # again after the first removal, so two different paragraphs go.
        rm_page_break = point_document.paragraphs[-2]
        pg = rm_page_break._element
        pg.getparent().remove(pg)
        rm_page_break2 = point_document.paragraphs[-2]
        pg2 = rm_page_break2._element
        pg2.getparent().remove(pg2)
    elif table_pages == 2:
        # The template is used unchanged for two pages
        pass
    else:
        for _ in range(table_pages - 2):
            # Append a page break, then a copy of the second page table
            # right after it, then an empty trailing paragraph
            cp_table = point_document.tables[2]
            new_table = deepcopy(cp_table)
            new_paragraph = point_document.add_page_break()
            new_paragraph._p.addnext(new_table._element)
            point_document.add_paragraph()

        # Remove index 4 before index 2 so that removing the first one does
        # not shift the position of the second
        rm_page_break = point_document.paragraphs[4]
        pg = rm_page_break._element
        pg.getparent().remove(pg)
        rm_page_break2 = point_document.paragraphs[2]
        pg2 = rm_page_break2._element
        pg2.getparent().remove(pg2)

    tables = point_document.tables
    for table_page in range(table_pages):
        # Inclusive row range of current_factor_df shown on this page
        if table_page == 0:
            index_first: int = 0
            index_last: int = 17
        else:
            index_first: int = 24 * table_page - 6
            index_last: int = 24 * table_page + 17
        current_df = current_factor_df.query(f'index >= {index_first} and index <= {index_last}').reset_index(drop=True)
        # Fill the page table; tables[0] is the project info table, so page
        # tables start at index 1
        current_table = tables[table_page + 1]
        for r_i in range(current_df.shape[0]):
            current_row_list = [
                current_df.loc[r_i, '采样点编号'],
                f"{current_df.loc[r_i, '单元']}\n{current_df.loc[r_i, '检测地点']}",
                current_df.loc[r_i, '样品编号'],
            ]
            # Data is written from the third table row on (the first two rows
            # are skipped -- presumably the header)
            row_cells = current_table.rows[r_i + 2].cells
            for c_i, value in enumerate(current_row_list):
                current_cell = row_cells[c_i]
                current_cell.text = str(value)
                first_paragraph = current_cell.paragraphs[0]
                if c_i <= 1:
                    # Point number and location are centred
                    first_paragraph.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER  # type: ignore
                else:
                    # The sample number is set in a smaller font
                    first_paragraph.runs[0].font.size = Pt(6.5)

    # Project code, company and factor go into the info table.
    # The code and company are hard-coded test values here.
    info_table = tables[0]
    code_cell = info_table.rows[0].cells[1]
    comp_cell = info_table.rows[0].cells[4]
    item_cell = info_table.rows[3].cells[1]
    code_cell.text = '23ZDQ0000'
    comp_cell.text = 'MSCN'
    item_cell.text = item

    for cell in [code_cell, comp_cell, item_cell]:
        p = cell.paragraphs[0]
        p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER  # type: ignore
        p.runs[0].font.size = Pt(9)

    file_name = f'23ZDQ0000--D1--{item}'
    # Replace characters that are not allowed in file names. The backslash
    # has to be written as \\ inside the class: a single "\<" is only an
    # escaped "<" and would leave backslashes in the name.
    safe_file_name: str = re.sub(r'[?*/\\<>:"|]', ',', file_name)
    point_document.save(f'{output_dir}/{safe_file_name}.docx')

In [None]:
current_factor_df = point_df1[point_df1['检测因素'] == '苯|甲苯|二甲苯（全部异构体）'].reset_index(drop=True)

current_factor_df.head()

In [None]:
current_row_list = [
    current_factor_df.loc[0, '采样点编号'],
    f"{current_factor_df.loc[0, '单元']}\n{current_factor_df.loc[0, '检测地点']}",
    current_factor_df.loc[0, '样品编号']
]

current_row_list

In [None]:
table_pages: int = math.ceil((len(current_factor_df) - 42) / 24 + 2)

table_pages

In [None]:
# table_pages = 2

In [None]:
point_module_path: str = r'./templates/有害物质定点采样记录.docx'
point_document = Document(point_module_path)

In [None]:
# Adjust the freshly loaded template document to `table_pages` pages.
# NOTE(review): the table and paragraph indices assume the layout of this
# particular template -- confirm whenever the template changes.
if table_pages == 1:
    # Only one page needed: remove the table at index 2 from the document.
    rm_table = point_document.tables[2]
    t = rm_table._element
    t.getparent().remove(t)
    # Remove the second-to-last paragraph (presumably a page break).
    rm_page_break = point_document.paragraphs[-2]
    pg = rm_page_break._element
    pg.getparent().remove(pg)
    # paragraphs[-2] is looked up again after the removal above, so this
    # deletes a different paragraph than the previous step.
    rm_page_break2 = point_document.paragraphs[-2]
    pg2 = rm_page_break2._element
    pg2.getparent().remove(pg2)
elif table_pages == 2:
    # Two pages: the template is used unchanged.
    pass
    # rm_page_break = point_document.paragraphs[-2]
    # pg = rm_page_break._element
    # pg.getparent().remove(pg)
else:
    # More than two pages: add one extra table per additional page.
    for _ in range(table_pages - 2):
        # Deep-copy the table at index 2 so the copy is an independent element.
        cp_table = point_document.tables[2]
        new_table = deepcopy(cp_table)
        # new_paragraph = point_document.add_paragraph()
        # Append a page break and insert the copied table right after it.
        new_paragraph = point_document.add_page_break()
        new_paragraph._p.addnext(new_table._element)

        # paragraph = point_document.add_paragraph()
        # paragraph._p.addnext(new_table._element)
        # point_document.add_page_break()
        # Append an empty paragraph after the new table.
        point_document.add_paragraph()

    # Remove paragraph 4 first and paragraph 2 second: deleting the higher
    # index first leaves the position of the lower one unchanged.
    rm_page_break = point_document.paragraphs[4]
    pg = rm_page_break._element
    pg.getparent().remove(pg)
    rm_page_break2 = point_document.paragraphs[2]
    pg2 = rm_page_break2._element
    pg2.getparent().remove(pg2)


In [None]:
tables = point_document.tables

In [None]:
# Fill every page table of the document with its slice of current_factor_df.
for table_page in range(table_pages):
    # Inclusive row range shown on this page: 18 rows on the first page,
    # 24 rows on each following page
    if table_page == 0:
        index_first: int = 0
        index_last: int = 17
    else:
        index_first: int = 24 * table_page - 6
        index_last: int = 24 * table_page + 17
    current_df = current_factor_df.query(f'index >= {index_first} and index <= {index_last}').reset_index(drop=True)
    # tables[0] is the project info table, so page tables start at index 1
    current_table = tables[table_page + 1]
    for r_i in range(len(current_df)):
        # Read from the page slice (current_df), not from the full factor
        # table: r_i restarts at 0 on every page, so indexing
        # current_factor_df would repeat the first rows on each page. The
        # location is likewise taken from the current row, not from row 0.
        current_row_list = [
            current_df.loc[r_i, '采样点编号'],
            f"{current_df.loc[r_i, '单元']}\n{current_df.loc[r_i, '检测地点']}",
            current_df.loc[r_i, '样品编号'],
        ]
        # Data is written from the third table row on (the first two rows are
        # skipped -- presumably the header)
        row_cells = current_table.rows[r_i + 2].cells
        for c_i, value in enumerate(current_row_list):
            current_cell = row_cells[c_i]
            current_cell.text = str(value)
            first_paragraph = current_cell.paragraphs[0]
            if c_i <= 1:
                # Point number and location are centred
                first_paragraph.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER  # type: ignore
            else:
                # The sample number is set in a smaller font
                first_paragraph.runs[0].font.size = Pt(6.5)

In [None]:
point_document.save(f'{os.path.expanduser("~/Desktop")}/test1.docx')