# 代码测试

## 按照汉字拼音排序


In [None]:
import locale
from functools import cmp_to_key
import pandas as pd

In [None]:
locale.setlocale(locale.LC_COLLATE, 'zh_CN.utf8')

In [None]:
str_list = ['硫', '氨', '氯', '碳', '氧', '氯化氢']
str_list

In [None]:
a = sorted(str_list, key=cmp_to_key(locale.strcoll))
a

In [None]:
sorted(str_list, key=lambda x: x.encode('gbk'))

In [None]:
df = pd.DataFrame(str_list, columns=['str'])
df['num'] = list(range(0, len(str_list)))

In [None]:
df.sort_values(by='str', key=lambda x: x.str.encode('gbk'), ignore_index=True) # type: ignore

## 向一个excel文件循环写入数据


In [None]:
import pandas as pd

In [None]:
# Open the workbook once and add one worksheet per loop pass; the context
# manager saves and closes 'test.xlsx' when the block exits.
with pd.ExcelWriter('test.xlsx') as writer:
    for i in (1, 2, 3):
        # One-row frame whose single 'num' cell holds the loop counter.
        df = pd.DataFrame({'num': [i]})
        df.to_excel(writer, index=False, sheet_name=f'sheet{i}')  # type: ignore

## 动态变量名

### `globals()`方法

```python
for x in range(0, 7):
    globals()[f"variable{x}"] = f"Hello the variable number {x}!"

print(variable5)
```

### `locals()`方法

```python
for x in range(0, 7):
    locals()[f"variable{x}"] = f"Hello the variable number {x}!"

print(variable5)
```

### 新`locals()`方法


In [None]:
# Naming pattern for the generated variables
variable_name_pattern = "variable_{}"

# How many variables to generate
num_variables = 5

# Build each name in turn (variable_1 .. variable_5)
for i in range(num_variables):
  variable_name = variable_name_pattern.format(i+1)
  # Bind the value under the generated name.
  # NOTE: at module / notebook-cell top level locals() returns the same
  # namespace as globals(), which is why this write sticks. Per the Python
  # documentation, writes to locals() inside a function are not guaranteed
  # to affect the real local variables.
  locals()[variable_name] = i + 10
  print(variable_name, locals()[variable_name])

### 转换xlsx为csv


In [None]:
import pandas as pd

file_path = r'./info_files/检测因素参考信息.xlsx'

df = pd.read_excel(file_path)
df.to_csv('./info_files/检测因素参考信息.csv', index=False)

## 采样工作脚本测试

### 职业卫生

#### 模板0测试


In [None]:
import io
import os
import math
from copy import deepcopy
from docx import Document
import openpyxl
import pandas as pd
from nptyping import DataFrame
# from pandas.api.types import CategoricalDtype
from occupational_health_module.occupational_health import OccupationalHealthItemInfo#, refresh_engaged_num
from occupational_health_module.write_docx_templates import templates_info

In [None]:
company_name: str = 'MSCN'
project_name: str = '23ZXP0000'

file_path: str = r'./templates/项目信息试验模板.xlsx'
point_info_df: DataFrame = pd.read_excel(file_path, sheet_name='定点') # type: ignore
personnel_info_df: DataFrame = pd.read_excel(file_path, sheet_name='个体') # type: ignore

In [None]:
new_project = OccupationalHealthItemInfo(company_name, project_name, point_info_df, personnel_info_df)

In [None]:
new_project.output_deleterious_substance_info_dict['1']['定点'].head()

In [None]:
available_cols = [
    '采样点编号',
    '单元',
    '检测地点',
    '工种',
    '检测因素',
    '采样数量/天',
    '采样日程',
    '收集方式',
]

test_df = (
    new_project
    .output_deleterious_substance_info_dict
    ['1']['定点']
    [available_cols]
)

test_df.head()

In [None]:
pivot_test_df = pd.pivot_table(
    data=test_df,
    index=['采样点编号', '采样数量/天', '收集方式'],
    # values=['收集方式'],
    aggfunc={'收集方式': 'count'}
).rename(columns={"收集方式": "数量"}).reset_index()

# pivot_test_df['次数']

pivot_test_df.head()

In [None]:
# new_project.writer_output_deleterious_substance_info()

#### 模板1测试


In [None]:
company_name: str = 'MSCN'
project_name: str = '23ZXP0001'

file_path: str = r'./templates/项目信息试验模板1.xlsx'
point_info_df: DataFrame = pd.read_excel(file_path, sheet_name='定点') # type: ignore
personnel_info_df: DataFrame = pd.read_excel(file_path, sheet_name='个体') # type: ignore

In [None]:
project_1 = OccupationalHealthItemInfo(company_name, project_name, point_info_df, personnel_info_df)

In [None]:
project_1.get_single_day_blank_df(0, 1)

In [None]:
project_1.get_single_day_point_df(0, 1)

In [None]:
# project_1.get_dfs_num(project_1.default_types_order)

#### 模板2测试


In [None]:
company_name: str = 'MSCN'
project_name: str = '23ZXP0002'

file_path: str = r'./templates/项目信息试验模板2.xlsx'
point_info_df: DataFrame = pd.read_excel(file_path, sheet_name='定点') # type: ignore
personnel_info_df: DataFrame = pd.read_excel(file_path, sheet_name='个体') # type: ignore

In [None]:
project_2 = OccupationalHealthItemInfo(company_name, project_name, point_info_df, personnel_info_df)

In [None]:
# project_2.get_dfs_num(project_2.default_types_order)

#### 模板3测试


In [None]:
company_name: str = 'MSCN'
project_name: str = '23ZXP0003'

file_path: str = r'./templates/项目信息试验模板3.xlsx'
point_info_df: DataFrame = pd.read_excel(file_path, sheet_name='定点') # type: ignore
personnel_info_df: DataFrame = pd.read_excel(file_path, sheet_name='个体') # type: ignore

In [None]:
project_3 = OccupationalHealthItemInfo(company_name, project_name, point_info_df, personnel_info_df)

In [None]:
current_blank_df = project_3.get_single_day_blank_df(0, 1)

current_blank_df.head()

In [None]:
current_point_df = project_3.get_single_day_point_df(10, 1)

current_point_df.head()

In [None]:
# Left-join the blank-sample frame onto the point frame by '标识检测因素',
# then replace the NaN left by unmatched rows with 0.
r_current_point_df: DataFrame = pd.merge(
    left=current_point_df,
    right=current_blank_df,
    on='标识检测因素',
    how='left',
)
r_current_point_df = r_current_point_df.fillna(0)

# Row-wise derived columns, both computed by helpers on project_3.
r_current_point_df['样品编号'] = r_current_point_df.apply(
    project_3.get_exploded_point_df,
    axis=1,
)
r_current_point_df['代表时长'] = r_current_point_df.apply(
    lambda row: project_3.get_exploded_contact_duration(
        row['日接触时间'],
        row['采样数量/天'],
        4,
    ),
    axis=1,
)

r_current_point_df.head()

In [None]:
r_current_point_df.loc[0]

In [None]:
r_current_point_df.apply(project_3.get_exploded_point_df, axis=1)

In [None]:
# Recompute the per-row representative durations on the merged frame.
# The rows must come from `r_current_point_df`: `r_point_df` is not defined
# in the visible notebook, so applying over it raised NameError.
r_current_point_df['代表时长'] = r_current_point_df.apply(
    lambda row: project_3.get_exploded_contact_duration(
        row['日接触时间'],
        row['采样数量/天'],
        4,
    ),
    axis=1,
)

r_current_point_df.head()

In [None]:
r_current_point_df.explode(['样品编号', '代表时长'])

In [None]:
# project_3.get_dfs_num(project_3.default_types_order)

### 记录表模板写入测试

#### 定点


In [None]:
import os
from copy import deepcopy
from docx import Document

In [None]:
docx_pathes = [
    './templates/有害物质定点采样记录.docx',
    './templates/有害物质个体采样记录.docx',
    './templates/定点噪声.docx',
    './templates/高温.docx',
    './templates/个体噪声.docx',
    './templates/一氧化碳.docx',
]

In [None]:
# For every template: open it, adjust how many copies of its third table
# (document.tables[2]) it contains according to table_pages, and save the
# result under ~/Desktop as t<i>.docx.
for i, docx_path in enumerate(docx_pathes):
    document = Document(docx_path)
    table_pages = 5  # hard-coded, so only the final `else` branch runs as written
    if table_pages == 1:
        # Detach the third table from its parent XML element.
        rm_table = document.tables[2]
        t = rm_table._element
        t.getparent().remove(t)
    
        # Also remove the last two paragraphs. The list is captured once,
        # before any removal, so [-1] and [-2] index that snapshot.
        paragraphs = document.paragraphs
        rm_paragraphs1 = paragraphs[-1]
        rm_p1 = rm_paragraphs1._element
        rm_p1.getparent().remove(rm_p1)
    
        rm_paragraphs2 = paragraphs[-2]
        rm_p2 = rm_paragraphs2._element
        rm_p2.getparent().remove(rm_p2)
    elif table_pages == 2:
        pass  # nothing is added or removed in this case
    else:
        # Add (table_pages - 2) copies of the third table.
        for _ in range(table_pages - 2):
            cp_table = document.tables[2]
            new_table = deepcopy(cp_table)
            # document.add_page_break()
            # Append a page break, insert the copied table's XML element right
            # after the page-break paragraph, then append an empty paragraph.
            new_paragraph = document.add_page_break()
            new_paragraph._p.addnext(new_table._element)
            document.add_paragraph()
    document.save(f'{os.path.expanduser("~/Desktop")}/t{i}.docx')

In [None]:
# Layout description of each record-sheet template, keyed by record type.
# Every entry carries the template path, a 'direct-reading' flag, the
# (row, col) cells for project number / company name / deleterious substance,
# the row counts for the first and later pages, the number of title rows,
# the number of rows per item, and the list of column indexes in use.
templates_info = {
    '定点有害物质': {
        # Fixed-point records use the fixed-point template; this entry used to
        # repeat the personal-sampling template path of the next entry.
        'template_path': './templates/有害物质定点采样记录.docx',
        'direct-reading': False,
        'project_num_row': 0,
        'project_num_col': 1,
        'company_name_row': 0,
        'company_name_col': 4,
        'deleterious_substance_row': 3,
        'deleterious_substance_col': 1,
        'first_page_rows': 18,
        'late_page_rows': 24,
        'title_rows': 2,
        'item_rows': 6,
        'available_cols': [0, 1, 2, 4],
    },
    '个体有害物质': {
        'template_path': './templates/有害物质个体采样记录.docx',
        'direct-reading': False,
        'project_num_row': 0,
        'project_num_col': 1,
        'company_name_row': 0,
        'company_name_col': 4,
        'deleterious_substance_row': 3,
        'deleterious_substance_col': 1,
        'first_page_rows': 5,
        'late_page_rows': 6,
        'title_rows': 2,
        'item_rows': 3,
        'available_cols': [0, 1, 2, 4],
    },
    '定点噪声': {
        'template_path': './templates/定点噪声.docx',
        'direct-reading': True,
        'project_num_row': 0,
        'project_num_col': 1,
        'company_name_row': 1,
        'company_name_col': 1,
        'deleterious_substance_row': 3,
        'deleterious_substance_col': 1,
        'first_page_rows': 10,
        'late_page_rows': 12,
        'title_rows': 2,
        'item_rows': 1,
        'available_cols': [0, 1, 2],
    },
    '个体噪声': {
        'template_path': './templates/个体噪声.docx',
        'direct-reading': True,
        'project_num_row': 0,
        'project_num_col': 1,
        'company_name_row': 1,
        'company_name_col': 1,
        'deleterious_substance_row': 3,
        'deleterious_substance_col': 1,
        'first_page_rows': 9,
        'late_page_rows': 11,
        'title_rows': 2,
        'item_rows': 1,
        'available_cols': [0, 1, 2],
    },
    '高温': {
        'template_path': './templates/高温.docx',
        'direct-reading': True,
        'project_num_row': 0,
        'project_num_col': 1,
        'company_name_row': 1,
        'company_name_col': 1,
        'deleterious_substance_row': 3,
        'deleterious_substance_col': 1,
        'first_page_rows': 9,
        'late_page_rows': 18,
        'title_rows': 3,
        'item_rows': 3,
        'available_cols': [0, 1],
    },
    '一氧化碳': {
        'template_path': './templates/一氧化碳.docx',
        'direct-reading': True,
        'project_num_row': 0,
        'project_num_col': 1,
        'company_name_row': 0,
        'company_name_col': 3,
        'deleterious_substance_row': 3,
        'deleterious_substance_col': 1,
        'first_page_rows': 20,
        'late_page_rows': 20,
        'title_rows': 2,
        'item_rows': 4,
        'available_cols': [0, 1],
    },
}

In [None]:
import os
from docx import Document
import pandas as pd
from occupational_health_module.write_docx_templates import templates_info

In [None]:
factor = '一氧化碳'
current_factor_info = templates_info[factor]

In [None]:
file_path = current_factor_info['template_path']
doc = Document(file_path)

In [None]:
df = pd.DataFrame(columns=list(range(3)), index=list(range(10)))

df.head()

In [None]:
for i in range(10):
    for j in range(3):
        df.iloc[i, j] = f'{i}-{j}'

In [None]:
df.head()

In [None]:
current_df = df.query('index >= 0 and index <= 10')

current_df.head()

In [None]:
# Row layout of the selected template, read from its config entry.
rows_num = current_factor_info['item_rows']
title_num = current_factor_info['title_rows']

# Write the first column of the first four records into the third table of
# the document: record i goes to the first cell of table row
# (i * rows_num + title_num), i.e. one item block per record after the titles.
for i in range(4):
    current_cell = doc.tables[2].rows[i * rows_num + title_num].cells[0]
    current_cell.text = current_df.iloc[i, 0]

In [None]:
doc.save(f'{os.path.expanduser("~/desktop")}/t{factor}.docx')

### 接触时间分开


In [None]:
from decimal import Decimal, ROUND_HALF_DOWN
from typing import List
import os
import pandas as pd
import numpy as np

In [None]:
def split_time(time: float, size: int) -> List[float]:
    """Split a contact time into up to ``size`` representative durations.

    All arithmetic is done with ``Decimal`` (built from ``str(time)``) so the
    parts add up exactly to ``time``.

    Rules, checked in order:

    * ``time < 0.25 * size``: the time is too short to split and is returned
      as a single-element list.
    * ``time < 0.3 * size``: the first ``size - 1`` parts are 0.25 each and
      the last part takes the remainder.
    * otherwise: the first ``size - 1`` parts are ``time / size`` rounded
      (ROUND_HALF_DOWN) to the same number of decimal places as ``time``
      itself (0 places for whole numbers); the last part takes the remainder.

    Args:
        time: Total contact time.
        size: Number of parts wanted; converted with ``int()`` for counting.

    Returns:
        The parts as floats, sorted ascending.
    """
    time_dec: Decimal = Decimal(str(time))
    size_dec: Decimal = Decimal(str(size))
    front_count: int = int(size) - 1

    # Rounding quantum for the even share. The Decimal exponent is negative
    # for fractional values (1.5 -> -1, 4.01 -> -2), so it is used directly as
    # a power of ten; comparing it against positive 1 / 2 would never match
    # and everything would be rounded to whole numbers.
    if time == int(time):
        quantum: Decimal = Decimal('1')
    else:
        quantum = Decimal(1).scaleb(time_dec.as_tuple().exponent)

    time_list_dec: List[Decimal] = []
    if time_dec < Decimal('0.25') * size_dec:
        # Too short for every part to reach 0.25: keep it in one piece.
        time_list_dec.append(time_dec)
    else:
        if time_dec < Decimal('0.3') * size_dec:
            # An even share would fall below 0.3: pin the leading parts at 0.25.
            front_share: Decimal = Decimal('0.25')
        else:
            front_share = (time_dec / size_dec).quantize(quantum, ROUND_HALF_DOWN)
        time_list_dec.extend([front_share] * front_count)
        # The last part absorbs the rounding remainder so the total is exact.
        time_list_dec.append(time_dec - sum(time_list_dec))

    return sorted(map(float, time_list_dec))

In [None]:
split_time(0.9, 3)

In [None]:
split_time(1.5, 3)

In [None]:
df1 = pd.DataFrame({
    "time": np.arange(1, 1001) / 100,
    "size": np.full(1000, 3)
})

df1.head()

In [None]:
df2 = pd.DataFrame({
    "time": np.arange(1, 1001) / 100,
    "size": np.full(1000, 2)
})

df2.head()

In [None]:
df1['timelist'] = df1.apply(lambda x: split_time(x['time'], x['size']), axis=1)
# df1 = df1.explode('timelist')

df1.head()

In [None]:
df2['timelist'] = df2.apply(lambda x: split_time(x['time'], x['size']), axis=1)
# df2 = df2.explode('timelist')

df2.head()

In [None]:
with pd.ExcelWriter(os.path.join(os.path.expanduser("~/desktop"), '采样时长划分测试.xlsx')) as excelfile:
    df1.to_excel(excelfile, sheet_name='数量3', index=False)
    df2.to_excel(excelfile, sheet_name='数量2', index=False)

### docx模板里的表格合并测试


In [None]:
import os
from docx import Document

In [None]:
template_docx_path: str = './templates/有害物质定点采样记录.docx'

doc = Document(template_docx_path)

In [None]:
tables = doc.tables
current_table = tables[1]
current_cell = current_table.cell(4, 9)
current_cell.text = '123'

current_merge_cell = current_table.cell(6, 9)

current_cell.merge(current_merge_cell)

In [None]:
doc.save(os.path.join(os.path.expanduser("~/desktop"), '合并单元格.docx'))

## 随机噪声值和等效噪声值


In [None]:
import pandas as pd
import os
from occupational_noise_module.occupational_noise import OccupationalNoiseInfo

In [None]:
file_path: str ='./templates/噪声值模板.csv'

df = pd.read_csv(file_path)

df.head()

In [None]:
noise = OccupationalNoiseInfo(df, 1., 3)

In [None]:
noise.new_noise_df.head()

### 接触时间划分（新）


In [None]:
from decimal import Decimal, ROUND_HALF_DOWN, ROUND_HALF_UP, ROUND_DOWN, ROUND_UP
from typing import List, Dict
import os
import pandas as pd
import numpy as np

In [None]:
# 接触时间和数量转为十进制
time: float = 4.01
size: int = 3
time_dec: Decimal = Decimal(str(time))
size_dec: Decimal = Decimal(str(size))

In [None]:
time_list_dec: List[Decimal] = [] # 存放代表时长列表

In [None]:
# 判断接触时间的小数位数
if time == int(time):
    time_prec: int = 0
else:
    time_prec: int = int(time_dec.as_tuple().exponent)

time_prec

In [None]:
# Quantum (number of decimal places) used when rounding the base average,
# keyed by time_prec, the Decimal exponent of the contact time.
time_prec_dec_dict: Dict[int, Decimal] = {
    0: Decimal('0'),  # whole-number time: round to an integer
    -1: Decimal('0.0'),  # one decimal place: round to one decimal place
    -2: Decimal('0.0')  # NOTE(review): two-decimal times also round to ONE decimal place - confirm this is intended
}
# Raises KeyError for any time_prec other than 0, -1 or -2.
prec_dec_str: Decimal = time_prec_dec_dict[time_prec]

prec_dec_str

In [None]:
# If the contact time is below 0.25 per part, do not split it
if time_dec < Decimal('0.25') * size_dec:
    time_list_dec.append(time_dec)
else:
    # The first (size - 1) parts each get the average rounded down
    # (ROUND_DOWN) to prec_dec_str's precision.
    judge_result: Decimal = time_dec / size_dec
    for i in range(int(size) - 1):
        result: Decimal = judge_result.quantize(prec_dec_str, ROUND_DOWN)
        time_list_dec.append(result)
    # The last part is whatever remains, so the parts sum to time_dec.
    last_result: Decimal = time_dec - sum(time_list_dec)
    time_list_dec.append(last_result)


In [None]:
time_list_dec

In [None]:
(time_dec / size_dec).quantize(prec_dec_str, ROUND_HALF_DOWN)

In [None]:
(time_dec / size_dec).quantize(prec_dec_str, ROUND_HALF_UP)

In [None]:
(time_dec / size_dec).quantize(prec_dec_str, ROUND_DOWN)

In [None]:
(time_dec / size_dec).quantize(prec_dec_str, ROUND_UP)

In [None]:
def new_split_time(time: float, size: int, round_type: str) -> List[float]:
    """Split a contact time into up to ``size`` representative durations.

    All arithmetic is done with ``Decimal`` (built from ``str(time)``) so the
    parts add up exactly to ``time``.

    Rules, checked in order:

    * ``time < 0.25 * size``: not split; a single-element list is returned.
    * ``time < 0.5 * size``: the first ``size - 1`` parts are 0.25 each.
    * ``time < 0.7 * size``: the first ``size - 1`` parts are 0.5 each.
    * otherwise: the first ``size - 1`` parts are ``time / size`` rounded
      with ``round_type`` to an integer when ``time`` is a whole number and
      to one decimal place for any fractional ``time``.

    In every split case the last part takes the remainder.

    Args:
        time: Total contact time.
        size: Number of parts wanted; converted with ``int()`` for counting.
        round_type: A ``decimal`` rounding-mode constant such as
            ``ROUND_DOWN`` or ``ROUND_HALF_UP``.

    Returns:
        The parts as floats, in generation order (leading parts first,
        remainder last).
    """
    time_dec: Decimal = Decimal(str(time))
    size_dec: Decimal = Decimal(str(size))
    front_count: int = int(size) - 1
    is_whole: bool = time == int(time)

    # Too short for every part to reach 0.25: keep it in one piece.
    if time_dec < Decimal('0.25') * size_dec:
        return [float(time_dec)]

    if time_dec < Decimal('0.5') * size_dec:
        front_share: Decimal = Decimal('0.25')
    elif time_dec < Decimal('0.7') * size_dec:
        front_share = Decimal('0.5')
    else:
        # Same quantum as the former {0: '0', -1: '0.0', -2: '0.0'} lookup,
        # but chosen only where it is needed and without a KeyError for
        # times that carry more than two decimal places.
        quantum: Decimal = Decimal('0') if is_whole else Decimal('0.0')
        front_share = (time_dec / size_dec).quantize(quantum, round_type)

    time_list_dec: List[Decimal] = [front_share] * front_count
    # The last part absorbs the remainder so the total is exact.
    time_list_dec.append(time_dec - sum(time_list_dec))
    return list(map(float, time_list_dec))

In [None]:
df1 = pd.DataFrame({
    "time": np.arange(1, 1001) / 100,
    "size": np.full(1000, 3)
})

df1.head()

In [None]:
df2 = pd.DataFrame({
    "time": np.arange(1, 1001) / 100,
    "size": np.full(1000, 2)
})

df2.head()

In [None]:
round_types: List[str] = [ROUND_HALF_DOWN, ROUND_HALF_UP, ROUND_DOWN, ROUND_UP]

In [None]:
# For every rounding mode, split each time in df1 (size 3) and df2 (size 2),
# spread the resulting lists into timelist_<k> columns, highlight the parts
# that fall in [-100, 0.25], and write one styled sheet per frame and mode.
with pd.ExcelWriter(os.path.join(os.path.expanduser("~/desktop"), '采样时长划分测试.xlsx')) as excelfile:
    for round_type in round_types:
        # Work on copies so df1 / df2 stay untouched between rounding modes.
        current_df1 = df1.copy()
        current_df2 = df2.copy()
        # Representative-duration list per row.
        current_df1['timelist'] = current_df1.apply(lambda x: new_split_time(x['time'], x['size'], round_type), axis=1)
        current_df2['timelist'] = current_df2.apply(lambda x: new_split_time(x['time'], x['size'], round_type), axis=1)
        # Spread each list horizontally into timelist_0, timelist_1, ...
        # Each frame is built once from the list column; shorter lists are
        # padded with NaN.
        temp1 = pd.DataFrame(current_df1['timelist'].to_list()).add_prefix('timelist_')
        current_df1 = pd.concat([current_df1, temp1], axis=1)
        temp2 = pd.DataFrame(current_df2['timelist'].to_list()).add_prefix('timelist_')
        current_df2 = pd.concat([current_df2, temp2], axis=1)
        # Highlight the out-of-range parts; the subset follows the columns
        # that were actually produced instead of a hard-coded list.
        current_df1_style = current_df1.style.highlight_between(left=-100.0, right=0.25, subset=list(temp1.columns))
        current_df2_style = current_df2.style.highlight_between(left=-100.0, right=0.25, subset=list(temp2.columns))
        # Save the styled frames to the workbook.
        current_df1_style.to_excel(excelfile, sheet_name=f'数量3-{round_type}', index=False)
        current_df2_style.to_excel(excelfile, sheet_name=f'数量2-{round_type}', index=False)

In [None]:
from docx2pdf import convert

In [None]:
files_path = r'C:\Users\15210\Desktop\23ZXP0024记录表'

In [None]:
convert(files_path)

In [None]:
import pandas as pd
from occupational_health_module.new_occupational_health import NewOccupationalHealthItemInfo

In [None]:
project_number: str = '24ZDQ0003-1'
company_name: str = '福建青拓实业股份有限公司'

In [None]:
raw_df = pd.read_excel('templates/项目信息试验模板5.xlsx')

raw_df.dtypes

In [None]:
new_project: NewOccupationalHealthItemInfo = NewOccupationalHealthItemInfo(
    project_number,
    company_name,
    raw_df
)


In [None]:
new_project.point_df.head()

In [None]:
new_project.templates_info_dict['有害物质定点']