# 代码测试

## 按照汉字拼音排序


In [None]:
import locale
from functools import cmp_to_key
import pandas as pd

In [None]:
# Switch string collation to Simplified Chinese so locale.strcoll orders Chinese text.
# NOTE(review): 'zh_CN.utf8' must exist on the host; setlocale raises locale.Error otherwise.
locale.setlocale(locale.LC_COLLATE, 'zh_CN.utf8')

In [None]:
# Sample chemical names used as input for the sorting experiments.
str_list: list[str] = [
    '硫',
    '氨',
    '氯',
    '碳',
    '氧',
    '氯化氢',
]
str_list

In [None]:
# Sort a copy of the list with the active LC_COLLATE rules; cmp_to_key adapts
# the two-argument comparator locale.strcoll into a sort key.
a = list(str_list)
a.sort(key=cmp_to_key(locale.strcoll))
a

In [None]:
# Locale-independent alternative: order the strings by their GBK-encoded bytes.
sorted(
    str_list,
    key=lambda text: bytes(text, 'gbk'),
)

In [None]:
# Two-column frame: the strings plus their original position in str_list.
df = pd.DataFrame({
    'str': str_list,
    'num': list(range(len(str_list))),
})

In [None]:
# Sort the frame by the GBK byte encoding of the 'str' column and renumber the index.
df.sort_values(
    by='str',
    key=lambda column: column.str.encode('gbk'),
    ignore_index=True,
)  # type: ignore

## 向一个excel文件循环写入数据


In [None]:
import pandas as pd

In [None]:
# Keep a single writer open for the whole loop so every iteration adds a new
# sheet (sheet1..sheet3) to the same workbook.
with pd.ExcelWriter('test.xlsx') as writer:
    for i in (1, 2, 3):
        df = pd.DataFrame({'num': [i]})
        df.to_excel(writer, sheet_name=f'sheet{i}', index=False)  # type: ignore

## 动态变量名

### `globals()`方法


In [None]:
# Create variable0 .. variable6 by writing straight into the module namespace.
for x in range(7):
    globals()[f"variable{x}"] = f"Hello the variable number {x}!"

# The dynamically created name can now be used like any ordinary global.
print(variable5)

### `locals()`方法


In [None]:
# Same experiment through locals(). At module / notebook top level locals()
# is the global namespace, so the assignment sticks; inside a function,
# writes to locals() are not a reliable way to create variables.
for x in range(7):
    locals()[f"variable{x}"] = f"Hello the variable number {x}!"

print(variable5)

### 新`locals()`方法


In [None]:
# Template for the generated variable names.
variable_name_pattern = "variable_{}"

# How many variables to create.
num_variables = 5

# Build each name (variable_1 .. variable_5), bind a value to it through
# locals(), then read it back to show the binding took effect.
for i in range(num_variables):
    variable_name = variable_name_pattern.format(i + 1)
    locals()[variable_name] = i + 10
    print(variable_name, locals()[variable_name])

## 采样工作脚本测试

### 职业卫生


In [None]:
import io
import openpyxl
import pandas as pd
from nptyping import DataFrame
# from pandas.api.types import CategoricalDtype
from my_modules.occupational_health import OccupationalHealthItemInfo, refresh_engaged_num
# from my_modules.occupational_health import SingleDayOccupationalHealthItemInfo

In [None]:
# Company and project identifiers used for this trial run.
company_name: str = 'MSCN'
project_name: str = '23ZXP0000'
# working_days: float = 5.0

# Template workbook with the project information, resolved relative to the
# current working directory.
file_path: str = r'./templates/项目信息试验模板.xlsx'
# Load the '定点' and '个体' sheets into two separate frames.
point_info_df: DataFrame = pd.read_excel(file_path, sheet_name='定点') # type: ignore
personnel_info_df: DataFrame = pd.read_excel(file_path, sheet_name='个体') # type: ignore

In [None]:
# Build the project object from the identifiers and the two sheets read from
# the template workbook.
new_project = OccupationalHealthItemInfo(
    company_name,
    project_name,
    point_info_df,
    personnel_info_df,
)

In [None]:
# Preview the first rows of the project's point_info_df.
new_project.point_info_df.head()

In [None]:
# Show only the rows whose 采样点编号 equals 25.
new_project.point_info_df.query('采样点编号 == 25')#.tail()  # type: ignore

In [None]:
# Preview the first rows of the project's personnel_info_df.
new_project.personnel_info_df.head()

In [None]:
# Preview the first rows of point_deleterious_substance_df.
new_project.point_deleterious_substance_df.head()

In [None]:
# Preview the first rows of personnel_deleterious_substance_df.
new_project.personnel_deleterious_substance_df.head()

In [None]:
# Preview the first rows of factor_reference_df.
new_project.factor_reference_df.head()

In [None]:
# Preview the blank-sample table returned for arguments (0, 1); other cells in
# this notebook pass (engaged_num, schedule_day) in these positions.
new_project.get_single_day_blank_df(0, 1).head()

In [None]:
# Reset the running counter, build the blank table for day 1, then let
# refresh_engaged_num derive the updated counter from that table.
# NOTE(review): presumably engaged_num tracks sample numbers already handed out — confirm in my_modules.
engaged_num = 0
test_blank = new_project.get_single_day_blank_df(engaged_num, 1)
engaged_num = refresh_engaged_num(test_blank, engaged_num)
test_blank.head()

In [None]:
# Build the fixed-point table for day 1 using the counter left by the previous
# cell, then update the counter again. This cell depends on being run in order.
test_point = new_project.get_single_day_point_df(engaged_num, 1)
engaged_num = refresh_engaged_num(test_point, engaged_num)
test_point.head()

In [None]:
# Build the personnel table for day 1 using the counter left by the previous
# cell, then update the counter again. This cell depends on being run in order.
test_personnel = new_project.get_single_day_personnel_df(engaged_num, 1)
engaged_num = refresh_engaged_num(test_personnel, engaged_num)
test_personnel.head()

In [None]:
# trim_dfs takes the three per-type tables and returns three tables, unpacked
# here in the same blank / point / personnel order.
# NOTE(review): what exactly gets trimmed is defined in my_modules — not visible here.
n_blank, n_point, n_personnel = new_project.trim_dfs(test_blank, test_point, test_personnel)
n_blank.head()

In [None]:
# Preview the fixed-point table returned by trim_dfs.
n_point.head()

In [None]:
# Preview the personnel table returned by trim_dfs.
n_personnel.head()

In [None]:
# Collapse the fixed-point samples to one row per (检测因素, 空白编号) pair,
# keeping the smallest 起始编号 and the largest 终止编号 of each group.
# The aggregations are given by name: passing the builtins min/max is
# deprecated in recent pandas and emits a FutureWarning, while the string
# names select the same reductions on every pandas version.
p_point = pd.pivot_table(
    n_point,
    index=['检测因素', '空白编号'],
    aggfunc={'起始编号': 'min', '终止编号': 'max'},
).reset_index()

p_point.head()

In [None]:
# For every 检测因素, find the first and last 个体编号.
# The aggregations are given by name ('min' / 'max') instead of the builtins,
# which recent pandas deprecates as aggfunc values; the resulting column labels
# are still 'min' and 'max', so the rename below is unaffected.
p_personnel = (
    pd.pivot_table(
        n_personnel,
        index='检测因素',
        values='个体编号',
        aggfunc=['min', 'max'],
    )
    # Move the inner column level (个体编号) into the index so that 'min' and
    # 'max' become plain columns.
    .stack()
    .reset_index()
    .rename(columns={'min': '个体起始编号', 'max': '个体终止编号'})
    [['检测因素', '个体起始编号', '个体终止编号']]
)

p_personnel.head()

In [None]:
# Combine the fixed-point and personnel number ranges per 检测因素. The outer
# join keeps factors that appear on only one side; their missing numbers are
# filled with 0 and every remaining column is converted to integers.
# astype replaces the element-wise applymap(int), which is deprecated in recent
# pandas; 'int64' is spelled out so the dtype does not depend on the platform.
(
    pd.merge(p_point, p_personnel, on='检测因素', how='outer')
    .fillna(0)
    .set_index('检测因素')
    .astype('int64')
)

In [None]:
# Ask the project for the numbered tables of the three sample types; the
# returned object exposes getvalue(), whose bytes are written to disk below.
file_io = new_project.get_dfs_num(['空白', '定点', '个体'])

# NOTE(review): hard-coded, user-specific Windows path — adjust before running elsewhere.
with open('C:/Users/15210/Desktop/test2.xlsx', 'wb') as f:
  f.write(file_io.getvalue())

#### 统计样品


In [None]:
# Rebuild the three tables for a single schedule day, threading the running
# counter through each step: every table is generated from the current
# engaged_num, and refresh_engaged_num then returns the value for the next step.
engaged_num: int = 0
types_order: list[str] = ['空白', '定点', '个体']
# file_io: io.BytesIO = io.BytesIO()
schedule_day: int = 1

# Blank samples first.
current_blank: DataFrame = new_project.get_single_day_blank_df(engaged_num, schedule_day)
# current_blank.to_excel(excel_writer, sheet_name=f'空白D{schedule_day}', index=False)  # type: ignore
engaged_num = refresh_engaged_num(current_blank, engaged_num)

# Then the fixed-point samples.
current_point: DataFrame = new_project.get_single_day_point_df(engaged_num, schedule_day)
# current_point.to_excel(excel_writer, sheet_name=f'定点D{schedule_day}', index=False)  # type: ignore
engaged_num = refresh_engaged_num(current_point, engaged_num)

# Finally the personnel samples.
current_personnel: DataFrame = new_project.get_single_day_personnel_df(engaged_num, schedule_day)
# current_personnel.to_excel(excel_writer, sheet_name=f'个体D{schedule_day}', index=False)  # type: ignore
engaged_num = refresh_engaged_num(current_personnel, engaged_num)


In [None]:
# Display the whole blank-sample table.
current_blank

In [None]:
# Preview the fixed-point table before it is modified in the following cells.
current_point.head()

In [None]:
# Remove the 空白编号 column from the fixed-point table and rebind the name to
# the resulting frame.
current_point = current_point.drop('空白编号', axis=1)

current_point.head()

In [None]:
# Left-join the blank table onto the fixed-point table via 标识检测因素, so
# every fixed-point row is kept whether or not a blank matches it.
df = current_point.merge(current_blank, on='标识检测因素', how='left')

df.head()

In [None]:
def get_sample_list(df: pd.Series, width: int = 4) -> list:
    """Expand one row's inclusive sample-number range into a padded list.

    Meant to be used with ``DataFrame.apply(..., axis=1)``, which hands each
    row to the function as a Series (hence the annotation), although any
    mapping with the two keys below works.

    Args:
        df: A single row providing '起始编号' (first number) and '终止编号'
            (last number, inclusive). The parameter keeps its historical name
            for backward compatibility.
        width: Minimum length of the returned list. Shorter ranges are
            right-padded with ``None``; longer ranges are returned in full.
            Defaults to 4, the previously hard-coded value.

    Returns:
        The integers from the first to the last number, followed by as many
        ``None`` entries as are needed to reach ``width`` items.
    """
    # Cells read from Excel are often floats (e.g. 3.0) or NumPy scalars;
    # range() rejects floats, so normalise both bounds to plain ints first.
    first = int(df['起始编号'])
    last = int(df['终止编号'])
    num_list: list = list(range(first, last + 1))
    # A non-positive multiplier yields an empty list, so long ranges are kept as-is.
    num_list += [None] * (width - len(num_list))
    return num_list

In [None]:
# Call get_sample_list once per row and store each resulting list in a new
# 样品编号 column.
current_point['样品编号'] = current_point.apply(
    get_sample_list,
    axis='columns',
)

current_point.head()

#### 测试获得所有样品信息的编号，并写入`BytesIO`文件里


In [None]:
# Number the samples of three schedule days in one pass and write every table
# to its own sheet of an in-memory workbook. The running counter engaged_num is
# carried across sample types and across days.
engaged_num: int = 0
types_order: list[str] = ['空白', '定点', '个体']
file_io: io.BytesIO = io.BytesIO()

with pd.ExcelWriter(file_io) as excel_writer:
    for schedule_day in [1, 2, 3]:
        # The loop variable is called sample_type rather than `type`, so the
        # builtin type() is not shadowed for the rest of the notebook session.
        for sample_type in types_order:
            if sample_type == '空白':
                current_blank: DataFrame = new_project.get_single_day_blank_df(engaged_num, schedule_day)
                current_blank.to_excel(excel_writer, sheet_name=f'空白D{schedule_day}', index=False)  # type: ignore
                engaged_num = refresh_engaged_num(current_blank, engaged_num)
            elif sample_type == '定点':
                current_point: DataFrame = new_project.get_single_day_point_df(engaged_num, schedule_day)
                current_point.to_excel(excel_writer, sheet_name=f'定点D{schedule_day}', index=False)  # type: ignore
                engaged_num = refresh_engaged_num(current_point, engaged_num)
            elif sample_type == '个体':
                current_personnel: DataFrame = new_project.get_single_day_personnel_df(engaged_num, schedule_day)
                engaged_num = refresh_engaged_num(current_personnel, engaged_num)
                current_personnel.to_excel(excel_writer, sheet_name=f'个体D{schedule_day}', index=False)  # type: ignore

# with open(r'C:/Users/15210/Desktop/test.xlsx', 'wb') as f:
#     file_bytes: bytes = file_io.getvalue()
#     f.write(file_bytes)

In [None]:
# Re-open the in-memory workbook with openpyxl and print its sheet names to
# check which sheets were written.
workbook = openpyxl.load_workbook(file_io)
sheet_names = workbook.sheetnames
print(sheet_names)

#### 获得定点和个体的空白样品


In [None]:
# test1_df = new_project.point_deleterious_substance_df#[['检测因素', '是否需要空白', '复合因素代码']]
# test2_df = new_project.personnel_deleterious_substance_df#[['检测因素', '是否需要空白', '复合因素代码']]

# The call returns a pair of tables, unpacked into test1_df and test2_df.
# NOTE(review): judging by the commented-out lines above these are the point and
# personnel substance tables, and the argument 1 is the schedule day — confirm.
test1_df, test2_df = new_project.get_single_day_deleterious_substance_df(1)

In [None]:
# Split the '|'-separated 检测因素 strings into lists, then explode so that
# each factor gets its own row. The split overwrites the column on test1_df
# itself, so re-running this cell without re-creating test1_df acts on lists.
test1_df['检测因素'] = test1_df['检测因素'].str.split('|')  # type: ignore
ex_test1_df: DataFrame = test1_df.explode('检测因素')
ex_test1_df.head()

In [None]:
# Same treatment for the second table: split the '|'-separated 检测因素
# strings in place, then explode to one factor per row.
test2_df['检测因素'] = test2_df['检测因素'].str.split('|')  # type: ignore
ex_test2_df: DataFrame = test2_df.explode('检测因素')
ex_test2_df.head()

In [None]:
# Stack the three relevant columns of both exploded tables, keep the first
# occurrence of every 检测因素 and renumber the rows.
test_df = (
    pd.concat(  # type: ignore
        [
            ex_test1_df[['检测因素', '是否需要空白', '复合因素代码']],
            ex_test2_df[['检测因素', '是否需要空白', '复合因素代码']],
        ]
    )
    .drop_duplicates(subset='检测因素')
    .reset_index(drop=True)
)

test_df.head()

In [None]:
# Rows whose 复合因素代码 is 0, reduced to the factor name and its blank flag.
group1 = test_df.loc[
    test_df['复合因素代码'].eq(0),
    ['检测因素', '是否需要空白'],
]
group1.head()

In [None]:
# The remaining rows: every factor whose 复合因素代码 is not 0.
raw_group2 = test_df.loc[test_df['复合因素代码'].ne(0)]
raw_group2.head()

In [None]:
# Join the factor names that share a 复合因素代码 into one '|'-separated
# string per code, discard the code itself, and flag every combined entry as
# requiring a blank.
group2 = (
    raw_group2
    .groupby(['复合因素代码'])['检测因素']
    .apply('|'.join)
    .to_frame()
    .reset_index(drop=True)
)  # type: ignore
group2['是否需要空白'] = True

group2.head()

In [None]:
# Put the single factors and the combined factors into one table with a fresh
# 0..n-1 index.
concat_group = pd.concat([group1, group2], axis='index', ignore_index=True)

concat_group.head()

In [None]:
from pandas.api.types import CategoricalDtype

# Order the factor names by their GBK byte encoding and turn that order into
# an ordered categorical dtype.
blank_factor_list = concat_group['检测因素'].tolist()
blank_factor_list.sort(key=lambda name: name.encode('gbk'))
blank_factor_order = CategoricalDtype(blank_factor_list, ordered=True)

blank_factor_order

In [None]:
# Cast the factor column to the ordered categorical so that sorting follows
# the category order rather than plain string order.
concat_group['检测因素'] = concat_group['检测因素'].astype(blank_factor_order)
# Keep only the factors flagged as needing a blank, sorted by that order.
concat_group = (
    concat_group
    .loc[concat_group['是否需要空白'].eq(True)]
    .sort_values(by='检测因素', ignore_index=True)
)
concat_group

In [None]:
# Break each (possibly combined) factor name back into its individual names.
concat_group['标识检测因素'] = concat_group['检测因素'].astype(str).str.split('|')
# Number the blanks 1..n in the current row order.
concat_group['空白编号'] = list(range(1, len(concat_group) + 1))# + engaged_num
# One row per individual factor; rows that came from the same combined entry
# share a 空白编号.
concat_group.explode('标识检测因素')