In [3]:
# type:ignore
import os
import time
import pyodbc
import pandas as pd

In [4]:
# Connect to the database and list its tables.
def get_tables(conn_str):
    """Return the names of all tables of type 'TABLE' in the database.

    Args:
        conn_str: ODBC connection string handed to ``pyodbc.connect``.

    Returns:
        list: the ``table_name`` of every catalog row reported by
        ``cursor.tables(tableType='TABLE')``.

    The cursor and the connection are always closed, including when creating
    the cursor or reading the catalog raises.
    """
    conn = pyodbc.connect(conn_str)
    try:
        # Create the cursor inside the try so a failure here still closes conn.
        cursor = conn.cursor()
        try:
            return [table.table_name for table in cursor.tables(tableType='TABLE')]
        finally:
            cursor.close()
    finally:
        conn.close()

# Read a whole table and return it as a pandas DataFrame.
def get_table_data(conn_str, table_name):
    """Load every row of ``table_name`` into a DataFrame.

    Args:
        conn_str: ODBC connection string handed to ``pyodbc.connect``.
        table_name: name of the table to read. It is interpolated verbatim
            into the SQL text, so only pass trusted table names.

    Returns:
        pandas.DataFrame: one column per entry of ``cursor.description``
        (named after it) and one row per fetched record.

    The cursor and the connection are always closed, including when creating
    the cursor or running the query raises.
    """
    conn = pyodbc.connect(conn_str)
    try:
        # Create the cursor inside the try so a failure here still closes conn.
        cursor = conn.cursor()
        try:
            cursor.execute(f"SELECT * FROM {table_name}")
            # The first item of each description entry is the column name.
            columns = [column[0] for column in cursor.description]
            return pd.DataFrame.from_records(cursor.fetchall(), columns=columns)
        finally:
            cursor.close()
    finally:
        conn.close()
# Count how many values in one column of a DataFrame are repeats.
def check_duplicate(df,column_name):
    """Return the number of rows whose ``column_name`` value already appeared earlier."""
    repeat_flags = df[column_name].duplicated()
    return repeat_flags.sum()

# Drop rows that repeat a value in one column (only the first occurrence is kept).
def remove_duplicate(df,column_name):
    """Return ``df`` without the rows whose ``column_name`` value was seen before."""
    is_repeat = df.duplicated(subset=[column_name], keep='first')
    return df[~is_repeat]


In [5]:
# Path of the MDB file to read; it must include the ".mdb" extension.
mdb_file = r"C:\Users\Runker\Desktop\mdb\522623.mdb"

# Fail early with a clear message when the database file is missing.
# isfile() is used instead of exists() so that a directory with this name is
# rejected here rather than failing later inside the ODBC driver.
if not os.path.isfile(mdb_file):
    raise FileNotFoundError(f"找不到MDB文件: {mdb_file}")

# ODBC connection string for the Microsoft Access driver.
conn_str = (
    r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'
    r'DBQ=' + mdb_file + ';'
)

In [6]:
# List every table name in the database, followed by how many there are.
tables = get_tables(conn_str)
table_count = len(tables)
print(tables, table_count)


['CYXX', 'DCYD', 'FJWJ', 'JCFF', 'LDTJDCXX', 'PMXTXDCFCXX', 'PMXTXDCJBXX', 'SFYL', 'SXDM', 'SYS', 'TRHJXZ', 'TRHXXZ', 'TRRZ', 'TRWLXZ', 'TRWLXZ_NEW', 'YDSQ', 'YPLZ', 'YPLZQD', 'YPZB', 'ZKYP'] 20


In [7]:
# Survey sample-point attribute table (DCYD).
# Columns kept: YDBH YDLB CYLX BSJD BSWD JD WD DWJD DWWD DWGC TL YL TS TZ
#               SFCJSWXDTJTYP SFJCJXZC SFTTCPYD TTCPMC
dcyd_columns = [
    'YDBH', 'YDLB', 'CYLX', 'BSJD', 'BSWD', 'JD', 'WD', 'DWJD', 'DWWD', 'DWGC',
    'TL', 'YL', 'TS', 'TZ', 'SFCJSWXDTJTYP', 'SFJCJXZC', 'SFTTCPYD', 'TTCPMC',
]
dcyd_df = get_table_data(conn_str, 'DCYD')[dcyd_columns]
# Row count, then the number of distinct YDBH values.
print(len(dcyd_df))
print(dcyd_df['YDBH'].nunique())
# Rename TS to TSS so it does not clash with the TS column of other tables.
dcyd_df = dcyd_df.rename(columns={'TS': 'TSS'})
dcyd_df.sample(1)

861
861


Unnamed: 0,YDBH,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,TL,YL,TSS,TZ,SFCJSWXDTJTYP,SFJCJXZC,SFTTCPYD,TTCPMC
576,5226230103000651,0,1,108.14560995350212,27.06931806011066,108.14560995350212,27.06931806011066,108.14588581422996,27.069248160166744,754.3794628363103,石灰（岩）土,黄色石灰土,黄色石灰壤土,薄层黄色石灰壤土,0,1,0,


In [8]:
# Sampling information attribute table (CYXX).
# Columns kept: YDBH YPBH YPLX CH
cyxx_columns = ['YDBH', 'YPBH', 'YPLX', 'CH']
cyxx_df = get_table_data(conn_str, 'CYXX')[cyxx_columns]
# Row count, then the number of distinct YDBH values.
print(len(cyxx_df))
print(cyxx_df['YDBH'].nunique())
cyxx_df.sample(1)

1917
861


Unnamed: 0,YDBH,YPBH,YPLX,CH
1415,5226230103000831,522623010300083110,1,


In [9]:
# Soil physical property attribute table (TRWLXZ).
# Columns kept: YDBH YPBH YYPBH JXZCXSL JXZC1-JXZC4 TRZD SWXDTJT1-SWXDTJT7
wlxz_columns = [
    'YDBH', 'YPBH', 'YYPBH', 'JXZCXSL',
    'JXZC1', 'JXZC2', 'JXZC3', 'JXZC4', 'TRZD',
    'SWXDTJT1', 'SWXDTJT2', 'SWXDTJT3', 'SWXDTJT4', 'SWXDTJT5', 'SWXDTJT6', 'SWXDTJT7',
]
wlxz_df = get_table_data(conn_str, 'TRWLXZ')[wlxz_columns]
# Row count, then the number of distinct YDBH values.
print(len(wlxz_df))
print(wlxz_df['YDBH'].nunique())
wlxz_df.sample(1)


1050
861


Unnamed: 0,YDBH,YPBH,YYPBH,JXZCXSL,JXZC1,JXZC2,JXZC3,JXZC4,TRZD,SWXDTJT1,SWXDTJT2,SWXDTJT3,SWXDTJT4,SWXDTJT5,SWXDTJT6,SWXDTJT7
553,5226230101000497,231228119506b47,522623010100049710,0,11.9,25.5,27.0,35.6,壤质黏土,,,,,,,


In [10]:
# Soil chemical property attribute table (TRHXXZ).
# Columns kept: the identifiers YDBH / YPBH / YYPBH plus every measurement column
# listed in hxxz_columns below.
hxxz_columns = [
    'YDBH', 'YPBH', 'YYPBH', 'FGSYHSL', 'PH', 'JHXSZL', 'ECH', 'ECAL', 'SJXZSD', 'CEC',
    'JHXYJZL', 'ECA', 'EMG', 'ENA', 'EK', 'SRXYZL', 'DDL', 'SRXNLZ', 'SRXJLZ', 'SRXGLZ',
    'SRXMLZ', 'SRXTSG', 'SRXTSQG', 'SRXLSG', 'SRXLG', 'LZZL', 'OM', 'TN', 'TP', 'TK',
    'TS', 'TB', 'TSI', 'TSE', 'TFE', 'TMN', 'TCU', 'TZN', 'TMO', 'TAL',
    'TCA', 'TMG', 'AP', 'SK', 'AK', 'AS1', 'ASI', 'AFE', 'AMN', 'ACU',
    'AZN', 'AB', 'AMO', 'CACO3', 'FE2O3',
]
hxxz_df = get_table_data(conn_str, 'TRHXXZ')[hxxz_columns]
# Row count, then the number of distinct YDBH values.
print(len(hxxz_df))
print(hxxz_df['YDBH'].nunique())
hxxz_df.sample(1)

947
861


Unnamed: 0,YDBH,YPBH,YYPBH,FGSYHSL,PH,JHXSZL,ECH,ECAL,SJXZSD,CEC,...,AS1,ASI,AFE,AMN,ACU,AZN,AB,AMO,CACO3,FE2O3
223,5226230301000664,231208090128b18,522623030100066410,9.1,7.74,,,,,23.5,...,28.0,128,90.8,4.76,5.28,4.18,0.2,0.06,,


In [11]:
# Soil environmental property attribute table (TRHJXZ).
# Columns kept: YDBH YPBH YYPBH HG AS2 PB CD CR NI
hjxz_columns = ['YDBH', 'YPBH', 'YYPBH', 'HG', 'AS2', 'PB', 'CD', 'CR', 'NI']
hjxz_df = get_table_data(conn_str, 'TRHJXZ')[hjxz_columns]
# Row count, then the number of distinct YDBH values.
print(len(hjxz_df))
print(hjxz_df['YDBH'].nunique())
hjxz_df.sample(1)


947
861


Unnamed: 0,YDBH,YPBH,YYPBH,HG,AS2,PB,CD,CR,NI
797,5226230101000244,231228119506b14,522623010100024410,0.444,24.0,48.6,0.28,70.4,23.9


In [12]:
# Soil bulk density attribute table (TRRZ).
# Columns kept: YDBH YPBH TRRZ1 TRRZ2 TRRZ3 TRRZ4 TRRZPJZ
trrz_columns = ['YDBH', 'YPBH', 'TRRZ1', 'TRRZ2', 'TRRZ3', 'TRRZ4', 'TRRZPJZ']
trrz_df = get_table_data(conn_str, 'TRRZ')[trrz_columns]
# Row count, then the number of distinct YDBH values.
print(len(trrz_df))
print(trrz_df['YDBH'].nunique())
trrz_df.sample(1)


892
832


Unnamed: 0,YDBH,YPBH,TRRZ1,TRRZ2,TRRZ3,TRRZ4,TRRZPJZ
39,5226230101000130,522623010100013020,1.12,0.88,1.07,,1.02


In [13]:
# Site condition survey table (LDTJDCXX).
# Columns kept: YDBH MY MYQT MZ MZQT TDLYLX GZCHD
ldtj_columns = ['YDBH', 'MY', 'MYQT', 'MZ', 'MZQT', 'TDLYLX', 'GZCHD']
ldtj_df = get_table_data(conn_str, 'LDTJDCXX')[ldtj_columns]
# Row count, then the number of distinct YDBH values.
print(len(ldtj_df))
print(ldtj_df['YDBH'].nunique())
ldtj_df.sample(1)

861
861


Unnamed: 0,YDBH,MY,MYQT,MZ,MZQT,TDLYLX,GZCHD
284,5226230101000322,17,,LG,,101,18.0


In [14]:
# Profile morphology survey, basic information table (PMXTXDCJBXX).
# Columns kept: YDBH FSCS YXTCHD FSXTL FSXYL FSXTS FSXTZ
pmxx_columns = ['YDBH', 'FSCS', 'YXTCHD', 'FSXTL', 'FSXYL', 'FSXTS', 'FSXTZ']
pmxx_df = get_table_data(conn_str, 'PMXTXDCJBXX')[pmxx_columns]
# Row count, then the number of distinct YDBH values.
print(len(pmxx_df))
print(pmxx_df['YDBH'].nunique())
pmxx_df.sample(1)

21
21


Unnamed: 0,YDBH,FSCS,YXTCHD,FSXTL,FSXYL,FSXTS,FSXTZ
13,5226230101100020,4,110,水稻土,渗育水稻土,渗鳝泥田,黄渗鳝泥田


In [15]:
# Bulk density result: inner-join the sample-point attributes with TRRZ on YDBH.
trrz_result = dcyd_df.merge(trrz_df, on='YDBH', how='inner')
print(len(trrz_result))
trrz_result.sample(1)

892


Unnamed: 0,YDBH,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,SFCJSWXDTJTYP,SFJCJXZC,SFTTCPYD,TTCPMC,YPBH,TRRZ1,TRRZ2,TRRZ3,TRRZ4,TRRZPJZ
700,5226230103000794,0,1,107.95596522955054,27.159057977942016,107.95596522955054,27.159057977942016,107.95558402,27.15935145,927.612548828125,...,0,0,1,太子参,522623010300079420,1.13,1.19,1.18,,1.17


In [16]:
# Physical property result.
# Step 1: join DCYD with CYXX on YDBH.
temp_result = dcyd_df.merge(cyxx_df, on='YDBH', how='inner')
print(len(temp_result))
# Step 2: match the CYXX sample number (YPBH) against YYPBH of the physical table.
wlxz_result = temp_result.merge(wlxz_df, left_on='YPBH', right_on='YYPBH', how='inner')
print(len(wlxz_result))
wlxz_result.sample(1)

1917
1050


Unnamed: 0,YDBH_x,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,JXZC3,JXZC4,TRZD,SWXDTJT1,SWXDTJT2,SWXDTJT3,SWXDTJT4,SWXDTJT5,SWXDTJT6,SWXDTJT7
12,5226230101000041,0,1,108.24030048801389,26.986778336019945,108.24030048801389,26.986778336019945,108.24064666666666,26.987070000000003,639.1,...,31.8,43.9,壤质黏土,,,,,,,


In [17]:
# Chemical property result.
# Step 1: join DCYD with CYXX on YDBH.
temp_result = dcyd_df.merge(cyxx_df, on='YDBH', how='inner')
print(len(temp_result))
# Step 2: match the CYXX sample number (YPBH) against YYPBH of the chemical table.
hxxz_result = temp_result.merge(hxxz_df, left_on='YPBH', right_on='YYPBH', how='inner')
print(len(hxxz_result))
hxxz_result.sample(1)


1917
947


Unnamed: 0,YDBH_x,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,AS1,ASI,AFE,AMN,ACU,AZN,AB,AMO,CACO3,FE2O3
51,5226230101000178,0,1,108.2094117616556,26.91355760623388,108.2094117616556,26.91355760623388,108.20979708,26.91327718,815.025146484375,...,50.7,155,194,10.7,2.27,2.3,0.21,0.239,,


In [18]:
# Environmental property result.
# Step 1: join DCYD with CYXX on YDBH.
temp_result = dcyd_df.merge(cyxx_df, on='YDBH', how='inner')
print(len(temp_result))
# Step 2: match the CYXX sample number (YPBH) against YYPBH of the environmental table.
hjxz_result = temp_result.merge(hjxz_df, left_on='YPBH', right_on='YYPBH', how='inner')
print(len(hjxz_result))
hjxz_result.sample(1)


1917
947


Unnamed: 0,YDBH_x,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,CH,YDBH_y,YPBH_y,YYPBH,HG,AS2,PB,CD,CR,NI
325,5226230101100021,1,1,107.88849509248011,27.175171071421545,107.88849509248011,27.175171071421545,107.88469525,27.17052989,799.1475830078125,...,2,5226230101100021,240425179288b05,522623010110002112,0.088,10.5,21.1,0.16,92.0,30.5


In [19]:
def process_merge_result(df):
    """
    Clean up a merged DataFrame: drop the duplicated right-hand columns and
    restore the plain names of the left-hand ones.

    Columns whose label ends with '_y' are removed, and a trailing '_x' is
    stripped from the remaining labels. Only the suffix is touched, so a label
    that merely contains '_x' somewhere else (e.g. 'A_xyz') is left unchanged.

    Args:
        df: merged DataFrame to process; its column labels are expected to be strings.
    Returns:
        A new DataFrame; ``df`` itself is not modified.
    """
    # Drop the duplicated columns.
    result = df.loc[:, ~df.columns.str.endswith('_y')]
    # rename() returns a new frame, so the selection above is never mutated.
    result = result.rename(
        columns=lambda name: name[:-2] if name.endswith('_x') else name
    )
    return result

# Clean up each merged result (same order as the inputs).
result_wlxz, result_hxxz, result_hjxz, result_trrz = map(
    process_merge_result,
    (wlxz_result, hxxz_result, hjxz_result, trrz_result),
)


In [20]:
# Filter the data.
# Bulk density result: keep only the rows that have a usable TRRZPJZ value.
filter_trrz_result = result_trrz[
    result_trrz['TRRZPJZ'].notna()          # drop NaN / None
    & (result_trrz['TRRZPJZ'] != 'None')    # drop the literal string 'None'
    & (result_trrz['TRRZPJZ'] != '')        # drop empty strings
]
print(filter_trrz_result.shape[0])
# Drop duplicated YPBH values (the first occurrence is kept).
filter_trrz_result = remove_duplicate(filter_trrz_result, 'YPBH')
print(filter_trrz_result.shape[0])
# Drop duplicated YDBH values (the first occurrence is kept).
filter_trrz_result = remove_duplicate(filter_trrz_result, 'YDBH')
print(filter_trrz_result.shape[0])
# Compare the number of distinct YDBH values before and after processing.
print(f"原YDBH数量：{trrz_df['YDBH'].nunique()}")
print(f"去重后YDBH数量：{filter_trrz_result['YDBH'].nunique()}")


892
892
832
原YDBH数量：832
去重后YDBH数量：832


In [21]:
# Filter the physical property result: keep rows whose CH is missing or equal to '1'.
# NOTE(review): an earlier variant of this filter also required YPLX to be '1' or '2';
# that condition is not applied here - confirm this is intended.
filter_wlxz_result = result_wlxz[
    result_wlxz['CH'].isna() | (result_wlxz['CH'] == '1')
]
print(filter_wlxz_result.shape[0])
# Drop duplicated YYPBH values (the first occurrence is kept).
filter_wlxz_result = remove_duplicate(filter_wlxz_result, 'YYPBH')
print(filter_wlxz_result.shape[0])
# Drop duplicated YDBH values (the first occurrence is kept).
filter_wlxz_result = remove_duplicate(filter_wlxz_result, 'YDBH')
print(filter_wlxz_result.shape[0])
# Compare the number of distinct YDBH values before and after processing.
print(f"原YDBH数量：{wlxz_df['YDBH'].nunique()}")
print(f"去重后YDBH数量：{filter_wlxz_result['YDBH'].nunique()}")



986
961
861
原YDBH数量：861
去重后YDBH数量：861


In [22]:
# Filter the chemical property result: keep rows whose CH is missing or equal to '1'.
filter_hxxz_result = result_hxxz[
    result_hxxz['CH'].isna() | (result_hxxz['CH'] == '1')
]
print(filter_hxxz_result.shape[0])
# Drop duplicated YYPBH values (the first occurrence is kept).
filter_hxxz_result = remove_duplicate(filter_hxxz_result, 'YYPBH')
print(filter_hxxz_result.shape[0])
# Drop duplicated YDBH values (the first occurrence is kept).
filter_hxxz_result = remove_duplicate(filter_hxxz_result, 'YDBH')
print(filter_hxxz_result.shape[0])
# Compare the number of distinct YDBH values before and after processing.
print(f"原YDBH数量：{hxxz_df['YDBH'].nunique()}")
print(f"去重后YDBH数量：{filter_hxxz_result['YDBH'].nunique()}")

883
861
861
原YDBH数量：861
去重后YDBH数量：861


In [23]:
# Filter the environmental property result: keep rows whose CH is missing or equal to '1'.
filter_hjxz_result = result_hjxz[
    result_hjxz['CH'].isna() | (result_hjxz['CH'] == '1')
]
print(filter_hjxz_result.shape[0])
# Drop duplicated YYPBH values (the first occurrence is kept).
filter_hjxz_result = remove_duplicate(filter_hjxz_result, 'YYPBH')
print(filter_hjxz_result.shape[0])
# Drop duplicated YDBH values (the first occurrence is kept).
filter_hjxz_result = remove_duplicate(filter_hjxz_result, 'YDBH')
print(filter_hjxz_result.shape[0])
# Compare the number of distinct YDBH values before and after processing.
print(f"原YDBH数量：{hjxz_df['YDBH'].nunique()}")
print(f"去重后YDBH数量：{filter_hjxz_result['YDBH'].nunique()}")

883
861
861
原YDBH数量：861
去重后YDBH数量：861


In [24]:
# Mechanical composition check: compare the YDBH values that have a TRZD entry
# (not missing and not '/') after processing with those in the original table.
trzd_after = filter_wlxz_result['TRZD']
jxzc_df = filter_wlxz_result[trzd_after.notna() & (trzd_after != '/')]
# Same selection on the unprocessed table.
trzd_before = wlxz_df['TRZD']
jxzc_df_before = wlxz_df[trzd_before.notna() & (trzd_before != '/')]
# Distinct YDBH counts before / after, plus those flagged SFJCJXZC == '1'.
print(f"处理前机械组成YDBH数量：{jxzc_df_before['YDBH'].nunique()}")
print(f"处理后机械组成YDBH数量：{jxzc_df['YDBH'].nunique()}")
print(f"是否检测机械组成YDBH数量：{jxzc_df.loc[jxzc_df['SFJCJXZC'] == '1', 'YDBH'].nunique()}")


处理前机械组成YDBH数量：441
处理后机械组成YDBH数量：409
是否检测机械组成YDBH数量：409


In [25]:
# Aggregate composition check: compare the YDBH values that have a SWXDTJT7 entry
# (not missing and not '/') after processing with those in the original table.
swxdtjt7_after = filter_wlxz_result['SWXDTJT7']
stzc_df = filter_wlxz_result[swxdtjt7_after.notna() & (swxdtjt7_after != '/')]
# Same selection on the unprocessed table.
swxdtjt7_before = wlxz_df['SWXDTJT7']
stzc_df_before = wlxz_df[swxdtjt7_before.notna() & (swxdtjt7_before != '/')]
# Distinct YDBH counts before / after, plus those flagged SFCJSWXDTJTYP == '1'.
print(f"处理前水团组成YDBH数量：{stzc_df_before['YDBH'].nunique()}")
print(f"处理后水团组成YDBH数量：{stzc_df['YDBH'].nunique()}")
print(f"是否采集水团样品YDBH数量：{stzc_df.loc[stzc_df['SFCJSWXDTJTYP'] == '1', 'YDBH'].nunique()}")


处理前水团组成YDBH数量：84
处理后水团组成YDBH数量：48
是否采集水团样品YDBH数量：48


In [26]:
# Join all tables on YDBH, starting from filter_wlxz_result and left-joining
# filter_hxxz_result, filter_hjxz_result, filter_trrz_result, ldtj_df and pmxx_df in turn.
# After every join the '_y' duplicates are dropped and '_x' names are restored.
result_all = filter_wlxz_result
for right_df in (filter_hxxz_result, filter_hjxz_result, filter_trrz_result, ldtj_df, pmxx_df):
    result_all = pd.merge(result_all, right_df, on='YDBH', how='left')
    result_all = process_merge_result(result_all)

In [27]:
# Display the fully merged table as the cell output.
result_all

Unnamed: 0,YDBH,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,MZ,MZQT,TDLYLX,GZCHD,FSCS,YXTCHD,FSXTL,FSXYL,FSXTS,FSXTZ
0,5226230101000004,0,1,108.22565542568145000000,26.86609951060243000000,108.22565542568145000000,26.86609951060243000000,108.22584714000000000000,26.86599741000000000000,595.08813476562500000000,...,LG,,0101,17.00000000000000000000,,,,,,
1,5226230101000005,0,1,108.19765524879395000000,27.03515105655674000000,108.19765524879395000000,27.03515105655674000000,108.19787166666669000000,27.03490666666666400000,668.40000000000000000000,...,LG,,0101,16.00000000000000000000,,,,,,
2,5226230101000007,0,1,108.20763352157385000000,26.91999593072689500000,108.20763352157385000000,26.91999593072689500000,108.20756948000000000000,26.92055569000000000000,859.42663574218750000000,...,LG,,0103,17.00000000000000000000,,,,,,
3,5226230101000010,0,1,108.25214189576430000000,26.88062201718088000000,108.25214189576430000000,26.88062201718088000000,108.25254141000000000000,26.88037548000000000000,612.63403320312500000000,...,LG,,0101,16.00000000000000000000,,,,,,
4,5226230101000011,0,1,108.24286786921321000000,26.92059691135055400000,108.24286786921321000000,26.92059691135055400000,108.24325880000000000000,26.91994034000000000000,940.40063476562500000000,...,LG,,0101,18.00000000000000000000,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
856,5226230404000035,0,1,108.11228920007221000000,26.96791852284607700000,108.11228920007221000000,26.96791852284607700000,108.11286999999999000000,26.96733999999999700000,624.30000000000000000000,...,LG,,0103,18.00000000000000000000,,,,,,
857,5226230404000147,0,1,108.11740994569828000000,27.06897859257153300000,108.11618158000000000000,27.06971582000000000000,108.11618158000000000000,27.06971582000000000000,696.22851562500000000000,...,LG,,0103,15.00000000000000000000,,,,,,
858,5226230404000246,0,1,108.14671454635385000000,27.12928098055501300000,108.14648728000000000000,27.13013274000000000000,108.14648728000000000000,27.13013274000000000000,1123.21398925781250000000,...,LG,,0103,16.00000000000000000000,,,,,,
859,5226230404000330,0,1,108.12410655081632000000,27.04750800290550000000,108.11710200000000000000,27.04835000000000000000,108.11692392000000000000,27.04877168000000000000,505.14965820312500000000,...,LG,,0103,14.00000000000000000000,,,,,,


In [28]:
# Export the full merged table to a timestamped .xlsx file.
save_path = r"E:\soil_property_result\sbx\table"
os.makedirs(save_path, exist_ok=True)
result_all_file = os.path.join(save_path, f'result_all_{time.strftime("%Y%m%d_%H%M%S")}.xlsx')
result_all.to_excel(result_all_file, index=False)

In [29]:
# Export the subset of columns used for analysis to a timestamped .xlsx file.
# Columns kept: YDBH DWJD DWWD DWGC TRZD PH CEC OM TN TP TK TSE AP SK AK
#               HG AS2 PB CD CR TRRZPJZ GZCHD YXTCHD FSXTL FSXYL FSXTS FSXTZ
ana_columns = [
    'YDBH', 'DWJD', 'DWWD', 'DWGC', 'TRZD', 'PH', 'CEC', 'OM', 'TN', 'TP', 'TK', 'TSE',
    'AP', 'SK', 'AK', 'HG', 'AS2', 'PB', 'CD', 'CR', 'TRRZPJZ', 'GZCHD', 'YXTCHD',
    'FSXTL', 'FSXYL', 'FSXTS', 'FSXTZ',
]
# TRRZPJZ is exported under the shorter name TRRZ.
ana_df = result_all[ana_columns].rename(columns={'TRRZPJZ': 'TRRZ'})
ana_file = os.path.join(save_path, f'result_ana_df_{time.strftime("%Y%m%d_%H%M%S")}.xlsx')
ana_df.to_excel(ana_file, index=False)