In [2]:
# type:ignore
import os
import time
import pyodbc
import pandas as pd

In [3]:
# Open a connection and list the tables it exposes
def get_tables(conn_str):
    """Return the names of all catalog entries of type ``'TABLE'``.

    Args:
        conn_str: ODBC connection string handed to ``pyodbc.connect``.

    Returns:
        list: the ``table_name`` attribute of every row yielded by
        ``cursor.tables(tableType='TABLE')``.
    """
    conn = pyodbc.connect(conn_str)
    try:
        # The cursor is created inside the try so that the connection is
        # still closed when cursor creation itself fails.
        cursor = conn.cursor()
        try:
            return [entry.table_name for entry in cursor.tables(tableType='TABLE')]
        finally:
            cursor.close()
    finally:
        conn.close()

# Read a whole table and return it as a pandas DataFrame
def get_table_data(conn_str,table_name):
    """Load every row of ``table_name`` into a DataFrame.

    Args:
        conn_str: ODBC connection string handed to ``pyodbc.connect``.
        table_name: Name of the table to read. It is interpolated directly
            into the SQL text, so it must come from a trusted source.

    Returns:
        pandas.DataFrame: the fetched rows, with columns named after the
        first field of each ``cursor.description`` entry.
    """
    conn = pyodbc.connect(conn_str)
    try:
        # The cursor is created inside the try so that the connection is
        # still closed when cursor creation itself fails.
        cursor = conn.cursor()
        try:
            cursor.execute(f"SELECT * FROM {table_name}")
            # Column names come from the cursor metadata
            columns = [field[0] for field in cursor.description]
            return pd.DataFrame.from_records(cursor.fetchall(), columns=columns)
        finally:
            cursor.close()
    finally:
        conn.close()
# Count the repeated values in one column of a DataFrame
def check_duplicate(df,column_name):
    """Return how many entries of ``df[column_name]`` repeat an earlier value."""
    repeated = df[column_name].duplicated()
    return repeated.sum()

# Drop rows that repeat a value in one column (the first occurrence is kept)
def remove_duplicate(df,column_name):
    """Return ``df`` without the rows whose ``column_name`` value already appeared."""
    is_repeat = df.duplicated(subset=[column_name], keep='first')
    return df[~is_repeat]


In [4]:
# Location of the MDB file; the file extension must be part of the path
mdb_file = r"F:\cache_data\MDB\520424_关岭县.mdb"  # includes the .mdb extension

# Stop right away when the file is not there
if not os.path.exists(mdb_file):
    raise FileNotFoundError(f"找不到MDB文件: {mdb_file}")

# Assemble the ODBC connection string: driver clause followed by the DBQ clause
conn_str = ''.join((
    r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};',
    r'DBQ=', mdb_file, ';',
))

In [5]:
# Fetch the names of all tables and print them together with their count
tables = get_tables(conn_str)
print(tables,len(tables))


['CYXX', 'DCYD', 'FJWJ', 'JCFF', 'LDTJDCXX', 'PMXTXDCFCXX', 'PMXTXDCJBXX', 'SFYL', 'SXDM', 'SYS', 'TRHJXZ', 'TRHXXZ', 'TRRZ', 'TRWLXZ', 'YDSQ', 'YPLZ', 'YPLZQD', 'YPZB', 'ZKYP'] 19


In [6]:
# Load the survey sample-site attribute table (DCYD).
# Columns used: YDBH YDLB CYLX BSJD BSWD JD WD DWJD DWWD DWGC TL YL TS TZ SFCJSWXDTJTYP SFJCJXZC SFTTCPYD TTCPMC
# TS is renamed to TSS so it does not clash with the TS column of other tables.
dcyd_df = (
    get_table_data(conn_str,'DCYD')[[
        'YDBH','YDLB','CYLX','BSJD','BSWD','JD','WD','DWJD','DWWD','DWGC',
        'TL','YL','TS','TZ','SFCJSWXDTJTYP','SFJCJXZC','SFTTCPYD','TTCPMC',
    ]]
    .rename(columns={'TS':'TSS'})
)
# Row count, then the number of distinct YDBH values
print(len(dcyd_df))
print(dcyd_df['YDBH'].nunique())
dcyd_df.sample(1)

959
959


Unnamed: 0,YDBH,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,TL,YL,TSS,TZ,SFCJSWXDTJTYP,SFJCJXZC,SFTTCPYD,TTCPMC
15,5204240101000139,0,1,105.75295908204475,25.73198096812754,105.75295908204475,25.73198096812754,105.75296183,25.73248834,502.2425537109375,黄壤,黄壤性土,砂泥质黄壤性土,中层砂泥质黄壤性土,0,1,0,


In [7]:
# Load the sampling-information attribute table (CYXX).
# Columns used: YDBH YPBH YPLX CH
cyxx_df = get_table_data(conn_str,'CYXX')[['YDBH','YPBH','YPLX','CH']]
# Row count, then the number of distinct YDBH values
print(len(cyxx_df))
print(cyxx_df['YDBH'].nunique())
cyxx_df.sample(1)

2149
959


Unnamed: 0,YDBH,YPBH,YPLX,CH
1333,5204240204000293,520424020400029320,4,


In [8]:
# Load the soil physical-properties attribute table (TRWLXZ).
# Columns used: YDBH YPBH YYPBH JXZCXSL JXZC1 JXZC2 JXZC3 JXZC4 TRZD SWXDTJT1 SWXDTJT2 SWXDTJT3 SWXDTJT4 SWXDTJT5 SWXDTJT6 SWXDTJT7
wlxz_df = get_table_data(conn_str,'TRWLXZ')[[
    'YDBH','YPBH','YYPBH','JXZCXSL','JXZC1','JXZC2','JXZC3','JXZC4','TRZD',
    'SWXDTJT1','SWXDTJT2','SWXDTJT3','SWXDTJT4','SWXDTJT5','SWXDTJT6','SWXDTJT7',
]]
# Row count, then the number of distinct YDBH values
print(len(wlxz_df))
print(wlxz_df['YDBH'].nunique())
wlxz_df.sample(1)


1189
959


Unnamed: 0,YDBH,YPBH,YYPBH,JXZCXSL,JXZC1,JXZC2,JXZC3,JXZC4,TRZD,SWXDTJT1,SWXDTJT2,SWXDTJT3,SWXDTJT4,SWXDTJT5,SWXDTJT6,SWXDTJT7
889,5204240103000302,231220108830b34,520424010300030210,,,,,,,,,,,,,


In [9]:
# Load the soil chemical-properties attribute table (TRHXXZ).
# Columns used: YDBH YPBH YYPBH FGSYHSL PH JHXSZL ECH ECAL SJXZSD CEC JHXYJZL ECA EMG ENA EK SRXYZL DDL SRXNLZ SRXJLZ SRXGLZ
# SRXMLZ SRXTSG SRXTSQG SRXLSG SRXLG LZZL OM TN TP TK TS TB TSI TSE TFE TMN TCU TZN TMO TAL TCA TMG AP SK AK AS1 ASI AFE AMN ACU AZN AB AMO CACO3 FE2O3
hxxz_df = get_table_data(conn_str,'TRHXXZ')[[
    'YDBH','YPBH','YYPBH','FGSYHSL','PH','JHXSZL','ECH','ECAL','SJXZSD','CEC','JHXYJZL','ECA','EMG','ENA',
    'EK','SRXYZL','DDL','SRXNLZ','SRXJLZ','SRXGLZ','SRXMLZ','SRXTSG','SRXTSQG','SRXLSG','SRXLG','LZZL',
    'OM','TN','TP','TK','TS','TB','TSI','TSE','TFE','TMN','TCU','TZN','TMO','TAL','TCA','TMG','AP',
    'SK','AK','AS1','ASI','AFE','AMN','ACU','AZN','AB','AMO','CACO3','FE2O3',
]]
# Row count, then the number of distinct YDBH values
print(len(hxxz_df))
print(hxxz_df['YDBH'].nunique())
hxxz_df.sample(1)

1067
959


Unnamed: 0,YDBH,YPBH,YYPBH,FGSYHSL,PH,JHXSZL,ECH,ECAL,SJXZSD,CEC,...,AS1,ASI,AFE,AMN,ACU,AZN,AB,AMO,CACO3,FE2O3
584,5204240103000508,231120070484b35,520424010300050810,2.8,7.55,,,,,15.8,...,3.35,/,30.6,19.2,1.56,0.64,0.47,0.064,,


In [10]:
# Load the soil environmental-properties attribute table (TRHJXZ).
# Columns used: YDBH YPBH YYPBH HG AS2 PB CD CR NI
hjxz_df = get_table_data(conn_str,'TRHJXZ')[['YDBH','YPBH','YYPBH','HG','AS2','PB','CD','CR','NI']]
# Row count, then the number of distinct YDBH values
print(len(hjxz_df))
print(hjxz_df['YDBH'].nunique())
hjxz_df.sample(1)


1067
959


Unnamed: 0,YDBH,YPBH,YYPBH,HG,AS2,PB,CD,CR,NI
599,5204240101100011,240522190381b04,520424010110001114,0.266,23.9,60.9,0.92,130,59.5


In [11]:
# Load the soil bulk-density attribute table (TRRZ).
# Columns used: YDBH YPBH TRRZ1 TRRZ2 TRRZ3 TRRZ4 TRRZPJZ
trrz_df = get_table_data(conn_str, 'TRRZ')[['YDBH','YPBH','TRRZ1','TRRZ2','TRRZ3','TRRZ4','TRRZPJZ']]
# Row count, then the number of distinct YDBH values
print(len(trrz_df))
print(trrz_df['YDBH'].nunique())
trrz_df.sample(1)


990
913


Unnamed: 0,YDBH,YPBH,TRRZ1,TRRZ2,TRRZ3,TRRZ4,TRRZPJZ
725,5204240103100021,520424010310002121,1.21,1.27,1.25,,1.24


In [12]:
# Load the site-condition survey table (LDTJDCXX).
# Columns used: YDBH MY MYQT MZ MZQT TDLYLX GZCHD
ldtj_df = get_table_data(conn_str,'LDTJDCXX')[['YDBH','MY','MYQT','MZ','MZQT','TDLYLX','GZCHD']]
# Row count, then the number of distinct YDBH values
print(len(ldtj_df))
print(ldtj_df['YDBH'].nunique())
ldtj_df.sample(1)

959
959


Unnamed: 0,YDBH,MY,MYQT,MZ,MZQT,TDLYLX,GZCHD
293,5204240103000289,19,,LG,,103,12.0


In [13]:
# Load the profile-morphology survey basic-information table (PMXTXDCJBXX).
# Columns used: YDBH FSCS YXTCHD FSXTL FSXYL FSXTS FSXTZ
pmxx_df = get_table_data(conn_str,'PMXTXDCJBXX')[['YDBH','FSCS','YXTCHD','FSXTL','FSXYL','FSXTS','FSXTZ']]
# Row count, then the number of distinct YDBH values
print(len(pmxx_df))
print(pmxx_df['YDBH'].nunique())
pmxx_df.sample(1)

31
31


Unnamed: 0,YDBH,FSCS,YXTCHD,FSXTL,FSXYL,FSXTS,FSXTZ
12,5204240101100022,5,120,水稻土,潴育水稻土,砂泥田,黄砂泥田


In [14]:
# Bulk-density result: inner join of the site table with TRRZ on YDBH
trrz_result = dcyd_df.merge(trrz_df, on='YDBH', how='inner')
print(len(trrz_result))
trrz_result.sample(1)

990


Unnamed: 0,YDBH,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,SFCJSWXDTJTYP,SFJCJXZC,SFTTCPYD,TTCPMC,YPBH,TRRZ1,TRRZ2,TRRZ3,TRRZ4,TRRZPJZ
446,5204240103000485,0,1,105.687591723645,25.82466120410777,105.687591723645,25.82466120410777,105.68765933,25.82427974,770.228271484375,...,0,1,0,,520424010300048520,1.16,1.26,1.2,,1.21


In [15]:
# Physical-properties result: join DCYD with CYXX on YDBH, then attach
# TRWLXZ by matching the joined YPBH against its YYPBH
temp_result = dcyd_df.merge(cyxx_df, on='YDBH', how='inner')
print(len(temp_result))
wlxz_result = temp_result.merge(wlxz_df, left_on='YPBH', right_on='YYPBH', how='inner')
print(len(wlxz_result))
wlxz_result.sample(1)

2149
1189


Unnamed: 0,YDBH_x,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,JXZC3,JXZC4,TRZD,SWXDTJT1,SWXDTJT2,SWXDTJT3,SWXDTJT4,SWXDTJT5,SWXDTJT6,SWXDTJT7
111,5204240101000608,0,1,105.61675096914048,25.721711023150757,105.61675096914048,25.721711023150757,105.61628833333334,25.72198333333333,1172.3,...,,,,,,,,,,


In [16]:
# Chemical-properties result: join DCYD with CYXX on YDBH, then attach
# TRHXXZ by matching the joined YPBH against its YYPBH
temp_result = dcyd_df.merge(cyxx_df, on='YDBH', how='inner')
print(len(temp_result))
hxxz_result = temp_result.merge(hxxz_df, left_on='YPBH', right_on='YYPBH', how='inner')
print(len(hxxz_result))
hxxz_result.sample(1)


2149
1067


Unnamed: 0,YDBH_x,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,AS1,ASI,AFE,AMN,ACU,AZN,AB,AMO,CACO3,FE2O3
373,5204240103000299,0,2,105.6237623967537,25.821198596033334,105.6237623967537,25.821198596033334,105.62293933,25.82128197,963.778076171875,...,23.0,,45.7,186,1.9,2.13,0.44,2.2,,


In [17]:
# Environmental-properties result: join DCYD with CYXX on YDBH, then attach
# TRHJXZ by matching the joined YPBH against its YYPBH
temp_result = dcyd_df.merge(cyxx_df, on='YDBH', how='inner')
print(len(temp_result))
hjxz_result = temp_result.merge(hjxz_df, left_on='YPBH', right_on='YYPBH', how='inner')
print(len(hjxz_result))
hjxz_result.sample(1)


2149
1067


Unnamed: 0,YDBH_x,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,CH,YDBH_y,YPBH_y,YYPBH,HG,AS2,PB,CD,CR,NI
315,5204240103000222,0,1,105.58464227051896,26.021919107622647,105.58464227051896,26.021919107622647,105.58489387,26.0217608,1112.5899658203125,...,,5204240103000222,240110131850b03,520424010300022210,0.42,45.0,35.8,0.34,84.6,39.4


In [18]:
def process_merge_result(df):
    """
    Clean up a merged DataFrame: drop the '_y' columns and restore the
    names of the '_x' columns.

    Columns whose name ends with '_y' are discarded. A trailing '_x' is
    removed from the remaining names; an '_x' occurring elsewhere in a name
    is left untouched. Non-string column labels are kept unchanged.

    Args:
        df: DataFrame to clean. It is not modified.
    Returns:
        A new DataFrame with the cleaned columns.
    """
    def _is_right_duplicate(name):
        # True for string labels carrying the '_y' suffix
        return isinstance(name, str) and name.endswith('_y')

    def _strip_left_suffix(name):
        # Remove only a trailing '_x'; anything else passes through as is
        if isinstance(name, str) and name.endswith('_x'):
            return name[:-2]
        return name

    keep_mask = [not _is_right_duplicate(name) for name in df.columns]
    # Copy so later edits of the result never touch the caller's frame
    result = df.loc[:, keep_mask].copy()
    return result.rename(columns=_strip_left_suffix)

# Clean each of the merged results
result_wlxz, result_hxxz, result_hjxz, result_trrz = (
    process_merge_result(merged)
    for merged in (wlxz_result, hxxz_result, hjxz_result, trrz_result)
)


In [19]:
# def safe_float(x):
#     try:
#         return float(x) if pd.notna(x) and x != 'None' else 0
#     except:
#         return 0


# result_trrz['TRRZPJZ'] = result_trrz['TRRZ1'].apply(lambda x: safe_float(x))+result_trrz['TRRZ2'].apply(lambda x: safe_float(x))+result_trrz['TRRZ3'].apply(lambda x: safe_float(x))
# result_trrz['TRRZPJZ'] = result_trrz['TRRZPJZ']/3

In [20]:
# Filter the bulk-density result: keep rows whose TRRZPJZ is present,
# i.e. not NaN, not the string 'None' and not an empty string
filter_trrz_result = result_trrz[
    result_trrz['TRRZPJZ'].notna()
    & (result_trrz['TRRZPJZ'] != 'None')
    & (result_trrz['TRRZPJZ'] != '')
]
print(filter_trrz_result.shape[0])
# Drop repeated YPBH values (first occurrence kept)
filter_trrz_result = remove_duplicate(filter_trrz_result,'YPBH')
print(filter_trrz_result.shape[0])
# Drop repeated YDBH values (first occurrence kept)
filter_trrz_result = remove_duplicate(filter_trrz_result,'YDBH')
print(filter_trrz_result.shape[0])
# Compare the number of distinct YDBH values before and after processing
print(f"原YDBH数量：{trrz_df['YDBH'].nunique()}")
print(f"去重后YDBH数量：{filter_trrz_result['YDBH'].nunique()}")


990
990
913
原YDBH数量：913
去重后YDBH数量：913


In [21]:
# Filter the physical-properties result: keep rows whose CH is missing or '1'.
# A stricter variant that also requires YPLX to be '1' or '2' is kept here, disabled:
# filter_wlxz_result = result_wlxz[
#     (result_wlxz['YPLX'].isin(['1','2',])) & # YPLX is 1 or 2
#     ((result_wlxz['CH'].isna()) | (result_wlxz['CH'] == '1')) # CH is missing or 1
# ]
filter_wlxz_result = result_wlxz[
    result_wlxz['CH'].isna() | (result_wlxz['CH'] == '1')
]
print(filter_wlxz_result.shape[0])
# Drop repeated YYPBH values (first occurrence kept)
filter_wlxz_result = remove_duplicate(filter_wlxz_result,'YYPBH')
print(filter_wlxz_result.shape[0])
# Drop repeated YDBH values (first occurrence kept)
# NOTE(review): keeping only the first row per YDBH may discard rows that
# carry measurements — confirm this is the intended selection.
filter_wlxz_result = remove_duplicate(filter_wlxz_result,'YDBH')
print(filter_wlxz_result.shape[0])
# Compare the number of distinct YDBH values before and after processing
print(f"原YDBH数量：{wlxz_df['YDBH'].nunique()}")
print(f"去重后YDBH数量：{filter_wlxz_result['YDBH'].nunique()}")



1107
1077
959
原YDBH数量：959
去重后YDBH数量：959


In [22]:
# Filter the chemical-properties result: keep rows whose CH is missing or '1'
filter_hxxz_result = result_hxxz[
    result_hxxz['CH'].isna() | (result_hxxz['CH'] == '1')
]
print(filter_hxxz_result.shape[0])
# Drop repeated YYPBH values (first occurrence kept)
filter_hxxz_result = remove_duplicate(filter_hxxz_result,'YYPBH')
print(filter_hxxz_result.shape[0])
# Drop repeated YDBH values (first occurrence kept)
filter_hxxz_result = remove_duplicate(filter_hxxz_result,'YDBH')
print(filter_hxxz_result.shape[0])
# Compare the number of distinct YDBH values before and after processing
print(f"原YDBH数量：{hxxz_df['YDBH'].nunique()}")
print(f"去重后YDBH数量：{filter_hxxz_result['YDBH'].nunique()}")

985
959
959
原YDBH数量：959
去重后YDBH数量：959


In [23]:
# Filter the environmental-properties result: keep rows whose CH is missing or '1'
filter_hjxz_result = result_hjxz[
    result_hjxz['CH'].isna() | (result_hjxz['CH'] == '1')
]
print(filter_hjxz_result.shape[0])
# Drop repeated YYPBH values (first occurrence kept)
filter_hjxz_result = remove_duplicate(filter_hjxz_result,'YYPBH')
print(filter_hjxz_result.shape[0])
# Drop repeated YDBH values (first occurrence kept)
filter_hjxz_result = remove_duplicate(filter_hjxz_result,'YDBH')
print(filter_hjxz_result.shape[0])
# Compare the number of distinct YDBH values before and after processing
print(f"原YDBH数量：{hjxz_df['YDBH'].nunique()}")
print(f"去重后YDBH数量：{filter_hjxz_result['YDBH'].nunique()}")

985
959
959
原YDBH数量：959
去重后YDBH数量：959


In [24]:
# Mechanical-composition check: compare the YDBH coverage of rows with a
# usable TRZD (not NaN and not '/') after processing against the source table
jxzc_df = filter_wlxz_result.loc[
    lambda frame: frame['TRZD'].notna() & frame['TRZD'].ne('/')
]
# Same selection applied to the unprocessed table
jxzc_df_before = wlxz_df.loc[
    lambda frame: frame['TRZD'].notna() & frame['TRZD'].ne('/')
]
# Distinct YDBH before / after, and among the rows whose SFJCJXZC flag is '1'
print(f"处理前机械组成YDBH数量：{jxzc_df_before['YDBH'].nunique()}")
print(f"处理后机械组成YDBH数量：{jxzc_df['YDBH'].nunique()}")
print(f"是否检测机械组成YDBH数量：{jxzc_df.loc[jxzc_df['SFJCJXZC'].eq('1'), 'YDBH'].nunique()}")


处理前机械组成YDBH数量：503
处理后机械组成YDBH数量：467
是否检测机械组成YDBH数量：459


In [25]:
# Water-stable aggregate check: compare the YDBH coverage of rows with a
# usable SWXDTJT7 (not NaN and not '/') after processing against the source table
stzc_df = filter_wlxz_result.loc[
    lambda frame: frame['SWXDTJT7'].notna() & frame['SWXDTJT7'].ne('/')
]
# Same selection applied to the unprocessed table
stzc_df_before = wlxz_df.loc[
    lambda frame: frame['SWXDTJT7'].notna() & frame['SWXDTJT7'].ne('/')
]
# Distinct YDBH before / after, and among the rows whose SFCJSWXDTJTYP flag is '1'
print(f"处理前水团组成YDBH数量：{stzc_df_before['YDBH'].nunique()}")
print(f"处理后水团组成YDBH数量：{stzc_df['YDBH'].nunique()}")
print(f"是否采集水团样品YDBH数量：{stzc_df.loc[stzc_df['SFCJSWXDTJTYP'].eq('1'), 'YDBH'].nunique()}")


处理前水团组成YDBH数量：118
处理后水团组成YDBH数量：61
是否采集水团样品YDBH数量：61


In [26]:
# Combine everything on YDBH: start from filter_wlxz_result and left-join
# filter_hxxz_result, filter_hjxz_result, filter_trrz_result, ldtj_df and
# pmxx_df in that order, cleaning the suffixed columns after every join
result_all = filter_wlxz_result
for right_df in (filter_hxxz_result, filter_hjxz_result, filter_trrz_result, ldtj_df, pmxx_df):
    result_all = process_merge_result(
        pd.merge(result_all, right_df, on='YDBH', how='left')
    )

In [27]:
# Display the combined table as the cell output
result_all

Unnamed: 0,YDBH,YDLB,CYLX,BSJD,BSWD,JD,WD,DWJD,DWWD,DWGC,...,MZ,MZQT,TDLYLX,GZCHD,FSCS,YXTCHD,FSXTL,FSXYL,FSXTS,FSXTZ
0,5204240101000010,0,1,105.54966706400991000000,25.76881758236020300000,105.54966706400991000000,25.76881758236020300000,105.55005064431995000000,25.76851349960422500000,1376.08615453075620000000,...,LG,,0103,17.00000000000000000000,,,,,,
1,5204240101000029,0,1,105.64328842428895000000,25.76572406300331000000,105.64328842428895000000,25.76572406300331000000,105.64341500000000000000,25.76566333333333600000,1133.80000000000000000000,...,LG,,0103,20.00000000000000000000,,,,,,
2,5204240101000031,0,1,105.71419015346864000000,25.75862956392805000000,105.71419015346864000000,25.75862956392805000000,105.71454735000000000000,25.75812693333333300000,667.80000000000000000000,...,LG,,0101,16.00000000000000000000,,,,,,
3,5204240101000052,0,1,105.72055155758960000000,25.79017842493067600000,105.72055155758960000000,25.79017842493067600000,105.72074325000000000000,25.79056471666667000000,609.20000000000000000000,...,LG,,0103,15.00000000000000000000,,,,,,
4,5204240101000056,0,1,105.50183243248450000000,25.96080067485575400000,105.50183243248450000000,25.96080067485575400000,105.50169391000000000000,25.96055941000000000000,1350.64147949218750000000,...,LG,,0103,16.00000000000000000000,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
954,5204240404000751,0,1,105.52425265421532000000,26.05032541854952000000,105.52425265421532000000,26.05032541854952000000,105.52373350000000000000,26.04991092000000000000,1082.58044433593750000000,...,LG,,0404,,,,,,,
955,5204240404000824,0,1,105.71470300074887000000,25.86805448010204000000,105.71470300074887000000,25.86805448010204000000,105.71467899280475000000,25.86751035737260200000,1027.27106010042800000000,...,LG,,0404,,,,,,,
956,5204240404000852,0,1,105.46116268275941000000,25.80754057614711500000,105.46116268275941000000,25.80754057614711500000,105.46147699382051000000,25.80712187305877000000,1068.08528639283030000000,...,LG,,0404,,,,,,,
957,5204240404000893,0,1,105.54560212784627000000,26.00361688827381000000,105.54410551666666000000,26.00434700000000000000,105.54410551666666000000,26.00434700000000000000,1461.40000000000000000000,...,LG,,0404,,,,,,,


In [28]:
# Export the full combined table to a timestamped .xlsx file
save_path = r"G:\soil_property_result\glx\table"
os.makedirs(save_path,exist_ok=True)
result_all_xlsx = os.path.join(save_path,f'result_all_{time.strftime("%Y%m%d_%H%M%S")}.xlsx')
result_all.to_excel(result_all_xlsx,index=False)

In [29]:
# Export the analysis subset to a timestamped .xlsx file.
# Columns used: YDBH DWJD DWWD DWGC TRZD PH CEC OM TN TP TK TSE AP SK AK HG AS2 PB CD CR TRRZPJZ GZCHD YXTCHD FSXTL FSXYL FSXTS FSXTZ
# TRRZPJZ is exported under the name TRRZ.
ana_df = (
    result_all[[
        'YDBH','DWJD','DWWD','DWGC','TRZD','PH','CEC','OM','TN','TP','TK','TSE','AP','SK','AK',
        'HG','AS2','PB','CD','CR','TRRZPJZ','GZCHD','YXTCHD','FSXTL', 'FSXYL', 'FSXTS', 'FSXTZ',
    ]]
    .copy()
    .rename(columns={'TRRZPJZ':'TRRZ'})
)
ana_df.to_excel(os.path.join(save_path,f'result_ana_df_{time.strftime("%Y%m%d_%H%M%S")}.xlsx'),index=False)