In [33]:
import os
import json
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
from pykrige.ok import OrdinaryKriging
from sklearn.metrics import r2_score
from pyproj import Transformer
from tqdm import tqdm
import pickle

In [34]:
# Get the current date
def get_dtime():
    """Return the current local date as a compact ``YYYYMMDD`` string.

    Returns:
        str: Eight-digit date stamp, e.g. ``'20240516'``.
    """
    from datetime import datetime

    today = datetime.now()
    return today.strftime('%Y%m%d')

In [35]:
# Load the trained AutoGluon predictors, one per soil group (sdt / fzrt / zrt).
# Each predictor lives in the sub-directory "<group>_model" under model_path.
model_path = r"F:\cache_data\zone_ana\dy\modle\autogluon_type_class"
sdt_predictor, fzrt_predictor, zrt_predictor = (
    TabularPredictor.load(os.path.join(model_path, f"{soil_group}_model"))
    for soil_group in ('sdt', 'fzrt', 'zrt')
)

In [36]:
# Show the SDT predictor's best model together with the names of all models it contains
sdt_predictor.model_best,sdt_predictor.model_names()

('WeightedEnsemble_L2',
 ['NeuralNetFastAI',
  'RandomForestGini',
  'RandomForestEntr',
  'ExtraTreesGini',
  'ExtraTreesEntr',
  'NeuralNetTorch',
  'WeightedEnsemble_L2'])

In [37]:
# Show the FZRT predictor's best model together with the names of all models it contains
fzrt_predictor.model_best,fzrt_predictor.model_names()

('WeightedEnsemble_L2',
 ['NeuralNetFastAI',
  'RandomForestGini',
  'RandomForestEntr',
  'ExtraTreesGini',
  'ExtraTreesEntr',
  'NeuralNetTorch',
  'WeightedEnsemble_L2'])

In [38]:
# Show the ZRT predictor's best model together with the names of all models it contains
zrt_predictor.model_best,zrt_predictor.model_names()

('WeightedEnsemble_L2',
 ['NeuralNetFastAI',
  'RandomForestGini',
  'RandomForestEntr',
  'ExtraTreesGini',
  'ExtraTreesEntr',
  'NeuralNetTorch',
  'WeightedEnsemble_L2'])

In [39]:
# Get the input feature names of each predictor; these lists are later used to
# select the columns passed to predict()
sdt_feature_names = sdt_predictor.feature_metadata_in.get_features()
print("SDT",sdt_feature_names)
fzrt_feature_names = fzrt_predictor.feature_metadata_in.get_features()
print("FZRT",fzrt_feature_names)
zrt_feature_names = zrt_predictor.feature_metadata_in.get_features()
print("ZRT",zrt_feature_names)

SDT ['DLMC', 'XMin', 'XMax', 'YMin', 'YMax', 'Centroid_X', 'Centroid_Y', 'Riverdis', 'DEM_MAX', 'DEM_MIN', 'DEM_MEAN', 'DEM_RANGE', 'DEM_AREA', 'ndvi_MEAN', 'PCA_0_MEAN', 'Slope_MAX', 'Slope_MIN', 'Slope_MEAN', 'Slope_RANGE', 'Aspect_MEAN', 'MRVBF_MAX', 'MRVBF_MIN', 'MRVBF_MEAN', 'MRVBF_RANGE', 'TopographicWetnessIndex_MEAN', 'TopographicWetnessIndex_RANGE', 'Mean_MEAN', 'PH_MEAN', 'PRE_MEAN', 'PRE_RANGE', 'SRA_MEAN', 'SRA_RANGE', 'TMP_MEAN', 'TMP_RANGE', 'VAP_MEAN', 'VAP_RANGE', 'WIND_MEAN', 'WIND_RANGE', 'PlanCurvature_MEAN', 'PlanCurvature_RANGE']
FZRT ['DLMC', 'XMin', 'XMax', 'YMin', 'YMax', 'Centroid_X', 'Centroid_Y', 'DEM_MAX', 'DEM_MIN', 'DEM_MEAN', 'DEM_RANGE', 'DEM_AREA', 'ndvi_MEAN', 'PCA_0_MEAN', 'Slope_MAX', 'Slope_MIN', 'Slope_MEAN', 'Slope_RANGE', 'Aspect_MEAN', 'MRVBF_MAX', 'MRVBF_MIN', 'MRVBF_MEAN', 'MRVBF_RANGE', 'TopographicWetnessIndex_MEAN', 'TopographicWetnessIndex_RANGE', 'Mean_MEAN', 'PH_MEAN', 'PRE_MEAN', 'PRE_RANGE', 'SRA_MEAN', 'SRA_RANGE', 'TMP_MEAN', 'TMP_RA

In [40]:
# Directory in which the prediction results are stored
result_path =  r"F:\cache_data\zone_ana\dy\prediction_result"

In [41]:
# Path of the dataset (CSV) to read the prediction features from
feature_path = r'F:\cache_data\zone_ana\dy\prediction_data\result.csv'

In [42]:
# Load the feature table into a DataFrame
feature_df = pd.read_csv(feature_path)

In [43]:
# Split the data by land-use type (DLMC column)
# Select the paddy-soil (SDT) rows
sdt_data = feature_df.loc[
    feature_df['DLMC'].isin(['水田','水浇地','坑塘水面','养殖坑塘','内陆滩涂'])
]

In [44]:
# Select the non-natural-soil (FZRT) rows: every land-use type that belongs to
# neither the natural-soil list nor the paddy-soil list
fzrt_data = feature_df.loc[
    ~feature_df['DLMC'].isin(['乔木林地','灌木林地','竹林地','其他林地','其他草地','天然牧草地','人工牧草地','水田','水浇地','坑塘水面','养殖坑塘','内陆滩涂'])
]

In [45]:
# Select the natural-soil (ZRT) rows
zrt_data = feature_df.loc[
    feature_df['DLMC'].isin(['乔木林地','灌木林地','竹林地','其他林地','其他草地','天然牧草地','人工牧草地'])
]

In [46]:
# Check data completeness: show each subset's shape, the full table's shape,
# and whether the three subsets' row counts add up to the full row count
(
    sdt_data.shape,
    fzrt_data.shape,
    zrt_data.shape,
    feature_df.shape,
    len(sdt_data) + len(fzrt_data) + len(zrt_data) == len(feature_df),
)

((24169, 400), (50414, 400), (42820, 400), (117403, 400), True)

In [47]:
# Load the lookup dictionary that is later used to map predicted labels to soil-type names
# NOTE(review): open() is called without an explicit encoding, so the platform
# default is used — confirm it matches how the JSON file was written
with open(r'D:\worker_code\Terrain_Test\data\soil_dict.json', 'r') as f:
    soil_dict = json.load(f)

In [48]:
# Model selection: name of the individual model passed to predict() for all three predictors
select_model = 'RandomForestEntr'

In [49]:
# Predict with the selected model of each soil group's predictor.
# predict() returns a pandas Series indexed like its input, so it is assigned
# directly to the new column. The former `.to_frame()['TZ_label']` round trip
# only re-selected that same Series and hard-coded the label column name.
# Each subset is copied first so the new column is not written into a slice
# of feature_df.
sdt_data = sdt_data.copy()
sdt_data['pre_value'] = sdt_predictor.predict(sdt_data[sdt_feature_names], model=select_model)
fzrt_data = fzrt_data.copy()
fzrt_data['pre_value'] = fzrt_predictor.predict(fzrt_data[fzrt_feature_names], model=select_model)
zrt_data = zrt_data.copy()
zrt_data['pre_value'] = zrt_predictor.predict(zrt_data[zrt_feature_names], model=select_model)


In [59]:
# Merge the three predicted subsets into one table (rows are stacked in the order sdt, fzrt, zrt)
result_df = pd.concat([sdt_data,fzrt_data,zrt_data])

In [60]:
# Keep only the OBJECTID, new_TZ and pre_value columns. The explicit copy
# detaches the subset from the concatenated frame, so the assignment below does
# not write into a slice (avoids SettingWithCopyWarning) and matches the
# copy-before-assign pattern used elsewhere in this notebook.
result_df = result_df[['OBJECTID','new_TZ','pre_value']].copy()
# Cast the predicted labels to str before they are looked up in the JSON-loaded dictionary
result_df['pre_value'] = result_df['pre_value'].astype(str)

In [61]:
# Translate each predicted label into its soil-species name via soil_dict;
# assign() returns a new frame with the extra 'reclass_tz' column
result_df = result_df.assign(reclass_tz=result_df['pre_value'].map(soil_dict))

In [62]:
# Keep only the OBJECTID, new_TZ and reclass_tz columns, then display the table
result_df = result_df[['OBJECTID','new_TZ','reclass_tz']]
result_df

Unnamed: 0,OBJECTID,new_TZ,reclass_tz
0,1,腐薄层灰泥质黄壤,石灰泥田
1,2,腐薄层灰泥质黄壤,青石灰泥田
2,3,腐薄层灰泥质黄壤,青石灰泥田
3,4,薄层砾硅质黄壤,黄浅白粉泥田
4,5,薄层砾硅质黄壤,黄浅白粉泥田
...,...,...,...
117124,141483,腐薄层硅质黄壤,腐薄层硅质黄壤
117394,155830,"腐中层壤质黄色石灰土,浅石红泥田",腐厚层红泥质黄壤
117396,155834,腐薄层红泥质黄壤,腐薄层红泥质黄壤
117397,155835,石红泥田,腐厚层红泥质黄壤


In [63]:
# Build the lookup dictionary for the 三普 soil species (三普土种)
# Read the Excel file
sp_file_path = r"C:\Users\Runker\Desktop\search_dict.xlsx"
df = pd.read_excel(sp_file_path)

# Forward-fill (ffill) to handle merged cells: empty cells take the value of the row above
df_filled = df.ffill()
# Helper that turns one table row into a single-entry dictionary
def create_dict(row):
    """Map a row's soil species (三普土种) to its higher taxonomy levels.

    Args:
        row: Mapping-like row (e.g. a pandas Series) that can be indexed with
            '三普土种', '三普土类', '三普亚类' and '三普土属'.

    Returns:
        dict: One-item dictionary of the form
        ``{soil_species: {'土类': ..., '亚类': ..., '土属': ...}}``.
    """
    # (output key, source column) pairs, in the order they appear in the result
    level_columns = (('土类', '三普土类'), ('亚类', '三普亚类'), ('土属', '三普土属'))
    return {row['三普土种']: {level: row[column] for level, column in level_columns}}

# Apply create_dict to every row and flatten the resulting one-entry dicts into
# a single lookup table; when a soil species occurs more than once, the last row wins
sp_soiltype_dict = {
    soil_species: taxonomy
    for row_entry in df_filled.apply(create_dict, axis=1)
    for soil_species, taxonomy in row_entry.items()
}

In [64]:
# Inspect the soil-species lookup dictionary
sp_soiltype_dict

{'薄层红泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '红泥质黄壤'},
 '中层红泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '红泥质黄壤'},
 '厚层红泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '红泥质黄壤'},
 '薄层砂泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '砂泥质黄壤'},
 '中层砂泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '砂泥质黄壤'},
 '厚层砂泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '砂泥质黄壤'},
 '薄层砾硅质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '砾硅质黄壤'},
 '腐薄层砂泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '砂泥质黄壤'},
 '腐厚层砂泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '砂泥质黄壤'},
 '腐中层砂泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '砂泥质黄壤'},
 '腐薄层红泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '红泥质黄壤'},
 '腐厚层红泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '红泥质黄壤'},
 '腐中层红泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '红泥质黄壤'},
 '腐厚层硅质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '硅质黄壤'},
 '腐薄层硅质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '硅质黄壤'},
 '腐中层硅质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '硅质黄壤'},
 '腐薄层灰泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '灰泥质黄壤'},
 '腐厚层灰泥质黄壤': {'土类': '黄壤', '亚类': '典型黄壤', '土属': '灰泥质黄壤'},
 '腐中层

In [65]:
# For every reclassified soil species, look up its '土类', '亚类' and '土属'
# values in sp_soiltype_dict; species missing from the dictionary yield None.
# result_df was produced by a column-subset selection, so it is copied first:
# the new columns are then written into an independent frame rather than a
# slice (avoids SettingWithCopyWarning, same copy-before-assign pattern as the
# earlier cells).
result_df = result_df.copy()
result_df['reclass_tl'] = result_df['reclass_tz'].apply(lambda x: sp_soiltype_dict.get(x, {}).get('土类', None))
result_df['reclass_yl'] = result_df['reclass_tz'].apply(lambda x: sp_soiltype_dict.get(x, {}).get('亚类', None))
result_df['reclass_ts'] = result_df['reclass_tz'].apply(lambda x: sp_soiltype_dict.get(x, {}).get('土属', None))

In [66]:
# Display the result table including the added reclass_tl / reclass_yl / reclass_ts columns
result_df

Unnamed: 0,OBJECTID,new_TZ,reclass_tz,reclass_tl,reclass_yl,reclass_ts
0,1,腐薄层灰泥质黄壤,石灰泥田,水稻土,潴育型水稻土,石灰泥田
1,2,腐薄层灰泥质黄壤,青石灰泥田,水稻土,潜育型水稻土,青灰泥田
2,3,腐薄层灰泥质黄壤,青石灰泥田,水稻土,潜育型水稻土,青灰泥田
3,4,薄层砾硅质黄壤,黄浅白粉泥田,水稻土,淹育型水稻土,浅白粉泥田
4,5,薄层砾硅质黄壤,黄浅白粉泥田,水稻土,淹育型水稻土,浅白粉泥田
...,...,...,...,...,...,...
117124,141483,腐薄层硅质黄壤,腐薄层硅质黄壤,黄壤,典型黄壤,硅质黄壤
117394,155830,"腐中层壤质黄色石灰土,浅石红泥田",腐厚层红泥质黄壤,黄壤,典型黄壤,红泥质黄壤
117396,155834,腐薄层红泥质黄壤,腐薄层红泥质黄壤,黄壤,典型黄壤,红泥质黄壤
117397,155835,石红泥田,腐厚层红泥质黄壤,黄壤,典型黄壤,红泥质黄壤


In [67]:
# Save the data. The output directory is created first (no-op when it already
# exists) because to_csv does not create missing directories and would fail.
# The file name carries the selected model name and the current date stamp.
os.makedirs(result_path, exist_ok=True)
result_df.to_csv(os.path.join(result_path, f'prediction_class_{select_model}_{get_dtime()}.csv'),index=False)