In [1]:
import datetime
import os
import pickle

import numpy as np
import pandas as pd
from autogluon.tabular import TabularPredictor
from pykrige.ok import OrdinaryKriging
from pyproj import Transformer
from sklearn.metrics import r2_score
from tqdm import tqdm

In [8]:
# Get today's date as a compact string
def get_dtime():
    """Return today's local date formatted as ``YYYYMMDD``.

    Returns:
        str: Eight-digit date string, e.g. ``'20240516'``.
    """
    # Only the date part is formatted, so date.today() is equivalent to
    # datetime.now() here.
    return datetime.date.today().strftime('%Y%m%d')

In [2]:
# Load the previously trained AutoGluon tabular predictor from disk.
# NOTE(review): absolute, machine-specific path. The "modle" spelling is
# presumably the real on-disk directory name — do not correct it here without
# renaming the folder as well.
predictor = TabularPredictor.load(r"F:\cache_data\zone_ana\dy\modle\autogluon_type")

In [3]:
# Show the predictor's best-model name alongside the names of all models it contains.
predictor.model_best,predictor.model_names()

('WeightedEnsemble_L2',
 ['NeuralNetFastAI',
  'RandomForestGini',
  'RandomForestEntr',
  'ExtraTreesGini',
  'ExtraTreesEntr',
  'NeuralNetTorch',
  'WeightedEnsemble_L2'])

In [4]:
# Get the names of the input features the loaded predictor expects,
# and print them for reference.
feature_names = predictor.feature_metadata_in.get_features()
print(feature_names)

['DLMC', 'XMin', 'XMax', 'YMin', 'YMax', 'Centroid_X', 'Centroid_Y', 'DEM_MAX', 'DEM_MIN', 'DEM_MEAN', 'ndvi_MAX', 'ndvi_MIN', 'ndvi_MEAN', 'PCA_0_MAX', 'PCA_0_MIN', 'PCA_0_MEAN', 'Slope_MAX', 'Slope_MIN', 'Slope_MEAN', 'Aspect_MAX', 'Aspect_MIN', 'Aspect_MEAN', 'MRVBF_MAX', 'MRVBF_MIN', 'MRVBF_MEAN']


In [10]:
# Directory where prediction results are written.
result_path =  r"F:\cache_data\zone_ana\dy\prediction_result"

In [11]:
# Path of the CSV file holding the feature table to run predictions on.
feature_path = r'F:\cache_data\zone_ana\dy\prediction_data\result.csv'

In [12]:
# Load the full feature table from CSV.
feature_df = pd.read_csv(feature_path)
# Keep only the columns the model expects, in the model's feature order;
# this raises KeyError if the CSV lacks any of them.
prediction_df = feature_df[feature_names]

In [13]:
# Preview the model input table.
# NOTE(review): in the saved output the last rows are NaN in the DEM/ndvi/PCA/
# Slope/Aspect/MRVBF columns, yet those rows still receive a prediction later —
# confirm that predicting on rows with missing features is intended.
prediction_df

Unnamed: 0,DLMC,XMin,XMax,YMin,YMax,Centroid_X,Centroid_Y,DEM_MAX,DEM_MIN,DEM_MEAN,...,PCA_0_MEAN,Slope_MAX,Slope_MIN,Slope_MEAN,Aspect_MAX,Aspect_MIN,Aspect_MEAN,MRVBF_MAX,MRVBF_MIN,MRVBF_MEAN
0,水田,453415.7591,453504.8993,2.909007e+06,2.909054e+06,453460.380336,2.909038e+06,831.500000,810.599976,820.541093,...,-0.096359,0.797213,0.042473,0.385560,4.540927,0.062419,1.687067,2.967779,0.004468,0.959029
1,水田,453745.4063,453833.4091,2.909096e+06,2.909244e+06,453776.184840,2.909171e+06,833.500000,818.900024,825.126560,...,0.060324,0.571303,0.019992,0.280075,6.283185,0.020413,3.714515,1.977468,0.006735,0.743168
2,水田,453397.2270,453426.3763,2.909322e+06,2.909352e+06,453414.506759,2.909337e+06,821.299988,814.200012,817.437500,...,-0.146770,0.773339,0.309840,0.460163,6.110996,0.025645,2.449364,0.932592,0.000272,0.562656
3,水田,453172.7335,453194.5528,2.903237e+06,2.903281e+06,453180.794689,2.903264e+06,963.900024,960.299988,962.055552,...,-0.062809,0.415360,0.295699,0.340881,5.536232,4.764959,4.995005,0.520207,0.181701,0.411477
4,水田,453040.1877,453105.8702,2.903039e+06,2.903078e+06,453076.497652,2.903056e+06,905.700012,901.500000,904.005874,...,0.150961,0.215026,0.000195,0.032623,4.990708,1.966520,2.963729,2.840893,0.190537,1.726206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117398,水田,458876.1215,458946.2619,2.905722e+06,2.905799e+06,458911.191700,2.905729e+06,,,,...,,,,,,,,,,
117399,水田,437542.1649,437589.4663,2.919336e+06,2.919397e+06,437560.040717,2.919374e+06,,,,...,,,,,,,,,,
117400,旱地,441343.3945,441405.9169,2.866864e+06,2.866943e+06,441369.306054,2.866889e+06,,,,...,,,,,,,,,,
117401,乔木林地,454622.3101,454708.3515,2.889526e+06,2.889638e+06,454660.784718,2.889578e+06,,,,...,,,,,,,,,,


In [28]:
# Lookup from integer class label to soil type name; it is applied to the
# predicted 'TZ_label' column further down. Each value is a single-element
# list wrapping the name string.
soiltype_dict = {0: ['中层灰泥质黄色石灰土'], 1: ['中层砂泥质黄壤'], 2: ['厚层砂泥质黄壤'], 3: ['厚层红泥质黄壤'], 4: ['厚层黏质黄色石灰土'], 5: ['浅石灰泥田'], 6: ['浅石红泥田'], 7: ['潮泥田'], 8: ['潮砂泥田'], 9: ['石灰泥田'], 10: ['石红泥田'], 11: ['腐中层壤质黄色石灰土'], 12: ['腐中层壤质黑色石灰土'], 13: ['腐中层灰泥质黄壤'], 14: ['腐中层砂泥质黄壤'], 15: ['腐中层砂泥质黄壤性土'], 16: ['腐中层砾壤质酸性紫色土'], 17: ['腐中层砾壤质钙质紫色土'], 18: ['腐中层硅质黄壤'], 19: ['腐中层红泥质黄壤'], 20: ['腐厚层壤质黄色石灰土'], 21: ['腐厚层壤质黑色石灰土'], 22: ['腐厚层灰泥质黄壤'], 23: ['腐厚层砂泥质黄壤'], 24: ['腐厚层硅质黄壤'], 25: ['腐厚层红泥质黄壤'], 26: ['腐薄层壤质黄色石灰土'], 27: ['腐薄层灰泥质黄壤'], 28: ['腐薄层砂泥质黄壤'], 29: ['腐薄层砾砂质黑色石灰土'], 30: ['腐薄层硅质山地灌丛草甸土'], 31: ['腐薄层硅质黄壤'], 32: ['腐薄层红泥质黄壤'], 33: ['腐薄层黏质黄色石灰土'], 34: ['薄层壤质中性紫色土'], 35: ['薄层灰泥质黄色石灰土'], 36: ['薄层砂泥质黄壤'], 37: ['薄层砾壤质中性紫色土'], 38: ['薄层砾壤质黄色石灰土'], 39: ['薄层砾壤质黑色石灰土'], 40: ['薄层砾硅质黄壤'], 41: ['薄层硅质酸性粗骨土'], 42: ['轻漂白粉泥田'], 43: ['重漂砂泥田'], 44: ['重漂红泥田'], 45: ['青石灰泥田'], 46: ['黄浅白粉泥田'], 47: ['黄浅砂泥田'], 48: ['黄浅红泥田'], 49: ['黄浅鳝泥田'], 50: ['黄白粉泥田'], 51: ['黄砂泥田'], 52: ['黄红泥田'], 53: ['黄青白粉泥田'], 54: ['黄鳝泥田'], 55: ['黄黄砂泥田']}

In [39]:
# Predict (not train) the soil-type class label for every row of prediction_df
# with the loaded AutoGluon predictor.
# NOTE(review): this pins the 'RandomForestEntr' sub-model instead of
# predictor.model_best — confirm that is intentional.
prediction_result = predictor.predict(prediction_df, model='RandomForestEntr')
# Convert the predicted Series into a DataFrame; its single column is the
# label column 'TZ_label'.
prediction_result_df = prediction_result.to_frame()
# Add a 1-based OBJECTID derived from the row index.
# NOTE(review): assumes the feature CSV rows are in OBJECTID order — confirm.
prediction_result_df['OBJECTID'] = prediction_result_df.index + 1
# Map each class label to its soil type name. soiltype_dict wraps every name
# in a single-element list, so unwrap it up front instead of stringifying the
# list and stripping "['" / "']" afterwards: that approach depends on the
# pandas version's default for str.replace(regex=...) and turns unmapped
# labels into the literal string 'nan'. Unmapped labels now stay missing (NaN).
soil_name_by_label = {label: names[0] for label, names in soiltype_dict.items()}
prediction_result_df['soil_TZ'] = prediction_result_df['TZ_label'].map(soil_name_by_label)
# Make sure the output directory exists, then save the predictions to a
# date-stamped CSV file.
os.makedirs(result_path, exist_ok=True)
prediction_result_df.to_csv(os.path.join(result_path, f'prediction_soiltype_{get_dtime()}.csv'),index=False)

In [40]:
# Preview the prediction table: class label, OBJECTID and soil type name.
prediction_result_df

Unnamed: 0,TZ_label,OBJECTID,soil_TZ
0,27,1,腐薄层灰泥质黄壤
1,27,2,腐薄层灰泥质黄壤
2,27,3,腐薄层灰泥质黄壤
3,40,4,薄层砾硅质黄壤
4,40,5,薄层砾硅质黄壤
...,...,...,...
117398,10,117399,石红泥田
117399,8,117400,潮砂泥田
117400,46,117401,黄浅白粉泥田
117401,26,117402,腐薄层壤质黄色石灰土
