# 土壤属性预测-pH

#### 依赖库

In [1]:
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pykrige.ok import OrdinaryKriging
from pykrige.kriging_tools import write_asc_grid
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import seaborn as sns

#### 配置

In [10]:
# --- Input data -----------------------------------------------------------
# CSV file holding the sampling-point feature table.
property_point_path = r"F:\cache_data\pre_property_table\ky\feature_ky.csv"

# --- Target selection -----------------------------------------------------
# Column to predict.
label_column = 'pH'
# All soil-property columns present in the table (the label is one of them).
property_list = [
    'pH',
    'B',
    'Cu',
    'Mn',
    'N',
    'P',
    'Zn',
    'K2O',
    'som',
]

# --- AutoGluon settings ---------------------------------------------------
# Directory where the trained predictor is stored.
model_path = r"F:\cache_data\model_path\ky\ph\autogluon\all_feature"
model_presets = 'good'
# Training time budget in seconds (3 hours).
model_time_limit = 3 * 60 * 60
model_eval_metric = 'r2'
model_problem_type = 'regression'

#### 读取数据

In [3]:
# Load the sampling-point table from the path set in the configuration cell
# (same file as before, no longer hard-coded a second time).
data = pd.read_csv(property_point_path)
# Fill missing values with the column mean. numeric_only=True keeps this
# working on pandas >= 2.0, where mean() raises on non-numeric columns.
# NOTE(review): the label column is still in the frame at this point, so any
# missing label values are mean-imputed too - confirm that is intended.
data.fillna(data.mean(numeric_only=True), inplace=True)
# Drop every property column except the label. The guard makes the cell safe
# to re-run: a bare remove() raises ValueError once the label is gone.
if label_column in property_list:
    property_list.remove(label_column)
data.drop(columns=property_list, inplace=True)

In [5]:
# Remove duplicate rows, keeping the first occurrence. Rows are compared on
# every column except the first one.
dedup_columns = data.columns[1:].tolist()
data.drop_duplicates(subset=dedup_columns, keep='first', inplace=True)

In [9]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible across runs.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=0,
)
# Display the shapes of both partitions as the cell output.
(train_data.shape, test_data.shape)

((5884, 35), (1471, 35))

In [None]:
# Build the AutoGluon predictor from the settings in the configuration cell.
predictor = TabularPredictor(
    label=label_column,
    problem_type=model_problem_type,
    eval_metric=model_eval_metric,
    path=model_path,
)
# Train on the training partition with the configured preset and time budget.
predictor.fit(
    train_data=train_data,
    presets=model_presets,
    time_limit=model_time_limit,
)

In [None]:
# Name of the best-performing model. `model_best` is a property on
# TabularPredictor (it yields the model name as a string), so it is read
# without call parentheses; calling it raises TypeError.
best_model = predictor.model_best
print(best_model)
# Show the training summary for the fitted predictor.
predictor.fit_summary()