In [40]:
import os
import json
import pandas as pd
import geopandas as gpd
from shapely import wkt

In [41]:
# 重命名内容
def rename_columns_text(df):
    # 字段中包含'_prediction_mean','predeiction_majority',统一替换为空字符串
    df.columns = df.columns.str.replace('_prediction_mean', '')
    df.columns = df.columns.str.replace('_prediction_majority', '')
    df.columns = df.columns.str.replace('_mean', '')
    df.columns = df.columns.str.replace('_majority', '')
    # 字段中包含'irrigation','drainage',去除其他字符串只保留'irrigation','drainage'
    df.columns = df.columns.str.replace(r'.*?(irrigation|drainage).*', r'\1', regex=True)
    return df
# 计算地形部位
def calculate_slope_position(value):
    value = int(value)
    # 1 2 为山地坡下，3 4 为山地坡中，5 6 为山地坡上
    if value == 1 or value == 2:
        return '山地坡下'
    elif value == 3 or value == 4:
        return '山地坡中'
    elif value == 5 or value == 6:
        return '山地坡上'
    else:
        return None
# 计算灌溉能力
def calculate_irrigation_ability(value):
    value = int(value)
    # 1 不满足 2 基本满足 3 满足 4 充分满足
    if value == 1:
        return '不满足'
    elif value == 2:
        return '基本满足'
    elif value == 3:
        return '满足'
    elif value == 4:
        return '充分满足'
    else:
        return None
# 计算排水能力
def calculate_drainage_ability(value):
    value = int(value)
    # 1 不满足 2 基本满足 3 满足 4 充分满足
    if value == 1:
        return '不满足'
    elif value == 2:
        return '基本满足'
    elif value == 3:
        return '满足'
    elif value == 4:
        return '充分满足'
    else:
        return None
# 计算耕层质地
def calculate_soil_texture(value,search_dict):
    value = int(value)
    # 字典的值等于value的key
    for key, val in search_dict.items():
        if val == value:
            return key
    return None
# 计算质地构型
def determine_soil_texture_structure(soil_texture: str,soil_type: str, land_use: str) -> str:
    """
    根据土壤类型、土地利用方式和土壤质地判断质地构型
    
    参数:
        soil_type: 土壤类型 (黄壤、石灰土、水稻土、潮土、黄棕壤)
        land_use: 土地利用方式 (水田、旱地、水浇地)
        soil_texture: 土壤质地 (壤质黏土、黏土、黏壤土、粉(砂)质黏土、砂质黏土、
                        砂质黏壤土、重黏土、粉(砂)质黏壤土、砂质壤土、粉(砂)质壤土、壤土)
    
    返回:
        质地构型类型 (上松下紧型、海绵型、夹层型、紧实型、上紧下松型、松散型、薄层型)
    """
    # 质地分类字典
    texture_groups = {
        "黏质类": ["黏土", "重黏土", "壤质黏土"],
        "壤质类": ["黏壤土", "粉(砂)质黏壤土", "砂质黏壤土", "壤土"],
        "砂质类": ["砂质壤土", "粉(砂)质壤土"],
        "特殊类": ["粉(砂)质黏土", "砂质黏土"]
    }
    
    # 规则判断
    if land_use == "水田":
        if soil_type == "水稻土":
            return "紧实型"
        if soil_texture in texture_groups["黏质类"]:
            return "上松下紧型"
        return "海绵型"
    
    if land_use in ["旱地", "水浇地"]:
        if soil_type in ["黄壤", "黄棕壤"]:
            if soil_texture in texture_groups["砂质类"]:
                return "松散型"
            if soil_texture in texture_groups["壤质类"]:
                return "海绵型"
            return "上松下紧型"
        
        if soil_type == "石灰土":
            if soil_texture in texture_groups["黏质类"]:
                return "夹层型"
            return "薄层型"
        
        if soil_type == "潮土":
            if soil_texture in texture_groups["砂质类"] + ["壤土"]:
                return "上紧下松型"
            return "海绵型"
    
    # 默认返回（当所有条件不满足时）
    return "海绵型"  # 海绵型作为最普遍的构型
# 计算有效土层厚度
def calc_yxtchd(soil_type, land_use, slop_position):
    """
    计算贵州省地区的有效土层厚度
    
    参数:
    soil_type (str): 土壤类型 - '紫色土', '石灰岩土', '黄壤', '黄棕壤', '水稻土'
    land_use (str): 土地利用类型 - '水田', '旱地' 或 '水浇地'
    slop_position (str): 坡位 - '山地坡上', '山地坡中' 或 '山地坡下'
    
    返回:
    float: 有效土层厚度（厘米）
    """
    # 土壤类型基础厚度
    base_thickness = {
        "紫色土": 75,
        "石灰(岩)土": 55,
        "黄壤": 70,
        "黄棕壤": 65,
        "水稻土": 85
    }
    
    # 坡位修正系数
    slope_factor = {
        "山地坡上": 0.7,   # 坡上侵蚀严重，土层较薄
        "山地坡中": 1.0,   # 坡中为基准
        "山地坡下": 1.3    # 坡下有堆积，土层较厚
    }
    
    # 土地利用修正系数
    land_use_factor = {
        "水田": 1.2,      # 水田通常经过整平和长期水耕，土层较厚
        "水浇地": 1.1,    # 水浇地次之
        "旱地": 0.9       # 旱地土层较薄
    }
    
    # 计算最终厚度
    thickness = base_thickness.get(soil_type, 65) * slope_factor.get(slop_position, 1.0) * land_use_factor.get(land_use, 1.0)
    
    # 确保厚度在合理范围内 (25-130cm)
    thickness = max(25, min(thickness, 130))
    
    # 贵州省特殊地区调整
    # 石灰岩土在坡上位置特别薄
    if soil_type == "石灰(岩)土" and slop_position == "山地坡上":
        thickness = max(25, thickness * 0.85)  # 额外降低15%，但不低于最小值
    
    # 紫色土在坡下水田特别厚
    if soil_type == "紫色土" and slop_position == "山地坡下" and land_use == "水田":
        thickness = min(130, thickness * 1.15)  # 额外增加15%，但不超过最大值
    
    # 水稻土在水田区域特别厚
    if soil_type == "水稻土" and land_use == "水田":
        thickness = min(130, thickness * 1.2)  # 额外增加20%，但不超过最大值
    
    # 黄壤在坡上旱地特别薄
    if soil_type == "黄壤" and slop_position == "山地坡上" and land_use == "旱地":
        thickness = max(25, thickness * 0.9)  # 额外降低10%，但不低于最小值
    # 潮土在水田和水浇地条件下特别厚
    if soil_type == "潮土" and land_use in ["水田", "水浇地"]:
        thickness = min(130, thickness * 1.15)  # 额外增加15%，但不超过最大值
    
    # 潮土在坡下位置更厚
    if soil_type == "潮土" and slop_position == "山地坡下":
        thickness = min(130, thickness * 1.1)  # 额外增加10%，但不超过最大值
    return round(thickness, 1)  # 返回保留一位小数的厚度值

# 保存为shp
def save_shp(df,path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # 直接创建GeoDataFrame，因为geometry已经是Shapely对象
    gdf = gpd.GeoDataFrame(df, geometry='geometry')
    gdf.to_file(path, driver='ESRI Shapefile', encoding='gbk')


In [42]:
# 数据路径
data_path = r"G:\soil_property_result\qzs\grade_evaluation\table\grade_evaluation_sample_near.csv"

In [43]:
df = pd.read_csv(data_path)
df.rename(columns={'TRZD':'邻近质地'}, inplace=True)

In [44]:
# 重命名字段
df = rename_columns_text(df)

In [45]:
# 重命名字段
map_dict = {'PH':'酸碱度','OM':'有机质','AK':'速效钾','AP':'有效磷','TRRZ':'土壤容重','GZCHD':'耕层厚度','YXTCHD':'有效土层厚度',
            'CD':'镉','HG':'汞','AS2':'砷','PB':'铅','CR':'铬','slopepostion':'坡位','dem':'海拔高度','irrigation':'灌溉','drainage':'排水','TRZD':'质地'}
df.rename(columns=map_dict, inplace=True)

In [46]:
# 计算地形部位
df['地形部位'] = df['坡位'].apply(calculate_slope_position)

In [47]:
# 计算灌溉能力
df['灌溉能力'] = df['灌溉'].apply(calculate_irrigation_ability)


In [48]:
# 计算排水能力
df['排水能力'] = df['排水'].apply(calculate_drainage_ability)

In [49]:
# 计算耕层质地（邻近点）
# dict_path = r"G:\soil_property_result\qzs\models\soil_property_class\label_mappings\TRZD_mapping.json"
# search_dict = json.load(open(dict_path, 'r', encoding='utf-8'))
# df['耕层质地'] = df['质地'].apply(calculate_soil_texture, args=(search_dict,))
# 计算耕层质地
df['耕层质地'] = df['邻近质地']

In [50]:
# 计算有效土层
df['有效土层厚度'] = df.apply(lambda row: calc_yxtchd(row['Class1_tl'], row['DLMC'], row['坡位']), axis=1)

In [51]:
# 计算质地构型
df['质地构型'] = df.apply(lambda row: determine_soil_texture_structure(row['耕层质地'], row['Class1_tl'], row['DLMC']), axis=1)

In [52]:
df.columns

Index(['FID', 'BSM', 'DLBM', 'DLMC', 'QSDWDM', 'QSDWMC', 'DLDM', 'DLLB',
       'Class1', 'Class1_tl', 'Class1_yl', 'Class1_ts', 'geometry', '酸碱度',
       '有机质', '速效钾', '有效磷', '土壤容重', '耕层厚度', '有效土层厚度', '镉', '汞', '砷', '铅', '铬',
       '坡位', '海拔高度', '灌溉', '排水', '质地', 'Centroid_X', 'Centroid_Y', 'XMin',
       'YMin', 'XMax', 'YMax', 'project_Area', '邻近质地', 'nearest_distance',
       '地形部位', '灌溉能力', '排水能力', '耕层质地', '质地构型'],
      dtype='object')

In [53]:
# 整理字段
filter_df = df[['FID','BSM', 'DLMC', 'QSDWDM', 'QSDWMC', 'Class1', 'Class1_ts', 'Class1_yl',
       'Class1_tl', 'DLBM', 'QSDWDM', 'QSDWMC', 'DLLB', 'DLDM',  'Centroid_X', 'Centroid_Y',
       'project_Area','酸碱度', '有机质', '速效钾', '有效磷', '土壤容重', '耕层厚度',
       '有效土层厚度', '海拔高度', '地形部位', '灌溉能力', '排水能力', '耕层质地','质地构型']]

In [54]:
# 保证都为正值（对对应列应用ABS函数）
for col in ['酸碱度', '有机质', '速效钾', '有效磷', '土壤容重', '耕层厚度','有效土层厚度', '海拔高度']:
    filter_df.loc[:, col] = filter_df[col].apply(lambda x: abs(x))

In [55]:
# 计算平差面积
import json
area_index_dict = {}
with open(r"F:\cache_data\shp_file\qz\json_file\area_index_dict.json", 'r', encoding='utf-8') as f:
    area_index_dict = json.load(f)

filter_df['平差面积'] = filter_df['BSM'].apply(lambda x: area_index_dict[str(x)])*filter_df['project_Area']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filter_df['平差面积'] = filter_df['BSM'].apply(lambda x: area_index_dict[str(x)])*filter_df['project_Area']


In [56]:
filter_df['平差面积'].sum()

401284369.4296734

In [58]:
# 保存为csv和excel
filter_df.to_csv(r'G:\soil_property_result\qzs\grade_evaluation\table\grade_evaluation_have_index.csv', index=False)
filter_df.to_excel(r'G:\soil_property_result\qzs\grade_evaluation\table\grade_evaluation_have_index.xlsx', index=False)