In [2]:
# ========= Cell 1: 填写任务信息 =========
from datetime import datetime
import ipynbname
# Task metadata for this run. These values are later written to the operation log.
task_name = "calculate population on barrio"
# Notebook file name. NOTE: the value includes the ".ipynb" extension.
notebook_name = "barrio_population_calculate2.0.ipynb"
# Notebook path. Must be a raw string: in a normal string literal "\b" is the
# backspace escape (\x08), which silently corrupted the logged path.
notebook_path = r"CASA0004\barrio_population_calculate2.0.ipynb"
dataset = "2016 Barrio & 2018 cencus on MANZANA"
code_version = "v1.0_G, (first part of geo cleaning)"
# Despite the "_dir" suffix, the two inputs below are GeoJSON *file* paths.
input_dir = r"E:\Dissertation\CASA0004\Barrios_will_UPZ.geojson"  # barrio boundaries
manzana_dir = r"E:\Dissertation\XGBoost_cleaning\MANZANA2018CENSO.geojson"  # manzana census layer
output_dir = "E:/Dissertation/XGBoost_cleaning/barrio_with_population2"  # output folder
# Free-text description of the allocation method; logged verbatim.
note = "1.用barrio边界与manzana，按地理位置聚合，计算barrio的人口并且记录在\"population\"里面。2.如果街区完全包含了manzana，那就把全部的人口都算给它，如果没完全包含，就算[manzana总人口*(重叠面积/manzana的面积(这个数据记录在manzana的shape_area里面))]，四舍五入；3.完成后计算barrio的人口密度，为population/shape_area(这个是barrio数据集的shape_area)"

In [4]:
# ========= Cell 2: 人口计算 =========
import geopandas as gpd
import pandas as pd
import os

# ========== Basic preparation ==========
# Load both layers and reproject them to the same CRS before measuring areas.
target_crs = "EPSG:3116"  # Colombian local projection, metric units
barrio = gpd.read_file(input_dir)
barrio = barrio.to_crs(target_crs)
manzana = gpd.read_file(manzana_dir)
manzana = manzana.to_crs(target_crs)

# Population field: coerce to numeric; unparseable or missing values become 0.
pop_field = "tp27_perso"
pop_numeric = pd.to_numeric(manzana[pop_field], errors="coerce")
manzana[pop_field] = pop_numeric.fillna(0)

# Actual area (square metres) of every feature, computed from the reprojected geometry.
for layer in (barrio, manzana):
    layer["area_m2"] = layer.geometry.area

# ========== Overlay ==========
# One output row per manzana/barrio intersection piece, plus the area of that piece.
intersections = gpd.overlay(manzana, barrio, how="intersection")
intersections["area_intersect"] = intersections.geometry.area

# 分配人口逻辑
def allocate_population(row):
    """Return the share of a manzana's population assigned to one intersection piece.

    Parameters
    ----------
    row : mapping-like (e.g. one row of the overlay result)
        Must provide ``area_m2_1`` (manzana area), ``area_m2_2`` (barrio area),
        ``area_intersect`` (area of the overlap) and the column named by the
        module-level ``pop_field`` (manzana population).

    Returns
    -------
    The full population when the overlap matches the manzana area to within
    1 area unit; otherwise the population scaled by an area ratio
    (unrounded; the caller rounds).
    """
    block_area = row["area_m2_1"]
    barrio_area = row["area_m2_2"]
    overlap_area = row["area_intersect"]
    residents = row[pop_field]

    # Case 1: the manzana lies (almost) entirely inside the barrio.
    if abs(overlap_area - block_area) < 1:
        return residents

    # Case 2: the barrio lies (almost) entirely inside the manzana -> use the
    # barrio's own area as the numerator.
    # Case 3: ordinary partial overlap -> use the overlap area.
    if abs(overlap_area - barrio_area) < 1:
        shared_area = barrio_area
    else:
        shared_area = overlap_area
    return residents * (shared_area / block_area)

# Note: overlay automatically adds _1 / _2 suffixes to the generated columns
# (this is where area_m2_1 / area_m2_2 come from).
# Each intersection piece gets its allocated population, rounded per piece.
intersections["population"] = intersections.apply(allocate_population, axis=1).round()

# ========== Aggregate to barrio ==========
# Sum the pieces per barrio name, then attach the totals to the barrio layer.
population_by_barrio = intersections.groupby("barriocomu")["population"].sum()
barrio_pop = population_by_barrio.reset_index()
barrio = barrio.merge(
    barrio_pop,
    on="barriocomu",
    how="left",
).fillna({"population": 0})  # barrios with no matching total get 0

# Population density (people per square kilometre)
area_km2 = barrio["area_m2"] / 1e6
barrio["pop_density"] = barrio["population"] / area_km2

# Output directory and target file
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, "barrio_with_population.geojson")

# Save the enriched barrio layer as GeoJSON
barrio.to_file(output_file, driver="GeoJSON", encoding="utf-8")
print("✅ 完成，结果已保存：", output_file)


✅ 完成，结果已保存： E:/Dissertation/XGBoost_cleaning/barrio_with_population2\barrio_with_population.geojson


In [None]:
# ===== Write the log record =====
# Timestamp taken once when this cell runs, formatted as "YYYY-MM-DD HH:MM:SS".
current_time = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
def append_log(task_name, dataset, code_version, input_dir, output_dir, status, duration, note,
               repo_dir=r"E:\Dissertation\CASA0004"):
    """Append one run record to ``operation_log.md`` inside ``repo_dir``.

    Parameters
    ----------
    task_name, dataset, code_version, input_dir, output_dir, status, duration, note
        Values written verbatim, one labelled line each.
    repo_dir : str, optional
        Directory that holds ``operation_log.md``. Defaults to the project
        repository, so existing eight-argument calls behave as before.

    Notes
    -----
    Also reads the module-level names ``notebook_name``, ``notebook_path`` and
    ``current_time``; they must be defined before the call. The file is opened
    in append mode with UTF-8 encoding, and a confirmation line is printed.
    """
    # The default above is a raw string: the former non-raw literal contained the
    # invalid escape sequences "\D" and "\C", which Python reports as a warning.
    log_path = f"{repo_dir}/operation_log.md"

    record = (
        f"**任务名称**: {task_name}\n"
        f"**任务文件**: {notebook_name}\n"
        f"**文件路径**: {notebook_path}\n"
        f"**数据集**: {dataset}\n"
        f"**代码版本**: {code_version}\n"
        f"**输入目录**: {input_dir}\n"
        f"**输出目录**: {output_dir}\n"
        f"**状态**: {status}\n"
        f"**耗时**: {duration}\n"
        f"**备注**: {note}\n"
        f"**记录时间**: {current_time}\n\n"
        "================分割线================\n\n"
    )

    # Append so earlier records are kept.
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(record)

    print("✅ 日志写入完成")
# Outcome and duration are entered by hand for this run, then the record is appended.
duration = "50s"
status = "完成，存在192万异常值，后面在QGIS对空值和0值异常值进行了筛选"
append_log(
    task_name=task_name,
    dataset=dataset,
    code_version=code_version,
    input_dir=input_dir,
    output_dir=output_dir,
    status=status,
    duration=duration,
    note=note,
)

✅ 日志写入完成


  repo_dir = 'E:\Dissertation\CASA0004'
