In [1]:
import geopandas as gpd
import folium
from shapely.geometry import Point
import pandas as pd
import numpy as np

In [2]:
# Load the census shapefile into a GeoDataFrame and preview the first rows.
census_shapefile = 'census_2020.shp'
gdf = gpd.read_file(census_shapefile)
gdf.head()

Unnamed: 0,GEOID_20,ACRES_LAND,ACRES_WATE,TRACT,BG,TRBG,BG_NAME,TRACT_LABE,C_DISTRICT,VILLNUMB,...,CD_TYPE,UCUV_TYPE,EARNINGS_5,EARNINGS_6,POPULATI_4,MALE_16_OV,FEMALE_16_,Shape__Are,Shape__Len,geometry
0,530330105022,93.418437,0.0,10502,2,10502.2,Block Group 2,105.02,1,0.0,...,CD,UCUV,524,427,1403,737,666,4069196.0,9132.543011,"POLYGON ((1256065.98 207109.145, 1256060.564 2..."
1,530330106023,74.934707,0.0,10602,3,10602.3,Block Group 3,106.02,1,0.0,...,CD,UCUV,296,294,955,543,412,3264104.0,7252.175209,"POLYGON ((1256766.721 205266.409, 1256740.309 ..."
2,530330107012,98.666214,0.0,10701,2,10701.2,Block Group 2,107.01,1,0.0,...,CD,UCUV,192,167,612,335,277,4297807.0,11496.354128,"POLYGON ((1262684.558 204417.446, 1262656.721 ..."
3,530330111021,160.18038,282.550178,11102,1,11102.1,Block Group 1,111.02,2,0.0,...,CD,UCUV,288,280,845,391,454,6948791.0,16141.467904,"POLYGON ((1288695.321 202025.319, 1288695.001 ..."
4,530330111023,29.226142,0.0,11102,3,11102.3,Block Group 3,111.02,2,0.0,...,CD,UCUV,84,139,621,313,308,1273065.0,5191.31741,"POLYGON ((1285492.109 199179.917, 1285543.89 1..."


In [3]:
# Column names
gdf.columns

Index(['GEOID_20', 'ACRES_LAND', 'ACRES_WATE', 'TRACT', 'BG', 'TRBG',
       'BG_NAME', 'TRACT_LABE', 'C_DISTRICT', 'VILLNUMB',
       ...
       'CD_TYPE', 'UCUV_TYPE', 'EARNINGS_5', 'EARNINGS_6', 'POPULATI_4',
       'MALE_16_OV', 'FEMALE_16_', 'Shape__Are', 'Shape__Len', 'geometry'],
      dtype='object', length=133)

In [4]:
# Rename the abbreviated source field names to descriptive column names.
# Keys that are not present in gdf are silently ignored by DataFrame.rename.
column_renames = {
    'TOTAL_POPU': 'TOTAL_POPULATION',
    'Children_u': 'CHILDREN_UNDER_5',
    'Older_Adul': 'OLDER_ADULTS_65',
    'POPULATI_3': 'POVERTY',
    'LESS_THAN_': 'LESS_THAN_HIGH_SCHOOL',
    'BACHELOR_H': 'BACHELOR_HIGHER',
    'TOTAL_HO_1': 'TOTAL_HOUSING_UNITS',
    'LOW_DENSIT': 'LOW_DENSITY',
    'MEDIUM_DEN': 'MEDIUM_DENSITY',
    'UNIT_50_MO': 'HIGHER_DENSITY',
    'OVERCROWDE': 'OVERCROWDED',
    'POP_20_643': 'EMPLOYED',
    'PER_CAPITA': 'PER_CAPITA_INCOME',
    'NOT_HISPAN': 'WHITE_PEOPLE',
    'NOTHISPLAT': 'BLACK_PEOPLE',
    'NOTHISPL_1': 'INDIAN_ALASKA_PEOPLE',
    'NOTHISPL_2': 'ASIAN_PEOPLE',
    'NOTHISPL_3': 'HAWAIIAN_PACIFIC_PEOPLE',
    'NOTHISPL_4': 'OTHER_RACE_PEOPLE',
    'NOTHISPL_5': 'TWO_OR_MORE_RACES_PEOPLE',
    'HISPANIC_O': 'HISPANIC_PEOPLE',
    'PEOPLE_OF_': 'PEOPLE_OF_COLOR',
    'Median_Age': 'MEDIAN_AGE',
}
# Reassign instead of using inplace=True: the result is the same frame contents,
# but the cell no longer mutates an object that earlier cells already displayed.
gdf = gdf.rename(columns=column_renames)
gdf.head()

Unnamed: 0,GEOID_20,ACRES_LAND,ACRES_WATE,TRACT,BG,TRBG,BG_NAME,TRACT_LABE,C_DISTRICT,VILLNUMB,...,CD_TYPE,UCUV_TYPE,EARNINGS_5,EARNINGS_6,POPULATI_4,MALE_16_OV,FEMALE_16_,Shape__Are,Shape__Len,geometry
0,530330105022,93.418437,0.0,10502,2,10502.2,Block Group 2,105.02,1,0.0,...,CD,UCUV,524,427,1403,737,666,4069196.0,9132.543011,"POLYGON ((1256065.98 207109.145, 1256060.564 2..."
1,530330106023,74.934707,0.0,10602,3,10602.3,Block Group 3,106.02,1,0.0,...,CD,UCUV,296,294,955,543,412,3264104.0,7252.175209,"POLYGON ((1256766.721 205266.409, 1256740.309 ..."
2,530330107012,98.666214,0.0,10701,2,10701.2,Block Group 2,107.01,1,0.0,...,CD,UCUV,192,167,612,335,277,4297807.0,11496.354128,"POLYGON ((1262684.558 204417.446, 1262656.721 ..."
3,530330111021,160.18038,282.550178,11102,1,11102.1,Block Group 1,111.02,2,0.0,...,CD,UCUV,288,280,845,391,454,6948791.0,16141.467904,"POLYGON ((1288695.321 202025.319, 1288695.001 ..."
4,530330111023,29.226142,0.0,11102,3,11102.3,Block Group 3,111.02,2,0.0,...,CD,UCUV,84,139,621,313,308,1273065.0,5191.31741,"POLYGON ((1285492.109 199179.917, 1285543.89 1..."


In [7]:
# Fold the INDIAN_ALASKA_PEOPLE and HAWAIIAN_PACIFIC_PEOPLE counts into
# OTHER_RACE_PEOPLE (this overwrites the existing column; it does not create a new one).
# The unmerged counts are preserved once in OTHER_RACE_PEOPLE_RAW so that re-running
# this cell recomputes the same total instead of adding the two groups in again.
# NOTE: the guard assumes the first run happens on freshly loaded/renamed data.
if 'OTHER_RACE_PEOPLE_RAW' not in gdf.columns:
    gdf['OTHER_RACE_PEOPLE_RAW'] = gdf['OTHER_RACE_PEOPLE']
gdf['OTHER_RACE_PEOPLE'] = (
    gdf['INDIAN_ALASKA_PEOPLE']
    + gdf['HAWAIIAN_PACIFIC_PEOPLE']
    + gdf['OTHER_RACE_PEOPLE_RAW']
)

In [21]:
# Add derived columns: total acreage (land + water) and population per acre.
total_acres = gdf['ACRES_LAND'] + gdf['ACRES_WATE']
gdf['ACRES_TOTAL'] = total_acres
gdf['POP_DENSITY'] = gdf['TOTAL_POPULATION'].div(gdf['ACRES_TOTAL'])

gdf.head(3)

Unnamed: 0,GEOID_20,ACRES_LAND,ACRES_WATE,TRACT,BG,TRBG,BG_NAME,TRACT_LABE,C_DISTRICT,VILLNUMB,...,EARNINGS_5,EARNINGS_6,POPULATI_4,MALE_16_OV,FEMALE_16_,Shape__Are,Shape__Len,geometry,ACRES_TOTAL,POP_DENSITY
0,530330105022,93.418437,0.0,10502,2,10502.2,Block Group 2,105.02,1,0.0,...,524,427,1403,737,666,4069196.0,9132.543011,"POLYGON ((1256065.98 207109.145, 1256060.564 2...",93.418437,22.361753
1,530330106023,74.934707,0.0,10602,3,10602.3,Block Group 3,106.02,1,0.0,...,296,294,955,543,412,3264104.0,7252.175209,"POLYGON ((1256766.721 205266.409, 1256740.309 ...",74.934707,17.575301
2,530330107012,98.666214,0.0,10701,2,10701.2,Block Group 2,107.01,1,0.0,...,192,167,612,335,277,4297807.0,11496.354128,"POLYGON ((1262684.558 204417.446, 1262656.721 ...",98.666214,11.077754


In [22]:
# Reproject to a common CRS (EPSG:4326, lat/lon) and confirm the result.
gdf = gdf.to_crs(epsg=4326)
gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [23]:
# Inspect the maximum density to help choose the upper end of the legend bins.
gdf.loc[:, 'POP_DENSITY'].max()

255.50289570715114

In [31]:
# Helper that adds a choropleth (graduated colour) layer to a map.
def add_choropleth(map_obj, geo_df, column, palette, legend_name, bins=None):
    """Add a folium Choropleth layer for one column of ``geo_df`` to ``map_obj``.

    Parameters
    ----------
    map_obj : object the layer is added to via ``.add_to(map_obj)``.
    geo_df : frame providing both the geometry (through ``to_json()``) and the data;
        it must contain a ``GEOID_20`` column, which is used as the join key.
    column : name of the column whose values drive the fill colour.
    palette : colour scheme name passed to ``fill_color`` (e.g. ``'Blues'``).
    legend_name : used both as the layer name and as the legend caption.
    bins : optional legend intervals, forwarded unchanged to ``folium.Choropleth``.

    Returns
    -------
    None
    """
    folium.Choropleth(
        geo_data=geo_df.to_json(),
        data=geo_df,
        name=legend_name,
        columns=['GEOID_20', column],  # [region key, value]
        # Field in geo_data used to match rows of `data`; e.g. if the region
        # identifier in geo_data were `id`, this would be 'feature.properties.id'.
        key_on='feature.properties.GEOID_20',
        # Previously hard-coded to 'YlGnBu', which silently ignored `palette`.
        fill_color=palette,
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend_name,
        bins=bins,  # legend intervals
    ).add_to(map_obj)

# Default style callback for the GeoJson overlay.
def style_function(feature):
    """Return the same base style for every feature (``feature`` is not inspected)."""
    base_style = dict(
        fillColor='#3182bd',
        color='white',
        weight=0.7,
        fillOpacity=0.3,
    )
    return base_style

# Highlight (hover) style callback for the GeoJson overlay.
def highlight_function(feature):
    """Return the style applied to a feature while it is highlighted.

    The same style is returned for every feature; ``feature`` is not inspected.
    """
    return {
        'fillColor': 'orange',
        'color': 'white',
        'weight': 1.5,
        'fillOpacity': 0.7,
        # Leaflet path options are case-sensitive: the stroke opacity key is
        # 'opacity'. The previous 'Opacity' key was not a recognised option.
        'opacity': 0.8,
        'dashArray': '5,5',
        'lineCap': 'round',
        'lineJoin': 'round'
    }

# Create the map object.
seattle_center = [47.6062, -122.3321]
m = folium.Map(location=seattle_center, zoom_start=10)

# Add a TileLayer.
positron_tiles = folium.TileLayer(tiles='cartodbpositron')
positron_tiles.add_to(m)

# Add the Choropleth layer.
density_bins = [0, 10, 50, 100, 200, 300]
add_choropleth(m, gdf, 'POP_DENSITY', 'Blues', 'Population Density Seattle', bins=density_bins)

# Add the GeoJson layer (styled outlines with a hover tooltip).
density_tooltip = folium.GeoJsonTooltip(
    fields=['TRACT', 'POP_DENSITY'],            # data column names
    aliases=['Tract:', 'Population Density:'],  # labels shown in the tooltip
    localize=True,
)
tract_overlay = folium.GeoJson(
    gdf,
    style_function=style_function,
    highlight_function=highlight_function,
    tooltip=density_tooltip,
)
tract_overlay.add_to(m)

m.save('Population_Density_Seattle.html')
m

In [17]:
# Citywide statistics for Seattle | group-and-aggregate.
# How it works: assign(Category='All Seattle') tags every row with the same category,
# so groupby('Category') produces exactly one group. agg() then reduces each listed
# column with its own function (sum or mean), giving one row of citywide totals and
# averages: population by race, mean per-capita income, total employed, and so on.
# NOTE(review): the 'mean' entries are unweighted averages over rows — confirm intended.
citywide_agg_spec = {
    'WHITE_PEOPLE': 'sum',
    'BLACK_PEOPLE': 'sum',
    'ASIAN_PEOPLE': 'sum',
    'HISPANIC_PEOPLE': 'sum',
    'OTHER_RACE_PEOPLE': 'sum',
    'PER_CAPITA_INCOME': 'mean',
    'EMPLOYED': 'sum',
    'MEDIAN_AGE': 'mean',
    'TOTAL_POPULATION': 'sum',
}
tagged_rows = gdf.assign(Category='All Seattle')
grid_all_seattle = tagged_rows.groupby('Category').agg(citywide_agg_spec).reset_index()
grid_all_seattle

Unnamed: 0,Category,WHITE_PEOPLE,BLACK_PEOPLE,ASIAN_PEOPLE,HISPANIC_PEOPLE,OTHER_RACE_PEOPLE,PER_CAPITA_INCOME,EMPLOYED,MEDIAN_AGE,TOTAL_POPULATION
0,All Seattle,449969.0,48811.0,123116.0,54913.0,8118.0,78186.876866,423542,37.483022,735454.0


In [16]:
# Illustration of the mechanism: what the frame looks like after tagging every row
# with the same Category value (before grouping).
explain_columns = [
    'WHITE_PEOPLE', 'BLACK_PEOPLE', 'ASIAN_PEOPLE', 'HISPANIC_PEOPLE',
    'OTHER_RACE_PEOPLE', 'PER_CAPITA_INCOME', 'EMPLOYED', 'MEDIAN_AGE',
    'TOTAL_POPULATION',
]
gdf_explain = gdf[explain_columns].copy().assign(Category='All Seattle')
gdf_explain.head()

Unnamed: 0,WHITE_PEOPLE,BLACK_PEOPLE,ASIAN_PEOPLE,HISPANIC_PEOPLE,OTHER_RACE_PEOPLE,PER_CAPITA_INCOME,EMPLOYED,MEDIAN_AGE,TOTAL_POPULATION,Category
0,1544.0,0.0,0.0,224.0,156.0,72620,1205,38.7,2089.0,All Seattle
1,1024.0,23.0,41.0,113.0,0.0,91332,821,31.9,1317.0,All Seattle
2,495.0,229.0,205.0,164.0,0.0,42402,597,32.4,1093.0,All Seattle
3,1090.0,41.0,200.0,41.0,0.0,94529,556,56.8,1569.0,All Seattle
4,156.0,418.0,94.0,246.0,0.0,35530,543,39.7,998.0,All Seattle


In [18]:
# Back to processing the population data
grid_all_seattle 

Unnamed: 0,Category,WHITE_PEOPLE,BLACK_PEOPLE,ASIAN_PEOPLE,HISPANIC_PEOPLE,OTHER_RACE_PEOPLE,PER_CAPITA_INCOME,EMPLOYED,MEDIAN_AGE,TOTAL_POPULATION
0,All Seattle,449969.0,48811.0,123116.0,54913.0,8118.0,78186.876866,423542,37.483022,735454.0


In [20]:
# Round the results. DataFrame.round takes a {column: decimals} mapping, where the
# number is how many decimal places to keep for that column.
rounding_spec = {
    'WHITE_PEOPLE': 1,
    'BLACK_PEOPLE': 1,
    'ASIAN_PEOPLE': 1,
    'HISPANIC_PEOPLE': 1,
    'OTHER_RACE_PEOPLE': 1,
    'PER_CAPITA_INCOME': 0,
    'EMPLOYED': 1,
    'MEDIAN_AGE': 0,
    'TOTAL_POPULATION': 0,
}
grid_all_seattle = grid_all_seattle.round(decimals=rounding_spec)
grid_all_seattle

Unnamed: 0,Category,WHITE_PEOPLE,BLACK_PEOPLE,ASIAN_PEOPLE,HISPANIC_PEOPLE,OTHER_RACE_PEOPLE,PER_CAPITA_INCOME,EMPLOYED,MEDIAN_AGE,TOTAL_POPULATION
0,All Seattle,449969.0,48811.0,123116.0,54913.0,8118.0,78187.0,423542,37.0,735454.0
