In [1]:
import folium
import pandas as pd
from folium.plugins import TimestampedGeoJson
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

# Load the NYC jobs postings.
data = pd.read_csv('NYCjobs.csv')

# Work locations with missing values removed.
work_location_column = data['Work Location 1']
addresses = work_location_column.dropna()

# Count how often each address string occurs (most frequent first) and keep
# the labels of the 100 most common ones.
address_counts = addresses.value_counts()
top_100_addresses = address_counts.iloc[:100].index

# Print the first 10 addresses as a sanity check.
print(top_100_addresses[:10])



Index(['55 Water St Ny Ny', '255 Greenwich Street',
       '30-30 Thomson Avenue, LIC, NY 11101', '100 Gold Street',
       '4 World Trade Center', '2 Lafayette St., N.Y.',
       '55 Water Street, NY, NY', '30-30 Thomson Ave L I City Qns',
       '96-05 Horace Harding Expway', '33 Beaver St, New York Ny'],
      dtype='object')


In [2]:
# Build the posting-date / salary / location table in one chain:
#   1. keep the four relevant columns and drop rows with any missing value,
#   2. add the midpoint of the salary range as 'Average Salary',
#   3. parse 'Posting Date' into datetimes,
#   4. keep only rows whose work location is one of the top-100 addresses.
posting_columns = ['Posting Date', 'Work Location 1', 'Salary Range From', 'Salary Range To']
time_salary_location_data = (
    data[posting_columns]
    .dropna()
    .assign(**{
        'Average Salary': lambda frame: (frame['Salary Range From'] + frame['Salary Range To']) / 2,
        'Posting Date': lambda frame: pd.to_datetime(frame['Posting Date']),
    })
    .loc[lambda frame: frame['Work Location 1'].isin(top_100_addresses)]
)

# Geocoder used to turn address strings into coordinates (10 second timeout per request).
# NOTE(review): "geoapiExercises" looks like a generic tutorial user agent; the Nominatim
# usage policy asks for an application-specific identifier - confirm and replace if needed.
geolocator = Nominatim(user_agent="geoapiExercises", timeout=10)

# Base map, centred on the coordinates below at zoom level 11.
map_center = [40.7128, -74.0060]
time_space_map = folium.Map(location=map_center, zoom_start=11)

# Color mapping: choose a marker color from the average salary.
def get_color(average_salary):
    """Return the marker color for the salary band of ``average_salary``.

    Below 50000 gives "green"; from 50000 up to (but not including) 80000
    gives "yellow"; every other value gives "red".
    """
    in_low_band = average_salary < 50000
    if in_low_band:
        return "green"
    in_mid_band = 50000 <= average_salary < 80000
    return "yellow" if in_mid_band else "red"

# Size mapping: choose a marker radius from the average salary.
def get_size(average_salary):
    """Return the marker radius for the salary band of ``average_salary``.

    Below 50000 gives 5; from 50000 up to (but not including) 80000 gives 10;
    every other value gives 15.
    """
    radius = 15  # fallback for anything outside the two lower bands
    if average_salary < 50000:
        radius = 5
    elif 50000 <= average_salary < 80000:
        radius = 10
    return radius



In [3]:
# Prepare the time-space data: one GeoJSON point feature per job posting.
#
# Geocoding is a remote HTTP call, and many postings share the same address
# string, so each distinct address is geocoded exactly once and the result is
# reused for every row. The calls go through geopy's RateLimiter so that
# requests are spaced at least one second apart; with its default settings it
# retries failed requests and then returns None instead of raising, which the
# `if location:` check below treats as "address not found".
rate_limited_geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

geocoded_locations = {
    unique_address: rate_limited_geocode(unique_address)
    for unique_address in time_salary_location_data['Work Location 1'].unique()
}

time_space_data = []
for index, row in time_salary_location_data.iterrows():
    address = row['Work Location 1']
    location = geocoded_locations.get(address)  # cached geocoding result (or None)
    if location:
        average_salary = row['Average Salary']
        color = get_color(average_salary)
        size = get_size(average_salary)

        feature = {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                # GeoJSON coordinate order is [longitude, latitude].
                "coordinates": [location.longitude, location.latitude]
            },
            "properties": {
                "time": row['Posting Date'].strftime("%Y-%m-%d"),
                "style": {"color": color, "radius": size},
                "icon": "circle",
                "popup": f"Salary: {average_salary}"
            }
        }

        time_space_data.append(feature)

        
# Wrap the collected features in a GeoJSON FeatureCollection and attach it to
# the map as a timestamped layer (period "P1D").
feature_collection = {"type": "FeatureCollection", "features": time_space_data}
timestamped_layer = TimestampedGeoJson(
    feature_collection,
    period="P1D",
    add_last_point=True,
)
timestamped_layer.add_to(time_space_map)

# Write the map out as a standalone HTML file.
time_space_map.save('time_space_map22.html')