In [None]:
import folium
from folium.plugins import HeatMap
import pandas as pd

# Load the house-price and POI tables
house_price_data_path = 'london_only_house_price.csv'  # replace with your file path
poi_data_path = 'london_all_pois.csv'  # replace with your file path

house_price_data = pd.read_csv(house_price_data_path)
poi_data = pd.read_csv(poi_data_path)

# Longitude/latitude extent of the study area (should match the region the
# house-price and POI data cover). Not used further in this cell.
min_lon, max_lon = -0.6, 0.4
min_lat, max_lat = 51.2, 51.7

# Both maps are centred on central London
map_center = [51.5074, -0.1278]

# --- POI heatmap: plain point density, no weights ---
m_poi = folium.Map(location=map_center, zoom_start=12)

# Build the [lat, lon] pairs in one vectorised step instead of iterrows().
# Rows with a missing coordinate are dropped because HeatMap rejects NaN.
poi_heat_data = poi_data[['lat', 'lon']].dropna().values.tolist()

# Colour gradient for the POI layer (transparent -> solid blue)
POI_color_scale = {
    0: 'rgba(255, 255, 255, 0)',    # transparent
    0.1: 'rgba(0, 0, 255, 0.2)',    # light blue
    0.5: 'rgba(0, 0, 255, 0.5)',    # medium blue
    1: 'rgba(0, 0, 255, 1)'         # solid blue
}

HeatMap(poi_heat_data, radius=25, blur=15, min_opacity=0.1, gradient=POI_color_scale).add_to(m_poi)

# Save the POI heatmap
output_html_poi_only = 'london_poi_updated_heatmap.html'
m_poi.save(output_html_poi_only)

# --- House-price heatmap: points weighted by price ---
m_price_only = folium.Map(location=map_center, zoom_start=12)

# Leaflet.heat caps each point's intensity at `max` (1.0 by default), so raw
# prices would all saturate and the layer would show sale density rather than
# price. Scale the weights into (0, 1] by dividing by the highest price.
price_points = house_price_data[['lat', 'lon', 'price']].dropna()
if not price_points.empty:
    peak_price = price_points['price'].max()
    if peak_price > 0:
        price_points = price_points.assign(price=price_points['price'] / peak_price)
house_price_heat_data = price_points.values.tolist()

# Colour gradient for the price layer (transparent -> solid red)
price_color_scale = {
    0: 'rgba(255, 255, 255, 0)',    # transparent
    0.1: 'rgba(255, 0, 0, 0.3)',    # light red
    0.5: 'rgba(255, 0, 0, 0.6)',    # medium red
    1: 'rgba(255, 0, 0, 1)'         # solid red
}

HeatMap(house_price_heat_data, radius=25, blur=15, min_opacity=0.1, gradient=price_color_scale).add_to(m_price_only)

# Save the price heatmap
output_html_price_only = 'london_price_updated_heatmap.html'
m_price_only.save(output_html_price_only)

# Notebook cell result: the two generated HTML file paths
output_html_poi_only, output_html_price_only


In [None]:
import folium

# Base map centred inside the region of interest
m = folium.Map(location=[51.45, -0.1], zoom_start=12)

# Mark the crop region with a translucent blue rectangle. The bounds are two
# opposite corners given in the same [lat, lon] order as the map location.
m.add_child(
    folium.Rectangle(
        bounds=[[51.2, -0.6], [51.7, 0.4]],
        color="blue",
        weight=2,
        fill=True,
        fill_opacity=0.2,
    )
)

# Write the map to an HTML file
m.save("london_map.html")


In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from tqdm import tqdm

# Count the POIs that lie within a given radius of one house
def count_nearby_pois(house_point, poi_df, radius_m=1250):
    """Return how many POIs lie strictly within *radius_m* metres of a house.

    Distances use a local flat-earth approximation: latitude and longitude
    differences are converted to metres separately, with the longitude
    difference scaled by cos(latitude). Comparing raw degree distances
    against ``radius_m / 111320`` (the previous approach) treats a degree of
    longitude as long as a degree of latitude, which shrinks the east-west
    reach of the search to roughly 60% of the requested radius at London's
    latitude.

    Args:
        house_point: Point-like object exposing ``x`` (longitude) and ``y``
            (latitude) in degrees.
        poi_df: Frame whose ``geometry`` exposes ``x`` and ``y`` coordinate
            series in degrees, i.e. point geometries.
        radius_m: Search radius in metres.

    Returns:
        The number of POIs closer than *radius_m*.
    """
    import math  # local import: this cell does not import math at the top

    metres_per_degree = 111320  # approximate length of one degree of latitude
    lon_scale = math.cos(math.radians(house_point.y))

    poi_points = poi_df.geometry
    dx_m = (poi_points.x - house_point.x) * metres_per_degree * lon_scale
    dy_m = (poi_points.y - house_point.y) * metres_per_degree

    # Compare squared distances to avoid a square root per POI
    return (dx_m ** 2 + dy_m ** 2).lt(radius_m ** 2).sum()

# Count nearby POIs of every category for each property
def compute_accessibility(house_data, poi_data):
    """Add one ``<type>_access`` POI-count column per POI category.

    For every row of *house_data*, the POIs of each category are counted with
    ``count_nearby_pois`` (using its default radius) and the result is stored
    in a column named after the lower-cased category, e.g. ``park_access``.

    Args:
        house_data: Frame with a ``geometry`` column holding one point per
            house. It is modified in place.
        poi_data: Frame with a ``type`` column and a geometry usable by
            ``count_nearby_pois``.

    Returns:
        The same *house_data* object, with the count columns added.
    """
    # POI categories of interest.
    # NOTE(review): 'Commer' presumably matches the label used in the POI
    # file's 'type' column; confirm against the data.
    poi_types = ['Park', 'School', 'Commer', 'Hospital', 'Transit']

    # The per-category subsets do not depend on the house, so build them once
    # here instead of re-filtering the whole POI table for every house.
    pois_by_column = {
        f'{poi_type.lower()}_access': poi_data[poi_data['type'] == poi_type]
        for poi_type in poi_types
    }

    # Create (or reset) the count columns
    for column in pois_by_column:
        house_data[column] = 0

    # Visit each house and record its count for every category
    for idx, row in tqdm(house_data.iterrows(), total=house_data.shape[0], desc="Calculating POI accessibility"):
        house_point = row['geometry']
        for column, poi_of_type in pois_by_column.items():
            house_data.at[idx, column] = count_nearby_pois(house_point, poi_of_type)

    return house_data

# Read the house-price and POI CSV files as GeoDataFrames
def load_data():
    """Load both input tables and attach a point geometry to each row.

    Returns:
        A ``(house_data, poi_data)`` tuple of GeoDataFrames whose ``geometry``
        column is built from each row's ``lon`` / ``lat`` values.
    """

    def _read_point_table(csv_path):
        # Both inputs share the same layout: 'lon' and 'lat' columns that
        # become a Point(lon, lat) geometry.
        table = pd.read_csv(csv_path)
        table['geometry'] = table.apply(lambda rec: Point(rec['lon'], rec['lat']), axis=1)
        return gpd.GeoDataFrame(table, geometry='geometry')

    # Update these paths to wherever the files actually live
    house_data = _read_point_table("london_only_house_price.csv")
    poi_data = _read_point_table("london_all_pois.csv")

    return house_data, poi_data

# Entry point: load the data, compute accessibility, save the result
def main():
    """Run the POI-accessibility pipeline and write the enriched CSV."""
    # Read the house-price and POI tables
    houses, pois = load_data()

    # Step 1: add the per-category nearby-POI counts
    # (the original comment describes this as 15-minute walkability)
    enriched = compute_accessibility(houses, pois)

    # Step 2: save the table, including the new count columns, to a new file
    enriched.to_csv("updated_london_house_price.csv", index=False)
    print("POI accessibility has been calculated and saved to 'updated_london_house_price.csv'.")


if __name__ == "__main__":
    main()


In [None]:
import pandas as pd
import geopandas as gpd
import osmnx as ox
import matplotlib.pyplot as plt
import numpy as np
from shapely.geometry import Point
from scipy.stats import gaussian_kde
from tqdm import tqdm

# Load the pre-filtered London house prices (adjust the path as needed)
london_houses = pd.read_csv('london_only_house_price.csv')

# Drop incomplete rows first: missing coordinates or prices would otherwise
# propagate NaN into the kernel density estimate below.
london_houses = london_houses.dropna(subset=['lat', 'lon', 'price'])

# Build a GeoDataFrame of house locations
geometry = [Point(xy) for xy in zip(london_houses['lon'], london_houses['lat'])]
house_gdf = gpd.GeoDataFrame(london_houses, geometry=geometry, crs='EPSG:4326')

# Uniform random sample of up to 1000 houses. The cap avoids the ValueError
# that sample() raises when asked for more rows than exist, and .copy()
# lets a column be added to the sample later without touching house_gdf.
house_sample = house_gdf.sample(n=min(1000, len(house_gdf)), random_state=42).copy()

# Fetch the London walking network and draw it as a base map.
# NOTE(review): the graph is drawn in a projected CRS and `london_map` does
# not appear to be used by the heatmap plotting later in this cell - confirm
# whether this (large) download is still needed.
city = 'London, UK'
G = ox.graph_from_place(city, network_type='walk')
london_map = ox.plot_graph(ox.project_graph(G), show=False, close=False)

# POI data
poi_df = pd.read_csv('london_all_pois.csv')  # path to the POI file
poi_gdf = gpd.GeoDataFrame(poi_df, geometry=gpd.points_from_xy(poi_df['lon'], poi_df['lat']), crs='EPSG:4326')

# Inputs for the price kernel density estimate (KDE)
x = house_sample.geometry.x
y = house_sample.geometry.y
price_weights = house_sample['price']

# Price-weighted KDE evaluated on a 500 x 500 grid spanning the sample extent
kde_price = gaussian_kde(np.vstack([x, y]), weights=price_weights)
xi, yi = np.mgrid[x.min():x.max():500j, y.min():y.max():500j]
zi_price = kde_price(np.vstack([xi.flatten(), yi.flatten()]))

# Count the POIs that lie within a given radius of one house
def count_nearby_pois(house_point, poi_df, radius_m=1250):
    """Return how many POIs lie strictly within *radius_m* metres of a house.

    Distances use a local flat-earth approximation: latitude and longitude
    differences are converted to metres separately, with the longitude
    difference scaled by cos(latitude). Comparing raw degree distances
    against ``radius_m / 111320`` (the previous approach) treats a degree of
    longitude as long as a degree of latitude, which shrinks the east-west
    reach of the search to roughly 60% of the requested radius at London's
    latitude.

    Args:
        house_point: Point-like object exposing ``x`` (longitude) and ``y``
            (latitude) in degrees.
        poi_df: Frame whose ``geometry`` exposes ``x`` and ``y`` coordinate
            series in degrees, i.e. point geometries.
        radius_m: Search radius in metres.

    Returns:
        The number of POIs closer than *radius_m*.
    """
    import math  # local import: this cell does not import math at the top

    metres_per_degree = 111320  # approximate length of one degree of latitude
    lon_scale = math.cos(math.radians(house_point.y))

    poi_points = poi_df.geometry
    dx_m = (poi_points.x - house_point.x) * metres_per_degree * lon_scale
    dy_m = (poi_points.y - house_point.y) * metres_per_degree

    # Compare squared distances to avoid a square root per POI
    return (dx_m ** 2 + dy_m ** 2).lt(radius_m ** 2).sum()

# Count nearby POIs for every sampled house, with a tqdm progress bar
print("Counting POIs within 1250 meters...")
house_sample['poi_count'] = [count_nearby_pois(pt, poi_gdf) for pt in tqdm(house_sample.geometry, desc="POI Counting")]

# KDE weighted by POI count, evaluated on the same grid as the price KDE
poi_weights = house_sample['poi_count']
kde_poi = gaussian_kde(np.vstack([x, y]), weights=poi_weights)
zi_poi = kde_poi(np.vstack([xi.flatten(), yi.flatten()]))

# Draw the two heatmaps side by side on shared axes
fig, axs = plt.subplots(1, 2, figsize=(16, 8), sharex=True, sharey=True)

# Price heatmap. The 'YlOrBr' colormap runs from pale yellow (low) to dark
# brown (high), so the title must say darker = higher, not yellow = higher.
axs[0].set_title("Housing Price Heatmap (Darker = Higher Prices)")
axs[0].pcolormesh(xi, yi, zi_price.reshape(xi.shape), shading='auto', cmap='YlOrBr')
axs[0].set_xlabel("Longitude")
axs[0].set_ylabel("Latitude")

# POI-count heatmap
axs[1].set_title("POI Density Heatmap (Blue = More Facilities)")
axs[1].pcolormesh(xi, yi, zi_poi.reshape(xi.shape), shading='auto', cmap='Blues')
axs[1].set_xlabel("Longitude")
axs[1].set_ylabel("Latitude")

# Finalise the layout and show the figure. The heatmaps are drawn on plain
# longitude/latitude axes; nothing here overlays them on a street map.
plt.tight_layout()
plt.show()




In [None]:
import pandas as pd

# Read the house-price table (make sure the path is correct)
house_df = pd.read_csv('london_house_price.csv')

# Greater London bounding box in degrees
lon_min, lon_max = -0.6, 0.35
lat_min, lat_max = 51.25, 51.7

# Keep only the rows whose coordinates fall inside the box (edges included)
within_lon = (lon_min <= house_df['lon']) & (house_df['lon'] <= lon_max)
within_lat = (lat_min <= house_df['lat']) & (house_df['lat'] <= lat_max)
london_houses = house_df[within_lon & within_lat]

# Write the filtered rows to a new CSV file
london_houses.to_csv('london_only_house_price.csv', index=False)

# Report the row counts before and after filtering
print(f"原始数据量: {len(house_df)}")
print(f"筛选后的伦敦数据量: {len(london_houses)}")
print("已保存为 'london_only_house_price.csv'")


In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point
import numpy as np
from scipy.stats import gaussian_kde

# Load the house-price and POI tables
houses = pd.read_csv('london_only_house_price.csv')
pois = pd.read_csv('london_all_pois.csv')

# Drop rows that lack the fields used below
houses = houses.dropna(subset=['lat', 'lon', 'price'])
pois = pois.dropna(subset=['lat', 'lon'])

# Random sample of up to 1000 houses; the cap avoids the ValueError that
# sample() raises when fewer than 1000 rows survive the cleaning step.
houses_sample = houses.sample(n=min(1000, len(houses)), random_state=42)

# Build GeoDataFrames (WGS84 longitude/latitude)
house_gdf = gpd.GeoDataFrame(houses_sample, geometry=gpd.points_from_xy(houses_sample['lon'], houses_sample['lat']), crs='EPSG:4326')
poi_gdf = gpd.GeoDataFrame(pois, geometry=gpd.points_from_xy(pois['lon'], pois['lat']), crs='EPSG:4326')

# Count POIs within 1250 metres (by default) of a house
def count_nearby_pois(house_point, poi_df, radius_m=1250):
    """Return how many POIs lie strictly within *radius_m* metres of a house.

    Distances use a local flat-earth approximation: latitude and longitude
    differences are converted to metres separately, with the longitude
    difference scaled by cos(latitude). Comparing raw degree distances
    against ``radius_m / 111320`` (the previous approach) treats a degree of
    longitude as long as a degree of latitude, which shrinks the east-west
    reach of the search to roughly 60% of the requested radius at London's
    latitude.

    Args:
        house_point: Point-like object exposing ``x`` (longitude) and ``y``
            (latitude) in degrees.
        poi_df: Frame whose ``geometry`` exposes ``x`` and ``y`` coordinate
            series in degrees, i.e. point geometries.
        radius_m: Search radius in metres.

    Returns:
        The number of POIs closer than *radius_m*.
    """
    import math  # local import: this cell does not import math at the top

    metres_per_degree = 111320  # approximate length of one degree of latitude
    lon_scale = math.cos(math.radians(house_point.y))

    poi_points = poi_df.geometry
    dx_m = (poi_points.x - house_point.x) * metres_per_degree * lon_scale
    dy_m = (poi_points.y - house_point.y) * metres_per_degree

    # Compare squared distances to avoid a square root per POI
    return (dx_m ** 2 + dy_m ** 2).lt(radius_m ** 2).sum()

# Count nearby POIs for every sampled house
print("Counting POIs within 1250m...")
house_gdf['poi_count'] = house_gdf.geometry.apply(lambda pt: count_nearby_pois(pt, poi_gdf))

# Coordinates and weights for the two kernel density estimates
x = house_gdf.geometry.x
y = house_gdf.geometry.y
price_weights = house_gdf['price']
poi_weights = house_gdf['poi_count']

# 500 x 500 evaluation grid spanning the sample extent
xi, yi = np.mgrid[x.min():x.max():500j, y.min():y.max():500j]

# KDE weighted by price
kde_price = gaussian_kde(np.vstack([x, y]), weights=price_weights)
zi_price = kde_price(np.vstack([xi.flatten(), yi.flatten()]))

# KDE weighted by POI count
kde_poi = gaussian_kde(np.vstack([x, y]), weights=poi_weights)
zi_poi = kde_poi(np.vstack([xi.flatten(), yi.flatten()]))

# Draw the two heatmaps side by side on shared axes
fig, axs = plt.subplots(1, 2, figsize=(16, 8), sharex=True, sharey=True)

# Price heatmap. The 'YlOrBr' colormap runs from pale yellow (low) to dark
# brown (high), so the title must say darker = higher, not yellow = higher.
axs[0].set_title("Housing Price Heatmap (Darker = Higher)")
axs[0].pcolormesh(xi, yi, zi_price.reshape(xi.shape), shading='auto', cmap='YlOrBr')
axs[0].set_xlabel("Longitude")
axs[0].set_ylabel("Latitude")

# POI heatmap (the y label is left blank because the y axis is shared)
axs[1].set_title("POI Density Heatmap (Blue = More Facilities)")
axs[1].pcolormesh(xi, yi, zi_poi.reshape(xi.shape), shading='auto', cmap='Blues')
axs[1].set_xlabel("Longitude")
axs[1].set_ylabel("")

plt.tight_layout()
plt.show()



In [None]:
from pyproj import CRS, Transformer

# Source CRS: British National Grid (EPSG:27700); target CRS: WGS84 (EPSG:4326)
crs_bng = CRS.from_epsg(27700)
crs_wgs84 = CRS.from_epsg(4326)

# Transformer from grid coordinates to longitude/latitude; always_xy=True
# keeps the (x, y) = (easting, northing) -> (longitude, latitude) ordering
transformer = Transformer.from_crs(crs_from=crs_bng, crs_to=crs_wgs84, always_xy=True)

# Sample grid reference (easting, northing)
easting, northing = 409863, 285457

# Convert the sample point and show the result
longitude, latitude = transformer.transform(easting, northing)
print(f"经度: {longitude}, 纬度: {latitude}")


In [None]:
import osmnx as ox
import pandas as pd
import os

# Route HTTP and HTTPS traffic through a proxy listening on localhost
os.environ.update({
    'HTTP_PROXY': 'http://127.0.0.1:33210',
    'HTTPS_PROXY': 'http://127.0.0.1:33210',
})

# Turn off SSL certificate verification for OSMnx's HTTP requests.
# NOTE(review): this removes protection against tampered responses; it is
# presumably a workaround for the local proxy - re-enable it if possible.
ox.settings.requests_kwargs = dict(verify=False)

# Query area: London (change the place name to cover a different region)
place = "London, UK"

# 1. Green space / parks (leisure=park)
parks_gdf = ox.features_from_place(place, tags={"leisure": "park"})

# 2. Schools (amenity=school)
schools_gdf = ox.features_from_place(place, tags={"amenity": "school"})

# 3. Commercial areas (shop=*); narrow the tag to specific shop types if needed
shops_gdf = ox.features_from_place(place, tags={"shop": True})

# 4. Medical facilities (amenity=hospital, clinic or pharmacy)
hospitals_gdf = ox.features_from_place(place, tags={"amenity": ["hospital", "clinic", "pharmacy"]})

# 5. Transit stops: railway stations and bus stations, fetched separately
railway_gdf = ox.features_from_place(place, tags={"railway": "station"})
bus_stations_gdf = ox.features_from_place(place, tags={"amenity": "bus_station"})

# Stack the two transit tables into one
transit_gdf = pd.concat((railway_gdf, bus_stations_gdf), ignore_index=True)

# Add the "类型" (type) and "名称" (name) columns
def add_type_and_name(gdf, type_name):
    """Label a feature table with its category and a display name.

    ``gdf.get('name', default)`` only falls back to the default when the
    ``name`` column is missing altogether, so features without a name kept
    NaN. Missing names are now filled with the placeholder row by row.

    Args:
        gdf: Feature table with a ``geometry`` column and, optionally, a
            ``name`` column. The two new columns are added to it in place.
        type_name: Category label written to every row of ``类型``.

    Returns:
        A frame with just the ``类型``, ``名称`` and ``geometry`` columns.
    """
    gdf['类型'] = type_name
    if 'name' in gdf:
        # Column exists: keep real names, replace missing ones
        gdf['名称'] = gdf['name'].fillna('无名称')
    else:
        # No name column at all: every feature gets the placeholder
        gdf['名称'] = '无名称'
    return gdf[['类型', '名称', 'geometry']]

# Label every feature table with its category, in a single pass.
# The right-hand side is built from the unlabelled tables before any of the
# names on the left are rebound.
parks_gdf, schools_gdf, shops_gdf, hospitals_gdf, transit_gdf = (
    add_type_and_name(frame, label)
    for frame, label in (
        (parks_gdf, '绿地/公园'),
        (schools_gdf, '学校'),
        (shops_gdf, '商圈'),
        (hospitals_gdf, '医疗设施'),
        (transit_gdf, '交通站点'),
    )
)

# Stack all labelled POI tables into one
all_pois_gdf = pd.concat(
    [parks_gdf, schools_gdf, shops_gdf, hospitals_gdf, transit_gdf],
    ignore_index=True,
)

# Derive longitude/latitude columns from each feature's geometry
def extract_coordinates(gdf):
    """Add ``经度`` (x) and ``纬度`` (y) columns to *gdf* and return it.

    Point geometries contribute their own coordinates; any other geometry
    type contributes the coordinates of its centroid.
    """

    def _anchor_point(geom):
        # A point represents itself; everything else is reduced to its centroid
        if geom.geom_type == 'Point':
            return geom
        return geom.centroid

    anchors = gdf.geometry.apply(_anchor_point)
    gdf['经度'] = anchors.apply(lambda pt: pt.x)
    gdf['纬度'] = anchors.apply(lambda pt: pt.y)
    return gdf

# Add the longitude/latitude columns
all_pois_gdf = extract_coordinates(all_pois_gdf)

# Write the POIs to CSV: create the file with a header on the first run,
# append rows without a header when the file already exists
output_file = 'london_all_pois.csv'
csv_already_exists = os.path.isfile(output_file)
all_pois_gdf[['类型', '名称', '经度', '纬度']].to_csv(
    output_file,
    mode='a' if csv_already_exists else 'w',
    header=not csv_already_exists,
    index=False,
)

# Preview the result
print(all_pois_gdf.head())


In [None]:
import pandas as pd
from pyproj import Proj, transform
import os

# Projections used by the conversion below, created with the legacy
# `init=` form: British National Grid (EPSG:27700, eastings/northings)
# and WGS84 longitude/latitude (EPSG:4326)
proj_bng, proj_wgs84 = (Proj(init=code) for code in ("epsg:27700", "epsg:4326"))

# Read house_price.csv, the table that carries the postcode column
postcode_file_path = 'house_price.csv'
postcode_data = pd.read_csv(postcode_file_path)

# Normalise the postcodes by stripping their spaces
postcodes_without_spaces = postcode_data['邮编'].str.replace(" ", "")
postcode_data['邮编'] = postcodes_without_spaces

# Convert British National Grid coordinates to WGS84 latitude/longitude
def convert_to_latlon(easting, northing):
    """Convert eastings/northings (EPSG:27700) to ``(latitude, longitude)``.

    Uses a ``pyproj.Transformer`` instead of the module-level
    ``pyproj.transform`` function, which has been deprecated since pyproj
    2.6.1. The transformer is created on first use and cached on the
    function, so repeated calls do not pay the construction cost again.

    Args:
        easting: Easting in metres; a scalar or an array of values.
        northing: Northing in metres; same shape as *easting*.

    Returns:
        A ``(latitude, longitude)`` tuple in degrees.
    """
    transformer = getattr(convert_to_latlon, "_transformer", None)
    if transformer is None:
        from pyproj import Transformer  # local import: the cell imports only Proj/transform

        # always_xy=True fixes the order to (easting, northing) in and
        # (longitude, latitude) out
        transformer = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)
        convert_to_latlon._transformer = transformer

    longitude, latitude = transformer.transform(easting, northing)
    return latitude, longitude

# Lookup table: postcode (spaces removed) -> (latitude, longitude)
postcode_coords = {}

# Folder holding the postcode CSV files (relative to the working directory)
folder_path = 'Data/CSV/'

# sorted() makes the processing order, and therefore which file wins when a
# postcode appears twice, independent of the file system's listing order
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.csv'):  # only CSV files are processed
        continue

    file_path = os.path.join(folder_path, filename)
    df = pd.read_csv(file_path, header=None)  # the files carry no header row

    # Name the columns (this also fails fast if a file has a different layout)
    df.columns = ['Postcode', 'Positional_quality_indicator', 'Eastings', 'Northings',
                  'Country_code', 'NHS_regional_HA_code', 'NHS_HA_code', 'Admin_county_code',
                  'Admin_district_code', 'Admin_ward_code']

    # Keep only the needed columns; .copy() so the assignments below write to
    # an independent frame rather than to a slice of the original
    df = df[['Postcode', 'Eastings', 'Northings']].copy()

    # Normalise the postcodes by stripping their spaces
    df['Postcode'] = df['Postcode'].str.replace(" ", "")

    # Convert the whole file in one call: the coordinate transform accepts
    # arrays, which avoids one Python-level call per row
    df['纬度'], df['经度'] = convert_to_latlon(df['Eastings'].to_numpy(), df['Northings'].to_numpy())

    # Record this file's postcodes in the lookup table
    postcode_coords.update(zip(df['Postcode'], zip(df['纬度'], df['经度'])))

# Look up the coordinates recorded for a postcode
def match_postcode(postcode):
    """Return the ``postcode_coords`` entry for *postcode*.

    Unknown postcodes yield ``(None, None)``.
    """
    try:
        return postcode_coords[postcode]
    except KeyError:
        # No entry for this postcode
        return (None, None)

# Match every postcode in house_price.csv to its coordinates, reporting
# progress along the way
progress_counter = 0
total_postcodes = len(postcode_data)

latitude_longitude = []
for progress_counter, postcode in enumerate(postcode_data['邮编'], start=1):
    # match_postcode returns a (lat, lon) pair; store it as a two-item list
    latitude_longitude.append(list(match_postcode(postcode)))

    # Print progress every 100 rows and once more on the final row
    if progress_counter == total_postcodes or progress_counter % 100 == 0:
        print(f"已处理 {progress_counter}/{total_postcodes} 条")

# Attach the coordinates to the table as two new columns
postcode_data[['纬度', '经度']] = pd.DataFrame(latitude_longitude, columns=['纬度', '经度'])

# Save the enriched table to a new CSV file
postcode_data.to_csv('property_data_with_latlon.csv', index=False, encoding='utf-8')

# Preview the result
print(postcode_data.head())

In [None]:
import pandas as pd

# Explicit names for the 16 columns of the raw file, in file order
column_names = [
    '交易唯一标识',
    '价格',
    '成交日期',
    '邮编',
    '房产类型',
    '新建房屋标识',
    '产权类型',
    '门牌号码(PAON)',
    '公寓/单元号(SAON)',
    '街道',
    '地区',
    '城市',
    '行政区',
    '县',
    '交易类别',
    '记录状态',
]

# Read the raw, header-less CSV (replace with your file name) and attach the
# column names
df = pd.read_csv('pp-2024.csv', header=None)
df.columns = column_names

# Write the labelled table to a new CSV file
df.to_csv('house_price.csv', index=False, encoding='utf-8')

# Preview the result
print(df.head())