In [1]:
import pandas as pd
import sqlalchemy
import geopandas as gpd
import folium
from folium import Marker, Circle, CircleMarker, PolyLine
from folium.plugins import HeatMap, MarkerCluster
from folium.map import Popup, Tooltip
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from math import sin, cos, sqrt, fabs, atan2
from math import pi

#------------------------------------------------------------#
# 因地图坐标偏差加入辅助的转换函数
# 作者：元凿坊工作室
# 链接：https://zhuanlan.zhihu.com/p/107253611
#------------------------------------------------------------#

# Reference ellipsoid parameters shared by the coordinate conversion helpers below.
# NOTE(review): a = 6378245 with f = 1/298.3 looks like the Krasovsky 1940 ellipsoid
# (semi-major axis presumably in metres) — confirm against the linked article.
a = 6378245  # semi-major axis
f = 1 / 298.3  # flattening
b = a * (1 - f)  # semi-minor axis, derived from a and f
ee = 1 - (b * b) / (a * a)  # first eccentricity squared: 1 - b^2 / a^2

# check if the point in china
def outOfChina(lng, lat):
    """Return True when (lng, lat) lies outside the rectangular bounding box.

    The box spans longitudes 72.004..137.8347 and latitudes 0.8293..55.8271,
    with both edges inclusive.
    """
    lng_inside = 72.004 <= lng <= 137.8347
    lat_inside = 0.8293 <= lat <= 55.8271
    # De Morgan form of "not (lng_inside and lat_inside)".
    return not lng_inside or not lat_inside


def geohey_transformLat(x, y):
    """Evaluate the latitude offset polynomial plus its sinusoidal corrections.

    Parameters
    ----------
    x, y : float
        Offsets passed in by the caller (``wgs2gcj`` passes ``lon - 105`` and
        ``lat - 35``).

    Returns
    -------
    float
        The raw (unscaled) latitude offset.
    """
    # The terms are summed strictly left to right so the floating-point result
    # does not depend on how the expression is laid out.
    polynomial = -100.0 + 2.0 * x + 3.0 * y + 0.2 * y * y + 0.1 * x * y + 0.2 * sqrt(fabs(x))
    wave_x = (20.0 * sin(6.0 * x * pi) + 20.0 * sin(2.0 * x * pi)) * 2.0 / 3.0
    wave_y_short = (20.0 * sin(y * pi) + 40.0 * sin(y / 3.0 * pi)) * 2.0 / 3.0
    wave_y_long = (160.0 * sin(y / 12.0 * pi) + 320.0 * sin(y * pi / 30.0)) * 2.0 / 3.0
    return polynomial + wave_x + wave_y_short + wave_y_long


def geohey_transformLon(x, y):
    """Evaluate the longitude offset polynomial plus its sinusoidal corrections.

    Parameters
    ----------
    x, y : float
        Offsets passed in by the caller (``wgs2gcj`` passes ``lon - 105`` and
        ``lat - 35``).

    Returns
    -------
    float
        The raw (unscaled) longitude offset.
    """
    # The terms are summed strictly left to right so the floating-point result
    # does not depend on how the expression is laid out.
    polynomial = 300.0 + x + 2.0 * y + 0.1 * x * x + 0.1 * x * y + 0.1 * sqrt(fabs(x))
    wave_fast = (20.0 * sin(6.0 * x * pi) + 20.0 * sin(2.0 * x * pi)) * 2.0 / 3.0
    wave_mid = (20.0 * sin(x * pi) + 40.0 * sin(x / 3.0 * pi)) * 2.0 / 3.0
    wave_slow = (150.0 * sin(x / 12.0 * pi) + 300.0 * sin(x * pi / 30.0)) * 2.0 / 3.0
    return polynomial + wave_fast + wave_mid + wave_slow


def wgs2gcj(wgsLon, wgsLat):
    """Convert a WGS-84 coordinate to GCJ-02.

    Points for which ``outOfChina`` is true are returned unchanged.

    Parameters
    ----------
    wgsLon, wgsLat : float
        Longitude and latitude in degrees.

    Returns
    -------
    tuple
        ``(gcjLon, gcjLat)``.
    """
    if not outOfChina(wgsLon, wgsLat):
        # Raw offsets are evaluated relative to the (105, 35) reference point.
        rawLat = geohey_transformLat(wgsLon - 105.0, wgsLat - 35.0)
        rawLon = geohey_transformLon(wgsLon - 105.0, wgsLat - 35.0)

        phi = wgsLat / 180.0 * pi
        sinPhi = sin(phi)
        w2 = 1 - ee * sinPhi * sinPhi
        w = sqrt(w2)

        # Scale the raw offsets to degrees using the ellipsoid's curvature at this latitude.
        shiftLat = (rawLat * 180.0) / ((a * (1 - ee)) / (w2 * w) * pi)
        shiftLon = (rawLon * 180.0) / (a / w * cos(phi) * pi)
        return (wgsLon + shiftLon, wgsLat + shiftLat)
    return wgsLon, wgsLat


def gcj2wgs(gcjLon, gcjLat, tol=1e-6, max_iter=50):
    """Convert a GCJ-02 coordinate back to WGS-84 by fixed-point iteration.

    ``wgs2gcj`` has no closed-form inverse, so the estimate ``w`` is refined with
    ``w <- w - (wgs2gcj(w) - g)`` until two successive estimates differ by less
    than ``tol`` on both axes.

    Parameters
    ----------
    gcjLon, gcjLat : float
        GCJ-02 longitude and latitude.
    tol : float, optional
        Convergence threshold, in the same units as the inputs.
    max_iter : int, optional
        Upper bound on refinement steps. ``wgs2gcj`` is discontinuous where
        ``outOfChina`` flips, so estimates near that border can bounce between
        two values forever; the bound guarantees termination.

    Returns
    -------
    tuple
        ``(wgsLon, wgsLat)``: the converged estimate, or the latest estimate if
        ``max_iter`` steps were not enough.
    """
    target = (gcjLon, gcjLat)
    estimate = target
    for _ in range(max_iter):
        projected = wgs2gcj(estimate[0], estimate[1])
        # refined = estimate - (projected - target), component-wise
        refined = tuple(w - (g - t) for w, g, t in zip(estimate, projected, target))
        # Written as "not any(... >= tol)" so NaN deltas stop the loop, as before.
        converged = not any(abs(r - w) >= tol for r, w in zip(refined, estimate))
        estimate = refined
        if converged:
            break
    return estimate

def gcj2wgs_loc(location=None):
    """Convert one GCJ-02 ``(longitude, latitude)`` pair to WGS-84.

    Convenience wrapper around ``gcj2wgs`` for row-wise use, e.g. with
    ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    location : pandas.Series or sequence of two floats
        The coordinate pair, ordered as (longitude, latitude).

    Returns
    -------
    tuple
        ``(wgsLon, wgsLat)`` as returned by ``gcj2wgs``.

    Raises
    ------
    ValueError
        If ``location`` is ``None`` or empty.
    """
    if location is None:
        raise ValueError('Invalid location.')
    # pandas objects expose ``.empty``; plain sequences are checked by length.
    is_empty = location.empty if hasattr(location, 'empty') else len(location) == 0
    if is_empty:
        raise ValueError('Invalid location.')
    return gcj2wgs(*location)  # location must be in the form of (Longitude, Latitude)

In [3]:
# Listings from `guangzhou-detail`, each joined (on Community) with the coordinates of
# its community from `guangzhou-community`. The WHERE clause drops every listing whose
# community has no coordinates, including listings the LEFT JOIN could not match.
# NOTE(review): in SQLite, `/` truncates when both operands are INTEGER — confirm that
# Price or Area is stored as REAL, otherwise UnitPrice is floored.
HouseExtractionSQL = '''
                     SELECT d.Community, c.Longitude, c.Latitude, d.Price/d.Area AS UnitPrice
                     FROM `guangzhou-detail` AS d
                     LEFT JOIN `guangzhou-community` AS c
                     ON d.Community = c.Community
                     WHERE (c.Longitude IS NOT NULL) AND (c.Latitude IS NOT NULL)
                     '''

# All columns of `guangzhou-metro` for the rows that have both coordinates.
MetroExtractionSQL = '''
                     SELECT * FROM `guangzhou-metro`
                     WHERE (Longitude IS NOT NULL) AND (Latitude IS NOT NULL)
                     '''

Engine = sqlalchemy.create_engine('sqlite:///lianjia.db')  # SQLite file, path relative to the working directory

# Load both result sets into DataFrames.
HouseData = pd.read_sql_query(HouseExtractionSQL, Engine)
MetroData = pd.read_sql_query(MetroExtractionSQL, Engine)

In [4]:
# Guangzhou's metro naming lumps disconnected parts of Line 3 and Line 14 together.
# Give each branch its own LineCode so the branches are not mistaken for one continuous line.
MetroData.loc[MetroData['LineName']=='三北线', 'LineCode'] = '3N'
MetroData.loc[MetroData['LineName'].str.contains('知识城'), 'LineCode'] = '14B'

# Interchange station between Line 3 and its northern branch: duplicate the Line 3 row
# under the branch's code and name so the branch also passes through it.
L3_ex_stat = MetroData[(MetroData['LineCode'] == 3) & (MetroData['StationName'] == '体育西路')].replace(
    {'LineCode': {3: '3N'}, 'LineName': {'三号线': '三北线'}})
# Append only if the branch does not have the station yet, so that re-running this
# cell does not pile up duplicate rows.
if not ((MetroData['LineCode'] == '3N') & (MetroData['StationName'] == '体育西路')).any():
    MetroData = pd.concat([MetroData, L3_ex_stat], ignore_index=True)

# Interchange station between Line 14 and its Knowledge City branch: same treatment.
L14_ex_stat = MetroData[(MetroData['LineCode'] == 14) & (MetroData['StationName'] == '新和')].replace(
    {'LineCode': {14: '14B'}, 'LineName': {'十四号线': '十四号线(知识城)'}})
if not ((MetroData['LineCode'] == '14B') & (MetroData['StationName'] == '新和')).any():
    MetroData = pd.concat([MetroData, L14_ex_stat], ignore_index=True)
MetroData.tail()

Unnamed: 0,LineCode,LineName,LineColor,StationCode,StationName,Longitude,Latitude
269,GF,广佛线,"rgb(187, 216, 10)",23,石溪,113.285951,23.067937
270,GF,广佛线,"rgb(187, 216, 10)",24,南洲,113.297367,23.064799
271,GF,广佛线,"rgb(187, 216, 10)",25,沥滘,113.319077,23.054898
272,3N,三北线,"rgb(232, 158, 71)",11,体育西路,113.321503,23.131138
273,14B,十四号线(知识城),"rgb(121, 39, 32)",8,新和,113.46706,23.413259


In [5]:
def MetroVisualizaiton(Map, opacity=0.75, metro_data=None):
    """Draw every metro line and its stations onto a folium map.

    Each distinct ``LineCode`` becomes one polyline through its stations, ordered
    by ``StationCode`` and drawn in the line's ``LineColor``. Every station row
    then gets a small white circle with a "<LineCode>-<StationCode>  <name>"
    tooltip. Coordinates are converted with ``gcj2wgs`` before plotting.

    Parameters
    ----------
    Map : folium.Map
        Map (or any folium parent element) the layers are added to.
    opacity : float, optional
        Opacity of the polylines and fill opacity of the station circles.
    metro_data : pandas.DataFrame, optional
        Station table with the columns LineCode, LineColor, StationCode,
        StationName, Longitude and Latitude. Defaults to the notebook-level
        ``MetroData``.

    Returns
    -------
    list of tuple
        ``(lat, lng)`` of every station, in the row order of the table.

    Raises
    ------
    AssertionError
        If the rows of one line do not share a single colour.
    """
    data = MetroData if metro_data is None else metro_data

    stations = []
    for lcode in data['LineCode'].unique():
        ldata = data[data['LineCode'] == lcode].sort_values('StationCode', ascending=True)
        line_colors = ldata['LineColor'].unique()
        assert line_colors.size == 1, 'Line {} must have exactly one colour'.format(lcode)
        lcolor = line_colors[0]

        # Convert each station and flip (lng, lat) into the (lat, lng) order PolyLine expects.
        pos_list = [gcj2wgs(sdata['Longitude'], sdata['Latitude'])[::-1] for _, sdata in ldata.iterrows()]

        # Draw the line in its own signature colour.
        folium.PolyLine(pos_list, color=lcolor, opacity=opacity).add_to(Map)

    # Mark every station on top of the lines, labelled with its line and station code.
    for _, station in data.iterrows():
        pos = gcj2wgs(station['Longitude'], station['Latitude'])[::-1]
        stations.append(pos)
        # Replace non-ASCII characters with numeric character references so the
        # tooltip HTML is pure ASCII. Decoding the bytes (rather than slicing their
        # repr) keeps backslashes and quotes in a name intact.
        station_encoded = station['StationName'].encode('ascii', 'xmlcharrefreplace').decode('ascii')
        station_html = '<p>{}-{:02d}  {:s}</p>'.format(station['LineCode'], station['StationCode'], station_encoded)
        folium.Circle(location=pos, radius=5, color='white', fill_color='white', fill_opacity=opacity,
                      tooltip=Tooltip(station_html)).add_to(Map)

    return stations

In [6]:
# Rough overview of how many listings each area has: one clustered marker per listing.
MC = MarkerCluster()
GZMap = folium.Map(location=[23.132, 113.266], tiles='OpenStreetMap', zoom_start=12)

for _, rec in HouseData.iterrows():
    lng, lat = gcj2wgs(rec['Longitude'], rec['Latitude'])
    # Replace non-ASCII characters with numeric character references so the popup
    # HTML is pure ASCII. Decoding the bytes (rather than slicing their repr) keeps
    # backslashes and quotes in a community name intact.
    location_encoded = rec['Community'].encode('ascii', 'xmlcharrefreplace').decode('ascii')
    location_html = '<p>{}</p>'.format(location_encoded)
    MC.add_child(Marker([lat, lng], popup=Popup(location_html, max_width=100)))
MC.add_to(GZMap)

MetroVisualizaiton(GZMap)
GZMap.save('Cluster.html')

In [7]:
# Show the listing density as a heat map.

GZMap2 = folium.Map(location=[23.132, 113.266], tiles='OpenStreetMap', zoom_start=12)

# Convert every (Longitude, Latitude) row, then rebuild a two-column frame from the pairs.
wgs_pairs = HouseData[['Longitude', 'Latitude']].apply(gcj2wgs_loc, axis=1)
ModifiedData = pd.DataFrame(wgs_pairs.tolist(), columns=['Longitude', 'Latitude'])

# HeatMap is fed the columns in (Latitude, Longitude) order.
heat_layer = HeatMap(data=ModifiedData[['Latitude', 'Longitude']], radius=15, blur=10)
heat_layer.add_to(GZMap2)

MetroVisualizaiton(GZMap2)
GZMap2.save('Heatmap.html')

In [8]:
# Rent per unit area, with the rental type (shared vs. whole-unit) of each listing.

# Same join and coordinate filter as the first house query, plus the RentType column.
# NOTE(review): in SQLite, `/` truncates when both operands are INTEGER — confirm that
# Price or Area is stored as REAL, otherwise UnitPrice is floored.
ExtractionSQL2 = '''
                SELECT d.Community, d.RentType, c.Longitude, c.Latitude, d.Price/d.Area AS UnitPrice
                FROM `guangzhou-detail` AS d
                LEFT JOIN `guangzhou-community` AS c
                ON d.Community = c.Community
                WHERE (c.Longitude IS NOT NULL) AND (c.Latitude IS NOT NULL)
                '''

Engine = sqlalchemy.create_engine('sqlite:///lianjia.db')  # rebinds Engine to a new engine on the same SQLite file

HouseData2 = pd.read_sql(ExtractionSQL2, Engine)

In [9]:
GZMap3 = folium.Map(location=[23.132, 113.266], tiles='CartoDBPositron', zoom_start=12)

# One circle per listing: orange for shared rentals, blue for everything else,
# with a radius of half the unit price.
for _, rec in HouseData2.iterrows():
    if rec['RentType'] == '合租':
        color = 'orange'
    else:
        color = 'blue'
    wgs_lng, wgs_lat = gcj2wgs(rec['Longitude'], rec['Latitude'])
    ModifiedLocation = (wgs_lat, wgs_lng)  # folium expects (lat, lng)
    price_circle = Circle(location=ModifiedLocation, radius=rec['UnitPrice']/2,
                          color=color, opacity=0.6, fill_color=color, fill_opacity=0.75)
    price_circle.add_to(GZMap3)

MetroVisualizaiton(GZMap3)
GZMap3.save('RentType.html')

In [10]:
# Rent per unit area, with a flag telling whether each listing has an elevator.

# Same join and coordinate filter as the first house query, plus the ElevatorFlag column.
# NOTE(review): in SQLite, `/` truncates when both operands are INTEGER — confirm that
# Price or Area is stored as REAL, otherwise UnitPrice is floored.
ExtractionSQL3 = '''
                SELECT d.Community, d.ElevatorFlag, c.Longitude, c.Latitude, d.Price/d.Area AS UnitPrice
                FROM `guangzhou-detail` AS d
                LEFT JOIN `guangzhou-community` AS c
                ON d.Community = c.Community
                WHERE (c.Longitude IS NOT NULL) AND (c.Latitude IS NOT NULL)
                '''

Engine = sqlalchemy.create_engine('sqlite:///lianjia.db')  # rebinds Engine to a new engine on the same SQLite file

HouseData3 = pd.read_sql(ExtractionSQL3, Engine)

In [11]:
GZMap4 = folium.Map(location=[23.132, 113.266], tiles='CartoDBPositron', zoom_start=12)

# One circle per listing: lime green when the elevator flag is '有', orange-red
# otherwise, with a radius of half the unit price.
for _, rec in HouseData3.iterrows():
    if rec['ElevatorFlag'] == '有':
        color = 'limegreen'
    else:
        color = 'orangered'
    wgs_lng, wgs_lat = gcj2wgs(rec['Longitude'], rec['Latitude'])
    ModifiedLocation = (wgs_lat, wgs_lng)  # folium expects (lat, lng)
    price_circle = Circle(location=ModifiedLocation, radius=rec['UnitPrice']/2,
                          color=color, opacity=0.6, fill_color=color, fill_opacity=0.75)
    price_circle.add_to(GZMap4)

MetroVisualizaiton(GZMap4)
GZMap4.save('Elevator.html')