## 高德

### 地理编码

In [42]:
import json
import requests
from pprint import pprint
import logging
# Configure the root logger, which is the logger the bare logging.info(...) /
# logging.warning(...) calls in this notebook write to. getLogger() with no
# name always returns the root logger, whereas getLogger("root") only does so
# on Python 3.9+ (earlier versions create a separate logger named "root").
logger = logging.getLogger()
logger.setLevel(logging.INFO)

def load_dict():
    """Reload the location and commute-time caches from their JSON files.

    Reads ``loc_dict.json`` into the module-level ``LOC_DICT`` and
    ``dis_dict.json`` into the module-level ``DIS_DICT``, then prints both.

    Raises:
        FileNotFoundError: if either cache file does not exist.
        json.JSONDecodeError: if either file does not contain valid JSON.
    """
    # Without this declaration the assignments below would only bind function
    # locals and the loaded data would be discarded when the function returns.
    global LOC_DICT, DIS_DICT
    # Context managers close the file handles deterministically.
    with open("loc_dict.json") as loc_file:
        LOC_DICT = json.load(loc_file)
    with open("dis_dict.json") as dis_file:
        DIS_DICT = json.load(dis_file)
    print(LOC_DICT, DIS_DICT)
    
def dump_dict():
    """Write the in-memory caches back to their JSON files.

    Serialises the module-level ``LOC_DICT`` to ``loc_dict.json`` and
    ``DIS_DICT`` to ``dis_dict.json`` in the current working directory,
    overwriting any existing content. Non-ASCII characters are written
    verbatim (``ensure_ascii=False``) with a two-space indent.
    """
    # Context managers guarantee the data is flushed and the handles closed
    # even if serialisation raises part-way through.
    with open("loc_dict.json", "w") as loc_file:
        json.dump(LOC_DICT, loc_file, ensure_ascii=False, indent=2)
    with open("dis_dict.json", "w") as dis_file:
        json.dump(DIS_DICT, dis_file, ensure_ascii=False, indent=2)

# NOTE(review): this API key is committed in plain text; consider loading it
# from an environment variable or an untracked config file instead.
KEY = '33e2719dbf39acaf016591b2e6786052'
CITY = "北京"  # specify a city; otherwise the whole country is searched
TARGET_ADDRESS = "源创空间大厦"  # destination address used for the commute calculation

# Two separate dict objects: a chained ``LOC_DICT = DIS_DICT = {}`` would bind
# both names to the same dict, mixing location entries with commute times.
LOC_DICT = {}  # address -> location string, e.g. "116.506470,39.902120"
DIS_DICT = {}  # "<from>-<to>" -> commute time in minutes
# Read the cache files loc_dict.json and dis_dict.json via load_dict().
load_dict()

{'朝阳大悦城': '116.523365,39.923063', '源创空间大厦': '116.506470,39.902120', '姚家园东里7号院': '116.518627,39.945863', '扬州水乡别墅': '116.570957,39.852185', '通惠家园惠生园': '116.508111,39.909585', '石门新居': '116.347152,39.891967', '龙跃苑东五区': '116.356823,40.072324', '空军大院': '116.330807,40.057551', '康家园': '116.517304,39.911124', '金隅翡丽铂爵郡': '116.372886,40.065132', '金顶街四区': '116.173015,39.925170', '建清园小区': '116.347789,40.015794', '佳运园二区': '116.401943,40.046257', '慧谷时空': '116.470847,40.002795', '和谐家园二区': '116.360358,40.082532', '高档小区榴景秀苑27号楼': '116.422704,39.844279', '富力阳光美园': '116.606466,39.927837', '方舟苑': '116.465574,39.980322', '东方雅苑小区': '116.474849,39.887748', '北花园小区': '116.550101,39.904687', '广通小区': '116.637081,39.901193', '花家地西里1区': '116.460162,39.988340', '老街西里小区': '116.375838,39.855286', '二里庄小区45号楼': '116.366718,39.993796', '延静里中街小区': '116.483779,39.916279', '永乐西小区': '116.226403,39.901519', '满庭芳嘉园': '116.546618,40.084657', '双花园南里': '116.454510,39.898911', '郎辛庄': '116.569842,39.856280', '羊毛胡同': '116.388983,39.

In [34]:
def get_add_loc(address: str, timeout: float = 10.0):
    """Return the cached or freshly geocoded location string for ``address``.

    ``address`` is looked up in the module-level ``LOC_DICT`` first. On a
    miss, the Gaode geocoding endpoint is queried (restricted to ``CITY``)
    and the outcome is stored in ``LOC_DICT`` before being returned.

    Args:
        address: Address or place name to geocode.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``location`` field of the first geocode result (a string such as
        ``"116.506470,39.902120"``), or ``None`` when the response reports
        zero results.

    Raises:
        RuntimeError: if the HTTP status code is not 200.
        requests.RequestException: on network failure or timeout.
    """
    if address in LOC_DICT:
        return LOC_DICT[address]

    logging.info(f"fetching loc of {address} from gaode api")
    res = requests.get(
        'https://restapi.amap.com/v3/geocode/geo',
        params={
            "key": KEY,
            "city": CITY,
            "address": address,
        },
        # Without a timeout, requests waits indefinitely on a stalled connection.
        timeout=timeout,
    )

    # Explicit check rather than ``assert`` so it still runs under ``python -O``.
    if res.status_code != 200:
        raise RuntimeError(
            f"geocode request for {address} failed with HTTP {res.status_code}"
        )

    result = res.json()
    count = int(result.get('count', 0))
    if count == 0:
        logging.error("should have result")
        logging.error(result)
        # NOTE(review): the miss is cached as None, so this address is not
        # queried again for as long as the entry stays in LOC_DICT.
        LOC_DICT[address] = None
        return None

    if count > 1:
        logging.warning("more than 1 result, select the first")
    loc = result["geocodes"][0]["location"]
    LOC_DICT[address] = loc
    return loc

### 通勤规划

In [32]:
def calc_walking_dis(from_address, to_address, timeout=10.0):
    """Return the walking time in minutes between two addresses.

    Both addresses are resolved with ``get_add_loc`` and passed to the Gaode
    walking-direction endpoint.

    Args:
        from_address: Origin address or place name.
        to_address: Destination address or place name.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The duration of the first returned path, converted from seconds to
        whole minutes (truncated), or ``-1`` when no walking route is
        available.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    logging.info(f"calc_walking_dis: from_address: {from_address}, to_address: {to_address}")
    origin = get_add_loc(from_address)
    destination = get_add_loc(to_address)
    if not origin or not destination:
        # get_add_loc yields None for an address it could not geocode; a
        # request without both endpoints cannot produce a route, so skip it.
        logging.warning("not found any walking solution")
        logging.warning(f"missing location: origin={origin}, destination={destination}")
        return -1

    res = requests.get(
        'https://restapi.amap.com/v3/direction/walking',
        params={
            "key": KEY,
            "origin": origin,
            "destination": destination,
            "output": "json",
        },
        # Without a timeout, requests waits indefinitely on a stalled connection.
        timeout=timeout,
    )
    result = res.json()
    count = int(result.get("count", 0))
    if count == 0:
        logging.warning("not found any walking solution")
        logging.warning(result)
        return -1
    # The duration is divided by 60 to report minutes.
    return int(float(result["route"]["paths"][0]["duration"]) / 60)

def calc_transit_dis(from_address, to_address, timeout=10.0):
    """Return the commute time in minutes from ``from_address`` to ``to_address``.

    Results are memoised in the module-level ``DIS_DICT`` under the key
    ``"<from_address>-<to_address>"``. On a miss, both addresses are resolved
    with ``get_add_loc`` and the Gaode integrated-transit endpoint is queried;
    when it returns no transit plan, ``calc_walking_dis`` is used instead.

    Args:
        from_address: Origin address or place name.
        to_address: Destination address or place name.
        timeout: Seconds to wait for the transit HTTP request.

    Returns:
        Duration of the first transit plan (or of the walking fallback) in
        whole minutes, or ``999`` when the origin cannot be geocoded or no
        route of either kind is found.

    Raises:
        ValueError: if the destination address cannot be geocoded.
        requests.RequestException: on network failure or timeout.
    """
    unreachable = 999  # sentinel stored when no usable route exists

    key = from_address + "-" + to_address
    if key in DIS_DICT:
        return DIS_DICT[key]

    logging.info(f"calc_work_dis: from_address: {from_address}, to_address: {to_address}")
    from_loc = get_add_loc(from_address)
    to_loc = get_add_loc(to_address)
    if not to_loc:
        # ValueError is a subclass of Exception, so existing handlers still match.
        raise ValueError("to loc must exist")

    if not from_loc:
        minutes = unreachable
    else:
        res = requests.get(
            'https://restapi.amap.com/v3/direction/transit/integrated',
            params={
                "origin": from_loc,
                "destination": to_loc,
                "output": "json",
                "key": KEY,
                # 0: fastest, 1: cheapest, 2: fewest transfers,
                # 3: least walking, 5: no subway
                "strategy": 3,
                "extensions": "base",
                "city": CITY,
            },
            # Without a timeout, requests waits indefinitely on a stalled connection.
            timeout=timeout,
        )
        result = res.json()
        if int(result.get("count", 0)) == 0:
            logging.info("not found any transit solution, trying walking ones")
            logging.info(result)
            minutes = calc_walking_dis(from_address, to_address)
            if minutes < 0:
                # calc_walking_dis reports failure as -1. Caching that value
                # would make the listing look closer than every real commute,
                # so store the same sentinel used for an unknown origin.
                minutes = unreachable
        else:
            # The duration is divided by 60 to report minutes.
            minutes = int(float(result["route"]["transits"][0]["duration"]) / 60)

    DIS_DICT[key] = minutes
    return minutes

## analyze rent sheet from some wechat group

In [8]:
import pandas as pd

### read excel

In [16]:
# Load the rental registration spreadsheet; the trailing bare expression makes
# the notebook display the resulting frame.
df_raw = pd.read_excel(
    io="./北漂租房登记（表二）.xlsx",
)
df_raw

Unnamed: 0,求整租／求合租,居室,区域,居室.1,一居室,步梯／电梯,楼层,登记时间,预算,联系方式,备注,Unnamed: 11
0,房东,整租,朝阳,富华家园,一居室,电梯,24楼19层,登记日期,价位,联系方式,备注,
1,,,,,,,,,,,,
2,,,,,,,,,,,,
3,,你还挺逗的啊！,,,,,,,,,,
4,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
122,房东,合租,1号线管庄京通苑7号楼,京通苑,98平三居室(主卧带阳台)全女生，无隔断,电梯,2层,2022-03-01 00:00:00,2000/2500,grace304874709微信,押一付一，全职妈妈房东直租，无中介费，可以看各种证件，好沟通,
123,转租,整租,7号线达官营,西城区天宁寺西里,正规两居室,楼梯,5层,2022-03-01 00:00:00,5200,15001111201微信,押一付三，转租，余14个月，无中介费，如有需要可与房东直签,
124,转租,合租,望京西站13号线,香颂3期,正规次卧朝阳,电梯,10层,2022-03-02 00:00:00,2700-3000,15801260086微信同步,个人转租，房子新，装修精，个人刚租交完中介费、服务费。便宜转租，只需交租金,
125,转租,合租,8号线平西府,田园风光雅园东区,正规次卧，四室两卫,楼梯,6层,2022-03-02 00:00:00,1900,S_pxy99_R,在地铁站一二百米的位置，1900一个月。已和房东商议好不要中介费，到期🉑️和直接与房东续签。...,


### drop rows missing contact, budget, or area

In [25]:
# Discard rows in which any of the contact, budget or area columns is missing.
df = df_raw.dropna(
    subset=["联系方式", "预算", "区域"],
)
# Keep every column whose name does not start with "Unnamed".
columns = [name for name in df.columns if not name.startswith("Unnamed")]
df = df[columns]
df

Unnamed: 0,求整租／求合租,居室,区域,居室.1,一居室,步梯／电梯,楼层,登记时间,预算,联系方式,备注
0,房东,整租,朝阳,富华家园,一居室,电梯,24楼19层,登记日期,价位,联系方式,备注
7,转租,合租,西二旗邓庄南路,唐家岭t09小区,次卧,电梯,2层,2022-03-01 00:00:00,2300,电话同微13050566120,与房东签合同，押一付三，无中介费和服务费，水费已交，有5g网，房间采光好，整洁安静。
8,转租,整租一居室,通州梨园,海棠湾小区,正规一居室,电梯,17,2022-01-03 00:00:00,4200,18206681785,与房东签合同，押一付三。无中介，已有无线网。
9,转租,合租,距离6号线田村地铁站269米,田村48号院,单间，正规三居室，其他两间都是女生,步梯,3楼,2022-01-03 00:00:00,2730,VX同电话：15311522713,自如签的合同，放心入住，安全干净卫生，有无线网，合同2023年9月4号到期，因工作原因转租
10,转租,合租,八通线附近,玉桥北里小区,次卧,步梯,2层,2022-01-17 00:00:00,1200,vx电话 17736573437,与房东签合同，已经押金了，无中介，有网。租期3.15-7.15
...,...,...,...,...,...,...,...,...,...,...,...
121,房东,整租,朝阳区东坝锦安家园,朝阳区东坝锦安家园,三居室,电梯,20层,2022-03-01 00:00:00,6000,tcy425816 微信,房东直租，无中介费。
122,房东,合租,1号线管庄京通苑7号楼,京通苑,98平三居室(主卧带阳台)全女生，无隔断,电梯,2层,2022-03-01 00:00:00,2000/2500,grace304874709微信,押一付一，全职妈妈房东直租，无中介费，可以看各种证件，好沟通
123,转租,整租,7号线达官营,西城区天宁寺西里,正规两居室,楼梯,5层,2022-03-01 00:00:00,5200,15001111201微信,押一付三，转租，余14个月，无中介费，如有需要可与房东直签
124,转租,合租,望京西站13号线,香颂3期,正规次卧朝阳,电梯,10层,2022-03-02 00:00:00,2700-3000,15801260086微信同步,个人转租，房子新，装修精，个人刚租交完中介费、服务费。便宜转租，只需交租金


### filter price

In [26]:
def find_price(s: str) -> int:
    """Extract the first run of digits found in ``s`` as an integer.

    ``s`` is converted with ``str()`` first, so non-string values (numbers,
    ``None``, NaN) are accepted.

    Returns:
        The first digit sequence as an ``int``, or ``0`` when ``s`` contains
        no digits.
    """
    import re

    match = re.search(r'\d+', str(s))
    if match is None:
        return 0
    return int(match.group())

# assign() returns a new frame instead of writing a column into a frame that
# was itself produced by slicing, which avoids pandas' SettingWithCopyWarning.
df = df.assign(price_base=df['预算'].apply(find_price))
# Keep listings whose parsed base price lies in [2000, 3000].
df = df.query("2000 <= price_base <= 3000")
df

Unnamed: 0,求整租／求合租,居室,区域,居室.1,一居室,步梯／电梯,楼层,登记时间,预算,联系方式,备注,price_base
7,转租,合租,西二旗邓庄南路,唐家岭t09小区,次卧,电梯,2层,2022-03-01 00:00:00,2300,电话同微13050566120,与房东签合同，押一付三，无中介费和服务费，水费已交，有5g网，房间采光好，整洁安静。,2300
9,转租,合租,距离6号线田村地铁站269米,田村48号院,单间，正规三居室，其他两间都是女生,步梯,3楼,2022-01-03 00:00:00,2730,VX同电话：15311522713,自如签的合同，放心入住，安全干净卫生，有无线网，合同2023年9月4号到期，因工作原因转租,2730
11,转租,合租,10号线石榴庄,康泽园小区二期,主卧独卫,电梯,14层,2022-01-03 00:00:00,2660,18101080610,主卧独卫，带飘窗，石墙非隔断，西向无遮挡，采光非常好，包物业取暖费，水电气平摊，今年5月23...,2660
12,招室友,合租,亦庄线小红门\n,鸿博家园二期,两室一厅飘窗主卧,电梯,12层,2022-01-03 00:00:00,2400,18611963994（同wx）,本人整租了一套房子，两室一厅户型，现对外出租飘窗主卧，朝南户型，采光好，可随时入住，房子很干净,2400
14,房东,合租,西城右安门内大街,甲10号院6楼,三室一厅独卫独厨,步梯,6楼,2022-01-03 00:00:00,2400-2600,94522286,本人房东，房子南向，一间12方，意间14方，主卧已长租，主卧租客好相处，房子干净，周边交通方...,2400
19,转租,合租,海淀区学院路,清华东路17号院农大家属院,三居室,步梯,二楼,2022-01-03 00:00:00,2800,18731745417（同vx）,正规次卧 自如风格 小区安静 舍友事少 很少碰面 大厂房补内 字节知乎等等，租期到7月份，月...,2800
23,转租,合租,朝阳路 1号6号线,慈云寺北里,精装修朝南次卧,电梯,8/28层,2022-01-03 00:00:00,2000,微信：15532159982,与中介签合同，押一付三，无中介费，无服务费。高层正南次卧。朝阳路黄金地段，临近CBD，国贸直...,2000
30,转租,合租,昌平区回龙观,龙锦苑东一区,三居室，步梯，,,3楼,2022-01-10 00:00:00,月租金2820，押一付一,电话同微 15738739350,主卧转租，距离地铁走路八分钟，地铁，24小时保安巡逻，到期可续租，无中介费，家电齐全，无隔断...,2820
32,招室友,合租,昌平区回龙观,通达园小区,复试，主卧，朝南,步梯,6层,2022-01-10 00:00:00,2700/月，押一付一,电话同微：18283765946,通达园 南向 主卧 带小露台 阳光超足 两家公用一个卫生间（另一个屋一个男生）附带一个公用的...,2700
34,房东,合租,1号线，石景山区鲁谷南路,六合园北区,三室一厅,电梯,20楼,2022-01-10 00:00:00,2600/月，押一付三,电话微信18601203454,大主卧，全天采光，视野好，新置电器。新华社物业，安全，服务好。,2600


### call gaode api and update distances

In [35]:
# NOTE(review): df2 is not assigned in any cell visible above; presumably it
# comes from a cell that is not shown. Confirm before running top to bottom.
df2 = df2.copy()
# Commute time in minutes from each row's '居室.1' value to TARGET_ADDRESS.
df2["work_minutes"] = df2['居室.1'].map(
    lambda address: calc_transit_dis(address, TARGET_ADDRESS)
)
df2

INFO:root:calc_work_dis: from_address: 87号楼, to_address: 源创空间大厦
INFO:root:calc_work_dis: from_address: 友谊嘉园二期, to_address: 源创空间大厦
INFO:root:fetching loc of 友谊嘉园二期 from gaode api
INFO:root:calc_work_dis: from_address: 住欣家园, to_address: 源创空间大厦
INFO:root:fetching loc of 住欣家园 from gaode api
INFO:root:calc_work_dis: from_address: 天通西苑, to_address: 源创空间大厦
INFO:root:fetching loc of 天通西苑 from gaode api
INFO:root:calc_work_dis: from_address: 华源一里, to_address: 源创空间大厦
INFO:root:fetching loc of 华源一里 from gaode api
INFO:root:calc_work_dis: from_address: 泰福苑二区, to_address: 源创空间大厦
INFO:root:fetching loc of 泰福苑二区 from gaode api
INFO:root:calc_work_dis: from_address: 云趣园二区, to_address: 源创空间大厦
INFO:root:fetching loc of 云趣园二区 from gaode api
INFO:root:calc_work_dis: from_address: 新建村二期高层, to_address: 源创空间大厦
INFO:root:fetching loc of 新建村二期高层 from gaode api
INFO:root:calc_work_dis: from_address: 吉晟别墅, to_address: 源创空间大厦
INFO:root:fetching loc of 吉晟别墅 from gaode api
INFO:root:calc_work_dis: from_address: tbd

Unnamed: 0,求整租／求合租,居室,区域,居室.1,一居室,步梯／电梯,楼层,登记时间,预算,联系方式,备注,price_base,work_minutes
7,转租,合租,西二旗邓庄南路,唐家岭t09小区,次卧,电梯,2层,2022-03-01 00:00:00,2300,电话同微13050566120,与房东签合同，押一付三，无中介费和服务费，水费已交，有5g网，房间采光好，整洁安静。,2300,150
9,转租,合租,距离6号线田村地铁站269米,田村48号院,单间，正规三居室，其他两间都是女生,步梯,3楼,2022-01-03 00:00:00,2730,VX同电话：15311522713,自如签的合同，放心入住，安全干净卫生，有无线网，合同2023年9月4号到期，因工作原因转租,2730,131
11,转租,合租,10号线石榴庄,康泽园小区二期,主卧独卫,电梯,14层,2022-01-03 00:00:00,2660,18101080610,主卧独卫，带飘窗，石墙非隔断，西向无遮挡，采光非常好，包物业取暖费，水电气平摊，今年5月23...,2660,106
12,招室友,合租,亦庄线小红门\n,鸿博家园二期,两室一厅飘窗主卧,电梯,12层,2022-01-03 00:00:00,2400,18611963994（同wx）,本人整租了一套房子，两室一厅户型，现对外出租飘窗主卧，朝南户型，采光好，可随时入住，房子很干净,2400,92
14,房东,合租,西城右安门内大街,甲10号院6楼,三室一厅独卫独厨,步梯,6楼,2022-01-03 00:00:00,2400-2600,94522286,本人房东，房子南向，一间12方，意间14方，主卧已长租，主卧租客好相处，房子干净，周边交通方...,2400,117
23,转租,合租,朝阳路 1号6号线,慈云寺北里,精装修朝南次卧,电梯,8/28层,2022-01-03 00:00:00,2000,微信：15532159982,与中介签合同，押一付三，无中介费，无服务费。高层正南次卧。朝阳路黄金地段，临近CBD，国贸直...,2000,55
34,房东,合租,1号线，石景山区鲁谷南路,六合园北区,三室一厅,电梯,20楼,2022-01-10 00:00:00,2600/月，押一付三,电话微信18601203454,大主卧，全天采光，视野好，新置电器。新华社物业，安全，服务好。,2600,128
35,转租,合租,朝阳酒仙桥,驼房营南里,两室一厅,步梯,2楼,2022-01-10 00:00:00,3000╱月，押一付三,电话18701815919,我爱我家相寓的房子，八月底到期，室友是干净一男生，人好，小区门口菜场超市交通都特便利。转租为...,3000,86
37,转租,合租,天通苑,天通苑西二区,大次卧,电梯,9楼,2022-01-10 00:00:00,2500/月，押一付三,电话同微信15001272928,面积真的很大，工作原因转租，一直都不用交中介费，步行至地铁7分钟左右，楼下就有超市，周边医院...,2500,97
41,招室友,合租,朝阳区柳芳地铁站附近,柳芳北里,两室一厅次卧朝东,电梯,11楼,2022-01-12 00:00:00,3000/月，押一付三,电话同微信15205217865,次卧招室友仅限女生，小区近柳芳地铁站，主卧也只住一个女生，家电齐全，房屋整体面积60平米。无...,3000,66


### drop listings that are too far away

In [36]:
# Keep listings with a commute under an hour. The lower bound excludes the -1
# that calc_walking_dis returns on failure, which would otherwise pass the
# "< 60" test and be treated as the shortest commute.
df2 = df2.query("0 <= work_minutes < 60").copy()

### rank

In [37]:
# Score = base price x commute minutes; a lower score ranks higher.
df2['score'] = df2['price_base'].mul(df2['work_minutes'])
# Sorting is done in place and df3 is only another name for the same frame,
# so df2 ends up sorted as well.
df2.sort_values(by="score", ascending=True, inplace=True)
df3 = df2
df3

Unnamed: 0,求整租／求合租,居室,区域,居室.1,一居室,步梯／电梯,楼层,登记时间,预算,联系方式,备注,price_base,work_minutes,score
72,转租,合租,朝阳青年路,珠江罗马嘉园,次卧,电梯,11层,2022-02-08 00:00:00,2400押一付一,微信418130452,没有中介费，三月初可以入住，可以和中介重新签合同，紧挨朝悦，青年路地铁，想要跟朋友合租所以转租,2400,42,100800
23,转租,合租,朝阳路 1号6号线,慈云寺北里,精装修朝南次卧,电梯,8/28层,2022-01-03 00:00:00,2000,微信：15532159982,与中介签合同，押一付三，无中介费，无服务费。高层正南次卧。朝阳路黄金地段，临近CBD，国贸直...,2000,55,110000
47,,合租,青年路国美第一城小区,国美第一城,次卧,电梯,12楼,5022-01-17 00:00:00,2600，押一付三,'13070198955,新客户完成：5家,2600,47,122200
