## 高德

In [1]:
import os
import json
import requests
import logging
# Set the threshold of the logger named "root" to INFO.
# NOTE(review): the helper functions in this file call logging.debug/info/
# warning/error directly rather than this `logger` object — confirm which
# of the two was intended.
logger = logging.getLogger("root")
logger.setLevel(logging.INFO)

# Directory that holds the three JSON cache files used by load_dict/dump_dict.
GLOBAL_DIR = "global"

dict_title2coords_fp = os.path.join(GLOBAL_DIR, "TITLE2COORDS_DICT.json")
dict_coords2name_fp = os.path.join(GLOBAL_DIR, "COORDS2NAME_DICT.json")
dict_coordspair2duration_fp = os.path.join(GLOBAL_DIR, "COORDSPAIR2DURATION_DICT.json")

# Echo the resolved cache paths.
print(f"global dict of title to coords path: {dict_title2coords_fp}")
print(f"global dict of coords to name path : {dict_coords2name_fp}")
print(f"global dict of coords pair to distance path: {dict_coordspair2duration_fp}")

# Gaode API key, read from the environment rather than hard-coded.
# Raises KeyError at this line if GAODE_KEY is not set.
KEY = os.environ["GAODE_KEY"]
CITY = "北京" # specify city, otherwise search the whole country
# Destination address passed to the transit-duration lookups.
TARGET_ADDRESS = "源创空间大厦"

# In-memory caches, persisted by dump_dict() and restored by load_dict().
# Each cache must be its own dict object: a chained `a = b = c = {}` binds
# all three names to ONE shared dict, so whenever a cache file is missing
# (e.g. on the first run) titles, coordinates and coordinate pairs would all
# be stored in, and dumped from, the same mapping.
TITLE2COORDS_DICT = {}
COORDS2NAME_DICT = {}
COORDSPAIR2DURATION_DICT = {}

def load_dict():
    """Reload the three module-level caches from their JSON files.

    A cache whose file does not exist keeps its current in-memory value.
    The resulting caches are printed once loading is done.
    """
    global TITLE2COORDS_DICT, COORDS2NAME_DICT, COORDSPAIR2DURATION_DICT

    def _read(path, current):
        # Nothing on disk yet: keep whatever is already in memory.
        if not os.path.exists(path):
            return current
        # dump_dict writes with ensure_ascii=False, so the files contain raw
        # non-ASCII text; read them as UTF-8 explicitly instead of relying on
        # the platform default. `with` closes the handle deterministically.
        with open(path, encoding="utf-8") as fh:
            return json.load(fh)

    TITLE2COORDS_DICT = _read(dict_title2coords_fp, TITLE2COORDS_DICT)
    COORDS2NAME_DICT = _read(dict_coords2name_fp, COORDS2NAME_DICT)
    COORDSPAIR2DURATION_DICT = _read(dict_coordspair2duration_fp, COORDSPAIR2DURATION_DICT)
    print({"TITLE2COORDS_DICT": TITLE2COORDS_DICT, "COORDS2NAME_DICT": COORDS2NAME_DICT, "COORDSPAIR2DURATION_DICT": COORDSPAIR2DURATION_DICT})
    
def dump_dict():
    """Write the three module-level caches to their JSON files.

    The parent directory of each file is created if it is missing. Files are
    written as UTF-8 with non-ASCII characters kept as-is and a 2-space indent.
    """
    caches = (
        (dict_title2coords_fp, TITLE2COORDS_DICT),
        (dict_coords2name_fp, COORDS2NAME_DICT),
        (dict_coordspair2duration_fp, COORDSPAIR2DURATION_DICT),
    )
    for path, cache in caches:
        parent = os.path.dirname(path)
        if parent:
            # On a first run the cache directory may not exist yet.
            os.makedirs(parent, exist_ok=True)
        # ensure_ascii=False emits raw non-ASCII text, so the encoding is
        # pinned to UTF-8; `with` flushes and closes the handle.
        with open(path, "w", encoding="utf-8") as fh:
            json.dump(cache, fh, ensure_ascii=False, indent=2)


# Geocoding
def _get_coords_from_addr(address: str):
    """Return the coordinates Gaode geocodes *address* to, using the cache.

    On a cache miss the geocode endpoint is queried (restricted to CITY) and
    the "location" of the first geocode is stored in TITLE2COORDS_DICT.
    A lookup with zero results is cached as None.

    Returns:
        The cached/fetched location (a "116.52,39.96"-style string in the
        cached data), or None when there is no usable location.

    Raises:
        requests.HTTPError: if the HTTP response status is 4xx/5xx.
    """
    if address not in TITLE2COORDS_DICT:
        logging.debug(f"fetching loc of {address} from gaode api")
        res = requests.get('https://restapi.amap.com/v3/geocode/geo', params={
            "key": KEY,
            "city": CITY,
            "address": address,
        }, timeout=10)  # without a timeout a stalled connection hangs forever

        # Explicit check instead of `assert`, which is stripped under -O.
        res.raise_for_status()
        result = res.json()

        # NOTE(review): assumes Gaode marks API-level failures (bad key,
        # quota exceeded, ...) with status "0" — confirm against the API docs.
        # Such failures are not cached, so a transient error does not
        # permanently poison the persisted cache.
        if str(result.get("status", "1")) == "0":
            logging.error("should have result")
            logging.error(result)
            return None

        count = int(result.get('count', 0))
        if count == 0:
            logging.error("should have result")
            logging.error(result)
            TITLE2COORDS_DICT[address] = None
        else:
            if count > 1:
                logging.warning("more than 1 result, select the first")
            loc = result["geocodes"][0]["location"]
            # The API can return an empty list as the location; store None
            # so callers never receive a list where a string is expected.
            TITLE2COORDS_DICT[address] = loc or None

    # `or None` also normalizes empty values already present in a loaded cache.
    return TITLE2COORDS_DICT[address] or None

def get_addr_name_from_coords(loc: str):
    """Return the formatted address Gaode reverse-geocodes *loc* to.

    Results are cached in COORDS2NAME_DICT keyed by *loc*.

    Returns:
        The "formatted_address" of the reverse-geocode result, or None when
        *loc* is empty/None or the API reports a failure.

    Raises:
        requests.HTTPError: if the HTTP response status is 4xx/5xx.
    """
    if not loc:
        return None
    if loc not in COORDS2NAME_DICT:
        logging.debug(f"fetching address name of {loc} from gaode api")
        res = requests.get('https://restapi.amap.com/v3/geocode/regeo', params={
            "key": KEY,
            "location": loc,
        }, timeout=10)  # without a timeout a stalled connection hangs forever

        # Explicit check instead of `assert`, which is stripped under -O.
        res.raise_for_status()
        result = res.json()

        # NOTE(review): assumes Gaode marks API-level failures with status
        # "0" — confirm against the API docs. Report and skip caching rather
        # than failing with a KeyError on the missing 'regeocode' field.
        if str(result.get("status", "1")) == "0":
            logging.error(result)
            return None

        COORDS2NAME_DICT[loc] = result['regeocode']['formatted_address']
    return COORDS2NAME_DICT[loc]

# Walking
def _calc_walking_duration(from_loc, to_loc):
    """Return the walking time between two coordinates, in whole minutes.

    Queries Gaode's walking-direction endpoint and uses the first path.

    Returns:
        The first path's "duration" divided by 60 and truncated to int
        (presumably seconds -> minutes; confirm the unit in the API docs),
        or -1 when the response contains no path.

    Raises:
        requests.HTTPError: if the HTTP response status is 4xx/5xx.
    """
    res = requests.get('https://restapi.amap.com/v3/direction/walking',
                   params={
                       "key": KEY,
                       "origin": from_loc,
                       "destination": to_loc,
                       "output": "json",
                   },
                   timeout=10)  # without a timeout a stalled connection hangs forever
    res.raise_for_status()
    result = res.json()
    count = int(result.get("count", 0))
    if count == 0:
        logging.warning("not found any walking solution")
        logging.warning(result)
        # NOTE: -1 is a "no route" sentinel; callers that compare durations
        # (e.g. `minutes < 60`) must exclude negative values explicitly.
        return -1
    return int(float(result["route"]["paths"][0]["duration"]) / 60) # minutes

# Commute time (by coordinates)
def _calc_transit_duration_between_coords(from_coords, to_coords):
    """Return the commute time between two coordinates, in whole minutes.

    Looks the pair up in COORDSPAIR2DURATION_DICT first. On a miss it queries
    Gaode's integrated-transit endpoint; if that yields no plan it falls back
    to _calc_walking_duration.

    Returns:
        Minutes as int; None when *from_coords* is empty/None; -1 when
        neither a transit plan nor a walking path was found.

    Raises:
        ValueError: if *to_coords* is empty/None.
        requests.HTTPError: if the HTTP response status is 4xx/5xx.
    """
    if not to_coords:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("to loc must exist")
    if not from_coords:
        return None

    key = from_coords + "-" + to_coords

    if key in COORDSPAIR2DURATION_DICT:
        return COORDSPAIR2DURATION_DICT[key]

    res = requests.get('https://restapi.amap.com/v3/direction/transit/integrated',
                   params={
                       "origin": from_coords,
                       "destination": to_coords,
                       "output": "json",
                       "key": KEY,
                       "strategy": 3, # 0: fastest, 1: cheapest, 2: fewest transfers, 3: least walking, 5: no subway
                       "extensions": "base",
                       "city": CITY
                   },
                   timeout=10)  # without a timeout a stalled connection hangs forever
    res.raise_for_status()
    result = res.json()
    count = int(result.get("count", 0))
    if count == 0:
        logging.info("not found any transit solution, trying walking ones")
        logging.info(result)
        minutes = _calc_walking_duration(from_coords, to_coords)
    else:
        minutes = int(float(result["route"]["transits"][0]["duration"]) / 60) # minutes

    # Cache only real durations: the -1 sentinel may stem from a transient API
    # failure, and caching it would persist the failure via dump_dict().
    if minutes >= 0:
        COORDSPAIR2DURATION_DICT[key] = minutes
    return minutes
        

# Commute time (by address)
def _calc_transit_duration_between_addrs(from_pos, to_pos):
    """Geocode both addresses and return the transit duration between them.

    *from_pos* is geocoded first, then *to_pos*; the two results are handed
    to _calc_transit_duration_between_coords, whose return value is returned.
    """
    origin_coords, dest_coords = (
        _get_coords_from_addr(addr) for addr in (from_pos, to_pos)
    )
    return _calc_transit_duration_between_coords(origin_coords, dest_coords)

# Populate the in-memory caches from the JSON files on disk (if present).
load_dict()

global dict of title to coords path: global/TITLE2COORDS_DICT.json
global dict of coords to name path : global/COORDS2NAME_DICT.json
global dict of coords pair to distance path: global/COORDSPAIR2DURATION_DICT.json
{'TITLE2COORDS_DICT': {'东坝3居室转租其中1个次卧怎么租都可以拎包入住包水电气 wifi 首城东郡家园': '116.529424,39.968691', '转租一间卧室2300/月': None, '十号线劲松地铁站附近房东直租': '116.419093,40.089153', '个人出租北京师范大学东门附近两居室': '116.364139,39.962637', '房东直租  南向大主卧带阳台   |  学院南路  西直门  北邮南门  |   无中介费 无网费  价格优惠': [], '通州土桥站公交直达两室一厅转租': '116.700155,39.881128', '【转租】西北旺地铁-永靓家园-三室一厅朝南主卧-近西北旺地铁站，科技园/软件园-2900元 （可议价）-男女不限': '116.257923,40.048703', '房东直租| 南北通透次卧2500，距6号线十里堡地铁口仅5分钟，无中介费': '116.419093,40.089153', '团结湖三里屯附近大主卧带阳台转租': '116.455041,39.935446', '知春路-双榆树东里 房东直租2800 三月中下旬可租': '116.329640,39.967848', '九号线丰台七里庄东大街十号线丰台泥洼附近前泥洼三区次卧转租': '116.299485,39.854458', '直租6号线草房地铁站。无中介费畅心园小区精装朝南主卧。2000': '116.608260,39.926464', '昌平回龙观附近带阳台大卧室转租': '116.342188,40.059295', '转租枣营北里独立标准1居室（2022年8月～2023年2月）非中介，个人转租，可提供合同，5300': '116.470677,39.946876'

## Analyze rental listings from Douban

In [4]:
import os
import pandas as pd

# Input CSV of Douban posts.
DATA_FROM_DOUBAN_DIR = "data_from_douban"
filename = '2022-03-02-zhufang.csv'
filepath = os.path.join(DATA_FROM_DOUBAN_DIR, filename)
print(f"reading file from: {filepath}")
df = pd.read_csv(filepath)

# Parse the latest-response timestamps so they can be compared as datetimes.
df["response_latest_time"] = pd.to_datetime(df["response_latest_time"])

# Keep only posts whose latest response is after 2022-02-25.
is_recent = df["response_latest_time"] > pd.Timestamp("2022-02-25")
df = df[is_recent]

# filter personal
def filter_personal(s):
    """Return True if title *s* should be kept by the personal-listing filter.

    A title is kept when it does not contain "女生" and contains at least one
    of "个人", "直租" or "转租". Non-string values are rejected.
    """
    if not isinstance(s, str):
        # An empty CSV cell is read as NaN (a float); `in` on it would raise
        # TypeError and abort the whole `apply`.
        return False
    if "女生" in s:
        return False
    return any(marker in s for marker in ("个人", "直租", "转租"))
# .copy() so the column assignments below write to an independent frame
# rather than to a slice of the previous one (avoids SettingWithCopyWarning).
df = df[df.post_title.apply(filter_personal)].copy()
print("shape: ", df.shape)

# get coords from title
print("getting loc from title")
df['addr_coords'] = df.post_title.apply(_get_coords_from_addr)
dump_dict()

# get address from coords
print("getting pos name from loc")
df["addr_name"] = df["addr_coords"].apply(get_addr_name_from_coords)
dump_dict()

# get duration between coords
print("getting distance from loc")
# The destination is the same for every row, so geocode it once up front.
target_coords = _get_coords_from_addr(TARGET_ADDRESS)
df["transit_minutes"] = df["addr_coords"].apply(
    lambda x: _calc_transit_duration_between_coords(x, target_coords))
dump_dict()

# filter distance
print("filter distance")
# The lower bound drops the -1 "no route found" sentinel, which would
# otherwise pass `< 60` and sort ahead of every real commute time.
df = df.query("0 <= transit_minutes < 60")

# sort
print('sort')
df = df.sort_values(by=["transit_minutes"], ascending=True)

# dump
print("dump")
df.to_csv(filepath.replace(".csv", "_filter.csv"), encoding="utf-8")
df

reading file from: data_from_douban/2022-03-02-zhufang.csv
shape:  (886, 6)
getting loc from title
getting pos name from loc
getting distance from loc
filter distance
sort
dump


Unnamed: 0,post_title,post_url,author_name,author_url,response_count,response_latest_time,addr_coords,addr_name,transit_minutes
1370,【个人直租】 7号线百子湾/地铁房次卧/24楼风景超好,https://www.douban.com/group/topic/261131740/,小王子的玫瑰花,https://www.douban.com/people/217167438/,0,2022-02-27 20:24:00,"116.497940,39.902261",北京市朝阳区南磨房乡惠水湾森林公园,13.0
712,【个人直租】 7号线百子湾/地铁房次卧/24楼风景超好,https://www.douban.com/group/topic/261258429/,小王子的玫瑰花,https://www.douban.com/people/217167438/,0,2022-03-01 13:05:00,"116.497940,39.902261",北京市朝阳区南磨房乡惠水湾森林公园,13.0
757,转租 地铁一号线 国贸 大望路 四惠 华腾世纪附近 惠生园小区 朝南大卧室出租 看房微信 1...,https://www.douban.com/group/topic/261252075/,匡 立 同 学,https://www.douban.com/people/217296911/,0,2022-03-01 11:42:00,"116.507714,39.908927",北京市朝阳区高碑店乡惠生园小区,22.0
818,个人转租 16号线 永丰南 1350元/月,https://www.douban.com/group/topic/261243250/,VV,https://www.douban.com/people/Warm-Heart/,0,2022-03-01 09:53:00,"116.475200,39.909379",北京市朝阳区建外街道金地中心B座,28.0
165,个人转租 16号线 永丰南 1350元/月,https://www.douban.com/group/topic/261346940/,VV,https://www.douban.com/people/Warm-Heart/,0,2022-03-02 14:52:00,"116.475200,39.909379",北京市朝阳区建外街道金地中心B座,28.0
...,...,...,...,...,...,...,...,...,...
1507,急转 个人出租 步行地铁6号线黄渠5分钟 大主卧 近＃朝阳大悦城＃长楹天街,https://www.douban.com/group/topic/261106352/,smaokim,https://www.douban.com/people/81004924/,15,2022-02-27 14:28:00,"116.601492,39.924972",北京市朝阳区常营乡澳门味道茶餐(长楹天街购物中心店)长楹天街购物中心-东区,59.0
597,6号线草房地铁站转租,https://www.douban.com/group/topic/261274279/,豆友,https://www.douban.com/people/191921656/,1,2022-03-01 16:30:00,"116.615574,39.924477",北京市朝阳区常营乡草房(地铁站),59.0
261,十里堡500米精装次卧转租,https://www.douban.com/group/topic/261333980/,Mignon,https://www.douban.com/people/165684338/,1,2022-03-02 11:55:00,"116.500447,39.918414",北京市朝阳区八里庄街道八里庄东里八里庄东里社区,59.0
2575,【六号线草房】房东直租六号线草房地铁站精品独卫单间,https://www.douban.com/group/topic/260929200/,豆友65357,https://www.douban.com/people/219317394/,13,2022-02-25 08:56:00,"116.615574,39.924477",北京市朝阳区常营乡草房(地铁站),59.0


## Analyze rental listings from a WeChat group

In [5]:
import os
import re
import pandas as pd

DATA_FROM_WECHAT_DIR = "data_from_wechat"

# One entry per spreadsheet. The sheets name their contact, budget and area
# columns differently, so each entry records its own column names.
files = [
    dict(
        filename="./北漂租房登记（表一）.xlsx",
        column_contact="联系方式",
        column_budget="价位",
        column_area="       区域",
    ),
    dict(
        filename="./北漂租房登记（表二）.xlsx",
        column_contact="联系方式",
        column_budget="预算",
        column_area="居室.1",
    ),
]

# Spreadsheet processed by this cell.
file = files[0]

### read excel
filepath = os.path.join(DATA_FROM_WECHAT_DIR, file["filename"])
print(f"reading file from: {filepath}")
df_raw = pd.read_excel(filepath)

### drop rows missing contact, budget or area; drop "Unnamed..." columns
required_columns = [file[k] for k in ("column_contact", "column_budget", "column_area")]
df = df_raw.dropna(subset=required_columns)
columns = [name for name in df.columns if not name.startswith("Unnamed")]
df = df[columns]

### filter price

def find_price(s: str) -> int:
    """Return the first run of digits in *s* as an int, or 0 if there is none.

    *s* is converted with str() first, so numeric cells are accepted too.
    """
    first_number = re.search(r'\d+', str(s))
    if first_number is None:
        return 0
    return int(first_number.group())

# Work on an independent copy: `df` was derived by filtering/selecting from
# another frame, and assigning a column to such a slice triggers
# SettingWithCopyWarning.
df = df.copy()
df['price_base'] = df[file["column_budget"]].apply(find_price)
df = df.query("2000 <= price_base <= 3000")

### call gaode api and update distances

df2 = df.copy()
df2["work_minutes"] = df2[file["column_area"]].apply(
    lambda x: _calc_transit_duration_between_addrs(x, TARGET_ADDRESS))
dump_dict() # update global dict

### drop distance too far

# The lower bound drops the -1 "no route found" sentinel, which would
# otherwise pass `< 60` and produce a negative score that ranks first.
df2 = df2.query("0 <= work_minutes < 60").copy()

### rank

# Lower score is better: price multiplied by commute minutes.
df2['score'] = df2['price_base'] * df2['work_minutes']
df2.sort_values(by=["score"], ascending=True, inplace=True)
df3 = df2

### render
df3.style.background_gradient(cmap="Reds")



reading file from: data_from_wechat/./北漂租房登记（表一）.xlsx


  warn("Workbook contains no default style, apply openpyxl's default")


Unnamed: 0,整租／合租,区域,小区,居室,步梯／电梯,楼层,价位,联系方式,备注,房东／转租／招室友,登记日期,price_base,work_minutes,score
14,合租,四惠东地铁,通惠家园惠生园,三室一厅两卫次卧（没有隔断）,电梯,4.000000,2700.000000,18911303613.000000,押一付三，到2022年6月30日，主卧独卫一对情侣，另外主卧一对情侣,个人转租,,2700,27,72900
52,合租,朝阳传媒大学,北花园小区,次卧（三室一厨一卫）,电梯,23层,2050加转租服务费,15657857003.000000,朝北，转租服务费可谈，押一付三,转租,,2050,49,100450
49,合租,朝阳区西大望路,东方雅苑小区,三室一厅,电梯,1层,2700-2800,18801053254.000000,押一付三，无中介，随时入住,转租,,2700,39,105300
21,合租,朝阳百子湾,石门新居,三室一厅,电梯,一层,3000.000000,15821825746.000000,民水民电长租，随时入住，押一付三，无中介费用,房东,,3000,37,111000
180,整租,1号线传媒大学/双桥,三间房南里七号院乐乎公寓,一居室 有独立卫生间,一层,1.000000,2700.000000,电话：18201120957 微信同步,1.6之后可入住，合同到2022.10.7，之后可续租，可住1-2人，周围有万达，离传媒大学和双桥地铁站都很近,转租,2021-12-27 00:00:00,2700,45,121500
6,转租,朝阳大悦城,姚家园东里7号院,两居室,电梯,五楼,2600.000000,VX典故13846211995,出租正南次卧，押一付三，无任何费用，入住后只交电费和燃气费,,,2600,51,132600
148,合租,朝阳区十里堡,八里庄北里1号院,次卧,步梯,4层,2800.000000,微信Snike_bing,个人转租，次卧超南和主卧一样大，无中介费，12月16可住，距离十里堡地铁100米,个人转租,12.130000,2800,51,142800
89,合租,朝阳大望路/金台路,延静里中街小区,三室一厅，转租主卧,步梯,5楼,2900，无中介,15652392686.000000,租期到2022年八月，月底可搬，签约正规合同,转租,,2900,53,153700
