In [66]:
#!/usr/bin/env python
# cofing: utf-8

# from retry import retry
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime as dt
from tqdm import tqdm
import re
import math
# from logging import getLogger, StreamHandler, Formatter, FileHandler, DEBUG
import yaml
import os
from retry import retry
import sys
import time

notebook = True
if notebook:
    work_dir = '/Users/satomitakei/property_valuation_calculator'
    with open(work_dir + '/setting/kenbiya_scraping_config.yaml', 'r') as yml:
        config = yaml.safe_load(yml)
    area_name = 'tokyo'
else:
    work_dir = os.getcwd()
    with open(work_dir + '/setting/kenbiya_scraping_config.yaml', 'r') as yml:
        config = yaml.safe_load(yml)
    area_name = sys.argv[1]
if area_name == 'tokyo':
    base_url = config['base_url_tokyo']
elif area_name == 'osaka':
    base_url = config['base_url_osaka']
elif area_name == 'fukuoka':
    base_url = config['base_url_fukuoka']

def write_log(log_file, text):
    f = open(log_file, 'a', encoding='UTF-8')
    f.write(text)
    f.close()
    print(text)

diff_jst_from_utc = 0
start_time = dt.datetime.now() + dt.timedelta(hours=diff_jst_from_utc)
now_time = (dt.datetime.now() +
            dt.timedelta(hours=diff_jst_from_utc)).strftime('%Y%m%d_%H%M')

log_dir = work_dir + f'/log/scraping'
os.makedirs(log_dir, exist_ok=True)
log_file = log_dir + f'/{now_time}_log.txt'
f = open(log_file, 'w', encoding='UTF-8')
f.close()

text = 'processing_start_time:' + str(start_time.replace(microsecond=0)) + '\n'
write_log(log_file, text)
excution_date = dt.datetime.today().strftime('%Y%m%d')

# file_name = 'suumo_baibai'
# excution_date = dt.datetime.today().strftime('%Y%m%d')

def get_html(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.content, "html.parser")
    return soup
# @retry(tries=3, delay=10, backoff=2)
# def main():
all_data = []

# 基本ページurl 
page = '1'
url = base_url.format(page = page)
write_log(log_file,'base_url:'+url+'\n')
# get html
item = get_html(url)

# extract all items
total_rooms = int(re.sub(r"\D", "",item.find(True,"strong", class_="result_num").get_text()))
max_page = math.floor(total_rooms/50)+ 1
text = f"max_page:{max_page} \n"
write_log(log_file,text)

url_list = []
all_data = []
error_page = []
# 物件URLの取得
data= {}

def extract_value(pattern,temp_property):
    match = re.search(pattern, temp_property)
    if match:
        return match.group(1)
property_dict = {
    'property_name': '物件名',
    'price': '価格',
    'transportation': '交通',
    'address': '住所',
    'year_built': '築年月',
    'building_structure': '建物構造/階数',
    'exclusive_area': '専有面積',
    'floor_plan': '間取り',
    'transaction_method': '取引態様',
    'delivery': '引渡',
    'current_condition': '現況',
    'cap_rate': '満室時利回り',
    'full_occupancy_incom': '満室時年収/月収',
    'property_name': '物件名',
    'land_rights': '土地権利',
    'management_fee_repair_reserve_fund': '管理費/修繕積立',
    'management_company': '管理会社',
    'management_method': '管理方式/管理人',
    'last_update_date': '直前の更新日',
    'register_date': '情報公開日',
    'scheduled_update_date': '更新予定日',
    'management_id': '管理ID'
}
# for i in tqdm(range(max_page+1)): 
for i in tqdm(range(2)): 
    all_data = []
    url = base_url.format(page = str(i))
    # get html
    item = get_html(url)
    for j in item.findAll(href=re.compile('/pp1/s/tokyo/.+/re')):
        room_url = 'https://www.kenbiya.com/'+j.get('href')
        write_log(log_file,'room_url:'+room_url+'\n')
        room_item = get_html(room_url)
        temp_property_data={}
        for k, l in property_dict.items():
            try:
                temp_property_data[k] = room_item.find('dt', text=l).find_next('dd').get_text()
            except:
                temp_property_data[k] = ''
        data = {}
        # 物件詳細のデータを収集
        # マンション名
        data["property_name"] = temp_property_data['property_name']

        # 価格
        pattern = r'\n(\d*)億'
        try:
            price_oku = int(extract_value(pattern,temp_property_data['price'])) * 100000000
        except:
            price_oku = 0
        try:
            price_man = int(temp_property_data['price'].replace(',', '').replace('万円', '')) * 10000
        except:
            try:
                pattern = r'\n*億(\d*)'
                price_man = int(extract_value(pattern,temp_property_data['price'].replace(',', '').replace('万円', ''))) * 10000
            except:
                price_man = 0
        data["price"] = price_oku + price_man

        # 交通
        data["train_line"] = temp_property_data['transportation'].split()[0]
        data["station"] = temp_property_data['transportation'].split()[1]
        temp_minutes_from_station = temp_property_data['transportation'].split()[2]
        pattern = r'徒歩(\d.+)分'
        data["minutes_from_station"] = extract_value(pattern,temp_minutes_from_station)

        # 住所
        temp_address= temp_property_data['address']
        pattern = r'^(.+(都|道|府|県))'
        data["prefecture_name"] = extract_value(pattern,temp_address)
        pattern = r'^(.+(市|区))'
        data["city_name"] = extract_value(pattern,temp_address)
        data["town_name"] = temp_address.replace(str(data["prefecture_name"]),'').replace(str(data["city_name"]),'')

        # 築年数
        temp_year_built = temp_property_data['year_built']
        pattern = r'築(\d*)年'
        data["year_built"] = int(extract_value(pattern,temp_year_built))

        # 構造
        temp_building_structure= temp_property_data['building_structure']
        pattern = r'^(.+)造'
        data["structure"] = extract_value(pattern,temp_building_structure)

        pattern = r'^.+造(\d+)階'
        data["floor"] = extract_value(pattern,temp_building_structure)

        pattern = r'^.+(\d.+)階建'
        data["max_floor"] = extract_value(pattern,temp_building_structure)

        # 総戸数
        temp_total_rooms = temp_property_data['building_structure'].strip()
        pattern = r'総戸数(\d+)戸'
        data["total_rooms"] = extract_value(pattern,temp_total_rooms)

        # 専有面積
        temp_exclusive_area= temp_property_data['exclusive_area']
        pattern = r'^(\d.+)m²'
        data["exclusive_area"] = extract_value(pattern,temp_exclusive_area)

        # 間取り
        data["floor_plan"] = temp_property_data['floor_plan'].split()[0]

        # 方角
        try:
            data["direction"] = temp_property_data['floor_plan'].split()[1]
        except:
            data["direction"] = None

        # 取引態様
        data["transaction_method"] = temp_property_data['transaction_method']

        # 引渡
        data["delivery"] = temp_property_data['delivery']

        # 現況
        data["current_condition"] = temp_property_data['current_condition']

        # 満室時利回り
        try:
            data["cap_rate"] = float(temp_property_data['cap_rate'].replace('％','')) / 100
        except:
            data["cap_rate"] = None

        # 満室時年収
        try:
            data["full_occupancy_incom"] = int(float(temp_property_data['full_occupancy_incom'].split()[0].replace('万円','')) * 10000)
        except:
            data["full_occupancy_incom"] = None

        # 土地権利
        data["land_rights"] = temp_property_data['land_rights']

        # 'management_fee_repair_reserve_fund': '管理費/修繕積立',
        data["management_fee"] = int(temp_property_data['management_fee_repair_reserve_fund'].split('/')[0].replace('円','').replace(',',''))
        data["repair_reserve_fund"] = int(temp_property_data['management_fee_repair_reserve_fund'].split('/')[1].replace('円','').replace(',',''))

        # 管理会社
        data["management_company"] = temp_property_data['management_company']

        # 'management_method': '管理方式/管理人',
        # data["management_method"] = re.findall('(.*)\r\n  \r\n*', temp_property_data['management_method'].split('/')[0])[0]
        # data["management_person"] = re.findall('\r\n    (.*)', temp_property_data['management_method'].split('/')[1])[0]

        # 直前の更新日
        date_format = '%Y年%m月%d日'
        try:
            text = temp_property_data['last_update_date'].replace(' ','')
            data["last_update_date"] = dt.datetime.strptime(text, date_format).date()
        except:
            text = temp_property_data['register_date'].replace(' ','')
            data["last_update_date"] = dt.datetime.strptime(text, date_format).date()

        # 'scheduled_update_date': '更新予定日',
        text = temp_property_data['scheduled_update_date'].replace(' ','')
        data["scheduled_update_date"] = dt.datetime.strptime(text, date_format).date()

        # 'management_id': '管理ID'
        data["management_id"] = temp_property_data['management_id']
        
        # room_url:URL
        data["room_url"] = room_url
        write_log(log_file,'Done:'+data["property_name"])
        time.sleep(3)
        all_data.append(data)
    # except:
    #     time.sleep(3)
    #     write_log(log_file,'error')

    df = pd.DataFrame(all_data,index=None)
    df.to_csv(work_dir+f'/scraping_raw/kenbiya_baibai_{excution_date}.csv',index = False)
    
text = 'df_shape:{}\n'.format(df.shape)
write_log(log_file,text)

end_time = dt.datetime.now() + dt.timedelta(hours=diff_jst_from_utc)
text = 'predicting done.\nend_time:{}\n'.format(end_time)
write_log(log_file,text)

processing_time = end_time - start_time
text = 'processing_time:{}\n'.format(processing_time)
write_log(log_file,text)

processing_start_time:2023-03-18 20:29:31

base_url:https://www.kenbiya.com/pp1/s/tokyo/n-1/ctk=292_300_311_299_293_302_307_298_294_301_310_314_295_303_308_313_304_306_309_312_296_305_297/

max_page:164 



 50%|█████     | 1/2 [00:00<00:00,  1.47it/s]

room_url:https://www.kenbiya.com//pp1/s/tokyo/kita-ku/re_2979232ivr/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:ドミール赤羽
room_url:https://www.kenbiya.com//pp1/s/tokyo/nakano-ku/re_29792257uv/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:メゾン・ド・ペガース
room_url:https://www.kenbiya.com//pp1/s/tokyo/ota-ku/re_2979201xzx/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:カーサ大岡山
room_url:https://www.kenbiya.com//pp1/s/tokyo/taito-ku/re_2979128q0p/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東建シティハイツ上野
room_url:https://www.kenbiya.com//pp1/s/tokyo/ota-ku/re_2979097lxq/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:メインステージ大森町駅前
room_url:https://www.kenbiya.com//pp1/s/tokyo/nakano-ku/re_29790946e3/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:ブライズ中野江古田
room_url:https://www.kenbiya.com//pp1/s/tokyo/itabashi-ku/re_2979092buw/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:MAXIV成増
room_url:https://www.kenbiya.com//pp1/s/tokyo/setagaya-ku/re_2979091kbr/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:エスペルーモ桜新町
room_url:https://www.kenbiya.com//pp1/s/tokyo/setagaya-ku/re_2979085zxq/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都世田谷区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinjuku-ku/re_2979067vej/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:トーカン新宿第2キャステール
room_url:https://www.kenbiya.com//pp1/s/tokyo/kita-ku/re_2979043al1/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:エルフォルテ赤羽
room_url:https://www.kenbiya.com//pp1/s/tokyo/meguro-ku/re_2979039llm/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:秀和洗足レジデンス
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinagawa-ku/re_2979031r0w/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都品川区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinagawa-ku/re_2979023xnm/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都品川区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/chuo-ku/re_2979021bed/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:ライオンズマンション東銀座
room_url:https://www.kenbiya.com//pp1/s/tokyo/suginami-ku/re_2979008kwe/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:リヴレ・清水の森
room_url:https://www.kenbiya.com//pp1/s/tokyo/minato-ku/re_2978996kpz/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都港区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/suginami-ku/re_2978975zht/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:デュオ・スカーラ阿佐ヶ谷
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinjuku-ku/re_2978972la5/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:早稲田永谷マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinagawa-ku/re_2978943pis/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:トーシンフェニックス五反田
room_url:https://www.kenbiya.com//pp1/s/tokyo/minato-ku/re_2978898qgw/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都港区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinjuku-ku/re_2978867cxp/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都新宿区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/taito-ku/re_297885845i/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:ノバ浅草
room_url:https://www.kenbiya.com//pp1/s/tokyo/taito-ku/re_2978849bhx/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都台東区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/nerima-ku/re_2978836puf/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:フェニックス練馬桜台
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinagawa-ku/re_2978835rjd/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/itabashi-ku/re_29788349wd/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:ルーブル中板橋
room_url:https://www.kenbiya.com//pp1/s/tokyo/ota-ku/re_2978832yqa/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:ブライズ大森東TRE
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinagawa-ku/re_2978824lcc/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:エクセル旗の台
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinagawa-ku/re_29787981ti/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:シティタワー大崎
room_url:https://www.kenbiya.com//pp1/s/tokyo/shibuya-ku/re_2978781b4l/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都渋谷区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/shibuya-ku/re_2978775cf7/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都渋谷区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/katsushika-ku/re_2978772isu/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:日興パレス新小岩
room_url:https://www.kenbiya.com//pp1/s/tokyo/kita-ku/re_297877185v/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:ブリス板橋
room_url:https://www.kenbiya.com//pp1/s/tokyo/nakano-ku/re_2978770kzd/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:日神パレス中野
room_url:https://www.kenbiya.com//pp1/s/tokyo/nakano-ku/re_2978768xrr/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:ラ・レジダンス・ド・VIP中野坂上
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinjuku-ku/re_2978763olr/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:日神パレス下落合
room_url:https://www.kenbiya.com//pp1/s/tokyo/adachi-ku/re_2978761igw/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東和緑野ダイヤモンドマンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/kita-ku/re_2978759dmw/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:TOP赤羽
room_url:https://www.kenbiya.com//pp1/s/tokyo/adachi-ku/re_29787578uz/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:千住大橋センチュリー21
room_url:https://www.kenbiya.com//pp1/s/tokyo/nakano-ku/re_2978756zom/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:エヴェナール高円寺
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinagawa-ku/re_29787558rc/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:マンション五反田
room_url:https://www.kenbiya.com//pp1/s/tokyo/minato-ku/re_2978752qrv/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:メインステージ南麻布II
room_url:https://www.kenbiya.com//pp1/s/tokyo/taito-ku/re_29787507pc/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:入谷センチュリープラザ21
room_url:https://www.kenbiya.com//pp1/s/tokyo/katsushika-ku/re_2978743q4a/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都葛飾区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/shinagawa-ku/re_2978714wtt/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都品川区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/toshima-ku/re_2978699b7z/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:池袋シティハイツ
room_url:https://www.kenbiya.com//pp1/s/tokyo/setagaya-ku/re_2978677pun/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都世田谷区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/nakano-ku/re_2978662mzp/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都中野区 投資用マンション
room_url:https://www.kenbiya.com//pp1/s/tokyo/minato-ku/re_2978644crh/



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Done:東京都港区 投資用マンション


100%|██████████| 2/2 [02:53<00:00, 86.65s/it] 

df_shape:(50, 29)

predicting done.
end_time:2023-03-18 20:32:25.064117

processing_time:0:02:53.821757






In [32]:
pattern = r'\n(\d*)億'
try:
    price_oku = int(extract_value(pattern,temp_property_data['price'])) * 100000000
except:
    price_oku = 0

try:
    price_man = int(temp_property_data['price'].replace(',', '').replace('万円', '')) * 10000
except:
    try:
        pattern = r'\n*億(\d*)'
        price_man = int(extract_value(pattern,temp_property_data['price'].replace(',', '').replace('万円', ''))) * 10000
    except:
        price_man = 0
data["price"] = price_oku + price_man

In [41]:
pd.DataFrame(data,index=[0]).to_csv('./test.csv',index = False)

In [57]:
df_temp = pd.DataFrame(all_data,index=None)

In [58]:
df_temp

Unnamed: 0,property_name,price,train_line,station,minutes_from_station,prefecture_name,city_name,town_name,year_built,structure,...,land_rights,management_fee,repair_reserve_fund,management_company,management_method,management_person,last_update_date,scheduled_update_date,management_id,room_url
0,カーサ大岡山,19900000,東急目黒線,大岡山駅,,,,\r\n東京都大田区北千束3-16-19\r\n,51,,...,\r\n所有権,10004,7300,株式会社エム・シー・サービス,全部委託,日勤\r,2023-03-18,2023-06-18,001147601,https://www.kenbiya.com//pp1/s/tokyo/ota-ku/re...
1,東建シティハイツ上野,16500000,JR山手線,上野駅,,,,\r\n東京都台東区東上野\r\n,34,,...,\r\n所有権,13320,2880,,全部委託,日勤\r,2023-03-18,2023-06-18,184126438,https://www.kenbiya.com//pp1/s/tokyo/taito-ku/...
2,メインステージ大森町駅前,16500000,京急本線,大森町駅,,,,\r\n東京都大田区大森西3丁目\r\n,27,,...,\r\n所有権,8000,6400,,全部委託,巡回\r,2023-03-18,2023-06-18,2687968,https://www.kenbiya.com//pp1/s/tokyo/ota-ku/re...
3,ブライズ中野江古田,27500000,西武新宿線,沼袋駅,,,,\r\n東京都中野区江古田2丁目\r\n,4,,...,\r\n所有権,6000,2100,,全部委託,巡回\r,2023-03-18,2023-06-18,2687978,https://www.kenbiya.com//pp1/s/tokyo/nakano-ku...
4,MAXIV成増,25000000,東京メトロ有楽町線,地下鉄成増駅,,,,\r\n東京都板橋区成増1丁目\r\n,5,,...,\r\n所有権,5700,2100,,全部委託,巡回\r,2023-03-18,2023-06-18,2687984,https://www.kenbiya.com//pp1/s/tokyo/itabashi-...
5,エスペルーモ桜新町,28000000,東急田園都市線,桜新町駅,10.0,,,\r\n東京都世田谷区弦巻3丁目\r\n,7,,...,\r\n所有権,7890,10870,,全部委託,巡回\r,2023-03-18,2023-06-18,2687985,https://www.kenbiya.com//pp1/s/tokyo/setagaya-...
6,東京都世田谷区 投資用マンション,8800000,京王線,千歳烏山駅,,,,\n東京都世田谷区南烏山\n,36,,...,\r\n所有権,8970,2070,,全部委託,巡回\r,2023-03-18,2023-06-18,,https://www.kenbiya.com//pp1/s/tokyo/setagaya-...
7,トーカン新宿第2キャステール,15900000,京王線,初台駅,,,,\n東京都新宿区西新宿3-5-12\n,43,,...,\r\n所有権,5230,6050,株式会社東急コミュニティー,全部委託,日勤\r,2023-03-18,2023-06-18,,https://www.kenbiya.com//pp1/s/tokyo/shinjuku-...
8,エルフォルテ赤羽,25000000,東京メトロ南北線,志茂駅,,,,\n東京都北区志茂3-3-8\n,6,,...,\r\n所有権,4000,1100,,全部委託,巡回\r,2023-03-18,2023-06-18,,https://www.kenbiya.com//pp1/s/tokyo/kita-ku/r...
9,秀和洗足レジデンス,39990000,東急目黒線,洗足駅,,,,\n東京都目黒区洗足2丁目\n,53,,...,\r\n所有権,8800,13000,,全部委託,日勤\r,2023-03-18,2023-06-18,,https://www.kenbiya.com//pp1/s/tokyo/meguro-ku...


In [8]:
temp_property_data={}
for k, l in property_dict.items():
    try:
        temp_property_data[k] = room_item.find('dt', text=l).find_next('dd').get_text()
    except:
        temp_property_data[k] = ''

In [9]:
temp_property_data

{'property_name': '早稲田永谷マンション',
 'price': '\n2,280万円\r\n',
 'transportation': '\r\n  東京メトロ副都心線 西早稲田駅 徒歩1分\r\n  \r\n  東京メトロ東西線 高田馬場駅 徒歩6分\r\n  \r\n  \r\n  JR山手線 高田馬場駅 徒歩9分\n',
 'address': '\n東京都新宿区高田馬場1-2-15\n',
 'year_built': '\r\n1973年10月（築49年）\r\n',
 'building_structure': '\r\nSRC造3階/11階建 総戸数79戸',
 'exclusive_area': '\r\n35.19m²\r\n（バルコニー 7.91m²）',
 'floor_plan': '\r\n1LDK 南向き ',
 'transaction_method': '仲介',
 'delivery': '相談',
 'current_condition': '\r\n賃貸中',
 'cap_rate': '6.31％',
 'full_occupancy_incom': '144万円 / 12万円\r\n情報の見方',
 'land_rights': '\r\n所有権 ',
 'management_fee_repair_reserve_fund': '10,420円 / 5,760円',
 'management_company': '(株)東急コミュニティー',
 'management_method': '\r\n  全部委託\r\n  \r\n    /\r\n    日勤\r\n  \r\n',
 'last_update_date': '',
 'register_date': '2023年 3月 18日',
 'scheduled_update_date': '2023年 6月 18日',
 'management_id': ''}

In [62]:
temp_property_data

{'property_name': '東京都世田谷区 投資用マンション',
 'price': '\n780万円\r\n',
 'transportation': '\r\n  小田急小田原線 経堂駅 徒歩6分\r\n  \r\n  東急世田谷線 宮の坂駅 徒歩13分\r\n  \r\n  \r\n  小田急小田原線 千歳船橋駅 徒歩13分\n',
 'address': '\n東京都世田谷区経堂\n',
 'year_built': '\r\n1986年3月（築37年）\r\n',
 'building_structure': '\r\nRC造4階/4階建 総戸数26戸',
 'exclusive_area': '\r\n16.43m²\r\n（バルコニー 2m²）',
 'floor_plan': '\r\n1R 南向き ',
 'transaction_method': '仲介',
 'delivery': '相談',
 'current_condition': '\r\n賃貸中',
 'cap_rate': '7.69％',
 'full_occupancy_incom': '60万円 / 5万円\r\n情報の見方',
 'land_rights': '\r\n所有権 ',
 'management_fee_repair_reserve_fund': '7,600円 / 3,000円',
 'management_company': '',
 'management_method': '\r\n  全部委託\r\n  \r\n    /\r\n    巡回\r\n  \r\n',
 'last_update_date': '',
 'register_date': '2023年 3月 18日',
 'scheduled_update_date': '2023年 6月 18日',
 'management_id': ''}

In [5]:
room_url

'https://www.kenbiya.com//pp1/s/tokyo/shinjuku-ku/re_2978972la5/'

In [6]:
room_item = get_html(room_url)

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
