In [15]:
import pandas as pd
# Show full cell contents (long titles, URLs) instead of truncating them.
# `None` is the supported "no limit" value; the old `-1` sentinel has been
# deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)

# A listing is kept when its unit price is below
# price_gap * (average unit price of its community).
price_gap = 0.9
# (lower, upper) bounds on total_price; both ends are exclusive in the filter below.
price_range = 160, 265
# Accepted values of the `neighbours` column; an empty list disables that filter.
neighs = ['一梯两户','一梯三户','一梯四户']

In [16]:
# Load the raw listings from the JSON dump into a DataFrame.
data = pd.read_json(path_or_buf='./data_p1p2_2019_1_22/putuo.json')

In [17]:
# Keep only the columns used by the analysis and give them short English names.
# `index=str` converts every index label to a string.
data_clean = (
    data.loc[:, ['标题', '总价', '每平方售价', '建筑面积', '产权年限', '建造时间', '小区名称', '梯户比例', '所在楼层', 'url']]
    .rename(
        index=str,
        columns={
            '标题': 'title',
            '总价': 'total_price',
            '每平方售价': 'price',
            '建筑面积': 'area',
            '产权年限': 'period',
            '建造时间': 'time',
            '小区名称': 'community',
            '梯户比例': 'neighbours',
            '所在楼层': 'floor',
        },
    )
)

In [18]:
# Turn the two price columns into floats by stripping their unit characters
# ('万' for total_price, '元/平米' for price).
# `astype(str)` first makes the cell safe to re-run: once the columns are
# already floats, a plain `x.strip(...)` would raise, whereas the string
# round-trip leaves the numbers unchanged.
data_clean = data_clean.assign(
    total_price=data_clean['total_price'].astype(str).str.strip('万').astype(float),
    price=data_clean['price'].astype(str).str.strip('元/平米').astype(float),
)

# Keep listings whose total price lies strictly between the configured bounds.
data_clean = data_clean[
    data_clean['total_price'].gt(price_range[0])
    & data_clean['total_price'].lt(price_range[1])
]

In [19]:
# Preview the first rows of the cleaned, price-filtered listings.
data_clean.head(n=5)

Unnamed: 0,title,total_price,price,area,period,time,community,neighbours,floor,url
2,业主定好房急售，一楼天井已搭，看房随时，朝南户型,183.0,56378.0,32.46㎡,未知,1986年建/板楼,管弄五六街坊,一梯五户,低楼层 (共6层),https://sh.lianjia.com/ershoufang/107100545261.html
4,小户型总价低，近新村路7号线地铁站，采光充足无遮挡,163.0,47620.0,34.23㎡,未知,1987年建/板楼,新村路285弄,一梯四户,中楼层 (共5层),https://sh.lianjia.com/ershoufang/107100526372.html
5,朝南主卧带阳台 总价低 位置好 非顶楼 交通便利,177.0,60972.0,29.03㎡,70年,1983年建/板楼,桂巷新村,一梯四户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107100906672.html
6,桃浦七村雪松苑 1室1厅 176万,176.0,40137.0,43.85㎡,70年,1995年建/板楼,桃浦七村雪松苑,一梯四户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107100945752.html
8,满五唯一 低楼层 采光好 诚意出售 随时看房,185.0,46436.0,39.84㎡,未知,1985年建/板楼,新宜小区,一梯五户,低楼层 (共6层),https://sh.lianjia.com/ershoufang/107100796194.html


In [20]:
# Number of remaining listings and their mean unit price.
print('总的房源数目: {}, 均价: {}'.format(data_clean.shape[0], data_clean['price'].mean()))

总的房源数目: 704, 均价: 48265.046875


In [21]:
# Mean unit price per community, one row per community.
# NOTE: the mean is taken over `data_clean`, i.e. only over the listings that
# passed the total-price filter, not over every listing in the community.
community_price_avg = (
    data_clean
    .groupby('community', as_index=False)['price']
    .mean()
    .rename(columns={'price': 'community_avg_price'})
)
community_price_avg.head(n=5)

Unnamed: 0,community,community_avg_price
0,万航渡后路87号,74735.0
1,世纪之门,79804.0
2,东新支路55弄,53959.0
3,东泉苑小区,52105.0
4,中天SOHO,40047.0


In [22]:
# Number of distinct communities that have at least one remaining listing.
print('总的小区数目: {}'.format(community_price_avg.shape[0]))

总的小区数目: 196


In [23]:
# Attach each listing's community average as a `community_avg_price` column.
# Any copy of that column left by a previous run of this cell is dropped
# first; otherwise a re-run would produce `community_avg_price_x` / `_y`
# and break the cells that read `community_avg_price`.
# `validate='many_to_one'` makes the merge fail loudly if a community ever
# appears more than once in `community_price_avg` (which would duplicate rows).
data_clean = (
    data_clean
    .drop(columns=['community_avg_price'], errors='ignore')
    .merge(community_price_avg, how='left', on='community', validate='many_to_one')
)

In [24]:
# Preview the listings with the community average attached.
data_clean.head(n=5)

Unnamed: 0,title,total_price,price,area,period,time,community,neighbours,floor,url,community_avg_price
0,业主定好房急售，一楼天井已搭，看房随时，朝南户型,183.0,56378.0,32.46㎡,未知,1986年建/板楼,管弄五六街坊,一梯五户,低楼层 (共6层),https://sh.lianjia.com/ershoufang/107100545261.html,51749.454545
1,小户型总价低，近新村路7号线地铁站，采光充足无遮挡,163.0,47620.0,34.23㎡,未知,1987年建/板楼,新村路285弄,一梯四户,中楼层 (共5层),https://sh.lianjia.com/ershoufang/107100526372.html,45243.0
2,朝南主卧带阳台 总价低 位置好 非顶楼 交通便利,177.0,60972.0,29.03㎡,70年,1983年建/板楼,桂巷新村,一梯四户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107100906672.html,61660.666667
3,桃浦七村雪松苑 1室1厅 176万,176.0,40137.0,43.85㎡,70年,1995年建/板楼,桃浦七村雪松苑,一梯四户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107100945752.html,37923.925926
4,满五唯一 低楼层 采光好 诚意出售 随时看房,185.0,46436.0,39.84㎡,未知,1985年建/板楼,新宜小区,一梯五户,低楼层 (共6层),https://sh.lianjia.com/ershoufang/107100796194.html,46268.235294


In [25]:
# True for listings priced below `price_gap` times their community average.
price_mask = data_clean['price'].lt(data_clean['community_avg_price'].mul(price_gap))

if neighs:
    # Additionally require the `neighbours` value to be one of the accepted ones.
    neigh_mask = data_clean['neighbours'].isin(neighs)
    fruit = data_clean.loc[neigh_mask & price_mask]
else:
    # No `neighbours` restriction configured: the price criterion alone decides.
    fruit = data_clean.loc[price_mask]

In [26]:
# Report how many listings were considered and how many passed the filters,
# then display the selected listings.
print('total: {}, left: {}'.format(data_clean.shape[0], fruit.shape[0]))
fruit

total: 704, left: 26


Unnamed: 0,title,total_price,price,area,period,time,community,neighbours,floor,url,community_avg_price
37,真光九 一梯两户正规一室一厅 采光好 小区中间位置,190.0,39071.0,48.63㎡,70年,1995年建/板楼,真光九街坊,一梯两户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107100967952.html,44258.2
39,曹杨三村(兰岭园) 1室1厅 182万,182.0,43077.0,42.25㎡,70年,未知年建/板楼,曹杨三村(兰岭园),一梯四户,高楼层 (共5层),https://sh.lianjia.com/ershoufang/107100956007.html,52405.083333
50,朝南大一房，边套户型，诚意出售，随时看房,175.0,47633.0,36.74㎡,未知,1991年建/板楼,岚皋西路145弄,一梯五户,低楼层 (共7层),https://sh.lianjia.com/ershoufang/107100842729.html,53107.5
62,东边套全明非顶楼一室户、采光好 总价低,181.0,49835.0,36.32㎡,未知,1985年建/板楼,太浜巷小区,一梯六户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107100096357.html,56718.428571
73,一室一厅 单价低 交通便利 配套成熟,185.0,38923.0,47.53㎡,未知,1985年建/板楼,宜川六村,一梯五户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107100538116.html,45648.533333
92,大一房户型正气 房龄新 一梯两户 品质小区 随时可看,185.0,38406.0,48.17㎡,未知,1994年建/板楼,章家巷小区,一梯两户,高楼层 (共7层),https://sh.lianjia.com/ershoufang/107002216607.html,46268.083333
94,汪家井小区，一室一厅，采光通风好，房龄新,185.0,41592.0,44.48㎡,未知,1991年建/板楼,汪家井小区,一梯四户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107001513271.html,47104.636364
131,产权清晰，采光充足，交通方便，生活配套完善，无抵押,180.0,46986.0,38.31㎡,未知,1987年建/板楼,东新支路55弄,一梯八户,中楼层 (共7层),https://sh.lianjia.com/ershoufang/107100684861.html,53959.0
153,章家巷小区大一房，近新村路地铁，诚意出售,185.0,38247.0,48.37㎡,未知,1989年建/板楼,延长西路529弄,一梯四户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107100081287.html,43538.0
182,西边套 全明一室一厅 通风采光佳 1梯4户,168.0,43546.0,38.58㎡,未知,1981年建/板楼,宜川一村,一梯四户,高楼层 (共6层),https://sh.lianjia.com/ershoufang/107100669031.html,48584.222222
