In [34]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import chardet
import csv

In [2]:
poi_path = r"/data-sat-img/postprocessed/mujiang/POI/全国POI数据/"
save_path = "/data-sat-img/postprocessed/mujiang/master-thesis/poi_summary/"

### year range

In [3]:
year_list = list(range(2012,2023,1))
year_list 

[2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]

### read geo-master-table

In [4]:
geomaster_path = "/data-sat-img/postprocessed/mujiang/geobound_master/"
geomaster = pd.read_csv(geomaster_path + "lookup_table.csv")

geomaster_selected = geomaster[["MID_m","地级","省级"]]
geomaster_selected 
geomaster_selected.head()

Unnamed: 0,MID_m,地级,省级
0,1,三沙市,海南省
1,1,三沙市,海南省
2,2,锡林郭勒盟,内蒙古自治区
3,3,阿坝藏族羌族自治州,四川省
4,4,哈尔滨市,黑龙江省


In [5]:
def append_row(df,year,city,number,class1,class2):
    new_row_df = pd.DataFrame([{"year": year, "city": city,"number": number, "class1": class1, "class2": class2}])
    df_appended = pd.concat([df, new_row_df], ignore_index=True)

    return df_appended


In [6]:
def find_match(poi_city, geo_df):
    for index, row in geo_df.iterrows():
        if (poi_city in row['地级']) or (poi_city[:-1] in row['地级']):
            return row['MID_m']
    return None  # Return None if no match is found


## match for every year geomaster and poi

### 2022

In [6]:
year = 2022
folder = r"2022POI"
os.chdir(poi_path + folder)

In [7]:
poi_2022_region = os.listdir()
poi_2022_region

['西北poi', '华东poi2', '华中poi', '华北poi', '东北poi', '华南POI', '华东poi1', '西南poi']

In [8]:
region_list = []
province_list = []
city_list = []
poi_city_list = []

for region in poi_2022_region:
    poi_2022_province = os.listdir(region)
    
    for province in poi_2022_province:
        if (province.endswith(".zip") == False) & (province.endswith(".rar") == False):
            poi_2022_city = os.listdir(os.path.join(region,province))
            
            for poi_city in poi_2022_city:
                if poi_city.endswith(".csv"):
                    city = poi_city.split("POI")[0]
                    #print(region,province,city)
                    
                    region_list.append(region)
                    province_list.append(province)
                    city_list.append(city)
                    poi_city_list.append(poi_city)


In [9]:
i = 183
poi_df = pd.read_csv(os.path.join(region_list[i],province_list[i],poi_city_list[i]), encoding='GB2312',encoding_errors='ignore')
poi_df

Unnamed: 0,名称,大类,中类,经度,纬度,省份,城市,区域
0,后财神寺庙,旅游景点,宗教,125.530420,44.786439,吉林省,长春市,农安县
1,小城子中学,科教文化,中学,125.360149,44.774729,吉林省,长春市,农安县
2,中国石油油站,汽车相关,加油站,125.366809,44.775069,吉林省,长春市,农安县
3,黄鱼圈客运站,交通设施,长途汽车,125.469408,44.815758,吉林省,长春市,农安县
4,亨达油站,汽车相关,加油站,125.471999,44.816386,吉林省,长春市,农安县
...,...,...,...,...,...,...,...,...
233998,地面专用停车场,交通设施,停车场,125.242102,43.902915,吉林省,长春市,绿园区
233999,专用停车场,交通设施,停车场,125.260643,43.908331,吉林省,长春市,绿园区
234000,专用停车场,交通设施,停车场,125.260631,43.907970,吉林省,长春市,绿园区
234001,吉林大学和平校区-行政办公大楼,科教文化,其他,125.266758,43.907389,吉林省,长春市,绿园区


In [10]:
with open(os.path.join(region_list[i],province_list[i],poi_city_list[i]), 'rb') as f:
    raw_data = f.read()
    result = chardet.detect(raw_data)
    encoding = result['encoding']
    print(f"Detected encoding: {encoding}")

Detected encoding: GB2312


In [11]:
class1_list = []
class2_list = []
number_list = []
path_list = []

for i in tqdm(range(len(region_list))):
    try:    
        poi_df = pd.read_csv(os.path.join(region_list[i],province_list[i],poi_city_list[i]))
        
    except Exception as e:
        print(i,region_list[i],province_list[i],poi_city_list[i])
        
        poi_df = pd.read_csv(os.path.join(region_list[i],province_list[i],poi_city_list[i]), encoding='GB2312',encoding_errors='ignore')

    number_list.append(poi_df.shape[0])
    path_list.append(os.path.join(folder,region_list[i],province_list[i],poi_city_list[i]))
    
    
    class1 = poi_df["大类"].unique()
    class2 = poi_df["中类"].unique()

    for element in class1:
        if element not in class1_list:
            class1_list.append(element)

    for element in class2:
        if element not in class2_list:
            class2_list.append(element)
                

        

 49%|███████████████████████████████████████████████████████████████████████████████████▌                                                                                     | 183/370 [00:24<00:39,  4.73it/s]

183 东北poi 吉林省 长春市POI数据.csv


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 370/370 [00:52<00:00,  7.00it/s]


In [12]:
summary_all = pd.DataFrame(columns=["year", "city","number", "class1", "class2"])
summary_all_2022 = append_row(summary_all,2022,len(region_list),sum(number_list),class1_list,class2_list)
summary_all_2022

Unnamed: 0,year,city,number,class1,class2
0,2022,370,41502251,"[公司企业, 生活服务, 旅游景点, 酒店住宿, 餐饮美食, 购物消费, 科教文化, 汽车相...","[公司, 公厕, 宗教, 旅馆, 其他, 公园, 邮局, 小学, 便利店, 汽车维修, 停车..."


In [13]:
summary_all_2022.to_csv(save_path + "summary_all.csv",encoding = "utf-8-sig",index = False)

In [14]:
summary_year = pd.DataFrame(columns=["province","city","number", "path"])

summary_year[ "province"] = province_list
summary_year[ "city"] = city_list
summary_year[ "number"] = number_list
summary_year[ "path"] = path_list


summary_year

Unnamed: 0,province,city,number,path
0,青海省,海北藏族自治州,5748,2022POI/西北poi/青海省/海北藏族自治州POI数据.csv
1,青海省,黄南藏族自治州,3282,2022POI/西北poi/青海省/黄南藏族自治州POI数据.csv
2,青海省,玉树藏族自治州,3618,2022POI/西北poi/青海省/玉树藏族自治州POI数据.csv
3,青海省,果洛藏族自治州,1568,2022POI/西北poi/青海省/果洛藏族自治州POI数据.csv
4,青海省,西宁市,61473,2022POI/西北poi/青海省/西宁市POI数据.csv
...,...,...,...,...
365,西藏自治区,林芝市,6142,2022POI/西南poi/西藏自治区/林芝市POI数据.csv
366,西藏自治区,拉萨市,34956,2022POI/西南poi/西藏自治区/拉萨市POI数据.csv
367,西藏自治区,昌都市,4951,2022POI/西南poi/西藏自治区/昌都市POI数据.csv
368,西藏自治区,那曲市,3230,2022POI/西南poi/西藏自治区/那曲市POI数据.csv


In [15]:
summary_year = pd.merge(geomaster_selected, summary_year, left_on=['地级', '省级'], right_on=['city', 'province'], how = "right")
summary_year = summary_year[["MID_m","province","city","number", "path"]]
summary_year.drop_duplicates(inplace=True)
summary_year

Unnamed: 0,MID_m,province,city,number,path
0,255,青海省,海北藏族自治州,5748,2022POI/西北poi/青海省/海北藏族自治州POI数据.csv
4,294,青海省,黄南藏族自治州,3282,2022POI/西北poi/青海省/黄南藏族自治州POI数据.csv
8,137,青海省,玉树藏族自治州,3618,2022POI/西北poi/青海省/玉树藏族自治州POI数据.csv
14,68,青海省,果洛藏族自治州,1568,2022POI/西北poi/青海省/果洛藏族自治州POI数据.csv
20,142,青海省,西宁市,61473,2022POI/西北poi/青海省/西宁市POI数据.csv
...,...,...,...,...,...
2971,56,西藏自治区,林芝市,6142,2022POI/西南poi/西藏自治区/林芝市POI数据.csv
2978,168,西藏自治区,拉萨市,34956,2022POI/西南poi/西藏自治区/拉萨市POI数据.csv
2986,49,西藏自治区,昌都市,4951,2022POI/西南poi/西藏自治区/昌都市POI数据.csv
2997,20,西藏自治区,那曲市,3230,2022POI/西南poi/西藏自治区/那曲市POI数据.csv


In [16]:
##check if anycity are not matched with masterid
summary_year[pd.isna(summary_year['MID_m'])]

Unnamed: 0,MID_m,province,city,number,path


In [17]:
summary_year.to_csv(save_path + "summary_2022.csv",encoding = "utf-8-sig",index = False)

### 2021

In [18]:
year = 2021
folder = r"2021POI"
os.chdir(poi_path + folder)

In [19]:
city_list = []
poi_city_list = []


poi_2021_city = os.listdir()

for poi_city in poi_2021_city:
    if poi_city.endswith(".csv"):
        poi_city_list.append(poi_city)
        
        city = poi_city.split("-")[1]
        city_list.append(city)

In [21]:
i = 272
print(city_list[i])

poi_df = pd.read_csv(poi_city_list[i], encoding='utf-8',encoding_errors='ignore', dtype={14: 'str'}, on_bad_lines='skip')
poi_df

绥化市


Unnamed: 0,序号,状态,查询时间,城市,关键字,方式,PoiID,名称,类别,经纬度,地址,距离中心(仅圆形范围),固话,手机,电话(原值),轮廓坐标
0,1,complete,2020/3/8 21:30,绥化市,,多边形,B01C700DKU,黑龙江省红光农场,"生活服务,生活服务场所,生活服务场所","127.229966,47.592096",荣光路与Z551交叉口西150米,,,,,
1,2,complete,2020/3/8 21:30,绥化市,,多边形,B01C70MG8Q,四海店镇,"地名地址信息,普通地名,乡镇级地名","127.502811,47.47688",绥棱县,,,,,
2,3,complete,2020/3/8 21:30,绥化市,,多边形,B01C70MG8O,双岔河镇,"地名地址信息,普通地名,乡镇级地名","127.268328,47.457473",绥棱县,,,,,
3,4,complete,2020/3/8 21:30,绥化市,,多边形,B01C700IRO,北大荒,"地名地址信息,普通地名,村庄级地名","127.462194,47.489195",绥棱县,,,,,
4,5,complete,2020/3/8 21:30,绥化市,,多边形,B01C700DKX,红光派出所,"政府机构及社会团体,公检法机构,公安警察","127.224499,47.592067",403乡道北侧,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100539,110172,complete,2021/7/12 1:05,绥化市,,多边形,B01C70MI8Y,国家电网,生活服务;电力营业厅;电力营业厅,"[126.01076,46.060775]",四明南路102号附近,,,,,
100540,110173,complete,2021/7/12 1:05,绥化市,,多边形,B0G1AO2ELA,铁哥水洗,生活服务;洗衣店;洗衣店,"[126.01078,46.053944]",,,,,,
100541,110174,complete,2021/7/12 1:05,绥化市,,多边形,B0HUVO4DS0,国家电网(肇东新城供电营业厅),生活服务;电力营业厅;电力营业厅,"[126.010768,46.060778]",,,,,,
100542,110175,complete,2021/7/12 1:05,绥化市,,多边形,B0G2NB2EBN,莱恩英语,科教文化服务;科教文化场所;科教文化场所,"[125.955887,46.061743]",民益路(紫都花园),,,1.577673e+10,15776732370,


In [191]:
#poi_df['类别'].str.split(';|\||,|/').explode().unique()

In [22]:
with open(os.path.join(poi_city_list[i]), 'rb') as f:
    raw_data = f.read()
    result = chardet.detect(raw_data)
    encoding = result['encoding']
    print(f"Detected encoding: {encoding}")

Detected encoding: utf-8


In [23]:
#class1_list = []
class2_list = []
class1_list = []
number_list = []
path_list = []
class1_name_list = []

for i in tqdm(range(len(poi_city_list))):
    
    try:    
        poi_df = pd.read_csv(os.path.join(poi_city_list[i]),encoding = "utf-8",encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
        split_columns = poi_df['类别'].str.split(';|,', expand=True)
        
    except Exception as e:
        #print(i,poi_city_list[i])        
        poi_df = pd.read_csv(os.path.join(poi_city_list[i]), encoding='GB2312',encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
        split_columns = poi_df['类别'].str.split(';|,', expand=True)
    

    class1 = split_columns[0].unique()
    class2 = split_columns[1].unique()
    
    for element in class1:
        if element not in class1_list:
            class1_list.append(element)
            class1_name_list.append((str(element)+str(i)+city_list[i]))

    for element in class2:
        if element not in class2_list:
            class2_list.append(element)
    
    number_list.append(poi_df.shape[0])
    path_list.append(os.path.join(folder,poi_city_list[i]))


  poi_df = pd.read_csv(os.path.join(poi_city_list[i]),encoding = "utf-8",encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
  poi_df = pd.read_csv(os.path.join(poi_city_list[i]),encoding = "utf-8",encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
  poi_df = pd.read_csv(os.path.join(poi_city_list[i]), encoding='GB2312',encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
  poi_df = pd.read_csv(os.path.join(poi_city_list[i]),encoding = "utf-8",encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
  poi_df = pd.read_csv(os.path.join(poi_city_list[i]), encoding='GB2312',encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
  poi_df = pd.read_csv(os.path.join(poi_city_list[i]),encoding = "utf-8",encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
  poi_df = pd.read_csv(os.path.join(poi_city_list[i]),encoding = "utf-8",encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
  poi_df = pd.read_csv(os.p

In [24]:
class1_name_list

['地名地址信息0河源市',
 '科教文化服务0河源市',
 '风景名胜0河源市',
 '政府机构及社会团体0河源市',
 '住宿服务0河源市',
 '体育休闲服务0河源市',
 '交通设施服务0河源市',
 '商务住宅0河源市',
 '医疗保健服务0河源市',
 '购物服务0河源市',
 '生活服务0河源市',
 '汽车服务0河源市',
 '公共设施0河源市',
 '公司企业0河源市',
 '餐饮服务0河源市',
 '道路附属设施0河源市',
 '摩托车服务0河源市',
 '汽车维修0河源市',
 '金融保险服务0河源市',
 '汽车销售0河源市',
 'nan8泉州市',
 '室内设施34咸阳市',
 '类别42吐鲁番',
 '通行设施103兰州市',
 '事件活动152杭州市']

In [25]:
len(class1_list)

25

In [26]:
class1_list

['地名地址信息',
 '科教文化服务',
 '风景名胜',
 '政府机构及社会团体',
 '住宿服务',
 '体育休闲服务',
 '交通设施服务',
 '商务住宅',
 '医疗保健服务',
 '购物服务',
 '生活服务',
 '汽车服务',
 '公共设施',
 '公司企业',
 '餐饮服务',
 '道路附属设施',
 '摩托车服务',
 '汽车维修',
 '金融保险服务',
 '汽车销售',
 nan,
 '室内设施',
 '类别',
 '通行设施',
 '事件活动']

In [27]:
#summary_all = pd.DataFrame(columns=["year", "city","number", "class1", "class2"])
summary_all_2021 = append_row(summary_all_2022,2021,len(poi_city_list),sum(number_list),class1_list,class2_list)
summary_all_2021

Unnamed: 0,year,city,number,class1,class2
0,2022,370,41502251,"[公司企业, 生活服务, 旅游景点, 酒店住宿, 餐饮美食, 购物消费, 科教文化, 汽车相...","[公司, 公厕, 宗教, 旅馆, 其他, 公园, 邮局, 小学, 便利店, 汽车维修, 停车..."
1,2021,340,71203623,"[地名地址信息, 科教文化服务, 风景名胜, 政府机构及社会团体, 住宿服务, 体育休闲服务...","[普通地名, 学校, 风景名胜, 自然地名, 政府机关, 交通地名, 科教文化场所, 宾馆酒..."


In [29]:
summary_all_2021.to_csv(save_path + "summary_all.csv",encoding = "utf-8-sig",index = False)

In [30]:
summary_year = pd.DataFrame(columns=["city","number", "path"])


summary_year[ "city"] = city_list
summary_year[ "number"] = number_list
summary_year[ "path"] = path_list


summary_year

Unnamed: 0,city,number,path
0,河源市,157955,2021POI/208-河源市-157599.csv
1,黄山市,105806,2021POI/106-黄山市-105806.csv
2,株洲市,227391,2021POI/182-株洲市-227391.csv
3,松原市,78677,2021POI/057-松原市-78677.csv
4,吉林市,161788,2021POI/052-吉林市-161788.csv
...,...,...,...
335,黔南州,151137,2021POI/262-黔南州-151137.csv
336,吴忠市,62525,2021POI/320-吴忠市-62525.csv
337,延边市,87311,2021POI/059-延边市-87311.csv
338,林芝市,13525,2021POI/282-林芝市-13525.csv


In [31]:
def find_match(poi_city, geo_df):
    for index, row in geo_df.iterrows():
        if (poi_city in row['地级']) or (poi_city[:-1] in row['地级']):
            return row['MID_m']
    return None  # Return None if no match is found

# Applying the custom match for each row in df_left to find matches in df_right
summary_year['MID_m'] = summary_year['city'].apply(lambda x: find_match(x, geomaster))

summary_year = summary_year[["MID_m","city","number", "path"]]
summary_year


Unnamed: 0,MID_m,city,number,path
0,221,河源市,157955,2021POI/208-河源市-157599.csv
1,307,黄山市,105806,2021POI/106-黄山市-105806.csv
2,117,株洲市,227391,2021POI/182-株洲市-227391.csv
3,240,松原市,78677,2021POI/057-松原市-78677.csv
4,129,吉林市,161788,2021POI/052-吉林市-161788.csv
...,...,...,...,...
335,224,黔南州,151137,2021POI/262-黔南州-151137.csv
336,298,吴忠市,62525,2021POI/320-吴忠市-62525.csv
337,34,延边市,87311,2021POI/059-延边市-87311.csv
338,56,林芝市,13525,2021POI/282-林芝市-13525.csv


In [32]:
##check if anycity are not matched with masterid
summary_year[pd.isna(summary_year['MID_m'])]

Unnamed: 0,MID_m,city,number,path


In [33]:
summary_year.to_csv(save_path + "summary_2021.csv",encoding = "utf-8-sig",index = False)

### 2020

In [34]:
year = 2020
folder = r"2020POI"
os.chdir(poi_path + folder)

In [35]:
city_list = []
poi_city_list = []


poi_2020_city = os.listdir()

for poi_city in poi_2020_city:
    poi_city_list.append(poi_city)    
    city_list.append(poi_city)

In [36]:
class1_list = []
class1_name_list = []

for i in tqdm(range(len(poi_city_list))):
    try:    
        file_list = os.listdir(poi_city_list[i])


        for file in file_list:
            if "_" in file:            
                class1 = file.split("_")[1]
            else:
                class1 = file
                
            class1 = class1.split(".")[0]

            if class1 not in class1_list:
                class1_list.append(class1)
                class1_name_list.append((str(class1)+str(i)+city_list[i]))
                
    except Exception as e:
        print(poi_city_list[i])
        
        
        

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 367/367 [00:04<00:00, 74.26it/s]


In [37]:
class1_list

['汽车销售',
 '政府机构及社会团体',
 '商务住宅',
 '餐饮服务',
 '购物服务',
 '交通设施服务',
 '金融保险服务',
 '室内设施',
 '公司企业',
 '公共设施',
 '体育休闲服务',
 '医疗保健服务',
 '道路附属设施',
 '通行设施',
 '生活服务',
 '科教文化服务',
 '地名地址信息',
 '汽车服务',
 '风景名胜',
 '汽车维修',
 '住宿服务',
 '摩托车服务',
 '事件活动']

In [38]:
class2_list = []
number_list = []
path_list = []

for i in tqdm(range(len(poi_city_list))):
    number = 0
    files = ""
    file_list = os.listdir(poi_city_list[i])        
    
    for file in file_list:
        poi_df = pd.read_csv(os.path.join(poi_city_list[i],file),encoding = "utf-8",encoding_errors='ignore', on_bad_lines='skip', dtype={14: 'str'})
        number += poi_df.shape[0]
        files = files + folder+"/"+file + "|"
        
        split_columns = poi_df['dtype'].str.split(';', expand=True)
        class2 = split_columns[1].unique()
        
        for element in class2:
            if element not in class2_list:
                class2_list.append(element)
        
    
    
    number_list.append(number)
    path_list.append(files)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 367/367 [07:18<00:00,  1.19s/it]


In [39]:
#summary_all = pd.DataFrame(columns=["year", "city","number", "class1", "class2"])
summary_all_2020 = append_row(summary_all_2021,2020,len(poi_city_list),sum(number_list),class1_list,class2_list)
summary_all_2020.to_csv(save_path + "summary_all.csv",encoding = "utf-8-sig",index = False)
summary_all_2020

Unnamed: 0,year,city,number,class1,class2
0,2022,370,41502251,"[公司企业, 生活服务, 旅游景点, 酒店住宿, 餐饮美食, 购物消费, 科教文化, 汽车相...","[公司, 公厕, 宗教, 旅馆, 其他, 公园, 邮局, 小学, 便利店, 汽车维修, 停车..."
1,2021,340,71203623,"[地名地址信息, 科教文化服务, 风景名胜, 政府机构及社会团体, 住宿服务, 体育休闲服务...","[普通地名, 学校, 风景名胜, 自然地名, 政府机关, 交通地名, 科教文化场所, 宾馆酒..."
2,2020,367,48373239,"[汽车销售, 政府机构及社会团体, 商务住宅, 餐饮服务, 购物服务, 交通设施服务, 金融...","[汽车销售, 标致雪铁龙特约销售, 通用特约销售, 福特特约销售, 奇瑞特约销售, 吉利特约..."


In [40]:
summary_year = pd.DataFrame(columns=["city","number", "path"])


summary_year[ "city"] = city_list
summary_year[ "number"] = number_list
summary_year[ "path"] = path_list


summary_year

Unnamed: 0,city,number,path
0,枣庄市,114884,2020POI/枣庄市_汽车销售.csv|2020POI/枣庄市_政府机构及社会团体.csv...
1,海北藏族自治州,11459,2020POI/海北藏族自治州_政府机构及社会团体.csv|2020POI/海北藏族自治州_...
2,晋中市,100629,2020POI/晋中市_汽车销售.csv|2020POI/晋中市_交通设施服务.csv|20...
3,鸡西市,38444,2020POI/鸡西市_风景名胜.csv|2020POI/鸡西市_汽车维修.csv|2020...
4,贵阳市,184943,2020POI/贵阳市_地名地址信息.csv|2020POI/贵阳市_通行设施.csv|20...
...,...,...,...
362,仙桃市,30307,2020POI/仙桃市_风景名胜.csv|2020POI/仙桃市_科教文化服务.csv|20...
363,昌都市,25340,2020POI/昌都市_公共设施.csv|2020POI/昌都市_交通设施服务.csv|20...
364,怀化市,104211,2020POI/怀化市_地名地址信息.csv|2020POI/怀化市_住宿服务.csv|20...
365,黄石市,72320,2020POI/黄石市_公司企业.csv|2020POI/黄石市_政府机构及社会团体.csv...


In [42]:
# Applying the custom match for each row in df_left to find matches in df_right
summary_year['MID_m'] = summary_year['city'].apply(lambda x: find_match(x, geomaster))

summary_year = summary_year[["MID_m","city","number", "path"]]
summary_year

Unnamed: 0,MID_m,city,number,path
0,350,枣庄市,114884,2020POI/枣庄市_汽车销售.csv|2020POI/枣庄市_政府机构及社会团体.csv...
1,255,海北藏族自治州,11459,2020POI/海北藏族自治州_政府机构及社会团体.csv|2020POI/海北藏族自治州_...
2,292,晋中市,100629,2020POI/晋中市_汽车销售.csv|2020POI/晋中市_交通设施服务.csv|20...
3,146,鸡西市,38444,2020POI/鸡西市_风景名胜.csv|2020POI/鸡西市_汽车维修.csv|2020...
4,274,贵阳市,184943,2020POI/贵阳市_地名地址信息.csv|2020POI/贵阳市_通行设施.csv|20...
...,...,...,...,...
362,366,仙桃市,30307,2020POI/仙桃市_风景名胜.csv|2020POI/仙桃市_科教文化服务.csv|20...
363,49,昌都市,25340,2020POI/昌都市_公共设施.csv|2020POI/昌都市_交通设施服务.csv|20...
364,135,怀化市,104211,2020POI/怀化市_地名地址信息.csv|2020POI/怀化市_住宿服务.csv|20...
365,179,黄石市,72320,2020POI/黄石市_公司企业.csv|2020POI/黄石市_政府机构及社会团体.csv...


In [43]:
##check if anycity are not matched with masterid
summary_year[pd.isna(summary_year['MID_m'])]

Unnamed: 0,MID_m,city,number,path


In [44]:
summary_year.to_csv(save_path + "summary_2020.csv",encoding = "utf-8-sig",index = False)

### 2019-V1

In [6]:
year = "2019-V1"
folder = r"2019POI-V1"
os.chdir(poi_path + folder)

In [7]:
class1_list = os.listdir()
class1_list

['我国绿地面积',
 '学校教育',
 '药店药房',
 '餐饮服务',
 '医疗服务',
 '公交站点',
 '镇驻地',
 '政府机构',
 '公司、服务区、公测等等信息',
 '高楼大厦',
 '高速出入口',
 '停车场服务',
 '公园',
 '宾馆酒店',
 '村驻地',
 '收费站服务',
 '我国A级景区',
 '银行服务']

In [None]:
gpd.read_file(os.path.join('宾馆酒店',"宾馆酒店_点.shp"))

In [47]:
number_list = []

for class1 in tqdm(class1_list):
    files = os.listdir(class1)
    
    for file in files:
        if file.endswith("shp"):            
            poi_df = gpd.read_file(os.path.join(class1,file))
            number_list.append(poi_df.shape[0])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [09:38<00:00, 32.16s/it]


In [50]:
number_list

[43061,
 269884,
 178996,
 465086,
 175051,
 259446,
 36334,
 240949,
 2571241,
 32530,
 21873,
 20108,
 8356,
 148156,
 3464473,
 14248,
 11970,
 301892]

In [49]:
#summary_all = pd.DataFrame(columns=["year", "city","number", "class1", "class2"])
summary_all_2019_V1 = append_row(summary_all_2020,"2019-V1","",sum(number_list),class1_list,"")
summary_all_2019_V1.to_csv(save_path + "summary_all.csv",encoding = "utf-8-sig",index = False)
summary_all_2019_V1

Unnamed: 0,year,city,number,class1,class2
0,2022,370.0,41502251,"[公司企业, 生活服务, 旅游景点, 酒店住宿, 餐饮美食, 购物消费, 科教文化, 汽车相...","[公司, 公厕, 宗教, 旅馆, 其他, 公园, 邮局, 小学, 便利店, 汽车维修, 停车..."
1,2021,340.0,71203623,"[地名地址信息, 科教文化服务, 风景名胜, 政府机构及社会团体, 住宿服务, 体育休闲服务...","[普通地名, 学校, 风景名胜, 自然地名, 政府机关, 交通地名, 科教文化场所, 宾馆酒..."
2,2020,367.0,48373239,"[汽车销售, 政府机构及社会团体, 商务住宅, 餐饮服务, 购物服务, 交通设施服务, 金融...","[汽车销售, 标致雪铁龙特约销售, 通用特约销售, 福特特约销售, 奇瑞特约销售, 吉利特约..."
3,2019-V1,,8263654,"[我国绿地面积, 学校教育, 药店药房, 餐饮服务, 医疗服务, 公交站点, 镇驻地, 政府...",


### 2019-V2

In [15]:
year = "2019-V2"
folder = r"2019POI-V2"
os.chdir(poi_path + folder)

In [110]:
# The path to your Excel file
file_path = '100000/d_130500.xls'

# Read all sheets into a dictionary of DataFrames
xls = pd.ExcelFile(file_path)
sheets = xls.sheet_names

# Concatenate all sheets into a single DataFrame
df_list = [xls.parse(sheet_name, header=None) for sheet_name in sheets]
concatenated_df = pd.concat(df_list, ignore_index=True)

concatenated_df.to_csv('100000/d_130500.csv', index=False, header=False)

In [118]:
folder = "300000"
file = "d_320300.csv"

with open(os.path.join(folder,file), 'rb') as f:
    raw_data = f.read()
    result = chardet.detect(raw_data)
    encoding = result['encoding']
    print(f"Detected encoding: {encoding}")

Detected encoding: utf-8


In [117]:
folder = "300000"
file = "d_320300.csv"

poi_df = pd.read_csv(os.path.join(folder,file), encoding='utf-8',encoding_errors='ignore', on_bad_lines='skip',header=None)
poi_df

Unnamed: 0,0,1,2,3,4,5,6,7
0,1,艾山风景名胜区,铁富镇310国道附近,34.516990,117.965239,徐州市;邳州市,休闲娱乐;旅游景点,0516-86135998
1,2,艾山九龙风景区自驾游基地,铁富镇艾山九龙风景区,34.513622,117.969078,徐州市;邳州市,休闲娱乐;旅游景点,
2,3,艾山九龙风景区自驾游基地-奶奶庙,401乡道,34.510350,117.963265,徐州市;邳州市,休闲娱乐;旅游景点,
3,4,艾山九龙风景区自驾游基地-仙人湖,310国道附近,34.526866,117.974780,徐州市;邳州市,休闲娱乐;旅游景点,
4,5,艾山洞石沟风景区,铁富镇艾山九龙山风景区,34.516194,117.963962,徐州市;邳州市,休闲娱乐;旅游景点,
...,...,...,...,...,...,...,...,...
435847,435848,申鹭达卫浴,新夏路口世纪明珠装饰城三期7-123号,34.422439,117.447159,五家渠市;贾汪区,购物;家具家居建材,
435848,435849,金晖嘉园2号楼,金晖嘉园(三环东路西150米),34.320378,117.233527,澳门特别行政区;鼓楼区,建筑房产;住宅小区,
435849,435850,金晖嘉园10号楼,金晖嘉园10号楼,34.321312,117.233748,澳门特别行政区;鼓楼区,建筑房产;其它房产小区,
435850,435851,金晖嘉园4号楼,东三环快速路金晖嘉园4号楼,34.320794,117.232977,澳门特别行政区;鼓楼区,建筑房产;住宅小区,


In [16]:
# Initialize data structures
city_list = []
poi_file_list = []
# Use sets for class1_list and class2_list for efficiency
class1_set = set()
class2_set = set()
number_list = []

folder_list = os.listdir()
geo_city_list = set(geomaster_selected["地级"])  # Convert to set for efficient look-up

for folder in folder_list:
    if folder.endswith("00000"):
        file_list = os.listdir(folder)
        print(folder)

        for file in tqdm(file_list):
            if file.endswith(".csv"):
                try:
                    file_path = os.path.join(folder, file)
                    # Try to determine the correct encoding by reading the first line
                    with open(file_path, 'r', encoding='GB2312', errors='ignore') as f:
                        first_line = f.readline()
                        city_first = first_line.split(",")[5].split(";")[0]
                        encoding = 'GB2312' if city_first in geo_city_list else 'utf-8'

                    # Read the file with the determined encoding
                    poi_df = pd.read_csv(file_path, encoding=encoding, encoding_errors='ignore', on_bad_lines='skip', header=None)

                    # Process city information
                    city_split_columns = poi_df[5].str.split(';', expand=True)
                    poi_df["city"] = city_split_columns[0]
                    city_counts = poi_df.groupby("city").size().reset_index(name='count')
                    
                    # Update lists and sets
                    city_list.extend(city_counts["city"])
                    number_list.extend(city_counts["count"])
                    poi_file_list.extend([file_path] * len(city_counts))

                    # Process class information
                    class_split_columns = poi_df[6].str.split(';', expand=True)
                    poi_df["class1"] = class_split_columns[0]
                    poi_df["class2"] = class_split_columns[1]
                    class1_set.update(poi_df["class1"].dropna().unique())
                    class2_set.update(poi_df["class2"].dropna().unique())

                except Exception as e:
                    print(f"Error processing {file}: {e}")

# Convert sets back to lists if necessary
class1_list = list(class1_set)
class2_list = list(class2_set)


600000


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 54/54 [00:38<00:00,  1.39it/s]


200000


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [00:24<00:00,  1.42it/s]


500000


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:47<00:00,  1.02it/s]


300000


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 79/79 [02:11<00:00,  1.66s/it]


100000


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:40<00:00,  1.19s/it]


400000


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 101/101 [01:44<00:00,  1.03s/it]


In [17]:
summary_year = pd.DataFrame(columns=["city","number", "path"])


summary_year[ "city"] = city_list
summary_year[ "number"] = number_list
summary_year[ "path"] = poi_file_list

summary_year[ "path"] = "2019POI-V2/" + summary_year[ "path"]
summary_year

Unnamed: 0,city,number,path
0,汉中市,188342,2019POI-V2/600000/d_610700.csv
1,塔城地区,168617,2019POI-V2/600000/d_654200.csv
2,金昌市,33754,2019POI-V2/600000/d_620300.csv
3,固原市,148434,2019POI-V2/600000/d_640400.csv
4,西安市,1065649,2019POI-V2/600000/d_610100.csv
...,...,...,...
529,仙桃市,111072,2019POI-V2/400000/d_429004.csv
530,郴州市,141196,2019POI-V2/400000/d_431000.csv
531,焦作市,145561,2019POI-V2/400000/d_410800.csv
532,黄石市,92391,2019POI-V2/400000/d_420200.csv


In [18]:
# Applying the custom match for each row in df_left to find matches in df_right
summary_year['MID_m'] = summary_year['city'].apply(lambda x: find_match(x, geomaster))

summary_year = summary_year[["MID_m","city","number", "path"]]
summary_year = summary_year.sort_values(by = "MID_m")
summary_year

Unnamed: 0,MID_m,city,number,path
447,1,三沙市,219,2019POI-V2/400000/d_460300.csv
467,1,,88951,2019POI-V2/400000/d_450400.csv
415,2,锡林郭勒盟,103663,2019POI-V2/100000/d_152500.csv
105,3,阿坝藏族羌族自治州,54702,2019POI-V2/500000/d_513200.csv
87,4,哈尔滨市,495460,2019POI-V2/200000/d_230100.csv
...,...,...,...,...
512,368,中山市,431212,2019POI-V2/400000/d_442000.csv
317,373,莱芜市,2,2019POI-V2/300000/d_320300.csv
150,373,莱芜市,58609,2019POI-V2/300000/d_371200.csv
107,374,那曲地区,44059,2019POI-V2/500000/d_542400.csv


In [19]:
summary_year = summary_year.groupby('MID_m').agg({
    'city': "first",   
    'number': "sum",
    'path': ';'.join
}).reset_index()

summary_year

Unnamed: 0,MID_m,city,number,path
0,1,三沙市,89170,2019POI-V2/400000/d_460300.csv;2019POI-V2/4000...
1,2,锡林郭勒盟,103663,2019POI-V2/100000/d_152500.csv
2,3,阿坝藏族羌族自治州,54702,2019POI-V2/500000/d_513200.csv
3,4,哈尔滨市,495460,2019POI-V2/200000/d_230100.csv
4,5,兴安盟,65524,2019POI-V2/100000/d_152200.csv
...,...,...,...,...
345,366,仙桃市,111097,2019POI-V2/300000/d_320300.csv;2019POI-V2/4000...
346,367,香港特别行政区,298455,2019POI-V2/600000/d_810000.csv
347,368,中山市,431221,2019POI-V2/300000/d_320300.csv;2019POI-V2/4000...
348,373,莱芜市,58611,2019POI-V2/300000/d_320300.csv;2019POI-V2/3000...


In [75]:
##check if anycity are not matched with masterid
summary_year[pd.isna(summary_year['MID_m'])]

Unnamed: 0,MID_m,city,number,path


In [76]:
summary_year.to_csv(save_path + "summary_2019-V2.csv",encoding = "utf-8-sig",index = False)

In [82]:
summary_all_2019_V1 = pd.read_csv(save_path + "summary_all.csv")
summary_all_2019_V1

Unnamed: 0,year,city,number,class1,class2
0,2022,370.0,41502251,"['公司企业', '生活服务', '旅游景点', '酒店住宿', '餐饮美食', '购物消费...","['公司', '公厕', '宗教', '旅馆', '其他', '公园', '邮局', '小学..."
1,2021,340.0,71203623,"['地名地址信息', '科教文化服务', '风景名胜', '政府机构及社会团体', '住宿服...","['普通地名', '学校', '风景名胜', '自然地名', '政府机关', '交通地名',..."
2,2020,367.0,48373239,"['汽车销售', '政府机构及社会团体', '商务住宅', '餐饮服务', '购物服务', ...","['汽车销售', '标致雪铁龙特约销售', '通用特约销售', '福特特约销售', '奇瑞特..."
3,2019-V1,,8263654,"['我国绿地面积', '学校教育', '药店药房', '餐饮服务', '医疗服务', '公交...",


In [86]:
#summary_all = pd.DataFrame(columns=["year", "city","number", "class1", "class2"])
summary_all_2019_V2 = append_row(summary_all_2019_V1,"2019-V2",summary_year.shape[0],sum(number_list),class1_list,class2_list)
summary_all_2019_V2.to_csv(save_path + "summary_all.csv",encoding = "utf-8-sig",index = False)
summary_all_2019_V2

Unnamed: 0,year,city,number,class1,class2
0,2022,370.0,41502251,"['公司企业', '生活服务', '旅游景点', '酒店住宿', '餐饮美食', '购物消费...","['公司', '公厕', '宗教', '旅馆', '其他', '公园', '邮局', '小学..."
1,2021,340.0,71203623,"['地名地址信息', '科教文化服务', '风景名胜', '政府机构及社会团体', '住宿服...","['普通地名', '学校', '风景名胜', '自然地名', '政府机关', '交通地名',..."
2,2020,367.0,48373239,"['汽车销售', '政府机构及社会团体', '商务住宅', '餐饮服务', '购物服务', ...","['汽车销售', '标致雪铁龙特约销售', '通用特约销售', '福特特约销售', '奇瑞特..."
3,2019-V1,,8263654,"['我国绿地面积', '学校教育', '药店药房', '餐饮服务', '医疗服务', '公交...",
4,2019-V2,350.0,74081272,"[购物, 美食, 医疗保健, 运动健身, 基础设施, 休闲娱乐, 文化场馆, 生活服务, 地...","[其它购物, 小吃快餐, 中餐厅, 数码家电, 火锅, 诊所, 服饰鞋包, 农贸市场, 其它..."


### 2018

In [20]:
year = "2018"
folder = r"2018POI"
os.chdir(poi_path + folder)

In [32]:
file_list = os.listdir()
file_list[0]

'1540881779063.csv'

In [9]:
with open("1540884019261.csv", 'rb') as f:
    raw_data = f.read()
    result = chardet.detect(raw_data)
    encoding = result['encoding']
    print(f"Detected encoding: {encoding}")

Detected encoding: GB2312


In [33]:
poi_df = pd.read_csv('1540881779063.csv', encoding='GB2312',encoding_errors='ignore', on_bad_lines='warn',quoting=csv.QUOTE_NONE)
poi_df

Unnamed: 0,address,adname,page_publish_time,adcode,pname,cityname,name,location,_id,type,Unnamed: 10
0,万源市,万源市,2017-06-19 16:19:35,511781,四川省,达州市,营盘,108.354324，31.804957,m#20170619#e6e880990f667fb56f640a93fa939382,地名地址信息;普通地名;村庄级地名,
1,平桂区,平桂区,2017-06-19 13:39:07,451103,广西壮族自治区,贺州市,白},111.261630，23.980339,7#20170619#773319260bad90aa4d61536a9acd70ed,地名地址信息;普通地名;村庄级地名,
2,中山大道中138号汇勤楼4楼,天河区,2017-06-19 10:11:31,440106,广东省,广州市,广州书韵文化传播有限公司,113.400530，23.124350,w#20170619#17d637d18e214d0970088b9e996b14ae,科教文化服务;科教文化场所;科教文化场所,
3,谷里街道振容路79号,江宁区,2017-06-18 18:49:56,320115,江苏省,南京市,世纪华联超市(谷里店),118.694965，31.867626,6#20170618#5f6aea29355515fec16f1c86a3b41540,购物服务;便民商店/便利店;便民商店/便利店,
4,纯阳宫路3号附近,迎泽区,2017-06-18 13:31:10,140106,山西省,太原市,少儿艺术团操招生,112.572522，37.863343,1#20170618#68fe4683a42e54c5aff2edb99f1196c0,生活服务;生活服务场所;生活服务场所,
...,...,...,...,...,...,...,...,...,...,...,...
999995,苍梧县,苍梧县,2017-06-17 19:43:52,450421,广西壮族自治区,梧州市,丹竹,111.166856，23.821681,7#20170617#e125d2269374978be9bc394b2a3be5e9,地名地址信息;普通地名;村庄级地名,
999996,兴阳路779号,城阳区,2017-06-19 03:05:36,370214,山东省,青岛市,金得利商社,120.381582，36.292821,q#20170619#55984ec612ab965f4b733a9246750c0c,购物服务;购物相关场所;购物相关场所,
999997,环县,环县,2017-06-19 19:38:15,621022,甘肃省,庆阳市,后台,107.026178，36.769612,o#20170619#b38ba32b19f234acccb7fb99c697d5d1,地名地址信息;普通地名;村庄级地名,
999998,茶山镇增渡扯,东莞市,2017-06-17 18:49:02,441900,广东省,东莞市,绿杨工业大厦,113.860435，23.049770,q#20170617#3d6d22b700a821b2b5e6612c7e8dff0b,商务住宅;楼宇;工业大厦建筑物,


In [35]:

# Initialize lists to store results
city_list = []
number_list = []
poi_file_list = []
# Use sets for class1_list and class2_list to efficiently manage unique elements
class1_set = set()
class2_set = set()

for file in tqdm(file_list):
    try:    
        # Load the file
        poi_df = pd.read_csv(file, encoding='GB2312', encoding_errors='ignore', on_bad_lines='skip')
    
    except Exception as e:
        poi_df = pd.read_csv(file, encoding='GB2312', encoding_errors='ignore', on_bad_lines='skip',quoting=csv.QUOTE_NONE)
        
    # Split 'type' into 'class1' and 'class2'
    class_split_columns = poi_df["type"].str.split(';', expand=True)
    poi_df["class1"] = class_split_columns[0]
    poi_df["class2"] = class_split_columns[1]

    # Update class sets with unique elements
    class1_set.update(poi_df["class1"].dropna().unique())
    class2_set.update(poi_df["class2"].dropna().unique())

    # Aggregate counts by city
    city_counts = poi_df.groupby("cityname").size().reset_index(name='count')

    # Append results
    city_list.extend(city_counts["cityname"])
    number_list.extend(city_counts["count"])
    poi_file_list.extend([os.path.join(folder, file)] * len(city_counts))
    


# Convert class sets back to lists if necessary
class1_list = list(class1_set)
class2_list = list(class2_set)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [07:29<00:00,  6.71s/it]


In [40]:
unique_values = set(poi_file_list)
unique_list = list(unique_values)

print(len(unique_list))

67


In [43]:
summary_year = pd.DataFrame(columns=["city","number", "path"])


summary_year[ "city"] = city_list
summary_year[ "number"] = number_list
summary_year[ "path"] = poi_file_list

summary_year = summary_year.groupby('city').agg({ 
    'number': "sum",
    'path': ';'.join
}).reset_index()

summary_year

Unnamed: 0,city,number,path
0,七台河市,33714,2018POI/1540881779063.csv;2018POI/154088060985...
1,万宁市,15929,2018POI/1540881779063.csv;2018POI/154088060985...
2,三亚市,75192,2018POI/1540881779063.csv;2018POI/154088060985...
3,三明市,130234,2018POI/1540881779063.csv;2018POI/154088060985...
4,三沙市,436,2018POI/1540881779063.csv;2018POI/154088060985...
...,...,...,...
369,黔东南苗族侗族自治州,138192,2018POI/1540881779063.csv;2018POI/154088060985...
370,黔南布依族苗族自治州,118783,2018POI/1540881779063.csv;2018POI/154088060985...
371,黔西南布依族苗族自治州,77231,2018POI/1540881779063.csv;2018POI/154088060985...
372,齐齐哈尔市,151664,2018POI/1540881779063.csv;2018POI/154088060985...


In [44]:
# Applying the custom match for each row in df_left to find matches in df_right
summary_year['MID_m'] = summary_year['city'].apply(lambda x: find_match(x, geomaster))

summary_year = summary_year[["MID_m","city","number", "path"]]
summary_year = summary_year.sort_values(by = "MID_m")
summary_year

Unnamed: 0,MID_m,city,number,path
4,1,三沙市,436,2018POI/1540881779063.csv;2018POI/154088060985...
183,1,板,1,2018POI/1540884339286.csv
333,2,锡林郭勒盟,111477,2018POI/1540881779063.csv;2018POI/154088060985...
346,3,阿坝藏族羌族自治州,65635,2018POI/1540881779063.csv;2018POI/154088060985...
82,4,哈尔滨市,495872,2018POI/1540881779063.csv;2018POI/154088060985...
...,...,...,...,...
357,367,香港特别行政区,408297,2018POI/1540881779063.csv;2018POI/154088060985...
12,368,中山市,174079,2018POI/1540881779063.csv;2018POI/154088060985...
167,370,昆玉市,395,2018POI/1540880609855.csv;2018POI/154088446952...
283,373,莱芜市,59303,2018POI/1540881779063.csv;2018POI/154088060985...


In [47]:
summary_year = summary_year.loc[summary_year['number'] != 1]

In [48]:
##check if anycity are not matched with masterid
summary_year[pd.isna(summary_year['MID_m'])]

Unnamed: 0,MID_m,city,number,path


In [49]:
summary_year.to_csv(save_path + "summary_2018.csv",encoding = "utf-8-sig",index = False)

In [52]:
summary_all_2019_V2 = pd.read_csv(save_path + "summary_all.csv")

summary_all_2018 = append_row(summary_all_2019_V2,"2018",summary_year.shape[0],sum(number_list),class1_list,class2_list)
summary_all_2018.to_csv(save_path + "summary_all.csv",encoding = "utf-8-sig",index = False)
summary_all_2018

Unnamed: 0,year,city,number,class1,class2
0,2022,370.0,41502251,"['公司企业', '生活服务', '旅游景点', '酒店住宿', '餐饮美食', '购物消费...","['公司', '公厕', '宗教', '旅馆', '其他', '公园', '邮局', '小学..."
1,2021,340.0,71203623,"['地名地址信息', '科教文化服务', '风景名胜', '政府机构及社会团体', '住宿服...","['普通地名', '学校', '风景名胜', '自然地名', '政府机关', '交通地名',..."
2,2020,367.0,48373239,"['汽车销售', '政府机构及社会团体', '商务住宅', '餐饮服务', '购物服务', ...","['汽车销售', '标致雪铁龙特约销售', '通用特约销售', '福特特约销售', '奇瑞特..."
3,2019-V1,,8263654,"['我国绿地面积', '学校教育', '药店药房', '餐饮服务', '医疗服务', '公交...",
4,2019-V2,350.0,74081272,"['购物', '美食', '医疗保健', '运动健身', '基础设施', '休闲娱乐', '...","['其它购物', '小吃快餐', '中餐厅', '数码家电', '火锅', '诊所', '服..."
5,2018,373.0,65187245,"[科教文化服务, 医疗保健服务, 室内设施, 风景名胜, 通行设施, 地名地址信息, 住宿服...","[中国重汽维修, 休闲场所, 吉利特约维修, 飞机场, 科教文化场所, 楼宇, 金融保险服务..."
