### 1. 载入数据

In [11]:
import numpy as np

# Parse the whitespace-separated coordinate file: one sample per non-blank
# line, every field converted to float.
data_list = []

with open("22830134_latitude_longitude.txt", "r") as f:
    # Iterate the file lazily instead of materialising every line first.
    for line in f:
        fields = line.split()
        if not fields:
            # A blank line would append an empty row and make the final
            # array ragged, so it is skipped.
            continue
        data_list.append([float(value) for value in fields])

# Each inner list becomes one row of X.
X = np.array(data_list)

### 2. sklearn dbscan

#### 2.1 fit

In [170]:
from sklearn.cluster import DBSCAN

# Fit the reference scikit-learn estimator on the coordinates.
clustering = DBSCAN(min_samples=2, eps=3)
clustering.fit(X)

# Row indices of the samples that were assigned to cluster 1.
np.argwhere(clustering.labels_ == 1)


array([[231],
       [299]])

#### 2.2 cluster mean positions

In [57]:
# Column-wise mean of the samples labelled 0 and of the samples labelled 1.
mean_position_a = X[clustering.labels_ == 0].mean(axis=0)
mean_position_b = X[clustering.labels_ == 1].mean(axis=0)

dict(a=mean_position_a, b=mean_position_b)

{'a': array([ 31.20870031, 121.59304649]),
 'b': array([ 36.6565035, 118.3938755])}

### 3. dbscan class

In [174]:
class DBSCAN(object):
    """Minimal NumPy implementation of DBSCAN density-based clustering.

    A sample is a *core* sample when at least ``min_samples`` samples
    (itself included) lie strictly closer than ``eps`` to it (Euclidean
    distance). Every cluster is grown from a core sample by repeatedly
    absorbing the neighbours of the core samples already in the cluster.
    Samples that are never absorbed keep the label ``-1``.

    NOTE: this class has the same name as ``sklearn.cluster.DBSCAN``
    imported earlier in this notebook, so defining it rebinds that name.
    """

    def __init__(self, eps=0.5, min_samples=5):
        """Store the hyper-parameters.

        arguments:
        ----------
        eps: float
            Neighbourhood radius. Two samples are neighbours when their
            Euclidean distance is strictly smaller than ``eps``.
        min_samples: int
            Minimum neighbourhood size (the sample itself counts) for a
            sample to be a core sample.
        """
        self._eps = eps
        self._min_samples = min_samples
        self._labels = None

    @property
    def labels_(self):
        """Cluster labels of the last ``fit`` call (``None`` before fitting).

        Read-only alias of ``_labels`` using the scikit-learn attribute name.
        """
        return self._labels

    def fit(self, X):
        """Cluster the rows of ``X`` and store the labels in ``self._labels``.

        arguments:
        ----------
        X: array-like of shape (samples_num, features_num)

        returns:
        --------
        self, so that ``DBSCAN(...).fit(X)`` can be chained. ``self._labels``
        is an integer array holding one cluster index (0, 1, ...) per sample,
        or -1 for samples that belong to no cluster.

        raises:
        -------
        ValueError if ``X`` is not two-dimensional.
        """
        from collections import deque

        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (samples, features), got ndim=%d" % X.ndim
            )
        samples_num = X.shape[0]

        # Pairwise neighbourhood matrix, built one row at a time.
        # NOTE(review): the comparison is strict (<); scikit-learn includes
        # samples lying exactly at distance eps. Kept strict so existing
        # results do not change.
        neighbor_flag = np.zeros((samples_num, samples_num), dtype=bool)
        for inx, x in enumerate(X):
            diff = X - x
            neighbor_flag[inx] = np.sqrt(np.sum(diff * diff, axis=1)) < self._eps

        # Core samples: neighbourhood (self included) has enough members.
        is_core = neighbor_flag.sum(axis=1) >= self._min_samples

        labels = np.full(samples_num, -1, dtype=int)
        visited = np.zeros(samples_num, dtype=bool)
        cluster_id = 0

        # Seeds are taken in ascending index order so that the cluster
        # numbering and the assignment of border samples are reproducible.
        for seed in np.flatnonzero(is_core):
            if visited[seed]:
                # Already absorbed by a cluster grown from an earlier seed.
                continue

            visited[seed] = True
            labels[seed] = cluster_id
            queue = deque([seed])

            while queue:
                point = queue.popleft()

                # Unvisited samples directly reachable from ``point``.
                # flatnonzero always yields a 1-D array, including when the
                # neighbourhood holds a single sample.
                fresh = np.flatnonzero(neighbor_flag[point] & ~visited)
                visited[fresh] = True
                labels[fresh] = cluster_id

                # Only core samples keep expanding the cluster; border
                # samples are labelled but not expanded.
                queue.extend(fresh[is_core[fresh]])

            cluster_id += 1

        self._labels = labels
        return self

In [175]:
# Run the hand-written implementation with the same hyper-parameters as the
# scikit-learn run so the two results can be compared.
dbscan = DBSCAN(min_samples=2, eps=3)
dbscan.fit(X)

# Row indices of the samples that were assigned to cluster 1.
np.argwhere(dbscan._labels == 1)

array([[231],
       [299]])

In [176]:
# Column-wise mean of the samples labelled 0 and of the samples labelled 1
# by the hand-written implementation.
mean_position_a = X[dbscan._labels == 0].mean(axis=0)
mean_position_b = X[dbscan._labels == 1].mean(axis=0)

dict(a=mean_position_a, b=mean_position_b)

{'a': array([ 31.20870031, 121.59304649]),
 'b': array([ 36.6565035, 118.3938755])}

### 4. baidu map api

In [185]:
import os
from getpass import getpass

import requests

# The Baidu Map API key is a credential and must not be stored in the
# notebook: read it from the BAIDU_MAP_AK environment variable, or prompt
# for it when the variable is not set.
ak = os.environ.get("BAIDU_MAP_AK") or getpass("Baidu Map API key (ak): ")

# Reverse-geocode the mean position of cluster "a" (latitude,longitude).
# HTTPS keeps the key out of clear-text traffic.
url = "https://api.map.baidu.com/geocoder/v2/?callback=renderReverse&location=%s&output=json&pois=1&ak=%s" % ("31.20870031,121.59304649", ak)

with requests.Session() as sess:
    # A timeout prevents the cell from hanging forever on network problems.
    resp = sess.get(url, timeout=10)
    # Surface HTTP errors instead of printing an error page as if it were data.
    resp.raise_for_status()
    print(resp.text)

renderReverse&&renderReverse({"status":0,"result":{"location":{"lng":121.59304648999994,"lat":31.208700173642275},"formatted_address":"上海市浦东新区松涛路696号","business":"张江","addressComponent":{"country":"中国","country_code":0,"country_code_iso":"CHN","country_code_iso2":"CN","province":"上海市","city":"上海市","city_level":2,"district":"浦东新区","town":"","adcode":"310115","street":"松涛路","street_number":"696号","direction":"东","distance":"63"},"pois":[{"addr":"上海市浦东张江祖冲之路555号","cp":"","direction":"内","distance":"0","name":"中国科学院上海药物研究所","poiType":"教育培训","point":{"x":121.5936905900105,"y":31.20883883922049},"tag":"教育培训;科研机构","tel":"","uid":"d146b388dfa661df74026945","zip":"","parent_poi":{"name":"","tag":"","addr":"","point":{"x":0.0,"y":0.0},"direction":"","distance":"","uid":""}},{"addr":"上海市浦东新区张江高科技园区中国科学院上海药物研究所","cp":" ","direction":"附近","distance":"16","name":"生命科学图书馆(浦东分馆)","poiType":"教育培训","point":{"x":121.59319652198,"y":31.20872301864032},"tag":"教育培训;图书馆","tel":"","uid":"f4a33da9b9abe3773010a

In [186]:
import os
from getpass import getpass

import requests

# The Baidu Map API key is a credential and must not be stored in the
# notebook: read it from the BAIDU_MAP_AK environment variable, or prompt
# for it when the variable is not set.
ak = os.environ.get("BAIDU_MAP_AK") or getpass("Baidu Map API key (ak): ")

# Reverse-geocode the mean position of cluster "b" (latitude,longitude).
# The coordinate pair is written without an embedded space so the query
# string needs no escaping. HTTPS keeps the key out of clear-text traffic.
url = "https://api.map.baidu.com/geocoder/v2/?callback=renderReverse&location=%s&output=json&pois=1&ak=%s" % ("36.6565035,118.3938755", ak)

with requests.Session() as sess:
    # A timeout prevents the cell from hanging forever on network problems.
    resp = sess.get(url, timeout=10)
    # Surface HTTP errors instead of printing an error page as if it were data.
    resp.raise_for_status()
    print(resp.text)

renderReverse&&renderReverse({"status":0,"result":{"location":{"lng":118.39387549999992,"lat":36.65650341318585},"formatted_address":"山东省潍坊市青州市","business":"","addressComponent":{"country":"中国","country_code":0,"country_code_iso":"CHN","country_code_iso2":"CN","province":"山东省","city":"潍坊市","city_level":2,"district":"青州市","town":"","adcode":"370781","street":"","street_number":"","direction":"","distance":""},"pois":[{"addr":"山东省潍坊市青州市王府街道富班村","cp":" ","direction":"东","distance":"766","name":"富班村","poiType":"行政地标","point":{"x":118.3870543263177,"y":36.655757648070437},"tag":"行政地标;村庄","tel":"","uid":"25ec69fc9974947cc26c1435","zip":"","parent_poi":{"name":"","tag":"","addr":"","point":{"x":0.0,"y":0.0},"direction":"","distance":"","uid":""}},{"addr":"潍坊市青州市三二五省道","cp":" ","direction":"东南","distance":"958","name":"辛店子村","poiType":"行政地标","point":{"x":118.38742263157678,"y":36.661091974621609},"tag":"行政地标;村庄","tel":"","uid":"2a7a25ec3957d3626c3e1b35","zip":"","parent_poi":{"name":"","tag":"