In [5]:
import numpy as np
import matplotlib.pyplot as plt
import glog as log
 
from sklearn.cluster import DBSCAN  # 进行DBSCAN聚类
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score ,calinski_harabasz_score,davies_bouldin_score # 计算 轮廓系数，CH 指标，DBI 
 
 
# 定义一个进行DBSCAN的函数
def DBSCAN_Cluster(embedding_image_feats, eps=0.35, min_samples=600):
    """
    Standardize the input features and cluster them with DBSCAN.

    :param embedding_image_feats: per-sample feature matrix handed to
        StandardScaler (the original author's example is shape (9434, 4),
        i.e. 9434 pixels -- TODO confirm against the caller).
    :param eps: DBSCAN neighbourhood radius; it is applied to the
        standardized features, not to the raw input.
    :param min_samples: DBSCAN ``min_samples`` threshold.
    :return: dict with the keys
        'origin_features' -- the standardized features (not the raw input),
        'cluster_nums'    -- number of distinct labels; the noise label -1
                             is counted when it is present,
        'db_labels'       -- per-sample labels taken from ``db.labels_``,
        'unique_labels'   -- distinct labels as returned by ``np.unique``,
        'cluster_center'  -- ``db.components_``; these are the core samples
                             found by DBSCAN, not one centroid per cluster.
        When scaling or fitting raises, the error is logged and the same
        keys are returned with None values and 'cluster_nums' set to 0.
    """
    db = DBSCAN(eps=eps, min_samples=min_samples)
    try:
        features = StandardScaler().fit_transform(embedding_image_feats)  # zero-mean / unit-variance scaling
        db.fit(features)
    except Exception as err:
        # Best-effort contract: report the failure through the return value
        # instead of propagating the exception.
        log.error(err)
        # Keep the key set identical to the success path so callers can index
        # the result without guarding against a missing 'unique_labels'.
        return {
            'origin_features': None,
            'cluster_nums': 0,
            'db_labels': None,
            'unique_labels': None,
            'cluster_center': None
        }

    db_labels = db.labels_                  # label assigned to every sample
    unique_labels = np.unique(db_labels)    # distinct labels

    num_clusters = len(unique_labels)
    cluster_centers = db.components_

    return {
        'origin_features': features,
        'cluster_nums': num_clusters,
        'db_labels': db_labels,
        'unique_labels': unique_labels,
        'cluster_center': cluster_centers
    }
 
# 画出聚类之后的结果
def plot_dbscan_result(features,db_labels,unique_labels,num_clusters):
    """
    Scatter-plot the first two feature columns, one colour per label.

    :param features: array indexed by sample; columns 0 and 1 are drawn.
    :param db_labels: per-sample label array compared against each label.
    :param unique_labels: the labels to draw, one plot call per label.
    :param num_clusters: number shown in the figure title.
    :return: None; the figure is displayed with ``plt.show()``.
    """
    palette = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

    for label, shade in zip(unique_labels, palette):
        # Label -1 marks noise points; they are always drawn in black.
        face = 'k' if label == -1 else shade

        # Rows of `features` whose label equals the current one.
        members = features[np.where(db_labels == label)]

        plt.plot(
            members[:, 0],
            members[:, 1],
            'o',
            markerfacecolor=face,
            markeredgecolor='k',
            markersize=6,
        )

    plt.title('Estimated number of clusters: %d' % num_clusters)
    plt.show()
 
 
if __name__=='__main__':
    # Load the saved embedding features (the original note gives the layout as
    # "(samples,)" -- TODO confirm the actual array shape).
    embedding_features=np.load("./tools/features_logits/lane_embedding_feats.npy")

    ret=DBSCAN_Cluster(embedding_features)  # run DBSCAN clustering

    # DBSCAN_Cluster signals a failed fit by returning None for the labels;
    # stop here with a clear message instead of failing later inside the
    # plotting or scoring calls.
    if ret['db_labels'] is None:
        raise RuntimeError('DBSCAN clustering failed; see the logged error for details')

    db_labels = ret['db_labels']

    # Show the clustering result.
    plot_dbscan_result(ret['origin_features'], db_labels, ret['unique_labels'], ret['cluster_nums'])

    # The three scores below require between 2 and n_samples - 1 distinct
    # labels; scikit-learn raises ValueError outside that range.
    n_labels = len(ret['unique_labels'])
    if 2 <= n_labels <= len(db_labels) - 1:
        # NOTE(review): the scores are computed on the raw features, while the
        # clustering itself ran on the standardized copy
        # (ret['origin_features']); kept as-is so the recorded results stay
        # comparable -- confirm which feature space is intended.
        # silhouette_score(data, predicted labels, metric)
        s1=silhouette_score(embedding_features, db_labels, metric='euclidean')  # silhouette coefficient
        s2=calinski_harabasz_score(embedding_features, db_labels)               # Calinski-Harabasz score
        s3=davies_bouldin_score(embedding_features, db_labels)                  # Davies-Bouldin index

        print(s1)
        print(s2)
        print(s3)
    else:
        print('Skipping cluster-quality scores: need between 2 and n_samples - 1 distinct labels, got %d' % n_labels)
 
'''运行结果为：
0.7971864
48375.80213812995
0.8799878743935938
'''

FileNotFoundError: [Errno 2] No such file or directory: './tools/features_logits/lane_embedding_feats.npy'

In [4]:
# Install glog with the %pip magic so the package lands in the environment of
# the running kernel; `!pip` runs whichever pip is first on the shell PATH.
%pip install glog

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
