In [5]:
#The imports
from sklearn.cluster import DBSCAN
import numpy as np
import pandas as pd

In [8]:
#Load the parameters of the fitted nested variogram structures, which were saved as a
#GEO-EAS point set (read here as a tab-separated table). Columns, in file order:
#   1st column: a     (main range)
#   2nd column: b/a   (minor/main ranges ratio)
#   3rd column: theta (azimuth)
#   4th column: cc    (contribution)

varParameters = pd.read_csv("Scat_V1_smth_pls_noise_GA_FIM_408.xyz", sep='\t')

#Show (number of rows, number of columns) as a quick sanity check of the load
print(varParameters.shape)

#Number of clusters assumed by the cells that compute the per-cluster means
numberOfClusters = 2


(408, 4)


In [38]:
#Disabled alternative, kept for reference: spectral clustering using only range,
#ranges ratio and azimuth (the contribution column is left out). SpectralClustering
#is not imported in the imports cell, so it must be imported before re-enabling this.
#clusteringData = SpectralClustering(n_clusters=numberOfClusters, 
#                                    assign_labels="discretize", 
#                                    random_state=0).fit( varParameters.values[:,:3] )

#Active choice: DBSCAN over all the columns of varParameters.
#NOTE(review): eps=0.5 is applied to the columns exactly as read (no rescaling is done
#in the visible cells) - confirm that this neighbourhood radius suits every column.
clusteringData = DBSCAN(eps=0.5)
clusteringData.fit( varParameters )   #fit() returns the estimator itself, now holding labels_

In [39]:
#Compute the mean variogram parameters of each cluster (cluster centers).
#
#DBSCAN chooses the number of clusters on its own and labels noise samples as -1.
#Used directly as an array index, -1 would silently add the noise samples to the
#LAST cluster, and any label >= numberOfClusters would raise an IndexError.
#Therefore the noise samples are left out of the means and the arrays are sized to
#hold every cluster actually found (and never fewer than numberOfClusters entries,
#so code that indexes them by numberOfClusters keeps working).

clusterLabels   = np.asarray( clusteringData.labels_ )
parameterValues = np.asarray( varParameters.values[ :, :4 ], dtype=float )

#Keep only the samples that were assigned to a cluster (label >= 0)
isClustered  = clusterLabels >= 0
memberLabels = clusterLabels  [ isClustered ]
memberValues = parameterValues[ isClustered ]

foundClusters = int( memberLabels.max() ) + 1 if memberLabels.size > 0 else 0
clusterCount  = max( numberOfClusters, foundClusters )

#Tell the user when the clustering does not match the expected configuration
noiseCount = int( clusterLabels.size - memberLabels.size )
if noiseCount > 0 or foundClusters != numberOfClusters :
    print("DBSCAN found", foundClusters, "cluster(s);", noiseCount, "noise sample(s) ignored")

#The counts of samples for each cluster (kept as floats, like the original zeros array)
sampleCounts = np.bincount( memberLabels, minlength=clusterCount ).astype( float )

#Accumulate the sums of parameters per cluster, one row per cluster and one column
#per parameter (a, b/a, azimuth, cc); np.add.at accumulates repeated indices correctly
parameterSums = np.zeros( ( clusterCount, 4 ) )
np.add.at( parameterSums, memberLabels, memberValues )

#Compute the means of parameters per cluster; a cluster without samples yields NaN
with np.errstate( divide='ignore', invalid='ignore' ) :
    clusterMeans = parameterSums / sampleCounts[ :, np.newaxis ]

#Stores the mean parameters for each cluster (independent arrays, as before)
rangeMeans       = clusterMeans[ :, 0 ].copy()
rangeRatioMeans  = clusterMeans[ :, 1 ].copy()
azimuthMeans     = clusterMeans[ :, 2 ].copy()
ccMeans          = clusterMeans[ :, 3 ].copy()

for iCluster in range(0, clusterCount) :
    print("Cluster", iCluster+1, ":")
    print("  a=", rangeMeans[iCluster], 
          "b/a=", rangeRatioMeans[iCluster], 
          "az=", azimuthMeans[iCluster], 
          "cc=", ccMeans[iCluster])
 


Cluster 1 :
  a= 20.8889074041 b/a= 0.4279050447096 az= 62.35645619068 cc= 1.450642427874
Cluster 2 :
  a= 10.967378125345556 b/a= 0.3085486301846454 az= 81.30262464483322 cc= 1.4610126309873446


In [None]:
#Code to plot the clusters
#print(__doc__)

#import numpy as np

#from sklearn.cluster import DBSCAN
#from sklearn import metrics
#from sklearn.datasets import make_blobs
#from sklearn.preprocessing import StandardScaler


# #############################################################################
# Generate sample data
# (the continuation line of the disabled make_blobs call is commented out as well;
#  left uncommented it made the whole cell fail with an IndentationError)
#centers = [[1, 1], [-1, -1], [1, -1]]
#X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,
#                            random_state=0)

#X = StandardScaler().fit_transform(X)

# #############################################################################
# Compute DBSCAN
#db = DBSCAN(eps=0.3, min_samples=10).fit(X)
#core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
#core_samples_mask[db.core_sample_indices_] = True
#labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
#n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
#n_noise_ = list(labels).count(-1)

#print('Estimated number of clusters: %d' % n_clusters_)
#print('Estimated number of noise points: %d' % n_noise_)
#print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
#print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
#print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
#print("Adjusted Rand Index: %0.3f"
#      % metrics.adjusted_rand_score(labels_true, labels))
#print("Adjusted Mutual Information: %0.3f"
#      % metrics.adjusted_mutual_info_score(labels_true, labels))
#print("Silhouette Coefficient: %0.3f"
#      % metrics.silhouette_score(X, labels))

# #############################################################################
# Plot result
#import matplotlib.pyplot as plt

# Black removed and is used for noise instead.
#unique_labels = set(labels)
#colors = [plt.cm.Spectral(each)
#          for each in np.linspace(0, 1, len(unique_labels))]
#for k, col in zip(unique_labels, colors):
#    if k == -1:
        # Black used for noise.
#        col = [0, 0, 0, 1]

#    class_member_mask = (labels == k)

#    xy = X[class_member_mask & core_samples_mask]
#    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
#             markeredgecolor='k', markersize=14)

#    xy = X[class_member_mask & ~core_samples_mask]
#    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
#             markeredgecolor='k', markersize=6)

#plt.title('Estimated number of clusters: %d' % n_clusters_)
#plt.show()