Run K-means for several values of k and print the contingency matrix for each

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.cluster import KMeans
from sklearn import metrics

import os, glob, inspect, sys


# Make the parent directory importable so the project helper module
# `epri_mc_lib` can be loaded.  The notebook directory is taken from the
# working directory: inside a Jupyter kernel, `inspect.getfile` on the current
# frame reports a synthetic per-cell file name (recent ipykernel versions put
# it under a temporary directory), so it cannot be used to locate the notebook.
# NOTE(review): assumes the kernel's working directory is the notebook's folder,
# which the relative data path used when loading the CSV also relies on.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
import epri_mc_lib as mc
from importlib import reload
# Re-import so edits to the helper module are picked up without a kernel restart.
reload(mc)

### Import Data

In [None]:
# Location of the merged data files, relative to the working directory.
data_path = "../../Data/Merged_data"
csv_path = os.path.join(data_path, 'ALL_TUBE_PIPE_simulated.csv')
# The first CSV column becomes the frame index.
df = pd.read_csv(csv_path, index_col=0)


### Calculating new values

In [None]:
# Add the AUC_avg column produced by the project helper, then remove the
# three columns that were passed to it.
# NOTE(review): findAUC lives in epri_mc_lib; its exact computation is not
# visible from this notebook.
auc_inputs = ["A", "B", "p"]
df['AUC_avg'] = mc.findAUC(df, A=df['A'], B=df['B'], p=df['p'], name='AUC_avg')
df = df.drop(columns=auc_inputs)

In [None]:
# Ratio of the mean_CF column to the mean_perm column (the divisor is cast to
# float64 first); the source columns and mean_MBN are removed afterwards.
perm_as_float = df['mean_perm'].astype('float64')
df['CF_perm'] = df['mean_CF'].div(perm_as_float)
df = df.drop(columns=["mean_MBN", "mean_perm", "mean_CF"])

### Scaling Values

In [None]:
# Scale the features with a MinMaxScaler through the project helper and keep
# only the first element of what it returns.
# NOTE(review): the remaining elements of the helper's return value are not
# used in this notebook — confirm their meaning in epri_mc_lib.
scaling_result = mc.scale_general(df, MinMaxScaler())
scaled_df = scaling_result[0]


### Selecting sub samples

In [None]:
# Split the scaled frame into the four subsamples used below.
subsamples = mc.get_subsample_df(scaled_df)
tube, pipe, tube_wo_blind, tube_blind = subsamples

## Clustering

### Elbow Method

In [None]:
# Smallest and largest number of clusters (k) considered by the elbow
# analysis; read as globals by the functions defined in this section.
min_range = 2
max_range = 8

def plot_elbow_kmeans(feat_norm, title):
    '''
    Draw the elbow plot (KMeans inertia against number of clusters).

    One KMeans model (random_state=0) is fitted for every k from the global
    min_range up to and including the global max_range.

    Args:
    - feat_norm : pandas dataframe of features to cluster
    - title : title of the figure, ideally corresponding to the samples
    Shows the figure; returns nothing.
    '''
    inertia = [
        KMeans(n_clusters=k, random_state=0).fit(feat_norm).inertia_
        for k in range(min_range, max_range + 1)
    ]
    cluster_counts = np.arange(min_range, max_range + 1)

    plt.figure(1, figsize=(10, 6))
    # Markers and a faint connecting line are drawn as two separate series.
    plt.plot(cluster_counts, inertia, 'o')
    plt.plot(cluster_counts, inertia, '-', alpha=0.5)

    plt.xlabel('Number of Clusters', fontsize=20)
    plt.ylabel('Inertia', fontsize=20)
    plt.title(title, fontsize=20)
    plt.show()

In [None]:
# Elbow plot for the full tube subsample.
plot_elbow_kmeans(tube, title='Tubes simulated')

In [None]:
# Elbow plot for the tube subsample without the blind samples.
plot_elbow_kmeans(tube_wo_blind, title='Tubes labelled simulated')

In [None]:
# Elbow plot for the blind tube samples only.
plot_elbow_kmeans(tube_blind, title='Tubes Blind simulated')

In [None]:
def calculate_wcss(data):
    '''
    Calculate the within-cluster sum of squares (KMeans inertia), which
    represents the loss in KMeans clustering, for every candidate k.

    One KMeans model (random_state=42) is fitted for each k from the global
    min_range up to and including the global max_range, so the last entry
    corresponds to k == max_range.

    Args:
    - data : features to cluster (anything KMeans.fit accepts)
    return list of inertia values, one per k, in increasing k order
    '''
    wcss = []
    # max_range is inclusive: the elbow plot and the elbow detection both
    # treat the last point of the curve as k == max_range.
    for n in range(min_range, max_range + 1):
        kmeans = KMeans(n_clusters=n, random_state=42)
        kmeans.fit(data)
        wcss.append(kmeans.inertia_)

    return wcss

from math import sqrt

def optimal_number_of_clusters(wcss, first_k=None):
    '''
    Locate the elbow of an inertia curve.

    The curve points are (k, wcss[i]) with k = first_k + i.  A straight line
    is drawn through the first and last points, and the k whose point lies
    farthest (perpendicular distance) from that line is returned.

    Args:
    - wcss : sequence of inertia values for consecutive k, in increasing k order
    - first_k : k that wcss[0] belongs to; defaults to the global min_range
    return the k with the largest distance (the first one on ties); first_k
    when the curve has a single point
    '''
    if first_k is None:
        first_k = min_range

    # End points of the chord; the last x follows from the curve length so it
    # always matches the k that wcss[-1] was actually computed for.
    x1, y1 = first_k, wcss[0]
    x2, y2 = first_k + len(wcss) - 1, wcss[-1]

    denominator = sqrt((y2 - y1)**2 + (x2 - x1)**2)
    if denominator == 0:
        # Single-point curve: there is no chord to measure against.
        return first_k

    distances = []
    for offset, y0 in enumerate(wcss):
        x0 = first_k + offset
        numerator = abs((y2-y1)*x0 - (x2-x1)*y0 + x2*y1 - y2*x1)
        distances.append(numerator/denominator)

    return first_k + distances.index(max(distances))

In [None]:
# Inertia (within-cluster sum of squares) per candidate k for the full tube subsample
sum_of_squares = calculate_wcss(tube)

# k whose point lies farthest from the line joining the ends of the inertia curve
n = optimal_number_of_clusters(sum_of_squares)
print('Number of cluster =', n)

In [None]:
# Inertia (within-cluster sum of squares) per candidate k for the tubes without blind samples
sum_of_squares = calculate_wcss(tube_wo_blind)

# k whose point lies farthest from the line joining the ends of the inertia curve
n = optimal_number_of_clusters(sum_of_squares)
print('Number of cluster =', n)

In [None]:
# Inertia (within-cluster sum of squares) per candidate k for the blind tube samples
sum_of_squares = calculate_wcss(tube_blind)

# k whose point lies farthest from the line joining the ends of the inertia curve
n = optimal_number_of_clusters(sum_of_squares)
print('Number of cluster =', n)

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=2 on the tube samples without the blind ones; keep the
# cluster id assigned to every row.
model = KMeans(n_clusters=2, random_state=42)
model.fit(tube_wo_blind)
labels_2 = model.predict(tube_wo_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_wo_blind, labels_2, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_wo_blind, labels_2)
DB_score = metrics.davies_bouldin_score(tube_wo_blind, labels_2)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_wo_blind.index, labels_2)
print(c_mat)

In [None]:
# Empty frame to hold the sample names and their k=2 cluster labels.
labeled_df_2 = pd.DataFrame()

In [None]:
# Pair every sample name (the frame index) with its k=2 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_2 = pd.DataFrame({'sample': tube_wo_blind.index, 'labels': labels_2})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_2.value_counts()

From the contingency matrix and the value counts we can infer that there are 2 clusters grouped as
 - cluster 0 = T_T, T_OT, T_N_T, T_HAZ_T, T_FF, T_AR
 - cluster 1 = T_N, T_HAZ

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=3 on the tube samples without the blind ones; keep the
# cluster id assigned to every row.
model = KMeans(n_clusters=3, random_state=42)
model.fit(tube_wo_blind)
labels_3 = model.predict(tube_wo_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_wo_blind, labels_3, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_wo_blind, labels_3)
DB_score = metrics.davies_bouldin_score(tube_wo_blind, labels_3)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_wo_blind.index, labels_3)
print(c_mat)

In [None]:
# Empty frame to hold the sample names and their k=3 cluster labels.
labeled_df_3 = pd.DataFrame()

In [None]:
# Pair every sample name (the frame index) with its k=3 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_3 = pd.DataFrame({'sample': tube_wo_blind.index, 'labels': labels_3})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_3.value_counts()

From the contingency matrix and the value counts we can infer that there are 3 clusters grouped as

-   cluster 0 = T_N_T, T_FF
-   cluster 1 = T_N, T_HAZ
-   cluster 2 = T_T, T_OT, T_AR, T_HAZ_T

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=4 on the tube samples without the blind ones; keep the
# cluster id assigned to every row.
model = KMeans(n_clusters=4, random_state=42)
model.fit(tube_wo_blind)
labels_4 = model.predict(tube_wo_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_wo_blind, labels_4, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_wo_blind, labels_4)
DB_score = metrics.davies_bouldin_score(tube_wo_blind, labels_4)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_wo_blind.index, labels_4)
print(c_mat)

In [None]:
# Empty frame to hold the sample names and their k=4 cluster labels.
labeled_df_4 = pd.DataFrame()

In [None]:
# Pair every sample name (the frame index) with its k=4 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_4 = pd.DataFrame({'sample': tube_wo_blind.index, 'labels': labels_4})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_4.value_counts()

From the contingency matrix and the value counts we can infer that there are 4 clusters grouped as

-    cluster 0 = T_FF, T_N_T
-    cluster 1 = T_N, T_HAZ
-    cluster 2 = T_OT
-    cluster 3 = T_T, T_AR, T_HAZ_T

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=5 on the tube samples without the blind ones; keep the
# cluster id assigned to every row.
model = KMeans(n_clusters=5, random_state=42)
model.fit(tube_wo_blind)
labels_5 = model.predict(tube_wo_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_wo_blind, labels_5, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_wo_blind, labels_5)
DB_score = metrics.davies_bouldin_score(tube_wo_blind, labels_5)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_wo_blind.index, labels_5)
print(c_mat)

In [None]:
# Empty frame to hold the sample names and their k=5 cluster labels.
labeled_df_5 = pd.DataFrame()

In [None]:
# Pair every sample name (the frame index) with its k=5 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_5 = pd.DataFrame({'sample': tube_wo_blind.index, 'labels': labels_5})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_5.value_counts()

From the contingency matrix and the value counts we can infer that there are 5 clusters grouped as

-    cluster 0 = T_FF, T_N_T
-    cluster 1 = T_HAZ 
-    cluster 2 = T_AR, T_HAZ_T, T_T
-    cluster 3 = T_N
-    cluster 4 = T_OT

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=6 on the tube samples without the blind ones; keep the
# cluster id assigned to every row.
model = KMeans(n_clusters=6, random_state=42)
model.fit(tube_wo_blind)
labels_6 = model.predict(tube_wo_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_wo_blind, labels_6, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_wo_blind, labels_6)
DB_score = metrics.davies_bouldin_score(tube_wo_blind, labels_6)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_wo_blind.index, labels_6)
print(c_mat)

In [None]:
# Empty frame to hold the sample names and their k=6 cluster labels.
labeled_df_6 = pd.DataFrame()

In [None]:
# Pair every sample name (the frame index) with its k=6 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_6 = pd.DataFrame({'sample': tube_wo_blind.index, 'labels': labels_6})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_6.value_counts()

From the contingency matrix and the value counts we can infer that there are 6 clusters grouped as

-    cluster 0 = T_FF
-    cluster 1 = T_AR, T_HAZ_T, T_T
-    cluster 2 = T_HAZ
-    cluster 3 = T_N_T
-    cluster 4 = T_N
-    cluster 5 = T_OT

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=7 on the tube samples without the blind ones; keep the
# cluster id assigned to every row.
model = KMeans(n_clusters=7, random_state=42)
model.fit(tube_wo_blind)
labels_7 = model.predict(tube_wo_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_wo_blind, labels_7, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_wo_blind, labels_7)
DB_score = metrics.davies_bouldin_score(tube_wo_blind, labels_7)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_wo_blind.index, labels_7)
print(c_mat)

In [None]:
# Empty frame to hold the sample names and their k=7 cluster labels.
labeled_df_7 = pd.DataFrame()

In [None]:
# Pair every sample name (the frame index) with its k=7 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_7 = pd.DataFrame({'sample': tube_wo_blind.index, 'labels': labels_7})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_7.value_counts()

From the contingency matrix and the value counts we can infer that there are 7 clusters grouped as

-    cluster 0 = T_N_T 
-    cluster 1 = T_N
-    cluster 2 = T_AR, T_HAZ_T, T_T
-    cluster 3 = T_AR, T_HAZ_T, T_T
-    cluster 4 = T_HAZ
-    cluster 5 = T_FF
-    cluster 6 = T_OT

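Comparing k = 6 and k = 7: the additional cluster only splits the {T_AR, T_HAZ_T, T_T} group into two clusters (2 and 3) that both still contain the same three sample types, so no further sample type is separated. Hence, it is appropriate to stop at 6 clusters for the labelled tube samples.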

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=8 on the tube samples without the blind ones; keep the
# cluster id assigned to every row.
model = KMeans(n_clusters=8, random_state=42)
model.fit(tube_wo_blind)
labels_8 = model.predict(tube_wo_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_wo_blind, labels_8, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_wo_blind, labels_8)
DB_score = metrics.davies_bouldin_score(tube_wo_blind, labels_8)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_wo_blind.index, labels_8)
print(c_mat)

In [None]:
# Empty frame to hold the sample names and their k=8 cluster labels.
labeled_df_8 = pd.DataFrame()

In [None]:
# Pair every sample name (the frame index) with its k=8 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_8 = pd.DataFrame({'sample': tube_wo_blind.index, 'labels': labels_8})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_8.value_counts()

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=2 on the blind tube samples; keep the cluster id assigned
# to every row.
model = KMeans(n_clusters=2, random_state=42)
model.fit(tube_blind)
labels_b_2 = model.predict(tube_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_blind, labels_b_2, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_blind, labels_b_2)
DB_score = metrics.davies_bouldin_score(tube_blind, labels_b_2)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_blind.index, labels_b_2)
print(c_mat)

In [None]:
# Empty frame to hold the blind sample names and their k=2 cluster labels.
labeled_df_b_2 = pd.DataFrame()

In [None]:
# Pair every blind sample name (the frame index) with its k=2 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_b_2 = pd.DataFrame({'sample': tube_blind.index, 'labels': labels_b_2})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_b_2.value_counts()

From the contingency matrix and the value counts we can infer that there are 2 clusters grouped as

-    cluster 0 = T_B1, T_B2, T_B3, T_B5, T_B7, T_B8
-    cluster 1 = T_B4, T_B6

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=3 on the blind tube samples; keep the cluster id assigned
# to every row.
model = KMeans(n_clusters=3, random_state=42)
model.fit(tube_blind)
labels_b_3 = model.predict(tube_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_blind, labels_b_3, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_blind, labels_b_3)
DB_score = metrics.davies_bouldin_score(tube_blind, labels_b_3)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_blind.index, labels_b_3)
print(c_mat)

In [None]:
# Empty frame to hold the blind sample names and their k=3 cluster labels.
labeled_df_b_3 = pd.DataFrame()

In [None]:
# Pair every blind sample name (the frame index) with its k=3 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_b_3 = pd.DataFrame({'sample': tube_blind.index, 'labels': labels_b_3})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_b_3.value_counts()

From the contingency matrix and the value counts we can infer that there are 3 clusters grouped as

-    cluster 0 = T_B1, T_B3, T_B5, T_B8
-    cluster 1 = T_B4, T_B6
-    cluster 2 = T_B2, T_B7

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=4 on the blind tube samples; keep the cluster id assigned
# to every row.
model = KMeans(n_clusters=4, random_state=42)
model.fit(tube_blind)
labels_b_4 = model.predict(tube_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_blind, labels_b_4, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_blind, labels_b_4)
DB_score = metrics.davies_bouldin_score(tube_blind, labels_b_4)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_blind.index, labels_b_4)
print(c_mat)

In [None]:
# Empty frame to hold the blind sample names and their k=4 cluster labels.
labeled_df_b_4 = pd.DataFrame()

In [None]:
# Pair every blind sample name (the frame index) with its k=4 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_b_4 = pd.DataFrame({'sample': tube_blind.index, 'labels': labels_b_4})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_b_4.value_counts()

From the contingency matrix and the value counts we can infer that there are 4 clusters grouped as

-    cluster 0 = T_B1, T_B3, T_B5
-    cluster 1 = T_B4, T_B6
-    cluster 2 = T_B2, T_B7
-    cluster 3 = T_B8

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=5 on the blind tube samples; keep the cluster id assigned
# to every row.
model = KMeans(n_clusters=5, random_state=42)
model.fit(tube_blind)
labels_b_5 = model.predict(tube_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_blind, labels_b_5, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_blind, labels_b_5)
DB_score = metrics.davies_bouldin_score(tube_blind, labels_b_5)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_blind.index, labels_b_5)
print(c_mat)

In [None]:
# Empty frame to hold the blind sample names and their k=5 cluster labels.
labeled_df_b_5 = pd.DataFrame()

In [None]:
# Pair every blind sample name (the frame index) with its k=5 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_b_5 = pd.DataFrame({'sample': tube_blind.index, 'labels': labels_b_5})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_b_5.value_counts()

From the contingency matrix and the value counts we can infer that there are 5 clusters grouped as

-    cluster 0 = T_B1, T_B3, T_B5
-    cluster 1 = T_B4, T_B6
-    cluster 2 = T_B7
-    cluster 3 = T_B8
-    cluster 4 = T_B2

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=6 on the blind tube samples; keep the cluster id assigned
# to every row.
model = KMeans(n_clusters=6, random_state=42)
model.fit(tube_blind)
labels_b_6 = model.predict(tube_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_blind, labels_b_6, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_blind, labels_b_6)
DB_score = metrics.davies_bouldin_score(tube_blind, labels_b_6)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_blind.index, labels_b_6)
print(c_mat)

In [None]:
# Empty frame to hold the blind sample names and their k=6 cluster labels.
labeled_df_b_6 = pd.DataFrame()

In [None]:
# Pair every blind sample name (the frame index) with its k=6 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_b_6 = pd.DataFrame({'sample': tube_blind.index, 'labels': labels_b_6})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_b_6.value_counts()

From the contingency matrix and the value counts we can infer that there are 6 clusters grouped as

-    cluster 0 = T_B4
-    cluster 1 = T_B1, T_B3, T_B5
-    cluster 2 = T_B7
-    cluster 3 = T_B8
-    cluster 4 = T_B2
-    cluster 5 = T_B6

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=7 on the blind tube samples; keep the cluster id assigned
# to every row.
model = KMeans(n_clusters=7, random_state=42)
model.fit(tube_blind)
labels_b_7 = model.predict(tube_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_blind, labels_b_7, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_blind, labels_b_7)
DB_score = metrics.davies_bouldin_score(tube_blind, labels_b_7)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_blind.index, labels_b_7)
print(c_mat)

In [None]:
# Empty frame to hold the blind sample names and their k=7 cluster labels.
labeled_df_b_7 = pd.DataFrame()

In [None]:
# Pair every blind sample name (the frame index) with its k=7 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_b_7 = pd.DataFrame({'sample': tube_blind.index, 'labels': labels_b_7})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_b_7.value_counts()

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=8 on the blind tube samples; keep the cluster id assigned
# to every row.
model = KMeans(n_clusters=8, random_state=42)
model.fit(tube_blind)
labels_b_8 = model.predict(tube_blind)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube_blind, labels_b_8, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube_blind, labels_b_8)
DB_score = metrics.davies_bouldin_score(tube_blind, labels_b_8)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube_blind.index, labels_b_8)
print(c_mat)

In [None]:
# Empty frame to hold the blind sample names and their k=8 cluster labels.
labeled_df_b_8 = pd.DataFrame()

In [None]:
# Pair every blind sample name (the frame index) with its k=8 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_b_8 = pd.DataFrame({'sample': tube_blind.index, 'labels': labels_b_8})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_b_8.value_counts()

### Conclusion - I

From the clustering patterns above we can conclude, at group level, that
- {T_B1, T_B3, T_B5} = {T_AR, T_HAZ_T, T_T} (the samples within the groups are not mapped one to one)
-  T_B8 = T_OT
- {T_B4, T_B6} = {T_N, T_HAZ} (not mapped one to one)
- {T_B7, T_B2} = {T_FF, T_N_T} (not mapped one to one)

In [None]:
# (rows, columns) of the full tube subsample.
tube.shape

In [None]:
from sklearn.metrics.cluster import contingency_matrix

# K-means with k=6 on the full tube subsample (labelled and blind samples
# together); keep the cluster id assigned to every row.
model = KMeans(n_clusters=6, random_state=42)
model.fit(tube)
labels_tube = model.predict(tube)

# Cluster-quality scores, printed in the order silhouette,
# Calinski-Harabasz, Davies-Bouldin.
silhouette = metrics.silhouette_score(tube, labels_tube, metric='euclidean')
ch_score = metrics.calinski_harabasz_score(tube, labels_tube)
DB_score = metrics.davies_bouldin_score(tube, labels_tube)
for score in (silhouette, ch_score, DB_score):
    print(score)

# Cross-tabulate the frame's index values against the cluster ids.
c_mat = contingency_matrix(tube.index, labels_tube)
print(c_mat)

In [None]:
# Empty frame to hold all tube sample names and their k=6 cluster labels.
labeled_df_tube = pd.DataFrame()

In [None]:
# Pair every tube sample name (the frame index) with its k=6 cluster label.
# Building the frame in one step keeps this cell re-runnable on its own.
labeled_df_tube = pd.DataFrame({'sample': tube.index, 'labels': labels_tube})

In [None]:
# Number of rows for each (sample, cluster label) combination.
labeled_df_tube.value_counts()

### Conclusion - II

From the clustering pattern above we can refine the mapping further:

-    {T_B1, T_B3, T_B5} = {T_AR, T_HAZ_T, T_T} not mapped one to one
-    T_B8 = T_OT
-    T_B4 = T_N 
-    T_B6 = T_HAZ
-    {T_B7, T_B2}= {T_FF, T_N_T} not mapped one to one
-    T_B2 more likely to be T_FF and T_B7 to be T_N_T

In [None]:
def bar_chart(feature):
    '''
    Stacked bar chart of the value counts of `feature` for each labelled
    tube sample, read from the global labeled_df_tube frame.

    Args:
    - feature : column of labeled_df_tube whose values are counted (e.g. 'labels')
    Shows the figure; returns nothing.
    '''
    sample_names = ['T_AR','T_FF','T_HAZ_T','T_N','T_T','T_HAZ','T_N_T','T_OT']

    # One value_counts Series per sample, in the order listed above.
    counts_per_sample = [
        labeled_df_tube.loc[labeled_df_tube['sample'] == name, feature].value_counts()
        for name in sample_names
    ]
    counts_df = pd.DataFrame(counts_per_sample)
    counts_df.index = sample_names

    counts_df.plot(kind='bar', stacked=True, figsize=(10, 5))
    plt.xlabel("Microstructures", fontsize=18, fontweight='bold')
    plt.ylabel("No. of simulated samples", fontsize=14, fontweight='bold')
    plt.title("Clustering of Tubes labelled simulated", fontsize=16, fontweight='bold')
    plt.show()

In [None]:
# Cluster-label distribution for each labelled tube sample.
bar_chart('labels')

In [None]:
def bar_chart_unknown(feature):
    '''
    Stacked bar chart of the value counts of `feature` for each blind tube
    sample (T_B1 to T_B8), read from the global labeled_df_tube frame.

    Args:
    - feature : column of labeled_df_tube whose values are counted (e.g. 'labels')
    Shows the figure; returns nothing.
    '''
    sample_names = ['T_B1','T_B2','T_B3','T_B4','T_B5','T_B6','T_B7','T_B8']

    # One value_counts Series per sample, in the order listed above.
    counts_per_sample = [
        labeled_df_tube.loc[labeled_df_tube['sample'] == name, feature].value_counts()
        for name in sample_names
    ]
    counts_df = pd.DataFrame(counts_per_sample)
    counts_df.index = sample_names

    counts_df.plot(kind='bar', stacked=True, figsize=(10, 5))
    plt.xlabel("Microstructures", fontsize=18, fontweight='bold')
    plt.ylabel("No. of simulated samples", fontsize=14, fontweight='bold')
    plt.title("Clustering of Tubes Blind simulated", fontsize=16, fontweight='bold')
    plt.show()

In [None]:
# Cluster-label distribution for each blind tube sample.
bar_chart_unknown('labels')