In [1]:
from src.create_dataset import main
import matplotlib.pyplot as plt
import os
from os.path import join
import pickle
import numpy as np
import pandas as pd
import json
from itertools import product

In [2]:
from tqdm.notebook import tqdm

In [3]:
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, davies_bouldin_score, silhouette_score

In [4]:
from typing import Dict, List

In [5]:
from src.convert_jsons_to_csvs import convert_jsons_to_csv

In [6]:
def get_subdataset(dataset: pd.DataFrame, hist_name: str, n_bins: int) -> pd.DataFrame:
      return dataset[(dataset.n_bins == n_bins) & 
                     (dataset.hist_type == hist_name)][['type', 'values']]

In [7]:
dataset = convert_jsons_to_csv('data3/histograms/', 'data3/labels.csv')

100%|██████████| 20/20 [00:15<00:00,  1.28it/s]


In [8]:
dataset.head()

Unnamed: 0,model,type,model_name,hist_type,n_bins,values
0,1b220e0b-d7fa-40a0-8cfd-930a3228c668,Sphere,1b220e0b-d7fa-40a0-8cfd-930a3228c668,model_bounding_sphere_concentric_sphere,8,"[0, 0, 0, 0, 0.3373978709670946, 0.31850100387..."
1,1b220e0b-d7fa-40a0-8cfd-930a3228c668,Sphere,1b220e0b-d7fa-40a0-8cfd-930a3228c668,model_bounding_sphere_concentric_sphere,16,"[0, 0, 0, 0, 0, 0, 0, 0, 0.10627104869753085, ..."
2,1b220e0b-d7fa-40a0-8cfd-930a3228c668,Sphere,1b220e0b-d7fa-40a0-8cfd-930a3228c668,model_bounding_sphere_concentric_sphere,32,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,1b220e0b-d7fa-40a0-8cfd-930a3228c668,Sphere,1b220e0b-d7fa-40a0-8cfd-930a3228c668,model_bounding_sphere_concentric_sphere,64,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1b220e0b-d7fa-40a0-8cfd-930a3228c668,Sphere,1b220e0b-d7fa-40a0-8cfd-930a3228c668,model_bounding_sphere_concentric_sphere,128,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [9]:
sub_dataset = get_subdataset(dataset, 'model_bounding_sphere_concentric_sphere', 8)

labels = sub_dataset.type.values
u_labels = np.unique(labels)
X = np.vstack(sub_dataset['values'].values)

le = LabelEncoder().fit(labels)
tr_labels = le.transform(labels)

kmeans = KMeans(n_clusters=len(u_labels), random_state=0).fit(X)
pca = PCA(n_components=2)
X_r = pca.fit_transform(X)
Z = kmeans.predict(X)

print('davies bouldin score', davies_bouldin_score(X, Z))
print('silhouette score', silhouette_score(X, Z))
print(classification_report(Z, tr_labels, target_names=le.classes_))


davies bouldin score 1.1861606719208708
silhouette score 0.27266058630573264
              precision    recall  f1-score   support

        Cone       0.41      0.36      0.38       374
        Cube       0.00      0.02      0.01        60
    Cylinder       0.20      0.20      0.20       315
      Sphere       0.19      0.16      0.17       429
       Torus       0.20      0.15      0.17       421

    accuracy                           0.20      1599
   macro avg       0.20      0.18      0.19      1599
weighted avg       0.24      0.20      0.22      1599



In [10]:
sub_dataset = get_subdataset(dataset, 'model_bounding_sphere_concentric_sphere', 128)

labels = sub_dataset.type.values
u_labels = np.unique(labels)
X = np.vstack(sub_dataset['values'].values)

le = LabelEncoder().fit(labels)
tr_labels = le.transform(labels)

kmeans = KMeans(n_clusters=len(u_labels), random_state=0).fit(X)
pca = PCA(n_components=2)
X_r = pca.fit_transform(X)
Z = kmeans.predict(X)

print('davies bouldin score', davies_bouldin_score(X, Z))
print('silhouette score', silhouette_score(X, Z))
print(classification_report(Z, tr_labels, target_names=le.classes_))


davies bouldin score 1.485175692177613
silhouette score 0.19670974064217922
              precision    recall  f1-score   support

        Cone       0.15      0.10      0.12       479
        Cube       0.00      0.02      0.01        60
    Cylinder       0.20      0.20      0.20       311
      Sphere       0.19      0.21      0.20       320
       Torus       0.25      0.19      0.22       429

    accuracy                           0.16      1599
   macro avg       0.16      0.14      0.15      1599
weighted avg       0.19      0.16      0.17      1599

