In [1]:
import numpy as np
import scipy as sp
from sklearn.decomposition import PCA 
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Filesystem layout, rooted at an absolute, machine-specific directory.
# NOTE(review): adjust `imad` when running anywhere other than the original host.
imad = "/home/ubuntu/imad"

# Models directory (note the trailing slash).
models_dir = imad + "/segmentation/models/"
# Per-model inputs are read from <formative_dir>/model_<n>/vectors_labels.npz.
formative_dir = imad + "/formative_models_oriented"
# NOTE(review): "mdel_3" looks like a typo for "model_3", and because models_dir
# already ends in "/" this path contains "//". The name is not used by any cell
# visible in this notebook — confirm before relying on it.
model_dir = models_dir + "/mdel_3/vectors_labels.npz"
# Target directories for the per-model scatter plots
# (segment-coloured and Form/Function-coloured respectively).
figures_segmentation = imad + "/segmentation/figures_segmentation"
figures_form_function = imad + "/segmentation/figures_form_function"

In [3]:
# Labelling table; the code below reads its 'Model', 'Segmented Part Number'
# and 'Label' columns. The path is relative to the notebook's working directory.
segmentation_labels_form_function = pd.read_csv("./Segmented_Models_Labeling.csv")

In [4]:
# Inspect the column names of the labelling table.
segmentation_labels_form_function.columns

Index(['Model', 'Segmented Part Number', 'Label', 'Unnamed: 3', 'Unnamed: 4'], dtype='object')

In [5]:
# Ids of the models the loading loop iterates over; each id <n> maps to
# <formative_dir>/model_<n>/vectors_labels.npz.
models_that_work = [
    3, 4, 7, 16, 18, 19, 20, 22, 23, 24,
    28, 33, 34, 36, 37, 38, 40, 42, 43, 46,
    47, 49, 53, 57, 61, 64, 65, 66, 68, 69,
    70, 76, 77, 79, 83, 84, 85, 86, 88, 89,
    90, 93, 94, 95, 97, 98, 99,
]

In [6]:
# Opt-in switch for the per-model PCA scatter plots. Off by default, so PCA is
# only fitted when the figures are actually wanted.
SAVE_FIGURES = False

# Text labels recognised in the 'Label' column and their numeric class codes.
# Rows carrying any other value (or NaN) contribute no mapping.
LABEL_CODES = {'Form': 0, 'Function': 1}


def _load_vectors_and_labels(model_number):
    """Return ``(vectors, labels)`` from a model's ``vectors_labels.npz``.

    The first element of the archive's ``arr_0`` array is a mapping that is
    indexed with the keys ``'vectors'`` and ``'labels'``.

    The archive is opened in a ``with`` block so its file handle is released
    as soon as the payload has been read.
    """
    # NOTE(review): allow_pickle=True unpickles objects from the file, which can
    # execute arbitrary code — only use on archives produced by this project.
    path = f"{formative_dir}/model_{model_number}/vectors_labels.npz"
    with np.load(path, allow_pickle=True) as archive:
        payload = archive['arr_0'].flatten()[0]
    return payload['vectors'], payload['labels']


def _save_pca_scatter(points_2d, colours, path):
    """Save a scatter plot of ``points_2d`` (N x 2), coloured by ``colours``, to ``path``."""
    fig, ax = plt.subplots()
    ax.scatter(points_2d[:, 0], points_2d[:, 1], c=colours)
    fig.savefig(path)
    plt.close(fig)  # avoid accumulating open figures across models


# Accumulators over all usable models: one entry per sample.
all_eigen_vectors = []
all_original_seg_labels = []
all_form_func_seg_labels = []
for ele in models_that_work:
    model_number = ele

    # Check the annotations first so that unusable models are skipped before
    # any file is read. A model is unusable when it has no rows in the table
    # or when its part 0 has no label.
    this_model_labels = segmentation_labels_form_function[
        segmentation_labels_form_function['Model'] == model_number
    ]
    part_zero_unlabelled = this_model_labels.loc[
        this_model_labels['Segmented Part Number'] == 0, 'Label'
    ].isna().any()
    if this_model_labels.empty or part_zero_unlabelled:
        print(f"Bad {ele}")
        continue

    eigen_vectors, labels = _load_vectors_and_labels(model_number)

    # Segmented part number -> 0 (Form) / 1 (Function). Keys stay plain Python
    # floats as read from the table; integer segment ids hash and compare equal
    # to them, so the lookups below succeed.
    this_model_labels_dict = {
        part: LABEL_CODES[label]
        for part, label in zip(
            this_model_labels['Segmented Part Number'].tolist(),
            this_model_labels['Label'].tolist(),
        )
        if label in LABEL_CODES
    }

    # Translate once per distinct segment id, then broadcast back to every
    # sample, instead of one dictionary lookup per sample.
    part_ids, inverse = np.unique(labels, return_inverse=True)
    part_ids = part_ids.tolist()
    unlabelled = [part for part in part_ids if part not in this_model_labels_dict]
    if unlabelled:
        # Same exception type as a failed dictionary lookup, but naming the
        # model and every offending part.
        raise KeyError(
            f"model {model_number}: no Form/Function label for segmented part(s) {unlabelled}"
        )
    per_part_codes = np.array(
        [this_model_labels_dict[part] for part in part_ids], dtype='int8'
    )
    form_function_labels = per_part_codes[inverse]

    all_eigen_vectors.extend(eigen_vectors)
    all_original_seg_labels.extend(labels)
    all_form_func_seg_labels.extend(form_function_labels)

    if SAVE_FIGURES:
        # 2-D projection of the feature vectors, coloured first by segment id
        # and then by Form/Function class.
        principalComponents = PCA(n_components=2).fit_transform(eigen_vectors)
        _save_pca_scatter(principalComponents, labels,
                          f'{figures_segmentation}/model_{ele}')
        _save_pca_scatter(principalComponents, form_function_labels,
                          f'{figures_form_function}/model_{ele}')

    print(f"Finished {ele}")

Finished 3
Finished 4
Finished 7
Finished 16
Finished 18
Bad 19
Finished 20
Finished 22
Finished 23
Finished 24
Finished 28
Bad 33
Finished 34
Bad 36
Finished 37
Finished 38
Finished 40
Finished 42
Finished 43
Finished 46
Finished 47
Finished 49
Finished 53
Finished 57
Finished 61
Bad 64
Finished 65
Finished 66
Bad 68
Finished 69
Finished 70
Bad 76
Finished 77
Finished 79
Finished 83
Finished 84
Finished 85
Finished 86
Bad 88
Finished 89
Finished 90
Finished 93
Finished 94
Finished 95
Bad 97
Bad 98
Finished 99


In [7]:
# Turn the three per-sample Python lists built by the loading loop into NumPy
# arrays (features, original segment ids, Form/Function codes).
all_eigen_vectors, all_original_seg_labels, all_form_func_seg_labels = map(
    np.asarray,
    (all_eigen_vectors, all_original_seg_labels, all_form_func_seg_labels),
)

In [8]:
# Sanity check: (number of samples over all processed models, features per sample).
all_eigen_vectors.shape

(974506, 6)

# Classification

In [21]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [39]:
# One seed for both the split and the network initialisation, so a re-run
# reproduces the same model.
RANDOM_STATE = 42

# Hold out 25% of the samples for evaluation.
# NOTE(review): the split is per sample, so samples from the same model can land
# in both the training and the test side; the score below may therefore be
# optimistic for unseen models. Consider a split grouped by model id
# (e.g. sklearn.model_selection.GroupShuffleSplit).
X_train, X_test, y_train, y_test = train_test_split(
    all_eigen_vectors,
    all_form_func_seg_labels,
    test_size=0.25,
    random_state=RANDOM_STATE,
)

# MLP with two hidden layers of 100 units; verbose=True prints the training
# loss after every iteration.
clf = MLPClassifier(
    hidden_layer_sizes=(100, 100),
    max_iter=300,
    random_state=RANDOM_STATE,
    verbose=True,
)
clf.fit(X_train, y_train)

# Mean accuracy on the held-out samples; as the last expression of the cell it
# is the value the notebook displays.
clf.score(X_test, y_test)

Iteration 1, loss = 0.32139403
Iteration 2, loss = 0.16179209
Iteration 3, loss = 0.11635813
Iteration 4, loss = 0.09254435
Iteration 5, loss = 0.07797819
Iteration 6, loss = 0.06810868
Iteration 7, loss = 0.06085653
Iteration 8, loss = 0.05550378
Iteration 9, loss = 0.05110983
Iteration 10, loss = 0.04709209
Iteration 11, loss = 0.04404671
Iteration 12, loss = 0.04183627
Iteration 13, loss = 0.03995031
Iteration 14, loss = 0.03814428
Iteration 15, loss = 0.03655129
Iteration 16, loss = 0.03512920
Iteration 17, loss = 0.03387850
Iteration 18, loss = 0.03268734
Iteration 19, loss = 0.03187257
Iteration 20, loss = 0.03087236
Iteration 21, loss = 0.02987839
Iteration 22, loss = 0.02916381
Iteration 23, loss = 0.02821043
Iteration 24, loss = 0.02759380
Iteration 25, loss = 0.02701243
Iteration 26, loss = 0.02671084
Iteration 27, loss = 0.02606926
Iteration 28, loss = 0.02542014
Iteration 29, loss = 0.02519103
Iteration 30, loss = 0.02460533
Iteration 31, loss = 0.02432462
Iteration 32, los

0.9962688864534719

In [40]:
# Predicted class (0 = Form, 1 = Function) for every held-out sample.
clf.predict(X_test)

array([1, 1, 0, ..., 1, 1, 1], dtype=int8)

In [41]:
# Size of the held-out split (test_size=0.25 of the full data set).
X_test.shape

(243627, 6)

In [43]:
# Size of the full data set, for comparison with the held-out split.
all_eigen_vectors.shape

(974506, 6)

In [46]:
import pickle

# Persist the trained classifier next to the notebook.
filename = 'form_function_segmentation_model.sav'
# The context manager flushes and closes the file deterministically; a bare
# open(...) passed to pickle.dump leaves the handle open until it is
# garbage-collected.
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

In [48]:
# Round-trip check: reload the classifier from disk and predict on the held-out
# samples. The file is opened in a `with` block so the handle is closed again.
# NOTE(review): pickle.load can execute arbitrary code contained in the file —
# only load model files you created yourself.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
result = loaded_model.predict(X_test)

In [49]:
# Predictions of the reloaded model on X_test.
result


array([1, 1, 0, ..., 1, 1, 1], dtype=int8)

In [50]:
# Rows of the labelling table that belong to a single model.
# NOTE(review): `model_number` is not assigned in this cell — it is the value
# left behind by the last iteration of the loading loop, so this cell only works
# after that loop has run in the same kernel.
this_model_labels = segmentation_labels_form_function[segmentation_labels_form_function['Model'] == model_number]

In [51]:
# Show the annotation rows selected for that model.
this_model_labels

Unnamed: 0,Model,Segmented Part Number,Label,Unnamed: 3,Unnamed: 4
585,99.0,0.0,Form,Form,
586,99.0,1.0,Function,Function,
587,99.0,2.0,Function,,
588,99.0,3.0,Function,Bad,
589,99.0,4.0,Function,,
590,99.0,5.0,Form,,
591,99.0,6.0,Form,,
592,99.0,7.0,Function,,
593,99.0,8.0,Form,,
594,99.0,9.0,Form,,


In [53]:
# Last 12 Form/Function codes of the stacked array; models are appended in loop
# order, so these come from the last model that was added.
all_form_func_seg_labels[-12:]

array([1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1], dtype=int8)

In [54]:
# Segment ids currently held in `labels`, as left by the loading loop.
labels

array([11, 11, 11, ...,  3,  7,  1], dtype=int32)

In [56]:
# Segmented part number -> class code (0 = Form, 1 = Function) as left by the
# loading loop.
this_model_labels_dict

{0.0: 0,
 1.0: 1,
 2.0: 1,
 3.0: 1,
 4.0: 1,
 5.0: 0,
 6.0: 0,
 7.0: 1,
 8.0: 0,
 9.0: 0,
 10.0: 1,
 11.0: 1}

In [68]:
# Translate every segment id in `labels` into its class code by looking it up in
# this_model_labels_dict; the result is an int8 array with one entry per label.
this_form_function_labels = np.fromiter(
    (this_model_labels_dict[segment_id] for segment_id in labels),
    dtype='int8',
    count=labels.shape[0],
)

In [58]:
# Number of per-sample class codes left in `form_function_labels` by the loading loop.
form_function_labels.shape

(28978,)

In [59]:
# Reload the stored data of model 99 straight from disk.
# The archive is opened in a `with` block so its file handle is closed once
# arr_0 has been read; `b` stays bound to the (closed) archive object.
# NOTE(review): allow_pickle=True unpickles objects from the file — only use on
# archives produced by this project.
with np.load(f"{formative_dir}/model_99/vectors_labels.npz", allow_pickle=True) as b:
    # Number the elements of arr_0 from 1; entry 1 holds the data used below.
    results_dict = dict(enumerate(b['arr_0'].flatten(), 1))
eigen_vectors = results_dict[1]['vectors']
labels = results_dict[1]['labels']
eigen_values = results_dict[1]['values']

In [61]:
# Segment ids just reloaded for model 99.
labels

array([11, 11, 11, ...,  3,  7,  1], dtype=int32)

In [67]:
# (number of samples, features per sample) of the reloaded model.
eigen_vectors.shape

(28978, 6)

In [62]:
# Predict the class code (0 = Form, 1 = Function) for every sample of the reloaded model.
res = loaded_model.predict(eigen_vectors)

In [63]:
# Show the per-sample predictions for the reloaded model.
res

array([1, 1, 1, ..., 1, 1, 1], dtype=int8)

In [69]:
# Mean accuracy of the reloaded classifier on the model_99 samples.
# NOTE(review): model 99 is part of the data the classifier was fitted on (the
# loading loop reports "Finished 99" and the pooled data was split 75/25
# afterwards), so this is not a held-out score. `result` is also reused here:
# it previously held the prediction array for X_test.
result = loaded_model.score(eigen_vectors, this_form_function_labels)

In [70]:
# Display the accuracy computed above.
result

0.9953758023328042