In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import tensorflow as tf, matplotlib.pyplot as plt
import os

from tensorflow.keras import layers

## Data load

In [3]:
# os.chdir('/content/drive/MyDrive/Cephalogram')
# os.chdir('/content/drive/MyDrive/Cephalogram/workspace/feature_test')
# import ceph_config as cf

# our modules
import utils.ceph_config as cf

cf.RESOLUTION_CONFIG

32

In [4]:
model_save_path = os.path.join(os.getcwd(), 'output', 'kfold', 'ml', f'{cf.RESOLUTION_CONFIG}', 'model')
os.makedirs(model_save_path, exist_ok=True)

image_feature = np.load(os.path.join(os.getcwd(), 'output', f'{cf.RESOLUTION_CONFIG}', 'feature', 'feature.npy'))
binary_array = np.load(os.path.join(os.getcwd(), 'output', f'{cf.RESOLUTION_CONFIG}', 'array', 'binary_array.npy'))
int_array = np.load(os.path.join(os.getcwd(), 'output', f'{cf.RESOLUTION_CONFIG}', 'array', 'int_array.npy'))

image_feature_2 = image_feature.astype('float32')
binary_array_2 = binary_array.astype('float32')
int_array_2 = int_array.astype('float32')


print(image_feature_2.shape)
print(binary_array_2.shape)
print(int_array_2.shape)

print(image_feature_2.dtype)
print(binary_array_2.dtype)
print(int_array_2.dtype)

(6928, 96)
(6928, 3)
(6928,)
float32
float32
float32


In [5]:
x_test = image_feature_2[4560:5760]
x_train_all = np.concatenate((image_feature_2[0:4560], image_feature_2[5760:]))

y_test = int_array_2[4560:5760]
y_train_all = np.concatenate((int_array_2[0:4560], int_array_2[5760:]))

y2_test = binary_array[4560:5760]
y2_train_all = np.concatenate((binary_array[0:4560], binary_array[5760:]))

In [6]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=20, shuffle=True, random_state=7)

In [7]:
for train_index, test_index in kf.split(x_train_all):
    x_train, x_test     = x_train_all[train_index], x_train_all[test_index]
    y_train, y_test     = y_train_all[train_index], y_train_all[test_index]
    y2_train, y2_test   = y2_train_all[train_index], y2_train_all[test_index]

### model 생성

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

log_model = LogisticRegression(max_iter=5000)
knn_model = KNeighborsRegressor()
rf_model = RandomForestClassifier()
smc_model = SVC()

In [9]:
# 전체 훈련
# from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

clf_list = [log_model, knn_model, rf_model, smc_model]

# for i in range(10):
#     x_train, x_val, y_train, y_val = train_test_split(x_train_all, y_train_all, test_size=0.1, random_state=i)
#     _, _, y2_train, y2_val = train_test_split(x_train_all, y2_train_all, test_size=0.1, random_state=i)

#     for clf in clf_list:
#         clf.fit(x_train, y_train)
#         clf.save(os.path.join(model_save_path, f'{clf}_{i}.keras'))

In [10]:
total_score = []
trained_clf_list = []

for clf in clf_list:
    scores = cross_val_score(clf, x_train, y_train,
                             scoring='neg_mean_squared_error', cv=10)
    total_score.append(np.sqrt(-scores))

    clf.fit(x_train, y_train)
    trained_clf_list.append(clf)

In [11]:
def display_scores(scores):
    print('############################')
    # print('점수:', scores)
    print('Mean:', scores.mean())
    print('STD:', scores.std())
    print('\n')

In [12]:
for scores in total_score:
    display_scores(scores)

############################
Mean: 0.23871540363554952
STD: 0.48450272686019985


############################
Mean: 0.22340475518334216
STD: 0.376030750309477


############################
Mean: 0.20169655028787742
STD: 0.3931146063271256


############################
Mean: 0.25207314888801335
STD: 0.49041129355199037




### 최종 model 성능 평가

In [13]:
prediction_out = []

for clf in trained_clf_list:
    prediction = clf.predict(x_test)

    # pred_class = []

    # for i, item in enumerate(prediction):
    #     pred_class.append(np.argmax(item))

    # prediction_out.append(np.array(pred_class).astype('float32'))
    prediction_out.append(np.round(np.array(prediction)).astype('float32'))

In [14]:
prediction_np = np.array(prediction_out)

In [15]:
len(x_test)

286

In [16]:
len(prediction_np[0])

286

In [17]:
prediction_np

array([[5., 3., 2., ..., 6., 3., 0.],
       [5., 3., 2., ..., 5., 3., 0.],
       [5., 3., 2., ..., 4., 3., 0.],
       [5., 3., 2., ..., 6., 3., 0.]], dtype=float32)

In [18]:
len(prediction_np)

4

In [19]:
from sklearn.metrics import confusion_matrix, classification_report

for i, output_pred in enumerate(prediction_np):
    print(f'# {clf_list[i]} Prediction')
    print('\n')
    print(confusion_matrix(y_test, output_pred))
    print(classification_report(y_test, output_pred))
    print('\n')

# LogisticRegression(max_iter=5000) Prediction


[[38  0  0  0  0  0  0  0]
 [ 1 28  0  0  0  0  0  0]
 [ 0  0 28  0  0  0  0  0]
 [ 0  0  0 35  0  0  0  0]
 [ 0  0  0  0 39  0  0  0]
 [ 0  0  0  0  0 37  0  0]
 [ 0  2  0  0  0  0 39  2]
 [ 2  0  0  0  0  0  1 34]]
              precision    recall  f1-score   support

         0.0       0.93      1.00      0.96        38
         1.0       0.93      0.97      0.95        29
         2.0       1.00      1.00      1.00        28
         3.0       1.00      1.00      1.00        35
         4.0       1.00      1.00      1.00        39
         5.0       1.00      1.00      1.00        37
         6.0       0.97      0.91      0.94        43
         7.0       0.94      0.92      0.93        37

    accuracy                           0.97       286
   macro avg       0.97      0.97      0.97       286
weighted avg       0.97      0.97      0.97       286



# KNeighborsRegressor() Prediction


[[38  0  0  0  0  0  0  0]
 [ 1 27  1  0  0 