# 미니 프로젝트: 차량 파손 여부 분류 자동화 모델 개발
> 차량 공유업체의 차량 파손 여부 확인 업무를 자동화할 수 있는 분류 모델 개발

<img src="https://story.s-oil.com/wp-content/uploads/2021/09/%EC%B0%A8%EB%B0%95%EC%82%AC_02.jpg" height="400px">


# 단계 2. 모델링

### 1.데이터 불러오기

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


- 전처리 데이터 저장

In [7]:
import numpy as np

# 저장된 데이터를 불러오는 함수
def load_data(file_path):
    data = np.load(file_path)
    X_train, X_valid, X_test = data['X_train'], data['X_valid'], data['X_test']
    y_train, y_valid, y_test = data['y_train'], data['y_valid'], data['y_test']
    return X_train, X_valid, X_test, y_train, y_valid, y_test

# 데이터 불러오기
file_path = '/content/drive/MyDrive/AIVLE/preprocessed_data.npz'
X_train, X_valid, X_test, y_train, y_valid, y_test = load_data(file_path)

## 2.기본 CNN 구조

In [None]:
import keras
from keras.utils import clear_session
from keras.models import Sequential, Model
from keras.layers import Input, Conv2D, MaxPool2D, Flatten, Dense, BatchNormalization, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import confusion_matrix, classification_report

### (1) 모델링

In [None]:
## 1.세션 클리어
clear_session()


input_layer = Input(shape = (256, 256, 3))

# Convolution : 필터수 32개, 사이즈(3, 3), same padding
hl = Conv2D(64, (2,2), (1,1), 'same', activation='relu')(input_layer)
hl = BatchNormalization()(hl)


hl = Conv2D(32, (2,2), (1,1), 'same', activation='relu')(hl)
hl = BatchNormalization()(hl)


hl = MaxPool2D((2,2), (2,2))(hl)
hl = Dropout(0.25)(hl)


# Convolution : 필터수 64개, 사이즈(3, 3), same padding
hl = Conv2D(16, (3,3), (1,1), 'same', activation='relu')(hl)
hl = BatchNormalization()(hl)


hl = Conv2D(8, (2,2), (1,1), 'same', activation='relu')(hl)
hl = BatchNormalization()(hl)


hl = MaxPool2D((2,2), (2,2))(hl)
hl = Dropout(0.25)(hl)

# Flatten
hl = Flatten()(hl)
hl = Dense(16, activation='relu')(hl)

hl = BatchNormalization()(hl)
output_layer = Dense(1, activation='sigmoid')(hl)

# 모델 생성
model = Model(input_layer, output_layer)

model.summary()

In [None]:
model.compile(optimizer = 'adam',
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

### (2) 학습

- Early Stopping

In [None]:
es = EarlyStopping(monitor='val_loss',
                   min_delta = 0,
                   patience = 5,
                   verbose = 1,
                   restore_best_weights = True,
                   )

- CheckPoint

In [None]:
mcp = ModelCheckpoint(filepath = './best_mode.keras',
                      monitor = 'val_loss',
                      verbose = 1,
                      save_best_only = True,
                      save_weights_only = False)

In [None]:
X_train.shape, y_train.shape, X_valid.shape, y_valid.shape

((489, 256, 256, 3), (489,), (55, 256, 256, 3), (55,))

In [None]:
hist = model.fit(X_train, y_train,
                 validation_data = (X_valid, y_valid),
                 epochs=10000,
                 verbose=1,
                 callbacks=[es, mcp]
          )

Epoch 1/10000
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385ms/step - accuracy: 0.7535 - loss: 0.6125
Epoch 1: val_loss improved from inf to 1.18377, saving model to ./best_mode.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 458ms/step - accuracy: 0.7558 - loss: 0.6049 - val_accuracy: 0.6545 - val_loss: 1.1838
Epoch 2/10000
[1m15/16[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 109ms/step - accuracy: 0.8809 - loss: 0.3122
Epoch 2: val_loss did not improve from 1.18377
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 109ms/step - accuracy: 0.8824 - loss: 0.3093 - val_accuracy: 0.4364 - val_loss: 3.9750
Epoch 3/10000
[1m15/16[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 108ms/step - accuracy: 0.9294 - loss: 0.2148
Epoch 3: val_loss did not improve from 1.18377
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 109ms/step - accuracy: 0.9290 - loss: 0.2150 - val_accuracy: 0.4909 - val_loss: 2.9241
E

### (3) 성능 평가
* 평가는 confusion_matrix, classification_report 활용

In [None]:
y_pred = model.predict(X_test)

# 가장 높은 확률 예측 값 (Index)
y_pred = np.where(y_pred > 0.5, 1, 0)

print( confusion_matrix(y_test, y_pred) )
print( classification_report(y_test, y_pred) )

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 378ms/step
[[24  6]
 [ 8 23]]
              precision    recall  f1-score   support

         0.0       0.75      0.80      0.77        30
         1.0       0.79      0.74      0.77        31

    accuracy                           0.77        61
   macro avg       0.77      0.77      0.77        61
weighted avg       0.77      0.77      0.77        61



## 3.Transfer Learning

### (1) 모델링

In [None]:
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input
from keras.applications.inception_v3 import decode_predictions

model = InceptionV3(include_top = False,
                    weights='imagenet',
                    input_shape = (256,256,3),
                    pooling='avg')


classification = Dense(64, activation='relu')(model.output)
classification = Dense(32, activation='relu')(classification)
classification = Dense(1, activation='sigmoid')(classification)

model = keras.models.Model(model.inputs, classification)

for layer in model.layers[:-3] :
    layer.trainable = False

model.summary()

In [None]:
model.compile(optimizer = 'adam',
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

### (2) 학습

- Early Stopping

In [None]:
es = EarlyStopping(monitor='val_loss',
                   min_delta = 0,
                   patience = 5,
                   verbose = 1,
                   restore_best_weights = True,
                   )

- CheckPoint

In [None]:
mcp = ModelCheckpoint(filepath = './best_mode.keras',
                      monitor = 'val_loss',
                      verbose = 1,
                      save_best_only = True,
                      save_weights_only = False)

In [None]:
# 데이터 전처리
X_train = preprocess_input(X_train)
X_valid = preprocess_input(X_valid)
X_test = preprocess_input(X_test)

In [None]:
hist = model.fit(# train_dataset,
                #  validation_data = valid_dataset,
                 X_train, y_train,
                 validation_data = (X_valid, y_valid),
                 epochs=10000,
                 verbose=1,
                 callbacks=[es, mcp]
          )

Epoch 1/10000
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 339ms/step - accuracy: 0.7595 - loss: 0.4856
Epoch 1: val_loss improved from 0.37727 to 0.14636, saving model to ./best_mode.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 997ms/step - accuracy: 0.7638 - loss: 0.4789 - val_accuracy: 0.9455 - val_loss: 0.1464
Epoch 2/10000
[1m15/16[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 82ms/step - accuracy: 0.9487 - loss: 0.1503
Epoch 2: val_loss improved from 0.14636 to 0.13733, saving model to ./best_mode.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 153ms/step - accuracy: 0.9492 - loss: 0.1478 - val_accuracy: 0.9455 - val_loss: 0.1373
Epoch 3/10000
[1m15/16[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 80ms/step - accuracy: 0.9543 - loss: 0.0977
Epoch 3: val_loss improved from 0.13733 to 0.07568, saving model to ./best_mode.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 164ms/st

### (3) 성능 평가

In [None]:
y_pred = model.predict(X_test)

# 가장 높은 확률 예측 값 (Index)
y_pred = np.where(y_pred > 0.5, 1, 0)

print( classification_report(y_test, y_pred) )

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5s/step
              precision    recall  f1-score   support

         0.0       0.88      1.00      0.94        30
         1.0       1.00      0.87      0.93        31

    accuracy                           0.93        61
   macro avg       0.94      0.94      0.93        61
weighted avg       0.94      0.93      0.93        61



## 4.Data Augmentation

### (1) 모델링

In [None]:
from keras.layers import RandomZoom, RandomFlip, RandomRotation

# Pre-trained Model
inception_model = InceptionV3(include_top = False,
                    weights='imagenet',
                    input_shape = (256,256,3),
                    pooling='avg')


# 입력
input = Input(shape = (256, 256, 3))

# Data Augmentation Layer 추가
data_augmentation = Sequential([
    RandomZoom(0.2),
    RandomFlip(mode='horizontal'),
    RandomRotation(factor=0.2),
])

x = data_augmentation(input)

x = inception_model(x)

# 분류 층
classification = Dense(64, activation='relu')(x)
classification = Dense(32, activation='relu')(classification)
classification = Dense(1, activation='sigmoid')(classification)

model = keras.models.Model(input, classification)

for layer in model.layers[:-3] :
    layer.trainable = False

model.summary()

In [None]:
model.compile(optimizer = 'adam',
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

### (2) 학습

- Early Stopping

In [None]:
es = EarlyStopping(monitor='val_loss',
                   min_delta = 0,
                   patience = 5,
                   verbose = 1,
                   restore_best_weights = True,
                   )

- CheckPoint

In [None]:
mcp = ModelCheckpoint(filepath = './best_mode.keras',
                      monitor = 'val_loss',
                      verbose = 1,
                      save_best_only = True,
                      save_weights_only = False)

In [None]:
# 데이터 전처리
X_train = preprocess_input(X_train)
X_valid = preprocess_input(X_valid)
X_test = preprocess_input(X_test)

In [None]:
hist = model.fit(# train_dataset,
                #  validation_data = valid_dataset,
                 X_train, y_train,
                 validation_data = (X_valid, y_valid),
                 epochs=10000,
                 verbose=1,
                 callbacks=[es, mcp]
          )

Epoch 1/10000
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step - accuracy: 0.5784 - loss: 0.6894
Epoch 1: val_loss did not improve from 0.05486
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 606ms/step - accuracy: 0.5827 - loss: 0.6866 - val_accuracy: 0.7273 - val_loss: 0.5320
Epoch 2/10000
[1m15/16[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 103ms/step - accuracy: 0.7723 - loss: 0.5070
Epoch 2: val_loss did not improve from 0.05486
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 112ms/step - accuracy: 0.7724 - loss: 0.5050 - val_accuracy: 0.7636 - val_loss: 0.4780
Epoch 3/10000
[1m15/16[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 104ms/step - accuracy: 0.7876 - loss: 0.4423
Epoch 3: val_loss did not improve from 0.05486
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 114ms/step - accuracy: 0.7873 - loss: 0.4432 - val_accuracy: 0.8182 - val_loss: 0.4263
Epoch 4/10000
[1m15/16[0m [32m━━━

### (3) 성능 평가

In [None]:
y_pred = model.predict(X_test)

# 가장 높은 확률 예측 값 (Index)
y_pred = np.where(y_pred > 0.5, 1, 0)

print( classification_report(y_test, y_pred) )

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5s/step
              precision    recall  f1-score   support

         0.0       0.88      1.00      0.94        30
         1.0       1.00      0.87      0.93        31

    accuracy                           0.93        61
   macro avg       0.94      0.94      0.93        61
weighted avg       0.94      0.93      0.93        61

