<a href="https://colab.research.google.com/github/By0ungJoo/dacon/blob/main/efficientNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the `efficientnet` package (imported below as `efn`); pip's stdout is discarded.
!pip install -q efficientnet >> /dev/null

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.backend as K
import efficientnet.tfkeras as efn
from sklearn.model_selection import KFold
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

from torchsummary import summary
import time
import copy
import cv2
import gc

In [None]:
# Run configuration. Everything a reader might tune lives in this cell.
DEVICE            = "GPU"  # Any value in ["TPU", "GPU"]
# NOTE(review): SEED, FOLDS and FOLD_WEIGHTS are not referenced by the cells visible here
# (the split uses its own random_state) — confirm they are still needed.
SEED              = 8080
FOLDS             = 5
# One equal weight per fold (the weights sum to 1).
FOLD_WEIGHTS      = [1./FOLDS]*FOLDS
# Passed to model.fit. EPOCHS is only an upper bound: early stopping ends training.
BATCH_SIZE        = 64
EPOCHS            = 5000
# Metric and direction watched by the checkpoint, LR-reduction and early-stopping callbacks.
MONITOR           = "val_loss"
MONITOR_MODE      = "min"
# Patience for EarlyStopping.
ES_PATIENCE       = 5
# Patience for ReduceLROnPlateau. With 0 the learning rate is multiplied by LR_FACTOR after
# every epoch without improvement (visible in the recorded training log).
LR_PATIENCE       = 0
LR_FACTOR         = 0.5
# Index into `eff_nets` (3 selects EfficientNetB3) and the pretrained weights to load.
EFF_NET           = 3
EFF_NET_WEIGHTS   = 'noisy-student'
# Label smoothing passed to the categorical cross-entropy loss.
LABEL_SMOOTHING   = 0.1
# Verbosity used by the callbacks and by model.fit.
VERBOSE           = 1

In [None]:
## Give every label in train_df an integer index that selects its one-hot position
train_df = pd.read_csv('/content/drive/MyDrive/DACON_이상치 탐지 알고리즘 경진대회/open/train_df.csv')
label_lst = sorted(train_df['label'].unique().tolist())

# Column labels come from the dict keys. The previous `columns={'label'}` passed a set,
# which is unordered and is not accepted as column labels by recent pandas releases.
label_index_df = pd.DataFrame({'label': label_lst, 'one_hot_label': range(len(label_lst))})

# pd.merge returns a new frame, so train_df itself is left untouched.
final_train88_df = pd.merge(train_df, label_index_df, how='left', on='label')

# Base directory of the training images.
# Labels are encoded as indices 0 .. num_total_label - 1 (see final_train88_df.one_hot_label).
image_dir = '/content/drive/MyDrive/dacon/dacon/open/train'
num_total_label = len(label_lst)  # total number of labels (was hard-coded as 88)
one_hot_label = list(range(num_total_label))

img_rows = 224
img_cols = 224

file_names = final_train88_df['file_name'].tolist()
label_indices = final_train88_df['one_hot_label'].to_numpy()

## Build the image array
# Preallocate the input tensor instead of collecting a Python list and converting it,
# which would briefly hold two copies of every image in RAM.
X = np.empty((len(file_names), img_rows, img_cols, 3), dtype='float32')  # inputs

for i, filename in enumerate(tqdm(file_names)):
    img = cv2.imread(f'{image_dir}/{filename}')
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {image_dir}/{filename}')
    # cv2.resize takes dsize as (width, height).
    img = cv2.resize(img.astype('float32'), (img_cols, img_rows))
    # NOTE(review): cv2.imread yields BGR channel order, while ImageNet-style pretrained
    # weights are normally fed RGB. Confirm whether a BGR->RGB conversion is wanted here
    # (and in the matching inference code) before changing it.
    # The 256 divisor is kept so inputs stay identical to earlier runs.
    X[i] = img / 256

# Targets: row k of the identity matrix is the one-hot vector for label index k.
Y = np.eye(num_total_label, dtype=int)[label_indices]

# Stratify on the one-hot rows (as before) so the train/validation split is unchanged.
x_train, x_test, y_train, y_test = train_test_split(X, Y, stratify=Y, test_size=0.3, random_state=1234)

## Drop the full arrays now that the split copies exist (RAM management)
del X, Y
gc.collect()

100%|██████████| 4277/4277 [02:21<00:00, 30.24it/s]


0

In [None]:
# Load cached arrays from Drive into `X` and `y`. Note the lowercase `y`: this does not
# recreate the `Y` deleted by the loading cell, and x_train / y_train are not affected.
# NOTE(review): the recorded output of this cell is a NameError — presumably it was run
# before the import cell; confirm the .npy files still exist and are still needed.
X = np.load('/content/drive/MyDrive/Colab Notebooks/X.npy')
y = np.load('/content/drive/MyDrive/Colab Notebooks/y.npy')

NameError: ignored

In [None]:
# Shape of the training split: (samples, height, width, channels).
# NOTE(review): the recorded output shows 512x512 images while the loading cell resizes
# to 224x224, so that output looks stale.
x_train.shape

(2993, 512, 512, 3)

In [None]:
# Size of the last axis (channels) of the cached `X` array loaded from the .npy file.
X.shape[3]

3

In [None]:
# Backbone constructors, selected by EFF_NET:
# positions 0-7 are EfficientNet-B0 .. B7, position 8 is EfficientNet-L2.
eff_nets = [
    efn.EfficientNetB0,
    efn.EfficientNetB1,
    efn.EfficientNetB2,
    efn.EfficientNetB3,
    efn.EfficientNetB4,
    efn.EfficientNetB5,
    efn.EfficientNetB6,
    efn.EfficientNetB7,
    efn.EfficientNetL2,
]

def build_model(input_shape=None, num_classes=88):
    """Build and compile the EfficientNet-based image classifier.

    The backbone is ``eff_nets[EFF_NET]`` created with ``EFF_NET_WEIGHTS`` and without its
    classification top. It is followed by global average pooling, a 512-unit ReLU layer,
    dropout (rate 0.5) and a softmax layer with ``num_classes`` outputs.

    Args:
        input_shape: ``(height, width, channels)`` of one input image. When omitted it is
            taken from the global ``x_train`` (``x_train.shape[1:]``), which is what the
            function always did before this parameter existed.
        num_classes: Number of units in the softmax output. Defaults to 88, the value that
            used to be hard-coded.

    Returns:
        A compiled ``tf.keras.Model`` using Nadam (learning rate 3e-4), categorical
        cross-entropy with ``LABEL_SMOOTHING`` and the ``'acc'`` metric.
    """
    if input_shape is None:
        # Backward-compatible default: derive the shape from the training data.
        input_shape = x_train.shape[1:]
    input_shape = tuple(input_shape)

    inp = tf.keras.layers.Input(shape=input_shape)
    oup = eff_nets[EFF_NET](
        input_shape=input_shape,
        weights=EFF_NET_WEIGHTS,
        include_top=False,)(inp)
    oup = tf.keras.layers.GlobalAveragePooling2D()(oup)
    # Dense and Activation stay separate layers so the layer layout is unchanged.
    oup = tf.keras.layers.Dense(512, activation='linear')(oup)
    oup = tf.keras.layers.Activation('relu')(oup)
    oup = tf.keras.layers.Dropout(0.5)(oup)
    oup = tf.keras.layers.Dense(num_classes, activation='linear')(oup)
    oup = tf.keras.layers.Activation('softmax')(oup)

    model = tf.keras.Model(inputs=[inp], outputs=[oup])

    # The network already ends in softmax, so the loss receives probabilities, not logits.
    loss = tf.keras.losses.CategoricalCrossentropy(
        from_logits=False,
        label_smoothing=LABEL_SMOOTHING,
    )

    opt = tf.keras.optimizers.Nadam(learning_rate=3e-4)

    model.compile(optimizer=opt, loss=loss, metrics=['acc'])

    return model

# Print the layer table as a sanity check. The model built here is used only for the summary.
build_model().summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 efficientnet-b3 (Functional  (None, 7, 7, 1536)       10783528  
 )                                                               
                                                                 
 global_average_pooling2d (G  (None, 1536)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 512)               786944    
                                                                 
 activation (Activation)     (None, 512)               0         
                                                                 
 dropout (Dropout)           (None, 512)               0     

In [None]:
weights_filename = 'fold-1.h5'

# Settings shared by all three callbacks: which metric to watch, in which direction,
# and how much to log.
_watch = dict(monitor=MONITOR, mode=MONITOR_MODE, verbose=VERBOSE)

# Checkpoint: at the end of each epoch, write the weights (weights only) whenever the
# monitored metric is the best seen so far.
sv = tf.keras.callbacks.ModelCheckpoint(
    weights_filename,
    save_best_only=True,
    save_weights_only=True,
    save_freq='epoch',
    **_watch,
)

# Learning-rate schedule: scale the LR by LR_FACTOR once the metric has stalled for
# LR_PATIENCE epochs.
lrr = tf.keras.callbacks.ReduceLROnPlateau(
    factor=LR_FACTOR,
    patience=LR_PATIENCE,
    **_watch,
)

# Early stopping: end training once the metric has stalled for ES_PATIENCE epochs.
es = tf.keras.callbacks.EarlyStopping(
    patience=ES_PATIENCE,
    **_watch,
)

In [None]:
# Build the model instance that is actually trained below.
model = build_model()

In [None]:
# Train on the training split. The held-out split (x_test / y_test) serves as validation
# data and drives the checkpoint, LR-reduction and early-stopping callbacks.
history = model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_test, y_test),
    callbacks=[sv, lrr, es],
    verbose=VERBOSE,
)

Epoch 1/5000
Epoch 1: val_loss did not improve from 0.92851
Epoch 2/5000
Epoch 2: val_loss did not improve from 0.92851

Epoch 2: ReduceLROnPlateau reducing learning rate to 2.3437501113221515e-06.
Epoch 3/5000
Epoch 3: val_loss did not improve from 0.92851

Epoch 3: ReduceLROnPlateau reducing learning rate to 1.1718750556610757e-06.
Epoch 4/5000
Epoch 4: val_loss did not improve from 0.92851

Epoch 4: ReduceLROnPlateau reducing learning rate to 5.859375278305379e-07.
Epoch 5/5000
Epoch 5: val_loss did not improve from 0.92851

Epoch 5: ReduceLROnPlateau reducing learning rate to 2.9296876391526894e-07.
Epoch 6/5000
Epoch 6: val_loss did not improve from 0.92851

Epoch 6: ReduceLROnPlateau reducing learning rate to 1.4648438195763447e-07.
Epoch 6: early stopping


In [None]:
# Reference: https://www.kaggle.com/code/gabrielmilan/efficientnet-baseline