In [1]:
import warnings

# Install a blanket "ignore" filter so no warning category is shown from here on.
warnings.filterwarnings('ignore')

In [2]:
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from tqdm.auto import tqdm
import cv2

# from tensorflow.keras.applications import Densnet
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import layers
import tensorflow.keras as keras
from tensorflow.data import Dataset

from tensorflow.keras.optimizers import Adam, RMSprop, Nadam, SGD
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from itertools import product
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.applications import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from skmultilearn.model_selection import iterative_train_test_split
from wandb.keras import WandbCallback
import wandb

2022-06-21 22:08:28.123417: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [3]:
# Display the installed TensorFlow version as the cell output.
tf.version.VERSION

'2.4.1'

# Data Load

In [4]:
# Annotation table: one row per image, with 'image', 'grow', 'disease' and
# 'disease-grow' columns among others (see the preview below).
CSV_PATH = '/home/lab38/Multi_proj_6/data/straw_smallsizeimg_with_pest.csv'
df = pd.read_csv(CSV_PATH)
df.head()

Unnamed: 0,image,grow,disease,area,points,original,disease-grow
0,/home/lab38/딸기/Strawberry Pest Damage_3.jpg,5,11,,,,11-5
1,/home/lab38/딸기/Strawberry Pest Damage_727.jpg,2,11,,,,11-2
2,/home/lab38/딸기/Strawberry Pest Damage_487.jpg,5,11,,,,11-5
3,/home/lab38/딸기/Strawberry Pest Damage_124.jpg,5,11,,,,11-5
4,/home/lab38/딸기/Strawberry Pest Damage_70.jpg,5,11,,,,11-5


In [5]:
# label encoding
disease_encoder = LabelEncoder()
disease_encoder.fit(df['disease'])
df['disease'] = disease_encoder.transform(df['disease'])
print(df['disease'].unique())

grow_encoder = LabelEncoder()
grow_encoder.fit(df['grow'])
df['grow'] = grow_encoder.transform(df['grow'])
print(df['grow'].unique())

[3 0 1 2]
[4 1 3 0 2]


In [6]:
# Row-shuffled copy of the whole table (frac=1.0 keeps every row).
# NOTE(review): the split cell below reads `df`, not `shuffled_df` — confirm
# whether this copy is still needed.
shuffled_df = df.sample(frac=1.0)

In [7]:
# Stratified 80/20 hold-out split on the combined 'disease-grow' label, which
# keeps the class proportions of that label in both partitions.
#
# The seed is pinned so the split is identical after a kernel restart: later
# cells reload weights trained in an earlier session, and a different split
# would let images that were trained on show up in the validation set.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(df['image'],
                                                   df['disease-grow'],
                                                   stratify=df['disease-grow'],
                                                   test_size=0.2,
                                                   random_state=SPLIT_SEED)

In [8]:
# Recover the full rows for each partition by matching on the image path.
in_train = df['image'].isin(X_train)
in_test = df['image'].isin(X_test)
train_df = df.loc[in_train]
test_df = df.loc[in_test]

In [9]:
# Sanity check: list the label values present in each partition
# (train disease, train grow, test disease, test grow).
for frame in (train_df, test_df):
    for column in ('disease', 'grow'):
        print(frame[column].unique())

[3 0 1 2]
[4 1 3 0 2]
[3 0 1 2]
[4 1 0 3 2]


In [10]:
# Augmentation applied to training images only.
augmentation_options = dict(
    rotation_range=20,        # rotate by up to 20 degrees
    width_shift_range=0.1,    # shift horizontally by up to 10% of the width
    height_shift_range=0.1,   # shift vertically by up to 10% of the height
    zoom_range=0.2,           # zoom in/out ratio
    horizontal_flip=True,     # left-right flip
    vertical_flip=True,       # up-down flip
    fill_mode='nearest',
)
train_gen = ImageDataGenerator(rescale=1. / 255, **augmentation_options)

# Validation images are only rescaled to [0, 1]; no augmentation.
valid_gen = ImageDataGenerator(rescale=1. / 255)

# Model top-layer training (Top layer 학습)

In [11]:
# ---------------------------------------------------------------------------
# Top-layer training: the ImageNet-pretrained backbone is frozen and only the
# new dense head (shared dense layer + disease / grow softmax outputs) learns.
# ---------------------------------------------------------------------------

# Side length of the images produced by the generators. Assigned here because
# this cell reads it before the fine-tuning section assigns the same value,
# which otherwise breaks a fresh "Restart & Run All".
IMAGE_SIZE = 320

# Epoch to resume from when a checkpoint of an earlier run is found on disk.
RESUME_EPOCH = 6

config_default = {
    'pretrain_net': 'inception',
    'epochs' : 1000,
    'batch_size': 20,
    'dropout' : 0.2,
    'learning_rate' : 1e-3,
    'activation': 'elu',
    'optimizer': 'adam',
    'dense': 32,
}
wandb.init(project='strawberry',
          config=config_default)
config = wandb.config

# Both generators yield (image batch, [disease labels, grow labels]).
train_generator = train_gen.flow_from_dataframe(train_df,
                                           x_col='image',
                                           y_col=['disease', 'grow'],
                                           target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                           class_mode='multi_output',
                                           batch_size=config.batch_size)
valid_generator = valid_gen.flow_from_dataframe(test_df,
                                           x_col='image',
                                           y_col=['disease','grow'],
                                           target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                           class_mode='multi_output',
                                           batch_size=config.batch_size)

if config.pretrain_net == 'inception' :
    MODEL_IMAGE_SIZE = 299
    base_model = inception_resnet_v2.InceptionResNetV2(
        weights='imagenet',
        include_top = False,
        input_shape = (MODEL_IMAGE_SIZE, MODEL_IMAGE_SIZE,3)
    )
else:
    # Fail with a clear message instead of a NameError on `base_model` below.
    raise ValueError(f"unsupported pretrain_net: {config.pretrain_net!r}")
base_model.trainable = False

# resizing model: maps generator-sized images to the backbone's input size
input_data = layers.Input((IMAGE_SIZE, IMAGE_SIZE, 3))
x = tf.keras.layers.experimental.preprocessing.Resizing(MODEL_IMAGE_SIZE, MODEL_IMAGE_SIZE)(input_data)
resizing = Model(inputs=input_data, outputs=x, name='resize')

# model
inputs = layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
x = resizing(inputs)
x = base_model(x, training=False)  # backbone always runs in inference mode
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(config.dropout)(x)
backbone_out = layers.Dense(config.dense, activation=config.activation)(x)

# Two classification heads share `backbone_out`. The layer names are also the
# keys of the loss dict below and of saved checkpoints, so the 'diease_outputs'
# spelling is kept as-is.
disease_outputs = layers.Dense(df['disease'].nunique(), activation='softmax',
                        name = 'diease_outputs')(backbone_out)
grow_outputs = layers.Dense(df['grow'].nunique(), activation='softmax',
                    name = 'grow_outputs')(backbone_out)

model = Model(inputs=inputs,
              outputs=[disease_outputs, grow_outputs],
              name='strawberry')

if config.optimizer=='adam':
    optimizer = Adam(learning_rate=config.learning_rate)
else:
    # Fail with a clear message instead of a NameError on `optimizer` below.
    raise ValueError(f"unsupported optimizer: {config.optimizer!r}")

# NOTE(review): early stopping watches the training loss while the checkpoint
# watches 'val_loss' — confirm this asymmetry is intended.
es = EarlyStopping(monitor='loss',
                   mode='auto',
                  patience=5,
                  verbose=1)

ckpt_path = './toplayer_strawberry.ckpt'
checkpointer = ModelCheckpoint(filepath=ckpt_path,
                              monitor='val_loss',
                              save_weights_only = True,
                              save_best_only= True,
                              verbose=1)

model.compile(loss={
              'diease_outputs' : 'sparse_categorical_crossentropy',
              'grow_outputs' : 'sparse_categorical_crossentropy'
              },
              optimizer=optimizer,
              metrics=['accuracy'])

# Resume from the saved weights only when a checkpoint actually exists; a
# weights-only save to a '.ckpt' path is written in TensorFlow checkpoint
# format, which produces '<path>.index'. Without one, train from epoch 0.
if os.path.exists(ckpt_path + '.index'):
    model.load_weights(ckpt_path)
    initial_epoch = RESUME_EPOCH
else:
    initial_epoch = 0

history = model.fit(train_generator,
      validation_data=valid_generator,
      verbose=1,
      epochs=config.epochs,
      initial_epoch=initial_epoch,
      callbacks=[es, checkpointer],
      steps_per_epoch=len(train_df)//config.batch_size)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhkleee[0m. Use [1m`wandb login --relogin`[0m to force relogin



KeyboardInterrupt



Top-layer training was early-stopped at epoch 42 (42에서 early stopping).

# Fine Tuning
### Sweep
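Sweep over the fine-tuning freeze rate: the fraction of the earliest base-model layers that stay frozen while the rest are trained.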

In [11]:
# Side length of the square images fed to the network by the generators.
IMAGE_SIZE = 320

# Epoch number fine-tuning starts counting from (passed as `initial_epoch`).
start = 42

In [12]:
def finetune():
    """Fine-tune the strawberry disease / growth-stage model for one W&B run.

    Meant to be passed to ``wandb.agent``: the sweep overrides ``freeze_rate``
    in ``wandb.config`` and this function rebuilds the model, restores the
    top-layer checkpoint, unfreezes part of the backbone, and trains.

    Reads the notebook globals ``train_gen``, ``valid_gen``, ``train_df``,
    ``test_df``, ``df``, ``IMAGE_SIZE`` and ``start``.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.

    Raises:
        ValueError: if ``config.pretrain_net`` is not a supported backbone.
    """
    config_default = {
        'pretrain_net': 'inception',
        'epochs' : 1000,
        'batch_size': 20,
        'dropout' : 0.2,
        'learning_rate' : 1e-3,
        'activation': 'elu',
        'optimizer': 'adam',
        'dense': 32,
        'freeze_rate' : 0.3
    }
    wandb.init(config=config_default, project='strawberry')
    config = wandb.config

    # generator load: each batch is (images, [disease labels, grow labels])
    train_generator = train_gen.flow_from_dataframe(train_df,
                                           x_col='image',
                                           y_col=['disease', 'grow'],
                                           target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                           class_mode='multi_output',
                                           batch_size=config.batch_size)
    valid_generator = valid_gen.flow_from_dataframe(test_df,
                                               x_col='image',
                                               y_col=['disease','grow'],
                                               target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                               class_mode='multi_output',
                                               batch_size=config.batch_size)

    ############################ model construction ###########################
    if config.pretrain_net == 'inception' :
        MODEL_IMAGE_SIZE = 299
        base_model = inception_resnet_v2.InceptionResNetV2(
            weights='imagenet',
            include_top = False,
            input_shape = (MODEL_IMAGE_SIZE, MODEL_IMAGE_SIZE,3)
        )
    else:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError(f"unsupported pretrain_net: {config.pretrain_net!r}")

    # resizing model: maps generator-sized images to the backbone's input size
    input_data = layers.Input((IMAGE_SIZE, IMAGE_SIZE, 3))
    x = tf.keras.layers.experimental.preprocessing.Resizing(MODEL_IMAGE_SIZE, MODEL_IMAGE_SIZE)(input_data)
    resizing = Model(inputs=input_data, outputs=x, name='resize')

    # model
    inputs = layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    x = resizing(inputs)
    x = base_model(x, training=False)  # backbone is called in inference mode
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(config.dropout)(x)
    backbone_out = layers.Dense(config.dense, activation=config.activation)(x)

    # Layer names double as loss-dict keys and checkpoint keys, so the
    # 'diease_outputs' spelling must stay as it is.
    disease_outputs = layers.Dense(df['disease'].nunique(), activation='softmax',
                            name = 'diease_outputs')(backbone_out)
    grow_outputs = layers.Dense(df['grow'].nunique(), activation='softmax',
                        name = 'grow_outputs')(backbone_out)

    model = Model(inputs=inputs,
                  outputs=[disease_outputs, grow_outputs],
                  name='strawberry')
    ###########################################################################

    ####### top layer weight & bias loading ######
    # load_weights must be *called*. Assigning the path to the attribute (as
    # this code previously did) silently replaces the method and restores
    # nothing, so fine-tuning would start from an untrained head.
    ckpt_path = './toplayer_strawberry.ckpt'
    try :
        model.load_weights(ckpt_path)
    except Exception as err :
        # Best effort: keep the run alive, but say why the restore failed.
        print('fail to load weights:', err)

    ############### unfreeze ###################
    # Make the whole backbone trainable, then re-freeze the earliest
    # `freeze_rate` fraction of its layers.
    base_model.trainable=True
    fine_tune_at = int(len(base_model.layers) * config.freeze_rate)
    for layer in base_model.layers[:fine_tune_at] :
        layer.trainable = False

    # Compile after changing `trainable` so the change takes effect; the
    # learning rate is a tenth of the configured one.
    model.compile(optimizer=Adam(config.learning_rate/10),  ## compile
                  loss={
                      'diease_outputs' : 'sparse_categorical_crossentropy',
                      'grow_outputs' : 'sparse_categorical_crossentropy'
                    },
                    metrics=['accuracy'])

    # NOTE(review): early stopping watches the training loss although the sweep
    # minimises 'val_loss' — confirm this is intended.
    es = EarlyStopping(monitor='loss',
                   mode='auto',
                  patience=5,
                  verbose=1)

    ## finetuning: epoch numbering continues from the global `start`
    history_fine = model.fit(train_generator,
                              validation_data=valid_generator,
                              verbose=1,
                              epochs=config.epochs,
                              initial_epoch = start,
                              callbacks=[es, WandbCallback()],  # model save at wandb
                              steps_per_epoch=len(train_df)//config.batch_size)
    return history_fine
    

In [13]:
# Candidate values for the fraction of backbone layers kept frozen.
freeze_rate_candidates = [0, 0.2, 0.5, 0.9]

# W&B sweep definition: random search ('grid' is the alternative method)
# that minimises the logged 'val_loss'.
sweep_config = dict(
    method='random',
    metric=dict(name='val_loss', goal='minimize'),
    parameters=dict(
        freeze_rate=dict(values=freeze_rate_candidates),
    ),
)

In [14]:
# Register the sweep with W&B and keep its id for the agent cell below.
sweep_id = wandb.sweep(
    sweep_config,
    project='strawberry',
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: qvycddps
Sweep URL: https://wandb.ai/hkleee/strawberry/sweeps/qvycddps


In [None]:
# Start an agent that calls `finetune` once per configuration drawn from the
# sweep. No run count is given, so it keeps going until stopped.
wandb.agent(sweep_id, function=finetune)

[34m[1mwandb[0m: Agent Starting Run: ktaacbzj with config:
[34m[1mwandb[0m: 	freeze_rate: 0.2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhkleee[0m. Use [1m`wandb login --relogin`[0m to force relogin


Found 13437 validated image filenames.
Found 3360 validated image filenames.


2022-06-21 22:09:09.112541: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-06-21 22:09:09.113701: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-06-21 22:09:09.158747: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-21 22:09:09.159400: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:00:1e.0 name: Tesla T4 computeCapability: 7.5
coreClock: 1.59GHz coreCount: 40 deviceMemorySize: 14.75GiB deviceMemoryBandwidth: 298.08GiB/s
2022-06-21 22:09:09.159449: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-06-21 22:09:09.239932: I tensorflow/stream_executor/platform/default/dso_loade

Epoch 43/1000


2022-06-21 22:09:31.870922: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-06-21 22:09:32.587693: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7


Epoch 44/1000