In [None]:
## This script is for model training using the basic framework and ResNet-50 features

In [None]:
# import packages
import os, cv2
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import keras
from keras.applications import resnet50
from keras.models import Model, Sequential
from keras.applications.resnet50 import preprocess_input
from keras.utils import np_utils
from tensorflow.keras.layers import Input, UpSampling2D, Flatten, BatchNormalization, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.python.client import device_lib
## list the devices TensorFlow can see, to confirm whether a GPU is available
local_devices = device_lib.list_local_devices()
print(local_devices)

In [None]:
# load data (first part)
path_wd = '../' ## set working directory
## load shadow-free images
img_path = path_wd + 'output/images/shadow_free.jpg'
img = cv2.imread(img_path)
## cv2.imread does not raise on a missing/unreadable file, it returns None;
## stop here with a clear message instead of an opaque cvtColor failure
if img is None:
    raise FileNotFoundError('Cannot read image file: ' + img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) ## transform color channel to RGB
## load LFDP ground-based labels (first CSV column is used as the index)
df = pd.read_csv(path_wd + 'data/labels/LFDP_labels.csv', index_col=0)

In [None]:
# choose parameters to filter the ground labels
THRESH_DIAM = 20 ## diameter threshold
## a label is kept only when all four conditions hold
keep = (
    (df['ALIVE'] == 'A')
    & (df['DIAM'] > THRESH_DIAM)
    & (df['pix_1'] < 9600)   ## column range
    & (df['pix_2'] < 15000)  ## row range
)
## renumber the surviving rows 0..n-1, since the next cell addresses rows by 0-based position
df = df[keep].reset_index(drop=True)

In [None]:
# create training set
## resolution (side length of a square patch, in pixels)
rs = 100
n_row = img.shape[0] // rs
n_col = img.shape[1] // rs
print(n_row, n_col)
## species list: label name -> species codes that count towards that label
sp_dict = {'PREMON': ['PREMON', 'ROYBOR'], 'CECSCH': ['CECSCH'], 'MANBID': ['MANBID']}
## species code -> label column, derived from sp_dict (insertion order gives
## PREMON/ROYBOR -> 0, CECSCH -> 1, MANBID -> 2) so the mapping lives in one place
sp_col = {}
for lab, codes in enumerate(sp_dict.values()):
    for code in codes:
        sp_col[code] = lab
n_class = len(sp_dict)
## create data list
x_mat = []
y_mat = []
## dictionary for location map: patch id -> position of the patch in x_mat / y_mat
loc_dict = {}
n_skipped = 0
## loop over the LFDP labels (pix_2 is used as the image row, pix_1 as the image column)
for pix_row, pix_col, species in zip(df['pix_2'], df['pix_1'], df['SPECIES']):
    loc_1 = int(pix_row // rs)
    loc_2 = int(pix_col // rs)
    ## a label outside the grid of complete patches would yield a truncated or
    ## empty crop (making x_mat ragged) and, for loc_2 >= n_col, a loc_id that
    ## collides with a patch of the next row -- skip it
    if not (0 <= loc_1 < n_row and 0 <= loc_2 < n_col):
        n_skipped += 1
        continue
    loc_id = loc_1 * n_col + loc_2
    if loc_id in loc_dict:
        idd = loc_dict[loc_id]
    else:
        ## first label falling into this patch: crop it and start an all-zero label row
        idd = len(y_mat)
        rr_1 = loc_1 * rs
        cc_1 = loc_2 * rs
        x_mat.append(img[rr_1:rr_1 + rs, cc_1:cc_1 + rs])
        y_mat.append([0] * n_class)
        loc_dict[loc_id] = idd
    ## assign the labels (species not listed in sp_dict leave the row unchanged)
    lab = sp_col.get(species)
    if lab is not None:
        y_mat[idd][lab] = 1
if n_skipped > 0:
    print('Labels skipped (outside the complete-patch grid):', n_skipped)
## transform the list into numpy array
x_mat = np.array(x_mat)
y_mat = np.array(y_mat)

In [None]:
# optional: rebalance the data set
print('Number of palm patches:', np.sum(y_mat[:, 0]))
print('Number of cecropia patches:', np.sum(y_mat[:, 1]))
print('Number of total patches:', y_mat.shape[0])
REBALANCE = True ## whether rebalance the patches
N_COPY_PALM = 30 ## copies kept of every patch whose label column 0 is set
N_COPY_CECROPIA = 7 ## copies kept of every other patch whose label column 1 is set
if REBALANCE:
    ## column 0 takes precedence over column 1; all remaining patches are kept once
    n_copy = np.where(y_mat[:, 0] == 1, N_COPY_PALM,
                      np.where(y_mat[:, 1] == 1, N_COPY_CECROPIA, 1))
    ## np.repeat keeps the original patch order, with the copies of a patch adjacent
    x_dat = np.repeat(x_mat, n_copy, axis=0)
    y_dat = np.repeat(y_mat, n_copy, axis=0)
else:
    x_dat = x_mat
    y_dat = y_mat

In [None]:
# build the model
## NOTE(review): `preprocess_input` is imported but never applied to the patches
## in the visible cells -- confirm that feeding raw RGB values is intended
## (whatever is chosen must be identical at training and prediction time)
## load the ResNet model; the input shape is taken from the patches so it cannot
## drift from the patch size chosen when the training set was built
resnet_model = resnet50.ResNet50(weights='imagenet', include_top=False, input_shape=tuple(x_dat.shape[1:]))
## change the training settings
TRAIN_BN_ONLY = False ## True: train only the BatchNormalization layers of the backbone
if TRAIN_BN_ONLY:
    for layer in resnet_model.layers:
        layer.trainable = isinstance(layer, BatchNormalization)
## build the model: backbone -> global average pooling -> one sigmoid unit per label column
model = Sequential()
model.add(resnet_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(y_dat.shape[1], activation='sigmoid'))
## compile the model (independent sigmoid outputs with binary cross-entropy)
opt = keras.optimizers.Adam(learning_rate=1e-3)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
# model selection
## train-validation split: 80% of the rows of x_dat are drawn without replacement
## NOTE: when REBALANCE is True, x_dat holds repeated copies of the same patch, so
## copies of one patch can fall on both sides of this split
np.random.seed(2020)
n_img = x_dat.shape[0]
n_train = int(n_img * 0.8)
loc_train = np.random.choice(n_img, n_train, replace=False)
## validation rows = every index not drawn for training, in ascending order
is_val = np.ones(n_img, dtype=bool)
is_val[loc_train] = False
loc_val = np.flatnonzero(is_val)
## start model training
time_start = datetime.now()
print('Start training:', time_start)
## the fit call below is disabled, so the elapsed time printed afterwards covers no training
#model.fit(x_dat[loc_train], y_dat[loc_train], batch_size=128, epochs=100, validation_data=(x_dat[loc_val], y_dat[loc_val]))
elapsed = datetime.now() - time_start
print('Time for model training:', elapsed)

In [None]:
# retrain the model with selected parameters
## build the model
## use a fresh ImageNet backbone: `resnet_model` is shared with the model of the
## selection step, so reusing it would start from whatever weights an earlier
## model.fit call left in it instead of from the pretrained weights
backbone = resnet50.ResNet50(weights='imagenet', include_top=False, input_shape=resnet_model.input_shape[1:])
## mirror the per-layer trainable settings of the original backbone
for new_layer, old_layer in zip(backbone.layers, resnet_model.layers):
    new_layer.trainable = old_layer.trainable
model = Sequential()
model.add(backbone)
model.add(GlobalAveragePooling2D())
model.add(Dense(y_dat.shape[1], activation='sigmoid'))
## compile the model
opt = keras.optimizers.Adam(learning_rate=1e-3)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
## start model training
time_start = datetime.now()
print('Start training:', time_start)
np.random.seed(2020)
## NOTE: the model is fit on all of x_dat, so the validation rows are also training
## rows here; the reported validation metrics are not a held-out estimate
model.fit(x_dat, y_dat, batch_size=128, epochs=50, validation_data=(x_dat[loc_val], y_dat[loc_val]))
## save the model (create the output folder first so a long run is not lost to a missing directory)
model_dir = path_wd + 'output/models'
os.makedirs(model_dir, exist_ok=True)
model.save(model_dir + '/Basic_' + str(THRESH_DIAM))
print('Time for model training:', datetime.now()-time_start)