In [1]:

import re
import cv2
import os
from os.path import join
import random
import numpy as np
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
from functools import partial
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
from tensorflow import keras
from tensorflow.keras import callbacks as cb
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Lambda, Dense, Dropout, Conv2D, MaxPooling2D, Flatten, Activation, \
    BatchNormalization
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.applications.efficientnet import preprocess_input, EfficientNetB0

gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)
if len(gpus) > 1:
    os.environ["CUDA_VISIBLE_DEVICES"] = f"{len(gpus) - 1}"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"

In [2]:
W, H, CH = 64, 64, 1


def load_img(path, width=W):
    img = cv2.imread(path.numpy().decode())[..., 0].astype(np.float32) / 255.
    shape_dst = np.min(img.shape[:2])
    oh = (img.shape[0] - shape_dst) // 2
    ow = (img.shape[1] - shape_dst) // 2
    center_square = np.array([width, width]) // 2
    new_size = (width, width)

    # cropping + resize
    img = img[oh:oh + shape_dst, ow:ow + shape_dst]
    img = np.expand_dims(cv2.resize(img, new_size), -1)
    return tf.constant(img)


SUFFIX = '.JPG'


In [3]:


WAYS = 3
SHOTS = 5
QUERIES = 1
BATCH_SIZE = 8

np.random.seed(2021)
tf.random.set_seed(2021)

In [4]:

DATA_ROOT="./kaggle_fungi/source"
# DATA_ROOT=join("..","..","data")
# DATA_ROOT="I:\AIA Small Data\datasets"
source_classes = glob(join(DATA_ROOT,"source","*"))
# source_classes,target_classes=train_test_split(all_classes,test_size=0.2)
sorce_len=len(source_classes)
# target_len=len(target_classes)
# print(f"total {len(all_classes)} classes=source {sorce_len} + target {target_len} classes")
print(f"source: {sorce_len}")


source: 300


In [5]:

# 建立每個人的loader，隨意從每個人的圖片中抽取 (尚未指定張數)
# '''Mapping function for loading'''
map_fun = lambda string: tf.py_function(func=load_img, inp=[string], Tout=tf.float32)
# '''Source set中每個人都有一個tf Dataset loader'''
source_sup_sub = [
    tf.data.Dataset.list_files(glob(join(sc, '*' + SUFFIX)), shuffle=True)
        .map(map_fun).cache()
    for sc in source_classes
]

# **將所有可能run過一遍，讓cache記得**
for sub in source_sup_sub:
    for x in iter(sub.batch(10)):
        pass

In [6]:

# 建立每個task的loader，隨意從WAY個人抽取SHOT張，並再隨機指派每個way的class是從0~WAY-1的哪一個
def gen(all_sub):
    order = np.random.permutation(len(all_sub))
    '''For each task'''
    for tasks in range(len(all_sub) // WAYS):
        '''從已決定好的順序拉出WAY個人'''
        picked = [all_sub[tt] for tt in order[WAYS * tasks:WAYS * (tasks + 1)]]
        '''support每個人各有SHOTS張照片'''
        support = tf.concat(
            [
                next(
                    iter(
                        sub.batch(SHOTS)
                    )
                ) for sub in picked
            ]
            , axis=0)
        '''query挑WAY人中的QUERY張，這邊是設定每張屬於不同人，順序不固定'''
        idxs = np.random.choice(range(WAYS), size=QUERIES, replace=False)
        query = tf.concat(
            [
                next(
                    iter(
                        picked[idx].batch(1)
                    )
                ) for idx in idxs
            ]
            , axis=0)
        '''輸出的時候把support跟query接在一起'''
        yield tf.concat([support, query], axis=0), \
              tf.stack([keras.utils.to_categorical(idx, num_classes=WAYS) for idx in idxs], axis=0)



In [19]:

def gen_test():
    # # 歷遍test 資料 (2200)
    for i in range(len(df)):
        # 每一筆test 有 三個類別
        print(f'test: {i}')
        row_data = df.iloc[i]
        ts_sup_sub = []
        for index, k in enumerate(['support_0', 'support_1', 'support_2']):
            md = tf.data.Dataset.list_files(
                glob(join('target_s', row_data[k], '*' + SUFFIX)), shuffle=True).map(map_fun).cache()
            ts_sup_sub.append(md)

        ts_target_sub = tf.data.Dataset.list_files(
            os.path.join('target_q', row_data['filename'])).map(map_fun)

        for sub in ts_sup_sub:
            for x in iter(sub.batch(5)):
                pass
        # # Support
        data = []
        for sub in ts_sup_sub:
            data.append(next(iter(sub.batch(SHOTS))))
        support = tf.concat(data, axis=0)
        # Query
        query = next(iter(ts_target_sub.batch(SHOTS)))
        yield tf.concat([support, query], axis=0)


In [8]:

# 建立data generator，可以一次抽一個meta batch的資料
# 1. 要input一個function(這邊用partial產生一個預先設定好參數的function)
# 1. output types要對好前面的格式
# 2. output shape要給對

MLUT = 16
data_source = tf.data.Dataset.from_generator(
    partial(gen, source_sup_sub),
    output_types=(tf.float32, tf.float32),
    output_shapes=((WAYS * SHOTS + QUERIES, W, H, CH), (QUERIES, WAYS))
).repeat(MLUT).shuffle(buffer_size=999).cache().batch(BATCH_SIZE).prefetch(MLUT)


data_target = tf.data.Dataset.from_generator(
    gen_test,
    output_types=(tf.float32),
    output_shapes=(WAYS * SHOTS + QUERIES, W, H, CH))

In [9]:
def conv_net(input_shape):
    convnet = Sequential()
    for i in range(3):
        convnet.add(Conv2D(64, (3, 3), padding='valid', input_shape=input_shape))
        convnet.add(BatchNormalization())
        convnet.add(Activation('relu'))
        convnet.add(MaxPooling2D())
    return convnet


def build_relation_network(input_shape):
    seq = Sequential()
    # layer1
    seq.add(Conv2D(64, kernel_size=3, input_shape=input_shape,
                   padding="valid", activation='relu'))
    seq.add(BatchNormalization())
    seq.add(MaxPooling2D(pool_size=(2, 2)))
    #     seq.add(Dropout(.1))

    seq.add(Flatten())
    seq.add(Dense(8, activation='relu'))
    #     seq.add(Dropout(0.1))
    seq.add(Dense(1, activation=None))
    return seq

In [11]:
base_dim = (W, H, CH)
base_network = conv_net(base_dim)
# Query feature
x_in = Input(shape=(WAYS * SHOTS + QUERIES, W, H, CH))
latent_s = [base_network(x_in[:, ii]) for ii in range(WAYS * SHOTS)]
latent_q = [base_network(x_in[:, WAYS * SHOTS + ii]) for ii in range(QUERIES)]

relation_net = build_relation_network((latent_q[0].shape[-3], latent_q[0].shape[-2], latent_q[0].shape[-1] * 2))

y = []
for q in latent_q:
    relation_score = []
    for ww in range(WAYS):
        relation = [relation_net(tf.concat([q, s], -1)) for s in latent_s[ww * SHOTS:(ww + 1) * SHOTS]]
        relation_score.append(tf.reduce_mean(tf.concat(relation, -1), -1, keepdims=True))

    y.append(tf.nn.softmax(tf.concat(relation_score, -1), -1))
pred = tf.stack(y, 1)


In [12]:

model = Model(inputs=x_in, outputs=pred)

lr = 0.001
reduce_lr = cb.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=6, min_lr=1e-8, verbose=1)
tensorboard = cb.TensorBoard(log_dir="tf_relation_logs")
earlystop = cb.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
opt = tf.keras.optimizers.Adam(lr=lr)
model.compile(loss=CategoricalCrossentropy(), optimizer=opt, metrics=['acc'])

In [14]:

EPOCHS = 20  # 400
try:
    model.fit(data_source,
              epochs=EPOCHS, verbose=1, workers=4,
              callbacks=[reduce_lr, earlystop, tensorboard],
              validation_data=data_source)
except KeyboardInterrupt:
    print("KeyboardInterrupt")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:

df = pd.read_csv('./kaggle_fungi/test2.csv')
df

Unnamed: 0,filename,support_0,support_1,support_2
0,TS2013PIC17437389.JPG,10000_Abortiporus_biennis,15888_Lactarius_pallidus,10065_Agaricus_campestris
1,TS2010PIC72338205.JPG,10994_Basidioradulum_radula,15888_Lactarius_pallidus,11085_Xerocomellus_porosporus
2,TS2016-9169441_H1qNFt0u.JPG,11086_Xerocomellus_pruinatus,11095_Hortiboletus_rubellus,15888_Lactarius_pallidus
3,MC2017-9207438_B1nMv9EFW.JPG,11202_Buglossoporus_quercinus,11234_Calocera_viscosa,15888_Lactarius_pallidus
4,MAV2017-9203941_HkwZOvmuZ.JPG,11384_Ceratiomyxa_fruticulosa,15888_Lactarius_pallidus,11573_Chalciporus_piperatus
...,...,...,...,...
2195,MC2017-9216120_B1rV9k4oW.JPG,21155_Tricholoma_ustale,14667_Hericium_coralloides,14778_Hydnellum_concrescens
2196,EAT2017-9205185_BklapgTOOW.JPG,21155_Tricholoma_ustale,14799_Hydnum_rufescens,14815_Hygrocybe_acutoconica
2197,BWP2012PIC41205195.JPG,14849_Gliophorus_laetus,14845_Hygrocybe_insipida,21155_Tricholoma_ustale
2198,TRL2015PIC35525891.JPG,14863_Hygrocybe_punicea,21155_Tricholoma_ustale,14859_Cuphophyllus_pratensis


In [20]:

df = pd.read_csv('./kaggle_fungi/test2.csv')

# # 預測結果
all_result = []
for batch in data_target.batch(50):
    pre = model.predict(batch)
    print(pre.shape)
    for i in pre:
        all_result.extend(np.argmax(i, axis=1))
        print(allresult)
# df = pd.read_csv('SampleSubmission2.csv')
# df['ans'] = all_result
# df.to_csv('Submission2.csv', index=False)
# print('Done.')

test: 0


UnknownError: InvalidArgumentError: Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'No files matched pattern: '
Traceback (most recent call last):

  File "C:\ProgramData\Anaconda3\envs\AIA_Small_Data\lib\site-packages\tensorflow\python\ops\script_ops.py", line 244, in __call__
    ret = func(*args)

  File "C:\ProgramData\Anaconda3\envs\AIA_Small_Data\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 302, in wrapper
    return func(*args, **kwargs)

  File "C:\ProgramData\Anaconda3\envs\AIA_Small_Data\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 827, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "<ipython-input-7-904c860548b0>", line 10, in gen_test
    glob(join('target_s', row_data[k], '*' + SUFFIX)), shuffle=True).map(map_fun).cache()

  File "C:\ProgramData\Anaconda3\envs\AIA_Small_Data\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 1115, in list_files
    condition, [message], summarize=1, name="assert_not_empty")

  File "C:\ProgramData\Anaconda3\envs\AIA_Small_Data\lib\site-packages\tensorflow\python\util\dispatch.py", line 201, in wrapper
    return target(*args, **kwargs)

  File "C:\ProgramData\Anaconda3\envs\AIA_Small_Data\lib\site-packages\tensorflow\python\util\tf_should_use.py", line 247, in wrapped
    return _add_should_use_warning(fn(*args, **kwargs),

  File "C:\ProgramData\Anaconda3\envs\AIA_Small_Data\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 158, in Assert
    (condition, "\n".join(data_str)))

tensorflow.python.framework.errors_impl.InvalidArgumentError: Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'No files matched pattern: '


	 [[{{node PyFunc}}]]