In [None]:
'''Toolboxs'''
import re
import cv2
import os
from os.path import join
import random
import numpy as np
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
from functools import partial
from sklearn.model_selection import train_test_split
'''Tensorflow'''
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import callbacks as cb
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Lambda, Dense, Dropout, Conv2D, MaxPooling2D, Flatten,Activation,BatchNormalization
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.applications.efficientnet import preprocess_input,EfficientNetB0

# Discover the GPUs and make TensorFlow allocate GPU memory on demand
# instead of reserving all of it up front.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)
if len(gpus) > 1:
    # On multi-GPU machines use only the last GPU. Setting CUDA_VISIBLE_DEVICES
    # here would be too late (the devices were already enumerated above), so
    # restrict visibility through the TensorFlow API instead.
    try:
        tf.config.set_visible_devices(gpus[-1], 'GPU')
    except RuntimeError as err:
        # Raised when the cell is re-run after the runtime was initialized.
        print(f"Could not restrict visible GPUs: {err}")

In [None]:
import numpy as np
import cv2

# Network input size: width, height and channel count of every loaded image.
W, H, CH = (64, 64, 1)

def load_img(path, width = W, hight=H):
    """Load one image file as a centred, square, single-channel float tensor.

    Intended to be called through ``tf.py_function``: ``path`` must expose
    ``.numpy()`` returning the encoded file name as bytes.

    Args:
        path: eager string tensor holding the image file path.
        width: output width in pixels.
        hight: output height in pixels.

    Returns:
        A ``tf.constant`` of dtype float32 and shape ``(hight, width, 1)``
        with values in ``[-0.5, 0.5]``.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    filename = path.numpy().decode()
    raw = cv2.imread(filename)
    if raw is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cv2.imread could not read image: {filename}")

    # Keep only channel 0 of the decoded image and scale it to [0, 1].
    img = raw[..., 0].astype(np.float32) / 255.

    # Centre-crop to the largest square that fits in the image.
    side = min(img.shape[:2])
    oh = (img.shape[0] - side) // 2
    ow = (img.shape[1] - side) // 2
    img = img[oh:oh + side, ow:ow + side]

    # cv2.resize takes the target size as (width, height); add the channel axis back.
    img = np.expand_dims(cv2.resize(img, (width, hight)), -1)
    return tf.constant(img - 0.5)
# File extension used when globbing the images of a class directory.
SUFFIX = ".jpg"

In [None]:
# Episode layout: classes per task, support images per class,
# query images per task, and tasks per training batch.
WAYS, SHOTS = 3, 5
QUERIES = WAYS * 1
BATCH_SIZE = 8

In [None]:
# Seed NumPy and TensorFlow so the class split and sampling are repeatable.
np.random.seed(2021)
tf.random.set_seed(2021)
DATA_ROOT="./kaggle_fungi/source"
# DATA_ROOT=join("..","..","data")
# DATA_ROOT="I:\AIA Small Data\datasets"

# One directory per class. glob() returns entries in filesystem order, which
# differs between machines, so sort first; otherwise the seeded split below
# would still assign different classes to source/target on another machine.
all_classes = sorted(glob(join(DATA_ROOT,"source","*")))

# 80 % of the classes are used for training (source), 20 % are held out (target).
source_classes,target_classes=train_test_split(all_classes,test_size=0.2)
sorce_len=len(source_classes)
target_len=len(target_classes)
print(f"total {len(all_classes)} classes=source {sorce_len} + target {target_len} classes")

In [None]:
# Keep only the source classes that contain more than SHOTS + QUERIES images.
source_classes = list(
    filter(
        lambda sdir: len(glob(join(sdir, '*' + SUFFIX))) > SHOTS + QUERIES,
        source_classes,
    )
)

sorce_len = len(source_classes)
print(f"source {sorce_len} + target {target_len} classes")

In [None]:
# Sanity check: show the first class directory that ended up in the target split.
print(target_classes[0])

In [None]:
# Mapping function for loading: wraps the NumPy/OpenCV loader so that it can
# be used inside a tf.data pipeline.
map_fun=lambda string: tf.py_function(func=load_img,inp=[string], Tout=tf.float32)


def _class_loader(class_dir):
    """Build the tf.data loader for every SUFFIX image inside ``class_dir``.

    Decoded images are cached first and shuffled afterwards. Shuffling before
    ``cache()`` would freeze the order seen on the first pass, so every later
    ``next(iter(ds.batch(k)))`` would return the very same images.
    """
    files = glob(join(class_dir, '*' + SUFFIX))
    return (
        tf.data.Dataset.list_files(files, shuffle=False)
        .map(map_fun)
        .cache()
        # Buffer covers the whole class, so each new iterator draws a fresh order.
        .shuffle(buffer_size=max(len(files), 1))
    )


# Every class in the source set gets its own loader; support and query images
# are drawn from two independent loaders over the same files.
source_sup_sub = [_class_loader(sc) for sc in source_classes]
source_q_sub = [_class_loader(sc) for sc in source_classes]

# Every class in the target set gets its own loader as well.
target_sup_sub = [_class_loader(sc) for sc in target_classes]
target_q_sub = [_class_loader(sc) for sc in target_classes]


In [None]:
%%time
# Iterate every source loader (support loaders first, then query loaders)
# once from start to end so that its cache() is populated.
for sub in source_sup_sub + source_q_sub:
    for x in sub.batch(10):
        pass

In [None]:
%%time
# Iterate every target loader (support loaders first, then query loaders)
# once from start to end so that its cache() is populated.
for sub in target_sup_sub + target_q_sub:
    for x in sub.batch(10):
        pass

In [None]:
def gen(sup_sub,query_sub):
    """Generate few-shot tasks from per-class support and query loaders.

    The classes are visited in one random permutation, WAYS classes per task.

    Args:
        sup_sub: list of per-class datasets the support images are drawn from.
        query_sub: list of per-class datasets the query images are drawn from,
            indexed with the same class indices as ``sup_sub``.

    Yields:
        ``(images, labels)`` where ``images`` is the support batch followed by
        the query batch along axis 0 and ``labels`` stacks one one-hot vector
        (over the WAYS picked classes) per query image.
    """
    # Decide the class order once, up front.
    order = np.random.permutation(len(sup_sub))
    n_tasks = len(sup_sub) // WAYS

    for task in range(n_tasks):
        # Pull the next WAYS classes out of the pre-computed order.
        class_ids = order[WAYS * task:WAYS * (task + 1)]
        picked = [sup_sub[cid] for cid in class_ids]
        picked_q = [query_sub[cid] for cid in class_ids]

        # Support set: the first batch of SHOTS images from every picked class.
        support_parts = []
        for sub in picked:
            support_parts.append(next(iter(sub.batch(SHOTS))))
        support = tf.concat(support_parts, axis=0)

        # Query set: QUERIES images, each from a different picked class,
        # in random class order.
        idxs = np.random.choice(range(WAYS), size=QUERIES, replace=False)
        query_parts = [next(iter(picked_q[idx].batch(1))) for idx in idxs]
        query = tf.concat(query_parts, axis=0)

        # Emit support and query glued together, plus the one-hot query labels.
        labels = tf.stack(
            [keras.utils.to_categorical(idx, num_classes=WAYS) for idx in idxs],
            axis=0,
        )
        yield tf.concat([support, query], axis=0), labels

In [None]:
# Number of passes over the episode generator that make up one training epoch.
MLUT=16

# Training episodes. The generated tasks are cached first and shuffled
# afterwards: shuffle() placed before cache() would store a single fixed order
# and replay identical batches in every epoch.
data_source = tf.data.Dataset.from_generator(partial(gen,source_sup_sub,source_q_sub),
                                    output_types=(tf.float32,tf.float32),
                                    output_shapes=((WAYS*SHOTS+QUERIES,W,H,CH),(QUERIES,WAYS))
                                            ).repeat(MLUT).cache().shuffle(buffer_size=999).batch(BATCH_SIZE).prefetch(MLUT)

# Evaluation episodes drawn from the held-out target classes (not cached, not shuffled).
data_target = tf.data.Dataset.from_generator(partial(gen,target_sup_sub,target_q_sub),
                                    output_types=(tf.float32,tf.float32),
                                    output_shapes=((WAYS*SHOTS+QUERIES,W,H,CH),(QUERIES,WAYS))
                                            ).repeat(MLUT*4).batch(BATCH_SIZE)


In [None]:
# Embedding function
def conv_net(input_shape):
    """Build the embedding network: three Conv-BatchNorm-ReLU-MaxPool stages.

    Args:
        input_shape: shape of one input image, without the batch dimension.

    Returns:
        A ``Sequential`` model mapping an image to its feature map.
    """
    convnet = Sequential()
    for i in range(3):
        # Only the first layer of a Sequential model defines the input shape;
        # passing it to the later Conv2D layers is meaningless.
        first_layer_kwargs = {'input_shape': input_shape} if i == 0 else {}
        convnet.add(Conv2D(64,(3,3),padding='valid',**first_layer_kwargs))
        convnet.add(BatchNormalization())
        convnet.add(Activation('relu'))
        convnet.add(MaxPooling2D())
    return convnet
def build_relation_network(input_shape):
    """Build the relation module that scores a (query, support) feature pair.

    Args:
        input_shape: shape of the concatenated feature map, without the batch
            dimension.

    Returns:
        A ``Sequential`` model ending in a single linear unit (the relation score).
    """
    relation_layers = [
        # One convolutional stage ...
        Conv2D(64, kernel_size=3, input_shape=input_shape,
               padding="valid", activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        # ... followed by a small fully connected head.
        Flatten(),
        Dense(8, activation='relu'),
        Dense(1, activation=None),
    ]
    return Sequential(relation_layers)

In [None]:
# Shared embedding network applied to every single image of an episode.
base_dim = (W, H, CH)
print(base_dim)
base_network = conv_net(base_dim)

# One episode = WAYS*SHOTS support images followed by QUERIES query images.
x_in = Input(shape=(WAYS*SHOTS+QUERIES, W, H, CH))
latent_s = [base_network(x_in[:, ii]) for ii in range(WAYS*SHOTS)]
latent_q = [base_network(x_in[:, WAYS*SHOTS+ii]) for ii in range(QUERIES)]

# The relation module sees a query and a support feature map concatenated
# along the channel axis, hence twice the channel count.
feat_shape = latent_q[0].shape
relation_input_shape = (feat_shape[-3], feat_shape[-2], feat_shape[-1]*2)
relation_net = build_relation_network(relation_input_shape)
print(*relation_input_shape)

y = []
for q in latent_q:
    # For each class: score the query against each of its SHOTS support
    # images and average those scores into one relation score per class.
    relation_score = [
        tf.reduce_mean(
            tf.concat(
                [relation_net(tf.concat([q, s], -1))
                 for s in latent_s[ww*SHOTS:(ww+1)*SHOTS]],
                -1),
            -1, keepdims=True)
        for ww in range(WAYS)
    ]
    # Softmax over the WAYS class scores gives the class distribution of this query.
    y.append(tf.nn.softmax(tf.concat(relation_score, -1), -1))
pred = tf.stack(y, 1)

model = Model(inputs=x_in, outputs=pred)

In [None]:
# Optimiser settings and training callbacks; all callbacks watch the validation loss.
lr=0.001
reduce_lr = cb.ReduceLROnPlateau(monitor='val_loss', factor=0.1,patience=6, min_lr=1e-8, verbose=1)
tensorboard = cb.TensorBoard(log_dir="tf_relation_logs")
earlystop=cb.EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True, verbose=1)
# `lr` is a deprecated alias of the optimizer argument; `learning_rate` is the supported name.
opt = tf.keras.optimizers.Adam(learning_rate=lr)
# The model already ends in a softmax, so the loss works on probabilities (default from_logits=False).
model.compile(loss=CategoricalCrossentropy(), optimizer=opt, metrics=['acc'])

In [None]:
EPOCHS=100
# Train until early stopping triggers; interrupting the kernel ends training
# gracefully instead of leaving a traceback in the notebook.
try:
    # `workers` is not passed: it only applies to generator / Sequence inputs,
    # has no effect on a tf.data.Dataset, and newer Keras releases reject it.
    # NOTE(review): validation_data is the training set itself, so val_loss
    # (which drives ReduceLROnPlateau and EarlyStopping) is not a held-out
    # measurement - consider a separate validation split.
    model.fit(data_source,
              epochs=EPOCHS, verbose=1,
              callbacks=[reduce_lr,earlystop, tensorboard],
              validation_data=data_source)
except KeyboardInterrupt:
    print("KeyboardInterrupt")

In [None]:
# Evaluate the trained model on the episodes generated from the target classes.
model.evaluate(data_target, verbose=2)

In [None]:
# Assemble the support/query images of every test task and print the query dtype.
# NOTE(review): test_support / test_query are only created by cells further
# down in this notebook, so this cell fails on a fresh kernel run top to bottom.
for tasks in range(len(test_query)):
    # Three consecutive support loaders belong to task number `tasks`.
    picked = [test_support[tt] for tt in range(3*tasks, 3*(tasks+1))]
    # Exactly one query loader per task. The previous
    # range(3*tasks, tasks+1) was empty for every task after the first.
    picked_q = [test_query[tasks]]

    imgs_t = tf.concat(
        [next(iter(sub.batch(SHOTS+1))) for sub in picked],
        axis=0)
    imgs_q = tf.concat(
        [next(iter(sub.batch(SHOTS))) for sub in picked_q],
        axis=0)
    print(imgs_q.dtype)

In [None]:
def gen_test(sup_sub,query_sub):
    """Generate unlabeled few-shot tasks (images only).

    The classes are visited in one random permutation, WAYS classes per task.

    NOTE(review): ``query_sub`` is indexed with the same class indices as
    ``sup_sub``, so both lists must have the same length, and tasks come out
    in random order. The notebook calls this with ``test_support`` /
    ``test_query``, which look like WAYS support loaders plus ONE query loader
    per task in CSV order - confirm whether tasks should instead be emitted in
    file order with ``query_sub[task]`` as the query.

    Args:
        sup_sub: list of per-class datasets the support images are drawn from.
        query_sub: list of per-class datasets the query images are drawn from.

    Yields:
        One tensor per task: the support batch followed by the query batch
        along axis 0.
    """
    # Decide the class order once, up front.
    order=np.random.permutation(len(sup_sub))
    for tasks in range(len(sup_sub)//WAYS):
        # Pull the next WAYS classes out of the pre-computed order.
        picked=[sup_sub[tt] for tt in order[WAYS*tasks:WAYS*(tasks+1)]]
        picked_q=[query_sub[tt] for tt in order[WAYS*tasks:WAYS*(tasks+1)]]

        # Support set: the first batch of SHOTS images from every picked class.
        support = tf.concat(
            [next(iter(sub.batch(SHOTS))) for sub in picked],
            axis=0)
        # Query set: the first batch (up to SHOTS images) from every picked query loader.
        query = tf.concat(
            [next(iter(sub.batch(SHOTS))) for sub in picked_q],
            axis=0)

        # Yield inside the loop, once per task, using this task's own tensors.
        # The yield used to sit after the loop and referenced the undefined
        # names imgs_t / imgs_q.
        yield tf.concat([support, query], axis=0)

In [None]:
# Test episodes. gen_test yields a single image tensor per task (no labels),
# so the declared structure must be one float32 tensor rather than an
# (images, labels) pair; the number of images per task is left unspecified.
data_test = tf.data.Dataset.from_generator(partial(gen_test,test_support,test_query),
                        output_types=tf.float32,
                        output_shapes=tf.TensorShape([None,W,H,CH])
                                ).repeat(MLUT*4).batch(BATCH_SIZE)

In [None]:

# Predict every test task and write the submission file.
import pandas as pd  # this cell runs before the cell that imports pandas

all_result = []
# data_test is already batched; batching it a second time would add an extra
# leading axis that the model cannot consume.
for batch in data_test:
    pre = model.predict(batch)
    print(pre.shape)
    for task_pred in pre:
        # One predicted class index per query image of the task.
        all_result.extend(np.argmax(task_pred, axis=1))

df = pd.read_csv('SampleSubmission2.csv')
df['ans'] = all_result
df.to_csv('Submission2.csv', index=False)
print('Done.')


In [None]:
import pandas as pd
import csv

# Read the Kaggle test2.csv row by row. Each data row holds the query file
# name followed by the three support class names of that task.
ans = pd.DataFrame(columns=['filename', 'ans'])
file_name =[]
support_list = []

MLUT=16
# NOTE(review): machine-specific absolute path - move to a configurable data directory.
TEST_ROOT = "C:/Users/2103099/Documents/kaggle_fungi"
with open(TEST_ROOT + "/test2.csv", newline='') as csvfile:
    rows = csv.reader(csvfile)
    next(rows, None)  # skip the header row

    for row in rows:
        hw_file_name = row[0]
        hw_support_list = row[1:4]

        # Directories of the three support classes of this task.
        test_data_path = [TEST_ROOT + "/target_s/target_s/" + class_name + "/"
                          for class_name in hw_support_list]
        support_list.append(test_data_path)
        # Path of the single query image of this task.
        test_img = TEST_ROOT + "/target_q/target_q/" + hw_file_name
        file_name.append(test_img)

        # Loaders for the current row; after the loop these hold the last row's task.
        test_support = [
            tf.data.Dataset.list_files(os.path.join(sc,'*.JPG'), shuffle=True)
            .map(map_fun)
            for sc in test_data_path
        ]
        test_query = [
            tf.data.Dataset.list_files(test_img, shuffle=False)
            .map(map_fun)
        ]

# Predict once, after the CSV has been read. data_test does not depend on the
# row being processed, so running this inside the row loop repeated the same
# prediction and rewrote the same submission file for every CSV row.
all_result = []
# data_test is already batched; batching it again would add an extra leading axis.
for batch in data_test:
    pre = model.predict(batch)
    print(pre.shape)
    for task_pred in pre:
        all_result.extend(np.argmax(task_pred, axis=1))
print(all_result)

df = pd.read_csv('SampleSubmission2.csv')
df['ans'] = all_result
df.to_csv('Submission2.csv', index=False)
print('Done.')

        
        
        
#         for task in range(len(test_support)):
#             print(f'test: {task}')
#     # 每一筆test 有 三個類別
#             print(f'test: {task}')

In [None]:
# print(file_name[0])
# ./kaggle_fungi/source\source\14155_Ganoderma_adspersum
# print(support_list[0])
# for sc in support_list:
#     print(sc)
#     for i in sc:
# Copy of the collected query image paths, and how many there are.
test_ans = list(file_name)
print(len(test_ans))


In [None]:
# Mapping function for loading: wraps the NumPy/OpenCV loader so that it can
# be used inside a tf.data pipeline.
map_fun=lambda string: tf.py_function(func=load_img,inp=[string], Tout=tf.float32)

# Test set: one loader per support class directory, flattened task by task
# (the three directories of task 0, then those of task 1, ...).
# The `for` clauses must go from outer to inner: the previous order
# (`for i in sc for sc in support_list`) used `sc` before it was bound.
test_support = [
    tf.data.Dataset.list_files(os.path.join(class_dir,'*.JPG'), shuffle=True)
    .map(map_fun)
    for sc in support_list
    for class_dir in sc
]

# One loader per query image, in the same task order.
test_query = [
    tf.data.Dataset.list_files(sc, shuffle=False)
    .map(map_fun) for sc in file_name
]

# print(test_support)

In [None]:
# Report how many support and query loaders were built.
for label, loaders in (("test_support", test_support), ("test_query", test_query)):
    print(f'len({label}): {len(loaders)}')

In [None]:
# Assemble the support and query images of every test task.
for tasks in range(len(test_query)):
    # Three consecutive support loaders belong to task number `tasks`.
    picked=[test_support[tt] for tt in range(3*tasks,3*(tasks+1))]
    imgs_t = tf.concat(
        [next(iter(sub.batch(SHOTS+1))) for sub in picked],
        axis=0)

    # Exactly one query loader per task. The previous
    # range(3*tasks, tasks+1) was empty for every task after the first.
    picked_q=[test_query[tasks]]
    imgs_q = tf.concat(
        [next(iter(sub.batch(SHOTS))) for sub in picked_q],
        axis=0)
#     print(imgs_q.shape)
#     idxs=np.random.choice(range(3), size=1, replace=False)
#     imgs_q = tf.concat(
#         [
#             next(
#                 iter(
#                     picked_q[idx].batch(1)
#                 )
#                 ) for idx in idxs
#         ]
#         , axis=0)
#     '''輸出的時候把support跟query接在一起'''
#     tf_con = tf.concat([imgs_s, imgs_q], axis=0)
#     yield tf.concat([imgs_s, imgs_q], axis=0)

In [None]:
def gen2(test_support,test_query):
    """Generate labeled few-shot tasks from the given support/query loaders.

    Same sampling procedure as ``gen``. The body used to reference the
    undefined names ``sup_sub`` / ``query_sub`` instead of its own parameters,
    so it now simply delegates to ``gen``.

    Args:
        test_support: list of per-class datasets the support images are drawn from.
        test_query: list of per-class datasets the query images are drawn from,
            indexed with the same class indices as ``test_support``.

    Yields:
        ``(images, labels)`` tuples exactly as produced by ``gen``.
    """
    yield from gen(test_support, test_query)

In [None]:
        
        # One loader per support class directory listed in test_data_path.
        test_support = []
        for sc in test_data_path:
            support_files = tf.data.Dataset.list_files(os.path.join(sc,'*.JPG'), shuffle=True)
            test_support.append(support_files.map(map_fun))
        print(f'len(test_support): {len(test_support)}')

        # A single loader for the query image test_img.
        test_query = [tf.data.Dataset.list_files(test_img, shuffle=False).map(map_fun)]