In [1]:
import shutil, os
from glob import glob
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [12]:
# Sampling configuration for the few-shot dataset built in this notebook
WAYS = 3            # classes listed per task: the query class plus WAYS-1 others
SHOTS = 5           # pictures drawn per target class on top of the query pictures
SRC_SAMPLES = 10    # pictures copied for every source class
SRC_THR = 80        # classes with fewer pictures than this are dropped entirely
TGT_SAMPLES = 150   # usable classes with at least this many pictures become targets

In [3]:
# List all class directories.
# glob() returns paths in arbitrary, filesystem-dependent order. Later cells
# refer to classes by their position in this list and draw pictures with a
# fixed seed while iterating over it, so the list is sorted to make those
# results repeatable across runs and machines.
all_classes = sorted(glob('/home/jovyan/data/fungi/images/*'))
print(f"total {len(all_classes)} classes")

total 1394 classes


In [4]:
# Count the *.JPG pictures in every class directory (same order as all_classes)
data_per_class = []
for sdir in all_classes:
    jpg_files = glob(f"{sdir}/*.JPG")
    data_per_class.append(len(jpg_files))
# Smallest and largest class size
print("Min", min(data_per_class), ",Max", max(data_per_class))

Min 6 ,Max 442


In [5]:
# Build one (picture count, class index) row per class and sort the rows by
# picture count (ties broken by class index).
# Column 0 holds the count, column 1 the position of the class in all_classes.
count_index_pairs = zip(data_per_class, range(len(data_per_class)))
sorted_classID = np.array(sorted(count_index_pairs))

In [6]:
# Number of classes that have at least SRC_THR pictures (i.e. are usable at all)
(sorted_classID[:, 0] >= SRC_THR).sum()

406

In [7]:
# Number of classes that have at least TGT_SAMPLES pictures (target candidates)
# NOTE(review): the recorded output of this cell does not match the target
# count printed by the partition cell further down; presumably TGT_SAMPLES had
# a different value when this cell was last executed - re-run to confirm.
(sorted_classID[:, 0] >= TGT_SAMPLES).sum()

205

In [9]:
# Drop every class that has fewer than SRC_THR pictures
has_enough_pics = sorted_classID[:, 0] >= SRC_THR
sorted_classID = sorted_classID[has_enough_pics]
print(f"total {len(sorted_classID)} classes")

total 406 classes


In [13]:
# Split the usable classes by picture count: classes below TGT_SAMPLES are used
# for meta training (source), the remaining ones for meta testing (target).
pics_per_class = sorted_classID[:, 0]
class_index = sorted_classID[:, 1]
is_target = pics_per_class >= TGT_SAMPLES
sourceID = class_index[~is_target]
targetID = class_index[is_target]
print(f"total {len(sorted_classID)} classes=source {len(sourceID)} + target {len(targetID)} classes")

total 406 classes=source 293 + target 113 classes


In [14]:
# Translate the class indices back into their directory paths
class_dirs = np.array(all_classes)
source_classes = class_dirs[sourceID]
target_classes = class_dirs[targetID]

In [15]:
# Create the source dataset: copy SRC_SAMPLES randomly chosen pictures of every
# source class into source/<class name>/.
np.random.seed(2021)
for class_dir in source_classes:
    label_name = os.path.basename(class_dir)
    # List all pictures of this class. glob() returns them in arbitrary order,
    # so sort first: otherwise the seeded draw below can select different files
    # on another run or machine, and re-running the cell would keep adding
    # pictures to an already populated output directory.
    label_pics = sorted(glob(os.path.join(class_dir, "*.JPG")))
    # Draw without replacement; numpy raises ValueError when the class holds
    # fewer than SRC_SAMPLES pictures.
    chosen_pics = np.random.choice(label_pics, size=SRC_SAMPLES, replace=False)
    # Copy the chosen pictures. A separate name is used for the destination so
    # the loop variable is not overwritten inside its own loop.
    dest_dir = os.path.join("source", label_name)
    os.makedirs(dest_dir, exist_ok=True)
    for f in chosen_pics:
        shutil.copy(src=f, dst=dest_dir)

In [17]:
# Query pictures per target class: 2000 is divided evenly (floor division)
# among the target classes, so the overall number of query pictures stays at
# or below 2000.
Q_SAMPLES = 2000 // targetID.shape[0]
print(Q_SAMPLES)

17


In [18]:
# Create the target dataset: copy Q_SAMPLES + SHOTS randomly chosen pictures of
# every target class into target_support/<class name>/. A later cell moves
# Q_SAMPLES of them per class into target_query/.

np.random.seed(2021)
for class_dir in target_classes:
    label_name = os.path.basename(class_dir)
    # glob() returns paths in arbitrary order; sort so the seeded draw below
    # picks the same pictures on every run and every machine.
    label_pics = sorted(glob(os.path.join(class_dir, "*.JPG")))
    # Draw without replacement; numpy raises ValueError when the class holds
    # fewer than Q_SAMPLES + SHOTS pictures.
    chosen_pics = np.random.choice(label_pics, size=Q_SAMPLES+SHOTS, replace=False)
    # Separate name for the destination so the loop variable is not clobbered
    dest_dir = os.path.join("target_support", label_name)
    os.makedirs(dest_dir, exist_ok=True)
    for f in chosen_pics:
        shutil.copy(src=f, dst=dest_dir)

In [19]:
# Move Q_SAMPLES pictures of every target class from target_support/ into the
# flat target_query/ directory and record one task per moved picture:
# the picture's file name, the WAYS class names offered (in shuffled order) and
# the position of the correct class among them ("ans").
task_columns = ['filename'] + [f'source{i:.0f}' for i in range(WAYS)] + ["ans"]
# glob() returns paths in arbitrary order; sort so that the seeded draws below
# give the same tasks on every run and every machine.
all_tgt_classes = sorted(glob('target_support/*'))
os.makedirs("target_query", exist_ok=True)
np.random.seed(2021)
# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame row by
# row is quadratic.
task_rows = []
for cls_id, class_dir in enumerate(all_tgt_classes):
    label_name = os.path.basename(class_dir)
    label_pics = sorted(glob(os.path.join(class_dir, "*.JPG")))
    label_pics = np.random.choice(label_pics, size=Q_SAMPLES, replace=False)
    # Every class directory except the query class can be offered as a wrong answer
    candidate_dirs = all_tgt_classes[:cls_id] + all_tgt_classes[cls_id+1:]
    for pics in label_pics:
        # Pick the other classes of this task, excluding the query class
        other_classes = np.random.choice(candidate_dirs,
                                         size=WAYS-1,
                                         replace=False)
        # Shuffle so the correct class does not always sit in the same column
        classes_in_task = np.random.permutation(
            [os.path.basename(dir_name) for dir_name in other_classes] + [label_name]
        ).tolist()
        label = classes_in_task.index(label_name)
        filename = os.path.basename(pics)
        # NOTE(review): target_query/ is flat and the table is keyed by bare
        # file name; this presumably relies on file names being unique across
        # classes (shutil.move raises if the destination already exists) - verify.
        shutil.move(pics, "target_query")
        task_rows.append([filename, *classes_in_task, label])
target_ans = pd.DataFrame(task_rows, columns=task_columns)

In [20]:
# Write the task table to test.csv, leaving out the DataFrame index column
target_ans.to_csv(path_or_buf="test.csv", index=False)