# Data Generating and Preprocessing

In [None]:
# !git clone https://github.com/jinnyjinny/MetaPattern_FAS.git

In [None]:
# !wget --no-check-certificate \
# https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip \
# -O ./cats_and_dogs_filtered.zip

In [1]:
import os
import zipfile

local_zip = './cats_and_dogs_filtered.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('./')
zip_ref.close()

In [2]:
base_dir = './cats_and_dogs_filtered'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')

# Directory with our training cat pictures
train_cats_dir = os.path.join(train_dir, 'cats')

# Directory with our training dog pictures
train_dogs_dir = os.path.join(train_dir, 'dogs')

# Directory with our validation cat pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')

# Directory with our validation dog pictures
validation_dogs_dir = os.path.join(validation_dir, 'dogs')

In [3]:
train_cat_fnames = os.listdir(train_cats_dir)
print(train_cat_fnames[:10])

train_dog_fnames = os.listdir(train_dogs_dir)
train_dog_fnames.sort()
print(train_dog_fnames[:10])

['cat.686.jpg', 'cat.280.jpg', 'cat.441.jpg', 'cat.837.jpg', 'cat.917.jpg', 'cat.575.jpg', 'cat.385.jpg', 'cat.212.jpg', 'cat.520.jpg', 'cat.60.jpg']
['dog.0.jpg', 'dog.1.jpg', 'dog.10.jpg', 'dog.100.jpg', 'dog.101.jpg', 'dog.102.jpg', 'dog.103.jpg', 'dog.104.jpg', 'dog.105.jpg', 'dog.106.jpg']


In [4]:
import os
import shutil
from sklearn.model_selection import train_test_split

def split_cat_dog_dataset(data_path, output_path):
    cat_folder = os.path.join(data_path, "cats")
    dog_folder = os.path.join(data_path, "dogs")

    if not os.path.exists(cat_folder) or not os.path.exists(dog_folder):
        print(f"Error: {cat_folder} or {dog_folder} does not exist.")
        return

    # Collect image paths
    cat_files = [os.path.join(cat_folder, f) for f in os.listdir(cat_folder) if f.endswith(".jpg")]
    dog_files = [os.path.join(dog_folder, f) for f in os.listdir(dog_folder) if f.endswith(".jpg")]

    print(f"Found {len(cat_files)} cat images and {len(dog_files)} dog images.")

    # Split data into subsets
    train_cats, tgt_cats = train_test_split(cat_files, test_size=0.25, random_state=42)
    src1_cats, src2_cats = train_test_split(train_cats, test_size=0.5, random_state=42)
    src3_cats, src1_cats = train_test_split(src1_cats, test_size=0.5, random_state=42)

    train_dogs, tgt_dogs = train_test_split(dog_files, test_size=0.25, random_state=42)
    src1_dogs, src2_dogs = train_test_split(train_dogs, test_size=0.5, random_state=42)
    src3_dogs, src1_dogs = train_test_split(src1_dogs, test_size=0.5, random_state=42)

    # Save subsets
    subsets = {
        "src1_train_spoofing": src1_cats,
        "src1_train_live": src1_dogs,
        "src2_train_spoofing": src2_cats,
        "src2_train_live": src2_dogs,
        "src3_train_spoofing": src3_cats,
        "src3_train_live": src3_dogs,
    }

    # tgt 통합 데이터셋
    tgt_files = tgt_cats + tgt_dogs  # Combine spoofing (cats) and live (dogs)
    tgt_dir = os.path.join(output_path, "src4_test")
    os.makedirs(tgt_dir, exist_ok=True)

    # Helper function to determine label
    def get_label(file_path):
        if "cat" in file_path.split('/')[-1].lower():
            return "spoof"
        elif "dog" in file_path.split('/')[-1].lower():
            return "live"
        return None  # If file doesn't match expected labels

    # Create output directories and save files
    for subset_name, subset_files in subsets.items():
        subset_dir = os.path.join(output_path, subset_name)
        os.makedirs(subset_dir, exist_ok=True)

        # Save image files in the corresponding folder
        new_file_paths = []
        for file_path in subset_files:
            label = get_label(file_path)
            if label is None:
                continue  # Skip files that don't match expected labels

            new_file_name = f"{label}_{os.path.basename(file_path)}"
            new_path = os.path.join(subset_dir, new_file_name)
            shutil.copy(file_path, new_path)
            new_file_paths.append(new_path)  # Save new path for the txt file

    # Save tgt dataset as a single list
    tgt_file_paths = []
    for file_path in tgt_files:
        label = get_label(file_path)
        if label is None:
            continue  # Skip files that don't match expected labels

        new_file_name = f"{label}_{os.path.basename(file_path)}"
        new_path = os.path.join(tgt_dir, new_file_name)
        shutil.copy(file_path, new_path)
        tgt_file_paths.append(new_path)

    print("Dataset split and saved at:", output_path)

# Paths
original_data_path = "./cats_and_dogs_filtered/train"
output_data_path = "./MetaPattern_FAS/data/FAS_data"

# Split the dataset
split_cat_dog_dataset(original_data_path, output_data_path)

Found 1000 cat images and 1000 dog images.
Dataset split and saved at: ./MetaPattern_FAS/data/FAS_data


In [5]:
import os
import cv2

def make_data_list(input_data_path=None, output_data_path=None):
    """
    Generate labeled data list files for face anti-spoofing dataset.

    Args:
        input_data_path (str): Path to the input dataset directory.
        output_data_path (str): Path to save the output data list files.
    """
    if input_data_path is None:
        input_data_path = "FAS_data"
    if output_data_path is None:
        output_data_path = "data_list"

    # Use input folder name as prefix
    prefix = os.path.basename(os.path.normpath(input_data_path))

    # Ensure output directory exists
    os.makedirs(output_data_path, exist_ok=True)

    for root, _, files in os.walk(input_data_path):  # Search all directories under input_data_path
        for file in files:
            if file.endswith(".jpg"):
                file_path = os.path.abspath(os.path.join(root, file))  # Convert to absolute path
                height, width, channels = 0, 0, 0

                # Read image dimensions
                img = cv2.imread(file_path)
                if img is not None:
                    height, width, channels = img.shape

                # Determine list file name and label
                list_file = None
                if "test" in root.lower():
                    if "live" in root.lower():
                        list_file = f"{prefix}-TEST-REAL.txt"
                        face_label = "0"
                    elif "spoof" in root.lower():
                        list_file = f"{prefix}-TEST-FAKE.txt"
                        face_label = "1"
                    else:
                        list_file = f"{prefix}-TEST-ALL.txt"
                        face_label = "1" if "spoof" in file.lower() else "0"
                elif "train" in root.lower() or "dev" in root.lower():
                    if "live" in root.lower():
                        list_file = f"{prefix}-TRAIN-REAL.txt"
                        face_label = "0"
                    elif "spoof" in root.lower():
                        list_file = f"{prefix}-TRAIN-FAKE.txt"
                        face_label = "1"
                    else:
                        list_file = f"{prefix}-TRAIN-ALL.txt"
                        face_label = "1" if "spoof" in file.lower() else "0"

                # Skip if list_file is not assigned
                if list_file is None:
                    continue

                # Create the full path for the list file
                list_file_path = os.path.abspath(os.path.join(output_data_path, list_file))

                # Append data to the list file
                with open(list_file_path, "a") as f:
                    f.write(f"{file_path},{face_label},{height},{width}\n")

    print(f"Data lists for {prefix} created in: {output_data_path}")
    return True

# 반복적으로 호출하여 하위 폴더 처리
def process_all_folders(root_folder, output_data_path):
    """
    Process all subfolders in the root folder to create data lists.

    Args:
        root_folder (str): Root directory containing subfolders to process.
        output_data_path (str): Path to save the output data list files.
    """
    for sub_folder in os.listdir(root_folder):
        sub_folder_path = os.path.join(root_folder, sub_folder)
        if os.path.isdir(sub_folder_path):  # Check if it's a directory
            print(f"Processing folder: {sub_folder_path}")
            make_data_list(input_data_path=sub_folder_path, output_data_path=output_data_path)


process_all_folders(root_folder='./MetaPattern_FAS/data/FAS_data', output_data_path='./MetaPattern_FAS/data/data_list')

Processing folder: ./MetaPattern_FAS/data/FAS_data/src3_train_spoofing
Data lists for src3_train_spoofing created in: ./MetaPattern_FAS/data/data_list
Processing folder: ./MetaPattern_FAS/data/FAS_data/src2_train_spoofing
Data lists for src2_train_spoofing created in: ./MetaPattern_FAS/data/data_list
Processing folder: ./MetaPattern_FAS/data/FAS_data/src2_train_live
Data lists for src2_train_live created in: ./MetaPattern_FAS/data/data_list
Processing folder: ./MetaPattern_FAS/data/FAS_data/src1_train_spoofing
Data lists for src1_train_spoofing created in: ./MetaPattern_FAS/data/data_list
Processing folder: ./MetaPattern_FAS/data/FAS_data/src3_train_live
Data lists for src3_train_live created in: ./MetaPattern_FAS/data/data_list
Processing folder: ./MetaPattern_FAS/data/FAS_data/src4_test
Data lists for src4_test created in: ./MetaPattern_FAS/data/data_list
Processing folder: ./MetaPattern_FAS/data/FAS_data/src1_train_live
Data lists for src1_train_live created in: ./MetaPattern_FAS/da

# Meta Pattern model training

In [1]:
import os
os.chdir("./MetaPattern_FAS")
print("Current working directory:", os.getcwd())

Current working directory: /workspace/meta-learning/FAS/MetaPattern_FAS


In [None]:
from models.HFN_MP import custom_config

# Config 초기화
config = custom_config.get_cfg_custom()
# 필요에 따라 Config 수정

# ========================FOR DATA=========================
config.DATA.ROOT_DIR = "/root/Desktop/workspace/meta-learning/FAS/MetaPattern_FAS/data"
dl_root_dir = "/root/Desktop/workspace/meta-learning/FAS/MetaPattern_FAS/data/data_list"
config.DATA.TARGET_DATA = dl_root_dir + "/src4_test-TEST-ALL.txt"
config.DATA.TRAIN_SRC_FAKE_1 = dl_root_dir + "/src1_train_spoofing-TRAIN-FAKE.txt"
config.DATA.TRAIN_SRC_FAKE_2 = dl_root_dir + "/src2_train_spoofing-TRAIN-FAKE.txt"
config.DATA.TRAIN_SRC_FAKE_3 = dl_root_dir + "/src3_train_spoofing-TRAIN-FAKE.txt"
config.DATA.TRAIN_SRC_REAL_1 = dl_root_dir + "/src1_train_live-TRAIN-REAL.txt"
config.DATA.TRAIN_SRC_REAL_2 = dl_root_dir + "/src2_train_live-TRAIN-REAL.txt"
config.DATA.TRAIN_SRC_REAL_3 = dl_root_dir + "/src3_train_live-TRAIN-REAL.txt"
config.OUTPUT_DIR = "/root/Desktop/workspace/meta-learning/FAS/MetaPattern_FAS/data/output/tmp"
config.DATA.DATASET='ZipDataset'
config.NORM_FLAG = True
config.SEED = 42
config.DEBUG = False

# ========================FOR TRIANING=========================
config.TRAIN.INIT_LR = 0.01
config.TRAIN.LR_EPOCH_1 = 0
config.TRAIN.LR_EPOCH_2 = 150
config.TRAIN.MOMENTUM = 0.9
config.TRAIN.WEIGHT_DECAY = 0.0 # 5e-4
config.TRAIN.WEIGHT_DECAY_T = 0.0 # ColorNet for TRANSFORMER
config.TRAIN.MAX_ITER = 10# 1000000
config.TRAIN.META_TRAIN_SIZE = 2
config.TRAIN.ITER_PER_EPOCH = 10 #100
config.TRAIN.META_PRE_TRAIN = True
config.TRAIN.DROPOUT = 0.0
config.TRAIN.EPOCHS = 10# 20
config.TRAIN.SYNC_TRAINING = False
config.TRAIN.IMAGENET_PRETRAIN = True
config.TRAIN.RESUME = '' # Path to the resume ckpt
config.TRAIN.LR_PATIENCE = 0
config.TRAIN.PATIENCE = 100
config.TRAIN.SAVE_BEST = True # Only save the best model while training
config.TRAIN.PRINT_FREQ = 1000
config.TRAIN.VAL_FREQ = 1

config.TRAIN.W_depth = 10
config.TRAIN.W_metatest = 1
config.TRAIN.META_LEARNING_RATE = 0.001
config.TRAIN.BETAS = [0.9, 0.999]
config.TRAIN.META_TEST_FREQ = 1
config.TRAIN.VAL_FREQ = 1
config.TRAIN.NUM_FRAMES = 1000
config.TRAIN.INNER_LOOPS = 100
config.TRAIN.RETRAIN_FROM_SCATCH = True

config.TRAIN.OPTIM = 'SGD' # Adam


In [3]:
from models.HFN_MP.dataset import get_data_loader

# Get data loaders|
data_loaders = get_data_loader(config)

src1_train_dataloader_fake, src1_train_dataloader_real, \
src2_train_dataloader_fake, src2_train_dataloader_real, \
src3_train_dataloader_fake, src3_train_dataloader_real, \
tgt_dataloader = data_loaders

Load Target Data


In [4]:
# !python models/HFN_MP/network.py

In [5]:
from models.HFN_MP.trainer import Trainer

In [6]:
Trainer(config).train()

2024-12-07 04:41:10 - INFO - Loading ImageNet Pretrain
Load Target Data


Meta_train=[1, 0], Meta_test=[2], Meta-test-loss = 0.9287644028663635, Meta_train_loss=0.4648005962371826:  90%|███████████████▎ | 9/10 [00:19<00:01,  1.62s/it]

2024-12-07 04:41:30 - INFO - Training at Epoch 2


Meta_train=[2, 1], Meta_test=[0], Meta-test-loss = 0.9291478991508484, Meta_train_loss=0.4636671841144562:  90%|███████████████▎ | 9/10 [00:20<00:01,  1.62s/it]

2024-12-07 04:41:32 - INFO - Validation at epoch 2


100%|███████████████████████████████████████████| 31/31 [00:05<00:00,  5.29it/s]


2024-12-07 04:41:49 - INFO - Frame level metrics: 
   AUC  EER  EER_THR  HTER@THR  FAR@THR  FRR@THR  THR  HTER@0.5  FAR@0.5  \
0  0.5  0.5  0.48615       0.5      0.5      0.5  0.5       0.5      0.5   

   FRR@0.5  MIN_HTER  MIN_HTER_THR  MIN_FAR_THR  MIN_FRR_THR  
0      0.5       0.5      0.486149          1.0            0  
2024-12-07 04:41:49 - INFO - Video level metrics: 
   AUC  EER  EER_THR  HTER@THR  FAR@THR  FRR@THR  THR  HTER@0.5  FAR@0.5  \
0  NaN  0.5      0.5       0.5      1.0      0.0  0.5       0.5      1.0   

   FRR@0.5  MIN_HTER  MIN_HTER_THR  MIN_FAR_THR  MIN_FRR_THR  
0      0.0       0.5      0.504868          1.0            0  
2024-12-07 04:41:49 - INFO - Save best models
2024-12-07 04:41:49 - INFO - [*] Saving model to /root/Desktop/workspace/meta-learning/FAS/MetaPattern_FAS/data/output/tmp/ckpt/best.ckpt
2024-12-07 04:41:50 - INFO - Current Best MIN_HTER=50.0%, AUC=50.0%


Meta_train=[2, 1], Meta_test=[0], Meta-test-loss = 0.9291478991508484, Meta_train_loss=0.4636671841144562: 100%|████████████████| 10/10 [00:39<00:00,  3.93s/it]

2024-12-07 04:41:50 - INFO - train_hfn_from_scratch





2024-12-07 04:41:51 - INFO - Loading ImageNet Pretrain
Load Target Data


MSE_LOSS=0.2501, CLS_LOSS=0.6893:   0%|                                                                                                  | 0/11 [00:04<?, ?it/s]

2024-12-07 04:41:57 - INFO - Validation at epoch 1


100%|███████████████████████████████████████████| 31/31 [00:04<00:00,  6.89it/s]


2024-12-07 04:42:13 - INFO - Frame level metrics: 
   AUC  EER   EER_THR  HTER@THR  FAR@THR  FRR@THR  THR  HTER@0.5  FAR@0.5  \
0  0.5  0.5  0.492459       0.5      0.5      0.5  0.5       0.5      0.5   

   FRR@0.5  MIN_HTER  MIN_HTER_THR  MIN_FAR_THR  MIN_FRR_THR  
0      0.5       0.5      0.492458          1.0            0  
2024-12-07 04:42:13 - INFO - Video level metrics: 
   AUC  EER  EER_THR  HTER@THR  FAR@THR  FRR@THR  THR  HTER@0.5  FAR@0.5  \
0  NaN  0.5      0.5       0.5      1.0      0.0  0.5       0.5      1.0   

   FRR@0.5  MIN_HTER  MIN_HTER_THR  MIN_FAR_THR  MIN_FRR_THR  
0      0.0       0.5      0.504732          1.0            0  
2024-12-07 04:42:13 - INFO - Current Best MIN_HTER=50.0%, AUC=50.0%


MSE_LOSS=0.2500, CLS_LOSS=0.3678:  91%|████████████████████████████████████████████████████████████████████████████████▉        | 10/11 [00:32<00:01,  1.60s/it]

2024-12-07 04:42:25 - INFO - Training at Epoch 2


MSE_LOSS=0.2500, CLS_LOSS=0.3039:  91%|████████████████████████████████████████████████████████████████████████████████▉        | 10/11 [00:33<00:01,  1.60s/it]

2024-12-07 04:42:26 - INFO - Validation at epoch 2


100%|███████████████████████████████████████████| 31/31 [00:04<00:00,  6.58it/s]


2024-12-07 04:42:43 - INFO - Frame level metrics: 
   AUC  EER   EER_THR  HTER@THR  FAR@THR  FRR@THR  THR  HTER@0.5  FAR@0.5  \
0  0.5  0.5  0.271393       0.5      0.5      0.5  0.5       0.5      0.5   

   FRR@0.5  MIN_HTER  MIN_HTER_THR  MIN_FAR_THR  MIN_FRR_THR  
0      0.5       0.5      0.271389          1.0            0  
2024-12-07 04:42:43 - INFO - Video level metrics: 
   AUC  EER  EER_THR  HTER@THR  FAR@THR  FRR@THR  THR  HTER@0.5  FAR@0.5  \
0  NaN  0.5      0.5       0.0      0.0      0.0  0.5       0.0      0.0   

   FRR@0.5  MIN_HTER  MIN_HTER_THR  MIN_FAR_THR  MIN_FRR_THR  
0      0.0       0.5      0.492929          1.0            0  
2024-12-07 04:42:43 - INFO - Current Best MIN_HTER=50.0%, AUC=50.0%


MSE_LOSS=0.2500, CLS_LOSS=0.3039: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:50<00:00,  4.55s/it]
