In [None]:
# Notebook-wide configuration; every later cell reads these module-level constants.

# When True, an already existing RES_PATH folder is emptied before the run starts.
REMOVE_RES_CONTENT = True
# Side length (pixels) images are resized to; also used as the model input size.
IMG_SIZE=224
# Alternative, currently disabled value:
#IMG_SIZE=64
# When False, images are only cast to float32 and keep their original size.
IMG_RESIZE=True
# Base seed for NumPy, TensorFlow and every pandas / scikit-learn shuffle below.
SEED = 42
# Folder that receives the result and ROC CSV files.
RES_PATH = '/kaggle/working/res/'
# Folder holding metadata.csv; image paths in the metadata are relative to it.
DATASET_PATH = '/kaggle/input/deepfakedataset/data/'
# When True, get_exp_data prints extra diagnostics about the sampled ratios.
SHOW_EXTRA_INFO=False

# Setup

In [None]:
!pip install -q tabulate
import zipfile
import pandas as pd
import numpy as np
import tabulate as tb
from typing import Dict
import tensorflow as tf
import json
import re
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, GlobalAveragePooling2D, Input, BatchNormalization, concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.applications import Xception, EfficientNetB4, InceptionV3, EfficientNetV2M,EfficientNetV2S , MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping
import os
import shutil
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import (
    Input, Conv2D, BatchNormalization, LeakyReLU,
    MaxPooling2D, Flatten, Dense, Dropout, concatenate
)
from tensorflow.keras import layers
from sklearn.metrics import precision_score, recall_score, confusion_matrix, roc_auc_score, log_loss, brier_score_loss
import time

# Seed the global NumPy and TensorFlow random generators with the notebook-wide SEED.
np.random.seed(SEED)
tf.random.set_seed(SEED)


# Report whether TensorFlow sees a GPU; this only prints, it configures nothing.
device_name = tf.test.gpu_device_name()
if device_name:
    print(f"GPU available: {device_name}")
else:
    print("No GPU available!")

# Dataset loading and splitting

In [None]:
# Load the metadata table and turn the relative image paths into absolute ones.
file_path = DATASET_PATH + 'metadata.csv'
df_tmp = pd.read_csv(file_path, sep=',')
df_tmp['path'] = DATASET_PATH + df_tmp['path']

# Drop rows whose 'deepfake' flag is 0. The explicit .copy() makes the result an
# independent frame, so adding a column below does not raise SettingWithCopyWarning.
df_tmp = df_tmp[df_tmp['deepfake'] != 0].copy()

# Collapse the one-hot ethnicity flags into a single label column (None when no flag is 1).
# Labels are written in reverse priority so that, as in the original nested
# conditional, 'white' wins over 'black', which wins over 'asian'.
ethnicity = pd.Series([None] * len(df_tmp), index=df_tmp.index, dtype=object)
for ethnicity_name in ('asian', 'black', 'white'):
    ethnicity.loc[df_tmp[ethnicity_name] == 1] = ethnicity_name
df_tmp['ethnicity'] = ethnicity

# Keep only the columns used by the experiments and give them shorter names.
df = (
    df_tmp[['deepfake', 'male', 'ethnicity', 'eyeglasses', 'heavy_makeup', 'big_lips', 'path']]
    .rename(columns={'deepfake': 'type', 'male': 'sex', 'heavy_makeup': 'makeup', 'big_lips': 'lips',})
    .copy()
)

# 'typeName' is the readable label; 'type' becomes the binary target (fake=1, real=0).
df['typeName'] = df['type'].replace({1: 'fake', -1: 'real'})
df['type'] = df['typeName'].map({'fake': 1, 'real': 0})

# Attribute flags use -1 / 0 / 1 in the metadata; 0 is mapped to None (no label).
df['sex'] = df['sex'].replace({-1: 'female', 0: None, 1: 'male'})
df['makeup'] = df['makeup'].replace({-1: 'no', 0: None, 1: 'yes'})
df['lips'] = df['lips'].replace({-1: 'small', 0: None, 1: 'big'})
df['eyeglasses'] = df['eyeglasses'].replace({-1: 'no', 0: None, 1: 'yes'})


print(tb.tabulate(df.head(), headers='keys', tablefmt='psql'))
print(f"Dataset size: {len(df)}")

In [None]:
def get_balanced_subset(
    df, class_col, feature_col, feature_value,
    samples_per_class, randomize=True, reset_index=False, seed=None
):
    """
    Select a balanced subset of the data for a given feature value, with equal number of samples per class.

    Args:
        df: DataFrame
        class_col: column name of class labels
        feature_col: column name of feature
        feature_value: specific feature value to filter
        samples_per_class: number of samples per class
        randomize: whether to shuffle within class before selecting
        reset_index: whether to reset index of returned DataFrame
        seed: random seed used when `randomize` is True; None (default) falls
            back to the notebook-wide SEED

    Returns:
        Balanced DataFrame subset, classes in sorted order (groupby order)

    Raises:
        ValueError: if any class present in `df` has fewer than
            `samples_per_class` rows for `feature_value` (including none at all)
    """
    subset = df[df[feature_col] == feature_value]

    # Count against every class known to `df`, not only those that survive the
    # feature filter: a class that is completely absent has a count of 0 and
    # must be reported too, otherwise the returned subset is silently unbalanced.
    all_classes = df[class_col].dropna().unique()
    counts = subset[class_col].value_counts().reindex(all_classes, fill_value=0)
    for cl, count in counts.items():
        if count < samples_per_class:
            raise ValueError(f"Not enough samples for class '{cl}' in feature '{feature_value}'. "
                             f"Required: {samples_per_class}, Available: {count}")

    if randomize:
        random_state = SEED if seed is None else seed

    parts = []
    for _, group in subset.groupby(class_col):
        if randomize:
            group = group.sample(frac=1, random_state=random_state)
        parts.append(group.head(samples_per_class))

    # pd.concat rejects an empty list; an empty selection yields an empty frame instead.
    result = pd.concat(parts) if parts else subset.iloc[0:0]

    if reset_index:
        result = result.reset_index(drop=True)

    return result

# Smoke test: two shuffled rows per class, taken from the rows whose sex is 'male'.
tmp_test = get_balanced_subset(
    df=df, class_col='type', feature_col='sex', feature_value='male',
    samples_per_class=2, randomize=True, reset_index=True)
print(tb.tabulate(tmp_test, headers='keys', tablefmt='psql'))

In [None]:
def get_exp_data(df, class_col, feature_col, ratio : Dict, size, randomize=True, exclude_column=None, exclude_df=None, max_diff=0.05):
    '''
    Get a class-balanced subset of the data whose feature values follow the requested ratios.

    Args:
        df: DataFrame containing the data
        class_col: column name for class labels
        feature_col: column name for features
        ratio: dictionary with feature values as keys and their ratios as values
        size: total number of samples to return
        randomize: whether to shuffle the DataFrame (with SEED) before processing
        exclude_column: column whose values identify rows that must not be selected
        exclude_df: DataFrame containing values to exclude based on exclude_column
        max_diff: largest tolerated absolute difference between a requested
            ratio and the ratio actually present in the result

    Returns:
        DataFrame with a fresh 0..n-1 index. Feature values missing from
        `ratio` are skipped. The row count can be slightly below `size`
        because the per-class amount is rounded down.

    Raises:
        ValueError: if an exclude column is missing, if no feature value of
            the data appears in `ratio`, if a feature with a positive ratio is
            absent from the result, or if an achieved ratio deviates from the
            requested one by more than `max_diff`.
    '''
    if randomize:
        df_rnd = df.sample(frac=1, random_state=SEED).reset_index(drop=True)
    else:
        df_rnd = df.copy()

    if exclude_column is not None and exclude_df is not None:
        if exclude_column not in df_rnd.columns:
            raise ValueError(f"Column '{exclude_column}' not found in DataFrame.")
        if exclude_column not in exclude_df.columns:
            raise ValueError(f"Column '{exclude_column}' not found in exclude DataFrame.")
        df_rnd = df_rnd[~df_rnd[exclude_column].isin(exclude_df[exclude_column])]

    n_classes = len(df_rnd[class_col].unique())

    # One balanced slice per feature value; all slices are concatenated once at the end.
    parts = []
    for uf in df_rnd[feature_col].unique():
        if ratio.get(uf) is None:
            if SHOW_EXTRA_INFO:
                print(f"Feature '{uf}' not found in ratios. Skipping.")
            continue
        # The small epsilon keeps int() from truncating a product that is an
        # integer mathematically but lands just below it in floating point
        # (e.g. 100 * 0.29 == 28.999999999999996).
        c_amt = int(size * ratio[uf] / n_classes + 1e-9)
        parts.append(
            get_balanced_subset(df=df_rnd, class_col=class_col, feature_col=feature_col, feature_value=uf,
                                samples_per_class=c_amt, randomize=False)
        )

    if not parts:
        # Previously this case surfaced as a TypeError from len(None) further down.
        raise ValueError(f"None of the values in column '{feature_col}' appear in the requested ratios {ratio}.")

    df_res = pd.concat(parts)

    if len(df_res) < size and SHOW_EXTRA_INFO:
        print(f"Samples for ({len(df_res)}) are less than requested ({size}).")

    ratios_fet = df_res[feature_col].value_counts(normalize=True).to_dict()
    ratios_cls = df_res[class_col].value_counts(normalize=False).to_dict()

    if SHOW_EXTRA_INFO:
        print(f"[] Ratios for {feature_col}: {ratios_fet}")
        print(f"[] Ratios for {class_col}: {ratios_cls}")
        print()

    # Verify that the composition actually achieved matches what was requested.
    for k in ratio:
        if ratios_fet.get(k) is None:
            if ratio[k] > 0.0:
                raise ValueError(f"Feature '{k}' not found in DataFrame after sampling (try increase 'size' parameter).")
        elif abs(ratios_fet[k] - ratio[k]) > max_diff:
            raise ValueError(f"Feature '{k}' ratio {ratios_fet[k]} differs from requested {ratio[k]} by more than {max_diff}.")

    df_res = df_res.reset_index(drop=True)

    return df_res

# Smoke test: 10 rows split 20% white / 60% black / 20% asian, balanced over 'type'.
tmp_test = get_exp_data(
    df=df, class_col='type', feature_col='ethnicity', ratio={'white':0.2, 'black':0.6, 'asian': 0.2}, size=10)
print(tb.tabulate(tmp_test, headers='keys', tablefmt='psql'))

# Methods for testing

In [None]:
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_efficientnet
from tensorflow.keras.applications.xception import preprocess_input as preprocess_xception
from tensorflow.keras.applications.inception_v3 import preprocess_input as preprocess_inception_v3
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input as preprocess_efficientnetv2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as preprocess_mobilenet_v2

# Augmentation pipeline passed to perform_tests as `data_augmentation`.
# Only random rotation and random zoom are active; the commented-out layers are disabled.
augmentation_std = tf.keras.Sequential([
    #layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.05),
    #layers.RandomContrast(0.05),
    #layers.RandomBrightness(0.05),
], name="augmentation_std")

# Shared tf.data setting used for both map parallelism and prefetching below.
AUTOTUNE = tf.data.AUTOTUNE

def load_image(file_path, label, preprocess, augment):
    """
    Read one JPEG file and turn it into a model-ready (image, label) pair.

    Args:
        file_path: path of the image file
        label: class label, returned unchanged
        preprocess: optional callable applied to the float image
            (e.g. a Keras `preprocess_input`); skipped when falsy
        augment: optional augmentation callable or Keras layer/model applied
            after preprocessing; skipped when falsy

    Returns:
        Tuple (image, label); the image has 3 channels and is resized to
        (IMG_SIZE, IMG_SIZE) when IMG_RESIZE is set, otherwise only cast to float32.
    """
    image = tf.io.read_file(file_path)
    image = tf.image.decode_jpeg(image, channels=3)

    if IMG_RESIZE:
        image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    else:
        image = tf.cast(image, tf.float32)

    if preprocess:
        image = preprocess(image)

    if augment:
        if isinstance(augment, tf.keras.layers.Layer):
            # Keras random preprocessing layers only transform their input in
            # training mode. Inside Dataset.map there is no fit() call context
            # to switch it on, so it has to be requested explicitly; otherwise
            # the augmentation can silently act as an identity.
            image = augment(image, training=True)
        else:
            image = augment(image)

    return image, label

def get_data_for_model(df, class_col, files_col, batch_size, preprocess, augment):
    """
    Build a batched, prefetched tf.data pipeline from a DataFrame of image paths and labels.

    Args:
        df: DataFrame with one row per image
        class_col: column holding the labels
        files_col: column holding the image file paths
        batch_size: number of images per batch
        preprocess: forwarded to load_image
        augment: forwarded to load_image

    Returns:
        tf.data.Dataset yielding (image batch, label batch) in the row order of `df`.
    """
    def _load(path, label):
        return load_image(path, label, preprocess, augment)

    paths_and_labels = (df[files_col].values, df[class_col])

    return (
        tf.data.Dataset.from_tensor_slices(paths_and_labels)
        .map(_load, num_parallel_calls=AUTOTUNE)
        .batch(batch_size)
        .prefetch(AUTOTUNE)
    )

KeyboardInterrupt: 

In [None]:
# Make sure the results folder exists; optionally empty a folder left by an earlier run.
if os.path.exists(RES_PATH):
    if REMOVE_RES_CONTENT:
        # Delete every entry inside the folder but keep the folder itself.
        for filename in os.listdir(RES_PATH):
            file_path = os.path.join(RES_PATH, filename)
            try:
                is_plain_entry = os.path.isfile(file_path) or os.path.islink(file_path)
                if is_plain_entry:
                    os.unlink(file_path)          # file or symbolic link
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)      # directory with everything in it
            except Exception as e:
                # Best effort: report the failure and continue with the next entry.
                print(f'Failed to delete {file_path}. Reason: {e}')
        print(f"Cleared contents of folder: {RES_PATH}")
else:
    os.makedirs(RES_PATH)
    print(f"Created folder: {RES_PATH}")

In [None]:
def get_done_reps(model_name, feature_name, amt_per_rep):
    """
    Find the repetitions that are already complete in a previous results file.

    Args:
        model_name: model name used in the results file name (spaces become underscores)
        feature_name: feature name used in the results file name
        amt_per_rep: number of result rows a repetition must have to count as done

    Returns:
        Tuple (done, previous): `done` lists the 'rep' values that have exactly
        `amt_per_rep` rows, `previous` is the loaded results DataFrame.
        Returns ([], None) when the results file does not exist.
    """
    results_path = f'{RES_PATH}res_{feature_name}_{model_name.replace(" ", "_")}.csv'

    if os.path.exists(results_path):
        tmp_df = pd.read_csv(results_path)
        rep_col = tmp_df["rep"]
        dones = [
            r for r in rep_col.unique()
            if len(tmp_df[rep_col == r]) == amt_per_rep
        ]
        return dones, tmp_df

    return [], None

In [None]:
def compute_fairness_metrics(y_true, y_pred, group_labels):
    """
    Compute per-group TPR, FPR and positive-prediction rate, plus the max-min gap of each.

    Args:
        y_true: array of true binary labels (0/1)
        y_pred: array of predicted binary labels (0/1)
        group_labels: array with the group of each sample, aligned with y_true / y_pred

    Returns:
        Dict with 'tpr_gap', 'fpr_gap', 'pos_rate_gap' and, under 'per_group',
        the dictionaries 'tpr', 'fpr' and 'pos_rate' keyed by group.
        A rate whose denominator is zero is reported as 0.0.
    """
    def _spread(rates):
        values = rates.values()
        return max(values) - min(values)

    tpr, fpr, pos_rate = {}, {}, {}

    for g in pd.unique(group_labels):
        in_group = (group_labels == g)
        group_pred = y_pred[in_group]
        # labels=[0,1] guarantees a 2x2 matrix even if a group lacks one class.
        tn, fp, fn, tp = confusion_matrix(y_true[in_group], group_pred, labels=[0,1]).ravel()

        actual_pos = tp + fn
        actual_neg = fp + tn
        tpr[g] = tp / actual_pos if actual_pos > 0 else 0.0
        fpr[g] = fp / actual_neg if actual_neg > 0 else 0.0
        pos_rate[g] = (group_pred == 1).mean()

    per_group = {
        "tpr": tpr,
        "fpr": fpr,
        "pos_rate": pos_rate
    }

    return {
        "tpr_gap": _spread(tpr),
        "fpr_gap": _spread(fpr),
        "pos_rate_gap": _spread(pos_rate),
        "per_group": per_group
    }


In [None]:
def perform_tests(df, train_metas, test_metas, validation_size, reps, feature_split_col, get_model, preprocess, class_col='type', exclude_column='path',
                  files_col='path', data_augmentation=False, epochs_num=15, batch_size=64):
    """
    Train a model on differently composed training sets and score it on several test sets.

    For each repetition and each entry of `train_metas`, a train/validation
    sample is drawn, a model is built and fitted via `get_model`, and the model
    is evaluated on one test set per entry of `test_metas`. After every
    evaluation the accumulated results are rewritten to
    `{RES_PATH}res_{feature_split_col}_{model_name}.csv`, and the raw
    probabilities go to a separate ROC CSV. Repetitions that are already
    complete in an existing results file are reused instead of recomputed.

    Args:
        df: full dataset
        train_metas: list of {'ratio': {...}, 'size': int} describing training sets
        test_metas: list of {'ratio': {...}, 'size': int} describing test sets
        validation_size: number of rows held out from each training sample for validation
        reps: number of repetitions
        feature_split_col: column whose value ratios are controlled
        get_model: callable (input_shape, train_dataset, val_dataset, epochs_num)
            returning a fitted model; its name minus 'create_' is the model name
        preprocess: image preprocessing callable forwarded to the data pipeline
        class_col: label column
        exclude_column: column used to keep training/validation rows out of the test sets
        files_col: column with the image paths
        data_augmentation: augmentation applied to the training set only (False disables it)
        epochs_num: maximum number of epochs passed to `get_model`
        batch_size: batch size of all datasets

    Returns:
        None; results are written to CSV files under RES_PATH.
    """
    result_columns = [
        'rep', 'model_name', 'feature_split_col',
        'train_size', 'train_ratio_detail', 'test_size', 'test_ratio_detail',
        'train_ratio_rel', 'test_ratio_rel', "train_ratio", "test_ratio",
        'accuracy', 'TN', 'FP', 'FN', 'TP',
        'fairness_tpr_gap', 'fairness_fpr_gap', 'fairness_pos_rate_gap',
        'fairness_per_group_json',
        'path_roc'
    ]

    model_name = get_model.__name__.replace("create_", "")
    file_tag = f'{feature_split_col}_{model_name.replace(" ", "_")}'
    results_path = f'{RES_PATH}res_{file_tag}.csv'

    # A repetition is complete when it has one row per (train set, test set) pair.
    done_reps, prev_results = get_done_reps(model_name, feature_split_col, len(test_metas) * len(train_metas))

    res = []
    for r in range(reps):
        if r in done_reps:
            res.extend(prev_results[prev_results['rep']==r].values.tolist())
            print(f"Rep {r} already done for {model_name}. Skipping...")
            continue

        np.random.seed(SEED + r)
        tf.random.set_seed(SEED + r)

        for train_meta in train_metas:
            train_val_size = train_meta['size'] + validation_size
            train_val = get_exp_data(df, class_col=class_col, feature_col=feature_split_col, ratio=train_meta['ratio'], size=train_val_size)
            train_val = train_val.sample(frac=1, random_state=SEED+r).reset_index(drop=True)

            # Stratify on class and feature together so both splits keep the composition.
            stratify_key = train_val[class_col].astype(str) + "_" + train_val[feature_split_col].astype(str)

            train, val = train_test_split(
                train_val,
                test_size=validation_size / train_val_size,
                stratify=stratify_key,
                random_state=SEED + r
            )

            # Exclude the validation rows as well as the training rows: validation
            # data drives early stopping, and because get_exp_data shuffles with the
            # same seed every time, excluding only `train` made the test sets consist
            # largely of validation images.
            tests = [
                get_exp_data(df, class_col=class_col, feature_col=feature_split_col, ratio=tm['ratio'], size=tm['size'],
                             exclude_column=exclude_column, exclude_df=train_val)
                for tm in test_metas
            ]

            train_dataset = get_data_for_model(train, class_col=class_col, files_col=files_col, batch_size=batch_size, preprocess=preprocess, augment=data_augmentation)
            val_dataset = get_data_for_model(val, class_col=class_col, files_col=files_col, batch_size=batch_size, preprocess=preprocess, augment=False)
            test_datasets = [
                get_data_for_model(test, class_col=class_col, files_col=files_col, batch_size=batch_size, preprocess=preprocess, augment=False) for test in tests
            ]

            train_ratio = '/'.join([f"{k}:{v}" for k, v in train_meta['ratio'].items()])
            train_ratio_rel = '/'.join([f"{k}:{v:.4f}" for k, v in train[feature_split_col].value_counts(normalize=True).to_dict().items()])
            # Compact form of the ratio: letters, zeros, dots and colons are stripped
            # (e.g. "male:0.2/female:0.8" -> "2/8").
            train_ratio_sim = re.sub(r'[a-zA-Z0.:]', '', train_ratio)

            model = get_model(input_shape=(IMG_SIZE,IMG_SIZE,3),
                              train_dataset=train_dataset, val_dataset=val_dataset,
                              epochs_num=epochs_num)

            for test_dataset, test_meta, test_df in zip(test_datasets, test_metas, tests):
                predictions = model.predict(test_dataset)
                y_true = test_df[class_col].to_numpy()
                y_pred = (predictions > 0.5).astype(int).flatten()

                # labels=[0, 1] keeps the matrix 2x2 even when only one class occurs,
                # matching compute_fairness_metrics.
                tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
                accuracy = accuracy_score(y_true, y_pred)

                fairness = compute_fairness_metrics(
                    y_true=y_true,
                    y_pred=np.array(y_pred),
                    group_labels=test_df[feature_split_col].values
                )
                fairness_json = json.dumps(fairness["per_group"])

                test_ratio = '/'.join([f"{k}:{v}" for k, v in test_meta['ratio'].items()])
                test_ratio_rel = '/'.join([f"{k}:{v:.4f}" for k, v in test_df[feature_split_col].value_counts(normalize=True).to_dict().items()])
                test_ratio_sim = re.sub(r'[a-zA-Z0.:]', '', test_ratio)

                # Store the raw probabilities so ROC curves can be drawn later;
                # the timestamp keeps file names unique.
                roc_df = pd.DataFrame({
                    'y_true': y_true,
                    'y_pred_proba': predictions.flatten()
                })
                path_roc = f'{RES_PATH}roc_{file_tag}_rep{r}_{time.time_ns()}.csv'
                roc_df.to_csv(path_roc, index=False)

                res.append([
                    r,
                    model_name,
                    feature_split_col,
                    train_meta['size'],
                    train_ratio,
                    test_meta['size'],
                    test_ratio,
                    train_ratio_rel,
                    test_ratio_rel,
                    train_ratio_sim,
                    test_ratio_sim,
                    accuracy,
                    tn, fp, fn, tp,
                    fairness["tpr_gap"],
                    fairness["fpr_gap"],
                    fairness["pos_rate_gap"],
                    fairness_json,
                    path_roc
                ])

                print(f"Rep: {r:2} | Model: {model_name} | Feature Split: {feature_split_col} | Ratio: {test_ratio} | Acc: {accuracy:.2f}")

                # Rewrite the whole results file after every evaluation so an
                # interrupted run loses at most the repetition in progress.
                pd.DataFrame(res, columns=result_columns).to_csv(results_path, index=False)
    print(f"Done for {model_name}.")


# Models implementation

In [None]:
def create_xception(input_shape, train_dataset, val_dataset, epochs_num):
    """
    Build an ImageNet-initialised Xception binary classifier and fine-tune all of its layers.

    Args:
        input_shape: shape of one input image
        train_dataset: dataset used for fitting
        val_dataset: dataset monitored for early stopping
        epochs_num: maximum number of epochs

    Returns:
        The fitted Keras model with a single sigmoid output.
    """
    backbone = Xception(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone.trainable = True  # the whole backbone is fine-tuned, nothing is frozen

    # Classification head: pooled features -> dropout -> one sigmoid unit.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dropout(0.3)(features)
    output = Dense(1, activation='sigmoid')(features)

    model = Model(inputs=backbone.input, outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    print("FITTING FULL XCEPTION")
    # Stop after 2 epochs without a val_accuracy improvement and restore the best weights.
    stopper = EarlyStopping(monitor='val_accuracy', patience=2, mode='max', restore_best_weights=True, verbose=1)
    model.fit(train_dataset, validation_data=val_dataset, epochs=epochs_num, callbacks=[stopper])

    return model

In [None]:
def create_efficientnetb4(input_shape, train_dataset, val_dataset, epochs_num):
    """
    Build an ImageNet-initialised EfficientNetB4 binary classifier and fine-tune all of its layers.

    Args:
        input_shape: shape of one input image
        train_dataset: dataset used for fitting
        val_dataset: dataset monitored for early stopping
        epochs_num: maximum number of epochs

    Returns:
        The fitted Keras model with a single sigmoid output.
    """
    backbone = EfficientNetB4(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone.trainable = True  # the whole backbone is fine-tuned, nothing is frozen

    # Classification head: pooled features -> dropout -> one sigmoid unit.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dropout(0.3)(features)
    output = Dense(1, activation='sigmoid')(features)

    model = Model(inputs=backbone.input, outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Stop after 2 epochs without a val_accuracy improvement and restore the best weights.
    stopper = EarlyStopping(monitor='val_accuracy', patience=2, mode='max', restore_best_weights=True, verbose=1)

    print("TRAINING FULL EfficientNetB4")
    model.fit(train_dataset, validation_data=val_dataset, epochs=epochs_num, callbacks=[stopper])

    return model


In [None]:
def create_efficientnetv2(input_shape, train_dataset, val_dataset, epochs_num):
    """
    Build an ImageNet-initialised EfficientNetV2S binary classifier and fine-tune all of its layers.

    Args:
        input_shape: shape of one input image
        train_dataset: dataset used for fitting
        val_dataset: dataset monitored for early stopping
        epochs_num: maximum number of epochs

    Returns:
        The fitted Keras model with a single sigmoid output.
    """
    # The S variant is used; EfficientNetV2M is imported as well but not used here.
    backbone = EfficientNetV2S(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone.trainable = True  # the whole backbone is fine-tuned, nothing is frozen

    # Classification head: pooled features -> dropout -> one sigmoid unit.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dropout(0.3)(features)
    output = Dense(1, activation='sigmoid')(features)

    model = Model(inputs=backbone.input, outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Stop after 2 epochs without a val_accuracy improvement and restore the best weights.
    stopper = EarlyStopping(monitor='val_accuracy', patience=2, mode='max', restore_best_weights=True, verbose=1)

    print("TRAINING FULL EfficientNetV2S")
    model.fit(train_dataset, validation_data=val_dataset, epochs=epochs_num, callbacks=[stopper])

    return model


In [None]:
def create_inceptionv3(input_shape, train_dataset, val_dataset, epochs_num):
    """
    Build an ImageNet-initialised InceptionV3 binary classifier and fine-tune all of its layers.

    Args:
        input_shape: shape of one input image
        train_dataset: dataset used for fitting
        val_dataset: dataset monitored for early stopping
        epochs_num: maximum number of epochs

    Returns:
        The fitted Keras model with a single sigmoid output.
    """
    backbone = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone.trainable = True  # the whole backbone is fine-tuned, nothing is frozen

    # Classification head: pooled features -> dropout -> one sigmoid unit.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dropout(0.3)(features)
    output = Dense(1, activation='sigmoid')(features)

    model = Model(inputs=backbone.input, outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    print("TRAINING FULL InceptionV3")
    # Stop after 2 epochs without a val_accuracy improvement and restore the best weights.
    stopper = EarlyStopping(monitor='val_accuracy', patience=2, mode='max', restore_best_weights=True, verbose=1)
    model.fit(train_dataset, validation_data=val_dataset, epochs=epochs_num, callbacks=[stopper])

    return model

In [None]:
def create_mobilenetv2(input_shape, train_dataset, val_dataset, epochs_num):
    """
    Build an ImageNet-initialised MobileNetV2 binary classifier and fine-tune all of its layers.

    Args:
        input_shape: shape of one input image
        train_dataset: dataset used for fitting
        val_dataset: dataset monitored for early stopping
        epochs_num: maximum number of epochs

    Returns:
        The fitted Keras model with a single sigmoid output.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone.trainable = True  # the whole backbone is fine-tuned, nothing is frozen

    # Classification head: pooled features -> dropout -> one sigmoid unit.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dropout(0.3)(features)
    output = Dense(1, activation='sigmoid')(features)

    model = Model(inputs=backbone.input, outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    print("TRAINING FULL MobileNetV2")
    # Stop after 2 epochs without a val_accuracy improvement and restore the best weights.
    stopper = EarlyStopping(monitor='val_accuracy', patience=2, mode='max', restore_best_weights=True, verbose=1)
    model.fit(train_dataset, validation_data=val_dataset, epochs=epochs_num, callbacks=[stopper])

    return model

# Presets

```
perform_tests(df=df,
             train_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 5000},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 5000},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 5000},
                  ],
              test_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 500},
                  {'ratio': {'male':0.3, 'female':0.7}, 'size': 500},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 500},
                  {'ratio': {'male':0.7, 'female':0.3}, 'size': 500},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 500},
              ],
              validation_size=500,
              reps=10,
              feature_split_col='sex',
              get_model=create_xception,
              preprocess=preprocess_xception,
              data_augmentation=augmentation_std
              )

perform_tests(df=df,
              train_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 5000},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 5000},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 5000},
                  ],
              test_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 500},
                  {'ratio': {'male':0.3, 'female':0.7}, 'size': 500},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 500},
                  {'ratio': {'male':0.7, 'female':0.3}, 'size': 500},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 500},
              ],
              validation_size=500,
              reps=10,
              feature_split_col='sex',
              get_model=create_inceptionv3,
              preprocess=preprocess_inception_v3,
              data_augmentation=augmentation_std
              )

perform_tests(df=df,
             train_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 5000},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 5000},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 5000},
                  ],
              test_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 500},
                  {'ratio': {'male':0.3, 'female':0.7}, 'size': 500},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 500},
                  {'ratio': {'male':0.7, 'female':0.3}, 'size': 500},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 500},
              ],
              validation_size=500,
              reps=10,
              feature_split_col='sex',
              get_model=create_efficientnetv2,
              preprocess=preprocess_efficientnetv2,
              data_augmentation=augmentation_std
              )






perform_tests(df=df,
              train_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 5000},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 5000},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 5000},
                  ],
              test_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 500},
                  {'ratio': {'male':0.3, 'female':0.7}, 'size': 500},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 500},
                  {'ratio': {'male':0.7, 'female':0.3}, 'size': 500},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 500},
              ],
              validation_size=500,
              reps=10,
              feature_split_col='sex',
              get_model=create_efficientnetb4,
              preprocess=preprocess_efficientnet,
              data_augmentation=augmentation_std
              )


perform_tests(df=df,
             train_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 5000},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 5000},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 5000},
                  ],
              test_metas=[
                  {'ratio': {'male':0.1, 'female':0.9}, 'size': 500},
                  {'ratio': {'male':0.3, 'female':0.7}, 'size': 500},
                  {'ratio': {'male':0.5, 'female':0.5}, 'size': 500},
                  {'ratio': {'male':0.7, 'female':0.3}, 'size': 500},
                  {'ratio': {'male':0.9, 'female':0.1}, 'size': 500},
              ],
              validation_size=500,
              reps=10,
              feature_split_col='sex',
              get_model=create_simple_cnn,
              preprocess=preprocess_simple_cnn,
              data_augmentation=augmentation_std
              )


```

# Running the tests

In [None]:
# Experiment grid shared by all three models: one training composition
# (20% male / 80% female) evaluated on nine test compositions from 10% to 90% male.
# The ratios are written as literals so the recorded ratio strings stay exact.
sex_train_metas = [
    {'ratio': {'male':0.2, 'female':0.8}, 'size': 5000},
]
sex_test_metas = [
    {'ratio': {'male': male_share, 'female': female_share}, 'size': 500}
    for male_share, female_share in [
        (0.1, 0.9), (0.2, 0.8), (0.3, 0.7),
        (0.4, 0.6), (0.5, 0.5), (0.6, 0.4),
        (0.7, 0.3), (0.8, 0.2), (0.9, 0.1),
    ]
]

# Each model is paired with the preprocessing function of its own architecture.
sex_model_runs = [
    (create_xception, preprocess_xception),
    (create_inceptionv3, preprocess_inception_v3),
    (create_efficientnetv2, preprocess_efficientnetv2),
]

for model_builder, model_preprocess in sex_model_runs:
    perform_tests(
        df=df,
        train_metas=sex_train_metas,
        test_metas=sex_test_metas,
        validation_size=500,
        reps=10,
        feature_split_col='sex',
        get_model=model_builder,
        preprocess=model_preprocess,
        data_augmentation=augmentation_std,
    )

In [None]:
def print_summarise_res(model_name:str, feature_name:str='sex'):
  """
  Print mean and standard deviation of accuracy per (train ratio, test ratio) pair.

  Reads the results file `{RES_PATH}res_{feature_name}_{model_name}.csv`; spaces
  in `model_name` are replaced by underscores, the same way the file name is
  built when the results are written.

  Args:
      model_name: model name as used in the results file name
      feature_name: feature the experiment was split on; defaults to 'sex'

  Returns:
      None. Prints a table, or a message when the results file is missing.
  """
  path = RES_PATH + f'res_{feature_name}_{model_name.replace(" ", "_")}.csv'

  if not os.path.exists(path):
    print(f"File {path} does not exists!")
    return

  res = pd.read_csv(path)
  # The group keys are re-emitted as TrainRatio / TestRatio, so the grouped
  # index itself is dropped afterwards.
  gr = res.groupby(['train_ratio', 'test_ratio']).agg(
      TrainRatio=('train_ratio', 'first'),
      TestRatio=('test_ratio', 'first'),
      Accuracy= ('accuracy', 'mean'),
      AccuracySTD= ('accuracy', 'std'),
  ).reset_index(drop=True)

  gr = gr.round(3).sort_values(by=['TrainRatio', 'TestRatio'], ascending=False)

  print("MODEL: " + model_name)
  print(tb.tabulate(gr, headers='keys', tablefmt='psql'))
  print()
  print()

# Summarise the results of the three models run in the cell above.
print_summarise_res('efficientnetv2')
print_summarise_res('xception')
print_summarise_res('inceptionv3')