In [11]:
%load_ext autoreload
%autoreload 2

import os, sys
sys.path.insert(0, '..')

import numpy as np
import pandas as pd
from src.datasets import get_denta_labels, get_dynamic_features
from src.preprocess import extract_faces_denta, extract_landmarks
from src.detector import get_face_detector
import src.utils as utils


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Features and estimators

In [12]:
# The value loaded here is attached to the feature configurations further down
# under their 'extra_features' key.
dynamic_csv_path = '../data/denta_v1/ex3_data.csv'
dynamic_data = get_dynamic_features(dynamic_csv_path)

In [13]:
from src.features import (
    KarolewskiFilteredFeaturesExtractor,
    EmptyFeaturesExtractor
)
from src.method_experiments import estimators

# Feature configurations passed to prepare_features(). Every entry carries a
# display 'name' and an extractor class under 'cls'; the first two additionally
# attach `dynamic_data` as extra features.
features = [
    dict(
        name='Dynamic',
        cls=EmptyFeaturesExtractor,
        extra_features=dynamic_data,
        add_extra_features=True,
    ),
    dict(
        name='KarolewskiFiltered+Dynamic',
        cls=KarolewskiFilteredFeaturesExtractor,
        extra_features=dynamic_data,
        add_extra_features=True,
    ),
    # Extractor-only variant: deliberately has no 'extra_features' and no
    # 'add_extra_features' key (the 'extra_features' entry was disabled).
    dict(
        name='KarolewskiFiltered',
        cls=KarolewskiFilteredFeaturesExtractor,
    ),
]

# Denta

In [14]:
import datetime
import pytz

# Directory that the Denta aggregation cells write their pickled results into.
denta_data_path = os.path.join('..', 'data', 'experiments', 'denta')

# Label table used as `df` by the extraction and feature-preparation cells.
denta_df = get_denta_labels(
    '../data/denta_v1',
    split_type='random',
)
# Optional restriction to the training split (currently disabled):
# denta_df = denta_df[denta_df['Split'] == 'training']


In [None]:
def detect_faces(image):
    """Forward ``image`` to ``app.get``; ``app`` is looked up at call time."""
    return app.get(image)


app = get_face_detector()
denta_root = '../data/denta_v1'

# Both steps work on the same dataset root and label table.
extract_faces_denta(denta_root, denta_df, detector=detect_faces)
extract_landmarks(denta_root, denta_df)


## Features preparation

In [17]:
from src.method_experiments import prepare_features

# Same feature configurations and label table, prepared once per pair of
# reference points.
denta_root = '../data/denta_v1'
features_denta_0_32 = prepare_features(
    denta_root,
    features,
    df=denta_df,
    reference_points=(0, 32),
)
features_denta_64_68 = prepare_features(
    denta_root,
    features,
    df=denta_df,
    reference_points=(64, 68),
)

100%|██████████| 258/258 [00:00<00:00, 712.20it/s]
100%|██████████| 258/258 [00:00<00:00, 341.85it/s]
100%|██████████| 258/258 [00:00<00:00, 341.75it/s]
100%|██████████| 258/258 [00:00<00:00, 724.65it/s]
100%|██████████| 258/258 [00:00<00:00, 341.98it/s]
100%|██████████| 258/258 [00:00<00:00, 341.27it/s]


## Aggregation tests

In [18]:
from src.method_experiments import test_concat_vs_avg

# Concat-vs-avg comparison on the Denta features prepared with
# reference_points=(64, 68); the result is pickled next to the other Denta runs.
agg_tests = test_concat_vs_avg(
    estimators,
    features_denta_64_68,
    cv=5,
    stratified=True,
    iterations=40,
)
agg_pickle_path = os.path.join(denta_data_path, 'aggregation_5fold_64_68.pkl')
utils.save_pickle(agg_pickle_path, agg_tests)

100%|██████████| 1440/1440 [01:37<00:00, 14.84it/s]


In [19]:
from src.method_experiments import test_concat_vs_avg

# Concat-vs-avg comparison on the Denta features prepared with
# reference_points=(0, 32); note that `agg_tests` is rebound by this cell.
agg_tests = test_concat_vs_avg(
    estimators,
    features_denta_0_32,
    cv=5,
    stratified=True,
    iterations=40,
)
agg_pickle_path = os.path.join(denta_data_path, 'aggregation_5fold_0_32.pkl')
utils.save_pickle(agg_pickle_path, agg_tests)

100%|██████████| 1440/1440 [01:39<00:00, 14.46it/s]


In [20]:
# NOTE(review): disabled variant that passes cv=len(features[0]['X_avg']) with
# stratified=False (presumably leave-one-out -- confirm). It references
# `features_denta`, which no visible cell defines (only `features_denta_0_32`
# and `features_denta_64_68` exist), so it must be updated before re-enabling.
# agg_tests_loo = test_concat_vs_avg(estimators, features_denta, cv=len(features[0]['X_avg']), stratified=False)
# utils.save_pickle(os.path.join(denta_data_path, 'aggregation_loo.pkl'), agg_tests_loo)

In [21]:
from src.method_experiments import test_concat_vs_avg, dummy_estimators

# Same comparison as the real estimators, but run with `dummy_estimators` on
# the Denta features prepared with reference_points=(0, 32).
agg_tests = test_concat_vs_avg(
    dummy_estimators,
    features_denta_0_32,
    cv=5,
    stratified=True,
    iterations=50,
)
dummy_pickle_path = os.path.join(denta_data_path, 'dummy_aggregation_5fold.pkl')
utils.save_pickle(dummy_pickle_path, agg_tests)

100%|██████████| 1800/1800 [00:12<00:00, 146.15it/s]


# TNF

In [None]:
from src.preprocess import extract_faces_tnf_grouped
from src.datasets import get_tnf_grouped_labels

def detect_faces(image):
    """Forward ``image`` to ``app.get``; ``app`` is looked up at call time."""
    return app.get(image)


tnf_root = '../data/TNF_grouped'
# Directory that the TNF aggregation cells write their pickled results into.
tnf_data_path = os.path.join('..', 'data', 'experiments', 'tnf')

tnf_df = get_tnf_grouped_labels(tnf_root)
app = get_face_detector()
extract_faces_tnf_grouped(tnf_root, tnf_df, detector=detect_faces)
extract_landmarks(tnf_root, tnf_df)

100%|██████████| 108/108 [00:03<00:00, 35.07it/s]
100%|██████████| 108/108 [00:06<00:00, 17.72it/s]


## Features preparation

In [10]:
from src.method_experiments import prepare_features

# Binarise the target in place: rows whose 'Class' is 'Healthy controls' get
# Label 0, every other row gets Label 1.
healthy_mask = tnf_df['Class'] == 'Healthy controls'
tnf_df.loc[~healthy_mask, 'Label'] = 1
tnf_df.loc[healthy_mask, 'Label'] = 0

# Same feature configurations, prepared once per pair of reference points.
tnf_root = '../data/TNF_grouped'
features_tnf_0_32 = prepare_features(
    tnf_root,
    features,
    df=tnf_df,
    reference_points=(0, 32),
)
features_tnf_64_68 = prepare_features(
    tnf_root,
    features,
    df=tnf_df,
    reference_points=(64, 68),
)

100%|██████████| 108/108 [00:00<00:00, 407.55it/s]
100%|██████████| 108/108 [00:00<00:00, 457.62it/s]
100%|██████████| 108/108 [00:00<00:00, 397.79it/s]
100%|██████████| 108/108 [00:00<00:00, 239.20it/s]
100%|██████████| 108/108 [00:00<00:00, 234.27it/s]
100%|██████████| 108/108 [00:00<00:00, 237.10it/s]
100%|██████████| 108/108 [00:01<00:00, 71.88it/s]
100%|██████████| 108/108 [00:01<00:00, 61.17it/s]
100%|██████████| 108/108 [00:01<00:00, 67.48it/s]
100%|██████████| 108/108 [00:01<00:00, 62.63it/s]
100%|██████████| 108/108 [00:00<00:00, 405.25it/s]
100%|██████████| 108/108 [00:00<00:00, 469.56it/s]
100%|██████████| 108/108 [00:00<00:00, 400.00it/s]
100%|██████████| 108/108 [00:00<00:00, 244.62it/s]
100%|██████████| 108/108 [00:00<00:00, 234.02it/s]
100%|██████████| 108/108 [00:00<00:00, 245.18it/s]
100%|██████████| 108/108 [00:01<00:00, 71.43it/s]
100%|██████████| 108/108 [00:01<00:00, 60.78it/s]
100%|██████████| 108/108 [00:01<00:00, 67.46it/s]
100%|██████████| 108/108 [00:01<00:00,

## Aggregation tests

In [11]:
from src.method_experiments import test_concat_vs_avg

# Concat-vs-avg comparison on the TNF features prepared with
# reference_points=(0, 32); the result is pickled under the TNF directory.
agg_tests = test_concat_vs_avg(
    estimators,
    features_tnf_0_32,
    cv=5,
    stratified=True,
    iterations=20,
)
agg_pickle_path = os.path.join(tnf_data_path, 'aggregation_5fold_0_32.pkl')
utils.save_pickle(agg_pickle_path, agg_tests)

100%|██████████| 2400/2400 [04:50<00:00,  8.27it/s]


In [12]:
from src.method_experiments import test_concat_vs_avg

# Concat-vs-avg comparison on the TNF features prepared with
# reference_points=(64, 68); note that `agg_tests` is rebound by this cell.
agg_tests = test_concat_vs_avg(
    estimators,
    features_tnf_64_68,
    cv=5,
    stratified=True,
    iterations=20,
)
agg_pickle_path = os.path.join(tnf_data_path, 'aggregation_5fold_64_68.pkl')
utils.save_pickle(agg_pickle_path, agg_tests)

100%|██████████| 2400/2400 [05:58<00:00,  6.69it/s]


In [14]:
# `dummy_estimators` is imported in this cell as well: previously the name was
# only bound by the Denta dummy-baseline cell, so running the TNF section on
# its own (fresh kernel, Denta cells skipped) failed with a NameError.
from src.method_experiments import test_concat_vs_avg, dummy_estimators

# Same comparison as the real estimators, but run with `dummy_estimators` on
# the TNF features prepared with reference_points=(64, 68).
agg_tests = test_concat_vs_avg(
    dummy_estimators,
    features_tnf_64_68,
    cv=5,
    stratified=True,
    iterations=20,
)
utils.save_pickle(os.path.join(tnf_data_path, 'dummy_aggregation_5fold.pkl'), agg_tests)

100%|██████████| 2400/2400 [00:17<00:00, 133.54it/s]
