In [1]:
import pickle
import numpy as np
import os
import json
from sklearn.metrics import f1_score, accuracy_score

# ProtoNet

Note that the code below implements loading of data from new experiments. We also provide pickled data from running our experiments -- loading of this data is at the end of this notebook.

In [2]:
# Name of the per-model sub-directory that the loading cells look for inside each run directory.
model_name = 'ProtoNet'

In [3]:
# Root of the stability results, relative to the current working directory.
DATA_PATH = os.path.join('..', '..', 'results', 'stability')
# Parent directory of the per-factor prediction trees (data_split, label_selection, ...).
FACTORS = os.path.join(DATA_PATH, 'factors', 'predictions')
# Prediction tree of the golden-model runs, read by the first loading cell.
GOLDEN = os.path.join(DATA_PATH, 'golden', 'predictions', 'golden_model')

In [4]:
# Collect one macro-F1 score per evaluation found under GOLDEN.
# Layout walked: GOLDEN/<split>/<label>/<run>/<model_name>/<evaluation>/results.json
results = []  # macro-F1 of each evaluation (0..1)
failed = 0    # evaluations whose macro-F1 is below 0.5
# NOTE(review): `all` shadows the built-in all(); the name is kept because the
# "Failed percentage of runs" cell reads it.
all = 0       # total number of evaluations read

for split in os.listdir(GOLDEN):
    split_path = os.path.join(GOLDEN, split)
    # Stray files (e.g. OS metadata) would make the nested os.listdir raise.
    if not os.path.isdir(split_path):
        continue
    for label in os.listdir(split_path):
        label_path = os.path.join(split_path, label)
        if not os.path.isdir(label_path):
            continue
        for run in os.listdir(label_path):
            run_path = os.path.join(label_path, run)
            model_path = os.path.join(run_path, model_name)
            # Skips non-directories and runs without output for this model,
            # matching the existence check used by the factor loaders.
            if not os.path.isdir(model_path):
                continue
            for evaluation in os.listdir(model_path):
                evaluation_path = os.path.join(model_path, evaluation)
                if not os.path.isdir(evaluation_path):
                    continue
                with open(os.path.join(evaluation_path, 'results.json'), 'r') as file:
                    data = json.load(file)
                # presumably the two entries are gold and predicted labels — TODO confirm
                score = f1_score(np.array(data['predictions'][0]), np.array(data['predictions'][1]), average='macro')
                results.append(score)
                if score < 0.5:
                    failed += 1
                all += 1

In [5]:
# Mean macro-F1 over all golden-model evaluations, in percent.
np.mean(results) * 100

80.33655926452433

In [6]:
# Standard deviation of the golden-model scores, in percent (np.std default, ddof=0).
np.std(results) * 100

0.9381398239175368

In [7]:
# Share of evaluations with macro-F1 below 0.5; raises ZeroDivisionError if nothing was loaded.
print(f"Failed percentage of runs: {failed / all * 100}%")

Failed percentage of runs: 0.0%


## Load Data

#### Data Split

In [8]:
# Gather macro-F1 scores for the data-split factor.
# Layout walked:
#   FACTORS/data_split/split_<i>/label_<j>/run_<k>/<model_name>/evaluation_<m>/results.json
factor_path = os.path.join(FACTORS, 'data_split')
data_split_results = {'results': [], 'failed': 0, 'all': 0}

for split in os.listdir(factor_path):
    split_path = os.path.join(factor_path, split)
    if not split.startswith('split_') or not os.path.isdir(split_path):
        continue
    for label in os.listdir(split_path):
        label_path = os.path.join(split_path, label)
        if not label.startswith('label_') or not os.path.isdir(label_path):
            continue
        for run in os.listdir(label_path):
            run_path = os.path.join(label_path, run)
            model_path = os.path.join(run_path, model_name)
            # Runs without a directory for this model are ignored.
            if not run.startswith('run_') or not os.path.isdir(run_path) or not os.path.exists(model_path):
                continue
            for evaluation in os.listdir(model_path):
                evaluation_path = os.path.join(model_path, evaluation)
                if not evaluation.startswith('evaluation_') or not os.path.isdir(evaluation_path):
                    continue
                with open(os.path.join(evaluation_path, 'results.json'), 'r') as file:
                    data = json.load(file)
                predictions = data['predictions']
                score = f1_score(np.array(predictions[0]), np.array(predictions[1]), average='macro')

                # The numeric suffix of each directory name identifies the factor value.
                data_split_results['results'].append({
                    'score': score,
                    'split': int(split.split('_')[1]),
                    'label': int(label.split('_')[1]),
                    'run': int(run.split('_')[1]),
                    'adaptation': int(evaluation.split('_')[1])
                })
                if score < 0.5:
                    data_split_results['failed'] += 1
                data_split_results['all'] += 1

In [9]:
# (number of loaded evaluations, number with macro-F1 below 0.5)
len(data_split_results['results']), data_split_results['failed']

(10000, 0)

#### Label Selection

In [10]:
# Gather macro-F1 scores for the label-selection factor.
# Layout walked (label is the outer level here):
#   FACTORS/label_selection/label_<j>/split_<i>/run_<k>/<model_name>/evaluation_<m>/results.json
factor_path = os.path.join(FACTORS, 'label_selection')
label_results = {'results': [], 'failed': 0, 'all': 0}

for label in os.listdir(factor_path):
    label_path = os.path.join(factor_path, label)
    if not label.startswith('label_') or not os.path.isdir(label_path):
        continue
    for split in os.listdir(label_path):
        split_path = os.path.join(label_path, split)
        if not split.startswith('split_') or not os.path.isdir(split_path):
            continue
        for run in os.listdir(split_path):
            run_path = os.path.join(split_path, run)
            model_path = os.path.join(run_path, model_name)
            # Runs without a directory for this model are ignored.
            if not run.startswith('run_') or not os.path.isdir(run_path) or not os.path.exists(model_path):
                continue
            for evaluation in os.listdir(model_path):
                evaluation_path = os.path.join(model_path, evaluation)
                if not evaluation.startswith('evaluation_') or not os.path.isdir(evaluation_path):
                    continue
                with open(os.path.join(evaluation_path, 'results.json'), 'r') as file:
                    data = json.load(file)
                predictions = data['predictions']
                score = f1_score(np.array(predictions[0]), np.array(predictions[1]), average='macro')

                # The numeric suffix of each directory name identifies the factor value.
                label_results['results'].append({
                    'score': score,
                    'split': int(split.split('_')[1]),
                    'label': int(label.split('_')[1]),
                    'run': int(run.split('_')[1]),
                    'adaptation': int(evaluation.split('_')[1])
                })
                if score < 0.5:
                    label_results['failed'] += 1
                label_results['all'] += 1

In [11]:
# (number of loaded evaluations, number with macro-F1 below 0.5)
len(label_results['results']), label_results['failed']

(10000, 0)

#### Choice of Adaptation Data

In [12]:
# Gather macro-F1 scores for the choice-of-adaptation-data factor.
# Layout walked:
#   FACTORS/model_adaptation/split_<i>/label_<j>/run_<k>/<model_name>/evaluation_<m>/results.json
factor_path = os.path.join(FACTORS, 'model_adaptation')
adaptation_results = {'results': [], 'failed': 0, 'all': 0}

for split in os.listdir(factor_path):
    split_path = os.path.join(factor_path, split)
    if not split.startswith('split_') or not os.path.isdir(split_path):
        continue
    for label in os.listdir(split_path):
        label_path = os.path.join(split_path, label)
        if not label.startswith('label_') or not os.path.isdir(label_path):
            continue
        for run in os.listdir(label_path):
            run_path = os.path.join(label_path, run)
            model_path = os.path.join(run_path, model_name)
            # Runs without a directory for this model are ignored.
            if not run.startswith('run_') or not os.path.isdir(run_path) or not os.path.exists(model_path):
                continue
            for evaluation in os.listdir(model_path):
                evaluation_path = os.path.join(model_path, evaluation)
                if not evaluation.startswith('evaluation_') or not os.path.isdir(evaluation_path):
                    continue
                with open(os.path.join(evaluation_path, 'results.json'), 'r') as file:
                    data = json.load(file)
                predictions = data['predictions']
                score = f1_score(np.array(predictions[0]), np.array(predictions[1]), average='macro')

                # The numeric suffix of each directory name identifies the factor value.
                adaptation_results['results'].append({
                    'score': score,
                    'split': int(split.split('_')[1]),
                    'label': int(label.split('_')[1]),
                    'run': int(run.split('_')[1]),
                    'adaptation': int(evaluation.split('_')[1])
                })
                if score < 0.5:
                    adaptation_results['failed'] += 1
                adaptation_results['all'] += 1

In [13]:
# (number of loaded evaluations, number with macro-F1 below 0.5)
len(adaptation_results['results']), adaptation_results['failed']

(10000, 0)

##### Stable

In [14]:
# Gather macro-F1 scores for the "stable" adaptation experiment.
# Layout walked:
#   DATA_PATH/adaptation_stable/predictions/model_adaptation/
#       split_<i>/label_<j>/run_<k>/<model_name>/evaluation_<m>/results.json
factor_path = os.path.join(DATA_PATH, 'adaptation_stable', 'predictions', 'model_adaptation')
stable_adaptation_results = {'results': [], 'failed': 0, 'all': 0}

for split in os.listdir(factor_path):
    split_path = os.path.join(factor_path, split)
    if not split.startswith('split_') or not os.path.isdir(split_path):
        continue
    for label in os.listdir(split_path):
        label_path = os.path.join(split_path, label)
        if not label.startswith('label_') or not os.path.isdir(label_path):
            continue
        for run in os.listdir(label_path):
            run_path = os.path.join(label_path, run)
            model_path = os.path.join(run_path, model_name)
            # Runs without a directory for this model are ignored.
            if not run.startswith('run_') or not os.path.isdir(run_path) or not os.path.exists(model_path):
                continue
            for evaluation in os.listdir(model_path):
                evaluation_path = os.path.join(model_path, evaluation)
                if not evaluation.startswith('evaluation_') or not os.path.isdir(evaluation_path):
                    continue
                # Evaluations numbered above 10 are left out before their file is read.
                # NOTE(review): the reason for the cutoff is not visible here — confirm.
                adaptation_number = int(evaluation.split('_')[1])
                if adaptation_number > 10:
                    continue
                with open(os.path.join(evaluation_path, 'results.json'), 'r') as file:
                    data = json.load(file)
                predictions = data['predictions']
                score = f1_score(np.array(predictions[0]), np.array(predictions[1]), average='macro')

                stable_adaptation_results['results'].append({
                    'score': score,
                    'split': int(split.split('_')[1]),
                    'label': int(label.split('_')[1]),
                    'run': int(run.split('_')[1]),
                    'adaptation': adaptation_number
                })
                if score < 0.5:
                    stable_adaptation_results['failed'] += 1
                stable_adaptation_results['all'] += 1

In [15]:
# (number of loaded evaluations, number with macro-F1 below 0.5)
len(stable_adaptation_results['results']), stable_adaptation_results['failed']

(10000, 0)

In [16]:
# NOTE(review): repeats the previous cell verbatim; candidate for removal.
len(stable_adaptation_results['results']), stable_adaptation_results['failed']

(10000, 0)

##### Unstable

In [17]:
# Gather macro-F1 scores for the "unstable" adaptation experiment.
# Layout walked:
#   DATA_PATH/adaptation_unstable/predictions/model_adaptation/
#       split_<i>/label_<j>/run_<k>/<model_name>/evaluation_<m>/results.json
factor_path = os.path.join(DATA_PATH, 'adaptation_unstable', 'predictions', 'model_adaptation')
unstable_adaptation_results = {'results': [], 'failed': 0, 'all': 0}

for split in os.listdir(factor_path):
    split_path = os.path.join(factor_path, split)
    if not split.startswith('split_') or not os.path.isdir(split_path):
        continue
    for label in os.listdir(split_path):
        label_path = os.path.join(split_path, label)
        if not label.startswith('label_') or not os.path.isdir(label_path):
            continue
        for run in os.listdir(label_path):
            run_path = os.path.join(label_path, run)
            model_path = os.path.join(run_path, model_name)
            # Runs without a directory for this model are ignored.
            if not run.startswith('run_') or not os.path.isdir(run_path) or not os.path.exists(model_path):
                continue
            for evaluation in os.listdir(model_path):
                evaluation_path = os.path.join(model_path, evaluation)
                if not evaluation.startswith('evaluation_') or not os.path.isdir(evaluation_path):
                    continue
                # Evaluations numbered above 10 are left out before their file is read.
                # NOTE(review): the reason for the cutoff is not visible here — confirm.
                adaptation_number = int(evaluation.split('_')[1])
                if adaptation_number > 10:
                    continue
                with open(os.path.join(evaluation_path, 'results.json'), 'r') as file:
                    data = json.load(file)
                predictions = data['predictions']
                score = f1_score(np.array(predictions[0]), np.array(predictions[1]), average='macro')

                unstable_adaptation_results['results'].append({
                    'score': score,
                    'split': int(split.split('_')[1]),
                    'label': int(label.split('_')[1]),
                    'run': int(run.split('_')[1]),
                    'adaptation': adaptation_number
                })
                if score < 0.5:
                    unstable_adaptation_results['failed'] += 1
                unstable_adaptation_results['all'] += 1

In [18]:
# (number of loaded evaluations, number with macro-F1 below 0.5)
len(unstable_adaptation_results['results']), unstable_adaptation_results['failed']

(10000, 0)

In [19]:
# NOTE(review): repeats the previous cell verbatim; candidate for removal.
len(unstable_adaptation_results['results']), unstable_adaptation_results['failed']

(10000, 0)

#### Initialisation of Model

In [20]:
# Gather macro-F1 scores for the model-initialisation factor.
# Layout walked (one extra init_<n> level between label and run):
#   FACTORS/model_initialisation/split_<i>/label_<j>/init_<n>/run_<k>/<model_name>/evaluation_<m>/results.json
factor_path = os.path.join(FACTORS, 'model_initialisation')
initialisation_results = {'results': [], 'failed': 0, 'all': 0}

for split in os.listdir(factor_path):
    split_path = os.path.join(factor_path, split)
    if not split.startswith('split_') or not os.path.isdir(split_path):
        continue
    for label in os.listdir(split_path):
        label_path = os.path.join(split_path, label)
        if not label.startswith('label_') or not os.path.isdir(label_path):
            continue
        for initialisation in os.listdir(label_path):
            initialisation_path = os.path.join(label_path, initialisation)
            if not initialisation.startswith('init_') or not os.path.isdir(initialisation_path):
                continue
            for run in os.listdir(initialisation_path):
                run_path = os.path.join(initialisation_path, run)
                model_path = os.path.join(run_path, model_name)
                # Runs without a directory for this model are ignored.
                if not run.startswith('run_') or not os.path.isdir(run_path) or not os.path.exists(model_path):
                    continue
                for evaluation in os.listdir(model_path):
                    evaluation_path = os.path.join(model_path, evaluation)
                    if not evaluation.startswith('evaluation_') or not os.path.isdir(evaluation_path):
                        continue
                    with open(os.path.join(evaluation_path, 'results.json'), 'r') as file:
                        data = json.load(file)
                    predictions = data['predictions']
                    score = f1_score(np.array(predictions[0]), np.array(predictions[1]), average='macro')

                    # The numeric suffix of each directory name identifies the factor value.
                    initialisation_results['results'].append({
                        'score': score,
                        'split': int(split.split('_')[1]),
                        'label': int(label.split('_')[1]),
                        'run': int(run.split('_')[1]),
                        'adaptation': int(evaluation.split('_')[1]),
                        'initialisation': int(initialisation.split('_')[1])
                    })
                    if score < 0.5:
                        initialisation_results['failed'] += 1
                    initialisation_results['all'] += 1

In [21]:
# (number of loaded evaluations, number with macro-F1 below 0.5)
len(initialisation_results['results']), initialisation_results['failed']

(20000, 0)

#### Order of Train Data

In [22]:
# Gather macro-F1 scores for the train-data-order factor.
# Layout walked (one extra data_order_<n> level between label and run):
#   FACTORS/data_order/split_<i>/label_<j>/data_order_<n>/run_<k>/<model_name>/evaluation_<m>/results.json
factor_path = os.path.join(FACTORS, 'data_order')
order_results = {'results': [], 'failed': 0, 'all': 0}

for split in os.listdir(factor_path):
    split_path = os.path.join(factor_path, split)
    if not split.startswith('split_') or not os.path.isdir(split_path):
        continue
    for label in os.listdir(split_path):
        label_path = os.path.join(split_path, label)
        if not label.startswith('label_') or not os.path.isdir(label_path):
            continue
        for order in os.listdir(label_path):
            order_path = os.path.join(label_path, order)
            if not order.startswith('data_order_') or not os.path.isdir(order_path):
                continue
            for run in os.listdir(order_path):
                run_path = os.path.join(order_path, run)
                model_path = os.path.join(run_path, model_name)
                # Runs without a directory for this model are ignored.
                if not run.startswith('run_') or not os.path.isdir(run_path) or not os.path.exists(model_path):
                    continue
                for evaluation in os.listdir(model_path):
                    evaluation_path = os.path.join(model_path, evaluation)
                    if not evaluation.startswith('evaluation_') or not os.path.isdir(evaluation_path):
                        continue
                    with open(os.path.join(evaluation_path, 'results.json'), 'r') as file:
                        data = json.load(file)
                    predictions = data['predictions']
                    score = f1_score(np.array(predictions[0]), np.array(predictions[1]), average='macro')

                    # 'data_order_<n>' contains two underscores, so its number is the third field.
                    order_results['results'].append({
                        'score': score,
                        'split': int(split.split('_')[1]),
                        'label': int(label.split('_')[1]),
                        'run': int(run.split('_')[1]),
                        'adaptation': int(evaluation.split('_')[1]),
                        'order': int(order.split('_')[2])
                    })
                    if score < 0.5:
                        order_results['failed'] += 1
                    order_results['all'] += 1

In [23]:
# (number of loaded evaluations, number with macro-F1 below 0.5)
len(order_results['results']), order_results['failed']

(20000, 71)

## Compare Factors

### Aggregation by investigated factor

In this part we aggregate by the main investigated factor in the following way:
- select runs where the value of factors only differ in the investigated factor (non-investigated factors have the same value; investigated has 10 values)
- calculate mean and standard deviation across the values of investigated factor
- results in ~10 000 values of mean and standard deviation
- calculate the final performance values as a mean of the pre-calculated many mean values
- calculate the instability of factor by calculating mean of the pre-calculated standard deviations

#### Data Split

In [50]:
# Group the data-split scores (in percent) by label, run and adaptation, so the
# scores inside one group differ only in the data split.
overall_score = []
data_split_by_other_factors = {}

for result in data_split_results['results']:
    score = result['score'] * 100
    key = f"label_{result['label']}-run_{result['run']}-adaptation_{result['adaptation']}"
    data_split_by_other_factors.setdefault(key, []).append(score)
    overall_score.append(score)

# (mean, std, min, max) over all scores, ignoring the grouping.
(
    np.mean(overall_score),
    np.std(overall_score),
    np.min(overall_score),
    np.max(overall_score),
)

(80.35404868647417, 0.9714921564083986, 74.39445561012135, 82.51307705805904)

In [51]:
# Mean and standard deviation of the scores inside every group.
aggregated_data_split = {
    'mean': [np.mean(scores) for scores in data_split_by_other_factors.values()],
    'std': [np.std(scores) for scores in data_split_by_other_factors.values()],
}

# Reported: mean of group means, mean of group stds, std of group means, std of group stds.
print(f"Investigated factor mean: {np.mean(aggregated_data_split['mean'])}")
print(f"Investigated factor deviation: {np.mean(aggregated_data_split['std'])}")
print(f"Other factors deviation: {np.std(aggregated_data_split['mean'])}")
print(f"Variability of factor deviation: {np.std(aggregated_data_split['std'])}")

Investigated factor mean: 80.3540486864742
Investigated factor deviation: 0.8872246422612884
Other factors deviation: 0.28280597758458825
Variability of factor deviation: 0.27685776703895165


In [52]:
# Share of evaluations with macro-F1 below 0.5; raises ZeroDivisionError if nothing was loaded.
print(f"Failed percentage of runs: {data_split_results['failed'] / data_split_results['all'] * 100}%")

Failed percentage of runs: 0.0%


#### Label Selection

In [53]:
# Group the label-selection scores (in percent) by split, run and adaptation, so
# the scores inside one group differ only in the selected labels.
overall_score = []
label_by_other_factors = {}

for result in label_results['results']:
    score = result['score'] * 100
    key = f"split_{result['split']}-run_{result['run']}-adaptation_{result['adaptation']}"
    label_by_other_factors.setdefault(key, []).append(score)
    overall_score.append(score)

# (mean, std, min, max) over all scores, ignoring the grouping.
(
    np.mean(overall_score),
    np.std(overall_score),
    np.min(overall_score),
    np.max(overall_score),
)

(80.33393554683644, 1.0346948286989226, 73.18484772249053, 82.73616477096068)

In [54]:
# Mean and standard deviation of the scores inside every group.
aggregated_label = {
    'mean': [np.mean(scores) for scores in label_by_other_factors.values()],
    'std': [np.std(scores) for scores in label_by_other_factors.values()],
}

# Reported: mean of group means, mean of group stds, std of group means, std of group stds.
print(f"Investigated factor mean: {np.mean(aggregated_label['mean'])}")
print(f"Investigated factor deviation: {np.mean(aggregated_label['std'])}")
print(f"Other factors deviation: {np.std(aggregated_label['mean'])}")
print(f"Variability of factor deviation: {np.std(aggregated_label['std'])}")

Investigated factor mean: 80.33393554683646
Investigated factor deviation: 0.9591447944681754
Other factors deviation: 0.2682594076955213
Variability of factor deviation: 0.28048447722421666


In [55]:
# Share of evaluations with macro-F1 below 0.5; raises ZeroDivisionError if nothing was loaded.
print(f"Failed percentage of runs: {label_results['failed'] / label_results['all'] * 100}%")

Failed percentage of runs: 0.0%


#### Choice of Adaptation Data

In [56]:
# Group the adaptation scores (in percent) by split, label and run, so the
# scores inside one group differ only in the adaptation index.
overall_score = []
adaptation_by_other_factors = {}

for result in adaptation_results['results']:
    score = result['score'] * 100
    key = f"split_{result['split']}-label_{result['label']}-run_{result['run']}"
    adaptation_by_other_factors.setdefault(key, []).append(score)
    overall_score.append(score)

# (mean, std, min, max) over all scores, ignoring the grouping.
(
    np.mean(overall_score),
    np.std(overall_score),
    np.min(overall_score),
    np.max(overall_score),
)

(80.22839090196791, 1.204322610028756, 73.05102461123994, 82.65649200339897)

In [57]:
# Mean and standard deviation of the scores inside every group.
aggregated_adaptation = {
    'mean': [np.mean(scores) for scores in adaptation_by_other_factors.values()],
    'std': [np.std(scores) for scores in adaptation_by_other_factors.values()],
}

# Reported: mean of group means, mean of group stds, std of group means, std of group stds.
print(f"Investigated factor mean: {np.mean(aggregated_adaptation['mean'])}")
print(f"Investigated factor deviation: {np.mean(aggregated_adaptation['std'])}")
print(f"Other factors deviation: {np.std(aggregated_adaptation['mean'])}")
print(f"Variability of factor deviation: {np.std(aggregated_adaptation['std'])}")

Investigated factor mean: 80.2283909019679
Investigated factor deviation: 0.9421706933669466
Other factors deviation: 0.6678830038500373
Variability of factor deviation: 0.3415254408608149


In [58]:
# Share of evaluations with macro-F1 below 0.5; raises ZeroDivisionError if nothing was loaded.
print(f"Failed percentage of runs: {adaptation_results['failed'] / adaptation_results['all'] * 100}%")

Failed percentage of runs: 0.0%


##### Stable

In [59]:
# Group the stable-adaptation scores (in percent) by split, label and run, so
# the scores inside one group differ only in the adaptation index.
overall_score = []
stable_adaptation_by_other_factors = {}

for result in stable_adaptation_results['results']:
    score = result['score'] * 100
    key = f"split_{result['split']}-label_{result['label']}-run_{result['run']}"
    stable_adaptation_by_other_factors.setdefault(key, []).append(score)
    overall_score.append(score)

# (mean, std, min, max) over all scores, ignoring the grouping.
(
    np.mean(overall_score),
    np.std(overall_score),
    np.min(overall_score),
    np.max(overall_score),
)

(80.41238485657219, 0.9758672000628341, 71.33147447754189, 82.56516405329049)

In [60]:
# Mean and standard deviation of the scores inside every group.
aggregated_stable_adaptation = {
    'mean': [np.mean(scores) for scores in stable_adaptation_by_other_factors.values()],
    'std': [np.std(scores) for scores in stable_adaptation_by_other_factors.values()],
}

# Reported: mean of group means, mean of group stds, std of group means, std of group stds.
print(f"Investigated factor mean: {np.mean(aggregated_stable_adaptation['mean'])}")
print(f"Investigated factor deviation: {np.mean(aggregated_stable_adaptation['std'])}")
print(f"Other factors deviation: {np.std(aggregated_stable_adaptation['mean'])}")
print(f"Variability of factor deviation: {np.std(aggregated_stable_adaptation['std'])}")

Investigated factor mean: 80.4123848565722
Investigated factor deviation: 0.6455378802732623
Other factors deviation: 0.6299999774736654
Variability of factor deviation: 0.37242135501869256


In [61]:
# Share of evaluations with macro-F1 below 0.5; raises ZeroDivisionError if nothing was loaded.
print(f"Failed percentage of runs: {stable_adaptation_results['failed'] / stable_adaptation_results['all'] * 100}%")

Failed percentage of runs: 0.0%


##### Unstable

In [62]:
# Group the unstable-adaptation scores (in percent) by split, label and run, so
# the scores inside one group differ only in the adaptation index.
overall_score = []
unstable_adaptation_by_other_factors = {}

for result in unstable_adaptation_results['results']:
    score = result['score'] * 100
    key = f"split_{result['split']}-label_{result['label']}-run_{result['run']}"
    unstable_adaptation_by_other_factors.setdefault(key, []).append(score)
    overall_score.append(score)

# (mean, std, min, max) over all scores, ignoring the grouping.
(
    np.mean(overall_score),
    np.std(overall_score),
    np.min(overall_score),
    np.max(overall_score),
)

(80.22839090196791, 1.204322610028756, 73.05102461123994, 82.65649200339897)

In [63]:
# Mean and standard deviation of the scores inside every group.
aggregated_unstable_adaptation = {
    'mean': [np.mean(scores) for scores in unstable_adaptation_by_other_factors.values()],
    'std': [np.std(scores) for scores in unstable_adaptation_by_other_factors.values()],
}

# Reported: mean of group means, mean of group stds, std of group means, std of group stds.
print(f"Investigated factor mean: {np.mean(aggregated_unstable_adaptation['mean'])}")
print(f"Investigated factor deviation: {np.mean(aggregated_unstable_adaptation['std'])}")
print(f"Other factors deviation: {np.std(aggregated_unstable_adaptation['mean'])}")
print(f"Variability of factor deviation: {np.std(aggregated_unstable_adaptation['std'])}")

Investigated factor mean: 80.2283909019679
Investigated factor deviation: 0.9421706933669466
Other factors deviation: 0.6678830038500373
Variability of factor deviation: 0.3415254408608149


In [64]:
# Share of evaluations with macro-F1 below 0.5; raises ZeroDivisionError if nothing was loaded.
print(f"Failed percentage of runs: {unstable_adaptation_results['failed'] / unstable_adaptation_results['all'] * 100}%")

Failed percentage of runs: 0.0%


#### Initialisation of Model

In [65]:
# Group the initialisation scores (in percent) by split, label, run and
# adaptation, so the scores inside one group differ only in the initialisation.
overall_score = []
initialisation_by_other_factors = {}

for result in initialisation_results['results']:
    score = result['score'] * 100
    key = f"split_{result['split']}-label_{result['label']}-run_{result['run']}-adaptation_{result['adaptation']}"
    initialisation_by_other_factors.setdefault(key, []).append(score)
    overall_score.append(score)

# (mean, std, min, max) over all scores, ignoring the grouping.
(
    np.mean(overall_score),
    np.std(overall_score),
    np.min(overall_score),
    np.max(overall_score),
)

(80.19678577972888, 0.9678733907935632, 73.4165576204774, 82.34979733092058)

In [66]:
# Mean and standard deviation of the scores inside every group.
aggregated_initialisation = {
    'mean': [np.mean(scores) for scores in initialisation_by_other_factors.values()],
    'std': [np.std(scores) for scores in initialisation_by_other_factors.values()],
}

# Reported: mean of group means, mean of group stds, std of group means, std of group stds.
print(f"Investigated factor mean: {np.mean(aggregated_initialisation['mean'])}")
print(f"Investigated factor deviation: {np.mean(aggregated_initialisation['std'])}")
print(f"Other factors deviation: {np.std(aggregated_initialisation['mean'])}")
print(f"Variability of factor deviation: {np.std(aggregated_initialisation['std'])}")

Investigated factor mean: 80.19678577972888
Investigated factor deviation: 0.6583951568179297
Other factors deviation: 0.6312840284252992
Variability of factor deviation: 0.32368996515191717


In [67]:
# Share of evaluations with macro-F1 below 0.5; raises ZeroDivisionError if nothing was loaded.
print(f"Failed percentage of runs: {initialisation_results['failed'] / initialisation_results['all'] * 100}%")

Failed percentage of runs: 0.0%


#### Order of Train Data

In [68]:
# Group the data-order scores (in percent) by split, label, run and adaptation,
# so the scores inside one group differ only in the order of the train data.
overall_score = []
order_by_other_factors = {}

for result in order_results['results']:
    score = result['score'] * 100
    key = f"split_{result['split']}-label_{result['label']}-run_{result['run']}-adaptation_{result['adaptation']}"
    order_by_other_factors.setdefault(key, []).append(score)
    overall_score.append(score)

# (mean, std, min, max) over all scores, ignoring the grouping.
(
    np.mean(overall_score),
    np.std(overall_score),
    np.min(overall_score),
    np.max(overall_score),
)

(75.770676117898, 4.508541849545775, 25.392578679324558, 82.13136315914777)

In [69]:
# Mean and standard deviation of the scores inside every group.
aggregated_order = {
    'mean': [np.mean(scores) for scores in order_by_other_factors.values()],
    'std': [np.std(scores) for scores in order_by_other_factors.values()],
}

# Reported: mean of group means, mean of group stds, std of group means, std of group stds.
print(f"Investigated factor mean: {np.mean(aggregated_order['mean'])}")
print(f"Investigated factor deviation: {np.mean(aggregated_order['std'])}")
print(f"Other factors deviation: {np.std(aggregated_order['mean'])}")
print(f"Variability of factor deviation: {np.std(aggregated_order['std'])}")

Investigated factor mean: 75.77067611789799
Investigated factor deviation: 3.232722235921918
Other factors deviation: 2.3712999424630894
Variability of factor deviation: 2.0623756053047644


In [70]:
# Share of evaluations with macro-F1 below 0.5; raises ZeroDivisionError if nothing was loaded.
print(f"Failed percentage of runs: {order_results['failed'] / order_results['all'] * 100}%")

Failed percentage of runs: 0.35500000000000004%


# Save Data

In [74]:
# Directory of the pickled results, relative to the current working directory;
# the final cell loads 'ProtoNet-data' from here.
PICKLE_PATH = os.path.join('..', '..', 'pickled', 'sst2')

In [75]:
# Store every loaded result set in one pickle under PICKLE_PATH, the same
# location the loading cell at the end of the notebook reads from.
# NOTE(review): running this overwrites any existing 'ProtoNet-data' file there.
os.makedirs(PICKLE_PATH, exist_ok=True)  # open(..., 'wb') does not create missing directories
with open(os.path.join(PICKLE_PATH, 'ProtoNet-data'), 'wb') as file:
    pickle.dump({
        'golden': results,
        'split': data_split_results,
        'label': label_results,
        'initialisation': initialisation_results,
        'order': order_results,
        'adaptation': adaptation_results,
        's_adaptation': stable_adaptation_results,
        'u_adaptation': unstable_adaptation_results,
    }, file)

In [None]:
# Restore the result sets from the pickle instead of walking the raw prediction
# files; this replaces any values computed by the loading cells above.
# NOTE(review): pickle.load can execute arbitrary code — only load files from a trusted source.
with open(os.path.join(PICKLE_PATH, 'ProtoNet-data'), 'rb') as file:
    pickled = pickle.load(file)

# Keys must match the ones written by the save cell.
results = pickled['golden']
data_split_results = pickled['split']
label_results = pickled['label']
initialisation_results = pickled['initialisation']
order_results = pickled['order']
adaptation_results = pickled['adaptation']
stable_adaptation_results = pickled['s_adaptation']
unstable_adaptation_results = pickled['u_adaptation']