In [23]:
import scipy
import pandas as pd
from scipy.stats import hmean
import numpy as np
import os
from common.data.datasets.dcase2021_task2 import MACHINE_TYPES

from experiments.evaluate import evaluate
from create_submission_packages.submission_help import load_data_frame, aggregate_metrics
import yaml
import io

# Relative path to the `logs` directory one level above the working directory.
# (Not referenced by any other cell visible in this notebook.)
LOG_DIR = os.path.join(os.pardir, 'logs')
# Root directory under which create_submission_package writes one sub-directory
# per submission.
OUT_DIR = os.path.join('zip_package_root', 'task2')

In [49]:
def aggregate_fun_max(df, machine_type):
    """Collapse rows that share a `path` by taking the column-wise maximum.

    Args:
        df: DataFrame containing a `path` column.
        machine_type: Unused; accepted so all aggregate functions share one signature.

    Returns:
        DataFrame indexed by `path` holding the per-group maximum of each column.
    """
    by_path = df.groupby('path')
    return by_path.max()

def aggregate_fun_best(df, machine_type):
    """Collapse rows that share a `path`, choosing the reducer per machine type.

    'ToyTrain' uses the column-wise maximum; every other machine type uses the
    column-wise mean.

    Args:
        df: DataFrame containing a `path` column.
        machine_type: Machine type name that selects the reducer.

    Returns:
        DataFrame indexed by `path` with one aggregated row per group.
    """
    by_path = df.groupby('path')
    if machine_type not in ['ToyTrain']:
        return by_path.mean()
    return by_path.max()

def aggregate_fun(df, machine_type):
    """Collapse rows that share a `path` by taking the column-wise mean.

    Args:
        df: DataFrame containing a `path` column.
        machine_type: Unused; accepted so all aggregate functions share one signature.

    Returns:
        DataFrame indexed by `path` holding the per-group mean of each column.
    """
    by_path = df.groupby('path')
    return by_path.mean()

def aggregate_metrics_yaml(df):
    """Compute the harmonic-mean AUC and pAUC per machine type on the non-training rows.

    For each of the machine type indices 0..6 the rows with `train == False` and
    a matching `machine_type_idx` are passed to `evaluate`. The result is read as
    `metrics[machine][section][key]['auc' | 'pauc']`, and only sections 0, 1 and 2
    are taken into account.

    Args:
        df: DataFrame with at least the columns `train` and `machine_type_idx`,
            plus whatever `evaluate` requires.

    Returns:
        dict mapping the machine type name (from MACHINE_TYPES) to a tuple
        `(harmonic mean of AUCs, harmonic mean of pAUCs)`.
    """
    summary = {}

    for type_idx in range(7):
        test_rows = df.loc[(df['train'] == False) & (df['machine_type_idx'] == type_idx)]
        metrics = evaluate(test_rows, machine_type_idx=type_idx, section=-1)

        machine = MACHINE_TYPES[type_idx]
        per_machine = metrics[machine]

        # Flatten every entry of sections 0-2 into one list, then split by metric.
        entries = [
            per_machine[section][key]
            for section in (0, 1, 2)
            for key in per_machine[section]
        ]
        auc_values = [entry['auc'] for entry in entries]
        pauc_values = [entry['pauc'] for entry in entries]

        summary[machine] = hmean(auc_values), hmean(pauc_values)

    return summary

def _write_section_csvs(df, submission_dir):
    """Threshold every (machine type, section) and write the CSVs for sections 3-5.

    For each machine type in MACHINE_TYPES and each section 0..5, a gamma
    distribution is fitted to the training anomaly scores of that machine type
    and section; its 0.9 quantile is the decision threshold. The column
    `decision_result` of `df` is filled in place with 0/1 for every row of that
    machine type and section. For sections greater than 2, the anomaly scores
    and decisions of the non-training rows are written per domain to
    `anomaly_score_<machine>_section_<s>_<domain>.csv` and
    `decision_result_<machine>_section_<s>_<domain>.csv` (no header, frame
    index as first column).
    """
    is_train = df['train'] == True
    is_test = df['train'] == False

    # Integer column from the start so it never flips between bool and int;
    # rows that belong to no (machine type, section) pair keep 0.
    df['decision_result'] = 0

    for machine_idx, machine in enumerate(MACHINE_TYPES):
        in_machine = df['machine_type_idx'] == machine_idx

        for section in range(6):
            in_section = in_machine & (df['section'] == section)

            # The fit only depends on machine type and section, so it is done
            # once here instead of once per domain.
            train_scores = df.loc[is_train & in_section, 'anomaly_score'].to_numpy()
            shape_hat, loc_hat, scale_hat = scipy.stats.gamma.fit(train_scores)
            decision_threshold = scipy.stats.gamma.ppf(q=0.9, a=shape_hat, loc=loc_hat, scale=scale_hat)

            # Only touch the rows of this section, so earlier sections keep the
            # decisions made with their own threshold. `.to_numpy()` makes the
            # assignment positional (no index alignment).
            decisions = df.loc[in_section, 'anomaly_score'] > decision_threshold
            df.loc[in_section, 'decision_result'] = decisions.astype(int).to_numpy()

            if section <= 2:
                # Sections 0-2 are thresholded but not exported.
                continue

            for domain in ['source', 'target']:
                to_save_mask = is_test & in_section & (df['source'] == (domain == 'source'))

                score_file = f"anomaly_score_{machine}_section_{section}_{domain}.csv"
                df.loc[to_save_mask, 'anomaly_score'].to_csv(os.path.join(submission_dir, score_file), header=False)

                decision_file = f"decision_result_{machine}_section_{section}_{domain}.csv"
                df.loc[to_save_mask, 'decision_result'].to_csv(os.path.join(submission_dir, decision_file), header=False)


def create_submission_package(submission_id, df):
    """Write the CSV files and the meta YAML of one submission below OUT_DIR.

    Side effects:
        * creates `OUT_DIR/Primus_CPJKU_task2_<id>/` if it does not exist,
        * writes the anomaly-score and decision CSVs for sections 3-5,
        * reads `template.meta.yaml` from the current working directory,
        * writes `Primus_CPJKU_task2_<id>.meta.yaml` into the submission directory.

    Args:
        submission_id: One of 'ae', 'made', 'maf', 'clf', 'comb'.
        df: DataFrame with the columns `train`, `machine_type_idx`, `section`,
            `source` and `anomaly_score` (plus whatever `aggregate_metrics_yaml`
            needs). The frame is copied; the caller's object is not modified.

    Returns:
        The copy of `df` with an added integer column `decision_result`, where
        every row is thresholded with the gamma fit of its own machine type and
        section.

    Raises:
        KeyError: if `submission_id` is not one of the known ids.
    """
    df = df.copy()

    # NOTE(review): 'ae' and 'made' share id 1 and therefore the same output
    # directory and label - kept from the original mapping; confirm intended.
    system = {
        'ae': {
            'id': 1,
            'name': 'Autoencoder with PO Loss',
            'abbreviation': 'AE',
            'machine_learning_method': 'AE',
            'total_parameters': 2072000,
        },
        'made': {
            'id': 1,
            'name': 'MADE with PO Loss',
            'abbreviation': 'MADE',
            'machine_learning_method': 'MADE',
            'total_parameters': 144900000,
        },
        'maf': {
            'id': 2,
            'name': 'MAF with PO Loss',
            'abbreviation': 'MAF',
            'machine_learning_method': 'MAF',
            'total_parameters': 228200000,
        },
        'clf': {
            'id': 3,
            'name': 'Domain Adapted ResNet with PO Loss',
            'abbreviation': 'ResNet',
            'machine_learning_method': 'ResNet',
            'total_parameters': 46200000,
        },
        'comb': {
            'id': 4,
            'name': 'Ensemble with PO Loss',
            'abbreviation': 'Ensemble',
            'machine_learning_method': 'Ensemble',
            'total_parameters': 421372000,
        },
    }[submission_id]

    submission_id_ = f"Primus_CPJKU_task2_{system['id']}"
    submission_dir = os.path.join(OUT_DIR, submission_id_)
    os.makedirs(submission_dir, exist_ok=True)

    _write_section_csvs(df, submission_dir)

    # Read YAML file (explicit encoding, matching the UTF-8 output below).
    with open("template.meta.yaml", 'r', encoding='utf8') as stream:
        template = yaml.safe_load(stream)

    template['submission']['label'] = submission_id_
    template['submission']['name'] = system['name']
    template['submission']['abbreviation'] = system['abbreviation']

    description = template['system']['description']
    description['machine_learning_method'] = system['machine_learning_method']
    # Assignment order is kept as-is: it determines the key order in the dumped
    # YAML (sort_keys=False) whenever the template lacks these keys.
    if submission_id == 'comb':
        description['ensemble_method_subsystem_count'] = 3
        description['decision_making'] = 'mean'
        description['domain_adaptation_method'] = 'Contrastive Semantic Alignment'
    elif submission_id == 'clf':
        description['domain_adaptation_method'] = 'Contrastive Semantic Alignment'

    template['system']['complexity']['total_parameters'] = system['total_parameters']

    # Metrics are fractions; the template stores them multiplied by 100.
    dev_results = template['results']['development_dataset']
    for machine, (auc, pauc) in aggregate_metrics_yaml(df).items():
        dev_results[machine]['harmonic_mean_auc'] = float(auc) * 100
        dev_results[machine]['harmonic_mean_pauc'] = float(pauc) * 100

    # Write YAML file
    meta_path = os.path.join(submission_dir, f'{submission_id_}.meta.yaml')
    with open(meta_path, 'w', encoding='utf8') as outfile:
        yaml.dump(template, outfile, default_flow_style=False, allow_unicode=True, sort_keys=False)

    return df

In [25]:
# pump, slider -> no po

# One (source id, target id) pair of 32-character hex identifiers per machine type
# (presumably experiment run ids - confirm against load_data_frame).
_classification_runs = {
    'ToyTrain': ('dba703948e3c4c04a4c7697510c430c8', '0692450061ff4c54bf510077b1fc23fb'),
    'ToyCar': ('5fbdff2bcd444e738b650affa424d221', '08bc33d7e35c4bf5b86b0620689bf8e7'),
    'valve': ('0230bd7f9f574a4fab2758ccd263fef7', '247aab1d7651415f9526506a8689047e'),
    'slider': ('70596e0c1c394b17858648f71984c940', '39666e607e9e4eddba7b74ad916ee765'),
    'pump': ('d516cf02fd3e46f199fd0bcdbe09aaef', '551e9530ebbb4574a522adbea69d2d48'),
    'gearbox': ('6670d468bf544c55bd998b6c2fcc0b71', '2c33a109178f4e78927dae7760e63d76'),
    'fan': ('97fcd5a6e7d24a8c83f77e286384f5aa', 'd506b9f1e3ba419aabb241d331ca0ee8'),
}

# Expand into the {'source': {machine: [id]}, 'target': {machine: [id]}} layout
# that is handed to load_data_frame.
classification = {
    'source': {machine: [src_id] for machine, (src_id, _tgt_id) in _classification_runs.items()},
    'target': {machine: [tgt_id] for machine, (_src_id, tgt_id) in _classification_runs.items()},
}

classification_df = load_data_frame(classification)

In [9]:
# ToyCar, fan, pump, slider , valve -> other machines
# ToyTrain, gearbox -> only same

# One 32-character hex identifier per machine type; source and target use the same one
# (presumably experiment run ids - confirm against load_data_frame).
_maf_runs = {
    'ToyTrain': 'e18f736ad2f24b54ae5e75810ef60e09',
    'ToyCar': '15d68a23d4eb46f8a24a33643f031538',
    'valve': '03e153ad747d4b3a92427c5cf691365e',
    'slider': 'c5fb329e20a2491aa310ff18ed76a49e',
    'pump': '70c51092d72640209e2951aa00e19086',
    'gearbox': '9eefb837119e43ec8fe9ea1ddaf921f8',
    'fan': '3f56295b34e6499b9484a50a009d4244',
}

# Fresh dicts and lists per domain, so 'source' and 'target' do not share objects.
maf = {
    domain: {machine: [run_id] for machine, run_id in _maf_runs.items()}
    for domain in ('source', 'target')
}

maf_df = load_data_frame(maf)


In [None]:
# same -> gearbox, ToyTrain, ToyCar
# One 32-character hex identifier per machine type; source and target use the same one
# (presumably experiment run ids - confirm against load_data_frame).
_ae_runs = {
    'ToyTrain': '9902e95c2b4341fa9176faf9e07452ba',
    'ToyCar': 'ca7edb887a8640d19b284b033e83ddde',
    'valve': 'f9633473158a4f478f2e8d48075d4960',
    'slider': '05a15e261fcd4a5d91fdd691ebd44eb4',
    'pump': '39803953b24c4b8f8be2a567f6e3f853',
    'gearbox': '58fadba0c17842ca9a529d488ecedd15',
    'fan': '7715c83b31a24e7ca0a2011b61f8fec6',
}

# Fresh dicts and lists per domain, so 'source' and 'target' do not share objects.
ae = {
    domain: {machine: [run_id] for machine, run_id in _ae_runs.items()}
    for domain in ('source', 'target')
}

ae_df = load_data_frame(ae)

In [None]:
# all -> fan, pump, slider valve
# One 32-character hex identifier per machine type; source and target use the same one
# (presumably experiment run ids - confirm against load_data_frame).
_made_runs = {
    'ToyTrain': '87eac890ecc2475ab361e7d5e2c145ab',
    'ToyCar': '1f33112db5144f7da62200b67a6e76d0',
    'valve': 'c6b274a052944f1f877b466b4d5291f4',
    'slider': 'c0cd20d44917422b890275c08e6460d8',
    'pump': '2c1c0c98aa8f4f4f85aeaefd34fc131e',
    'gearbox': 'd84133c12ab846849a82bff02b76f0c1',
    'fan': '56abadb62a3643c9bb07fb1fb8b274d6',
}

# Fresh dicts and lists per domain, so 'source' and 'target' do not share objects.
made = {
    domain: {machine: [run_id] for machine, run_id in _made_runs.items()}
    for domain in ('source', 'target')
}

made_df = load_data_frame(made)

In [50]:
# sanity check

# Column 'lab' carries the machine type names; every further column holds the
# aggregate_metrics output of one system, in the order they are listed here.
_overview_columns = {'lab': MACHINE_TYPES}
for _system, _frame in (
    ('classification', classification_df),
    ('ae', ae_df),
    ('maf', maf_df),
    ('made', made_df),
):
    _overview_columns[_system] = aggregate_metrics(_frame)

overview = pd.DataFrame(_overview_columns)

# Grouped bar chart: one group per machine type, one bar per system.
overview.plot.bar(x='lab', rot=0)


In [1]:
# Build one package per system; `_` ends up holding the frame returned by the last call.
for _submission_id, _frame in (
    ('made', made_df),
    ('maf', maf_df),
    ('clf', classification_df),
):
    _ = create_submission_package(_submission_id, _frame)