# 🧫 🧪 Эксперименты с классическими *unsupervised* методами обнаружения аномалий

### 🌐 Установка [pyod](https://github.com/yzhao062/pyod)

In [1]:
# !pip3 install pyod

In [2]:
# import os 
# os.environ['MKL_NUM_THREADS'] = '12' 
# os.environ['GOTO_NUM_THREADS'] = '12' 
# os.environ['OMP_NUM_THREADS'] = '12' 
# os.environ['openmp'] = 'True'

## 💅 Предобработка данных

In [3]:
from bs4 import BeautifulSoup
from gensim.parsing.preprocessing import remove_stopwords
from gensim.parsing.preprocessing import strip_short
from gensim.parsing.preprocessing import strip_non_alphanum
from gensim.parsing.preprocessing import strip_numeric
from gensim.utils import tokenize
import nltk; nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer


def strip_html_tags(text):
    """Удаление html tags из текста."""
    soup = BeautifulSoup(text, "html.parser")
    stripped_text = soup.get_text(separator=" ")
    return stripped_text


def get_wordnet_pos(word):
    """Map POS tag to first character lemmatize() accepts"""
    tag = nltk.pos_tag([word])[0][1][0].upper()
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}
    return tag_dict.get(tag, wordnet.NOUN)


def preprocess_text(text):
    text = strip_html_tags(text)  # удаление html tags
    text = strip_non_alphanum(text) # заменили все небуквенные символы на пробел
    text = strip_numeric(text) # удалили все цифры
    text = remove_stopwords(text) # удалили все стоп-слова
    # text = strip_short(text, minsize=2) # удалили короткие слова
    word_list = list(tokenize(text, deacc=True, to_lower=True)) # токенизация, deacc - избавляет от ударений
    word_list = [WordNetLemmatizer().lemmatize(word) for word in word_list] # лемматизация
    return ' '.join(word for word in word_list)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/pavelmamaev/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


## 🧬 Экперименты

In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf

# tf.config.threading.set_inter_op_parallelism_threads(6)

tf.keras.backend.set_floatx('float64')

from sklearn.datasets import fetch_20newsgroups
from sklearn.utils import shuffle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import roc_auc_score

import pyod
from pyod.models import auto_encoder
from pyod.models import vae

c = 0.1  # отношение количества аномальных экземпляров к нормальным

categories = ["comp.graphics",
              'talk.politics.mideast',
              "rec.sport.hockey",
              "sci.med",
              "sci.space",
              'misc.forsale',
              'soc.religion.christian',
              'talk.politics.misc']

experimant_cnt = 0
all_experiments = len(categories) * (len(categories) -  1)
auc_list_ae = []
auc_list_vae = []
auc_list_ae1 = []
auc_list_vae1 = []

# Формирование словаря с категориями
dataset = {}
for cat in categories:
    # Загрузка
    dataset[cat] = fetch_20newsgroups(subset='all', categories=[cat],
                            shuffle=True, random_state=123,
                            remove=('headers', 'footers'), return_X_y=True)[0]
    # Предобработка
    dataset[cat] = [preprocess_text(text) for text in dataset[cat]]

# Перебираем пары категорий
for i in range(len(categories)):
    for j in range(len(categories)):
        if i == j:
            continue

        c1 = categories[i]
        c2 = categories[j]

        experimant_cnt += 1
        
#         if experimant_cnt <= 53:
#             continue

        # Формирование нормальной и аномальной выборок
        normal_data = dataset[c1]
        anomal_data = dataset[c2][:min(int(c * len(normal_data)) + 1, len(dataset[c2]))]
        all_data = normal_data + anomal_data

        # TF-IDF векторизация
        vectorizer = TfidfVectorizer()
        all_data_tf = vectorizer.fit_transform(all_data).toarray()

        # Формирование выборок
        x = all_data_tf
        y = np.array([False] * len(normal_data) + [True] * len(anomal_data))
        all_data, x, y = shuffle(all_data, x, y, random_state=123)

        # Задаем модели

        # AE
        
        ae_clf = auto_encoder.AutoEncoder(
            hidden_neurons=[128, 64, 32, 64, 128],
            hidden_activation='relu',
            output_activation='sigmoid',
            optimizer='adam',
            epochs=30,
            batch_size=512,
            dropout_rate=0.3,
            l2_regularizer=0.4,
            validation_size=0.0,
            preprocessing=True,
            verbose=1,
            random_state=123,
            contamination=0.1
        )
        

        # VAE
        
        vae_clf = vae.VAE(
            contamination=0.1,
            encoder_neurons=[128, 64, 32],
            decoder_neurons=[32, 64, 128],
            latent_dim=5,
            hidden_activation='relu',
            output_activation='sigmoid',
            optimizer='adam',
            epochs=30,
            batch_size=512,
            dropout_rate=0.3,
            l2_regularizer=0.4,
            validation_size=0.0,
            preprocessing=True,
            verbose=1,
            random_state=123,
        )

        # Тренировка моделей
        try:
            ae_clf.fit(x)
            vae_clf.fit(x)
            
        except ValueError:
            continue
            

        # Предсказание моделей
        y_predict_ae = ae_clf.decision_function(x)
        y_predict_vae = vae_clf.decision_function(x)
        
        y_predict_ae1 = ae_clf.predict(x)
        y_predict_vae1 = vae_clf.predict(x)

        # Считаем метрику ROC AUC
        auc_ae = roc_auc_score(y, y_predict_ae)
        auc_vae = roc_auc_score(y, y_predict_vae)
        
        auc_ae1 = roc_auc_score(y, y_predict_ae1)
        auc_vae1 = roc_auc_score(y, y_predict_vae1)

        # Добавляем в списки
        auc_list_ae.append(auc_ae)
        auc_list_vae.append(auc_vae)
        auc_list_ae1.append(auc_ae1)
        auc_list_vae1.append(auc_vae1)
        

        # Вывод результатов
        print("-" * 50)
        print("Эксперимент №{}/{}  с normal = {}, anomal = {}".format(
            experimant_cnt, all_experiments, c1, c2))
        print("auc_ae = ", auc_ae)
        print("auc_vae = ", auc_vae)
        print("auc_ae1(predict) = ", auc_ae1)
        print("auc_vae1(predict) = ", auc_vae1)
        print("-" * 50)


auc_np = np.array(auc_list_ae)
print("*" * 50)
print("AE Медиана auc = {}".format(np.median(auc_np)))
print("AE Среднее auc = {}".format(np.mean(auc_np)))
print("*" * 50)

auc_np = np.array(auc_list_vae)
print("*" * 50)
print("VAE Медиана auc = {}".format(np.median(auc_np)))
print("VAE Среднее auc = {}".format(np.mean(auc_np)))
print("*" * 50)

auc_np = np.array(auc_list_ae1)
print("*" * 50)
print("AE Медиана auc (predict) = {}".format(np.median(auc_np)))
print("AE Среднее auc (predict) = {}".format(np.mean(auc_np)))
print("*" * 50)

auc_np = np.array(auc_list_vae1)
print("*" * 50)
print("VAE Медиана auc (predict) = {}".format(np.median(auc_np)))
print("VAE Среднее auc (predict) = {}".format(np.mean(auc_np)))
print("*" * 50)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 14789)             218729310 
_________________________________________________________________
dropout (Dropout)            (None, 14789)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 14789)             218729310 
_________________________________________________________________
dropout_1 (Dropout)          (None, 14789)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               1893120   
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                8

Epoch 2/3
Epoch 3/3
--------------------------------------------------
Эксперимент №54/56  с normal = talk.politics.misc, anomal = sci.space
auc_ae =  0.6400496277915633
auc_vae =  0.6394044665012407
auc_ae1(predict) =  0.508014888337469
auc_vae1(predict) =  0.508014888337469
--------------------------------------------------
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_19 (Dense)             (None, 14427)             208152756 
_________________________________________________________________
dropout_13 (Dropout)         (None, 14427)             0         
_________________________________________________________________
dense_20 (Dense)             (None, 14427)             208152756 
_________________________________________________________________
dropout_14 (Dropout)         (None, 14427)             0         
________________________________________________________

Epoch 2/3
Epoch 3/3
--------------------------------------------------
Эксперимент №55/56  с normal = talk.politics.misc, anomal = misc.forsale
auc_ae =  0.4411248966087675
auc_vae =  0.441091811414392
auc_ae1(predict) =  0.479793217535153
auc_vae1(predict) =  0.47785773366418527
--------------------------------------------------
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_38 (Dense)             (None, 15179)             230417220 
_________________________________________________________________
dropout_26 (Dropout)         (None, 15179)             0         
_________________________________________________________________
dense_39 (Dense)             (None, 15179)             230417220 
_________________________________________________________________
dropout_27 (Dropout)         (None, 15179)             0         
____________________________________________________

Epoch 2/3
Epoch 3/3
--------------------------------------------------
Эксперимент №56/56  с normal = talk.politics.misc, anomal = soc.religion.christian
auc_ae =  0.6534160463192722
auc_vae =  0.6541439205955335
auc_ae1(predict) =  0.5574028122415219
auc_vae1(predict) =  0.5574028122415219
--------------------------------------------------
**************************************************
AE Медиана auc = 0.6400496277915633
AE Среднее auc = 0.5781968569065343
**************************************************
**************************************************
VAE Медиана auc = 0.6394044665012407
VAE Среднее auc = 0.5782133995037221
**************************************************
**************************************************
AE Медиана auc (predict) = 0.508014888337469
AE Среднее auc (predict) = 0.5150703060380479
**************************************************
**************************************************
VAE Медиана auc (predict) = 0.508014888337469
VAE Среднее auc (

## 🏁Результаты

Так как при проведении экспериментов ядро `jupyter-notebook` постоянно умирало, то приходилось копировать результаты в текстовый файл и запускать эксперименты с прерванной точки.  

Также на некоторых экпериментах возникала ошибка `ValueError` из-за переполнения. Скорее всего это связано с тем, что при обучении VAE функция потерь (или другие переменные) принимала экстремально высокие значения.  

Ниже представлены результаты экспериментов:


--------------------------------------------------
Эксперимент №2/56  с normal = comp.graphics, anomal = rec.sport.hockey
auc_ae =  0.816525788115863
auc_vae =  0.8159070411309437
auc_ae1(predict) =  0.6022610483042137
auc_vae1(predict) =  0.6068492145059462
--------------------------------------------------

--------------------------------------------------
Эксперимент №3/56  с normal = comp.graphics, anomal = sci.med
auc_ae =  0.8245118191161355
auc_vae =  0.8246429095790423
auc_ae1(predict) =  0.6303406254588166
auc_vae1(predict) =  0.6359565408897372
--------------------------------------------------

--------------------------------------------------
Эксперимент №4/56  с normal = comp.graphics, anomal = sci.space
auc_ae =  0.8223357174318854
auc_vae =  0.8221784088763974
auc_ae1(predict) =  0.624724710027896
auc_vae1(predict) =  0.6242108354133019
--------------------------------------------------

--------------------------------------------------
Эксперимент №6/56  с normal = comp.graphics, anomal = soc.religion.christian
auc_ae =  0.8200180380476959
auc_vae =  0.8204060658178997
auc_ae1(predict) =  0.6471883717515783
auc_vae1(predict) =  0.6477022463661724
--------------------------------------------------

--------------------------------------------------
Эксперимент №7/56  с normal = comp.graphics, anomal = talk.politics.misc
auc_ae =  0.8778027140969439
auc_vae =  0.8774880969859681
auc_ae1(predict) =  0.6921156951989429
auc_vae1(predict) =  0.6921156951989429
--------------------------------------------------

--------------------------------------------------
Эксперимент №8/56  с normal = talk.politics.mideast, anomal = comp.graphics
auc_ae =  0.48460246360582304
auc_vae =  0.4845128779395297
auc_ae1(predict) =  0.4678611422172452
auc_vae1(predict) =  0.47312430011198203
--------------------------------------------------

--------------------------------------------------
Эксперимент №9/56  с normal = talk.politics.mideast, anomal = rec.sport.hockey
auc_ae =  0.6261254199328107
auc_vae =  0.6271220604703248
auc_ae1(predict) =  0.48524636058230686
auc_vae1(predict) =  0.4847144456886898
--------------------------------------------------

--------------------------------------------------
Эксперимент №10/56  с normal = talk.politics.mideast, anomal = sci.med
auc_ae =  0.6638969764837626
auc_vae =  0.6620380739081748
auc_ae1(predict) =  0.52001679731243
auc_vae1(predict) =  0.5263437849944009
--------------------------------------------------

--------------------------------------------------
Эксперимент №11/56  с normal = talk.politics.mideast, anomal = sci.space
auc_ae =  0.6519036954087345
auc_vae =  0.6512541993281075
auc_ae1(predict) =  0.4968365061590146
auc_vae1(predict) =  0.4973684210526316
--------------------------------------------------

--------------------------------------------------
Эксперимент №12/56  с normal = talk.politics.mideast, anomal = misc.forsale
auc_ae =  0.4608286674132138
auc_vae =  0.4549720044792833
auc_ae1(predict) =  0.47945128779395296
auc_vae1(predict) =  0.48365061590145575
--------------------------------------------------

--------------------------------------------------
Эксперимент №13/56  с normal = talk.politics.mideast, anomal = soc.religion.christian
auc_ae =  0.640582306830907
auc_vae =  0.6406606942889139
auc_ae1(predict) =  0.5258118701007839
auc_vae1(predict) =  0.5258118701007839
--------------------------------------------------

--------------------------------------------------
Эксперимент №14/56  с normal = talk.politics.mideast, anomal = talk.politics.misc
auc_ae =  0.7116461366181411
auc_vae =  0.7093169092945129
auc_ae1(predict) =  0.5489921612541993
auc_vae1(predict) =  0.5489921612541993
--------------------------------------------------

--------------------------------------------------
Эксперимент №15/56  с normal = rec.sport.hockey, anomal = comp.graphics
auc_ae =  0.6343043043043044
auc_vae =  0.6330330330330329
auc_ae1(predict) =  0.543953953953954
auc_vae1(predict) =  0.543953953953954
--------------------------------------------------


--------------------------------------------------
Эксперимент №16/56  с normal = rec.sport.hockey, anomal = talk.politics.mideast
auc_ae =  0.843013013013013
auc_vae =  0.8362762762762764
auc_ae1(predict) =  0.7254704704704705
auc_vae1(predict) =  0.730970970970971

--------------------------------------------------
Эксперимент №17/56  с normal = rec.sport.hockey, anomal = sci.med
auc_ae =  0.8058558558558558
auc_vae =  0.8007107107107108
auc_ae1(predict) =  0.6594644644644645
auc_vae1(predict) =  0.6594644644644645
--------------------------------------------------

--------------------------------------------------
Эксперимент №18/56  с normal = rec.sport.hockey, anomal = sci.space
auc_ae =  0.813943943943944
auc_vae =  0.8143143143143143
auc_ae1(predict) =  0.6484634634634635
auc_vae1(predict) =  0.647962962962963
--------------------------------------------------

--------------------------------------------------
Эксперимент №19/56  с normal = rec.sport.hockey, anomal = misc.forsale
auc_ae =  0.6013913913913913
auc_vae =  0.5993493493493495
auc_ae1(predict) =  0.554954954954955
auc_vae1(predict) =  0.5539539539539539
--------------------------------------------------

--------------------------------------------------
Эксперимент №20/56  с normal = rec.sport.hockey, anomal = soc.religion.christian
auc_ae =  0.8089189189189189
auc_vae =  0.8100200200200199
auc_ae1(predict) =  0.6704654654654654
auc_vae1(predict) =  0.6704654654654654
--------------------------------------------------

--------------------------------------------------
Эксперимент №21/56  с normal = rec.sport.hockey, anomal = talk.politics.misc
auc_ae =  0.8675675675675676
auc_vae =  0.8679479479479479
auc_ae1(predict) =  0.7144694694694694
auc_vae1(predict) =  0.71496996996997
--------------------------------------------------

--------------------------------------------------
Эксперимент №22/56  с normal = sci.med, anomal = comp.graphics
auc_ae =  0.5162727272727273
auc_vae =  0.5144040404040404
auc_ae1(predict) =  0.4944949494949495
auc_vae1(predict) =  0.49500000000000005
--------------------------------------------------

--------------------------------------------------
Эксперимент №24/56  с normal = sci.med, anomal = rec.sport.hockey
auc_ae =  0.7010505050505049
auc_vae =  0.6949696969696969
auc_ae1(predict) =  0.5715656565656566
auc_vae1(predict) =  0.565050505050505
--------------------------------------------------

--------------------------------------------------
Эксперимент №25/56  с normal = sci.med, anomal = sci.space
auc_ae =  0.7121616161616162
auc_vae =  0.7042323232323232
auc_ae1(predict) =  0.5880808080808081
auc_vae1(predict) =  0.5870707070707071
--------------------------------------------------                                                                                                                                                                                                               
--------------------------------------------------
Эксперимент №28/56  с normal = sci.med, anomal = talk.politics.misc
auc_ae =  0.7978585858585859
auc_vae =  0.7972929292929293
auc_ae1(predict) =  0.6376262626262625
auc_vae1(predict) =  0.6426262626262625
--------------------------------------------------

--------------------------------------------------
Эксперимент №29/56  с normal = sci.space, anomal = comp.graphics
auc_ae =  0.5168810700725595
auc_vae =  0.512296214423874
auc_ae1(predict) =  0.4892388934942127
auc_vae1(predict) =  0.49025206472014987
--------------------------------------------------

--------------------------------------------------
Эксперимент №30/56  с normal = sci.space, anomal = talk.politics.mideast
auc_ae =  0.7968028819092648
auc_vae =  0.793671261756368
auc_ae1(predict) =  0.6615087040618957
auc_vae1(predict) =  0.6620152896748642
--------------------------------------------------

--------------------------------------------------
Эксперимент №32/56  с normal = sci.space, anomal = sci.med
auc_ae =  0.7278253661232384
auc_vae =  0.7277332596481534
auc_ae1(predict) =  0.578152344109791
auc_vae1(predict) =  0.5786589297227596
--------------------------------------------------

--------------------------------------------------
Эксперимент №33/56  с normal = sci.space, anomal = misc.forsale
auc_ae =  0.5018677146336721
auc_vae =  0.49976973381228695
auc_ae1(predict) =  0.5114672561481073
auc_vae1(predict) =  0.5109606705351386
--------------------------------------------------

--------------------------------------------------
Эксперимент №34/56  с normal = sci.space, anomal = soc.religion.christian
auc_ae =  0.7331470735726054
auc_vae =  0.7333824567867121
auc_ae1(predict) =  0.6226090694175801
auc_vae1(predict) =  0.6170519787541064
--------------------------------------------------

--------------------------------------------------
Эксперимент №35/56  с normal = sci.space, anomal = talk.politics.misc
auc_ae =  0.7969563927010737
auc_vae =  0.7962093068476048
auc_ae1(predict) =  0.6392803414080009
auc_vae1(predict) =  0.6392803414080009
--------------------------------------------------

--------------------------------------------------
Эксперимент №36/56  с normal = misc.forsale, anomal = comp.graphics
auc_ae =  0.6556253270538985
auc_vae =  0.6550915750915752
auc_ae1(predict) =  0.5456828885400314
auc_vae1(predict) =  0.5467085295656724
--------------------------------------------------

--------------------------------------------------
Эксперимент №37/56  с normal = misc.forsale, anomal = talk.politics.mideast
auc_ae =  0.8830455259026687
auc_vae =  0.8823338566195709
auc_ae1(predict) =  0.7478178963893249
auc_vae1(predict) =  0.7473050758765044
--------------------------------------------------

--------------------------------------------------
Эксперимент №38/56  с normal = misc.forsale, anomal = rec.sport.hockey
auc_ae =  0.8242804814233387
auc_vae =  0.8236316064887493
auc_ae1(predict) =  0.635520669806384
auc_vae1(predict) =  0.6406227106227106
--------------------------------------------------

--------------------------------------------------
Эксперимент №39/56  с normal = misc.forsale, anomal = sci.med
auc_ae =  0.8415593929879646
auc_vae =  0.8399372056514913
auc_ae1(predict) =  0.6692098377812663
auc_vae1(predict) =  0.6641077969649398
--------------------------------------------------

--------------------------------------------------
Эксперимент №40/56  с normal = misc.forsale, anomal = sci.space
auc_ae =  0.8415803244374672
auc_vae =  0.8414861329147044
auc_ae1(predict) =  0.6635949764521193
auc_vae1(predict) =  0.6584929356357927
--------------------------------------------------

--------------------------------------------------
Эксперимент №41/56  с normal = misc.forsale, anomal = soc.religion.christian
auc_ae =  0.8446363160648874
auc_vae =  0.8446991104133961
auc_ae1(predict) =  0.6972841444270015
auc_vae1(predict) =  0.7023861852433281
--------------------------------------------------

--------------------------------------------------
Эксперимент №42/56  с normal = misc.forsale, anomal = talk.politics.misc
auc_ae =  0.896232339089482
auc_vae =  0.8951439037153324
auc_ae1(predict) =  0.7646624803767662
auc_vae1(predict) =  0.7641496598639456
--------------------------------------------------

--------------------------------------------------
Эксперимент №43/56  с normal = soc.religion.christian, anomal = comp.graphics
auc_ae =  0.5029889669007022
auc_vae =  0.4972918756268807
auc_ae1(predict) =  0.4888465396188566
auc_vae1(predict) =  0.4888465396188566
--------------------------------------------------

--------------------------------------------------
Эксперимент №44/56  с normal = soc.religion.christian, anomal = talk.politics.mideast
auc_ae =  0.7448445336008024
auc_vae =  0.7429789368104314
auc_ae1(predict) =  0.6098796389167503
auc_vae1(predict) =  0.6098796389167503
--------------------------------------------------

--------------------------------------------------
Эксперимент №45/56  с normal = soc.religion.christian, anomal = rec.sport.hockey
auc_ae =  0.6668505516549649
auc_vae =  0.6646038114343029
auc_ae1(predict) =  0.5328585757271814
auc_vae1(predict) =  0.5288615847542627
--------------------------------------------------

--------------------------------------------------
Эксперимент №46/56  с normal = soc.religion.christian, anomal = sci.med
auc_ae =  0.6750651955867603
auc_vae =  0.6739518555667001
auc_ae1(predict) =  0.5438615847542627
auc_vae1(predict) =  0.5483600802407221
--------------------------------------------------

--------------------------------------------------
Эксперимент №47/56  с normal = soc.religion.christian, anomal = sci.space
auc_ae =  0.6971013039117353
auc_vae =  0.6908625877632899
auc_ae1(predict) =  0.5603660982948846
auc_vae1(predict) =  0.5658676028084253
--------------------------------------------------

--------------------------------------------------
Эксперимент №48/56  с normal = soc.religion.christian, anomal = misc.forsale
auc_ae =  0.49897693079237715
auc_vae =  0.4979338014042126
auc_ae1(predict) =  0.49434804413239725
auc_vae1(predict) =  0.49434804413239725
--------------------------------------------------

--------------------------------------------------
Эксперимент №49/56  с normal = soc.religion.christian, anomal = talk.politics.misc
auc_ae =  0.7560682046138415
auc_vae =  0.7549849548645937
auc_ae1(predict) =  0.6098796389167503
auc_vae1(predict) =  0.6043781344032096
--------------------------------------------------

--------------------------------------------------
Эксперимент №50/56  с normal = talk.politics.misc, anomal = comp.graphics
auc_ae =  0.4638213399503722
auc_vae =  0.4628949545078578
auc_ae1(predict) =  0.479793217535153
auc_vae1(predict) =  0.4791480562448304
--------------------------------------------------

--------------------------------------------------
Эксперимент №51/56  с normal = talk.politics.misc, anomal = talk.politics.mideast
auc_ae =  0.6916625310173697
auc_vae =  0.6914640198511166
auc_ae1(predict) =  0.5785690653432589
auc_vae1(predict) =  0.5798593879239041
--------------------------------------------------

--------------------------------------------------
Эксперимент №52/56  с normal = talk.politics.misc, anomal = rec.sport.hockey
auc_ae =  0.6227295285359802
auc_vae =  0.6223655913978495
auc_ae1(predict) =  0.508014888337469
auc_vae1(predict) =  0.508014888337469
--------------------------------------------------

--------------------------------------------------
Эксперимент №53/56  с normal = talk.politics.misc, anomal = sci.med
auc_ae =  0.6734160463192721
auc_vae =  0.6741604631927214
auc_ae1(predict) =  0.5291811414392059
auc_vae1(predict) =  0.5227708850289495
--------------------------------------------------

--------------------------------------------------
Эксперимент №54/56  с normal = talk.politics.misc, anomal = sci.space
auc_ae =  0.6400496277915633
auc_vae =  0.6394044665012407
auc_ae1(predict) =  0.508014888337469
auc_vae1(predict) =  0.508014888337469
--------------------------------------------------

--------------------------------------------------
Эксперимент №55/56  с normal = talk.politics.misc, anomal = misc.forsale
auc_ae =  0.4411248966087675
auc_vae =  0.441091811414392
auc_ae1(predict) =  0.479793217535153
auc_vae1(predict) =  0.47785773366418527
--------------------------------------------------

--------------------------------------------------
Эксперимент №56/56  с normal = talk.politics.misc, anomal = soc.religion.christian
auc_ae =  0.6534160463192722
auc_vae =  0.6541439205955335
auc_ae1(predict) =  0.5574028122415219
auc_vae1(predict) =  0.5574028122415219





### Соберем результаты в итоговый список:

In [10]:
auc_list_ae = [
    0.816525788115863,
    0.8245118191161355,
    0.8223357174318854,
    0.8200180380476959,
    0.8778027140969439,
    0.48460246360582304,
    0.6261254199328107,
    0.6638969764837626,
    0.6519036954087345,
    0.4608286674132138,
    0.640582306830907,
    0.7116461366181411,
    0.6343043043043044,
    0.843013013013013,
    0.8058558558558558,
    0.813943943943944,
    0.6013913913913913,
    0.8089189189189189,
    0.8675675675675676,
    0.5162727272727273,
    0.7010505050505049,
    0.7121616161616162,
    0.7978585858585859,
    0.5168810700725595,
    0.7968028819092648,
    0.7278253661232384,
    0.5018677146336721,
    0.7331470735726054,
    0.7969563927010737,
    0.6556253270538985,
    0.8830455259026687,
    0.8242804814233387,
    0.8415593929879646,
    0.8415803244374672,
    0.8446363160648874,
    0.896232339089482,
    0.5029889669007022,
    0.7448445336008024,
    0.6668505516549649,
    0.6750651955867603,
    0.6971013039117353,
    0.49897693079237715,
    0.7560682046138415,
    0.4638213399503722,
    0.6916625310173697,
    0.6227295285359802,
    0.6734160463192721,
    0.6400496277915633,
    0.4411248966087675,
    0.6534160463192722, 
]

auc_list_vae = [
    0.8159070411309437,
    0.8246429095790423,
    0.8221784088763974,
    0.8204060658178997,
    0.8774880969859681,
    0.4845128779395297,
    0.6271220604703248,
    0.6620380739081748,
    0.6512541993281075,
    0.6406606942889139,
    0.7093169092945129,
    0.6330330330330329,
    0.8362762762762764,
    0.8007107107107108,
    0.8143143143143143,
    0.5993493493493495,
    0.8100200200200199,
    0.8679479479479479,
    0.7144040404040404,
    0.6949696969696969,
    0.7042323232323232,                                                                                                                                                                                            
    0.7972929292929293,
    0.512296214423874,
    0.793671261756368,
    0.7277332596481534,
    0.49976973381228695,
    0.7333824567867121,
    0.7962093068476048,
    0.6550915750915752,
    0.8823338566195709,
    0.8236316064887493,
    0.8399372056514913,
    0.8414861329147044,
    0.8446991104133961,
    0.8951439037153324,
    0.4972918756268807,
    0.7429789368104314,
    0.6646038114343029,
    0.6739518555667001,
    0.6908625877632899,
    0.4979338014042126,
    0.7549849548645937,
    0.6914640198511166,
    0.6223655913978495,
    0.6741604631927214,
    0.6394044665012407,
    0.6541439205955335,
]

In [11]:
import numpy as np
from sklearn.metrics import roc_auc_score

auc_np = np.array(auc_list_ae)
print("*" * 50)
print("AE Медиана auc = {}".format(np.median(auc_np)))
print("AE Среднее auc = {}".format(np.mean(auc_np)))
print("*" * 50)

auc_np = np.array(auc_list_vae)
print("*" * 50)
print("VAE Медиана auc = {}".format(np.median(auc_np)))
print("VAE Среднее auc = {}".format(np.mean(auc_np)))
print("*" * 50)

**************************************************
AE Медиана auc = 0.7063483208343231
AE Среднее auc = 0.701833481640325
**************************************************
**************************************************
VAE Медиана auc = 0.7144040404040404
VAE Среднее auc = 0.7203746784755137
**************************************************
