In [1]:
import re
import string
from warnings import filterwarnings

# Patch scikit-learn BEFORE anything imports sklearn: estimators imported ahead
# of patch_sklearn() keep their stock implementation. This has to precede the
# other third-party imports as well, because some of them may import sklearn
# modules on their own.
from sklearnex import patch_sklearn
patch_sklearn()

import mlflow
import nltk
import numpy as np
from datasets import load_dataset
from nltk import pos_tag
from nltk.corpus import stopwords
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics import f1_score
from sklearn.tree import DecisionTreeClassifier

Intel(R) Extension for Scikit-learn* enabled (https://github.com/uxlfoundation/scikit-learn-intelex)


In [3]:
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter with no category or module, so
# deprecation and other library warnings are hidden as well.
filterwarnings("ignore")

In [4]:
# Load the dataset registered under the "ag_news" identifier; its 'train' and
# 'test' splits ('text' / 'label' columns) are read by final_ag_news_preprocess.
ag_news_dataset = load_dataset("ag_news")
# NLTK's English stop-word list, kept as a set for fast membership tests.
stop_words = set(stopwords.words("english"))

#### Нужно: (ничего, стемминг, лемматизация) × (binary, count, tf-idf) -> F1_Macro

In [5]:
# Token normalization variants: None (keep tokens as is), 'стемминг' (stemming),
# 'лемматизация' (lemmatization). The strings are compared verbatim in
# different_ag_news_preprocess, so they must not be changed.
text_preprocess_types = [None, 'стемминг', 'лемматизация']
# Part-of-speech filters: keep tokens whose tag starts with N; N or J;
# N, J or V; or keep everything ('ALL').
words_classes = ['N', 'NJ', 'NJV', 'ALL']

# The full set of frequency filters is kept below for reference; only
# "no filter" and 'low' are enabled for this run.
#frequency_filtration_types = [None, 'low', 'high', 'both']
frequency_filtration_types = [None, 'low']

# NOTE(review): this list is not referenced anywhere else in the visible
# notebook; the three representations are hard-coded in final_ag_news_preprocess.
vector_representation_types = ['binary', 'count', 'tfidf']

In [6]:
# Number of (normalization, POS filter, frequency filter) combinations. It is
# the denominator of the progress message printed by final_ag_news_preprocess.
# The vector representations are not part of this product.
iterations_num = len(text_preprocess_types) * len(words_classes) * len(frequency_filtration_types)
print(iterations_num)

24


In [7]:
def base_ag_news_preprocess(text):
    """Turn one raw AG News text into a list of cleaned, lowercased tokens.

    Pipeline: lowercase -> drop source markers and escaped HTML tags ->
    replace ASCII punctuation and digits with spaces -> tokenize with
    ``nltk.word_tokenize`` -> drop the words found in the module-level
    ``stop_words`` set.

    Args:
        text (str): Raw document text.

    Returns:
        list[str]: Tokens that survived cleaning, in document order.
    """
    text = text.lower()

    # Attribute fragment left over from escaped links; removed verbatim.
    text = text.replace('target=/stocks/quickinfo/fullquote"', '')

    # Agency / site markers are removed only as whole words. A plain
    # str.replace('ap', '') also cut them out of ordinary words
    # ("apple" -> "ple", "japan" -> "jan").
    special_words = ['reuters', 'afp', 'ap', 'usatoday.com', 'forbes.com']
    markers = '|'.join(re.escape(word) for word in special_words)
    text = re.sub(r'(?<!\w)(?:' + markers + r')(?!\w)', '', text)

    # Escaped HTML tags: "&lt;" ... "&gt;" (the ';' is optional). The previous
    # pattern began with the character class [&lt], i.e. "one of &, l, t", and
    # was greedy, so it erased everything from the first such letter up to the
    # last "&gt" in the document.
    text = re.sub(r'&lt;?[^<>]*?&gt;?', '', text)

    # Replace ASCII punctuation and digits with spaces; the table is built once
    # per call instead of rebuilding a set for every character.
    blank_out = str.maketrans({char: ' ' for char in string.punctuation + string.digits})
    text = text.translate(blank_out)

    # Tokenization
    tokens = nltk.word_tokenize(text)

    # Stop-word removal. A separate check for '-' is not needed: every ASCII
    # punctuation character has already been replaced by a space above.
    return [word for word in tokens if word not in stop_words]

In [8]:
def different_ag_news_preprocess(tokens, preprocess_type, words_class):
    """Apply the part-of-speech filter and the chosen normalization to tokens.

    The POS filter runs first, on the original word forms, and only the kept
    tokens are stemmed or lemmatized afterwards. Tagging stems (the previous
    order) hands the tagger strings such as "compani" that are not real words.

    Args:
        tokens (list[str]): Tokens of one document.
        preprocess_type: 'стемминг' (Porter stemming), 'лемматизация'
            (WordNet lemmatization); any other value leaves tokens as they are.
        words_class (str): 'N', 'NJ' or 'NJV' keep tokens whose POS tag starts
            with one of those letters; 'ALL' keeps every token.

    Returns:
        list[str]: A new list with the filtered, normalized tokens.

    Raises:
        ValueError: If ``words_class`` is not one of the four known values
            (previously this case silently returned ``(word, tag)`` tuples).
    """
    tag_prefixes = {
        'N': ('N',),
        'NJ': ('N', 'J'),
        'NJV': ('N', 'J', 'V'),
        'ALL': None,
    }
    if words_class not in tag_prefixes:
        raise ValueError(
            f"unknown words_class {words_class!r}; expected one of {sorted(tag_prefixes)}"
        )

    # Part-of-speech filtering. Tagging is skipped for 'ALL', where the tags
    # would be thrown away anyway.
    prefixes = tag_prefixes[words_class]
    if prefixes is None:
        tokens = list(tokens)
    else:
        tokens = [word for word, tag in pos_tag(tokens) if tag.startswith(prefixes)]

    # Word normalization
    if preprocess_type == 'лемматизация':
        lemmatize = nltk.WordNetLemmatizer().lemmatize
        tokens = [lemmatize(token) for token in tokens]
    elif preprocess_type == 'стемминг':
        stem = nltk.PorterStemmer().stem
        tokens = [stem(token) for token in tokens]

    return tokens

In [9]:
def frequency_filtration(words_dictionary, frequency_filtration_type, min_count=10, max_count=3000):
    """Drop words whose corpus frequency falls outside the allowed range.

    Args:
        words_dictionary (dict): Maps word -> number of occurrences.
        frequency_filtration_type: 'low' removes words rarer than
            ``min_count``; 'high' removes words more frequent than
            ``max_count``; 'both' applies both bounds. Any other value
            (e.g. ``None``) disables filtering.
        min_count (int): Smallest count that is kept (inclusive).
        max_count (int): Largest count that is kept (inclusive).

    Returns:
        dict: A new filtered dictionary, or ``words_dictionary`` itself (the
        same object, not a copy) when no filtering is requested.
    """
    if frequency_filtration_type == 'low':
        lower, upper = min_count, None
    elif frequency_filtration_type == 'high':
        lower, upper = None, max_count
    elif frequency_filtration_type == 'both':
        lower, upper = min_count, max_count
    else:
        return words_dictionary

    return {
        word: count
        for word, count in words_dictionary.items()
        if (lower is None or count >= lower) and (upper is None or count <= upper)
    }

In [10]:
def dummy(doc):
    """Identity function: hand ``doc`` back untouched.

    Passed as both ``tokenizer`` and ``preprocessor`` to the vectorizers so
    that documents which are already token lists are used as they are.
    """
    return doc

In [18]:
def _build_vectorizers(vocabulary):
    """Return (run-name suffix, vectorizer) pairs: binary, count and TF-IDF."""
    # Documents are already token lists, so tokenization and preprocessing
    # are replaced by the identity function.
    shared = dict(vocabulary=vocabulary, tokenizer=dummy, preprocessor=dummy)
    return [
        ('OHE', CountVectorizer(dtype=np.int8, binary=True, **shared)),
        ('COUNT', CountVectorizer(dtype=np.int8, **shared)),
        ('TFIDF', TfidfVectorizer(dtype=np.float32, **shared)),
    ]


def _log_mlflow_run(run_name, model, params, macro_score):
    """Record one trained classifier as a finished MLflow run."""
    # The context manager ends the run even when logging raises, so a failure
    # cannot leave the run active.
    with mlflow.start_run(run_name=run_name):
        mlflow.log_param('model', model.__class__.__name__)
        for key, value in params.items():
            mlflow.log_param(key, value)
        mlflow.sklearn.log_model(model, 'DecisionTreeClassifier')
        mlflow.log_metric('macro_score', macro_score)


def final_ag_news_preprocess(dataset, max_depth=750, random_state=42):
    """Run the experiment grid and log every trained model to MLflow.

    For each (normalization, POS filter, frequency filter) combination taken
    from the module-level ``text_preprocess_types``, ``words_classes`` and
    ``frequency_filtration_types`` lists, three decision trees are trained
    (binary, count and TF-IDF features) and scored with macro F1 on the test
    split. Each model becomes one MLflow run named
    ``{preprocess_type}_{words_class}_{frequency_filtration_type}_{OHE|COUNT|TFIDF}``.

    Args:
        dataset: Object with 'train' and 'test' splits, each exposing 'text'
            and 'label' columns.
        max_depth (int): ``max_depth`` of every ``DecisionTreeClassifier``.
        random_state: Seed passed to every classifier so that repeated runs
            give the same trees; pass ``None`` for unseeded behaviour.

    Returns:
        None. Results are logged to MLflow and progress is printed.
    """
    # Base preprocessing, done once. These token lists are never modified
    # afterwards: every grid combination starts from them. Previously the
    # training list was aliased and overwritten in place, so each combination
    # worked on tokens already filtered/stemmed by the previous ones, while the
    # test split was rebuilt from base tokens every time.
    x_train_base = [base_ag_news_preprocess(text) for text in dataset['train']['text']]
    y_train = dataset['train']['label']

    x_test_base = [base_ag_news_preprocess(text) for text in dataset['test']['text']]
    y_test = dataset['test']['label']

    # NOTE(review): the counter starts at 2 and the (None, 'N') combination is
    # skipped below — presumably those runs were logged in an earlier session;
    # confirm before relying on a complete grid.
    index = 2

    # Variant-specific preprocessing
    for preprocess_type in text_preprocess_types:
        for words_class in words_classes:
            if preprocess_type is None and words_class == 'N':
                continue

            xtr = [
                different_ag_news_preprocess(tokens, preprocess_type, words_class)
                for tokens in x_train_base
            ]
            xte = [
                different_ag_news_preprocess(tokens, preprocess_type, words_class)
                for tokens in x_test_base
            ]

            # Word frequencies over the training split only.
            words = {}
            for doc_tokens in xtr:
                for token in doc_tokens:
                    words[token] = words.get(token, 0) + 1

            # Frequency filtering
            for frequency_filtration_type in frequency_filtration_types:
                filtered_words = frequency_filtration(words, frequency_filtration_type)
                token_length = len(filtered_words)

                # Fixed vocabulary: words in sorted order mapped to column indices.
                words_indexed = {word: idx for idx, word in enumerate(sorted(filtered_words))}

                params = {
                    'preprocess_type': preprocess_type,
                    'words_class': words_class,
                    'frequency_filtration_type': frequency_filtration_type,
                    'token_length': token_length,
                }

                for suffix, vectorizer in _build_vectorizers(words_indexed):
                    x_train_vec = vectorizer.fit_transform(xtr)
                    x_test_vec = vectorizer.transform(xte)

                    clf = DecisionTreeClassifier(max_depth=max_depth, random_state=random_state)
                    clf.fit(x_train_vec, y_train)

                    macro_score = f1_score(y_test, clf.predict(x_test_vec), average='macro')

                    _log_mlflow_run(
                        f'{preprocess_type}_{words_class}_{frequency_filtration_type}_{suffix}',
                        clf,
                        params,
                        macro_score,
                    )

                index += 1
                print(f'Итерация {index} / {iterations_num}')

In [19]:
# Send all runs to the tracking server at this address and group them under
# the "agNewsDT" experiment.
# NOTE(review): the URI is hard-coded to localhost:5000 — assumes a tracking
# server is listening there; confirm before running elsewhere.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment(experiment_name="agNewsDT")

<Experiment: artifact_location='mlflow-artifacts:/537408113752698407', creation_time=1741637380932, experiment_id='537408113752698407', last_update_time=1741637380932, lifecycle_stage='active', name='agNewsDT', tags={}>

In [20]:
# Run the whole experiment grid on AG News: every trained model is logged as
# an MLflow run and a progress line is printed after each grid combination.
final_ag_news_preprocess(ag_news_dataset)



🏃 View run None_NJ_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/a9998c497dff418180ac4f9432f2fad3
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_NJ_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/984bc71c2d134d9b89bb98f83f0045a3
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_NJ_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/c8248b0c18e04b50bda8eddbb649ac67
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 3 / 24




🏃 View run None_NJ_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/e4fba1c443cb44e8a382590625119c69
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_NJ_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/981f68d40dcf4d23a0d63b2ca1995b9a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_NJ_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/adcf621cd630477088eca60c6adc7b98
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 4 / 24




🏃 View run None_NJV_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/5c5e37087a784b468957e95096ef6edb
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_NJV_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/7bbfc295189f4ca9947f1d9b4cf433c0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_NJV_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/d4445bc701114588b03e5b272bdc3d91
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 5 / 24




🏃 View run None_NJV_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/2fa6fc2546174c29a47e116dc1b3f2f6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_NJV_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/5546736674954e6b85ea757ccbfa7825
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_NJV_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/6efeb0c953b14e6d8e18369bea99e965
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 6 / 24




🏃 View run None_ALL_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/8c71567300e04e9399a1ddba300bc3f6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_ALL_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/1ff764a944af4c17b5e366e439402384
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_ALL_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/3c663c9074fb465bb49bdff2c3ab0dd7
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 7 / 24




🏃 View run None_ALL_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/15306e6060f4421babccf6c59841ce02
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_ALL_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/b825b542f5974f0998dc339e8b520ba9
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run None_ALL_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/55fb48dc54c6420cb4a3c901201439e8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 8 / 24




🏃 View run стемминг_N_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/51be57539b084e28bf05035b58c2f54c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_N_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/3d73509fc85440218227e06a96000809
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_N_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/db7fc4a392234444bbdb11a14b00e883
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 9 / 24




🏃 View run стемминг_N_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/eb564db711e34bb3a3dada4fcedbe798
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_N_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/a1dce75f3afd4ff4884f39e0955d6643
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_N_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/3cdee238b870439b8f40feaa4c49aa91
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 10 / 24




🏃 View run стемминг_NJ_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/88c37c09623d4f69b43cb35a91ec28f9
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_NJ_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/240c1549387847b09b7598b676b321ac
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_NJ_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/558018d6823a4ee68994e9ea4c163c83
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 11 / 24




🏃 View run стемминг_NJ_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/e5f2d591af78440db45478e5b07ac89d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_NJ_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/7bebe42c7ae44829b1007934baf86fa3
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_NJ_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/9dbf5e2c13ac4ce3a351002ab09396db
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 12 / 24




🏃 View run стемминг_NJV_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/19b7b1bf24b44c4a96462d188e201d5a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_NJV_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/73e696889e624a83af953ed6d6f2dd57
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_NJV_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/81aace30de4940ccbffcf0c3db6f376f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 13 / 24




🏃 View run стемминг_NJV_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/b01c2c21065b43fead3d3603cba5f703
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_NJV_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/b4030da05e514999a16187c9a24fae9a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_NJV_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/a7c8906b7a124ba0b82102c669747758
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 14 / 24




🏃 View run стемминг_ALL_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/196284dcbc8546df948585bb522db044
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_ALL_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/5725a95fc3f74c2395192d29ac549b08
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_ALL_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/2dd5a4b552a44f6e849ba17bdcfb347d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 15 / 24




🏃 View run стемминг_ALL_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/9e852b3a25564054ba87eefeaf00493e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_ALL_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/c7616471623c4472b98f57a90e0339f1
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run стемминг_ALL_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/56320e3af19142e8a5bee2c446db97eb
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 16 / 24




🏃 View run лемматизация_N_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/fa44fd33aa0b4b149219cf8a991d985c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_N_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/cb9ed0bf29ed40b9bcb9ae0bbfd9db66
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_N_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/d0d06e64fc6c48f0abf603bc23da73d8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 17 / 24




🏃 View run лемматизация_N_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/0d49c686643b409aaf48ff7330d78223
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_N_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/f71338c0e6624a96965ad28e909f3a25
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_N_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/8bb51f985d4d419bbc87d533700a5b59
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 18 / 24




🏃 View run лемматизация_NJ_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/c373910c09304da08877b452567fbd67
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_NJ_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/919d114ee89d4ce5be47eda3ca17810b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_NJ_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/754edbbfb8664eacaefa5ca60696b35f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 19 / 24




🏃 View run лемматизация_NJ_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/cff463fe6400424aac5d358ba77c4df4
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_NJ_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/32f12162a1844ba99de2a2b0f1520e90
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_NJ_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/4f47f544dc114cb3b1642460dfa5eebd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 20 / 24




🏃 View run лемматизация_NJV_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/ad9f56a9ec2742d49de8b255e19cbaf5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_NJV_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/166e10aadeb1474f8dd4fc8fd7f696b7
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_NJV_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/2932242ddefb4ed493a685a0c567a969
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 21 / 24




🏃 View run лемматизация_NJV_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/2c16772c97ad4d21aa4f3219615ec22e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_NJV_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/7384bd106b334764a7e90bbe8baadc9b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_NJV_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/4a657ffeadb742cdaebdbf22839ba5bf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 22 / 24




🏃 View run лемматизация_ALL_None_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/1640ea870a0e4d1dbb6c5bcd95a0c967
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_ALL_None_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/a584e8e2cf694b8196cce5f1233993ca
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_ALL_None_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/2edef4db63c246ba8624e0cbb16c4cae
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 23 / 24




🏃 View run лемматизация_ALL_low_OHE at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/5aec787fc3c047e982feb8517c51e530
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_ALL_low_COUNT at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/b32e5a3fd2eb41859eafe27d8f7dcc54
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407




🏃 View run лемматизация_ALL_low_TFIDF at: http://127.0.0.1:5000/#/experiments/537408113752698407/runs/681722f6b7e0448fabb0a34b9f7f0111
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/537408113752698407
Итерация 24 / 24


In [16]:
# NOTE(review): `cc` is not referenced anywhere else in the visible notebook,
# and this cell's execution count (16) is out of sequence with the cells above
# — looks like leftover scratch; confirm and remove.
cc = DecisionTreeClassifier()