# Multi-Label Classification with Machine Learning
In this notebook, we explore several machine learning techniques for a multi-label classification problem, in which each sample can carry more than one label at once. We evaluate and compare the performance of the different algorithms on the dataset.


In [1]:
import ast
import random
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.tree import DecisionTreeClassifier
from skmultilearn.model_selection import iterative_train_test_split
from tqdm import tqdm

from preprocess_functions import build_tree, extract_keys, preprocess_texts
from utils import CalibratedLabelRankClassifier, ChainOfClassifiers, LabelPowersetClassifier, \
    assess_models, prune_and_subsample, ConditionalDependencyNetwork, MetaBinaryRelevance


In [2]:
# Global switches for the whole notebook.
#   RANDOM_STATE: seed shared by the RNGs below and by the seeded estimators/splits.
#   OVERWRITE:    when True, the training cells refit and re-save their models
#                 even if a cached copy is already available.
RANDOM_STATE = 42
OVERWRITE = False

# Seed both global generators (Python's and NumPy's legacy one).
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)


In [3]:
# Tunable constants.
# NOTE(review): INIT_POINTS / N_ITER are not used in the cells visible here;
# presumably they size a hyper-parameter search elsewhere — confirm.
INIT_POINTS = 1
N_ITER = 5
TEST_SIZE = 2e-1  # fraction held out at every split (0.2)

# Unfitted base learners, keyed by the short name used in the result tables.
BASE_CLASSIFIERS = dict(
    logistic_regression=LogisticRegression(solver='liblinear', random_state=RANDOM_STATE),
    gaussian_nb=GaussianNB(),
    decision_tree=DecisionTreeClassifier(random_state=RANDOM_STATE),
    random_forest=RandomForestClassifier(random_state=RANDOM_STATE),
    xgb=xgb.XGBClassifier(random_state=RANDOM_STATE),
)

# Root folders for each supported runtime.
COLAB_PATH = Path('/content/drive/MyDrive')
KAGGLE_PATH = Path('/kaggle/input')
LOCAL_PATH = Path('./')

# Detect the runtime by probing for its marker module:
# Google Colab first, then Kaggle, otherwise a local Jupyter session.
try:
    import google.colab
except ImportError:
    try:
        import kaggle_secrets
    except ImportError:
        DATA_PATH = LOCAL_PATH / 'data'
        MODELS_PATH = LOCAL_PATH / 'models'
    else:
        DATA_PATH = KAGGLE_PATH
        MODELS_PATH = KAGGLE_PATH
else:
    DATA_PATH = COLAB_PATH / 'data'
    MODELS_PATH = COLAB_PATH / 'models'

# Locations of the pre-trained embeddings and of the tweet dataset.
GLOVE_6B_PATH = MODELS_PATH / 'glove-embeddings'
THREAT_TWEETS_PATH = DATA_PATH / 'tweets-dataset-for-cyberattack-detection'

GLOVE_6B_300D_TXT = GLOVE_6B_PATH / 'glove.6B.300d.txt'
THREAT_TWEETS_CSV = THREAT_TWEETS_PATH / 'tweets_final.csv'


## 1. Introduction

In this notebook, we solve a multi-label classification problem using different machine learning models. Our goal is to predict the set of labels of each sample based on the input features.


## 2. Data Loading and Preprocessing
We will load the dataset, inspect its structure, and preprocess it for machine learning models.


In [4]:
def _watson_category_tree(raw_watson):
    """Parse one serialized Watson payload and build the tree of its 'categories' entry."""
    return build_tree(ast.literal_eval(raw_watson).get('categories', []))


# Load the tweets, decode the serialized columns, keep only the relevant rows
# and drop those without text. The later `assign` entries see the columns
# produced by the earlier ones, so `watson_list` is derived from the parsed tree.
threat_tweets = (
    pd.read_csv(filepath_or_buffer=THREAT_TWEETS_CSV)
    .assign(
        tweet=lambda frame: frame['tweet'].apply(func=ast.literal_eval),
        watson=lambda frame: frame['watson'].apply(func=_watson_category_tree),
        watson_list=lambda frame: frame['watson'].apply(func=extract_keys),
    )
    .loc[lambda frame: frame['relevant'] == True]
    .drop(columns='relevant')
    .dropna(subset=['text'], ignore_index=True)
)

threat_tweets.head()


Unnamed: 0,_id,date,id,text,tweet,type,watson,annotation,urls,destination_url,valid_certificate,watson_list
0,b'5b8876f9bb325e65fa7e78e4',2018-08-30 23:00:08+00:00,1035301167952211969,Protect your customers access Prestashop Ant...,{'created_at': 'Thu Aug 30 23:00:08 +0000 2018...,ddos,{'technology and computing': {'internet techno...,threat,['http://addons.prestashop.com/en/23513-anti-d...,https://addons.prestashop.com/en/23513-anti-dd...,True,"[technology and computing, internet technology..."
1,b'5b8876f9bb325e65fa7e78e5',2018-08-30 23:00:09+00:00,1035301173178249217,Data leak from Huazhu Hotels may affect 130 mi...,{'created_at': 'Thu Aug 30 23:00:09 +0000 2018...,leak,"{'travel': {'hotels': {}}, 'home and garden': ...",threat,['http://www.hotelmanagement.net/tech/data-lea...,http://www.hotelmanagement.net/tech/data-leak-...,True,"[travel, hotels, home and garden, home improve..."
2,b'5b8876fabb325e65fa7e78e6',2018-08-30 23:00:09+00:00,1035301174583353344,Instagram App 41.1788.50991.0 #Denial Of #Serv...,{'created_at': 'Thu Aug 30 23:00:09 +0000 2018...,general,{'science': {'weather': {'meteorological disas...,threat,['https://packetstormsecurity.com/files/149120...,https://packetstormsecurity.com/files/149120/i...,True,"[science, weather, meteorological disaster, hu..."
3,b'5b88770abb325e65fa7e78e7',2018-08-30 23:00:25+00:00,1035301242271096832,(good slides): \n\nThe Advanced Exploitation o...,{'created_at': 'Thu Aug 30 23:00:25 +0000 2018...,vulnerability,{'business and industrial': {'business operati...,threat,['https://twitter.com/i/web/status/10353012422...,https://twitter.com/i/web/status/1035301242271...,True,"[business and industrial, business operations,..."
4,b'5b887713bb325e65fa7e78e8',2018-08-30 23:00:35+00:00,1035301282095853569,CVE-2018-1000532 (beep)\nhttps://t.co/CaKbo38U...,{'created_at': 'Thu Aug 30 23:00:35 +0000 2018...,vulnerability,{'technology and computing': {'computer securi...,threat,['https://web.nvd.nist.gov/view/vuln/detail?vu...,https://nvd.nist.gov/vuln/detail/CVE-2018-1000532,True,"[technology and computing, computer security, ..."


For the goal of the project, the categories of interest are:
1. computer security/network security
2. computer security/antivirus and malware
3. operating systems/mac os
4. operating systems/windows
5. operating systems/unix
6. operating systems/linux
7. software
8. programming languages, included in software
9. software/databases
10. hardware
11. electronic components, included in hardware
12. hardware/computer/servers
13. hardware/computer/portable computer
14. hardware/computer/desktop computer
15. hardware/computer components
16. hardware/computer networking/router
17. hardware/computer networking/wireless technology
18. networking
19. internet technology, included in networking


In [5]:
# Watson taxonomy nodes of interest -> coarse target label.
# Several nodes collapse onto the same target (e.g. 'programming languages'
# counts as 'software').
FIX_TARGETS = {
    'computer security': 'computer security',
    'operating systems': 'operating systems',
    'software': 'software',
    'programming languages': 'software',
    'hardware': 'hardware',
    'electronic components': 'hardware',
    'networking': 'networking',
    'internet technology': 'networking'
}

# For every tweet: the distinct targets reached by its Watson nodes, or
# ['other'] when none of the nodes is of interest.
# The labels are sorted so the column is identical on every run; building the
# list straight from a set would make the order depend on string hashing,
# which changes from one kernel to the next.
chosen_categories = [
    sorted({FIX_TARGETS[label] for label in labels if label in FIX_TARGETS}) or ['other']
    for labels in threat_tweets['watson_list']
]

threat_tweets['target'] = chosen_categories

threat_tweets.head()


Unnamed: 0,_id,date,id,text,tweet,type,watson,annotation,urls,destination_url,valid_certificate,watson_list,target
0,b'5b8876f9bb325e65fa7e78e4',2018-08-30 23:00:08+00:00,1035301167952211969,Protect your customers access Prestashop Ant...,{'created_at': 'Thu Aug 30 23:00:08 +0000 2018...,ddos,{'technology and computing': {'internet techno...,threat,['http://addons.prestashop.com/en/23513-anti-d...,https://addons.prestashop.com/en/23513-anti-dd...,True,"[technology and computing, internet technology...","[software, computer security, networking]"
1,b'5b8876f9bb325e65fa7e78e5',2018-08-30 23:00:09+00:00,1035301173178249217,Data leak from Huazhu Hotels may affect 130 mi...,{'created_at': 'Thu Aug 30 23:00:09 +0000 2018...,leak,"{'travel': {'hotels': {}}, 'home and garden': ...",threat,['http://www.hotelmanagement.net/tech/data-lea...,http://www.hotelmanagement.net/tech/data-leak-...,True,"[travel, hotels, home and garden, home improve...",[other]
2,b'5b8876fabb325e65fa7e78e6',2018-08-30 23:00:09+00:00,1035301174583353344,Instagram App 41.1788.50991.0 #Denial Of #Serv...,{'created_at': 'Thu Aug 30 23:00:09 +0000 2018...,general,{'science': {'weather': {'meteorological disas...,threat,['https://packetstormsecurity.com/files/149120...,https://packetstormsecurity.com/files/149120/i...,True,"[science, weather, meteorological disaster, hu...",[hardware]
3,b'5b88770abb325e65fa7e78e7',2018-08-30 23:00:25+00:00,1035301242271096832,(good slides): \n\nThe Advanced Exploitation o...,{'created_at': 'Thu Aug 30 23:00:25 +0000 2018...,vulnerability,{'business and industrial': {'business operati...,threat,['https://twitter.com/i/web/status/10353012422...,https://twitter.com/i/web/status/1035301242271...,True,"[business and industrial, business operations,...",[operating systems]
4,b'5b887713bb325e65fa7e78e8',2018-08-30 23:00:35+00:00,1035301282095853569,CVE-2018-1000532 (beep)\nhttps://t.co/CaKbo38U...,{'created_at': 'Thu Aug 30 23:00:35 +0000 2018...,vulnerability,{'technology and computing': {'computer securi...,threat,['https://web.nvd.nist.gov/view/vuln/detail?vu...,https://nvd.nist.gov/vuln/detail/CVE-2018-1000532,True,"[technology and computing, computer security, ...","[software, computer security, hardware]"


In [6]:
# Sub-categories of 'computer security' that become the targets of this notebook.
FIX_TARGETS_COMPUTER_SECURITY = {
    'network security': 'network security',
    'antivirus and malware': 'antivirus and malware'
}

# Keep only the tweets whose Watson labels include 'computer security'.
threat_tweets_computer_security = threat_tweets[
    threat_tweets["watson_list"].apply(lambda x: "computer security" in x)
].reset_index(drop=True)

# For every remaining tweet: the distinct sub-categories it carries, or
# ['other'] when it carries none of them.
# The labels are sorted so the column is identical on every run; building the
# list straight from a set would make the order depend on string hashing,
# which changes from one kernel to the next.
chosen_categories = [
    sorted({
        FIX_TARGETS_COMPUTER_SECURITY[label]
        for label in labels
        if label in FIX_TARGETS_COMPUTER_SECURITY
    }) or ['other']
    for labels in threat_tweets_computer_security['watson_list']
]
print(len(chosen_categories))
threat_tweets_computer_security['target'] = chosen_categories

threat_tweets_computer_security.head()

5817


Unnamed: 0,_id,date,id,text,tweet,type,watson,annotation,urls,destination_url,valid_certificate,watson_list,target
0,b'5b8876f9bb325e65fa7e78e4',2018-08-30 23:00:08+00:00,1035301167952211969,Protect your customers access Prestashop Ant...,{'created_at': 'Thu Aug 30 23:00:08 +0000 2018...,ddos,{'technology and computing': {'internet techno...,threat,['http://addons.prestashop.com/en/23513-anti-d...,https://addons.prestashop.com/en/23513-anti-dd...,True,"[technology and computing, internet technology...",[antivirus and malware]
1,b'5b887713bb325e65fa7e78e8',2018-08-30 23:00:35+00:00,1035301282095853569,CVE-2018-1000532 (beep)\nhttps://t.co/CaKbo38U...,{'created_at': 'Thu Aug 30 23:00:35 +0000 2018...,vulnerability,{'technology and computing': {'computer securi...,threat,['https://web.nvd.nist.gov/view/vuln/detail?vu...,https://nvd.nist.gov/vuln/detail/CVE-2018-1000532,True,"[technology and computing, computer security, ...",[network security]
2,b'5b887722bb325e65fa7e78e9',2018-08-30 23:00:49+00:00,1035301341634224128,Will upload some of yesterday's videos which d...,{'created_at': 'Thu Aug 30 23:00:49 +0000 2018...,ddos,{'technology and computing': {'computer securi...,threat,[],,,"[technology and computing, computer security, ...",[antivirus and malware]
3,b'5b887816bb325e65fa7e78ed',2018-08-30 23:04:53+00:00,1035302366273257472,OpenSSH vulnerability affecting all versions b...,{'created_at': 'Thu Aug 30 23:04:53 +0000 2018...,vulnerability,{'technology and computing': {'computer securi...,threat,['https://lnkd.in/fuU3BPk'],https://lnkd.in/fuU3BPk,True,"[technology and computing, computer security, ...",[network security]
4,b'5b88781dbb325e65fa7e78ee',2018-08-30 23:05:00+00:00,1035302394148601857,law in Fiserv banking platform exposed persona...,{'created_at': 'Thu Aug 30 23:05:00 +0000 2018...,vulnerability,"{'finance': {'bank': {}}, 'technology and comp...",threat,['https://hubs.ly/H0dByWm0'],https://www.itpro.com/vulnerability/31797/flaw...,True,"[finance, bank, technology and computing, comp...",[network security]


In [7]:
# Feature matrix for the computer-security tweets.
# NOTE(review): preprocess_texts lives in preprocess_functions; presumably it
# turns each text into a 300-d GloVe-based vector — confirm against its source.
X = preprocess_texts(
    embedding_dim=300,
    model_path=GLOVE_6B_300D_TXT,
    list_str=threat_tweets_computer_security['text'],
)


## 4. Model Training

We will now train different models and evaluate their performance.


In [8]:
# Cache files holding the fitted models of each multi-label technique.
# NOTE(review): these paths are relative to the working directory and do not
# go through MODELS_PATH — confirm this is intended on Colab/Kaggle.
PATH_BR_COMPUTER_SECURITY = Path('models/binary_problems/br_computer_security.pkl')
PATH_CLR_COMPUTER_SECURITY = Path('models/binary_problems/clr_computer_security.pkl')
PATH_CC_COMPUTER_SECURITY = Path('models/binary_problems/cc_computer_security.pkl')
PATH_LP_COMPUTER_SECURITY = Path('models/multiclass_problems/lp_computer_security.pkl')
PATH_PST_COMPUTER_SECURITY = Path('models/multiclass_problems/pst_computer_security.pkl')
PATH_CDN_COMPUTER_SECURITY = Path('models/ensembles/cdn_computer_security.pkl')
PATH_MBR_COMPUTER_SECURITY = Path('models/ensembles/mbr_computer_security.pkl')


def _load_cached_models(path):
    """Return the object cached at ``path``, or ``None`` when it has to be trained.

    ``None`` is returned when the file does not exist, and also when
    ``OVERWRITE`` is set: the training cells refit everything in that case, so
    unpickling the old file would be wasted work and, if the file is stale or
    was written by an incompatible library version, would raise before the
    retraining that ``OVERWRITE`` asks for can run.
    """
    if OVERWRITE or not path.exists():
        return None
    # joblib.load unpickles arbitrary objects: only use it on files you produced.
    return joblib.load(path)


# A value of None tells the matching training cell to fit the models.
br = _load_cached_models(PATH_BR_COMPUTER_SECURITY)
clr = _load_cached_models(PATH_CLR_COMPUTER_SECURITY)
cc = _load_cached_models(PATH_CC_COMPUTER_SECURITY)
lp = _load_cached_models(PATH_LP_COMPUTER_SECURITY)
pst = _load_cached_models(PATH_PST_COMPUTER_SECURITY)
cdn = _load_cached_models(PATH_CDN_COMPUTER_SECURITY)
mbr = _load_cached_models(PATH_MBR_COMPUTER_SECURITY)


In [9]:
# Binary indicator matrix: one column per label in mlb.classes_.
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(y=threat_tweets_computer_security['target'])

# Label Powerset targets: every distinct row of `y` (i.e. every distinct label
# combination) becomes one class id.
# reshape(-1) keeps the ids one-dimensional on every NumPy version; NumPy 2.0.0
# returned a 2-D inverse when `axis` is given, which does not fit a column.
unique_label_sets, lp_class_ids = np.unique(ar=y, axis=0, return_inverse=True)
threat_tweets_computer_security['target_lp'] = lp_class_ids.reshape(-1)

# Pruned Sets targets.
# NOTE(review): prune_and_subsample comes from utils; presumably it returns one
# class id per sample plus the id -> label-set map used downstream — confirm.
pst_class_ids, label_map_pst = prune_and_subsample(y, pruning_threshold=5, max_sub_samples=3)
threat_tweets_computer_security['target_pst'] = pst_class_ids

y_lp = threat_tweets_computer_security['target_lp']
y_pst = threat_tweets_computer_security['target_pst']

# Class id -> indicator row (as a tuple) for the Label Powerset encoding.
label_map_lp = {i: tuple(lbl_set) for i, lbl_set in enumerate(unique_label_sets)}


In [10]:
def _stratified_three_way_split(features, labels):
    """Run two seeded, stratified ``train_test_split`` calls in a row.

    The first call holds out the test part; the second one splits what is left
    into train and validation.

    Returns a tuple
    ``(features_train_val, features_test, labels_train_val, labels_test,
    features_train, features_val, labels_train, labels_val)``.
    """
    options = dict(test_size=TEST_SIZE, random_state=RANDOM_STATE, shuffle=True)
    outer = train_test_split(features, labels, stratify=labels, **options)
    features_train_val, _, labels_train_val, _ = outer
    inner = train_test_split(
        features_train_val, labels_train_val, stratify=labels_train_val, **options
    )
    return (*outer, *inner)


# Indicator-matrix targets (BR, CLR, CC, CDN, MBR): iterative stratification,
# test part first, then validation carved out of the remainder.
# NOTE(review): models loaded from the cache files were fitted on whatever split
# existed when they were saved — make sure it matches this one.
X_train_val, y_train_val, X_test, y_test = iterative_train_test_split(X=X, y=y, test_size=TEST_SIZE)
X_train, y_train, X_val, y_val = iterative_train_test_split(X=X_train_val, y=y_train_val, test_size=TEST_SIZE)

# LP: single-column class ids, stratified on the class id.
(X_train_val_lp, X_test_lp, y_train_val_lp, y_test_lp,
 X_train_lp, X_val_lp, y_train_lp, y_val_lp) = _stratified_three_way_split(X, y_lp)

# PSt: same scheme on the pruned-set class ids.
(X_train_val_pst, X_test_pst, y_train_val_pst, y_test_pst,
 X_train_pst, X_val_pst, y_train_pst, y_val_pst) = _stratified_three_way_split(X, y_pst)


### 4.1. Binary Problems


#### 4.1.1. BR (Binary Relevance)


In [11]:
# Fit one Binary Relevance model per base learner unless a cached set exists
# (or OVERWRITE forces a refit), then cache the result on disk.
if not br or OVERWRITE:
    br = {}

    for k, v in tqdm(BASE_CLASSIFIERS.items()):
        # OneVsRestClassifier fits clones of the estimator, one per label column.
        br[k] = OneVsRestClassifier(estimator=v).fit(
            X=X_train,
            y=y_train
        )

    # joblib.dump does not create missing folders; without this the freshly
    # trained models are lost to a FileNotFoundError on a clean checkout.
    PATH_BR_COMPUTER_SECURITY.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(br, PATH_BR_COMPUTER_SECURITY, compress=9)


100%|██████████| 5/5 [00:33<00:00,  6.67s/it]


#### 4.1.2. CLR (Calibrated Label Ranking)


In [12]:
# Fit one Calibrated Label Ranking model per base learner unless a cached set
# exists (or OVERWRITE forces a refit), then cache the result on disk.
if not clr or OVERWRITE:
    clr = {}

    for k, v in tqdm(BASE_CLASSIFIERS.items()):
        model = CalibratedLabelRankClassifier(
            # Hand over a fresh unfitted copy: BASE_CLASSIFIERS instances are
            # shared by every technique, and the wrapper may fit the estimator
            # it receives in place (not visible from this notebook).
            classifier=clone(v),
            classes=mlb.classes_,
            random_state=RANDOM_STATE
        )

        clr[k] = model.fit(
            x=X_train,
            # Turn each indicator row back into the list of its label names.
            y=[list(mlb.classes_[np.where(row == 1)[0]]) for row in y_train]
        )

    # joblib.dump does not create missing folders; without this the freshly
    # trained models are lost to a FileNotFoundError on a clean checkout.
    PATH_CLR_COMPUTER_SECURITY.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(clr, PATH_CLR_COMPUTER_SECURITY, compress=9)


100%|██████████| 5/5 [00:44<00:00,  8.97s/it]


#### 4.1.3. CC (Classifier Chains)


In [13]:
# Fit one Classifier Chain per base learner unless a cached set exists
# (or OVERWRITE forces a refit), then cache the result on disk.
if not cc or OVERWRITE:
    cc = {}

    for k, v in tqdm(BASE_CLASSIFIERS.items()):
        model = ChainOfClassifiers(
            # Hand over a fresh unfitted copy: BASE_CLASSIFIERS instances are
            # shared by every technique, and the wrapper may fit the estimator
            # it receives in place (not visible from this notebook).
            classifier=clone(v),
            classes=mlb.classes_,
            random_state=RANDOM_STATE
        )

        cc[k] = model.fit(
            x=X_train,
            y=y_train
        )

    # joblib.dump does not create missing folders; without this the freshly
    # trained models are lost to a FileNotFoundError on a clean checkout.
    PATH_CC_COMPUTER_SECURITY.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(cc, PATH_CC_COMPUTER_SECURITY, compress=9)


100%|██████████| 5/5 [00:29<00:00,  5.97s/it]


### 4.2. Multi-class Problems



#### 4.2.1. LP (Label Powerset)


In [14]:
# Fit one Label Powerset model per base learner unless a cached set exists
# (or OVERWRITE forces a refit), then cache the result on disk.
if not lp or OVERWRITE:
    lp = {}

    for k, v in tqdm(BASE_CLASSIFIERS.items()):
        model = LabelPowersetClassifier(
            # Hand over a fresh unfitted copy: BASE_CLASSIFIERS instances are
            # shared by every technique (the PSt cell uses this same wrapper),
            # and the wrapper may fit the estimator it receives in place
            # (not visible from this notebook).
            classifier=clone(v),
            label_map=label_map_lp,
            random_state=RANDOM_STATE
        )

        lp[k] = model.fit(
            x=X_train_lp,
            y=y_train_lp
        )

    # joblib.dump does not create missing folders; without this the freshly
    # trained models are lost to a FileNotFoundError on a clean checkout.
    PATH_LP_COMPUTER_SECURITY.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(lp, PATH_LP_COMPUTER_SECURITY, compress=9)


100%|██████████| 5/5 [00:27<00:00,  5.52s/it]


#### 4.2.2. PSt (Pruned Sets)


In [15]:
# Fit one Pruned Sets model per base learner unless a cached set exists
# (or OVERWRITE forces a refit), then cache the result on disk.
# Pruned Sets reuses the Label Powerset wrapper with the pruned label map.
if not pst or OVERWRITE:
    pst = {}

    for k, v in tqdm(BASE_CLASSIFIERS.items()):
        model = LabelPowersetClassifier(
            # Hand over a fresh unfitted copy: BASE_CLASSIFIERS instances are
            # shared by every technique (the LP cell uses this same wrapper),
            # and the wrapper may fit the estimator it receives in place
            # (not visible from this notebook).
            classifier=clone(v),
            label_map=label_map_pst,
            random_state=RANDOM_STATE
        )

        pst[k] = model.fit(
            x=X_train_pst,
            y=y_train_pst
        )

    # joblib.dump does not create missing folders; without this the freshly
    # trained models are lost to a FileNotFoundError on a clean checkout.
    PATH_PST_COMPUTER_SECURITY.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(pst, PATH_PST_COMPUTER_SECURITY, compress=9)


100%|██████████| 5/5 [00:28<00:00,  5.69s/it]


### 4.3. Ensembles


#### 4.3.1. CDN (Conditional Dependency Network)


In [16]:
# Fit one Conditional Dependency Network per base learner unless a cached set
# exists (or OVERWRITE forces a refit), then cache the result on disk.
if not cdn or OVERWRITE:
    cdn = {}

    for k, v in tqdm(BASE_CLASSIFIERS.items()):
        model = ConditionalDependencyNetwork(
            # Hand over a fresh unfitted copy: BASE_CLASSIFIERS instances are
            # shared by every technique, and the wrapper may fit the estimator
            # it receives in place (not visible from this notebook).
            classifier=clone(v),
            num_iterations=100,
            burn_in=10
        )

        cdn[k] = model.fit(
            x=X_train,
            y=y_train
        )

    # joblib.dump does not create missing folders; without this the freshly
    # trained models are lost to a FileNotFoundError on a clean checkout.
    PATH_CDN_COMPUTER_SECURITY.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(cdn, PATH_CDN_COMPUTER_SECURITY, compress=9)


100%|██████████| 5/5 [00:29<00:00,  5.85s/it]


#### 4.3.2. MBR (Meta-Binary Relevance)


In [17]:
# Fit one Meta-Binary Relevance model per base learner unless a cached set
# exists (or OVERWRITE forces a refit), then cache the result on disk.
if not mbr or OVERWRITE:
    mbr = {}

    for k, v in tqdm(BASE_CLASSIFIERS.items()):
        model = MetaBinaryRelevance(
            # Hand over a fresh unfitted copy: BASE_CLASSIFIERS instances are
            # shared by every technique, and the wrapper may fit the estimator
            # it receives in place (not visible from this notebook).
            classifier=clone(v),
            use_cross_val=True,
            n_splits=5
        )

        mbr[k] = model.fit(
            x=X_train,
            y=y_train
        )

    # joblib.dump does not create missing folders; without this the freshly
    # trained models (the slowest set to fit) are lost to a FileNotFoundError
    # on a clean checkout.
    PATH_MBR_COMPUTER_SECURITY.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(mbr, PATH_MBR_COMPUTER_SECURITY, compress=9)


100%|██████████| 5/5 [03:10<00:00, 38.15s/it]


## 5. Model Evaluation

Now that we've trained the models, let's evaluate them in more detail.


In [18]:
# One entry per technique: (name, fitted models, validation features, validation targets).
# LP and PSt are validated on their own splits, and their class ids are mapped
# back through the label maps so every technique is scored against label rows.
# Note that the LP/PSt validation samples therefore differ from the ones used
# for the other techniques.
_validation_runs = (
    ('BR', br, X_val, y_val),
    ('CLR', clr, X_val, y_val),
    ('CC', cc, X_val, y_val),
    ('LP', lp, X_val_lp, np.array([list(label_map_lp[code]) for code in y_val_lp])),
    ('PST', pst, X_val_pst, np.array([list(label_map_pst[code]) for code in y_val_pst])),
    ('CDN', cdn, X_val, y_val),
    ('MBR', mbr, X_val, y_val),
)

evaluation = {
    name: assess_models(x=features, y=targets, technique=models)
    for name, models, features, targets in _validation_runs
}


In [19]:
# Turn the per-technique results into a table with one row per technique.
performances = pd.DataFrame(data=evaluation).transpose()
performances


Unnamed: 0,Accuracy,Classifier,Model,Precision example-based,Recall example-based,F1 example-based,Hamming loss,Micro precision,Micro recall,Micro F1,Macro precision,Macro recall,Macro F1,Coverage
BR,0.847476,xgb,OneVsRestClassifier(estimator=XGBClassifier(ba...,0.915145,0.932868,0.915503,0.064089,0.905624,0.92233,0.913901,0.932259,0.721238,0.771093,1.307197
CLR,0.857143,xgb,CalibratedLabelRankClassifier(classes=array(['...,0.922127,0.932331,0.919441,0.060508,0.91514,0.921359,0.918239,0.935306,0.758794,0.811394,1.29753
CC,0.862513,xgb,ChainOfClassifiers(classes=array(['antivirus a...,0.918904,0.906015,0.90691,0.069459,0.915507,0.894175,0.904715,0.9334,0.74463,0.803094,1.314715
LP,0.891515,xgb,LabelPowersetClassifier(classifier=XGBClassifi...,0.940924,0.922664,0.927318,0.054422,0.940763,0.909709,0.924975,0.957753,0.710411,0.778013,1.269603
PST,0.881847,xgb,LabelPowersetClassifier(classifier=XGBClassifi...,0.930183,0.915682,0.918367,0.060866,0.929142,0.903883,0.916339,0.903377,0.813642,0.853815,1.286788
CDN,0.835661,xgb,ConditionalDependencyNetwork(classifier=XGBCla...,0.893663,0.879162,0.880773,0.087003,0.892323,0.868932,0.880472,0.585586,0.576281,0.580882,1.367347
MBR,0.867884,xgb,MetaBinaryRelevance(classifier=XGBClassifier(b...,0.931794,0.934479,0.925886,0.056212,0.925024,0.92233,0.923675,0.943718,0.762769,0.817584,1.283566


In [25]:
# Per-label report for the model stored in the PSt evaluation entry.
# The validation class ids are mapped back to label rows once and reused.
y_val_pst_true = np.array([list(label_map_pst[code]) for code in y_val_pst])

z = evaluation['PST']['Model'].predict(X_val_pst)
acc = accuracy_score(y_val_pst_true, z)

report = classification_report(
    y_true=y_val_pst_true,
    y_pred=z,
    target_names=mlb.classes_,
    zero_division=0
)
print(report)

print(acc)


                       precision    recall  f1-score   support

antivirus and malware       0.93      0.94      0.94       683
     network security       0.92      0.83      0.87       338
                other       0.86      0.67      0.75         9

            micro avg       0.93      0.90      0.92      1030
            macro avg       0.90      0.81      0.85      1030
         weighted avg       0.93      0.90      0.92      1030
          samples avg       0.93      0.92      0.92      1030

0.8818474758324383
