# Sangkak AI Challenge: POS tasks
-------------------------------------------------------

- **Author**: Elvis MBONING (NTeALan Research and Development Team)
- **Session**: September 2023

----------------------------------------------------------------

In this notebook, we train and compare different models.

We want to test the following hypotheses with the CRFsuite and XGBoost models:

- impact of feature normalization on model classification
- impact of feature regularization on model classification
- impact of the choice of classification algorithm on model classification
- impact of position-based data augmentation (for imbalanced classes) on model classification
- impact of feature-based data augmentation (for imbalanced classes) on model classification



# 1. Impact of feature normalization on model classification


In [1]:
from sangkak_estimators import SangkakPosProjetReader, SangkakPosFeaturisation

### 1.1. Case of Xgboost model

In [2]:
# Install the extra Python packages this notebook depends on (if not already installed).
# NOTE(review): the original note here reads "version=xgboost-1.7.6" — presumably the
# xgboost version used for the recorded results; xgboost is not in the install list below.
!pip3 install dython lazypredict -U

Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Collecting click
  Downloading click-8.1.7-py3-none-any.whl (97 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.9/97.9 KB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting lightgbm
  Downloading lightgbm-4.0.0-py3-none-manylinux_2_28_x86_64.whl (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: click, lightgbm, lazypredict
Successfully installed click-8.1.7 lazypredict-0.2.12 lightgbm-4.0.0


In [2]:
import pandas as pd
from pathlib import Path

In [3]:
# Build the paths of the train / dev / test files for the selected language
language = 'bbj'
bbj_pos_path   = Path(f'../data_source/masakhane-pos/data/{language}')
train_data_path = bbj_pos_path / 'train.txt'
dev_data_path = bbj_pos_path / 'dev.txt'
test_data_path = bbj_pos_path / 'test.txt'

# Read each split with the project reader. The same reader instance is
# re-fitted on one file at a time, and each transform_analysis() result is
# unpacked into two values: a list-style object (presumably the sentences;
# confirm against SangkakPosProjetReader) and a pandas table.
reader_estimator = SangkakPosProjetReader()
list_train_data, pd_train_data = reader_estimator.fit(train_data_path).transform_analysis()
list_dev_data, pd_dev_data = reader_estimator.fit(dev_data_path).transform_analysis()
list_test_data, pd_test_data = reader_estimator.fit(test_data_path).transform_analysis()

# Display the training table (columns: sentence_id, word, tags)
pd_train_data

12347 12347 12347
2447 2447 2447
9315 9315 9315


Unnamed: 0,sentence_id,word,tags
0,1,Mwɔ̌ʼ,NOUN
1,1,pfʉ́tə́,VERB
2,1,nə́,ADP
3,1,mwâsi,NOUN
4,1,máp,DET
...,...,...,...
12342,751,kə,AUX
12343,751,fǎʼ,VERB
12344,751,nə́,ADP
12345,751,é,PRON


In [4]:
# Create the project feature extractor.
# NOTE(review): fit() receives an empty list, so it presumably has nothing to
# learn from data — confirm against SangkakPosFeaturisation.
feature_estimator = SangkakPosFeaturisation()
feature_estimator.fit([])

# Feature representation of each split
Xtrain = feature_estimator.transform(list_train_data)
Xdev  = feature_estimator.transform(list_dev_data)
Xtest = feature_estimator.transform(list_test_data)

# Same transform with label=True: presumably returns the targets (POS tags)
# instead of the features — confirm against SangkakPosFeaturisation.
ytrain = feature_estimator.transform(list_train_data, label=True)
ydev   = feature_estimator.transform(list_dev_data, label=True)
ytest  = feature_estimator.transform(list_test_data, label=True)

# Inspect the first element of Xtrain (a list of per-token feature
# dictionaries in the recorded output)
Xtrain[0]

[{'word': 'Mwɔ̌ʼ',
  'bias': 1.0,
  'word.tones': '̌',
  'word.normalized': 'Mwɔ̌ʼ',
  'word.position': 0,
  'word.has_hyphen': 0,
  'word.lower()': 'mwɔ̌ʼ',
  'word.start_with_capital': -1,
  'word.have_tone': 1,
  'word.prefix': 'Mw',
  'word.root': '̌ʼ',
  'word.ispunctuation': 0,
  'word.letters': -1,
  'word.isdigit()': 0,
  'word.EOS': 0,
  'word.BOS': 1,
  '-1:word': '',
  '-1:word.position': -1,
  '-1:word.tag': '',
  '-1:word.letters': -1,
  '-1:word.normalized': '',
  '-1:word.start_with_capital': -1,
  '-1:len(word-1)': -1,
  '-1:word.lower()': '',
  '-1:word.isdigit()': -1,
  '-1:word.ispunctuation': 0,
  '-1:word.BOS': 0,
  '-1:word.EOS': 0,
  '-1:word.prefix': '',
  '-1:word.root': '',
  '+1:word.prefix': 'pf',
  '+1:word.root': '́tə́',
  '+1:word': 'pfʉ́tə́',
  '+1:word.tag': 'VERB',
  '+1:word.position': 1,
  '+1:word.letters': 'p f ʉ ́ t ə ́',
  '+1:word.normalized': 'pfʉ́tə́',
  '+1:word.start_with_capital': 0,
  '+1:len(word+1)': 7,
  '+1:word.lower()': 'pfʉ́tə́',
  

In [5]:
# Declares which feature names are treated as categorical (string-valued) and
# which as numerical; passed as `normalize=` to transform_to_sagemaker_format.
# NOTE(review): 'word.letters', '-1:word.letters' and '+1:word.letters' are
# present in the feature dictionaries but appear in neither list — confirm
# whether that is intended.
features_types = {
    "categorical_features": [
        'word',
        'word.tones',
        'word.normalized',
        'word.lower()',
        'word.prefix',
        'word.root',
        '-1:word',
        '-1:word.tag',
        '-1:word.normalized',
        '-1:word.lower()',
        '-1:word.prefix',
        '-1:word.root',
        '+1:word.prefix',
        '+1:word.root',
        '+1:word',
        '+1:word.lower()',
        '+1:word.tag',
        '+1:word.normalized'
    ],
    "numerical_features": [
        'bias',
        'word.position',
        'word.have_tone',
        'word.ispunctuation',
        'word.isdigit()',
        'word.EOS',
        'word.BOS',
        'word.start_with_capital',
        'word.has_hyphen',
        '-1:word.position',
        '-1:word.start_with_capital',
        '-1:len(word-1)',
        '+1:word.position',
        '+1:word.start_with_capital',
        '+1:len(word+1)',
        '+1:word.isdigit()',
        '+1:word.ispunctuation',
        '+1:word.BOS',
        '+1:word.EOS',
        '-1:word.isdigit()',
        '-1:word.ispunctuation',
        '-1:word.BOS',
        '-1:word.EOS'
    ]
}

# Turn each split into a flat table (one row per token, with a `labels`
# column). No `label` is passed for the training split; the recorded output
# prints "[train]", so the default is presumably 'train'.
xgb_df_train = feature_estimator.transform_to_sagemaker_format(
    Xtrain, ytrain,
    normalize=features_types
)
xgb_df_dev = feature_estimator.transform_to_sagemaker_format(
    Xdev, ydev, 
    label='dev',
    normalize=features_types
)
xgb_df_test = feature_estimator.transform_to_sagemaker_format(
    Xtest, ytest, 
    label='test',
    normalize=features_types
)

[train] Building sagemaker data for classification


[dev] Building sagemaker data for classification
[test] Building sagemaker data for classification


In [6]:
# Stack the train, dev and test tables on top of each other (row-wise) and
# renumber the rows from 0 so the combined table has a single clean index.
split_frames = (xgb_df_train, xgb_df_dev, xgb_df_test)
all_data = pd.concat(split_frames, ignore_index=True)
all_data

Unnamed: 0,labels,word,bias,word.tones,word.normalized,word.position,word.has_hyphen,word.lower(),word.start_with_capital,word.have_tone,...,+1:word.position,+1:word.letters,+1:word.normalized,+1:word.start_with_capital,+1:len(word+1),+1:word.lower(),+1:word.isdigit(),+1:word.ispunctuation,+1:word.BOS,+1:word.EOS
0,NOUN,Mwɔ̌ʼ,1,̌,Mwɔ̌ʼ,0,0,mwɔ̌ʼ,-1,1,...,1,p f ʉ ́ t ə ́,pfʉ́tə́,0,7,pfʉ́tə́,0,0,0,0
1,VERB,pfʉ́tə́,1,,pfʉ́tə́,1,0,pfʉ́tə́,0,1,...,2,n ə ́,nə́,0,3,nə́,0,0,0,0
2,ADP,nə́,1,,nə́,2,0,nə́,0,1,...,3,m w a ̂ s i,mwâsi,0,6,mwâsi,0,0,0,0
3,NOUN,mwâsi,1,̂,mwâsi,3,0,mwâsi,0,1,...,4,m a ́ p,máp,0,4,máp,0,0,0,0
4,DET,máp,1,,máp,4,0,máp,0,1,...,5,y ə ́,yə́,0,3,yə́,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24104,VERB,pfʉ́,1,,pfʉ́,11,0,pfʉ́,0,1,...,12,n ə ̂,nə̂,0,3,nə̂,0,0,0,0
24105,ADP,nə̂,1,̂,nə̂,12,0,nə̂,0,1,...,13,ŋ k a ʼ,ŋkaʼ,0,4,ŋkaʼ,0,0,0,0
24106,NOUN,ŋkaʼ,1,,ŋkaʼ,13,0,ŋkaʼ,0,1,...,14,8 4,84,0,2,84,1,0,0,0
24107,NUM,84,1,,84,14,0,84,0,1,...,15,.,.,0,1,.,0,1,0,1


mystring = "Welcome"
mybytes = mystring.encode('utf-8')  # text -> UTF-8 bytes
myint = int.from_bytes(mybytes, byteorder='little')  # bytes -> one integer
print(myint)
byte_count = (myint.bit_length() + 7) // 8  # bit length rounded up to whole bytes
recoveredbytes = myint.to_bytes(byte_count, byteorder='little')  # integer -> bytes
recoveredstring = recoveredbytes.decode('utf-8')  # bytes -> the original text
print(recoveredstring)

In [7]:
import random

def string_int_transform(r):
    """Encode a string as a single integer built from its UTF-8 bytes.

    The bytes are interpreted in little-endian order, so the first byte of
    the text becomes the least-significant byte of the result.

    Args:
        r: Text to encode.

    Returns:
        The non-negative integer whose little-endian byte representation is
        the UTF-8 encoding of ``r`` (0 for an empty string).
    """
    return int.from_bytes(r.encode('utf-8'), 'little')

# Lookup tables shared by the encoders of this cell:
#   all_ref_data: feature value -> integer code
#   all_labels:   label         -> class id
all_ref_data, all_labels = {}, {}

# First code handed out by apply_ref_transform (lower bound of the code range
# used by the original random encoding).
REF_CODE_START = 100000

def apply_ref_transform(r):
    """Return the integer code of feature value ``r``, creating it on first use.

    Codes are assigned sequentially (REF_CODE_START, REF_CODE_START + 1, ...)
    in order of first appearance. Unlike random draws, this is reproducible
    from one run to the next and two distinct values can never receive the
    same code, provided ``all_ref_data`` is only filled through this function.

    Args:
        r: Hashable feature value (a string, or a placeholder such as -1).

    Returns:
        int: The code stored for ``r`` in the shared ``all_ref_data`` table.
    """
    if r not in all_ref_data:
        all_ref_data[r] = REF_CODE_START + len(all_ref_data)
    return all_ref_data[r]

def apply_label_transform(r):
    """Return the class id of label ``r``, registering it on first use.

    A new label receives the first id, counting up from the current table
    size, that no other label already holds. A random draw could return an id
    that is already taken and silently merge two classes; this cannot.

    Args:
        r: Hashable label (e.g. a POS tag string).

    Returns:
        int: The class id stored for ``r`` in the shared ``all_labels`` table.
    """
    if r not in all_labels:
        taken = set(all_labels.values())
        ref = len(all_labels)
        # Skip ids already in use in case the table was not filled sequentially.
        while ref in taken:
            ref += 1
        all_labels[r] = ref
    return all_labels[r]

# Give every label a contiguous class id (0, 1, 2, ...) in order of first
# appearance in the combined table.
for label in all_data['labels']:
    if label not in all_labels:
        all_labels[label] = len(all_labels)

# Columns integer-encoded through apply_ref_transform. They all go through the
# same lookup table, so a given value gets the same code in every column.
string_feature_columns = [
    'word',
    'word.tones',
    'word.normalized',
    'word.lower()',
    'word.prefix',
    'word.root',
    '-1:word',
    '-1:word.tag',
    '-1:word.normalized',
    '-1:word.lower()',
    '+1:word',
    '+1:word.lower()',
    '+1:word.tag',
    '+1:word.normalized',
    '+1:word.prefix',
    '+1:word.root',
    '-1:word.prefix',
    '-1:word.root',
    '+1:word.letters',
    '-1:word.letters',
    'word.letters',
]

# Work on a copy so the original (string-valued) table stays available.
all_data_parse = all_data.copy()

all_data_parse['labels'] = all_data['labels'].map(all_labels).astype("int")
for column in string_feature_columns:
    all_data_parse[column] = all_data[column].apply(apply_ref_transform).astype("int")

# Every column is expected to be an integer dtype at this point.
all_data_parse.dtypes

labels                        int64
word                          int64
bias                          int64
word.tones                    int64
word.normalized               int64
word.position                 int64
word.has_hyphen               int64
word.lower()                  int64
word.start_with_capital       int64
word.have_tone                int64
word.prefix                   int64
word.root                     int64
word.ispunctuation            int64
word.letters                  int64
word.isdigit()                int64
word.EOS                      int64
word.BOS                      int64
-1:word                       int64
-1:word.position              int64
-1:word.tag                   int64
-1:word.letters               int64
-1:word.normalized            int64
-1:word.start_with_capital    int64
-1:len(word-1)                int64
-1:word.lower()               int64
-1:word.isdigit()             int64
-1:word.ispunctuation         int64
-1:word.BOS                 

In [8]:
# Remove unused / poorly performing variables.
columns_to_drop = [
    '+1:word.isdigit()', '+1:word.ispunctuation', '-1:word.EOS',
    '+1:word.BOS', 'word.has_hyphen', '+1:word.EOS', '-1:word.BOS',
    '-1:word.isdigit()', '-1:word.ispunctuation', '+1:word.normalized',
    '-1:word.tag', '+1:word.tag', 'word.EOS',
    '-1:word.start_with_capital', '+1:word.start_with_capital',
]

# errors='ignore' skips columns that are already absent, so the cell can be
# re-run safely without a bare `except` that would also hide unrelated errors.
all_data_parse = all_data_parse.drop(columns=columns_to_drop, errors='ignore')


In [None]:
from dython.nominal import associations

# Check the pairwise association between all variables of the dataset
# (drawn as a 15x15 figure).
# NOTE(review): every column of all_data_parse was cast to an integer dtype
# earlier in the notebook — confirm that dython still treats the encoded
# columns as nominal, otherwise nom_nom_assoc='theil' may not apply to them.
associations(all_data_parse, nom_nom_assoc='theil', figsize=(15, 15))

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.utils.multiclass import type_of_target

# Separate the encoded feature columns from the target column.
xgb_features = all_data_parse.drop(columns='labels').copy()
xgb_target = all_data_parse['labels'].copy()

# Rows are cut in their existing order: no shuffling, hence no seed needed.
split_options = dict(random_state=None, shuffle=False)

# Last 20% of the rows -> test split.
xgb_X_train, xgb_X_test, xgb_y_train, xgb_y_test = train_test_split(
    xgb_features, xgb_target, test_size=0.2, **split_options
)

# Last 25% of what remains -> dev split.
xgb_X_train, xgb_X_dev, xgb_y_train, xgb_y_dev = train_test_split(
    xgb_X_train, xgb_y_train, test_size=0.25, **split_options
)

# Number of distinct class ids in the whole dataset.
num_class = all_data_parse['labels'].nunique()
print("Number of classes: %s" % num_class)

print("Type of target of ytrain data set: %s" % type_of_target(xgb_y_train))
print("Type of target of ytest data set: %s" % type_of_target(xgb_y_test))

Number of classes: 16
Type of target of ytrain data set: multiclass
Type of target of ytest data set: multiclass


In [10]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import balanced_accuracy_score, accuracy_score, f1_score
from sklearn.utils.class_weight import compute_sample_weight

In [15]:
# One weight per training sample, computed with the 'balanced' strategy to
# compensate for the unbalanced label distribution.
optimised_labels_weights = compute_sample_weight('balanced', xgb_y_train)

print(optimised_labels_weights)

[0.30781835 0.38178315 1.16203406 ... 0.38178315 0.59360637 1.47964403]


In [11]:
from lazypredict.Supervised import LazyClassifier

# Fit lazypredict's collection of classifiers on the training split and score
# each of them on xgb_X_test / xgb_y_test; `models` is the resulting score table.
# NOTE(review): xgb_X_dev / xgb_y_dev are not used here, so the models are
# ranked on the test split — confirm this is intended.
clf = LazyClassifier(verbose=1,ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(xgb_X_train, xgb_X_test, xgb_y_train, xgb_y_test)
models

  3%|▎         | 1/29 [00:01<00:35,  1.28s/it]

{'Model': 'AdaBoostClassifier', 'Accuracy': 0.28038158440481126, 'Balanced Accuracy': 0.13333333333333333, 'ROC AUC': None, 'F1 Score': 0.1536212697033047, 'Time taken': 1.2779781818389893}


  7%|▋         | 2/29 [00:02<00:38,  1.44s/it]

{'Model': 'BaggingClassifier', 'Accuracy': 0.7287432600580672, 'Balanced Accuracy': 0.6793960115097673, 'ROC AUC': None, 'F1 Score': 0.7285582602599514, 'Time taken': 1.5556800365447998}
{'Model': 'BernoulliNB', 'Accuracy': 0.4328079635006222, 'Balanced Accuracy': 0.33185060713680076, 'ROC AUC': None, 'F1 Score': 0.42433378404471517, 'Time taken': 0.048421382904052734}


 14%|█▍        | 4/29 [01:00<07:43, 18.53s/it]

{'Model': 'CalibratedClassifierCV', 'Accuracy': 0.4968892575694733, 'Balanced Accuracy': 0.3610991133777006, 'ROC AUC': None, 'F1 Score': 0.46935457094405014, 'Time taken': 57.36065340042114}


 21%|██        | 6/29 [01:00<03:47,  9.87s/it]

{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.6688096225632517, 'Balanced Accuracy': 0.6431891390474653, 'ROC AUC': None, 'F1 Score': 0.6713659271000213, 'Time taken': 0.36971354484558105}
{'Model': 'DummyClassifier', 'Accuracy': 0.19576939029448362, 'Balanced Accuracy': 0.06666666666666667, 'ROC AUC': None, 'F1 Score': 0.06410208270481876, 'Time taken': 0.0255889892578125}
{'Model': 'ExtraTreeClassifier', 'Accuracy': 0.5255080879303193, 'Balanced Accuracy': 0.45971911529415677, 'ROC AUC': None, 'F1 Score': 0.5289519454831997, 'Time taken': 0.042963504791259766}


 31%|███       | 9/29 [01:02<01:44,  5.22s/it]

{'Model': 'ExtraTreesClassifier', 'Accuracy': 0.7442969722107009, 'Balanced Accuracy': 0.6459167099754325, 'ROC AUC': None, 'F1 Score': 0.7414449277880977, 'Time taken': 1.7849054336547852}
{'Model': 'GaussianNB', 'Accuracy': 0.366860223973455, 'Balanced Accuracy': 0.2998089777009991, 'ROC AUC': None, 'F1 Score': 0.34363498208081006, 'Time taken': 0.04032158851623535}


 38%|███▊      | 11/29 [01:02<01:04,  3.58s/it]

{'Model': 'KNeighborsClassifier', 'Accuracy': 0.5470759021153049, 'Balanced Accuracy': 0.43356864129990574, 'ROC AUC': None, 'F1 Score': 0.539842972228343, 'Time taken': 0.29000401496887207}


 41%|████▏     | 12/29 [01:11<01:16,  4.52s/it]

{'Model': 'LabelPropagation', 'Accuracy': 0.5176275404396516, 'Balanced Accuracy': 0.43694959737458916, 'ROC AUC': None, 'F1 Score': 0.5203587969926515, 'Time taken': 8.55701470375061}


 45%|████▍     | 13/29 [01:29<01:59,  7.48s/it]

{'Model': 'LabelSpreading', 'Accuracy': 0.5176275404396516, 'Balanced Accuracy': 0.43694959737458916, 'ROC AUC': None, 'F1 Score': 0.5203589275061763, 'Time taken': 18.517716646194458}


 48%|████▊     | 14/29 [01:30<01:27,  5.81s/it]

{'Model': 'LinearDiscriminantAnalysis', 'Accuracy': 0.4962671090833679, 'Balanced Accuracy': 0.36672575728184437, 'ROC AUC': None, 'F1 Score': 0.4732971710532519, 'Time taken': 0.27785515785217285}


 52%|█████▏    | 15/29 [01:47<02:01,  8.69s/it]

{'Model': 'LinearSVC', 'Accuracy': 0.49730402322687683, 'Balanced Accuracy': 0.3528560821417585, 'ROC AUC': None, 'F1 Score': 0.45655781936007844, 'Time taken': 17.374536991119385}


 55%|█████▌    | 16/29 [01:49<01:29,  6.86s/it]

{'Model': 'LogisticRegression', 'Accuracy': 0.5066362505184571, 'Balanced Accuracy': 0.36792277854140937, 'ROC AUC': None, 'F1 Score': 0.48050521917202466, 'Time taken': 1.704707145690918}
{'Model': 'NearestCentroid', 'Accuracy': 0.35918705931148903, 'Balanced Accuracy': 0.33188046863940907, 'ROC AUC': None, 'F1 Score': 0.37165947018311957, 'Time taken': 0.05567455291748047}


 66%|██████▌   | 19/29 [01:49<00:33,  3.32s/it]

{'Model': 'PassiveAggressiveClassifier', 'Accuracy': 0.3531729572791373, 'Balanced Accuracy': 0.2856454897660464, 'ROC AUC': None, 'F1 Score': 0.34310560705608634, 'Time taken': 0.4362154006958008}


 72%|███████▏  | 21/29 [01:50<00:17,  2.14s/it]

{'Model': 'Perceptron', 'Accuracy': 0.3896723351306512, 'Balanced Accuracy': 0.31431560508328504, 'ROC AUC': None, 'F1 Score': 0.3833791404350315, 'Time taken': 0.3060479164123535}
{'Model': 'QuadraticDiscriminantAnalysis', 'Accuracy': 0.1897552882621319, 'Balanced Accuracy': 0.20848511411809814, 'ROC AUC': None, 'F1 Score': 0.20889384351694784, 'Time taken': 0.15691184997558594}


 76%|███████▌  | 22/29 [01:53<00:17,  2.48s/it]

{'Model': 'RandomForestClassifier', 'Accuracy': 0.7461634176690171, 'Balanced Accuracy': 0.647425353923911, 'ROC AUC': None, 'F1 Score': 0.7433845430608437, 'Time taken': 3.539419651031494}
{'Model': 'RidgeClassifier', 'Accuracy': 0.48963085856491084, 'Balanced Accuracy': 0.3332021949432608, 'ROC AUC': None, 'F1 Score': 0.4394841232981389, 'Time taken': 0.06689190864562988}


 83%|████████▎ | 24/29 [01:54<00:07,  1.53s/it]

{'Model': 'RidgeClassifierCV', 'Accuracy': 0.48963085856491084, 'Balanced Accuracy': 0.3332021949432608, 'ROC AUC': None, 'F1 Score': 0.4394841232981389, 'Time taken': 0.23463058471679688}


 86%|████████▌ | 25/29 [01:55<00:05,  1.41s/it]

{'Model': 'SGDClassifier', 'Accuracy': 0.42948983824139364, 'Balanced Accuracy': 0.3217481489630157, 'ROC AUC': None, 'F1 Score': 0.4139354071411019, 'Time taken': 1.007359266281128}


 90%|████████▉ | 26/29 [02:07<00:12,  4.20s/it]

{'Model': 'SVC', 'Accuracy': 0.5723766072169224, 'Balanced Accuracy': 0.4224295109310907, 'ROC AUC': None, 'F1 Score': 0.5517286448568289, 'Time taken': 12.81466269493103}


 97%|█████████▋| 28/29 [02:27<00:06,  6.41s/it]

{'Model': 'XGBClassifier', 'Accuracy': 0.7774782248029863, 'Balanced Accuracy': 0.6951193949069833, 'ROC AUC': None, 'F1 Score': 0.7765965425476828, 'Time taken': 19.173996925354004}
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4484
[LightGBM] [Info] Number of data points in the train set: 14465, number of used features: 28
[LightGBM] [Info] Start training from score -1.594343
[LightGBM] [Info] Start training from score -1.809686
[LightGBM] [Info] Start training from score -2.922761
[LightGBM] [Info] Start training from score -2.550400
[LightGBM] [Info] Start training from score -2.730421
[LightGBM] [Info] Start training from score -2.632511
[LightGBM] [Info] Start training from score -3.164390
[LightGBM] [Info] Start training from score -2.251050
[LightGBM] [Info] Start training from score -2.894875
[LightGBM] [Info] Start training from score -2.728302
[LightGBM] [Info] Start training from score -3.693383
[LightGBM] [Info] Start training from 

100%|██████████| 29/29 [02:30<00:00,  5.19s/it]

{'Model': 'LGBMClassifier', 'Accuracy': 0.3216507673164662, 'Balanced Accuracy': 0.28325516799755207, 'ROC AUC': None, 'F1 Score': 0.3231265714919173, 'Time taken': 3.284762144088745}





Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
XGBClassifier,0.78,0.7,,0.78,19.17
BaggingClassifier,0.73,0.68,,0.73,1.56
RandomForestClassifier,0.75,0.65,,0.74,3.54
ExtraTreesClassifier,0.74,0.65,,0.74,1.78
DecisionTreeClassifier,0.67,0.64,,0.67,0.37
ExtraTreeClassifier,0.53,0.46,,0.53,0.04
LabelSpreading,0.52,0.44,,0.52,18.52
LabelPropagation,0.52,0.44,,0.52,8.56
KNeighborsClassifier,0.55,0.43,,0.54,0.29
SVC,0.57,0.42,,0.55,12.81
