In [38]:
import pandas as pd
import os
import sklearn
import json
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.svm import SVC
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import StratifiedKFold
from tensorflow.python.keras.callbacks import EarlyStopping

In [2]:
# Both input tables are read from the sibling "datasets" directory.
datasets_dir = os.path.join("..", "datasets")
embeddings_df = pd.read_csv(os.path.join(datasets_dir, "embeddings.csv"))
accounts_df = pd.read_csv(os.path.join(datasets_dir, "accounts_processed.csv"))

In [3]:
# Lower-case the account handles; `username` is the key used for the merge
# with the embeddings table further down.
# `.str.lower()` is the vectorised form of the per-row lambda and keeps missing
# values as NaN instead of raising AttributeError on them.
accounts_df['username'] = accounts_df['username'].str.lower()

# Preview goes last: only the final expression of a cell is displayed.
accounts_df.head(5)

In [4]:
# Preview the first rows of the embeddings table (embeddings are stored as strings).
embeddings_df.head(5)

Unnamed: 0,username,embedding
0,achybicka,[-4.93041947e-02 -5.91295818e-03 -8.47795755e-...
1,ac_sobol,[-8.40702951e-02 6.32756064e-03 -9.36329067e-...
2,adambielan,[-6.17371537e-02 -1.24293072e-02 -7.66848996e-...
3,adamgaweda,[ 2.38768775e-02 1.51185095e-02 -8.82171020e-...
4,adamowiczpawel,[-4.28744927e-02 1.10798636e-02 -9.27639380e-...


In [5]:
# Cast the join key to the generic object dtype; the merge below joins on this column.
embeddings_df['username'] = embeddings_df['username'].astype(object)

In [6]:
# Inner join on the username key: only accounts present in both tables survive.
full_df = embeddings_df.merge(accounts_df, how='inner', on='username')

In [7]:
# Non-null count per column of the embeddings table (baseline for the merge check).
embeddings_df.count()

username     548
embedding    548
dtype: int64

In [8]:
# Non-null count per column after the merge; a higher count than the embeddings
# table means some username matched more than one account row.
full_df.count()


username         549
embedding        549
Unnamed: 0       549
pozycja          549
coalition        549
party            549
name             549
link do konta    549
tweets_count     549
dtype: int64

In [9]:
# Flag usernames that occur more than once in the merged table.
full_df['username'].value_counts().gt(1)


z_frankiewicz       True
ttruskolaski       False
danutahuebner      False
baszkobaszko       False
janolbrycht        False
                   ...  
radwitkowski       False
mcgramat           False
waldydzikowski     False
zdzkrasnodebski    False
w_bernacki         False
Name: username, Length: 548, dtype: bool

In [10]:
# Show every merged row belonging to the duplicated username.
full_df.loc[full_df['username'].eq('z_frankiewicz')]


Unnamed: 0.1,username,embedding,Unnamed: 0,pozycja,coalition,party,name,link do konta,tweets_count
544,z_frankiewicz,[-1.86523292e-02 1.31246215e-02 -1.09709956e-...,510,Senat,KO,Platforma Obywatelska,Zygmunt Frankiewicz,https://twitter.com/z_frankiewicz,173
545,z_frankiewicz,[-1.86523292e-02 1.31246215e-02 -1.09709956e-...,637,Prezydent miasta,niez.,niez.,Zygmunt Frankiewicz,https://twitter.com/z_frankiewicz,173


In [11]:
# Index label of the second z_frankiewicz row (the "Prezydent miasta" entry shown above).
index_to_be_deleted = 545

In [12]:
# Remove the duplicated account row selected in the previous cell.
# Uses the named index instead of repeating the literal, and `errors='ignore'`
# keeps the cell re-runnable (a second run no longer raises KeyError).
full_df = full_df.drop(index=index_to_be_deleted, errors='ignore')


In [13]:
# Re-check the per-column counts after removing the duplicated row.
full_df.count()

username         548
embedding        548
Unnamed: 0       548
pozycja          548
coalition        548
party            548
name             548
link do konta    548
tweets_count     548
dtype: int64

In [14]:
# Keep the embedding, the account handle and the three columns used as targets.
training_columns = ['embedding', 'username', 'pozycja', 'coalition', 'party']
df_to_training = full_df[training_columns]

In [15]:
# Class balance of the coalition target.
df_to_training['coalition'].value_counts()

Zjednoczona Prawica    236
KO                     177
Lewica                  54
niez.                   37
PSL-Kukiz15             33
Konfederacja            11
Name: coalition, dtype: int64

In [16]:
# Class balance of the party target.
# NOTE(review): the output lists both 'PiS' and 'PIS' — presumably the same party
# spelled two ways; confirm against the source data.
df_to_training['party'].value_counts()

PiS                             198
Platforma Obywatelska           149
niez.                            55
SLD                              29
PSL                              26
Solidarna Polska                 17
Wiosna                           17
Porozumienie                     16
Nowoczesna                        7
Razem                             6
Kukiz15                           6
KORWiN                            4
Ruch Narodowy                     4
Partia Zieloni                    3
Bezpartyjni Samorządowcy          3
Inicjatywa Polska                 2
Konfederacja                      2
Teraz!                            1
PIS                               1
Polska Partia Socjalistyczna      1
Konfederacja Korony Polskiej      1
Name: party, dtype: int64

In [17]:
# Class balance of the position ('pozycja') target.
df_to_training['pozycja'].value_counts()

Sejm                     373
Senat                     49
Europoseł                 46
Prezydent miasta          45
Marszałek Województwa     11
Wojewoda                  11
Polska 2050               10
Prezydent Polski           2
Premier                    1
Name: pozycja, dtype: int64

In [18]:
# Number of values in one embedding vector; used when parsing the stored
# embedding strings and as the input width of the neural network.
embedding_size = 768

In [19]:
def _fit_one_hot(labels):
    """Fit a OneHotEncoder on a 1-D label array; return (encoder, dense one-hot matrix)."""
    encoder = OneHotEncoder(handle_unknown='ignore')
    encoded = encoder.fit_transform(labels.reshape(-1, 1)).toarray()
    return encoder, encoded


# 'PIS' is a casing variant of 'PiS' (it appears as a separate one-sample class
# in the party value counts); fold it into 'PiS' so both spellings share a class.
party_labels = df_to_training['party'].replace({'PIS': 'PiS'})
coalition_labels = df_to_training['coalition']
position_labels = df_to_training['pozycja']

# Number of distinct classes per target = width of the network's softmax layer.
parties_number_of_classes = len(party_labels.unique())
coalitions_number_of_classes = len(coalition_labels.unique())
positions_number_of_classes = len(position_labels.unique())

# Raw string labels as NumPy arrays (needed for positional fold indexing later).
party_labels = party_labels.to_numpy()
coalition_labels = coalition_labels.to_numpy()
position_labels = position_labels.to_numpy()

# One encoder per target; the *_encoded matrices are the dense one-hot targets.
party_encoder, party_encoded = _fit_one_hot(party_labels)
coalition_encoder, coalition_encoded = _fit_one_hot(coalition_labels)
position_encoder, position_encoded = _fit_one_hot(position_labels)


In [20]:
def _parse_embedding(text, size):
    """Turn one stringified embedding ("[v1 v2 ... vN]") into a float vector.

    The values are split on any whitespace (spaces and line breaks alike).
    Raises ValueError if a token is not a number or if the vector does not
    contain exactly ``size`` values, so truncated or malformed rows are caught
    here rather than silently entering the feature matrix.
    """
    values = np.array(text.strip().strip("[]").split(), dtype=float)
    if values.shape != (size,):
        raise ValueError(f"expected {size} embedding values, got {values.shape[0]}")
    return values


# Feature matrix: one row per account, one column per embedding dimension.
features = np.array(
    [_parse_embedding(embedding, embedding_size) for embedding in df_to_training['embedding']]
)

In [21]:
class MetricsCallback(keras.callbacks.Callback):
    """Keras callback that tracks macro-averaged F1 on the training and validation data.

    After every epoch the model predicts on both sets, the arg-max class of each
    prediction is compared with the arg-max of the one-hot targets, and the two
    scores are appended to ``train_f_scores`` / ``val_f_scores``.

    Note: ``logs["train_f1_score"]`` and ``logs["val_f1_score"]`` are set to the
    *whole running list* of scores, not to the latest scalar. The cells that
    consume the training history rely on this and read the final score as
    ``history.history[key][-1][-1]``.

    Args:
        train_features: inputs the model is trained on.
        train_labels: one-hot targets matching ``train_features``.
        val_features: held-out inputs.
        val_labels: one-hot targets matching ``val_features``.
    """

    def __init__(self, train_features, train_labels, val_features, val_labels):
        super().__init__()
        self.train_features = train_features
        self.train_labels = train_labels
        self.val_features = val_features
        self.val_labels = val_labels
        # Defined here as well so the attributes exist even if on_epoch_end is
        # invoked without a preceding on_train_begin.
        self.train_f_scores = []
        self.val_f_scores = []

    def on_train_begin(self, logs=None):
        """Reset the score histories at the start of every training run."""
        self.train_f_scores = []
        self.val_f_scores = []

    def _macro_f1(self, features, one_hot_labels):
        """Macro F1 of the current model's arg-max predictions against one-hot targets."""
        # verbose=0: keep predict() from printing a progress bar on every epoch.
        predicted = np.argmax(self.model.predict(features, verbose=0), axis=1)
        expected = np.argmax(one_hot_labels, axis=1)
        return f1_score(expected, predicted, average='macro')

    def on_epoch_end(self, epoch, logs=None):
        """Compute both F1 scores, store them and expose the histories through ``logs``."""
        train_f_score = self._macro_f1(self.train_features, self.train_labels)
        val_f_score = self._macro_f1(self.val_features, self.val_labels)

        self.train_f_scores.append(train_f_score)
        self.val_f_scores.append(val_f_score)

        # `logs` defaults to None rather than a shared mutable dict; only write
        # into it when Keras actually supplied one.
        if logs is not None:
            logs["train_f1_score"] = self.train_f_scores
            logs["val_f1_score"] = self.val_f_scores

In [22]:

def train_neural_network(batch_size: int, epoch: int, output_shape: int, lr: float, train_features, train_labels, val_features, val_labels):
    """Build and train a small dense softmax classifier; return the Keras History.

    Architecture: Dense(512, relu) -> Dropout(0.3) -> Dense(128, relu) ->
    Dense(64, relu) -> Dense(output_shape, softmax), trained with Adam and
    categorical cross-entropy.

    Args:
        batch_size: mini-batch size passed to ``fit``.
        epoch: maximum number of epochs (early stopping may end training sooner).
        output_shape: number of classes, i.e. units in the softmax layer.
        lr: Adam learning rate.
        train_features: 2-D array of training inputs (samples x features).
        train_labels: one-hot training targets.
        val_features: 2-D array of validation inputs.
        val_labels: one-hot validation targets.

    Returns:
        The ``History`` object from ``model.fit``. Besides the losses it contains
        the ``train_f1_score`` / ``val_f1_score`` entries written by MetricsCallback.
    """
    # Per-sample input shape as a tuple, taken from the data itself so the
    # function does not depend on a notebook-level constant.
    input_shape = tuple(np.shape(train_features)[1:])
    inputs = keras.Input(shape=input_shape)

    x = layers.Dense(512, activation='relu')(inputs)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dense(64, activation='relu')(x)
    outputs = layers.Dense(output_shape, activation='softmax')(x)

    model = keras.Model(inputs=inputs, outputs=outputs)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='categorical_crossentropy')

    metric_callback = MetricsCallback(train_features, train_labels, val_features, val_labels)
    # Public tf.keras callback (same package as the model) rather than the one
    # from the private tensorflow.python.keras module.
    es = keras.callbacks.EarlyStopping(monitor='val_loss', verbose=1, patience=10)

    history = model.fit(x=train_features, y=train_labels,
                        validation_data=(val_features, val_labels),
                        batch_size=batch_size, epochs=epoch,
                        shuffle=True, callbacks=[metric_callback, es], verbose=0)

    return history

In [55]:
# Stratified k-fold cross-validation of the neural network on the coalition target.
n_splits = 10
# Fixed seed so the fold assignment is repeatable (same seed as train_model_kfold).
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
for index, (train_indices, val_indices) in enumerate(skf.split(features, coalition_labels)):
    print(f"Training on fold {index + 1}/{n_splits}...")
    xtrain, xval = features[train_indices], features[val_indices]

    # One-hot encode the raw string labels of this fold.
    ytrain = coalition_encoder.transform(coalition_labels[train_indices].reshape(-1, 1)).toarray()
    yval = coalition_encoder.transform(coalition_labels[val_indices].reshape(-1, 1)).toarray()

    print(f"Training new iteration on {xtrain.shape[0]} training samples, "
          f"{xval.shape[0]} validation samples, this may be a while...")

    history = train_neural_network(50, 500, coalitions_number_of_classes, 0.0001, xtrain, ytrain, xval, yval)
    # MetricsCallback logs the whole running score list every epoch, so each
    # history entry is itself a list: [-1][-1] is the score after the last epoch.
    train_f1_history = history.history['train_f1_score']
    val_f1_history = history.history['val_f1_score']
    # The reported metric is macro F1, so it is labelled as such.
    print(f"Last training F1-score: {train_f1_history[-1][-1]}, "
          f"last validation F1-score: {val_f1_history[-1][-1]}")


Training on fold 1/10...
Training new iteration on 493 training samples, 55 validation samples, this may be a while...
Epoch 00015: early stopping
Last training accuracy: 0.13779761904761906, last validation accuracy: 0.1084848484848485
Training on fold 2/10...
Training new iteration on 493 training samples, 55 validation samples, this may be a while...
Epoch 00199: early stopping
Last training accuracy: 0.315699864851601, last validation accuracy: 0.2531339031339031
Training on fold 3/10...
Training new iteration on 493 training samples, 55 validation samples, this may be a while...
Epoch 00166: early stopping
Last training accuracy: 0.21798174246394, last validation accuracy: 0.23165340406719717
Training on fold 4/10...
Training new iteration on 493 training samples, 55 validation samples, this may be a while...
Epoch 00220: early stopping
Last training accuracy: 0.3201923937592858, last validation accuracy: 0.23690476190476192
Training on fold 5/10...
Training new iteration on 493 t

In [None]:
# Stratified k-fold cross-validation of the neural network on the party target.
# NOTE: several parties have fewer members than there are folds (see the party
# value counts), so those classes cannot be represented in every fold.
n_splits = 10
# Fixed seed so the fold assignment is repeatable (same seed as train_model_kfold).
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
for index, (train_indices, val_indices) in enumerate(skf.split(features, party_labels)):
    print(f"Training on fold {index + 1}/{n_splits}...")
    xtrain, xval = features[train_indices], features[val_indices]

    # One-hot encode the raw string labels of this fold.
    ytrain = party_encoder.transform(party_labels[train_indices].reshape(-1, 1)).toarray()
    yval = party_encoder.transform(party_labels[val_indices].reshape(-1, 1)).toarray()

    print(f"Training new iteration on {xtrain.shape[0]} training samples, "
          f"{xval.shape[0]} validation samples, this may be a while...")

    history = train_neural_network(50, 500, parties_number_of_classes, 0.0001, xtrain, ytrain, xval, yval)
    # MetricsCallback logs the whole running score list every epoch, so each
    # history entry is itself a list: [-1][-1] is the score after the last epoch.
    train_f1_history = history.history['train_f1_score']
    val_f1_history = history.history['val_f1_score']
    # The reported metric is macro F1, so it is labelled as such.
    print(f"Last training F1-score: {train_f1_history[-1][-1]}, "
          f"last validation F1-score: {val_f1_history[-1][-1]}")

In [57]:
# Stratified k-fold cross-validation of the neural network on the position target.
n_splits = 10
# Fixed seed so the fold assignment is repeatable (same seed as train_model_kfold).
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
for index, (train_indices, val_indices) in enumerate(skf.split(features, position_labels)):
    print(f"Training on fold {index + 1}/{n_splits}...")
    xtrain, xval = features[train_indices], features[val_indices]

    # One-hot encode the raw string labels of this fold.
    ytrain = position_encoder.transform(position_labels[train_indices].reshape(-1, 1)).toarray()
    yval = position_encoder.transform(position_labels[val_indices].reshape(-1, 1)).toarray()

    print(f"Training new iteration on {xtrain.shape[0]} training samples, "
          f"{xval.shape[0]} validation samples, this may be a while...")

    history = train_neural_network(50, 500, positions_number_of_classes, 0.0001, xtrain, ytrain, xval, yval)
    # MetricsCallback logs the whole running score list every epoch, so each
    # history entry is itself a list: [-1][-1] is the score after the last epoch.
    train_f1_history = history.history['train_f1_score']
    val_f1_history = history.history['val_f1_score']
    # The reported metric is macro F1, so it is labelled as such.
    print(f"Last training F1-score: {train_f1_history[-1][-1]}, "
          f"last validation F1-score: {val_f1_history[-1][-1]}")




Training on fold 1/10...
Training new iteration on 493 training samples, 55 validation samples, this may be a while...
Epoch 00182: early stopping
Last training accuracy: 0.20857481693642657, last validation accuracy: 0.1637885923600209
Training on fold 2/10...
Training new iteration on 493 training samples, 55 validation samples, this may be a while...
Epoch 00099: early stopping
Last training accuracy: 0.0954375202760492, last validation accuracy: 0.11674347158218126
Training on fold 3/10...
Training new iteration on 493 training samples, 55 validation samples, this may be a while...
Epoch 00026: early stopping
Last training accuracy: 0.09006835544833132, last validation accuracy: 0.11490683229813663
Training on fold 4/10...
Training new iteration on 493 training samples, 55 validation samples, this may be a while...
Epoch 00019: early stopping
Last training accuracy: 0.09006835544833132, last validation accuracy: 0.10054347826086955
Training on fold 5/10...
Training new iteration on

In [23]:
# Standardised copy of the feature matrix (StandardScaler defaults).
# Note: the scaler is fitted on all samples, including those that later serve
# as validation folds.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Min-max rescaled copy of the feature matrix (MinMaxScaler defaults),
# fitted on all samples as well.
minmax_scaler = MinMaxScaler()
features_min_max = minmax_scaler.fit_transform(features)

In [40]:
def train_model_kfold(sklearn_model, splits: int, features_arr, labels, labels_encoder):
    """Cross-validate an estimator and return its mean macro F1 scores.

    The data is split with a shuffled StratifiedKFold (seed 42). For every fold
    the estimator is fitted on the training part and scored with macro-averaged
    F1 on both the training and the held-out part.

    Args:
        sklearn_model: estimator exposing ``fit`` and ``predict``; it is refitted
            in place on every fold.
        splits: number of folds.
        features_arr: 2-D NumPy feature matrix.
        labels: 1-D NumPy array of raw labels, aligned with ``features_arr``.
        labels_encoder: fitted one-hot encoder; labels are converted to integer
            class ids by taking the arg-max of its output.

    Returns:
        Tuple ``(mean training F1, mean validation F1)`` over all folds.
    """
    def _to_class_ids(raw_labels):
        # One-hot encode, then collapse each row to the index of its hot column.
        one_hot = labels_encoder.transform(raw_labels.reshape(-1, 1)).toarray()
        return np.argmax(one_hot, axis=1)

    folds = StratifiedKFold(n_splits=splits, shuffle=True, random_state=42)
    fold_scores = []

    for fit_idx, held_out_idx in folds.split(features_arr, labels):
        fit_x, held_x = features_arr[fit_idx], features_arr[held_out_idx]
        fit_y = _to_class_ids(labels[fit_idx])
        held_y = _to_class_ids(labels[held_out_idx])

        sklearn_model.fit(fit_x, fit_y)
        fit_pred = sklearn_model.predict(fit_x)
        held_pred = sklearn_model.predict(held_x)

        fold_scores.append((
            f1_score(fit_y, fit_pred, average='macro'),
            f1_score(held_y, held_pred, average='macro'),
        ))

    train_scores, val_scores = zip(*fold_scores)
    return np.mean(train_scores), np.mean(val_scores)

In [26]:
# Directory that receives the per-model grid-search result CSVs.
PATH_TO_RESULTS = os.path.join("..", "reports", "classification")

In [43]:
# Grid search: logistic regression over targets x feature scalings x C x tol.
# Every configuration is scored with 10-fold cross-validation (mean macro F1).

# Create the report directory up front so a missing folder fails (or is fixed)
# before the long-running grid instead of at the final to_csv call.
os.makedirs(PATH_TO_RESULTS, exist_ok=True)

label_sets = [(coalition_labels, coalition_encoder, "coalitions"),
              (party_labels, party_encoder, "parties"),
              (position_labels, position_encoder, "positions")]
feature_sets = [("scaled", features_scaled),
                ("original", features),
                ("minmax", features_min_max)]

# One dict per evaluated configuration; turned into a DataFrame once at the end.
lr_records = []
for labels, encoder, labels_name in label_sets:
    for name, feature in feature_sets:
        for c in [0.1, 0.25, 0.5, 0.75, 1.0]:
            for tol in [1e-4, 1e-3, 1e-2, 1e-1]:
                lr_model = LogisticRegression(penalty='l2', max_iter=100000, C=c, tol=tol)
                mean_train_f1_score, mean_val_f1_score = train_model_kfold(lr_model, 10, feature, labels, encoder)
                print(f"Training F1-score: {mean_train_f1_score}, validation F1-score: {mean_val_f1_score},"
                      f"features - {name}, labels name - {labels_name}, C={c}, tol={tol}")
                lr_records.append({"label_name": labels_name,
                                   "feature_type": name,
                                   "C": c,
                                   "tol": tol,
                                   "train_f1_score": mean_train_f1_score,
                                   "val_f1_score": mean_val_f1_score})

# Explicit column list pins the CSV column order.
results = pd.DataFrame(lr_records,
                       columns=["label_name", "feature_type", "C", "tol",
                                "train_f1_score", "val_f1_score"])

results.to_csv(
    os.path.join(PATH_TO_RESULTS, "logistic_regression.csv"),
    index=False)


Training F1-score: 0.9557547689301258, validation F1-score: 0.46980489663765146,features - scaled, labels name - coalitions, C=0.1, tol=0.0001
Training F1-score: 0.9557547689301258, validation F1-score: 0.46980489663765146,features - scaled, labels name - coalitions, C=0.1, tol=0.001
Training F1-score: 0.9557547689301258, validation F1-score: 0.46980489663765146,features - scaled, labels name - coalitions, C=0.1, tol=0.01
Training F1-score: 0.9548396434345581, validation F1-score: 0.46980489663765146,features - scaled, labels name - coalitions, C=0.1, tol=0.1
Training F1-score: 0.9857399698756106, validation F1-score: 0.46470148347249757,features - scaled, labels name - coalitions, C=0.25, tol=0.0001
Training F1-score: 0.9857399698756106, validation F1-score: 0.46470148347249757,features - scaled, labels name - coalitions, C=0.25, tol=0.001
Training F1-score: 0.9858316971671057, validation F1-score: 0.46470148347249757,features - scaled, labels name - coalitions, C=0.25, tol=0.01
Train



In [44]:
# Grid search: decision tree over targets x feature scalings x criterion x
# min_samples_split x min_samples_leaf. Every configuration is scored with
# 10-fold cross-validation (mean macro F1).

# Create the report directory up front so a missing folder fails (or is fixed)
# before the long-running grid instead of at the final to_csv call.
os.makedirs(PATH_TO_RESULTS, exist_ok=True)

label_sets = [(coalition_labels, coalition_encoder, "coalitions"),
              (party_labels, party_encoder, "parties"),
              (position_labels, position_encoder, "positions")]
feature_sets = [("scaled", features_scaled),
                ("original", features),
                ("minmax", features_min_max)]

# One dict per evaluated configuration; turned into a DataFrame once at the end.
tree_records = []
for labels, encoder, labels_name in label_sets:
    for name, feature in feature_sets:
        for crit in ['gini', 'entropy']:
            for mss in range(2, 20):
                for msl in range(2, 20):
                    # Seeded so repeated runs build the same trees.
                    dec_tree = DecisionTreeClassifier(criterion=crit, min_samples_leaf=msl,
                                                      min_samples_split=mss, random_state=42)
                    mean_train_f1_score, mean_val_f1_score = train_model_kfold(dec_tree, 10, feature, labels, encoder)
                    print(f"Training F1-score: {mean_train_f1_score}, validation F1-score: {mean_val_f1_score},"
                          f"features - {name}, labels name - {labels_name}, criterion={crit}, mss={mss}, msl={msl}")
                    tree_records.append({"label_name": labels_name,
                                         "feature_type": name,
                                         "min_samples_leaf": msl,
                                         "min_samples_split": mss,
                                         "criterion": crit,
                                         "train_f1_score": mean_train_f1_score,
                                         "val_f1_score": mean_val_f1_score})

# Explicit column list pins the CSV column order.
results = pd.DataFrame(tree_records,
                       columns=["label_name", "feature_type", "min_samples_leaf",
                                "min_samples_split", "criterion",
                                "train_f1_score", "val_f1_score"])

results.to_csv(
    os.path.join(PATH_TO_RESULTS, "decision_tree.csv"),
    index=False)

Training F1-score: 0.852782525429121, validation F1-score: 0.22572711215576136,features - scaled, labels name - coalitions, criterion=gini, mss=2, msl=2
Training F1-score: 0.7662812265323027, validation F1-score: 0.2019797796227861,features - scaled, labels name - coalitions, criterion=gini, mss=2, msl=3
Training F1-score: 0.6887396371683204, validation F1-score: 0.21594509166364523,features - scaled, labels name - coalitions, criterion=gini, mss=2, msl=4
Training F1-score: 0.6391643391832369, validation F1-score: 0.2184146207718709,features - scaled, labels name - coalitions, criterion=gini, mss=2, msl=5
Training F1-score: 0.5936281750321342, validation F1-score: 0.24875925495567083,features - scaled, labels name - coalitions, criterion=gini, mss=2, msl=6
Training F1-score: 0.540754193650786, validation F1-score: 0.22433136614199753,features - scaled, labels name - coalitions, criterion=gini, mss=2, msl=7
Training F1-score: 0.5144180248005236, validation F1-score: 0.23585909326736426,



In [45]:
# Grid search: SVM over targets x feature scalings x C x tol x kernel.
# Every configuration is scored with 10-fold cross-validation (mean macro F1).

# Create the report directory up front so a missing folder fails (or is fixed)
# before the long-running grid instead of at the final to_csv call.
os.makedirs(PATH_TO_RESULTS, exist_ok=True)

label_sets = [(coalition_labels, coalition_encoder, "coalitions"),
              (party_labels, party_encoder, "parties"),
              (position_labels, position_encoder, "positions")]
feature_sets = [("scaled", features_scaled),
                ("original", features),
                ("minmax", features_min_max)]

# One dict per evaluated configuration; turned into a DataFrame once at the end.
svm_records = []
for labels, encoder, labels_name in label_sets:
    for name, feature in feature_sets:
        for c in [0.1, 0.25, 0.5, 0.75, 1.0]:
            for tol in [1e-4, 1e-3, 1e-2, 1e-1]:
                for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
                    svm = SVC(kernel=kernel, C=c, tol=tol)
                    mean_train_f1_score, mean_val_f1_score = train_model_kfold(svm, 10, feature, labels, encoder)
                    print(f"Training F1-score: {mean_train_f1_score}, validation F1-score: {mean_val_f1_score},"
                          f"features - {name}, labels name - {labels_name}, C={c}, tol={tol}, kernel={kernel}")
                    svm_records.append({"label_name": labels_name,
                                        "feature_type": name,
                                        "kernel": kernel,
                                        "C": c,
                                        "tol": tol,
                                        "train_f1_score": mean_train_f1_score,
                                        "val_f1_score": mean_val_f1_score})

# Explicit column list pins the CSV column order.
results = pd.DataFrame(svm_records,
                       columns=["label_name", "feature_type", "kernel", "C", "tol",
                                "train_f1_score", "val_f1_score"])

results.to_csv(
    os.path.join(PATH_TO_RESULTS, "svm.csv"),
    index=False)

Training F1-score: 0.9842823616776901, validation F1-score: 0.4738446643339687,features - scaled, labels name - coalitions, C=0.1, tol=0.0001, kernel=linear
Training F1-score: 0.15963029992825656, validation F1-score: 0.11296910358869053,features - scaled, labels name - coalitions, C=0.1, tol=0.0001, kernel=poly
Training F1-score: 0.10532542655819599, validation F1-score: 0.10033538894298386,features - scaled, labels name - coalitions, C=0.1, tol=0.0001, kernel=rbf
Training F1-score: 0.11128215080938313, validation F1-score: 0.10864502053888472,features - scaled, labels name - coalitions, C=0.1, tol=0.0001, kernel=sigmoid
Training F1-score: 0.9842823616776901, validation F1-score: 0.4738446643339687,features - scaled, labels name - coalitions, C=0.1, tol=0.001, kernel=linear
Training F1-score: 0.15963029992825656, validation F1-score: 0.11296910358869053,features - scaled, labels name - coalitions, C=0.1, tol=0.001, kernel=poly
Training F1-score: 0.10532542655819599, validation F1-scor



In [46]:
# Grid search: k-nearest neighbours over targets x feature scalings x
# n_neighbors x weights x distance metric. Every configuration is scored with
# 10-fold cross-validation (mean macro F1).

# Create the report directory up front so a missing folder fails (or is fixed)
# before the long-running grid instead of at the final to_csv call.
os.makedirs(PATH_TO_RESULTS, exist_ok=True)

label_sets = [(coalition_labels, coalition_encoder, "coalitions"),
              (party_labels, party_encoder, "parties"),
              (position_labels, position_encoder, "positions")]
feature_sets = [("scaled", features_scaled),
                ("original", features),
                ("minmax", features_min_max)]

# One dict per evaluated configuration; turned into a DataFrame once at the end.
knn_records = []
for labels, encoder, labels_name in label_sets:
    for name, feature in feature_sets:
        for neighbors in [5, 10, 20, 30, 40, 50]:
            for weights in ['uniform', 'distance']:
                # NOTE: in the recorded output 'minkowski' gives the same scores
                # as 'euclidean'; it is kept so the CSV layout stays the same.
                for distance in ['euclidean', 'manhattan', 'chebyshev', 'minkowski']:
                    knn_model = KNeighborsClassifier(n_neighbors=neighbors, weights=weights, metric=distance)
                    mean_train_f1_score, mean_val_f1_score = train_model_kfold(knn_model, 10, feature, labels, encoder)
                    print(f"Training F1-score: {mean_train_f1_score}, validation F1-score: {mean_val_f1_score},"
                          f"features - {name}, labels name - {labels_name}, n_neighbours={neighbors}, weights={weights}, distance={distance}")
                    knn_records.append({"label_name": labels_name,
                                        "feature_type": name,
                                        "distance": distance,
                                        "n_neighbours": neighbors,
                                        "weights": weights,
                                        "train_f1_score": mean_train_f1_score,
                                        "val_f1_score": mean_val_f1_score})

# Explicit column list pins the CSV column order.
results = pd.DataFrame(knn_records,
                       columns=["label_name", "feature_type", "distance", "n_neighbours",
                                "weights", "train_f1_score", "val_f1_score"])

results.to_csv(
    os.path.join(PATH_TO_RESULTS, "knn.csv"),
    index=False)

Training F1-score: 0.4447518468671233, validation F1-score: 0.26132073769352526,features - scaled, labels name - coalitions, n_neighbours=5, weights=uniform, distance=euclidean
Training F1-score: 0.4345821262311113, validation F1-score: 0.2536751622675357,features - scaled, labels name - coalitions, n_neighbours=5, weights=uniform, distance=manhattan
Training F1-score: 0.4605669057125724, validation F1-score: 0.22181967950728754,features - scaled, labels name - coalitions, n_neighbours=5, weights=uniform, distance=chebyshev
Training F1-score: 0.4447518468671233, validation F1-score: 0.26132073769352526,features - scaled, labels name - coalitions, n_neighbours=5, weights=uniform, distance=minkowski
Training F1-score: 1.0, validation F1-score: 0.27141635967235944,features - scaled, labels name - coalitions, n_neighbours=5, weights=distance, distance=euclidean
Training F1-score: 1.0, validation F1-score: 0.274871450743851,features - scaled, labels name - coalitions, n_neighbours=5, weight

