In [1]:
import nltk
import faiss
import importlib
import numpy as np
import pandas as pd
import io
import modules
import pickle
import sys
import numpy
import logging
import modules.ActiveLearning as ActiveLearning
import modules.preprocess as preprocess
from modules.ModelWrap import ModelWrap
from modules.ActiveLearningBase import ActiveLearningBase
from modules.Suggester import Suggester
from modules.Suggest import Suggest
importlib.reload(modules)
importlib.reload(modules.Suggest)
importlib.reload(modules.Suggester)
importlib.reload(modules.ActiveLearningBase)
importlib.reload(modules.ActiveLearning)
importlib.reload(preprocess)
from tqdm import tqdm
from joblib import dump, load
from multiprocessing import Pool
from sklearn import preprocessing
from nltk import WordNetLemmatizer
from sklearn.decomposition import TruncatedSVD
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from scipy.stats import entropy


np.set_printoptions(threshold=100000)
random_seed = 42

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\TOPAPEC\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


ModuleNotFoundError: No module named 'modules.ModelWrap'

In [2]:
# This time we are using reddit self-post classification dataset https://www.kaggle.com/mswarbrickjones/reddit-selfposts/?select=subreddit_info.csv
# We will assume that post title and post body can be concatenated into one text column.
embeddings_dim = 300
tqdm.pandas()

def preprocess_and_save_dataset(dataset_path, preprocessed_path):
    dataframe = pd.read_csv(dataset_path, sep='\t')
    dataframe.loc[:, "text"] = dataframe.loc[:, "title"] + " " + dataframe.loc[:, "selftext"]
    dataframe = dataframe.drop(["title", "selftext"], axis=1)
    preprocess_pipeline(dataframe)
    dataframe.to_pickle(preprocessed_path)
    del dataframe

def vectorize_and_save_dataset(pickle_path, output_path, output_path_labels):
    dataset = pd.read_pickle(pickle_path)
    vectorized = np.zeros((dataset.shape[0], embeddings_dim))
    embeddings = get_glove_reddit_embeddings()
    for i, (vec, row) in enumerate(zip(vectorized, dataset.iterrows())):
        vectorized[i] = preprocess.row_to_embedding(row, embeddings, embeddings_dim)
    del embeddings
    with open(output_path, "wb") as file:
        np.save(file, vectorized)
    with open(output_path_labels, "wb") as file:
        np.save(file, dataset.loc[:, "subreddit"].to_numpy())
    del dataset

    
def preprocess_pipeline(dataset):
    cores = 12
    multicore_tok(dataset, cores)
    wnl = WordNetLemmatizer()
    dataset.loc[:, "text"].progress_apply(lemmatize_sent, args=[wnl])

def lemmatize_sent(wordlist, wnl):
    return ' '.join([wnl.lemmatize(w) for w in wordlist])
    
def multicore_tok(dataset, cores=6):
    with Pool(processes=cores) as pool:
        dataset.loc[:, "text"] = pool.map(nltk.word_tokenize, dataset.loc[:, "text"])

def multicore_lem(dataset, cores=6):
    with Pool(processes=cores) as pool:
        wnl = WordNetLemmatizer()
        for i, line in tqdm(enumerate(dataset.text)):
            dataset.loc[i,"text"] = pool.map(wnl.lemmatize, dataset.loc[i, "text"])
            
def get_glove_reddit_embeddings():
    # Number of words - 1623397 
    embeddings = {}
    tmp = []
    with io.open("GloVe.Reddit.120B.300D.txt", "r", encoding='utf-8') as file:
        file.readline()
        for line in tqdm(file, total=1623397):
            tmp.append(line)
    with Pool(processes=14) as pool:
        tmp = list(tqdm(pool.imap(preprocess.fetch_embeddings_value, tmp, chunksize=200000), total=1623397))
    for word, vector in tqdm(tmp):
        embeddings[word] = vector
    del tmp
    return embeddings

  from pandas import Panel


In [None]:
dataset_path = "selfpost/rspct.tsv"
preprocessed_path = "selfpost/preprocessed.pkl"
preprocess_and_save_dataset(dataset_path, preprocessed_path)

In [44]:
vectorized_output_path = "selfpost/vectorized.npy"
vectorized_labels_output_path = "selfpost/vectorized_labels.npy"
vectorize_and_save_dataset(preprocessed_path, vectorized_output_path, vectorized_labels_output_path)

In [4]:
from sklearn.metrics import log_loss

class LinearModel(ModelWrap):
    def __init__(self, model):
        self.model = model
        self.last_loss = 0
        
    def train(self, X, y):
        self.model.train(X,y)
    
    def predict(self, X):
        return self.model.predict(X)
    
    def predict_proba(self, X):
        return self.model.predict_proba(X)
    
    def get_last_loss(self, X_train, y_train):
        return log_loss(y_train, model.predict_proba(X_train), eps=1e-15)
        

In [2]:
from scipy.stats import entropy



class ConfidenceSamplingSuggestion(ActiveLearningBase):
    def __init__(self, n_top=1000):
        self.n_top = n_top

    def get_samples_for_labeling(self, model, X_test, y_test):
        y_proba = model.predict_proba(X_test)
        y_proba = np.max(y_proba, axis=1)
        ind = np.lexsort((y_test, y_proba))
        return "oracle", ind[:min(self.n_top, y_proba.shape[0])]

class MarginSampling(ActiveLearningBase):
    def __init__(self, n_top=1000):
        self.n_top = n_top
        
    def get_samples_for_labeling(self, model, X_test, y_test):
        y_proba = model.predict_proba(X_test)
        y_proba = np.sort(y_proba, axis=1)[:,::-1]
        y_proba = y_proba[:,0] - y_proba[:,1]
        ind = np.lexsort((y_test, y_proba))
        return "oracle", ind[:min(self.n_top, y_proba.shape[0])]

class EntropySampling(ActiveLearningBase):
    def __init__(self, n_top=1000):
        self.n_top = n_top
        
    def get_samples_for_labeling(self, model, X_test, y_test):
        y_proba = model.predict_proba(X_test)
        y_proba = entropy(y_proba, axis=1)
        ind = np.lexsort((y_test, y_proba))[::-1]
        print(y_proba[ind[0]], y_proba[ind[-1]])
        return "oracle", ind[:min(self.n_top, y_proba.shape[0])]

In [2]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
vectorized_output_path = "selfpost/vectorized.npy"
vectorized_labels_output_path = "selfpost/vectorized_labels.npy"
with open(vectorized_output_path, "rb") as vect_X, open(vectorized_labels_output_path, "rb") as vect_y:
    X = np.load(vect_X, allow_pickle=True)
    y = np.load(vect_y, allow_pickle=True)
le.fit(y)
y = le.transform(y)
del le
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=random_seed)

In [3]:
model_path = "selfpost/models/logreg_100it"
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=random_seed)
model = LogisticRegression(random_state=random_seed, n_jobs=-1, verbose=True)
model.fit(X_train, y_train)
dump(model, model_path)

NameError: name 'X' is not defined

In [3]:
model_path = "selfpost/models/logreg_100it"
model = load(model_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=random_seed)
X_train, y_train

(array([[ 0.06228423,  0.06092767, -0.14749891, ..., -0.3225482 ,
          0.14105452,  0.00854856],
        [ 0.08068533, -0.02982698, -0.12298278, ..., -0.27350412,
          0.05903656,  0.0867618 ],
        [ 0.03988495,  0.06417522, -0.20442137, ..., -0.29071138,
          0.03378086, -0.01860267],
        ...,
        [ 0.15182757,  0.10873834, -0.17133273, ..., -0.17019229,
          0.15758706,  0.04102596],
        [ 0.04931326,  0.03915333, -0.15517242, ..., -0.30393526,
          0.0845803 , -0.00351562],
        [ 0.09860376,  0.01065233, -0.13773885, ..., -0.28388423,
          0.1883342 , -0.11135513]]),
 array([470, 445, 853, ...,   9, 288, 667]))

In [None]:
wrappedModel = LinearModel(model)
confSuggest = ConfidenceSamplingSuggestion(10000)
sampl_ind = rng.choice(X_test.shape[0], 1000, replace=False)
ind = confSuggest.get_samples_for_labeling(model, X_test[sampl_ind, :], y_test[sampl_ind])

print(sampl_ind[ind[1]])

In [77]:
test = np.array([[1, 1], [2, 1]])
test_y = np.array([2, 4])

y_labels = np.array([1, 2, 3])
y_proba = np.array([[1, 0], [111, 1], [10, -2]])
y_proba = entropy(y_proba, axis=1)
ind = np.lexsort((y_proba, y_proba))
print(ind)

y_proba = np.array([[1, 0], [111, 1], [10, -2]])

test = np.append(test, y_proba[ind[0:2]], axis=0)
test

[2 0 1]


array([[ 1,  1],
       [ 2,  1],
       [10, -2],
       [ 1,  0]])

In [120]:
logging.getLogger().setLevel(logging.INFO)
sug = Suggester(X,y)


In [90]:
suggest_alg = EntropySampling(100)
sug.active_learning_suggest(suggest_alg, LinearModel(model), sample_ratio=0.01)

INFO:root:Looking at 8102 samples from test.


6.7792310653056544 8.090449338436718e-13


In [91]:
sug.last_suggest.indices

array([263663, 556963, 162239,  32687, 534678, 123910, 376963, 676468,
       232583, 569997, 102716, 393583, 645799, 105063, 738217, 767381,
       723656, 759143, 378407, 476941,  88606, 138546, 295977, 376829,
       185133, 183306, 299197, 167503, 128336, 406411, 341994, 229912,
       322558, 309247, 102168, 161123, 167053, 782591, 624216, 293301,
       495366, 319562, 602956,   5770, 205171, 627581, 189452, 194699,
       532958,   8917, 637468, 141753, 375895, 306330, 312546, 410753,
        36320, 772195, 399059, 795146, 142815, 580262, 405912,  42023,
       635218,   4919, 788857, 644924, 772022, 780991, 498361, 464755,
       586466,  40846, 517110, 786113, 320149, 306214, 610751, 782525,
       162965, 274700,  36375, 181143, 406103, 693918, 799682, 768447,
       142749,  46536, 604493,  93185, 282442, 147943, 483390, 623975,
       197345, 522718, 629541, 210976])

In [92]:
samples_to_move = sug.X_test[sug.last_suggest.indices]
samples_to_move.shape, sug.X_train.shape, sug.X_test.shape

((100, 300), (202800, 300), (810200, 300))

In [93]:
sug.apply_last_suggest()

In [94]:
sug.X_train.shape, sug.X_test.shape

((202900, 300), (810100, 300))

In [131]:
class PseudoLabeling(ActiveLearningBase):
    def __init__(self, n_top=1000):
        self.n_top = n_top

    def get_samples_for_labeling(self, model, X_test, y_test):
        y_proba = model.predict_proba(X_test)
        max_ind = np.argmax(y_proba, axis=1)
        y_proba = np.max(y_proba, axis=1)
        ind = np.lexsort((max_ind, y_proba))[::-1]
        ind_to_return = ind[:min(self.n_top, y_proba.shape[0])]
        return "relabeling", ind_to_return, max_ind[ind_to_return]

In [134]:
suggest_alg = PseudoLabeling(100)
sug.active_learning_suggest(suggest_alg, LinearModel(model), sample_ratio=0.01)

INFO:root:Looking at 8104 samples from test.


In [None]:
samples_to_move = sug.X_test[sug.last_suggest.indices]
samples_to_move.shape, sug.X_train.shape, sug.X_test.shape

In [136]:
sug.apply_last_suggest()



Let's benchmark our solutions.

In [5]:
vectorized_output_path = "selfpost/vectorized.npy"
vectorized_labels_output_path = "selfpost/vectorized_labels.npy"
with open(vectorized_output_path, "rb") as vect_X, open(vectorized_labels_output_path, "rb") as vect_y:
    X = np.load(vect_X, allow_pickle=True)
    y = np.load(vect_y, allow_pickle=True)
model_path = "selfpost/models/logreg_100it"
model = load(model_path)

In [None]:
logging.getLogger().setLevel(logging.INFO)
sug = Suggester(X,y)
suggest_alg = ActiveLearning.EntropySampling(10000)
epochs = 5
print(sug.evaluate_metrics(LinearModel(model)))
print(sug.X_test.shape, sug.y_test.shape)
for ep in range(epochs):
    sug.active_learning_suggest(suggest_alg, LinearModel(model), sample_ratio=0.1)
    sug.apply_last_suggest()
    print(sug.X_test.shape, sug.y_test.shape)
    model.fit(sug.X_train, sug.y_train)
    print(sug.evaluate_metrics(LinearModel(model)))

INFO:root:Looking at 81040 samples from test.


{'accuracy': 0.5454084402764067, 'f1_score': 0.5411580561102263, 'precision_score': 0.5616867560333363, 'recall_score': 0.5460126540293783}
(810400, 300) (810400,)
(800400, 300) (800400,)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 29.2min finished
INFO:root:Looking at 80040 samples from test.


{'accuracy': 0.546228135932034, 'f1_score': 0.5417091029620242, 'precision_score': 0.5615681180355546, 'recall_score': 0.5465371881251833}
(790400, 300) (790400,)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


It actually took too long to train logreg from sklearn. I am going to implement one with pytorch to train on cuda.

In [3]:
import torch
import torch.nn as nn
import random



class LogReg(nn.Module):
    def __init__(self):
        super().__init__()
        self.seq = nn.Sequential(
            nn.Linear(300, 1013),
            nn.LogSoftmax(dim=1)
        )
        
    def forward(self, x):
        return self.seq(x)

In [4]:
import torch.nn.functional as F
from tqdm import tqdm

from sklearn.metrics import log_loss
from torch.utils.data import TensorDataset, DataLoader 

def worker_init_fn(x):
    seed = args.seed + x
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    return


class LinearModelTorch(ModelWrap):
    def __init__(self, model, itr):
        self.model = model
        self.last_loss = 0
        self.iter = itr
        
    def train(self, X, y):
        torch.cuda.empty_cache()
        X = torch.from_numpy(X.astype(np.float32))
        y = torch.from_numpy(y.astype(np.int64))
        self.model = self.model.cuda()
        self.model.train()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-2, weight_decay=1e-5)
        criterion = nn.NLLLoss()
        dataset = TensorDataset(X, y)
        dataloader = DataLoader(dataset, batch_size=10000, num_workers=0)
        for epoch in tqdm(range(self.iter)):
            for bX, by in dataloader:
                bX = bX.view(-1, 300)
                bX = bX.cuda()
                by = by.cuda()
                torch.cuda.manual_seed(random_seed)
                optimizer.zero_grad()
                torch.cuda.manual_seed(random_seed)
                output = self.model(bX)
                loss = criterion(output, by)
                loss.backward()
                optimizer.step()
#             if (epoch % 10 == 0):
#                 print(f"ep{epoch}: {loss}")
            torch.cuda.empty_cache()
        X = X.cpu().detach().numpy()
        y = y.cpu().detach().numpy()
    
    def predict(self, X):
        X = torch.from_numpy(X)
        X = X.view(-1, 300)
        X = X.cpu()
        self.model = self.model.cpu()
        self.model = self.model.eval()
        result = torch.argmax(self.model(X.float()), axis=1)
        X = X.detach().numpy()
        return result.detach().numpy()
    
    def predict_proba(self, X):
        X = torch.from_numpy(X)
        X = X.view(-1, 300)
        X = X.cpu()
        result = torch.exp(self.model(X.float()))
        X = X.detach().numpy()
        return result.detach().numpy()
    
    def get_last_loss(self, X_train, y_train):
        return log_loss(y_train, self.model.predict_proba(X_train), eps=1e-15)
        

In [5]:
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)
np.random.seed(random_seed)
torch.use_deterministic_algorithms(False)
random.seed(random_seed)

model = LinearModelTorch(LogReg(), 100)
model.train(X_train, y_train.astype(np.int64))
# print(sug.evaluate_metrics(model))

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [02:43<00:00,  1.64s/it]


In [12]:
model_path = "selfpost/models/logreg_torch1000it"
torch.save(model.model.state_dict(), model_path)

In [5]:
model_path = "selfpost/models/logreg_torch1000it"
model = LogReg()
model.load_state_dict(torch.load(model_path))
model.eval()
model = LinearModelTorch(model, 100)

In [6]:
# Let's load training data by batches.
from torch.utils.data import IterableDataset
class IterDataset(IterableDataset):
    def __init__(self, filename, chunksize):
        self.filename = filename
        self.chunksize = chunksize
        
    def __iter__(self):
        csv = pd.read_csv(self.filename, chunksize=self.chunksize)
        for chunk in csv:
            y = chunk["label"]
            X = chunk["features"]
            yield X, y

In [9]:
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)
np.random.seed(random_seed)
torch.use_deterministic_algorithms(False)
random.seed(random_seed)

logging.getLogger().setLevel(logging.INFO)
sug = Suggester(X,y, test_size=0.999)
suggest_alg = ActiveLearning.EntropySampling(20000)
epochs = 5
model = LinearModelTorch(LogReg(), 100)
model.train(sug.X_train, sug.y_train)
print(sug.evaluate_metrics(model))
for ep in range(epochs):
    sug.active_learning_suggest(suggest_alg, model, sample_ratio=1)
    sug.apply_last_suggest()
    model = LinearModelTorch(LogReg(), 100)
    model.train(sug.X_train, sug.y_train)
    print(sug.evaluate_metrics(model)) 

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 153.14it/s]
INFO:root:Looking at 1011987 samples from test.


{'accuracy': 0.06928152239109792, 'f1_score': 0.04668933198411252, 'precision_score': 0.0905574345581354, 'recall_score': 0.06937743446586983}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:17<00:00,  5.70it/s]
INFO:root:Looking at 991987 samples from test.


{'accuracy': 0.12032314939611104, 'f1_score': 0.08011109207989375, 'precision_score': 0.16564131417792763, 'recall_score': 0.12334363050572518}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:33<00:00,  2.98it/s]
INFO:root:Looking at 971987 samples from test.


{'accuracy': 0.1761947433453328, 'f1_score': 0.1435605994590661, 'precision_score': 0.3220889701125707, 'recall_score': 0.17948055805536103}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:54<00:00,  1.83it/s]
INFO:root:Looking at 951987 samples from test.


{'accuracy': 0.20563095924629224, 'f1_score': 0.17587917126308947, 'precision_score': 0.35339331627805903, 'recall_score': 0.21121393444142386}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:09<00:00,  1.45it/s]
INFO:root:Looking at 931987 samples from test.


{'accuracy': 0.250815730262332, 'f1_score': 0.21925739638517897, 'precision_score': 0.40163269304307286, 'recall_score': 0.25717202419174234}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:22<00:00,  1.21it/s]


{'accuracy': 0.2718777789595685, 'f1_score': 0.23986938423568663, 'precision_score': 0.42225925440403356, 'recall_score': 0.28008372187732206}


In [11]:
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)
np.random.seed(random_seed)
torch.use_deterministic_algorithms(False)
random.seed(random_seed)

logging.getLogger().setLevel(logging.INFO)
sug = Suggester(X,y, test_size=0.999)
suggest_alg = ActiveLearning.RandomSampling(20000)
epochs = 5
model = LinearModelTorch(LogReg(), 100)
model.train(sug.X_train, sug.y_train)
print(sug.evaluate_metrics(model))
for ep in range(epochs):
    sug.active_learning_suggest(suggest_alg, model, sample_ratio=1)
    sug.apply_last_suggest()
    model = LinearModelTorch(LogReg(), 100)
    model.train(sug.X_train, sug.y_train)
    print(sug.evaluate_metrics(model))

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 141.44it/s]
  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:Looking at 1011987 samples from test.


{'accuracy': 0.06928152239109792, 'f1_score': 0.04668933198411252, 'precision_score': 0.0905574345581354, 'recall_score': 0.06937743446586983}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:17<00:00,  5.59it/s]
INFO:root:Looking at 991987 samples from test.


{'accuracy': 0.25758603691379023, 'f1_score': 0.2295162463024065, 'precision_score': 0.3802541015988504, 'recall_score': 0.25801296594211937}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:35<00:00,  2.85it/s]
INFO:root:Looking at 971987 samples from test.


{'accuracy': 0.3651293690141946, 'f1_score': 0.3467491736553693, 'precision_score': 0.46386106952914696, 'recall_score': 0.3656973686331774}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:51<00:00,  1.95it/s]
INFO:root:Looking at 951987 samples from test.


{'accuracy': 0.42716969874588623, 'f1_score': 0.4141080949884524, 'precision_score': 0.4931442420478847, 'recall_score': 0.427722552321968}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:09<00:00,  1.44it/s]
INFO:root:Looking at 931987 samples from test.


{'accuracy': 0.45980469684662983, 'f1_score': 0.4494874504307956, 'precision_score': 0.5124286969425367, 'recall_score': 0.46034060871317634}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:27<00:00,  1.15it/s]


{'accuracy': 0.4784958557523298, 'f1_score': 0.46862981783226004, 'precision_score': 0.523155552472409, 'recall_score': 0.4790782979829941}


In [12]:
def test_sampling(sampling):
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)
    np.random.seed(random_seed)
    torch.use_deterministic_algorithms(False)
    random.seed(random_seed)

    logging.getLogger().setLevel(logging.INFO)
    sug = Suggester(X,y, test_size=0.999)
    suggest_alg = sampling(20000)
    epochs = 5
    model = LinearModelTorch(LogReg(), 100)
    model.train(sug.X_train, sug.y_train)
    print(sug.evaluate_metrics(model))
    for ep in range(epochs):
        sug.active_learning_suggest(suggest_alg, model, sample_ratio=1)
        sug.apply_last_suggest()
        model = LinearModelTorch(LogReg(), 100)
        model.train(sug.X_train, sug.y_train)
        print(sug.evaluate_metrics(model))

In [13]:
test_sampling(ActiveLearning.RandomSampling)

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 135.32it/s]
  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:Looking at 1011987 samples from test.


{'accuracy': 0.06928152239109792, 'f1_score': 0.04668933198411252, 'precision_score': 0.0905574345581354, 'recall_score': 0.06937743446586983}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.91it/s]
INFO:root:Looking at 991987 samples from test.


{'accuracy': 0.25758603691379023, 'f1_score': 0.2295162463024065, 'precision_score': 0.3802541015988504, 'recall_score': 0.25801296594211937}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:39<00:00,  2.56it/s]
INFO:root:Looking at 971987 samples from test.


{'accuracy': 0.3651293690141946, 'f1_score': 0.3467491736553693, 'precision_score': 0.46386106952914696, 'recall_score': 0.3656973686331774}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:57<00:00,  1.74it/s]
INFO:root:Looking at 951987 samples from test.


{'accuracy': 0.42716969874588623, 'f1_score': 0.4141080949884524, 'precision_score': 0.4931442420478847, 'recall_score': 0.427722552321968}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:16<00:00,  1.30it/s]
INFO:root:Looking at 931987 samples from test.


{'accuracy': 0.45980469684662983, 'f1_score': 0.4494874504307956, 'precision_score': 0.5124286969425367, 'recall_score': 0.46034060871317634}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:35<00:00,  1.04it/s]


{'accuracy': 0.4784958557523298, 'f1_score': 0.46862981783226004, 'precision_score': 0.523155552472409, 'recall_score': 0.4790782979829941}


In [14]:
test_sampling(ActiveLearning.MarginSamplinxg)

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 148.59it/s]
  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:Looking at 1011987 samples from test.


{'accuracy': 0.06928152239109792, 'f1_score': 0.04668933198411252, 'precision_score': 0.0905574345581354, 'recall_score': 0.06937743446586983}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.93it/s]
INFO:root:Looking at 991987 samples from test.


{'accuracy': 0.20885757575452096, 'f1_score': 0.1713918650357639, 'precision_score': 0.31880790131829706, 'recall_score': 0.20964201571115237}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:39<00:00,  2.53it/s]
INFO:root:Looking at 971987 samples from test.


{'accuracy': 0.29544119417235004, 'f1_score': 0.2812895706903934, 'precision_score': 0.4518881159402032, 'recall_score': 0.29597843118867534}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:58<00:00,  1.71it/s]
INFO:root:Looking at 951987 samples from test.


{'accuracy': 0.3730071944259743, 'f1_score': 0.3646671354662496, 'precision_score': 0.4906963135925287, 'recall_score': 0.3731876283867597}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:16<00:00,  1.30it/s]
INFO:root:Looking at 931987 samples from test.


{'accuracy': 0.42011852096649416, 'f1_score': 0.4202663346567754, 'precision_score': 0.5037458470077114, 'recall_score': 0.41979518849104924}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:34<00:00,  1.05it/s]


{'accuracy': 0.4530810197952383, 'f1_score': 0.4558394587601572, 'precision_score': 0.5246939914541863, 'recall_score': 0.45223207448474995}


In [15]:
test_sampling(ActiveLearning.EntropySampling)

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 155.15it/s]
INFO:root:Looking at 1011987 samples from test.


{'accuracy': 0.06928152239109792, 'f1_score': 0.04668933198411252, 'precision_score': 0.0905574345581354, 'recall_score': 0.06937743446586983}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:19<00:00,  5.08it/s]
INFO:root:Looking at 991987 samples from test.


{'accuracy': 0.12032314939611104, 'f1_score': 0.08011109207989375, 'precision_score': 0.16564131417792763, 'recall_score': 0.12334363050572518}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:37<00:00,  2.64it/s]
INFO:root:Looking at 971987 samples from test.


{'accuracy': 0.1761947433453328, 'f1_score': 0.1435605994590661, 'precision_score': 0.3220889701125707, 'recall_score': 0.17948055805536103}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:57<00:00,  1.75it/s]
INFO:root:Looking at 951987 samples from test.


{'accuracy': 0.20563095924629224, 'f1_score': 0.17587917126308947, 'precision_score': 0.35339331627805903, 'recall_score': 0.21121393444142386}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:17<00:00,  1.30it/s]
INFO:root:Looking at 931987 samples from test.


{'accuracy': 0.250815730262332, 'f1_score': 0.21925739638517897, 'precision_score': 0.40163269304307286, 'recall_score': 0.25717202419174234}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:38<00:00,  1.01it/s]


{'accuracy': 0.2718777789595685, 'f1_score': 0.23986938423568663, 'precision_score': 0.42225925440403356, 'recall_score': 0.28008372187732206}


In [16]:
test_samplingt_sampling(ActiveLearning.ConfidenceSamplingSuggestion)

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 151.52it/s]
  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:Looking at 1011987 samples from test.


{'accuracy': 0.06928152239109792, 'f1_score': 0.04668933198411252, 'precision_score': 0.0905574345581354, 'recall_score': 0.06937743446586983}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:19<00:00,  5.05it/s]
INFO:root:Looking at 991987 samples from test.


{'accuracy': 0.12348448114743439, 'f1_score': 0.08120031715059717, 'precision_score': 0.14956190363376695, 'recall_score': 0.12647391339714645}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:40<00:00,  2.49it/s]
INFO:root:Looking at 971987 samples from test.


{'accuracy': 0.1864263616694462, 'f1_score': 0.1584124212508903, 'precision_score': 0.33191636161012233, 'recall_score': 0.1891674600090658}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:58<00:00,  1.70it/s]
INFO:root:Looking at 951987 samples from test.


{'accuracy': 0.22465012652483698, 'f1_score': 0.19963730701523458, 'precision_score': 0.3805215460811074, 'recall_score': 0.2287199787284282}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:17<00:00,  1.29it/s]
INFO:root:Looking at 931987 samples from test.


{'accuracy': 0.28668962120716274, 'f1_score': 0.2668259998297966, 'precision_score': 0.43653398161855067, 'recall_score': 0.29104322356700546}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:39<00:00,  1.01it/s]


{'accuracy': 0.3230221483420268, 'f1_score': 0.30471003897422383, 'precision_score': 0.4644152379266481, 'recall_score': 0.3281183018721371}


In [17]:
test_sampling(ActiveLearning.PseudoLabeling)

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 149.03it/s]
INFO:root:Looking at 1011987 samples from test.


{'accuracy': 0.06928152239109792, 'f1_score': 0.04668933198411252, 'precision_score': 0.0905574345581354, 'recall_score': 0.06937743446586983}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.83it/s]
INFO:root:Looking at 991987 samples from test.


{'accuracy': 0.047714334966083224, 'f1_score': 0.02743961741807955, 'precision_score': 0.05530317286837833, 'recall_score': 0.04980937577977587}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:40<00:00,  2.46it/s]
INFO:root:Looking at 971987 samples from test.


{'accuracy': 0.03425560218397983, 'f1_score': 0.01927343391226129, 'precision_score': 0.051314444957531916, 'recall_score': 0.03820572043909834}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:02<00:00,  1.60it/s]
INFO:root:Looking at 951987 samples from test.


{'accuracy': 0.026843853960190633, 'f1_score': 0.01507116066391142, 'precision_score': 0.048106576126196045, 'recall_score': 0.03191526053884242}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:21<00:00,  1.23it/s]
INFO:root:Looking at 931987 samples from test.


{'accuracy': 0.022224558926251116, 'f1_score': 0.01265036136148406, 'precision_score': 0.04218539275659873, 'recall_score': 0.027715996198391497}


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:41<00:00,  1.02s/it]


{'accuracy': 0.019238212825402116, 'f1_score': 0.01108025426126459, 'precision_score': 0.03880440281425076, 'recall_score': 0.024817215785548453}


In [None]:
class FindIndistinguishableObjectInTrain(ActiveLearningBase):
    def __init__(self):
        pass

    def build_class_confusion_dict(self, y_pred, y_train):
#         {"a-b":100, "b-c":10, ...}
        
        pass
    
    def get_samples_for_labeling(self, model, X_train, y_train):
        logging.Info("Class a and b were confused 1000 times in total. To relabel them to class a-b apply this suggest...")


# Estimated Error Reduction 


In [15]:
import importlib
import numpy as np
import torch.nn as nn
import modules
import torch
import pandas as pd
from tqdm import tqdm
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from modules.models.Wraps import TorchClassifierWrap
from modules.models.Linear import LogReg
importlib.reload(modules.models.Wraps)
importlib.reload(modules.models.Linear)



# https://archive.ics.uci.edu/ml/datasets/Wine+Quality - red wine quality
wineq = pd.read_csv("tmp/datasets/winequality.csv", sep=";")

X = wineq.drop("quality", axis=1)
y = wineq["quality"]
classes = np.unique(y)
X.shape, y.shape

((1599, 11), (1599,))

In [17]:
np.unique(y)

array([3, 4, 5, 6, 7, 8], dtype=int64)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.999, random_state=42)
X_pool, X_val, y_pool, y_val = train_test_split(X_test, y_test, test_size=0.2, random_state=42)

In [18]:
def expectedErrorReduction(sample, classes, X_train, y_train, base_model):
    eer = 0.0
    for cls in classes:
        model = TorchClassifierWrap(LogReg(X_train.shape[1], len(classes)), 100, X_train.shape[1], X_train.shape[0])
        X_append = np.asarray([sample for i in range(len(classes))]).reshape((len(classes), X_train.shape[1]))
        y_append = np.asarray([cls for cls in classes]).reshape((len(classes)))
        X_new = np.append(X_train, X_append, axis=0)
        y_new = np.append(y_train, y_append, axis=0)
        model.fit(X_new, y_new)
        proba = model.predict_proba(X_append)
        loss = float(nn.NLLLoss()(torch.tensor(proba), torch.tensor(y_append, dtype=torch.int64)))
        print(loss)
        err += loss * base_model.predict_proba[cls]
    return eer

In [23]:
def EERsampling(X_train, y_train, X_pool, y_pool, X_val, y_val, itr):
    classes = np.unique(y_train)
    model = TorchClassifierWrap(LogReg(X_train.shape[1], len(classes)), 100, X_train.shape[1], 10)
    metrics = []
    for iteration in tqdm(range(itr)):
        sample_ind = np.random.choice(self.X_test.shape[0], num_of_elements, replace=False)
        loss_reduction = np.ones((sample_ind.shape[0]))
        for i, sample in enumerate(X_pool[sample_ind]):
            loss_reduction[i] = expectedErrorReduction(sample, classes, X_train, y_train, model)
        print(loss_reduction)
        srt = np.argsort(loss_reduction)
        X_train = np.append(X_train, X_pool[srt[0]], axis=0)
        y_train = np.append(y_train, y_pool[srt[0]], axis=0)
        X_pool = np.delete(X_pool, srt[0], axis=0)
        y_pool = np.delete(y_pool, srt[0], axis=0)
        model = TorchClassifierWrap(LogReg(X_train.shape[1], len(classes)), 100, X_train.shape[1], 10)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        metrics.append(precision_score(y_pred, y_val, average="macro"))
    return metrics

In [24]:
EERsampling(X_train, y_train, X_pool, y_pool, X_val, y_val, 10)

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]


ValueError: cannot reshape array of size 6 into shape (6,11)

In [117]:
def Randomsampling(X_train, y_train, X_pool, y_pool, X_val, y_val, itr):
    classes = np.unique(y_train)
    metrics = []
    for iteration in tqdm(range(itr)):
        loss_reduction = np.ones((X_pool.shape))
        for i, sample in enumerate(X_pool):
            loss_reduction[i] = np.random.random_sample()
        print(loss_reduction)
        srt = np.argsort(loss_reduction)
        X_train = np.append(X_train, X_pool[srt[0]], axis=0)
        y_train = np.append(y_train, y_pool[srt[0]], axis=0)
        X_pool = np.delete(X_pool, srt[0], axis=0)
        y_pool = np.delete(y_pool, srt[0], axis=0)
        model = TorchClassifierWrap(LogReg(X_train.shape[1], len(classes)), 100, X_train.shape[1], 10)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        metrics.append(precision_score(y_pred, y_val, average="macro"))
    return metrics

In [118]:
Randomsampling(X_train, y_train, X_pool, y_pool, X_val, y_val, 10)

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

[[0.37454012 0.37454012 0.37454012 0.37454012]
 [0.95071431 0.95071431 0.95071431 0.95071431]
 [0.73199394 0.73199394 0.73199394 0.73199394]
 [0.59865848 0.59865848 0.59865848 0.59865848]
 [0.15601864 0.15601864 0.15601864 0.15601864]
 [0.15599452 0.15599452 0.15599452 0.15599452]
 [0.05808361 0.05808361 0.05808361 0.05808361]
 [0.86617615 0.86617615 0.86617615 0.86617615]
 [0.60111501 0.60111501 0.60111501 0.60111501]
 [0.70807258 0.70807258 0.70807258 0.70807258]
 [0.02058449 0.02058449 0.02058449 0.02058449]
 [0.96990985 0.96990985 0.96990985 0.96990985]
 [0.83244264 0.83244264 0.83244264 0.83244264]
 [0.21233911 0.21233911 0.21233911 0.21233911]
 [0.18182497 0.18182497 0.18182497 0.18182497]
 [0.18340451 0.18340451 0.18340451 0.18340451]
 [0.30424224 0.30424224 0.30424224 0.30424224]
 [0.52475643 0.52475643 0.52475643 0.52475643]
 [0.43194502 0.43194502 0.43194502 0.43194502]
 [0.29122914 0.29122914 0.29122914 0.29122914]
 [0.61185289 0.61185289 0.61185289 0.61185289]
 [0.13949386 

 30%|████████████████████████▉                                                          | 3/10 [00:00<00:00,  7.74it/s]

[[0.37454012 0.37454012 0.37454012 0.37454012]
 [0.95071431 0.95071431 0.95071431 0.95071431]
 [0.73199394 0.73199394 0.73199394 0.73199394]
 [0.59865848 0.59865848 0.59865848 0.59865848]
 [0.15601864 0.15601864 0.15601864 0.15601864]
 [0.15599452 0.15599452 0.15599452 0.15599452]
 [0.05808361 0.05808361 0.05808361 0.05808361]
 [0.86617615 0.86617615 0.86617615 0.86617615]
 [0.60111501 0.60111501 0.60111501 0.60111501]
 [0.70807258 0.70807258 0.70807258 0.70807258]
 [0.02058449 0.02058449 0.02058449 0.02058449]
 [0.96990985 0.96990985 0.96990985 0.96990985]
 [0.83244264 0.83244264 0.83244264 0.83244264]
 [0.21233911 0.21233911 0.21233911 0.21233911]
 [0.18182497 0.18182497 0.18182497 0.18182497]
 [0.18340451 0.18340451 0.18340451 0.18340451]
 [0.30424224 0.30424224 0.30424224 0.30424224]
 [0.52475643 0.52475643 0.52475643 0.52475643]
 [0.43194502 0.43194502 0.43194502 0.43194502]
 [0.29122914 0.29122914 0.29122914 0.29122914]
 [0.61185289 0.61185289 0.61185289 0.61185289]
 [0.13949386 

 40%|█████████████████████████████████▏                                                 | 4/10 [00:00<00:00,  6.31it/s]

[[0.37454012 0.37454012 0.37454012 0.37454012]
 [0.95071431 0.95071431 0.95071431 0.95071431]
 [0.73199394 0.73199394 0.73199394 0.73199394]
 [0.59865848 0.59865848 0.59865848 0.59865848]
 [0.15601864 0.15601864 0.15601864 0.15601864]
 [0.15599452 0.15599452 0.15599452 0.15599452]
 [0.05808361 0.05808361 0.05808361 0.05808361]
 [0.86617615 0.86617615 0.86617615 0.86617615]
 [0.60111501 0.60111501 0.60111501 0.60111501]
 [0.70807258 0.70807258 0.70807258 0.70807258]
 [0.02058449 0.02058449 0.02058449 0.02058449]
 [0.96990985 0.96990985 0.96990985 0.96990985]
 [0.83244264 0.83244264 0.83244264 0.83244264]
 [0.21233911 0.21233911 0.21233911 0.21233911]
 [0.18182497 0.18182497 0.18182497 0.18182497]
 [0.18340451 0.18340451 0.18340451 0.18340451]
 [0.30424224 0.30424224 0.30424224 0.30424224]
 [0.52475643 0.52475643 0.52475643 0.52475643]
 [0.43194502 0.43194502 0.43194502 0.43194502]
 [0.29122914 0.29122914 0.29122914 0.29122914]
 [0.61185289 0.61185289 0.61185289 0.61185289]
 [0.13949386 

 50%|█████████████████████████████████████████▌                                         | 5/10 [00:00<00:00,  5.59it/s]

[[0.37454012 0.37454012 0.37454012 0.37454012]
 [0.95071431 0.95071431 0.95071431 0.95071431]
 [0.73199394 0.73199394 0.73199394 0.73199394]
 [0.59865848 0.59865848 0.59865848 0.59865848]
 [0.15601864 0.15601864 0.15601864 0.15601864]
 [0.15599452 0.15599452 0.15599452 0.15599452]
 [0.05808361 0.05808361 0.05808361 0.05808361]
 [0.86617615 0.86617615 0.86617615 0.86617615]
 [0.60111501 0.60111501 0.60111501 0.60111501]
 [0.70807258 0.70807258 0.70807258 0.70807258]
 [0.02058449 0.02058449 0.02058449 0.02058449]
 [0.96990985 0.96990985 0.96990985 0.96990985]
 [0.83244264 0.83244264 0.83244264 0.83244264]
 [0.21233911 0.21233911 0.21233911 0.21233911]
 [0.18182497 0.18182497 0.18182497 0.18182497]
 [0.18340451 0.18340451 0.18340451 0.18340451]
 [0.30424224 0.30424224 0.30424224 0.30424224]
 [0.52475643 0.52475643 0.52475643 0.52475643]
 [0.43194502 0.43194502 0.43194502 0.43194502]
 [0.29122914 0.29122914 0.29122914 0.29122914]
 [0.61185289 0.61185289 0.61185289 0.61185289]
 [0.13949386 

 60%|█████████████████████████████████████████████████▊                                 | 6/10 [00:01<00:00,  5.17it/s]

[[0.37454012 0.37454012 0.37454012 0.37454012]
 [0.95071431 0.95071431 0.95071431 0.95071431]
 [0.73199394 0.73199394 0.73199394 0.73199394]
 [0.59865848 0.59865848 0.59865848 0.59865848]
 [0.15601864 0.15601864 0.15601864 0.15601864]
 [0.15599452 0.15599452 0.15599452 0.15599452]
 [0.05808361 0.05808361 0.05808361 0.05808361]
 [0.86617615 0.86617615 0.86617615 0.86617615]
 [0.60111501 0.60111501 0.60111501 0.60111501]
 [0.70807258 0.70807258 0.70807258 0.70807258]
 [0.02058449 0.02058449 0.02058449 0.02058449]
 [0.96990985 0.96990985 0.96990985 0.96990985]
 [0.83244264 0.83244264 0.83244264 0.83244264]
 [0.21233911 0.21233911 0.21233911 0.21233911]
 [0.18182497 0.18182497 0.18182497 0.18182497]
 [0.18340451 0.18340451 0.18340451 0.18340451]
 [0.30424224 0.30424224 0.30424224 0.30424224]
 [0.52475643 0.52475643 0.52475643 0.52475643]
 [0.43194502 0.43194502 0.43194502 0.43194502]
 [0.29122914 0.29122914 0.29122914 0.29122914]
 [0.61185289 0.61185289 0.61185289 0.61185289]
 [0.13949386 

 70%|██████████████████████████████████████████████████████████                         | 7/10 [00:01<00:00,  4.45it/s]

[[0.37454012 0.37454012 0.37454012 0.37454012]
 [0.95071431 0.95071431 0.95071431 0.95071431]
 [0.73199394 0.73199394 0.73199394 0.73199394]
 [0.59865848 0.59865848 0.59865848 0.59865848]
 [0.15601864 0.15601864 0.15601864 0.15601864]
 [0.15599452 0.15599452 0.15599452 0.15599452]
 [0.05808361 0.05808361 0.05808361 0.05808361]
 [0.86617615 0.86617615 0.86617615 0.86617615]
 [0.60111501 0.60111501 0.60111501 0.60111501]
 [0.70807258 0.70807258 0.70807258 0.70807258]
 [0.02058449 0.02058449 0.02058449 0.02058449]
 [0.96990985 0.96990985 0.96990985 0.96990985]
 [0.83244264 0.83244264 0.83244264 0.83244264]
 [0.21233911 0.21233911 0.21233911 0.21233911]
 [0.18182497 0.18182497 0.18182497 0.18182497]
 [0.18340451 0.18340451 0.18340451 0.18340451]
 [0.30424224 0.30424224 0.30424224 0.30424224]
 [0.52475643 0.52475643 0.52475643 0.52475643]
 [0.43194502 0.43194502 0.43194502 0.43194502]
 [0.29122914 0.29122914 0.29122914 0.29122914]
 [0.61185289 0.61185289 0.61185289 0.61185289]
 [0.13949386 

 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [00:01<00:00,  4.05it/s]

[[0.37454012 0.37454012 0.37454012 0.37454012]
 [0.95071431 0.95071431 0.95071431 0.95071431]
 [0.73199394 0.73199394 0.73199394 0.73199394]
 [0.59865848 0.59865848 0.59865848 0.59865848]
 [0.15601864 0.15601864 0.15601864 0.15601864]
 [0.15599452 0.15599452 0.15599452 0.15599452]
 [0.05808361 0.05808361 0.05808361 0.05808361]
 [0.86617615 0.86617615 0.86617615 0.86617615]
 [0.60111501 0.60111501 0.60111501 0.60111501]
 [0.70807258 0.70807258 0.70807258 0.70807258]
 [0.02058449 0.02058449 0.02058449 0.02058449]
 [0.96990985 0.96990985 0.96990985 0.96990985]
 [0.83244264 0.83244264 0.83244264 0.83244264]
 [0.21233911 0.21233911 0.21233911 0.21233911]
 [0.18182497 0.18182497 0.18182497 0.18182497]
 [0.18340451 0.18340451 0.18340451 0.18340451]
 [0.30424224 0.30424224 0.30424224 0.30424224]
 [0.52475643 0.52475643 0.52475643 0.52475643]
 [0.43194502 0.43194502 0.43194502 0.43194502]
 [0.29122914 0.29122914 0.29122914 0.29122914]
 [0.61185289 0.61185289 0.61185289 0.61185289]
 [0.13949386 

 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [00:02<00:00,  3.51it/s]

[[0.37454012 0.37454012 0.37454012 0.37454012]
 [0.95071431 0.95071431 0.95071431 0.95071431]
 [0.73199394 0.73199394 0.73199394 0.73199394]
 [0.59865848 0.59865848 0.59865848 0.59865848]
 [0.15601864 0.15601864 0.15601864 0.15601864]
 [0.15599452 0.15599452 0.15599452 0.15599452]
 [0.05808361 0.05808361 0.05808361 0.05808361]
 [0.86617615 0.86617615 0.86617615 0.86617615]
 [0.60111501 0.60111501 0.60111501 0.60111501]
 [0.70807258 0.70807258 0.70807258 0.70807258]
 [0.02058449 0.02058449 0.02058449 0.02058449]
 [0.96990985 0.96990985 0.96990985 0.96990985]
 [0.83244264 0.83244264 0.83244264 0.83244264]
 [0.21233911 0.21233911 0.21233911 0.21233911]
 [0.18182497 0.18182497 0.18182497 0.18182497]
 [0.18340451 0.18340451 0.18340451 0.18340451]
 [0.30424224 0.30424224 0.30424224 0.30424224]
 [0.52475643 0.52475643 0.52475643 0.52475643]
 [0.43194502 0.43194502 0.43194502 0.43194502]
 [0.29122914 0.29122914 0.29122914 0.29122914]
 [0.61185289 0.61185289 0.61185289 0.61185289]
 [0.13949386 

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.14it/s]


[0.6862745098039215,
 0.7058823529411765,
 0.8941176470588236,
 0.9441176470588234,
 0.823529411764706,
 0.9333333333333332,
 0.7058823529411765,
 0.8431372549019608,
 0.9833333333333334,
 0.9411764705882352]