In [67]:
import sys
import io
import os
import pandas as pd
import numpy as np

import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

def dummy(doc):
    return doc

Dataset = "pubmed"

threshold = 100

pp_textual_emb_type = "pv_dbow"
citation_emb_type = "n2v"

In [2]:
# extract different view of data
# view one, doc2vec
setting = "d2v"

viewOneFilesDir = "../Data/"+Dataset+"/vectors/"+setting+"/Doc2Vec(dbow,d100,n5,mc3,s0.001,t24).txt"
v1_all_features = []

with open(viewOneFilesDir, 'r', encoding = 'utf8') as f:
    for line in f:
        read_data = line.split(" ")
        paper_Vectors = read_data
        v1_all_features.append(paper_Vectors)
f.close()
        
print("Total vector records:",len(v1_all_features))

Total vector records: 3151504


In [3]:
# extract different view of data
# view two, node2vec
setting = "n2v"

viewTwoFilesDir = "../Data/"+Dataset+"/vectors/"+setting+"/n2v.txt"
v2_all_features = []

with open(viewTwoFilesDir, 'r', encoding = 'utf8') as f:
    for line in f:
        read_data = line.split(" ")
        if(len(read_data)==101):
            paper_Vectors = read_data
            v2_all_features.append(paper_Vectors)
f.close()
print("Total vector records:",len(v2_all_features))

Total vector records: 8602530


In [4]:
def read_file(infile):
    AllRecords_original = []
    with open(infile, 'r', encoding = 'utf8') as f:
        for line in f:
            read_data = line.split("\t")
            # get ride of bad formated lines
            if(len(read_data)==13 or len(read_data)==12):
                paper_detail = {"paperID": read_data[0], "authorID":read_data[1]}
                AllRecords_original.append(paper_detail)
            else:
                print(len(read_data))
        f.close()
    return pd.DataFrame(AllRecords_original)

In [5]:
# remove author(positive sample) from other(negative sample)
import random
def extractNegativeSample(positiveSample, allSample):
    negativeSample = [x for x in allSample if x not in positiveSample]
    return negativeSample

In [6]:
# collect class vectors
def extractVectors(authors_pids, allfeature):
    appended_data = []
    for label, pid_author in enumerate(authors_pids):
        pid_author = [int(x) for x in pid_author]
        pid_author = list(sorted(set(pid_author)))
        wanted_embedding = []
        labels = []
        for feature in allfeature:
            if(len(pid_author)==0):
                break
            while (pid_author[0]<=int(feature[0])):
                if pid_author[0]==int(feature[0]):
                    wanted_embedding.append(feature)
                    labels.append(label)
                    pid_author.remove(int(feature[0]))
                elif (pid_author[0]<int(feature[0])):
                    # remove paper that not in all dataset
                    pid_author.remove(pid_author[0])
                if len(pid_author)==0:
                    break
        print("Class ",label," sample size: ", len(wanted_embedding))
        # create df save one author data 
        authordf = pd.DataFrame(wanted_embedding)
        authordf['label'] = labels
        appended_data.append(authordf)
    # add all together
    labeled_data = pd.concat(appended_data, axis=0,ignore_index=True)
    # print shape for confirmation
    print(labeled_data.shape)
    return labeled_data


In [7]:
# collect unlabeled vectors
def extractUnlabeledVectors(unlabeled_pid,allfeature):
    unlabeled_pid = [int(x) for x in unlabeled_pid]
    unlabeled_pid = list(sorted(set(unlabeled_pid)))
    wanted_embedding = []
    for feature in allfeature:
        if(len(unlabeled_pid)==0):
            break
        while (unlabeled_pid[0]<=int(feature[0])):
            if unlabeled_pid[0]==int(feature[0]):
                wanted_embedding.append(feature)
                unlabeled_pid.remove(int(feature[0]))
            elif (unlabeled_pid[0]<int(feature[0])):
                # remove paper that not in all dataset
                unlabeled_pid.remove(unlabeled_pid[0])
            if len(unlabeled_pid)==0:
                break
    unlabeled_data = pd.DataFrame(wanted_embedding)
    unlabeled_data['label'] = -1
    return unlabeled_data

In [40]:
# some of the record doesn't have citation links, therefore we will have to remove those papers from train and test set
# synchronize data wrt pid
def synchro_views(labeled_dv1, labeled_dv2, unlabeled_data1, unlabeled_data2):
    noCitationPids_labeled = set(labeled_dv1[0])-set(labeled_dv2[0])
    print("labeled no citation link: ", len(noCitationPids_labeled))
    noCitationPids_unlabeled = set(unlabeled_data1[0])-set(unlabeled_data2[0])
    print("Unlabeled no citation link size: ", len(noCitationPids_unlabeled))
    # process unlabeled data
    unlabeled_dv1 = unlabeled_data1[~unlabeled_data1[0].isin(noCitationPids_unlabeled)].reset_index(drop=True)
    unlabeled_dv2 = unlabeled_data2
    # process labeled data
    labeled_dv1_final = labeled_dv1[~labeled_dv1[0].isin(noCitationPids_labeled)].reset_index(drop=True)
    labeled_dv2_final = labeled_dv2
    # since our input data are sorted, all data are in order with pid
    return labeled_dv1_final, labeled_dv2_final, unlabeled_dv1, unlabeled_dv2

In [9]:
# sort embedding so it's faster to process
v1_all_features = sorted(v1_all_features,key=lambda x: (int(x[0])))
v2_all_features = sorted(v2_all_features,key=lambda x: (int(x[0])))
print(v1_all_features[0])
print(v2_all_features[0])

['3', '-0.34923670', '-0.04892663', '-0.27903691', '0.10529071', '0.19954453', '-0.23419386', '-0.13973221', '-0.25341058', '0.05078398', '0.07314120', '0.04792935', '0.04512550', '-0.34305945', '0.24982655', '0.14885320', '0.24416804', '-0.31318465', '0.31739417', '-0.24141879', '-0.10775354', '-0.03281415', '-0.32475111', '-0.20807841', '-0.11642953', '0.22945502', '0.00897177', '-0.06829301', '-0.07419086', '-0.23871253', '0.27309087', '-0.18607365', '-0.06931890', '-0.29365629', '0.00250192', '0.08143978', '0.40333349', '0.52750504', '-0.18076964', '0.04289032', '0.16173831', '-0.22021942', '0.64566290', '-0.31809971', '-0.31761047', '-0.38931435', '-0.43065882', '-0.02252252', '0.16317327', '0.26548216', '0.09200200', '0.04588696', '-0.36349797', '-0.22141543', '0.30256146', '0.53977370', '0.41167527', '-0.20137097', '-0.62446582', '0.50166786', '-0.08408753', '-0.16419572', '0.05468309', '-0.11589842', '-0.08338299', '0.02960177', '-0.09213796', '0.03674682', '-0.14122994', '0.37

In [76]:
import numpy as np
import warnings
# create co training classifier
class Co_training_clf(object):
    
    import copy
    
    def __init__(self, clf1, clf2=None, p=1, n=1, k=30, u = 75):
        
        self.clf1 = clf1
        # assume co_training on one classifier
        if clf2 == None:
            self.clf2 = self.copy.copy(clf1)
        else:
            self.clf2 = clf2
        # take p example from most confidently positive labels to example
        self.p = p
        # take n example from most confidently negative label to example
        self.n = n
        # number of iteration
        self.k = k
        # size of pool of unlabeled samples
        self.u = u
        
    def label_p_n_samples(self, rank):
        p, n = [], []
        for label, conf_measure in enumerate(rank):
            # 0 positive sample
            if label==0:
                index = 0
                while(len(p) < self.p):
                    p.append(conf_measure[index])
                    index +=1
            # 1 negative sample
            elif label == 1:
                index = 0
                while(len(n) < self.n):
                    n.append(conf_measure[index])
                    index +=1
            else:
                print("Class label error")
        return p, n
        
    def fit(self, dataView1, dataView2, labels):
        
        labels = np.asarray(labels)
        print("P: ", self.p, " N: ", self.n)
        assert(self.p > 0 and self.n > 0 and self.k > 0 and self.u > 0)
        
        # index of the samples that are initially labeled
        L = [i for i, label_i in enumerate(labels) if label_i != -1]
        # index of unlabeled samples
        U = [i for i, label_i in enumerate(labels) if label_i == -1]
        print("Initial L size: ", len(L))
        print("Initial U size: ", len(U))
        # random drawing sample from U
        random.shuffle(U)
        U_prime = U[-min(len(U), self.u):]
        # remove the samples in U_prime from U
        U = U[:-len(U_prime)]
        iterCount = 0
        #loop until we have assigned labels to every sample in U and U_prime or we hit our iteration break condition
        while iterCount < self.k and U_prime:
            iterCount +=1
#             print("step",iterCount, " L: ",L)
#             print("step",iterCount, " U_prime: ",U_prime)
            iter_train_d1 = dataView1.iloc[L]
            iter_train_d2= dataView2.iloc[L]
            iter_train_label = labels[L]
#             print(iter_train_label.shape)
            self.clf1.fit(iter_train_d1, iter_train_label.ravel())
            self.clf2.fit(iter_train_d2, iter_train_label.ravel())
            
            iter_labeling_d1 = dataView1.iloc[U_prime]
            iter_labeling_d2 = dataView2.iloc[U_prime]
            # rank class probabilities for unlabeled sample for it's confidence measure
            dv1_proba = self.clf1.predict_proba(iter_labeling_d1)
            dv2_proba = self.clf1.predict_proba(iter_labeling_d2)
            # make prediction on data
#             y1 = self.clf1.predict(iter_labeling_d1)
#             y2 = self.clf2.predict(iter_labeling_d2)
#             print("dataviewone prediction on unlabeled: ",y1)
#             print("dataviewtwo prediction on unlabeled: ",y2)
            dv1_proba_rank = []
            dv2_proba_rank = []
            # proba1_rank[i] is label i's confidence measure
            for class_proba in dv1_proba.T:
                dv1_proba_rank.append((-class_proba).argsort())
            for class_proba in dv2_proba.T:
                dv2_proba_rank.append((-class_proba).argsort())
#             print(dv1_proba)
#             print(dv1_proba_rank)
#             print(dv2_proba)
#             print(dv2_proba_rank)
            # h1 classifier
            p1,n1 = self.label_p_n_samples(dv1_proba_rank)
            # h2 classifier
            p2,n2 = self.label_p_n_samples(dv2_proba_rank)
            finalP = set(p1+p2)
            finalN = set(n1+n2)
#             print("P: ", finalP, " N: ", finalN)
            # auto label the samples and remove it from U_prime
            auto_labeled_pos = [U_prime[x] for x in finalP]
            auto_labeled_neg = [U_prime[x] for x in finalN]
            auto_labeled_samples = auto_labeled_pos+auto_labeled_neg
            labels[auto_labeled_pos] = 0
            labels[auto_labeled_neg] = 1
            # extend the labeled sample
            L.extend(auto_labeled_pos)
            L.extend(auto_labeled_neg)
            # remove the labeled sample from U_prime
            U_prime = [x for x in U_prime if x not in auto_labeled_samples]
            #print(U_prime)
            # randomly choice 2p+2n examples from u to replenish u_prime
            replenishItem = U[-(2*self.p+2*self.n):]
            U_prime.extend(replenishItem)
            U = U[:-len(replenishItem)]
        print("Total Labeled number: ", len(L), " Still unlabeled number: ", len(U_prime))
        # final train
        newtrain_d1 = dataView1.iloc[L]
        newtrain_d2 = dataView2.iloc[L]
        self.clf1.fit(newtrain_d1, labels[L])
        self.clf2.fit(newtrain_d2, labels[L])
    
    def supports_proba(self, clf, x):
        try:
            clf.predict_proba([x])
            return True
        except:
            return False
        
    def predict(self, dataView1, dataView2):
        y1 = self.clf1.predict(dataView1)
        y2 = self.clf2.predict(dataView2)
        proba_supported = self.supports_proba(self.clf1, dataView1.iloc[0]) and self.supports_proba(self.clf2, dataView2.iloc[0])
        #fill pred with -1 so we can identify the samples in which sample classifiers failed to agree
        y_pred = np.asarray([-1] * dataView1.shape[0])
        for i, (y1_i, y2_i) in enumerate(zip(y1, y2)):
            # if both agree on label
            if y1_i == y2_i:
                y_pred[i] = y1_i
            # if disagree on label, times probability together, choice the class have higher probabilities
            elif proba_supported:
                y1_probas = self.clf1.predict_proba([dataView1.iloc[i]])[0]
                y2_probas = self.clf2.predict_proba([dataView2.iloc[i]])[0]
                print("y1 disagree on",i, " Proba: ",y1_probas)
                print("y2 not aggreed on ",i, "Proba: ", y2_probas)
                prod_y_probas = [proba_y1 + proba_y2 for (proba_y1, proba_y2) in zip(y1_probas, y2_probas)]
                print("product probas:",prod_y_probas)
                y_pred[i] = prod_y_probas.index(max(prod_y_probas))
                print("result",y_pred[i])
            else:
                #the classifiers disagree and don't support probability, so we guess
                warnings.warn("classifiers disagree with label, result may not accurate")
                print("sample at: ", i, " c1: ", y1_i, " c2: ", y2_i)
                y_pred[i] = random.randint(0, 1)
        #check if predict works
        assert not (-1 in y_pred)
        return y_pred
    
    def predict_proba(self, dataView1, dataView2):
        # the predicted probabilities is simply a average of probabilities given from each classifier trained
        proba = np.full((dataView1.shape[0], 2), -1)
        y1_probas = self.clf1.predict_proba(dataView1)
        y2_probas = self.clf2.predict_proba(dataView2)
        
        for i, (y1_i, y2_i) in enumerate(zip(y1_probas, y2_probas)):
            proba[i][0] = (y1_i[0] + y2_i[0]) / 2
            proba[i][1] = (y1_i[1] + y2_i[1]) / 2
        
        return y_proba

In [77]:
import collections
from sklearn import linear_model
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

# loop through all files in directory add name to name list
fileDir = "../Data/"+Dataset+"/canopies/"
listfiles = os.listdir(fileDir)

# fix random seed for reproducibility
np.random.seed(1)

# collect statistic to output
allname = []
positive_sample_size = []
negative_sample_size = []

all_co_train_accuracy = []
all_co_train_f1 = []

# read all file in name group
for file in listfiles:
    # group name
    temp = file.split("_")
    name = temp[1]+"_"+temp[-1]
    print("For name: ",name)
    # read pid and aid from file
    data = read_file(fileDir+file)
    labeled_mask = data["authorID"] != "-1"
    labeled_data = data[labeled_mask]
    unlabeled_mask = data["authorID"] == "-1"
    ublabeled_data = data[unlabeled_mask]
    unlabeled_pid = ublabeled_data["paperID"].tolist()
    print(labeled_data.shape)
    # collect all labeled sample
    all_labeled_sample = labeled_data["paperID"].tolist()
    print("total sample size before apply threshold: ",len(labeled_data))
    # count number of paper each author write based on author ID
    paperCounter = collections.Counter(labeled_data["authorID"])
    print(paperCounter)
    # collect per class statistic
    for k in list(paperCounter):
        if paperCounter[k] < threshold:
            del paperCounter[k]
    temp =list(paperCounter.keys())
    print(temp)
    # remove authors that write smaller than threshold number of authors
    temp = labeled_data[labeled_data.authorID.isin(temp)]
    author_list = set(temp["authorID"])
    # if only have one class or no class pass the threshold, not applicable
    if(len(paperCounter)==0):
        print(name," pass")
    else:
        # obtain test set
        
        counter = 0
        # loop through each author
        for author in author_list:
            author_name = name+'_'+str(counter)
            allname.append(author_name)
            print(author_name)
            mask = labeled_data["authorID"] == author
            temp = labeled_data[mask]
            positive_sample_pid = temp["paperID"].tolist()
            negative_sample_pid = extractNegativeSample(positive_sample_pid, all_labeled_sample)
            all_authors = []
            all_authors.append(positive_sample_pid)
            all_authors.append(negative_sample_pid)
            
            # read in labeled data
            labeled_d1 = extractVectors(all_authors, v1_all_features)
            labeled_d2 = extractVectors(all_authors, v2_all_features)
            print("Labeled: ",len(labeled_d1), " : ", len(labeled_d2))
            # read in unlabeled data
            unlabeled_d1 = extractUnlabeledVectors(unlabeled_pid,v1_all_features)
            unlabeled_d2 = extractUnlabeledVectors(unlabeled_pid,v2_all_features)
            print("Unlabeled: ",len(unlabeled_d1), " : ", len(unlabeled_d2))
            # synchronize different view based on pid
            sorted_dv1, sorted_dv2, unlabeled_dv1, unlabeled_dv2= synchro_views(labeled_d1, labeled_d2,
                                                                                unlabeled_d1, unlabeled_d2)
            print(sorted_dv1.shape)
            print(sorted_dv2.shape)
            print(unlabeled_dv1.shape)
            print(unlabeled_dv2.shape)
            
            # find  number of positive and negative samples
            num_of_sample = sorted_dv1['label'].value_counts()
            positive_sample_size.append(num_of_sample[0])
            negative_sample_size.append(num_of_sample[1])
            # select 20% of sample as test data in labeled data
            labeled_index = sorted_dv1.index
            # extract test data
            test_data_v1 = sorted_dv1.sample(frac=0.2)
            test_index = test_data_v1.index
            test_data_v2 = sorted_dv2.iloc[test_index]
            # form train data
            train_index = np.setdiff1d(labeled_index, test_index)
            labeled_dv1 = sorted_dv1.iloc[train_index]
            labeled_dv2 = sorted_dv2.iloc[train_index]
            # form test data
            test_pid = test_data_v1[[0]].reset_index(drop=True)
            test_label = test_data_v1[["label"]].reset_index(drop=True)
            testdatav1 = test_data_v1.drop([0, "label"], axis=1).reset_index(drop=True)
            testdatav2 = test_data_v2.drop([0, "label"], axis=1).reset_index(drop=True)
            print(testdatav1.shape)
            # add ublabeled data to labeled to form final train set
            final_dv1 = pd.concat([labeled_dv1,unlabeled_dv1], ignore_index=True)
            final_dv2 = pd.concat([labeled_dv2,unlabeled_dv2], ignore_index=True)
#             print(final_dv1[30:40][0])
#             print(final_dv2[30:40][0])
            label = final_dv1[["label"]]
            pid = final_dv1[[0]]
            final_dv1.drop([0, "label"], axis=1, inplace = True)
            final_dv2.drop([0, "label"], axis=1, inplace = True)
            print(final_dv1.shape)
            print(final_dv2.shape)
            # co-training
            clf = Co_training_clf(clf1=linear_model.LogisticRegression(),p=1,n=1)
            clf.fit(final_dv1,final_dv2,label)
            pred_label = clf.predict(testdatav1,testdatav2)
            co_accuracy = accuracy_score(test_label, pred_label)
            co_f1 = f1_score(test_label, pred_label,average='macro')
            print("F1: ",co_f1)
            print(pred_label)
            print(test_label.iloc[:, 0].tolist())
            all_co_train_accuracy.append(co_accuracy)
            all_co_train_f1.append(co_f1)
            counter+=1
# write evaluation result to excel
output = pd.DataFrame({'Author Name':allname, "positive sample size":positive_sample_size,"negative sample size":negative_sample_size, 
                       "co_logisticRegression Accuracy":all_co_train_accuracy, "co_logisticRegression F1": all_co_train_f1})

savePath = "../result/"+Dataset+"/binary_co_train/"
if not os.path.exists(savePath):
    os.makedirs(savePath)
filename = "co_train_textual="+pp_textual_emb_type+"_citation="+citation_emb_type+"_threshold="+str(threshold)+".csv"
output.to_csv(savePath+filename, encoding='utf-8',index=False)

For name:  j_read
(136, 2)
total sample size before apply threshold:  136
Counter({'0000-0002-5159-1192': 57, '0000-0002-9029-5185': 39, '0000-0002-9697-0962': 31, '0000-0002-4739-9245': 3, '0000-0003-0605-5259': 3, '0000-0003-4316-7006': 1, '0000-0002-0784-0091': 1, '0000-0002-3888-6631': 1})
[]
j_read  pass
For name:  f_esteves
(34, 2)
total sample size before apply threshold:  34
Counter({'0000-0002-3046-1313': 18, '0000-0002-5403-0091': 12, '0000-0003-0589-0746': 3, '0000-0003-3172-6253': 1})
[]
f_esteves  pass
For name:  c_miller
(252, 2)
total sample size before apply threshold:  252
Counter({'0000-0003-4341-1283': 51, '0000-0002-3989-7973': 40, '0000-0002-3813-1706': 39, '0000-0003-2772-9531': 27, '0000-0001-6082-9273': 22, '0000-0002-2601-4422': 22, '0000-0002-9448-8144': 19, '0000-0001-8628-4902': 15, '0000-0002-2936-7717': 6, '0000-0003-3898-9734': 6, '0000-0002-5074-6914': 2, '0000-0003-4266-6700': 1, '0000-0002-9286-9787': 1, '0000-0002-0821-0892': 1})
[]
c_miller  pass
For

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(625, 2)
total sample size before apply threshold:  625
Counter({'0000-0001-5188-7957': 141, '0000-0002-6063-7615': 82, '0000-0001-6665-6596': 79, '0000-0002-4688-3000': 66, '0000-0001-7152-765X': 51, '0000-0001-8251-4176': 28, '0000-0003-1235-5186': 26, '0000-0002-8883-7838': 25, '0000-0001-8331-3181': 20, '0000-0001-8377-5175': 15, '0000-0002-8861-0596': 14, '0000-0002-3804-2594': 14, '0000-0003-3815-0891': 14, '0000-0002-4497-4961': 10, '0000-0002-9801-9580': 9, '0000-0003-4400-5180': 5, '0000-0002-3500-914X': 5, '0000-0002-0195-6771': 4, '0000-0001-6105-0296': 3, '0000-0002-4681-3360': 3, '0000-0003-0161-0532': 3, '0000-0002-6511-1284': 3, '0000-0002-0195-5509': 2, '0000-0003-0500-1961': 2, '0000-0002-5355-3210': 1})
['0000-0001-5188-7957']
j_williams_0
Class  0  sample size:  141
Class  1  sample size:  484
(625, 102)
Class  0  sample size:  123
Class  1  sample size:  452
(575, 102)
Labeled:  625  :  575
Unlabeled:  13294  :  8063
labeled no citation link:  50
Unlabeled no citati

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[]
e_andrade  pass
For name:  t_santos
(45, 2)
total sample size before apply threshold:  45
Counter({'0000-0002-5365-4863': 18, '0000-0003-3765-5863': 14, '0000-0001-9072-5010': 3, '0000-0001-9947-6022': 2, '0000-0002-7694-306X': 2, '0000-0003-4171-5806': 1, '0000-0002-9744-0410': 1, '0000-0002-5325-3090': 1, '0000-0003-4620-0174': 1, '0000-0002-5738-4995': 1, '0000-0001-6892-0354': 1})
[]
t_santos  pass
For name:  k_kim
(1111, 2)
total sample size before apply threshold:  1111
Counter({'0000-0002-6929-5359': 211, '0000-0001-9498-284X': 154, '0000-0002-5878-8895': 139, '0000-0002-1864-3392': 92, '0000-0002-7045-8004': 57, '0000-0001-7896-6751': 57, '0000-0002-7991-9428': 55, '0000-0002-4010-1063': 45, '0000-0002-2186-3484': 28, '0000-0002-4899-1929': 25, '0000-0003-0487-4242': 24, '0000-0002-3642-1486': 22, '0000-0001-9965-3535': 17, '0000-0002-4168-757X': 17, '0000-0001-6525-3744': 14, '0000-0002-3897-0278': 14, '0000-0002-1181-5112': 12, '0000-0003-1447-9385': 11, '0000-0002-7305-87

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


product probas: [0.3168786688685369, 0.017789417092548443]
result 0
y1 disagree on 179  Proba:  [0.5582922 0.4417078]
y2 not aggreed on  179 Proba:  [0.21448018 0.78551982]
product probas: [0.11974261257962852, 0.3469702288004902]
result 1
y1 disagree on 188  Proba:  [0.29036784 0.70963216]
y2 not aggreed on  188 Proba:  [0.74463144 0.25536856]
product probas: [0.21621701985604314, 0.18121774520749742]
result 0
y1 disagree on 200  Proba:  [0.41430609 0.58569391]
y2 not aggreed on  200 Proba:  [0.6206296 0.3793704]
product probas: [0.257130620799065, 0.2221949358628141]
result 0
F1:  0.9900761483664946
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1
 0 1 0 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1
 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 0 1 1 1 1 0 0 0 1 1 1 1 0 0 1 0 1 1 0 1
 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 0 1 1 1 1

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


y1 disagree on 17  Proba:  [0.90501371 0.09498629]
y2 not aggreed on  17 Proba:  [0.3620887 0.6379113]
product probas: [0.3276952363034419, 0.060592826701928046]
result 0
y1 disagree on 23  Proba:  [0.94156883 0.05843117]
y2 not aggreed on  23 Proba:  [0.24468458 0.75531542]
product probas: [0.2303873767358224, 0.04413396722168903]
result 0
y1 disagree on 33  Proba:  [0.5606668 0.4393332]
y2 not aggreed on  33 Proba:  [0.29071052 0.70928948]
product probas: [0.1629917376521022, 0.31161441515702437]
result 1
y1 disagree on 41  Proba:  [0.74634198 0.25365802]
y2 not aggreed on  41 Proba:  [0.42648116 0.57351884]
product probas: [0.3183007971180981, 0.14547765136704482]
result 0
y1 disagree on 47  Proba:  [0.78931073 0.21068927]
y2 not aggreed on  47 Proba:  [0.17602721 0.82397279]
product probas: [0.13894016525140415, 0.1736022232595305]
result 1
y1 disagree on 67  Proba:  [0.33992875 0.66007125]
y2 not aggreed on  67 Proba:  [0.5812895 0.4187105]
product probas: [0.1975970156483167, 0.2

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


product probas: [0.06736586572505417, 0.32470445071137327]
result 1
y1 disagree on 116  Proba:  [0.77485082 0.22514918]
y2 not aggreed on  116 Proba:  [0.01987677 0.98012323]
product probas: [0.01540153116259769, 0.2206739460808624]
result 1
y1 disagree on 119  Proba:  [0.59359278 0.40640722]
y2 not aggreed on  119 Proba:  [0.37310039 0.62689961]
product probas: [0.2214696974157391, 0.2547765269525844]
result 1
y1 disagree on 138  Proba:  [0.66823261 0.33176739]
y2 not aggreed on  138 Proba:  [0.00737034 0.99262966]
product probas: [0.004925100776008276, 0.32932215026154144]
result 1
y1 disagree on 139  Proba:  [0.04308011 0.95691989]
y2 not aggreed on  139 Proba:  [0.82326266 0.17673734]
product probas: [0.03546624730499632, 0.16912347981213682]
result 1
y1 disagree on 158  Proba:  [0.22874578 0.77125422]
y2 not aggreed on  158 Proba:  [0.5975559 0.4024441]
product probas: [0.13668838832001798, 0.31038671198705786]
result 1
y1 disagree on 161  Proba:  [0.23945176 0.76054824]
y2 not ag

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(47, 2)
total sample size before apply threshold:  47
Counter({'0000-0001-8486-1219': 15, '0000-0002-5930-6456': 10, '0000-0003-1769-2587': 8, '0000-0002-1199-2856': 7, '0000-0003-2162-9652': 3, '0000-0003-0302-2523': 2, '0000-0001-5190-4219': 1, '0000-0003-2806-1262': 1})
[]
k_harris  pass
For name:  r_daniel
(173, 2)
total sample size before apply threshold:  173
Counter({'0000-0002-8646-7925': 123, '0000-0002-6483-5897': 37, '0000-0001-8835-8047': 8, '0000-0002-1753-6683': 5})
['0000-0002-8646-7925']
r_daniel_0
Class  0  sample size:  123
Class  1  sample size:  50
(173, 102)
Class  0  sample size:  120
Class  1  sample size:  44
(164, 102)
Labeled:  173  :  164
Unlabeled:  1061  :  664
labeled no citation link:  9
Unlabeled no citation link size:  397
(164, 102)
(164, 102)
(664, 102)
(664, 102)
(33, 100)
(795, 100)
(795, 100)
P:  1  N:  1
Initial L size:  131
Initial U size:  664
Total Labeled number:  248  Still unlabeled number:  80
y1 disagree on 1  Proba:  [0.3544024 0.6455976]

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(262, 2)
total sample size before apply threshold:  262
Counter({'0000-0003-1984-1400': 61, '0000-0001-7621-6463': 32, '0000-0002-6570-8582': 27, '0000-0003-1751-0421': 25, '0000-0002-3840-2473': 20, '0000-0002-5549-9166': 19, '0000-0001-7214-5901': 18, '0000-0003-0075-3304': 13, '0000-0003-3874-3216': 11, '0000-0002-7129-7548': 10, '0000-0002-4914-5062': 9, '0000-0002-3632-4977': 8, '0000-0001-8915-8995': 3, '0000-0003-3423-5134': 2, '0000-0002-2344-4179': 1, '0000-0001-7857-8724': 1, '0000-0003-4213-7454': 1, '0000-0002-9245-731X': 1})
[]
a_patel  pass
For name:  r_graham
(52, 2)
total sample size before apply threshold:  52
Counter({'0000-0002-8686-4867': 41, '0000-0002-5530-8120': 9, '0000-0003-3082-8784': 1, '0000-0003-0103-2971': 1})
[]
r_graham  pass
For name:  a_nilsson
(42, 2)
total sample size before apply threshold:  42
Counter({'0000-0001-5885-7101': 29, '0000-0002-5609-4988': 5, '0000-0002-1217-2163': 4, '0000-0002-9476-4516': 2, '0000-0001-5774-7189': 1, '0000-0003-1968-8

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  107
Class  1  sample size:  302
(409, 102)
Class  0  sample size:  99
Class  1  sample size:  289
(388, 102)
Labeled:  409  :  388
Unlabeled:  3395  :  2246
labeled no citation link:  21
Unlabeled no citation link size:  1149
(388, 102)
(388, 102)
(2246, 102)
(2246, 102)
(78, 100)
(2556, 100)
(2556, 100)
P:  1  N:  1
Initial L size:  310
Initial U size:  2246
Total Labeled number:  427  Still unlabeled number:  81
y1 disagree on 1  Proba:  [0.32767092 0.67232908]
y2 not aggreed on  1 Proba:  [0.96216317 0.03783683]
product probas: [0.315272893012922, 0.025438801854622512]
result 0
y1 disagree on 11  Proba:  [0.98204972 0.01795028]
y2 not aggreed on  11 Proba:  [0.31550315 0.68449685]
product probas: [0.30983977776555544, 0.01228690770924705]
result 0
y1 disagree on 36  Proba:  [0.18748789 0.81251211]
y2 not aggreed on  36 Proba:  [0.8064613 0.1935387]
product probas: [0.1512017246305826, 0.15725254056154472]
result 1
F1:  1.0
[1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  101
Class  1  sample size:  33
(134, 102)
Class  0  sample size:  88
Class  1  sample size:  31
(119, 102)
Labeled:  134  :  119
Unlabeled:  362  :  218
labeled no citation link:  15
Unlabeled no citation link size:  144
(119, 102)
(119, 102)
(218, 102)
(218, 102)
(24, 100)
(313, 100)
(313, 100)
P:  1  N:  1
Initial L size:  95
Initial U size:  218
Total Labeled number:  209  Still unlabeled number:  82
y1 disagree on 8  Proba:  [0.08283106 0.91716894]
y2 not aggreed on  8 Proba:  [0.5222244 0.4777756]
product probas: [0.04325640003270272, 0.4382009449820545]
result 1
F1:  1.0
[1 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 1 0 1 0 1]
[1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1]
For name:  h_song
(210, 2)
total sample size before apply threshold:  210
Counter({'0000-0001-5684-4059': 88, '0000-0001-5553-2539': 30, '0000-0002-3134-782X': 29, '0000-0003-3845-8079': 20, '0000-0002-7844-2293': 14, '0000-0001-5486-2560': 8, '0000-0002-8720-6436': 6,

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Counter({'0000-0002-5188-7030': 128, '0000-0001-5535-2714': 10, '0000-0001-5697-6086': 6, '0000-0003-1485-3154': 1, '0000-0003-3350-8719': 1})
['0000-0002-5188-7030']
m_pan_0
Class  0  sample size:  128
Class  1  sample size:  18
(146, 102)
Class  0  sample size:  127
Class  1  sample size:  16
(143, 102)
Labeled:  146  :  143
Unlabeled:  1289  :  890
labeled no citation link:  3
Unlabeled no citation link size:  399
(143, 102)
(143, 102)
(890, 102)
(890, 102)
(29, 100)
(1004, 100)
(1004, 100)
P:  1  N:  1
Initial L size:  114
Initial U size:  890
Total Labeled number:  228  Still unlabeled number:  82
y1 disagree on 14  Proba:  [0.23336178 0.76663822]
y2 not aggreed on  14 Proba:  [0.55714521 0.44285479]
product probas: [0.1300164013288289, 0.33950940295078064]
result 1
F1:  0.7314814814814814
[0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
For name:  a_simon
(117, 2)
total sample size 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  149
Class  1  sample size:  135
(284, 102)
Class  0  sample size:  140
Class  1  sample size:  129
(269, 102)
Labeled:  284  :  269
Unlabeled:  810  :  656
labeled no citation link:  15
Unlabeled no citation link size:  154
(269, 102)
(269, 102)
(656, 102)
(656, 102)
(54, 100)
(871, 100)
(871, 100)
P:  1  N:  1
Initial L size:  215
Initial U size:  656
Total Labeled number:  333  Still unlabeled number:  79
y1 disagree on 13  Proba:  [0.9627018 0.0372982]
y2 not aggreed on  13 Proba:  [0.47336297 0.52663703]
product probas: [0.45570738264873684, 0.0196426122914469]
result 0
y1 disagree on 20  Proba:  [0.25549747 0.74450253]
y2 not aggreed on  20 Proba:  [0.53301724 0.46698276]
product probas: [0.13618455506097232, 0.3476698455156281]
result 1
y1 disagree on 25  Proba:  [0.54673037 0.45326963]
y2 not aggreed on  25 Proba:  [0.13659603 0.86340397]
product probas: [0.07468119576470376, 0.39135479864238437]
result 1
y1 disagree on 49  Proba:  [0.2489913 0.7510087]
y

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(182, 2)
total sample size before apply threshold:  182
Counter({'0000-0002-1739-3299': 72, '0000-0001-8756-229X': 66, '0000-0002-3323-2815': 30, '0000-0002-3263-4847': 8, '0000-0002-9323-9437': 6})
[]
n_young  pass
For name:  d_ross
(25, 2)
total sample size before apply threshold:  25
Counter({'0000-0002-8272-1877': 8, '0000-0002-8659-3833': 7, '0000-0001-7426-9561': 7, '0000-0002-5480-9978': 2, '0000-0001-6353-6951': 1})
[]
d_ross  pass
For name:  q_wang
(348, 2)
total sample size before apply threshold:  348
Counter({'0000-0002-2149-384X': 85, '0000-0001-7929-7692': 54, '0000-0001-9409-0251': 31, '0000-0002-7982-7275': 22, '0000-0001-5988-1293': 18, '0000-0002-5125-3724': 16, '0000-0002-6514-3470': 15, '0000-0002-1355-1616': 12, '0000-0001-7309-9580': 12, '0000-0002-2359-3262': 11, '0000-0002-0645-6514': 8, '0000-0002-9808-5035': 7, '0000-0002-4036-1818': 7, '0000-0001-7692-6721': 7, '0000-0001-8566-1120': 6, '0000-0002-6010-2178': 6, '0000-0002-9706-2421': 5, '0000-0003-2645-5807'

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  146
Class  1  sample size:  19
(165, 102)
Class  0  sample size:  136
Class  1  sample size:  18
(154, 102)
Labeled:  165  :  154
Unlabeled:  1938  :  1485
labeled no citation link:  11
Unlabeled no citation link size:  453
(154, 102)
(154, 102)
(1485, 102)
(1485, 102)
(31, 100)
(1608, 100)
(1608, 100)
P:  1  N:  1
Initial L size:  123
Initial U size:  1485
Total Labeled number:  239  Still unlabeled number:  79
y1 disagree on 14  Proba:  [0.25784297 0.74215703]
y2 not aggreed on  14 Proba:  [0.81682089 0.18317911]
product probas: [0.21061152348118709, 0.1359476657824284]
result 0
y1 disagree on 20  Proba:  [0.55968183 0.44031817]
y2 not aggreed on  20 Proba:  [0.20028602 0.79971398]
product probas: [0.1120964483965968, 0.35212859508668976]
result 1
y1 disagree on 25  Proba:  [0.12612895 0.87387105]
y2 not aggreed on  25 Proba:  [0.68016502 0.31983498]
product probas: [0.0857884981677292, 0.2794945283333027]
result 1
F1:  0.8154761904761905
[0 0 0 0 0 0 0 0 0 1 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



['0000-0002-9723-4924']
e_thompson_0
Class  0  sample size:  163
Class  1  sample size:  18
(181, 102)
Class  0  sample size:  157
Class  1  sample size:  18
(175, 102)
Labeled:  181  :  175
Unlabeled:  2567  :  1606
labeled no citation link:  6
Unlabeled no citation link size:  961
(175, 102)
(175, 102)
(1606, 102)
(1606, 102)
(35, 100)
(1746, 100)
(1746, 100)
P:  1  N:  1
Initial L size:  140
Initial U size:  1606
Total Labeled number:  253  Still unlabeled number:  82
y1 disagree on 13  Proba:  [0.82965283 0.17034717]
y2 not aggreed on  13 Proba:  [0.29475541 0.70524459]
product probas: [0.24454466451910062, 0.12013641757250501]
result 0
y1 disagree on 16  Proba:  [0.6018992 0.3981008]
y2 not aggreed on  16 Proba:  [0.0493561 0.9506439]
product probas: [0.02970739830143129, 0.37845209413352565]
result 1
F1:  0.8833333333333333
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  124
Class  1  sample size:  80
(204, 102)
Class  0  sample size:  122
Class  1  sample size:  79
(201, 102)
Labeled:  204  :  201
Unlabeled:  3330  :  2191
labeled no citation link:  3
Unlabeled no citation link size:  1139
(201, 102)
(201, 102)
(2191, 102)
(2191, 102)
(40, 100)
(2352, 100)
(2352, 100)
P:  1  N:  1
Initial L size:  161
Initial U size:  2191
Total Labeled number:  275  Still unlabeled number:  81
y1 disagree on 14  Proba:  [0.06867431 0.93132569]
y2 not aggreed on  14 Proba:  [0.54095128 0.45904872]
product probas: [0.037149457748626755, 0.4275238672886447]
result 1
y1 disagree on 20  Proba:  [0.39759054 0.60240946]
y2 not aggreed on  20 Proba:  [0.99838551 0.00161449]
product probas: [0.3969486353036263, 0.00097258499891405]
result 0
y1 disagree on 33  Proba:  [0.63329138 0.36670862]
y2 not aggreed on  33 Proba:  [0.19287123 0.80712877]
product probas: [0.12214368937319796, 0.2959810762650393]
result 1
F1:  0.9736668861092824
[0 0 0 0 1 1 1 0 1 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  122
Class  1  sample size:  55
(177, 102)
Class  0  sample size:  108
Class  1  sample size:  55
(163, 102)
Labeled:  177  :  163
Unlabeled:  3378  :  2183
labeled no citation link:  14
Unlabeled no citation link size:  1195
(163, 102)
(163, 102)
(2183, 102)
(2183, 102)
(33, 100)
(2313, 100)
(2313, 100)
P:  1  N:  1
Initial L size:  130
Initial U size:  2183
Total Labeled number:  247  Still unlabeled number:  80
y1 disagree on 0  Proba:  [0.98586574 0.01413426]
y2 not aggreed on  0 Proba:  [0.47496558 0.52503442]
product probas: [0.46825229342591684, 0.007420973786564536]
result 0
y1 disagree on 8  Proba:  [0.71621187 0.28378813]
y2 not aggreed on  8 Proba:  [0.44828479 0.55171521]
product probas: [0.3210668895743047, 0.1565702279080296]
result 0
F1:  0.9318181818181819
[0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 0 0 0 1 0 1 0 1 0 0 0]
[0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0]
For name:  m_soares
(247, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(43, 2)
total sample size before apply threshold:  43
Counter({'0000-0003-3131-9906': 15, '0000-0003-0931-5474': 11, '0000-0002-7511-0488': 11, '0000-0002-0118-1056': 3, '0000-0002-1508-2614': 2, '0000-0002-8191-3738': 1})
[]
c_morgan  pass
For name:  h_cui
(40, 2)
total sample size before apply threshold:  40
Counter({'0000-0001-6394-4808': 11, '0000-0003-3358-8958': 10, '0000-0002-9870-748X': 9, '0000-0002-6343-1014': 9, '0000-0002-8627-8534': 1})
[]
h_cui  pass
For name:  p_zhang
(137, 2)
total sample size before apply threshold:  137
Counter({'0000-0002-1765-5965': 26, '0000-0003-3603-0175': 25, '0000-0003-2228-3569': 20, '0000-0002-2774-5534': 17, '0000-0002-5409-7480': 16, '0000-0001-5574-0899': 8, '0000-0002-6218-1885': 6, '0000-0002-1806-4200': 5, '0000-0001-9539-1136': 5, '0000-0003-0606-6855': 3, '0000-0001-6953-800X': 3, '0000-0002-8462-0340': 1, '0000-0001-7331-6020': 1, '0000-0003-3344-4823': 1})
[]
p_zhang  pass
For name:  j_fernandes
(208, 2)
total sample size before app

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(152, 2)
total sample size before apply threshold:  152
Counter({'0000-0002-9549-8504': 68, '0000-0002-3856-5010': 53, '0000-0001-5924-8615': 18, '0000-0002-1023-4260': 9, '0000-0003-4343-8937': 4})
[]
m_rizzo  pass
For name:  y_shi
(67, 2)
total sample size before apply threshold:  67
Counter({'0000-0001-6933-4971': 17, '0000-0003-4530-2056': 10, '0000-0003-2943-5465': 7, '0000-0001-6029-6526': 5, '0000-0001-7421-3306': 5, '0000-0001-7713-0813': 4, '0000-0003-4273-8663': 3, '0000-0001-9406-7967': 3, '0000-0003-1804-6990': 3, '0000-0002-6715-7681': 2, '0000-0002-7887-3050': 2, '0000-0001-7256-3628': 2, '0000-0001-6085-7880': 1, '0000-0003-0965-5751': 1, '0000-0001-7502-9201': 1, '0000-0002-3284-4449': 1})
[]
y_shi  pass
For name:  c_luo
(78, 2)
total sample size before apply threshold:  78
Counter({'0000-0003-0524-5886': 36, '0000-0002-6453-7435': 18, '0000-0003-2193-3670': 15, '0000-0002-3477-5969': 5, '0000-0001-5876-5266': 1, '0000-0002-0879-3127': 1, '0000-0003-1152-0557': 1, '0000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



Class  0  sample size:  144
Class  1  sample size:  45
(189, 102)
Class  0  sample size:  142
Class  1  sample size:  44
(186, 102)
Labeled:  189  :  186
Unlabeled:  3749  :  2510
labeled no citation link:  3
Unlabeled no citation link size:  1239
(186, 102)
(186, 102)
(2510, 102)
(2510, 102)
(37, 100)
(2659, 100)
(2659, 100)
P:  1  N:  1
Initial L size:  149
Initial U size:  2510
Total Labeled number:  262  Still unlabeled number:  82
y1 disagree on 4  Proba:  [0.98948871 0.01051129]
y2 not aggreed on  4 Proba:  [0.38183988 0.61816012]
product probas: [0.3778262461967475, 0.006497662499920434]
result 0
F1:  1.0
[0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
For name:  w_jung
(33, 2)
total sample size before apply threshold:  33
Counter({'0000-0002-8697-9584': 17, '0000-0002-6853-2885': 8, '0000-0001-5266-3795': 4, '0000-0001-9590-3859': 2, '0000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  112
Class  1  sample size:  194
(306, 102)
Class  0  sample size:  107
Class  1  sample size:  186
(293, 102)
Labeled:  306  :  293
Unlabeled:  4226  :  2785
labeled no citation link:  13
Unlabeled no citation link size:  1441
(293, 102)
(293, 102)
(2785, 102)
(2785, 102)
(59, 100)
(3019, 100)
(3019, 100)
P:  1  N:  1
Initial L size:  234
Initial U size:  2785
Total Labeled number:  349  Still unlabeled number:  81
y1 disagree on 22  Proba:  [0.53219984 0.46780016]
y2 not aggreed on  22 Proba:  [0.11049224 0.88950776]
product probas: [0.05880395202956827, 0.4161118707658313]
result 1
y1 disagree on 46  Proba:  [0.55143796 0.44856204]
y2 not aggreed on  46 Proba:  [0.1633339 0.8366661]
product probas: [0.09006851398100729, 0.37529665311147964]
result 1
F1:  0.9803136469803136
[0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0
 0 0 0 1 1 0 0 0 0 1 1 1 1 0 1 1 1 1 1 1 0 0]
[0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  177
Class  1  sample size:  8
(185, 102)
Class  0  sample size:  174
Class  1  sample size:  8
(182, 102)
Labeled:  185  :  182
Unlabeled:  136  :  91
labeled no citation link:  3
Unlabeled no citation link size:  45
(182, 102)
(182, 102)
(91, 102)
(91, 102)
(36, 100)
(237, 100)
(237, 100)
P:  1  N:  1
Initial L size:  146
Initial U size:  91
Total Labeled number:  245  Still unlabeled number:  0
F1:  1.0
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
For name:  r_young
(361, 2)
total sample size before apply threshold:  361
Counter({'0000-0002-6806-6503': 117, '0000-0001-8001-2914': 87, '0000-0002-6380-6314': 70, '0000-0001-7003-3017': 38, '0000-0001-6073-9489': 24, '0000-0002-1062-5691': 10, '0000-0002-5719-2205': 9, '0000-0001-7485-0604': 6})
['0000-0002-6806-6503']
r_young_0


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  117
Class  1  sample size:  244
(361, 102)
Class  0  sample size:  110
Class  1  sample size:  217
(327, 102)
Labeled:  361  :  327
Unlabeled:  4892  :  2981
labeled no citation link:  34
Unlabeled no citation link size:  1911
(327, 102)
(327, 102)
(2981, 102)
(2981, 102)
(65, 100)
(3243, 100)
(3243, 100)
P:  1  N:  1
Initial L size:  262
Initial U size:  2981
Total Labeled number:  378  Still unlabeled number:  79
y1 disagree on 18  Proba:  [0.27677568 0.72322432]
y2 not aggreed on  18 Proba:  [0.57853524 0.42146476]
product probas: [0.16012448418761355, 0.3048135656530388]
result 1
y1 disagree on 22  Proba:  [0.40493428 0.59506572]
y2 not aggreed on  22 Proba:  [0.80630011 0.19369989]
product probas: [0.3264985547487076, 0.11526416524604063]
result 0
F1:  1.0
[0 0 1 1 0 0 1 0 1 1 1 1 0 0 0 1 0 1 1 0 0 1 0 0 1 0 1 1 0 0 0 0 1 0 1 1 1
 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 0 1 1]
[0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  124
Class  1  sample size:  81
(205, 102)
Class  0  sample size:  119
Class  1  sample size:  75
(194, 102)
Labeled:  205  :  194
Unlabeled:  1731  :  1103
labeled no citation link:  11
Unlabeled no citation link size:  628
(194, 102)
(194, 102)
(1103, 102)
(1103, 102)
(39, 100)
(1258, 100)
(1258, 100)
P:  1  N:  1
Initial L size:  155
Initial U size:  1103
Total Labeled number:  267  Still unlabeled number:  85
y1 disagree on 13  Proba:  [0.02489194 0.97510806]
y2 not aggreed on  13 Proba:  [0.50461073 0.49538927]
product probas: [0.012560740740182824, 0.48305806719605465]
result 1
y1 disagree on 32  Proba:  [0.06267034 0.93732966]
y2 not aggreed on  32 Proba:  [0.73891526 0.26108474]
product probas: [0.04630807422090342, 0.24472246833307956]
result 1
F1:  0.9458333333333333
[0 1 0 0 1 0 1 0 1 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 1 0 1 1 1 0 0 1
 0 0]
[0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  232
Class  1  sample size:  48
(280, 102)
Class  0  sample size:  225
Class  1  sample size:  45
(270, 102)
Labeled:  280  :  270
Unlabeled:  2961  :  1937
labeled no citation link:  10
Unlabeled no citation link size:  1024
(270, 102)
(270, 102)
(1937, 102)
(1937, 102)
(54, 100)
(2153, 100)
(2153, 100)
P:  1  N:  1
Initial L size:  216
Initial U size:  1937
Total Labeled number:  333  Still unlabeled number:  78
y1 disagree on 3  Proba:  [0.52669777 0.47330223]
y2 not aggreed on  3 Proba:  [0.38927582 0.61072418]
product probas: [0.20503070671758478, 0.289057116322653]
result 1
y1 disagree on 12  Proba:  [0.60292377 0.39707623]
y2 not aggreed on  12 Proba:  [0.32532646 0.67467354]
product probas: [0.19614705845012326, 0.2678968229155849]
result 1
y1 disagree on 26  Proba:  [0.94456319 0.05543681]
y2 not aggreed on  26 Proba:  [0.44760399 0.55239601]
product probas: [0.42279025478356314, 0.03062307206311275]
result 0
y1 disagree on 39  Proba:  [0.73879478 0.2612

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(137, 2)
total sample size before apply threshold:  137
Counter({'0000-0002-2195-1695': 47, '0000-0002-6689-7768': 19, '0000-0002-6406-7832': 17, '0000-0001-6643-3173': 9, '0000-0002-0763-9953': 8, '0000-0002-4479-6157': 8, '0000-0001-7429-4724': 5, '0000-0002-5578-4960': 4, '0000-0002-1887-0632': 4, '0000-0002-9834-3006': 3, '0000-0002-9945-3514': 3, '0000-0001-8488-0399': 2, '0000-0001-9106-0049': 1, '0000-0003-4549-6110': 1, '0000-0002-2341-7971': 1, '0000-0003-4420-6353': 1, '0000-0002-7963-6890': 1, '0000-0002-7962-6668': 1, '0000-0003-1355-0055': 1, '0000-0002-1563-8811': 1})
[]
y_xu  pass
For name:  a_melo
(48, 2)
total sample size before apply threshold:  48
Counter({'0000-0001-6455-7834': 26, '0000-0002-9153-0773': 11, '0000-0002-4606-7791': 7, '0000-0001-5682-2116': 4})
[]
a_melo  pass
For name:  r_doyle
(11, 2)
total sample size before apply threshold:  11
Counter({'0000-0001-6229-4700': 5, '0000-0001-5001-1945': 4, '0000-0003-1019-6783': 1, '0000-0002-4704-7178': 1})
[]
r_d

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  110
Class  1  sample size:  69
(179, 102)
Class  0  sample size:  105
Class  1  sample size:  55
(160, 102)
Labeled:  179  :  160
Unlabeled:  1381  :  606
labeled no citation link:  19
Unlabeled no citation link size:  775
(160, 102)
(160, 102)
(606, 102)
(606, 102)
(32, 100)
(734, 100)
(734, 100)
P:  1  N:  1
Initial L size:  128
Initial U size:  606
Total Labeled number:  240  Still unlabeled number:  84
y1 disagree on 14  Proba:  [0.77160645 0.22839355]
y2 not aggreed on  14 Proba:  [0.47915526 0.52084474]
product probas: [0.36971929063136344, 0.11895757794075423]
result 0
F1:  0.9307359307359306
[1 0 1 0 1 0 0 0 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1]
[1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1]
For name:  p_robinson
(275, 2)
total sample size before apply threshold:  275
Counter({'0000-0002-7878-0313': 133, '0000-0002-0736-9199': 119, '0000-0002-3156-3418': 19, '0000-0002-0577-3147': 4})
['0000-00

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  133
Class  1  sample size:  142
(275, 102)
Class  0  sample size:  131
Class  1  sample size:  137
(268, 102)
Labeled:  275  :  268
Unlabeled:  2917  :  1942
labeled no citation link:  7
Unlabeled no citation link size:  975
(268, 102)
(268, 102)
(1942, 102)
(1942, 102)
(54, 100)
(2156, 100)
(2156, 100)
P:  1  N:  1
Initial L size:  214
Initial U size:  1942
Total Labeled number:  329  Still unlabeled number:  81
y1 disagree on 12  Proba:  [2.85359373e-04 9.99714641e-01]
y2 not aggreed on  12 Proba:  [0.727789 0.272211]
product probas: [0.00020768141327549453, 0.2721333206543914]
result 1
y1 disagree on 13  Proba:  [0.47263509 0.52736491]
y2 not aggreed on  13 Proba:  [0.7958971 0.2041029]
product probas: [0.37616889879598725, 0.10763670715818908]
result 0
y1 disagree on 20  Proba:  [0.01404152 0.98595848]
y2 not aggreed on  20 Proba:  [0.77445204 0.22554796]
product probas: [0.010874487066708498, 0.2223809277652855]
result 1
y1 disagree on 22  Proba:  [0.093613

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  119
Class  1  sample size:  156
(275, 102)
Class  0  sample size:  115
Class  1  sample size:  153
(268, 102)
Labeled:  275  :  268
Unlabeled:  2917  :  1942
labeled no citation link:  7
Unlabeled no citation link size:  975
(268, 102)
(268, 102)
(1942, 102)
(1942, 102)
(54, 100)
(2156, 100)
(2156, 100)
P:  1  N:  1
Initial L size:  214
Initial U size:  1942
Total Labeled number:  325  Still unlabeled number:  85
y1 disagree on 1  Proba:  [0.5702627 0.4297373]
y2 not aggreed on  1 Proba:  [0.34856929 0.65143071]
product probas: [0.19877606622658323, 0.2799440716465688]
result 1
y1 disagree on 12  Proba:  [0.15104178 0.84895822]
y2 not aggreed on  12 Proba:  [0.87940969 0.12059031]
product probas: [0.13282760420361642, 0.10237613583813916]
result 0
y1 disagree on 14  Proba:  [0.99691817 0.00308183]
y2 not aggreed on  14 Proba:  [0.44612145 0.55387855]
product probas: [0.4447465807821904, 0.0017069612047518822]
result 0
y1 disagree on 24  Proba:  [0.06351609 0.936

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  167
Class  1  sample size:  46
(213, 102)
Class  0  sample size:  148
Class  1  sample size:  42
(190, 102)
Labeled:  213  :  190
Unlabeled:  753  :  542
labeled no citation link:  23
Unlabeled no citation link size:  211
(190, 102)
(190, 102)
(542, 102)
(542, 102)
(38, 100)
(694, 100)
(694, 100)
P:  1  N:  1
Initial L size:  152
Initial U size:  542
Total Labeled number:  267  Still unlabeled number:  81
F1:  1.0
[0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 0 0 0 0
 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]
For name:  m_reilly
(20, 2)
total sample size before apply threshold:  20
Counter({'0000-0001-8029-0084': 17, '0000-0002-5526-8245': 1, '0000-0001-8746-3224': 1, '0000-0003-2506-3190': 1})
[]
m_reilly  pass
For name:  d_nguyen
(25, 2)
total sample size before apply threshold:  25
Counter({'0000-0002-4997-555X': 8, '0000-0002-3283-3504': 7, '0000-0001-6420-7308':

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  125
Class  1  sample size:  99
(224, 102)
Class  0  sample size:  122
Class  1  sample size:  81
(203, 102)
Labeled:  224  :  203
Unlabeled:  956  :  578
labeled no citation link:  21
Unlabeled no citation link size:  378
(203, 102)
(203, 102)
(578, 102)
(578, 102)
(41, 100)
(740, 100)
(740, 100)
P:  1  N:  1
Initial L size:  162
Initial U size:  578
Total Labeled number:  277  Still unlabeled number:  81
y1 disagree on 0  Proba:  [0.18299021 0.81700979]
y2 not aggreed on  0 Proba:  [0.56150644 0.43849356]
product probas: [0.10275018238411761, 0.35825353241068114]
result 1
y1 disagree on 4  Proba:  [0.46000891 0.53999109]
y2 not aggreed on  4 Proba:  [0.9922316 0.0077684]
product probas: [0.45643537783483656, 0.004194867084503777]
result 0
y1 disagree on 29  Proba:  [0.53969326 0.46030674]
y2 not aggreed on  29 Proba:  [0.38474223 0.61525777]
product probas: [0.2076427900528959, 0.2832072962616313]
result 1
F1:  1.0
[1 0 1 0 0 1 0 0 1 1 0 1 1 0 0 0 1 0 0 0 0 0 0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  104
Class  1  sample size:  27
(131, 102)
Class  0  sample size:  100
Class  1  sample size:  27
(127, 102)
Labeled:  131  :  127
Unlabeled:  650  :  444
labeled no citation link:  4
Unlabeled no citation link size:  206
(127, 102)
(127, 102)
(444, 102)
(444, 102)
(25, 100)
(546, 100)
(546, 100)
P:  1  N:  1
Initial L size:  102
Initial U size:  444
Total Labeled number:  220  Still unlabeled number:  79
F1:  1.0
[0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
[0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
For name:  a_fontana
(203, 2)
total sample size before apply threshold:  203
Counter({'0000-0002-6660-5315': 65, '0000-0002-5453-461X': 59, '0000-0002-5391-7520': 44, '0000-0002-8481-1219': 16, '0000-0002-4791-8746': 14, '0000-0003-3820-2823': 3, '0000-0003-1556-2770': 2})
[]
a_fontana  pass
For name:  r_chen
(367, 2)
total sample size before apply threshold:  367
Counter({'0000-0002-8371-8629': 179, '0000-0001-6344-1442': 34, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  179
Class  1  sample size:  188
(367, 102)
Class  0  sample size:  168
Class  1  sample size:  174
(342, 102)
Labeled:  367  :  342
Unlabeled:  9130  :  6398
labeled no citation link:  25
Unlabeled no citation link size:  2732
(342, 102)
(342, 102)
(6398, 102)
(6398, 102)
(68, 100)
(6672, 100)
(6672, 100)
P:  1  N:  1
Initial L size:  274
Initial U size:  6398
Total Labeled number:  389  Still unlabeled number:  81
y1 disagree on 20  Proba:  [0.13388948 0.86611052]
y2 not aggreed on  20 Proba:  [0.56742759 0.43257241]
product probas: [0.07597258352579021, 0.3746555149958491]
result 1
y1 disagree on 28  Proba:  [0.00399018 0.99600982]
y2 not aggreed on  28 Proba:  [0.54661666 0.45338334]
product probas: [0.0021810983383838303, 0.45157425673683566]
result 1
y1 disagree on 30  Proba:  [0.67565732 0.32434268]
y2 not aggreed on  30 Proba:  [0.00798413 0.99201587]
product probas: [0.005394538512337504, 0.32175308532593144]
result 1
y1 disagree on 49  Proba:  [0.334674

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  154
Class  1  sample size:  449
(603, 102)
Class  0  sample size:  153
Class  1  sample size:  409
(562, 102)
Labeled:  603  :  562
Unlabeled:  8554  :  5640
labeled no citation link:  41
Unlabeled no citation link size:  2914
(562, 102)
(562, 102)
(5640, 102)
(5640, 102)
(112, 100)
(6090, 100)
(6090, 100)
P:  1  N:  1
Initial L size:  450
Initial U size:  5640
Total Labeled number:  566  Still unlabeled number:  81
y1 disagree on 2  Proba:  [0.59344443 0.40655557]
y2 not aggreed on  2 Proba:  [0.32543443 0.67456557]
product probas: [0.19312725032954356, 0.27424838921607114]
result 1
y1 disagree on 14  Proba:  [0.84118807 0.15881193]
y2 not aggreed on  14 Proba:  [0.42422256 0.57577744]
product probas: [0.3568509583800549, 0.0914403262864619]
result 0
y1 disagree on 63  Proba:  [0.61012111 0.38987889]
y2 not aggreed on  63 Proba:  [0.15534335 0.84465665]
product probas: [0.09477825755766182, 0.3293137947773772]
result 1
y1 disagree on 77  Proba:  [0.01151962 0.9

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  113
Class  1  sample size:  490
(603, 102)
Class  0  sample size:  102
Class  1  sample size:  460
(562, 102)
Labeled:  603  :  562
Unlabeled:  8554  :  5640
labeled no citation link:  41
Unlabeled no citation link size:  2914
(562, 102)
(562, 102)
(5640, 102)
(5640, 102)
(112, 100)
(6090, 100)
(6090, 100)
P:  1  N:  1
Initial L size:  450
Initial U size:  5640
Total Labeled number:  564  Still unlabeled number:  82
y1 disagree on 36  Proba:  [0.24327931 0.75672069]
y2 not aggreed on  36 Proba:  [0.88031057 0.11968943]
product probas: [0.21416134600533887, 0.09057147036877508]
result 0
y1 disagree on 39  Proba:  [0.34360664 0.65639336]
y2 not aggreed on  39 Proba:  [0.59955869 0.40044131]
product probas: [0.2060123460521538, 0.2628470161797947]
result 1
y1 disagree on 51  Proba:  [0.81724925 0.18275075]
y2 not aggreed on  51 Proba:  [0.0291186 0.9708814]
product probas: [0.023797157761225703, 0.1774293007241205]
result 1
y1 disagree on 66  Proba:  [0.97575041 0.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(16, 2)
total sample size before apply threshold:  16
Counter({'0000-0001-9520-3465': 5, '0000-0003-1686-4885': 2, '0000-0001-8681-9831': 2, '0000-0001-6274-9197': 2, '0000-0001-6803-5865': 1, '0000-0003-4324-3486': 1, '0000-0003-1035-2117': 1, '0000-0003-4277-4816': 1, '0000-0003-3133-943X': 1})
[]
j_day  pass
For name:  d_truong
(13, 2)
total sample size before apply threshold:  13
Counter({'0000-0003-4946-8969': 7, '0000-0002-1720-1744': 4, '0000-0003-3200-1297': 2})
[]
d_truong  pass
For name:  s_pan
(101, 2)
total sample size before apply threshold:  101
Counter({'0000-0003-3154-6690': 34, '0000-0002-8247-2110': 12, '0000-0002-1189-4199': 11, '0000-0003-2082-4077': 10, '0000-0001-6451-4666': 10, '0000-0002-7581-1831': 9, '0000-0003-2620-7272': 6, '0000-0001-6565-3836': 5, '0000-0003-0794-527X': 4})
[]
s_pan  pass
For name:  a_andrade
(52, 2)
total sample size before apply threshold:  52
Counter({'0000-0001-9569-6503': 18, '0000-0002-5689-6606': 13, '0000-0003-4902-8728': 10, '0000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  167
Class  1  sample size:  289
(456, 102)
Class  0  sample size:  123
Class  1  sample size:  277
(400, 102)
Labeled:  456  :  400
Unlabeled:  2309  :  1549
labeled no citation link:  56
Unlabeled no citation link size:  760
(400, 102)
(400, 102)
(1549, 102)
(1549, 102)
(80, 100)
(1869, 100)
(1869, 100)
P:  1  N:  1
Initial L size:  320
Initial U size:  1549
Total Labeled number:  436  Still unlabeled number:  80
y1 disagree on 32  Proba:  [0.68517297 0.31482703]
y2 not aggreed on  32 Proba:  [0.40101129 0.59898871]
product probas: [0.27476209587464995, 0.18857783516728038]
result 0
y1 disagree on 61  Proba:  [0.44687849 0.55312151]
y2 not aggreed on  61 Proba:  [0.6443742 0.3556258]
product probas: [0.28795696659588316, 0.19670428131267909]
result 0
y1 disagree on 69  Proba:  [0.30093567 0.69906433]
y2 not aggreed on  69 Proba:  [0.59846244 0.40153756]
product probas: [0.18009869409316884, 0.2807005848118679]
result 1
y1 disagree on 78  Proba:  [0.35460221 0.6

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(70, 2)
total sample size before apply threshold:  70
Counter({'0000-0001-6998-5686': 48, '0000-0001-5807-5820': 11, '0000-0003-3957-6288': 4, '0000-0003-4964-2197': 2, '0000-0002-9066-6935': 2, '0000-0003-4872-0632': 2, '0000-0002-7297-9639': 1})
[]
a_miranda  pass
For name:  h_vogel
(15, 2)
total sample size before apply threshold:  15
Counter({'0000-0001-9821-7731': 5, '0000-0002-9902-8120': 4, '0000-0003-2404-9485': 4, '0000-0003-0072-4239': 2})
[]
h_vogel  pass
For name:  m_campos
(148, 2)
total sample size before apply threshold:  148
Counter({'0000-0001-7738-9892': 107, '0000-0003-3217-9001': 12, '0000-0003-4313-7069': 8, '0000-0003-1012-6240': 6, '0000-0002-0883-0610': 5, '0000-0002-5233-3769': 5, '0000-0003-4683-0176': 3, '0000-0002-9516-6526': 2})
['0000-0001-7738-9892']
m_campos_0
Class  0  sample size:  107
Class  1  sample size:  41
(148, 102)
Class  0  sample size:  107
Class  1  sample size:  37
(144, 102)
Labeled:  148  :  144
Unlabeled:  1318  :  811
labeled no citatio

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  210
Class  1  sample size:  84
(294, 102)
Class  0  sample size:  201
Class  1  sample size:  82
(283, 102)
Labeled:  294  :  283
Unlabeled:  3145  :  1944
labeled no citation link:  11
Unlabeled no citation link size:  1201
(283, 102)
(283, 102)
(1944, 102)
(1944, 102)
(57, 100)
(2170, 100)
(2170, 100)
P:  1  N:  1
Initial L size:  226
Initial U size:  1944
Total Labeled number:  345  Still unlabeled number:  78
y1 disagree on 1  Proba:  [0.02359657 0.97640343]
y2 not aggreed on  1 Proba:  [0.5012732 0.4987268]
product probas: [0.011828326051776247, 0.48695855683936046]
result 1
y1 disagree on 5  Proba:  [0.07128926 0.92871074]
y2 not aggreed on  5 Proba:  [0.65825004 0.34174996]
product probas: [0.04692615597515833, 0.31738685685457235]
result 1
y1 disagree on 11  Proba:  [0.00429444 0.99570556]
y2 not aggreed on  11 Proba:  [0.53359182 0.46640818]
product probas: [0.0022914806389485775, 0.46440522010944735]
result 1
y1 disagree on 43  Proba:  [0.92357228 0.07

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  155
Class  1  sample size:  41
(196, 102)
Class  0  sample size:  146
Class  1  sample size:  37
(183, 102)
Labeled:  196  :  183
Unlabeled:  507  :  244
labeled no citation link:  13
Unlabeled no citation link size:  263
(183, 102)
(183, 102)
(244, 102)
(244, 102)
(37, 100)
(390, 100)
(390, 100)
P:  1  N:  1
Initial L size:  146
Initial U size:  244
Total Labeled number:  264  Still unlabeled number:  79
y1 disagree on 3  Proba:  [0.34907766 0.65092234]
y2 not aggreed on  3 Proba:  [0.88949162 0.11050838]
product probas: [0.3105016514928261, 0.07193237447347085]
result 0
y1 disagree on 9  Proba:  [0.48940337 0.51059663]
y2 not aggreed on  9 Proba:  [0.92687071 0.07312929]
product probas: [0.4536136480544264, 0.037339570136482356]
result 0
y1 disagree on 21  Proba:  [0.01534819 0.98465181]
y2 not aggreed on  21 Proba:  [0.57178108 0.42821892]
product probas: [0.008775804304172631, 0.42164653483425685]
result 1
y1 disagree on 23  Proba:  [0.05068446 0.94931554]
y

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  188
Class  1  sample size:  248
(436, 102)
Class  0  sample size:  164
Class  1  sample size:  218
(382, 102)
Labeled:  436  :  382
Unlabeled:  2162  :  1453
labeled no citation link:  54
Unlabeled no citation link size:  709
(382, 102)
(382, 102)
(1453, 102)
(1453, 102)
(76, 100)
(1759, 100)
(1759, 100)
P:  1  N:  1
Initial L size:  306
Initial U size:  1453
Total Labeled number:  424  Still unlabeled number:  78
y1 disagree on 10  Proba:  [0.8337437 0.1662563]
y2 not aggreed on  10 Proba:  [0.31089298 0.68910702]
product probas: [0.25920506765656115, 0.1145683818354433]
result 0
y1 disagree on 11  Proba:  [0.60357205 0.39642795]
y2 not aggreed on  11 Proba:  [0.33119089 0.66880911]
product probas: [0.1998975652975146, 0.2651346220655478]
result 1
y1 disagree on 44  Proba:  [0.58339995 0.41660005]
y2 not aggreed on  44 Proba:  [0.04015297 0.95984703]
product probas: [0.02342523915471219, 0.3998723213273]
result 1
F1:  0.9868215710074562
[0 1 0 1 0 1 1 0 1 1 0 1

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(171, 2)
total sample size before apply threshold:  171
Counter({'0000-0003-1657-2161': 78, '0000-0002-9324-9598': 44, '0000-0001-9843-404X': 22, '0000-0001-5613-5893': 14, '0000-0002-3289-2598': 6, '0000-0003-3927-8117': 4, '0000-0002-1472-3352': 2, '0000-0002-5458-6735': 1})
[]
k_anderson  pass
For name:  m_king
(58, 2)
total sample size before apply threshold:  58
Counter({'0000-0002-2587-9117': 26, '0000-0001-6030-5154': 13, '0000-0001-9895-7297': 9, '0000-0001-5611-9498': 7, '0000-0002-9558-8622': 2, '0000-0001-7993-8808': 1})
[]
m_king  pass
For name:  a_srivastava
(49, 2)
total sample size before apply threshold:  49
Counter({'0000-0002-2031-4643': 14, '0000-0002-0211-7814': 13, '0000-0001-9866-8145': 6, '0000-0001-7042-4317': 5, '0000-0001-8340-856X': 3, '0000-0001-9871-5781': 3, '0000-0001-5345-6405': 2, '0000-0002-7046-405X': 1, '0000-0002-4590-7947': 1, '0000-0002-5295-7176': 1})
[]
a_srivastava  pass
For name:  m_scholz
(42, 2)
total sample size before apply threshold:  42


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(103, 2)
total sample size before apply threshold:  103
Counter({'0000-0001-6035-4829': 27, '0000-0001-7434-7339': 23, '0000-0002-0480-7501': 11, '0000-0001-9542-6634': 10, '0000-0003-1298-9795': 9, '0000-0002-1371-266X': 7, '0000-0002-1957-0543': 5, '0000-0002-2822-2049': 4, '0000-0002-9309-9577': 2, '0000-0003-1709-7788': 2, '0000-0001-7550-9483': 1, '0000-0002-8438-7155': 1, '0000-0003-2829-0735': 1})
[]
f_zhang  pass
For name:  s_chapman
(71, 2)
total sample size before apply threshold:  71
Counter({'0000-0003-3347-6024': 23, '0000-0003-0053-1584': 23, '0000-0002-4314-9193': 15, '0000-0003-0778-084X': 7, '0000-0003-2342-3383': 3})
[]
s_chapman  pass
For name:  j_rosa
(29, 2)
total sample size before apply threshold:  29
Counter({'0000-0003-0857-3746': 15, '0000-0001-7770-5381': 7, '0000-0002-7154-2494': 4, '0000-0001-7947-2681': 2, '0000-0002-0015-6254': 1})
[]
j_rosa  pass
For name:  y_yin
(152, 2)
total sample size before apply threshold:  152
Counter({'0000-0003-0218-3042': 127,

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(129, 2)
total sample size before apply threshold:  129
Counter({'0000-0001-8802-9606': 93, '0000-0003-4268-4757': 21, '0000-0003-3509-0686': 10, '0000-0001-7235-5554': 4, '0000-0003-2258-2817': 1})
[]
b_yan  pass
For name:  r_hu
(128, 2)
total sample size before apply threshold:  128
Counter({'0000-0001-6709-031X': 93, '0000-0001-7412-8451': 27, '0000-0001-6893-529X': 4, '0000-0001-5549-3082': 2, '0000-0002-7126-4076': 1, '0000-0001-5921-6891': 1})
[]
r_hu  pass
For name:  j_braun
(72, 2)
total sample size before apply threshold:  72
Counter({'0000-0002-8886-078X': 37, '0000-0002-4504-6235': 25, '0000-0002-8309-6401': 5, '0000-0002-2491-5788': 5})
[]
j_braun  pass
For name:  c_he
(49, 2)
total sample size before apply threshold:  49
Counter({'0000-0002-4868-331X': 20, '0000-0002-1918-5186': 13, '0000-0002-0663-275X': 7, '0000-0001-7869-7627': 5, '0000-0001-5426-769X': 2, '0000-0001-9867-9629': 1, '0000-0001-5842-9617': 1})
[]
c_he  pass
For name:  w_lu
(138, 2)
total sample size befor

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0.52157587 0.47842413]
y2 not aggreed on  199 Proba:  [0.04136732 0.95863268]
product probas: [0.021576196419625115, 0.45863300743541807]
result 1
y1 disagree on 229  Proba:  [0.89438766 0.10561234]
y2 not aggreed on  229 Proba:  [0.1548536 0.8451464]
product probas: [0.13849914529194218, 0.08925788837017357]
result 0
y1 disagree on 241  Proba:  [0.7076713 0.2923287]
y2 not aggreed on  241 Proba:  [0.10244862 0.89755138]
product probas: [0.07249994758688941, 0.2623800301751014]
result 1
y1 disagree on 276  Proba:  [0.46302102 0.53697898]
y2 not aggreed on  276 Proba:  [0.86258554 0.13741446]
product probas: [0.3993952346369682, 0.07378867788242509]
result 0
y1 disagree on 277  Proba:  [0.58999928 0.41000072]
y2 not aggreed on  277 Proba:  [0.03724247 0.96275753]
product probas: [0.021973031192800314, 0.3947312833080456]
result 1
F1:  0.9569384510560981
[1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


y1 disagree on 2  Proba:  [0.62235166 0.37764834]
y2 not aggreed on  2 Proba:  [0.370703 0.629297]
product probas: [0.2307076304376206, 0.2376529648253331]
result 1
y1 disagree on 5  Proba:  [0.08207796 0.91792204]
y2 not aggreed on  5 Proba:  [0.54854816 0.45145184]
product probas: [0.0450237155188665, 0.4143975963216788]
result 1
y1 disagree on 12  Proba:  [0.59450136 0.40549864]
y2 not aggreed on  12 Proba:  [0.02557915 0.97442085]
product probas: [0.015206841593766868, 0.39512632540308645]
result 1
y1 disagree on 13  Proba:  [0.35736306 0.64263694]
y2 not aggreed on  13 Proba:  [0.62326278 0.37673722]
product probas: [0.2227310907626886, 0.24210525846492015]
result 1
y1 disagree on 16  Proba:  [0.95284649 0.04715351]
y2 not aggreed on  16 Proba:  [0.28330633 0.71669367]
product probas: [0.26994743915931785, 0.0337946229090339]
result 0
y1 disagree on 21  Proba:  [0.52664363 0.47335637]
y2 not aggreed on  21 Proba:  [0.24446745 0.75553255]
product probas: [0.1287472236720051, 0.3576

i_ferreira_0
Class  0  sample size:  166
Class  1  sample size:  178
(344, 102)
Class  0  sample size:  161
Class  1  sample size:  169
(330, 102)
Labeled:  344  :  330
Unlabeled:  426  :  292
labeled no citation link:  14
Unlabeled no citation link size:  134
(330, 102)
(330, 102)
(292, 102)
(292, 102)
(66, 100)
(556, 100)
(556, 100)
P:  1  N:  1
Initial L size:  264
Initial U size:  292
Total Labeled number:  375  Still unlabeled number:  86
y1 disagree on 0  Proba:  [0.16030947 0.83969053]
y2 not aggreed on  0 Proba:  [0.61926849 0.38073151]
product probas: [0.09927460412624593, 0.3196966427678857]
result 1
y1 disagree on 5  Proba:  [0.55874237 0.44125763]
y2 not aggreed on  5 Proba:  [0.04175314 0.95824686]
product probas: [0.02332925040558942, 0.42283373792924017]
result 1
y1 disagree on 30  Proba:  [0.94512115 0.05487885]
y2 not aggreed on  30 Proba:  [0.38543736 0.61456264]
product probas: [0.3642849976868769, 0.03372649305742926]
result 0
y1 disagree on 33  Proba:  [9.99036183e

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(300, 2)
total sample size before apply threshold:  300
Counter({'0000-0003-3709-1690': 237, '0000-0001-8573-0990': 20, '0000-0001-6303-4417': 16, '0000-0001-6786-8769': 15, '0000-0003-3991-0573': 9, '0000-0001-6322-5862': 2, '0000-0002-7908-6884': 1})
['0000-0003-3709-1690']
c_torres_0
Class  0  sample size:  237
Class  1  sample size:  63
(300, 102)
Class  0  sample size:  230
Class  1  sample size:  60
(290, 102)
Labeled:  300  :  290
Unlabeled:  963  :  605
labeled no citation link:  10
Unlabeled no citation link size:  358
(290, 102)
(290, 102)
(605, 102)
(605, 102)
(58, 100)
(837, 100)
(837, 100)
P:  1  N:  1
Initial L size:  232
Initial U size:  605
Total Labeled number:  351  Still unlabeled number:  77
y1 disagree on 12  Proba:  [0.09907128 0.90092872]
y2 not aggreed on  12 Proba:  [0.50318238 0.49681762]
product probas: [0.04985092112907221, 0.4475972639070143]
result 1
y1 disagree on 13  Proba:  [0.00491047 0.99508953]
y2 not aggreed on  13 Proba:  [0.53591398 0.46408602]
pr

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  112
Class  1  sample size:  51
(163, 102)
Class  0  sample size:  107
Class  1  sample size:  49
(156, 102)
Labeled:  163  :  156
Unlabeled:  1087  :  686
labeled no citation link:  7
Unlabeled no citation link size:  401
(156, 102)
(156, 102)
(686, 102)
(686, 102)
(31, 100)
(811, 100)
(811, 100)
P:  1  N:  1
Initial L size:  125
Initial U size:  686
Total Labeled number:  239  Still unlabeled number:  81
y1 disagree on 22  Proba:  [0.84478358 0.15521642]
y2 not aggreed on  22 Proba:  [0.20461593 0.79538407]
product probas: [0.17285618026990052, 0.12345666658869352]
result 0
F1:  0.8540031397174255
[0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
[0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1]
For name:  l_rasmussen
(249, 2)
total sample size before apply threshold:  249
Counter({'0000-0002-7480-3004': 214, '0000-0002-4497-8049': 24, '0000-0001-6613-2469': 5, '0000-0001-5962-6647': 4, '0000-0001-5795-4

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  214
Class  1  sample size:  35
(249, 102)
Class  0  sample size:  185
Class  1  sample size:  32
(217, 102)
Labeled:  249  :  217
Unlabeled:  1209  :  817
labeled no citation link:  32
Unlabeled no citation link size:  392
(217, 102)
(217, 102)
(817, 102)
(817, 102)
(43, 100)
(991, 100)
(991, 100)
P:  1  N:  1
Initial L size:  174
Initial U size:  817
Total Labeled number:  293  Still unlabeled number:  79
y1 disagree on 16  Proba:  [0.40939083 0.59060917]
y2 not aggreed on  16 Proba:  [0.65730081 0.34269919]
product probas: [0.26909292744805063, 0.20240128174149238]
result 0
F1:  1.0
[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
For name:  m_saad
(4, 2)
total sample size before apply threshold:  4
Counter({'0000-0003-0458-5942': 1, '0000-0002-8071-2328': 1, '0000-0002-5655-8674': 1, '0000-00

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  179
Class  1  sample size:  92
(271, 102)
Class  0  sample size:  178
Class  1  sample size:  76
(254, 102)
Labeled:  271  :  254
Unlabeled:  1645  :  1068
labeled no citation link:  17
Unlabeled no citation link size:  577
(254, 102)
(254, 102)
(1068, 102)
(1068, 102)
(51, 100)
(1271, 100)
(1271, 100)
P:  1  N:  1
Initial L size:  203
Initial U size:  1068
Total Labeled number:  319  Still unlabeled number:  83
y1 disagree on 12  Proba:  [0.06545126 0.93454874]
y2 not aggreed on  12 Proba:  [0.66007168 0.33992832]
product probas: [0.04320252509819734, 0.31767958058504214]
result 1
y1 disagree on 20  Proba:  [0.63451315 0.36548685]
y2 not aggreed on  20 Proba:  [0.0303418 0.9696582]
product probas: [0.019252270454140817, 0.35439732598735024]
result 1
y1 disagree on 44  Proba:  [0.16261573 0.83738427]
y2 not aggreed on  44 Proba:  [0.67732839 0.32267161]
product probas: [0.11014424905238855, 0.2702001338186206]
result 1
y1 disagree on 45  Proba:  [0.79222338 0.20

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(590, 2)
total sample size before apply threshold:  590
Counter({'0000-0003-3171-7672': 108, '0000-0001-5833-989X': 100, '0000-0003-3231-9764': 82, '0000-0002-1082-7592': 62, '0000-0003-3267-4811': 40, '0000-0001-7805-869X': 36, '0000-0003-2883-0391': 21, '0000-0002-0607-038X': 21, '0000-0002-5461-6770': 16, '0000-0002-3912-6095': 11, '0000-0001-6757-885X': 11, '0000-0001-6408-7668': 10, '0000-0002-9873-1033': 9, '0000-0001-7801-083X': 8, '0000-0001-8430-4797': 7, '0000-0002-2572-7287': 5, '0000-0002-6766-8481': 5, '0000-0001-8706-6026': 4, '0000-0002-0036-2859': 4, '0000-0002-9624-0505': 3, '0000-0002-3413-4029': 3, '0000-0003-1817-8395': 3, '0000-0003-1744-8525': 3, '0000-0001-8052-2420': 2, '0000-0003-0853-8561': 2, '0000-0001-7285-4054': 2, '0000-0001-9645-8179': 2, '0000-0002-4383-756X': 2, '0000-0003-1911-3454': 2, '0000-0003-4333-5444': 1, '0000-0002-7324-5792': 1, '0000-0002-2152-7210': 1, '0000-0003-4040-1100': 1, '0000-0003-0133-9076': 1, '0000-0002-7696-5517': 1})
['0000-000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  108
Class  1  sample size:  482
(590, 102)
Class  0  sample size:  99
Class  1  sample size:  462
(561, 102)
Labeled:  590  :  561
Unlabeled:  16329  :  12547
labeled no citation link:  29
Unlabeled no citation link size:  3782
(561, 102)
(561, 102)
(12547, 102)
(12547, 102)
(112, 100)
(12996, 100)
(12996, 100)
P:  1  N:  1
Initial L size:  449
Initial U size:  12547
Total Labeled number:  565  Still unlabeled number:  79
y1 disagree on 7  Proba:  [0.93318504 0.06681496]
y2 not aggreed on  7 Proba:  [0.0529144 0.9470856]
product probas: [0.04937892926539519, 0.0632794892332674]
result 1
y1 disagree on 23  Proba:  [0.06977539 0.93022461]
y2 not aggreed on  23 Proba:  [0.5451932 0.4548068]
product probas: [0.038041066967045525, 0.42307247728444597]
result 1
y1 disagree on 24  Proba:  [0.69606322 0.30393678]
y2 not aggreed on  24 Proba:  [0.22604204 0.77395796]
product probas: [0.15733955129093044, 0.23523428987541148]
result 1
y1 disagree on 54  Proba:  [0.8442177

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


F1:  0.6817650008357011
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1
 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1
 1]
[1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0]
For name:  j_cheng
(66, 2)
total sample size before apply threshold:  66
Counter({'0000-0003-1786-6188': 19, '0000-0001-8285-3207': 16, '0000-0001-5318-5668': 8, '0000-0002-7004-5138': 6, '0000-0003-3928-1770': 6, '0000-0002-1881-012X': 5, '0000-0002-4364-9657': 3, '0000-0002-1722-2617': 1, '0000-0002-5434-1201': 1, '0000-0001-6065-2682': 1})
[]
j_cheng  pass
For name:  g_lewis
(367, 2)
total sample size be

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  118
Class  1  sample size:  34
(152, 102)
Class  0  sample size:  112
Class  1  sample size:  33
(145, 102)
Labeled:  152  :  145
Unlabeled:  2531  :  1711
labeled no citation link:  7
Unlabeled no citation link size:  820
(145, 102)
(145, 102)
(1711, 102)
(1711, 102)
(29, 100)
(1827, 100)
(1827, 100)
P:  1  N:  1
Initial L size:  116
Initial U size:  1711
Total Labeled number:  227  Still unlabeled number:  84
y1 disagree on 5  Proba:  [0.15413766 0.84586234]
y2 not aggreed on  5 Proba:  [0.8506 0.1494]
product probas: [0.1311094921053953, 0.12637183658996873]
result 0
y1 disagree on 8  Proba:  [0.10043261 0.89956739]
y2 not aggreed on  8 Proba:  [0.73028328 0.26971672]
product probas: [0.07334425902476052, 0.24262836290461312]
result 1
y1 disagree on 11  Proba:  [0.92014181 0.07985819]
y2 not aggreed on  11 Proba:  [0.2639485 0.7360515]
product probas: [0.24287005317823965, 0.05877973888160033]
result 0
y1 disagree on 21  Proba:  [0.01568091 0.98431909]
y2 not

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(439, 2)
total sample size before apply threshold:  439
Counter({'0000-0001-6715-6366': 100, '0000-0002-0253-647X': 45, '0000-0002-1006-6666': 39, '0000-0001-7639-0904': 39, '0000-0002-7233-1509': 31, '0000-0001-9366-6204': 26, '0000-0002-4723-845X': 18, '0000-0003-3326-2640': 17, '0000-0002-3745-7202': 13, '0000-0003-4837-5373': 11, '0000-0003-3103-6949': 10, '0000-0002-4548-2002': 9, '0000-0003-0266-9472': 9, '0000-0001-7984-6305': 8, '0000-0002-7645-0855': 8, '0000-0003-2394-5421': 7, '0000-0001-5451-6828': 6, '0000-0002-1852-4537': 5, '0000-0003-2183-9609': 3, '0000-0003-1837-1435': 3, '0000-0002-2781-2637': 3, '0000-0001-8959-0315': 3, '0000-0003-1313-4000': 3, '0000-0003-1724-4418': 2, '0000-0003-0345-6647': 2, '0000-0001-8519-3240': 2, '0000-0002-3292-9303': 2, '0000-0003-1679-6560': 2, '0000-0003-4341-672X': 2, '0000-0001-8806-6204': 1, '0000-0003-3125-4399': 1, '0000-0002-5450-5958': 1, '0000-0003-0658-4425': 1, '0000-0002-6370-0704': 1, '0000-0001-6604-5509': 1, '0000-0002-60

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(221, 2)
total sample size before apply threshold:  221
Counter({'0000-0002-0776-5861': 75, '0000-0001-7221-6693': 51, '0000-0003-1072-2755': 45, '0000-0003-2027-2428': 44, '0000-0003-4196-1804': 6})
[]
r_moore  pass
For name:  m_thomsen
(98, 2)
total sample size before apply threshold:  98
Counter({'0000-0002-2469-6458': 37, '0000-0003-2453-5141': 32, '0000-0001-6805-7247': 17, '0000-0003-3081-9220': 7, '0000-0003-3814-1709': 3, '0000-0003-1208-5497': 2})
[]
m_thomsen  pass
For name:  l_ng
(44, 2)
total sample size before apply threshold:  44
Counter({'0000-0003-1905-3586': 37, '0000-0002-6973-9466': 3, '0000-0001-7500-9403': 1, '0000-0001-5988-008X': 1, '0000-0003-3135-244X': 1, '0000-0002-7189-1272': 1})
[]
l_ng  pass
For name:  a_phillips
(170, 2)
total sample size before apply threshold:  170
Counter({'0000-0002-5461-0598': 98, '0000-0001-6367-9784': 24, '0000-0001-5599-6499': 24, '0000-0003-4883-0022': 9, '0000-0003-4225-0158': 7, '0000-0003-4473-5108': 4, '0000-0001-6618-0145': 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  174
Class  1  sample size:  43
(217, 102)
Class  0  sample size:  172
Class  1  sample size:  42
(214, 102)
Labeled:  217  :  214
Unlabeled:  1410  :  785
labeled no citation link:  3
Unlabeled no citation link size:  625
(214, 102)
(214, 102)
(785, 102)
(785, 102)
(43, 100)
(956, 100)
(956, 100)
P:  1  N:  1
Initial L size:  171
Initial U size:  785
Total Labeled number:  281  Still unlabeled number:  85
y1 disagree on 3  Proba:  [0.76254525 0.23745475]
y2 not aggreed on  3 Proba:  [0.06244221 0.93755779]
product probas: [0.04761501329263254, 0.22262754990392353]
result 1
F1:  0.9703243616287094
[1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0
 0 1 0 0 0 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0]
For name:  h_moreira
(28, 2)
total sample size before apply threshold:  28
Counter({'0000-0002-1487-0539': 13, '0000-0002-5481-0688': 10, '0000-0002-4674

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(48, 2)
total sample size before apply threshold:  48
Counter({'0000-0001-8578-5510': 17, '0000-0002-0067-991X': 9, '0000-0003-4870-8369': 8, '0000-0001-7418-5536': 6, '0000-0001-6227-5147': 3, '0000-0001-9404-9515': 3, '0000-0003-2292-7766': 2})
[]
h_brown  pass
For name:  s_martins
(84, 2)
total sample size before apply threshold:  84
Counter({'0000-0002-9396-5957': 18, '0000-0002-3720-2920': 15, '0000-0001-7217-6273': 15, '0000-0003-0237-6370': 12, '0000-0002-1812-8913': 8, '0000-0002-1874-0192': 7, '0000-0002-7733-4485': 5, '0000-0003-2122-0670': 3, '0000-0002-3526-3199': 1})
[]
s_martins  pass
For name:  m_ruiz
(111, 2)
total sample size before apply threshold:  111
Counter({'0000-0003-4174-6688': 40, '0000-0002-2734-2196': 32, '0000-0002-1530-9508': 9, '0000-0002-1337-0110': 5, '0000-0001-8617-667X': 4, '0000-0001-7492-9873': 3, '0000-0003-4419-1649': 3, '0000-0002-2926-702X': 3, '0000-0003-1437-5578': 2, '0000-0002-4670-9037': 2, '0000-0002-4917-1252': 2, '0000-0002-1286-6624': 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  108
Class  1  sample size:  278
(386, 102)
Class  0  sample size:  103
Class  1  sample size:  249
(352, 102)
Labeled:  386  :  352
Unlabeled:  4105  :  2422
labeled no citation link:  34
Unlabeled no citation link size:  1683
(352, 102)
(352, 102)
(2422, 102)
(2422, 102)
(70, 100)
(2704, 100)
(2704, 100)
P:  1  N:  1
Initial L size:  282
Initial U size:  2422
Total Labeled number:  401  Still unlabeled number:  79
y1 disagree on 0  Proba:  [0.39873549 0.60126451]
y2 not aggreed on  0 Proba:  [0.99255894 0.00744106]
product probas: [0.39576847222566114, 0.004474045296264649]
result 0
y1 disagree on 1  Proba:  [0.50431696 0.49568304]
y2 not aggreed on  1 Proba:  [0.0046225 0.9953775]
product probas: [0.0023312030525159313, 0.4933917499633475]
result 1
y1 disagree on 12  Proba:  [0.99355167 0.00644833]
y2 not aggreed on  12 Proba:  [0.32412625 0.67587375]
product probas: [0.3220361726935883, 0.004358257186594653]
result 0
y1 disagree on 45  Proba:  [0.86581695 0.1

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  155
Class  1  sample size:  187
(342, 102)
Class  0  sample size:  148
Class  1  sample size:  178
(326, 102)
Labeled:  342  :  326
Unlabeled:  6912  :  4172
labeled no citation link:  16
Unlabeled no citation link size:  2740
(326, 102)
(326, 102)
(4172, 102)
(4172, 102)
(65, 100)
(4433, 100)
(4433, 100)
P:  1  N:  1
Initial L size:  261
Initial U size:  4172
Total Labeled number:  376  Still unlabeled number:  81
y1 disagree on 3  Proba:  [0.19965063 0.80034937]
y2 not aggreed on  3 Proba:  [0.63585475 0.36414525]
product probas: [0.12694880314513382, 0.291443422826297]
result 1
y1 disagree on 5  Proba:  [0.33282064 0.66717936]
y2 not aggreed on  5 Proba:  [0.58771796 0.41228204]
product probas: [0.19560466371586674, 0.2750660715634413]
result 1
y1 disagree on 7  Proba:  [0.79779889 0.20220111]
y2 not aggreed on  7 Proba:  [0.35301205 0.64698795]
product probas: [0.28163261773039316, 0.13082168530782767]
result 0
y1 disagree on 15  Proba:  [0.50880087 0.491199

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



Counter({'0000-0002-4733-1224': 124, '0000-0002-7464-7435': 99, '0000-0003-0657-4032': 43, '0000-0001-6841-1808': 30, '0000-0002-1465-1010': 21, '0000-0003-3291-8468': 18, '0000-0002-2369-8291': 17, '0000-0003-4036-0521': 17, '0000-0001-6228-2988': 12, '0000-0002-7954-8073': 3, '0000-0001-9962-6166': 3, '0000-0003-1873-4531': 1})
['0000-0002-4733-1224']
j_jensen_0
Class  0  sample size:  124
Class  1  sample size:  264
(388, 102)
Class  0  sample size:  116
Class  1  sample size:  241
(357, 102)
Labeled:  388  :  357
Unlabeled:  4076  :  2589
labeled no citation link:  31
Unlabeled no citation link size:  1487
(357, 102)
(357, 102)
(2589, 102)
(2589, 102)
(71, 100)
(2875, 100)
(2875, 100)
P:  1  N:  1
Initial L size:  286
Initial U size:  2589
Total Labeled number:  402  Still unlabeled number:  79
y1 disagree on 0  Proba:  [0.82119909 0.17880091]
y2 not aggreed on  0 Proba:  [0.44902212 0.55097788]
product probas: [0.36873655579482945, 0.09851534485220335]
result 0
y1 disagree on 7  

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(338, 2)
total sample size before apply threshold:  338
Counter({'0000-0003-1215-2565': 48, '0000-0002-7916-8687': 47, '0000-0001-6783-5182': 20, '0000-0002-9408-9979': 20, '0000-0002-6541-0612': 18, '0000-0002-5455-2586': 17, '0000-0002-2903-4218': 16, '0000-0003-0302-3470': 16, '0000-0002-6184-2530': 15, '0000-0003-1035-2272': 15, '0000-0002-6923-1099': 13, '0000-0002-1442-992X': 12, '0000-0001-6747-1665': 12, '0000-0003-3618-1379': 11, '0000-0002-9231-8360': 11, '0000-0003-1384-6024': 9, '0000-0002-0278-7543': 7, '0000-0001-8541-893X': 5, '0000-0001-8986-9164': 4, '0000-0002-2944-1315': 4, '0000-0001-8970-9398': 3, '0000-0001-8925-9462': 2, '0000-0003-1254-6732': 2, '0000-0002-5866-5932': 2, '0000-0001-8808-9481': 2, '0000-0003-1815-1408': 1, '0000-0002-4148-2603': 1, '0000-0003-1490-0416': 1, '0000-0002-7761-0072': 1, '0000-0002-6806-1593': 1, '0000-0003-4188-5725': 1, '0000-0003-2289-5709': 1})
[]
y_zhao  pass
For name:  s_hussain
(52, 2)
total sample size before apply threshold: 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  218
Class  1  sample size:  31
(249, 102)
Class  0  sample size:  215
Class  1  sample size:  26
(241, 102)
Labeled:  249  :  241
Unlabeled:  8153  :  5121
labeled no citation link:  8
Unlabeled no citation link size:  3032
(241, 102)
(241, 102)
(5121, 102)
(5121, 102)
(48, 100)
(5314, 100)
(5314, 100)
P:  1  N:  1
Initial L size:  193
Initial U size:  5121
Total Labeled number:  308  Still unlabeled number:  81
y1 disagree on 26  Proba:  [0.48334425 0.51665575]
y2 not aggreed on  26 Proba:  [0.84280932 0.15719068]
product probas: [0.40736703399296215, 0.0812134712509153]
result 0
y1 disagree on 39  Proba:  [0.11907425 0.88092575]
y2 not aggreed on  39 Proba:  [0.70606302 0.29393698]
product probas: [0.08407392452491788, 0.2589366526987373]
result 1
F1:  0.4947368421052632
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  'recall', 'true', average, warn_for)


(1139, 2)
total sample size before apply threshold:  1139
Counter({'0000-0001-5077-4483': 92, '0000-0002-5756-3336': 87, '0000-0001-7858-8236': 73, '0000-0002-1752-4201': 61, '0000-0002-9220-8436': 55, '0000-0001-5859-3070': 51, '0000-0003-2996-5781': 49, '0000-0001-8807-3607': 43, '0000-0001-6527-4801': 41, '0000-0001-6879-5936': 35, '0000-0002-7253-2722': 33, '0000-0001-7336-8808': 31, '0000-0001-8634-1145': 29, '0000-0001-6491-6577': 28, '0000-0002-0662-782X': 19, '0000-0001-7381-0918': 18, '0000-0002-4429-283X': 17, '0000-0001-5168-7074': 16, '0000-0002-1591-9744': 14, '0000-0002-7409-7859': 14, '0000-0002-8021-7458': 13, '0000-0002-7530-4215': 12, '0000-0002-4114-3046': 12, '0000-0001-9970-4582': 12, '0000-0002-7000-1469': 11, '0000-0002-3850-4875': 11, '0000-0001-5648-9202': 11, '0000-0002-1038-4162': 11, '0000-0002-3671-553X': 10, '0000-0002-3329-6384': 9, '0000-0001-6661-1734': 9, '0000-0003-0326-8304': 9, '0000-0001-9202-404X': 8, '0000-0001-6321-0505': 8, '0000-0002-5323-1801

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Counter({'0000-0002-0215-4930': 181, '0000-0002-6973-7233': 9, '0000-0001-6464-548X': 7, '0000-0001-8377-616X': 3, '0000-0001-5797-8913': 1})
['0000-0002-0215-4930']
m_barreto_0
Class  0  sample size:  181
Class  1  sample size:  20
(201, 102)
Class  0  sample size:  170
Class  1  sample size:  20
(190, 102)
Labeled:  201  :  190
Unlabeled:  406  :  208
labeled no citation link:  11
Unlabeled no citation link size:  198
(190, 102)
(190, 102)
(208, 102)
(208, 102)
(38, 100)
(360, 100)
(360, 100)
P:  1  N:  1
Initial L size:  152
Initial U size:  208
Total Labeled number:  266  Still unlabeled number:  81
y1 disagree on 10  Proba:  [0.89231927 0.10768073]
y2 not aggreed on  10 Proba:  [0.19063445 0.80936555]
product probas: [0.17010679225883407, 0.08715307581161168]
result 0
y1 disagree on 16  Proba:  [0.98838537 0.01161463]
y2 not aggreed on  16 Proba:  [0.42034412 0.57965588]
product probas: [0.41546198061727113, 0.006732489387627046]
result 0
y1 disagree on 20  Proba:  [0.48741277 0.5

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  107
Class  1  sample size:  14
(121, 102)
Class  0  sample size:  97
Class  1  sample size:  13
(110, 102)
Labeled:  121  :  110
Unlabeled:  1824  :  1065
labeled no citation link:  11
Unlabeled no citation link size:  759
(110, 102)
(110, 102)
(1065, 102)
(1065, 102)
(22, 100)
(1153, 100)
(1153, 100)
P:  1  N:  1
Initial L size:  88
Initial U size:  1065
Total Labeled number:  202  Still unlabeled number:  82
F1:  1.0
[0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
For name:  v_martins
(104, 2)
total sample size before apply threshold:  104
Counter({'0000-0002-2909-8502': 71, '0000-0001-7611-861X': 18, '0000-0001-7565-9641': 6, '0000-0003-2465-5880': 5, '0000-0002-8824-7328': 3, '0000-0002-0327-538X': 1})
[]
v_martins  pass
For name:  t_zhou
(76, 2)
total sample size before apply threshold:  76
Counter({'0000-0002-3935-4637': 55, '0000-0002-7858-0047': 12, '0000-0002-8744-9083': 3, '0000-0001-7416

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



s_patel_0
Class  0  sample size:  117
Class  1  sample size:  299
(416, 102)
Class  0  sample size:  85
Class  1  sample size:  282
(367, 102)
Labeled:  416  :  367
Unlabeled:  6752  :  5020
labeled no citation link:  49
Unlabeled no citation link size:  1732
(367, 102)
(367, 102)
(5020, 102)
(5020, 102)
(73, 100)
(5314, 100)
(5314, 100)
P:  1  N:  1
Initial L size:  294
Initial U size:  5020
Total Labeled number:  407  Still unlabeled number:  84
y1 disagree on 3  Proba:  [0.0768518 0.9231482]
y2 not aggreed on  3 Proba:  [0.63886576 0.36113424]
product probas: [0.049097981200958415, 0.3333804281114187]
result 1
y1 disagree on 9  Proba:  [0.45583723 0.54416277]
y2 not aggreed on  9 Proba:  [0.81299621 0.18700379]
product probas: [0.3705939354197184, 0.10176050344538802]
result 0
y1 disagree on 61  Proba:  [0.6622367 0.3377633]
y2 not aggreed on  61 Proba:  [0.06646005 0.93353995]
product probas: [0.044012286189663574, 0.3153155333007001]
result 1
F1:  0.9262626262626262
[0 1 1 1 1 1 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  115
Class  1  sample size:  177
(292, 102)
Class  0  sample size:  112
Class  1  sample size:  164
(276, 102)
Labeled:  292  :  276
Unlabeled:  4855  :  3261
labeled no citation link:  16
Unlabeled no citation link size:  1594
(276, 102)
(276, 102)
(3261, 102)
(3261, 102)
(55, 100)
(3482, 100)
(3482, 100)
P:  1  N:  1
Initial L size:  221
Initial U size:  3261
Total Labeled number:  337  Still unlabeled number:  79
y1 disagree on 17  Proba:  [0.59308293 0.40691707]
y2 not aggreed on  17 Proba:  [0.28874635 0.71125365]
product probas: [0.1712505321404192, 0.2894212494457575]
result 1
y1 disagree on 22  Proba:  [0.63899639 0.36100361]
y2 not aggreed on  22 Proba:  [0.44153664 0.55846336]
product probas: [0.28214031948163615, 0.20160728810519593]
result 0
y1 disagree on 35  Proba:  [0.06593243 0.93406757]
y2 not aggreed on  35 Proba:  [0.540576 0.459424]
product probas: [0.03564148963305014, 0.4291330604650543]
result 1
y1 disagree on 44  Proba:  [0.15505976 0.8449

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  107
Class  1  sample size:  18
(125, 102)
Class  0  sample size:  103
Class  1  sample size:  17
(120, 102)
Labeled:  125  :  120
Unlabeled:  1819  :  1254
labeled no citation link:  5
Unlabeled no citation link size:  565
(120, 102)
(120, 102)
(1254, 102)
(1254, 102)
(24, 100)
(1350, 100)
(1350, 100)
P:  1  N:  1
Initial L size:  96
Initial U size:  1254
Total Labeled number:  208  Still unlabeled number:  83
y1 disagree on 16  Proba:  [0.49872844 0.50127156]
y2 not aggreed on  16 Proba:  [0.9368616 0.0631384]
product probas: [0.46723952715632794, 0.03164948462005108]
result 0
y1 disagree on 20  Proba:  [0.66941356 0.33058644]
y2 not aggreed on  20 Proba:  [0.00793102 0.99206898]
product probas: [0.005309132447724835, 0.32796455510103706]
result 1
F1:  0.9163763066202091
[0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0]
For name:  j_dai
(31, 2)
total sample size before apply threshold:  31

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(86, 2)
total sample size before apply threshold:  86
Counter({'0000-0001-8950-1036': 28, '0000-0002-9381-3320': 21, '0000-0003-3317-8756': 13, '0000-0003-4392-4644': 7, '0000-0003-3421-7833': 4, '0000-0003-2100-0280': 4, '0000-0002-1937-6548': 4, '0000-0002-9602-2452': 3, '0000-0001-9718-3867': 1, '0000-0002-8132-0625': 1})
[]
f_pereira  pass
For name:  a_vitale
(56, 2)
total sample size before apply threshold:  56
Counter({'0000-0001-5586-2255': 43, '0000-0002-8682-3125': 7, '0000-0002-7339-4034': 4, '0000-0003-4980-5574': 2})
[]
a_vitale  pass
For name:  q_yang
(102, 2)
total sample size before apply threshold:  102
Counter({'0000-0002-3510-8906': 18, '0000-0001-9849-6996': 17, '0000-0003-4205-1909': 17, '0000-0001-6628-5393': 15, '0000-0002-4378-2335': 10, '0000-0003-4038-2464': 8, '0000-0002-6788-8775': 7, '0000-0003-0279-8784': 5, '0000-0001-6720-8795': 2, '0000-0001-8253-2278': 1, '0000-0002-1437-4498': 1, '0000-0003-2067-5999': 1})
[]
q_yang  pass
For name:  d_xue
(111, 2)
tota

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


8
Counter({'0000-0001-5891-9798': 3, '0000-0003-4790-3584': 2, '0000-0001-6241-7547': 2, '0000-0002-5535-0839': 1})
[]
m_iqbal  pass
For name:  s_lim
(136, 2)
total sample size before apply threshold:  136
Counter({'0000-0002-5475-4153': 27, '0000-0002-0360-6361': 23, '0000-0002-5192-0486': 20, '0000-0001-7589-5150': 15, '0000-0003-3807-4163': 13, '0000-0001-8471-5684': 6, '0000-0003-4528-8514': 6, '0000-0001-9086-5101': 5, '0000-0003-0312-9937': 5, '0000-0002-4890-0396': 4, '0000-0003-0377-9032': 3, '0000-0003-4246-6223': 3, '0000-0002-9783-9050': 1, '0000-0002-9907-0628': 1, '0000-0003-0845-9994': 1, '0000-0003-0598-4574': 1, '0000-0002-9460-5136': 1, '0000-0003-0204-4990': 1})
[]
s_lim  pass
For name:  p_li
(118, 2)
total sample size before apply threshold:  118
Counter({'0000-0002-5715-548X': 20, '0000-0001-9602-9550': 18, '0000-0001-9098-7598': 14, '0000-0002-5876-2177': 9, '0000-0001-5836-1069': 9, '0000-0002-2572-5935': 7, '0000-0001-9339-3111': 7, '0000-0002-4273-4577': 7, '000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  138
Class  1  sample size:  45
(183, 102)
Class  0  sample size:  131
Class  1  sample size:  43
(174, 102)
Labeled:  183  :  174
Unlabeled:  2555  :  1593
labeled no citation link:  9
Unlabeled no citation link size:  962
(174, 102)
(174, 102)
(1593, 102)
(1593, 102)
(35, 100)
(1732, 100)
(1732, 100)
P:  1  N:  1
Initial L size:  139
Initial U size:  1593
Total Labeled number:  255  Still unlabeled number:  79
F1:  1.0
[0 1 0 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0]
[0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0]
For name:  r_rodrigues
(74, 2)
total sample size before apply threshold:  74
Counter({'0000-0002-7631-743X': 30, '0000-0001-8592-850X': 22, '0000-0002-7557-1815': 10, '0000-0002-5894-2506': 2, '0000-0003-4493-2654': 2, '0000-0002-0437-2798': 2, '0000-0002-7589-7807': 1, '0000-0002-4261-1147': 1, '0000-0002-5115-6991': 1, '0000-0001-5631-0970': 1, '0000-0003-3522-9844': 1,

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Counter({'0000-0003-4083-8791': 106, '0000-0002-2350-9876': 69, '0000-0003-1906-1308': 45, '0000-0002-9732-361X': 40, '0000-0002-3363-5788': 35, '0000-0002-7618-9028': 16, '0000-0003-1584-6896': 14, '0000-0002-1788-045X': 11, '0000-0001-9008-1964': 10, '0000-0003-4734-214X': 9, '0000-0002-0776-3188': 2, '0000-0003-0409-8132': 1, '0000-0003-0721-272X': 1, '0000-0002-5935-8264': 1})
['0000-0003-4083-8791']
c_park_0
Class  0  sample size:  106
Class  1  sample size:  254
(360, 102)
Class  0  sample size:  103
Class  1  sample size:  238
(341, 102)
Labeled:  360  :  341
Unlabeled:  11506  :  8986
labeled no citation link:  19
Unlabeled no citation link size:  2520
(341, 102)
(341, 102)
(8986, 102)
(8986, 102)
(68, 100)
(9259, 100)
(9259, 100)
P:  1  N:  1
Initial L size:  273
Initial U size:  8986
Total Labeled number:  387  Still unlabeled number:  81
y1 disagree on 16  Proba:  [0.33781709 0.66218291]
y2 not aggreed on  16 Proba:  [0.87366616 0.12633384]
product probas: [0.295139354929385

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(379, 2)
total sample size before apply threshold:  379
Counter({'0000-0001-7133-4970': 213, '0000-0002-0541-7556': 87, '0000-0002-2650-2426': 25, '0000-0001-8036-1161': 17, '0000-0003-2731-4707': 15, '0000-0002-9348-8740': 13, '0000-0001-7121-504X': 6, '0000-0001-5522-7796': 2, '0000-0002-5731-2692': 1})
['0000-0001-7133-4970']
m_wright_0
Class  0  sample size:  213
Class  1  sample size:  166
(379, 102)
Class  0  sample size:  197
Class  1  sample size:  150
(347, 102)
Labeled:  379  :  347
Unlabeled:  2376  :  1517
labeled no citation link:  32
Unlabeled no citation link size:  859
(347, 102)
(347, 102)
(1517, 102)
(1517, 102)
(69, 100)
(1795, 100)
(1795, 100)
P:  1  N:  1
Initial L size:  278
Initial U size:  1517
Total Labeled number:  390  Still unlabeled number:  83
y1 disagree on 23  Proba:  [9.99743806e-01 2.56193639e-04]
y2 not aggreed on  23 Proba:  [0.48244551 0.51755449]
product probas: [0.48232190793635843, 0.00013259416876143415]
result 0
y1 disagree on 35  Proba:  [0.70

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  112
Class  1  sample size:  282
(394, 102)
Class  0  sample size:  106
Class  1  sample size:  276
(382, 102)
Labeled:  394  :  382
Unlabeled:  2262  :  1463
labeled no citation link:  12
Unlabeled no citation link size:  799
(382, 102)
(382, 102)
(1463, 102)
(1463, 102)
(76, 100)
(1769, 100)
(1769, 100)
P:  1  N:  1
Initial L size:  306
Initial U size:  1463
Total Labeled number:  423  Still unlabeled number:  79
y1 disagree on 14  Proba:  [0.92232307 0.07767693]
y2 not aggreed on  14 Proba:  [0.3573392 0.6426608]
product probas: [0.32958218436177955, 0.04991991973006915]
result 0
y1 disagree on 18  Proba:  [0.33109435 0.66890565]
y2 not aggreed on  18 Proba:  [0.66640951 0.33359049]
product probas: [0.2206444256761493, 0.22314056088704326]
result 1
y1 disagree on 56  Proba:  [0.97337414 0.02662586]
y2 not aggreed on  56 Proba:  [0.30675737 0.69324263]
product probas: [0.29858968848732304, 0.018458184681194225]
result 0
y1 disagree on 61  Proba:  [0.64477421 0.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  180
Class  1  sample size:  214
(394, 102)
Class  0  sample size:  179
Class  1  sample size:  203
(382, 102)
Labeled:  394  :  382
Unlabeled:  2262  :  1463
labeled no citation link:  12
Unlabeled no citation link size:  799
(382, 102)
(382, 102)
(1463, 102)
(1463, 102)
(76, 100)
(1769, 100)
(1769, 100)
P:  1  N:  1
Initial L size:  306
Initial U size:  1463
Total Labeled number:  423  Still unlabeled number:  81
y1 disagree on 3  Proba:  [0.32352525 0.67647475]
y2 not aggreed on  3 Proba:  [0.85421474 0.14578526]
product probas: [0.27636003903635276, 0.0986200489829766]
result 0
y1 disagree on 10  Proba:  [0.92852494 0.07147506]
y2 not aggreed on  10 Proba:  [0.29979211 0.70020789]
product probas: [0.278364451289438, 0.0500473984511384]
result 0
y1 disagree on 11  Proba:  [0.38494623 0.61505377]
y2 not aggreed on  11 Proba:  [0.63268373 0.36731627]
product probas: [0.2435492139176693, 0.2259192592865606]
result 0
y1 disagree on 13  Proba:  [0.96796443 0.032035

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



a_popov  pass
For name:  w_liao
(79, 2)
total sample size before apply threshold:  79
Counter({'0000-0001-5362-6953': 29, '0000-0001-6383-3470': 25, '0000-0002-5619-4997': 16, '0000-0002-9768-0959': 5, '0000-0001-7221-5906': 3, '0000-0002-5333-2717': 1})
[]
w_liao  pass
For name:  j_zhong
(280, 2)
total sample size before apply threshold:  280
Counter({'0000-0002-2265-9338': 115, '0000-0002-1494-6396': 70, '0000-0003-3148-4143': 37, '0000-0002-3534-7480': 21, '0000-0003-1801-9642': 19, '0000-0001-7157-603X': 8, '0000-0002-8815-4105': 4, '0000-0002-8945-4599': 3, '0000-0002-0556-2964': 1, '0000-0003-2750-9782': 1, '0000-0001-8785-1729': 1})
['0000-0002-2265-9338']
j_zhong_0
Class  0  sample size:  115
Class  1  sample size:  165
(280, 102)
Class  0  sample size:  109
Class  1  sample size:  153
(262, 102)
Labeled:  280  :  262
Unlabeled:  2811  :  1843
labeled no citation link:  18
Unlabeled no citation link size:  968
(262, 102)
(262, 102)
(1843, 102)
(1843, 102)
(52, 100)
(2053, 100)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(785, 2)
total sample size before apply threshold:  785
Counter({'0000-0003-3791-7587': 146, '0000-0001-8153-1441': 115, '0000-0003-1224-6561': 64, '0000-0002-4192-3165': 49, '0000-0002-2499-8632': 39, '0000-0001-8667-0811': 33, '0000-0002-5887-0880': 24, '0000-0001-5227-2663': 23, '0000-0002-4350-7755': 23, '0000-0003-4913-8003': 22, '0000-0001-6460-2877': 21, '0000-0003-1954-334X': 20, '0000-0001-8572-649X': 20, '0000-0001-5574-7062': 15, '0000-0002-0352-2694': 15, '0000-0002-9390-795X': 13, '0000-0001-8904-1287': 13, '0000-0003-3410-3588': 12, '0000-0003-4384-8354': 9, '0000-0001-6833-8276': 9, '0000-0002-8746-3387': 9, '0000-0002-0796-0130': 8, '0000-0002-0435-7694': 8, '0000-0001-6454-0901': 7, '0000-0002-0123-9836': 6, '0000-0001-7120-4690': 6, '0000-0001-5100-6072': 6, '0000-0003-3913-5298': 6, '0000-0003-3177-5186': 5, '0000-0003-1240-7011': 5, '0000-0003-1470-4159': 5, '0000-0001-7910-1223': 4, '0000-0003-4289-894X': 4, '0000-0002-7289-5347': 4, '0000-0003-1328-1641': 2, '0000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  115
Class  1  sample size:  670
(785, 102)
Class  0  sample size:  102
Class  1  sample size:  638
(740, 102)
Labeled:  785  :  740
Unlabeled:  20255  :  15118
labeled no citation link:  45
Unlabeled no citation link size:  5137
(740, 102)
(740, 102)
(15118, 102)
(15118, 102)
(148, 100)
(15710, 100)
(15710, 100)
P:  1  N:  1
Initial L size:  592
Initial U size:  15118
Total Labeled number:  709  Still unlabeled number:  78


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


y1 disagree on 4  Proba:  [0.67330665 0.32669335]
y2 not aggreed on  4 Proba:  [0.13803353 0.86196647]
product probas: [0.09293889062954949, 0.2815987159488894]
result 1
y1 disagree on 12  Proba:  [0.13966901 0.86033099]
y2 not aggreed on  12 Proba:  [0.79568724 0.20431276]
product probas: [0.11113285122970237, 0.17577659470585608]
result 1
y1 disagree on 52  Proba:  [0.75853006 0.24146994]
y2 not aggreed on  52 Proba:  [0.0775891 0.9224109]
product probas: [0.058853665860311766, 0.22273450704804362]
result 1
y1 disagree on 63  Proba:  [0.54239395 0.45760605]
y2 not aggreed on  63 Proba:  [0.10438846 0.89561154]
product probas: [0.056619668427710726, 0.4098372620967351]
result 1
y1 disagree on 79  Proba:  [0.88729757 0.11270243]
y2 not aggreed on  79 Proba:  [0.0070412 0.9929588]
product probas: [0.006247642243697113, 0.11190886501246873]
result 1
y1 disagree on 100  Proba:  [0.02192907 0.97807093]
y2 not aggreed on  100 Proba:  [0.73032309 0.26967691]
product probas: [0.01601530746681

Class  0  sample size:  102
Class  1  sample size:  260
(362, 102)
Class  0  sample size:  102
Class  1  sample size:  234
(336, 102)
Labeled:  362  :  336
Unlabeled:  1359  :  863
labeled no citation link:  26
Unlabeled no citation link size:  496
(336, 102)
(336, 102)
(863, 102)
(863, 102)
(67, 100)
(1132, 100)
(1132, 100)
P:  1  N:  1
Initial L size:  269
Initial U size:  863
Total Labeled number:  384  Still unlabeled number:  80
y1 disagree on 6  Proba:  [0.78773592 0.21226408]
y2 not aggreed on  6 Proba:  [0.28836127 0.71163873]
product probas: [0.22715253130695479, 0.15105534053744107]
result 0
y1 disagree on 31  Proba:  [0.48600845 0.51399155]
y2 not aggreed on  31 Proba:  [0.85202477 0.14797523]
product probas: [0.4140912404230956, 0.07605801738017763]
result 0
y1 disagree on 41  Proba:  [0.11526009 0.88473991]
y2 not aggreed on  41 Proba:  [0.9006688 0.0993312]
product probas: [0.10381116355382597, 0.08788227688982873]
result 0
F1:  0.9819163292847504
[1 1 0 1 0 1 0 0 1 1 1 0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  145
Class  1  sample size:  73
(218, 102)
Class  0  sample size:  97
Class  1  sample size:  65
(162, 102)
Labeled:  218  :  162
Unlabeled:  2360  :  1504
labeled no citation link:  56
Unlabeled no citation link size:  856
(162, 102)
(162, 102)
(1504, 102)
(1504, 102)
(32, 100)
(1634, 100)
(1634, 100)
P:  1  N:  1
Initial L size:  130
Initial U size:  1504
Total Labeled number:  248  Still unlabeled number:  82
y1 disagree on 11  Proba:  [0.51095476 0.48904524]
y2 not aggreed on  11 Proba:  [0.37100647 0.62899353]
product probas: [0.1895675217896571, 0.30760629102707554]
result 1
y1 disagree on 13  Proba:  [0.14744186 0.85255814]
y2 not aggreed on  13 Proba:  [0.77520457 0.22479543]
product probas: [0.11429760542462077, 0.19165117218495714]
result 1
y1 disagree on 16  Proba:  [0.80127754 0.19872246]
y2 not aggreed on  16 Proba:  [0.47994361 0.52005639]
product probas: [0.3845680389748583, 0.10334688409222854]
result 0
y1 disagree on 18  Proba:  [0.70822334 0.291

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(202, 2)
total sample size before apply threshold:  202
Counter({'0000-0002-6045-6937': 149, '0000-0003-3442-2298': 39, '0000-0003-1766-4068': 6, '0000-0001-5913-2292': 5, '0000-0002-6155-5910': 2, '0000-0003-1467-3196': 1})
['0000-0002-6045-6937']
r_day_0
Class  0  sample size:  149
Class  1  sample size:  53
(202, 102)
Class  0  sample size:  137
Class  1  sample size:  49
(186, 102)
Labeled:  202  :  186
Unlabeled:  1809  :  1124
labeled no citation link:  16
Unlabeled no citation link size:  685
(186, 102)
(186, 102)
(1124, 102)
(1124, 102)
(37, 100)
(1273, 100)
(1273, 100)
P:  1  N:  1
Initial L size:  149
Initial U size:  1124
Total Labeled number:  262  Still unlabeled number:  82
y1 disagree on 23  Proba:  [0.98418419 0.01581581]
y2 not aggreed on  23 Proba:  [0.42897343 0.57102657]
product probas: [0.42218886374167136, 0.009031249826918646]
result 0
y1 disagree on 31  Proba:  [0.94793489 0.05206511]
y2 not aggreed on  31 Proba:  [0.49498839 0.50501161]
product probas: [0.46921

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  124
Class  1  sample size:  143
(267, 102)
Class  0  sample size:  111
Class  1  sample size:  134
(245, 102)
Labeled:  267  :  245
Unlabeled:  7455  :  4894
labeled no citation link:  22
Unlabeled no citation link size:  2561
(245, 102)
(245, 102)
(4894, 102)
(4894, 102)
(49, 100)
(5090, 100)
(5090, 100)
P:  1  N:  1
Initial L size:  196
Initial U size:  4894
Total Labeled number:  315  Still unlabeled number:  79
y1 disagree on 27  Proba:  [0.5484898 0.4515102]
y2 not aggreed on  27 Proba:  [0.3654646 0.6345354]
product probas: [0.20045360679754723, 0.2864992032497311]
result 1
y1 disagree on 33  Proba:  [0.47217624 0.52782376]
y2 not aggreed on  33 Proba:  [0.57398776 0.42601224]
product probas: [0.27102338550854244, 0.22485937982512472]
result 0
y1 disagree on 47  Proba:  [0.62188607 0.37811393]
y2 not aggreed on  47 Proba:  [0.13557943 0.86442057]
product probas: [0.08431495809359553, 0.3268494616210447]
result 1
F1:  0.9583333333333333
[1 1 1 1 1 1 0 0 0 1

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(164, 2)
total sample size before apply threshold:  164
Counter({'0000-0001-5207-4210': 40, '0000-0002-2036-1220': 32, '0000-0003-3454-3604': 13, '0000-0003-0437-2015': 12, '0000-0002-1142-3100': 10, '0000-0002-7398-4229': 7, '0000-0002-5118-7755': 6, '0000-0002-5083-1799': 6, '0000-0002-4862-7422': 6, '0000-0003-2642-4963': 4, '0000-0002-1375-4800': 4, '0000-0001-8231-5556': 3, '0000-0002-5095-6735': 3, '0000-0003-0219-0023': 3, '0000-0002-2686-745X': 2, '0000-0002-9462-7992': 2, '0000-0002-5871-7894': 1, '0000-0002-5948-2353': 1, '0000-0001-6136-3575': 1, '0000-0003-4097-6318': 1, '0000-0002-1689-2002': 1, '0000-0003-0081-0938': 1, '0000-0003-0073-0823': 1, '0000-0001-7501-1378': 1, '0000-0002-5583-4032': 1, '0000-0002-4617-0713': 1, '0000-0001-6710-0012': 1})
[]
x_yang  pass
For name:  s_bianchi
(45, 2)
total sample size before apply threshold:  45
Counter({'0000-0002-1365-9408': 19, '0000-0001-7290-8489': 10, '0000-0001-7673-3030': 6, '0000-0003-3731-5463': 5, '0000-0003-2292-4303'

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  131
Class  1  sample size:  26
(157, 102)
Class  0  sample size:  128
Class  1  sample size:  23
(151, 102)
Labeled:  157  :  151
Unlabeled:  1187  :  710
labeled no citation link:  6
Unlabeled no citation link size:  477
(151, 102)
(151, 102)
(710, 102)
(710, 102)
(30, 100)
(831, 100)
(831, 100)
P:  1  N:  1
Initial L size:  121
Initial U size:  710
Total Labeled number:  239  Still unlabeled number:  77
F1:  1.0
[0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
For name:  m_moore
(112, 2)
total sample size before apply threshold:  112
Counter({'0000-0002-5127-4509': 45, '0000-0003-3074-6631': 38, '0000-0002-7853-5756': 18, '0000-0003-4768-5329': 7, '0000-0002-7914-0166': 4})
[]
m_moore  pass
For name:  c_johnson
(300, 2)
total sample size before apply threshold:  300
Counter({'0000-0002-6864-6604': 114, '0000-0002-9719-3771': 47, '0000-0001-9616-6205': 44, '0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  114
Class  1  sample size:  186
(300, 102)
Class  0  sample size:  110
Class  1  sample size:  174
(284, 102)
Labeled:  300  :  284
Unlabeled:  8028  :  5096
labeled no citation link:  16
Unlabeled no citation link size:  2932
(284, 102)
(284, 102)
(5096, 102)
(5096, 102)
(57, 100)
(5323, 100)
(5323, 100)
P:  1  N:  1
Initial L size:  227
Initial U size:  5096
Total Labeled number:  342  Still unlabeled number:  80
y1 disagree on 29  Proba:  [0.80917156 0.19082844]
y2 not aggreed on  29 Proba:  [0.37956846 0.62043154]
product probas: [0.3071360067181207, 0.11839598100015405]
result 0
y1 disagree on 32  Proba:  [0.09478134 0.90521866]
y2 not aggreed on  32 Proba:  [0.91660304 0.08339696]
product probas: [0.08687686883350579, 0.07549248251507681]
result 0
y1 disagree on 41  Proba:  [0.14277311 0.85722689]
y2 not aggreed on  41 Proba:  [0.53121872 0.46878128]
product probas: [0.07584374881598742, 0.40185191689203426]
result 1
y1 disagree on 47  Proba:  [0.00809971 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(344, 2)
total sample size before apply threshold:  344
Counter({'0000-0003-0912-941X': 70, '0000-0003-3454-2089': 61, '0000-0001-6545-583X': 40, '0000-0003-1033-2546': 19, '0000-0001-9115-3296': 18, '0000-0002-3656-2596': 15, '0000-0002-3482-7001': 14, '0000-0002-9391-0155': 13, '0000-0001-9984-5385': 11, '0000-0002-1028-6255': 9, '0000-0001-6820-9896': 9, '0000-0002-0900-8370': 7, '0000-0003-4404-6089': 6, '0000-0002-8730-524X': 5, '0000-0002-8114-1539': 5, '0000-0001-6521-0998': 5, '0000-0002-8524-0809': 4, '0000-0001-5482-9744': 3, '0000-0001-9505-4842': 3, '0000-0002-5154-3318': 2, '0000-0002-4038-5924': 2, '0000-0001-5545-7831': 2, '0000-0002-1878-8516': 2, '0000-0003-1914-4955': 2, '0000-0001-5361-4303': 2, '0000-0002-4897-8812': 2, '0000-0001-5935-3829': 2, '0000-0003-3562-6807': 1, '0000-0001-9826-2508': 1, '0000-0001-5985-5781': 1, '0000-0002-0193-9349': 1, '0000-0001-9754-1724': 1, '0000-0003-4805-7383': 1, '0000-0002-0022-6240': 1, '0000-0001-7509-3115': 1, '0000-0001-5412-

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(101, 2)
total sample size before apply threshold:  101
Counter({'0000-0003-0450-5375': 51, '0000-0002-9615-9002': 27, '0000-0002-7263-6505': 9, '0000-0001-5168-9416': 8, '0000-0001-8479-9910': 4, '0000-0002-1262-5935': 2})
[]
m_young  pass
For name:  s_saraf
(54, 2)
total sample size before apply threshold:  54
Counter({'0000-0002-8384-9370': 38, '0000-0002-0569-1213': 13, '0000-0003-3905-0542': 2, '0000-0002-4180-0931': 1})
[]
s_saraf  pass
For name:  r_pinto
(85, 2)
total sample size before apply threshold:  85
Counter({'0000-0002-1667-7871': 36, '0000-0002-2775-860X': 21, '0000-0001-5600-2396': 8, '0000-0002-6429-2087': 8, '0000-0003-0058-8652': 6, '0000-0002-4068-7391': 2, '0000-0001-9402-5775': 2, '0000-0002-1251-5007': 1, '0000-0002-4512-5566': 1})
[]
r_pinto  pass
For name:  m_brito
(86, 2)
total sample size before apply threshold:  86
Counter({'0000-0002-8493-4649': 51, '0000-0001-6394-658X': 31, '0000-0002-8973-104X': 2, '0000-0001-9689-7040': 1, '0000-0002-1779-4535': 1})
[]

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Proba:  [0.21221882 0.78778118]
product probas: [0.132325009202427, 0.29657521539000464]
result 1
F1:  0.9603174603174603
[1 0 0 1 0 0 1 0 0 1 0 0 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0
 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 0 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1
 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 0 1 1 1 0 0 1 0 0 0 0 1
 1 0 1 1 0 1 0 0 0]
[1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0]
For name:  p_melo
(28, 2)
total sample size before apply threshold:  28
Counter({'0000-0003-0590-0684': 14, '0000-0002-4486-0200': 6, '0000-0002-3892-4140': 5, '0000-0002-4117-239X': 3})
[]
p_melo  pass
For name:  c_lemos
(52, 2)
total sample size before apply threshold: 

Unlabeled:  4278  :  2641
labeled no citation link:  77
Unlabeled no citation link size:  1637
(709, 102)
(709, 102)
(2641, 102)
(2641, 102)
(142, 100)
(3208, 100)
(3208, 100)
P:  1  N:  1
Initial L size:  567
Initial U size:  2641
Total Labeled number:  684  Still unlabeled number:  79
y1 disagree on 16  Proba:  [0.71204521 0.28795479]
y2 not aggreed on  16 Proba:  [0.47938361 0.52061639]
product probas: [0.34134280254894245, 0.14991398310256843]
result 0
y1 disagree on 24  Proba:  [0.50086319 0.49913681]
y2 not aggreed on  24 Proba:  [0.01292729 0.98707271]
product probas: [0.006474805647571776, 0.4926843219114295]
result 1
y1 disagree on 28  Proba:  [0.56474583 0.43525417]
y2 not aggreed on  28 Proba:  [0.10903197 0.89096803]
product probas: [0.06157534951328879, 0.38779754919372844]
result 1
y1 disagree on 34  Proba:  [0.65138927 0.34861073]
y2 not aggreed on  34 Proba:  [0.05161264 0.94838736]
product probas: [0.03361991668950216, 0.33061801262430507]
result 1
y1 disagree on 36  P

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



y2 not aggreed on  136 Proba:  [0.56619195 0.43380805]
product probas: [0.06889955286944559, 0.3810182168215542]
result 1
y1 disagree on 138  Proba:  [0.68449257 0.31550743]
y2 not aggreed on  138 Proba:  [0.42619264 0.57380736]
product probas: [0.2917256969279583, 0.1810404852791677]
result 0
F1:  0.9268983268983269
[1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 0 0 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 0
 1 1 1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 0
 1 0 1 1 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 0 1 1 1 0 0 1 0 1 0 1 1 1 1 0 1 0 1
 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 1 1]
[1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



For name:  m_guerra
(18, 2)
total sample size before apply threshold:  18
Counter({'0000-0001-6286-4048': 8, '0000-0003-1970-7439': 4, '0000-0002-3655-9004': 3, '0000-0003-3863-8520': 3})
[]
m_guerra  pass
For name:  h_suzuki
(82, 2)
total sample size before apply threshold:  82
Counter({'0000-0003-4682-5086': 39, '0000-0002-8150-140X': 15, '0000-0003-4600-2506': 14, '0000-0002-8555-5448': 9, '0000-0001-5371-6385': 5})
[]
h_suzuki  pass
For name:  m_cohen
(251, 2)
total sample size before apply threshold:  251
Counter({'0000-0003-2038-6070': 103, '0000-0002-1879-3593': 69, '0000-0002-6090-2394': 46, '0000-0001-6731-4053': 13, '0000-0003-3183-2558': 8, '0000-0002-1548-2773': 4, '0000-0001-6362-6148': 4, '0000-0002-5876-6565': 3, '0000-0002-1372-680X': 1})
['0000-0003-2038-6070']
m_cohen_0
Class  0  sample size:  103
Class  1  sample size:  148
(251, 102)
Class  0  sample size:  96
Class  1  sample size:  143
(239, 102)
Labeled:  251  :  239
Unlabeled:  9697  :  5941
labeled no citation

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  115
Class  1  sample size:  54
(169, 102)
Class  0  sample size:  110
Class  1  sample size:  50
(160, 102)
Labeled:  169  :  160
Unlabeled:  960  :  597
labeled no citation link:  9
Unlabeled no citation link size:  363
(160, 102)
(160, 102)
(597, 102)
(597, 102)
(32, 100)
(725, 100)
(725, 100)
P:  1  N:  1
Initial L size:  128
Initial U size:  597
Total Labeled number:  246  Still unlabeled number:  80
y1 disagree on 16  Proba:  [0.85860734 0.14139266]
y2 not aggreed on  16 Proba:  [0.47938784 0.52061216]
product probas: [0.41160591632966187, 0.0736107371027387]
result 0
y1 disagree on 23  Proba:  [0.06609675 0.93390325]
y2 not aggreed on  23 Proba:  [0.51625377 0.48374623]
product probas: [0.03412269879242117, 0.4517721709708543]
result 1
F1:  0.951734539969834
[0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0]
For name:  c_west
(181, 2)
total sampl

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  155
Class  1  sample size:  26
(181, 102)
Class  0  sample size:  144
Class  1  sample size:  25
(169, 102)
Labeled:  181  :  169
Unlabeled:  2134  :  1333
labeled no citation link:  12
Unlabeled no citation link size:  801
(169, 102)
(169, 102)
(1333, 102)
(1333, 102)
(34, 100)
(1468, 100)
(1468, 100)
P:  1  N:  1
Initial L size:  135
Initial U size:  1333
Total Labeled number:  248  Still unlabeled number:  83
F1:  0.9457735247208932
[0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0]
[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
For name:  a_marino
(15, 2)
total sample size before apply threshold:  15
Counter({'0000-0002-1709-538X': 7, '0000-0002-0528-4925': 6, '0000-0003-0308-859X': 1, '0000-0001-8751-8811': 1})
[]
a_marino  pass
For name:  r_jiang
(102, 2)
total sample size before apply threshold:  102
Counter({'0000-0002-8280-6029': 54, '0000-0002-7533-3753': 28, '0000-0002-3816-463

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  166
Class  1  sample size:  156
(322, 102)
Class  0  sample size:  146
Class  1  sample size:  145
(291, 102)
Labeled:  322  :  291
Unlabeled:  1440  :  1025
labeled no citation link:  31
Unlabeled no citation link size:  415
(291, 102)
(291, 102)
(1025, 102)
(1025, 102)
(58, 100)
(1258, 100)
(1258, 100)
P:  1  N:  1
Initial L size:  233
Initial U size:  1025
Total Labeled number:  348  Still unlabeled number:  80
y1 disagree on 16  Proba:  [0.34545199 0.65454801]
y2 not aggreed on  16 Proba:  [0.83583506 0.16416494]
product probas: [0.28874088156214095, 0.10745383726248643]
result 0
y1 disagree on 39  Proba:  [0.57037423 0.42962577]
y2 not aggreed on  39 Proba:  [0.19369286 0.80630714]
product probas: [0.11047741768052312, 0.3464103264747595]
result 1
F1:  1.0
[0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 0 0 1 0 0 1 0 0 0 1 1 0 0 1 0 0 1 0
 0 0 1 0 0 1 1 1 0 1 1 0 1 0 0 1 0 1 0 1 1]
[0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  331
Class  1  sample size:  276
(607, 102)
Class  0  sample size:  325
Class  1  sample size:  257
(582, 102)
Labeled:  607  :  582
Unlabeled:  4918  :  3348
labeled no citation link:  25
Unlabeled no citation link size:  1570
(582, 102)
(582, 102)
(3348, 102)
(3348, 102)
(116, 100)
(3814, 100)
(3814, 100)
P:  1  N:  1
Initial L size:  466
Initial U size:  3348
Total Labeled number:  579  Still unlabeled number:  82
y1 disagree on 2  Proba:  [0.83645803 0.16354197]
y2 not aggreed on  2 Proba:  [0.37509205 0.62490795]
product probas: [0.31374875514883804, 0.10219867823764432]
result 0
y1 disagree on 70  Proba:  [0.74589014 0.25410986]
y2 not aggreed on  70 Proba:  [0.40019252 0.59980748]
product probas: [0.29849965999894434, 0.15241699068242714]
result 0
y1 disagree on 76  Proba:  [0.2477894 0.7522106]
y2 not aggreed on  76 Proba:  [0.59146482 0.40853518]
product probas: [0.14655871604095846, 0.3073044882496816]
result 1
y1 disagree on 83  Proba:  [0.81602117 0.1

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(43, 2)
total sample size before apply threshold:  43
Counter({'0000-0002-5045-0507': 34, '0000-0002-3971-3505': 2, '0000-0003-0866-7822': 2, '0000-0002-0024-2742': 2, '0000-0002-3274-5707': 2, '0000-0001-6205-9719': 1})
[]
c_davis  pass
For name:  e_hall
(115, 2)
total sample size before apply threshold:  115
Counter({'0000-0001-5999-5020': 49, '0000-0002-5306-082X': 34, '0000-0002-9477-8619': 24, '0000-0002-9206-4436': 4, '0000-0002-2815-6651': 2, '0000-0003-0244-7458': 2})
[]
e_hall  pass
For name:  g_volpe
(31, 2)
total sample size before apply threshold:  31
Counter({'0000-0001-9993-5348': 15, '0000-0001-5057-1846': 14, '0000-0002-3916-5393': 1, '0000-0003-0760-4627': 1})
[]
g_volpe  pass
For name:  r_lewis
(427, 2)
total sample size before apply threshold:  427
Counter({'0000-0003-3470-923X': 185, '0000-0002-2002-4339': 175, '0000-0003-4044-9104': 41, '0000-0002-4598-7553': 7, '0000-0003-1395-3276': 6, '0000-0003-1859-0021': 4, '0000-0001-9929-2629': 3, '0000-0001-6642-5771': 3, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  185
Class  1  sample size:  242
(427, 102)
Class  0  sample size:  178
Class  1  sample size:  228
(406, 102)
Labeled:  427  :  406
Unlabeled:  4624  :  2726
labeled no citation link:  21
Unlabeled no citation link size:  1898
(406, 102)
(406, 102)
(2726, 102)
(2726, 102)
(81, 100)
(3051, 100)
(3051, 100)
P:  1  N:  1
Initial L size:  325
Initial U size:  2726
Total Labeled number:  444  Still unlabeled number:  79
y1 disagree on 48  Proba:  [0.149337 0.850663]
y2 not aggreed on  48 Proba:  [0.83086982 0.16913018]
product probas: [0.12407960295535668, 0.14387278759605635]
result 1
y1 disagree on 56  Proba:  [0.33218033 0.66781967]
y2 not aggreed on  56 Proba:  [0.58696406 0.41303594]
product probas: [0.1949779141986124, 0.2758335275088222]
result 1
F1:  0.975
[0 0 0 1 1 1 1 0 0 0 1 1 0 1 1 0 1 1 1 0 1 1 0 0 0 1 1 1 0 1 1 1 0 0 1 1 0
 0 1 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 1 0 1 1 1 1 0 0 1 0 1 0 1 0 1 0 0 0 0 1
 1 1 1 1 0 0 1]
[0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  144
Class  1  sample size:  40
(184, 102)
Class  0  sample size:  127
Class  1  sample size:  35
(162, 102)
Labeled:  184  :  162
Unlabeled:  1757  :  1005
labeled no citation link:  22
Unlabeled no citation link size:  752
(162, 102)
(162, 102)
(1005, 102)
(1005, 102)
(32, 100)
(1135, 100)
(1135, 100)
P:  1  N:  1
Initial L size:  130
Initial U size:  1005
Total Labeled number:  249  Still unlabeled number:  78
y1 disagree on 2  Proba:  [0.88943364 0.11056636]
y2 not aggreed on  2 Proba:  [0.41558147 0.58441853]
product probas: [0.36963213930789546, 0.06461702972296987]
result 0
y1 disagree on 3  Proba:  [0.65644009 0.34355991]
y2 not aggreed on  3 Proba:  [0.39075525 0.60924475]
product probas: [0.25650741020326234, 0.2093120714488297]
result 0
y1 disagree on 4  Proba:  [0.14428246 0.85571754]
y2 not aggreed on  4 Proba:  [0.98371688 0.01628312]
product probas: [0.14193309064595186, 0.013933747710722318]
result 0
y1 disagree on 5  Proba:  [0.41026021 0.5897397

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Counter({'0000-0002-3323-8846': 30, '0000-0002-8127-4753': 7, '0000-0001-9437-6292': 4, '0000-0002-3591-4959': 1})
[]
s_qin  pass
For name:  a_fabbri
(64, 2)
total sample size before apply threshold:  64
Counter({'0000-0003-2603-9715': 53, '0000-0003-0097-6348': 5, '0000-0003-2340-9338': 4, '0000-0002-3520-2417': 2})
[]
a_fabbri  pass
For name:  l_robinson
(93, 2)
total sample size before apply threshold:  93
Counter({'0000-0003-0209-2503': 61, '0000-0002-9016-648X': 13, '0000-0001-6811-0140': 8, '0000-0003-1972-4204': 6, '0000-0001-9287-6082': 3, '0000-0001-9544-5923': 1, '0000-0002-2236-0651': 1})
[]
l_robinson  pass
For name:  r_gross
(71, 2)
total sample size before apply threshold:  71
Counter({'0000-0001-5884-3607': 38, '0000-0003-4524-7552': 23, '0000-0003-0311-3003': 10})
[]
r_gross  pass
For name:  j_ahn
(130, 2)
total sample size before apply threshold:  130
Counter({'0000-0002-8135-7719': 69, '0000-0002-0177-0192': 26, '0000-0001-9341-009X': 14, '0000-0002-0394-9217': 6, '00

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  174
Class  1  sample size:  15
(189, 102)
Class  0  sample size:  157
Class  1  sample size:  5
(162, 102)
Labeled:  189  :  162
Unlabeled:  1370  :  907
labeled no citation link:  27
Unlabeled no citation link size:  463
(162, 102)
(162, 102)
(907, 102)
(907, 102)
(32, 100)
(1037, 100)
(1037, 100)
P:  1  N:  1
Initial L size:  130
Initial U size:  907
Total Labeled number:  236  Still unlabeled number:  90
y1 disagree on 20  Proba:  [0.13733773 0.86266227]
y2 not aggreed on  20 Proba:  [0.90078029 0.09921971]
product probas: [0.12371112294451343, 0.08559310408944451]
result 0
F1:  0.49206349206349204
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
For name:  s_chang
(592, 2)
total sample size before apply threshold:  592
Counter({'0000-0001-6505-4139': 322, '0000-0002-5620-0867': 61, '0000-0003-3751-1720': 37, '0000-0002-6164-0875': 28, '0000-0002-

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)


Class  0  sample size:  322
Class  1  sample size:  270
(592, 102)
Class  0  sample size:  314
Class  1  sample size:  252
(566, 102)
Labeled:  592  :  566
Unlabeled:  12026  :  8870
labeled no citation link:  26
Unlabeled no citation link size:  3156
(566, 102)
(566, 102)
(8870, 102)
(8870, 102)
(113, 100)
(9323, 100)
(9323, 100)
P:  1  N:  1
Initial L size:  453
Initial U size:  8870
Total Labeled number:  568  Still unlabeled number:  80
y1 disagree on 7  Proba:  [9.99240475e-01 7.59525256e-04]
y2 not aggreed on  7 Proba:  [0.49718658 0.50281342]
product probas: [0.49680895089540356, 0.00038189949417410647]
result 0
y1 disagree on 9  Proba:  [0.07658161 0.92341839]
y2 not aggreed on  9 Proba:  [0.80873821 0.19126179]
product probas: [0.06193447804427488, 0.17661465535572782]
result 1
y1 disagree on 24  Proba:  [0.56623458 0.43376542]
y2 not aggreed on  24 Proba:  [0.10216764 0.89783236]
product probas: [0.05785084912287397, 0.3894486298546989]
result 1
y1 disagree on 25  Proba:  [0.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



i_wilson_0
Class  0  sample size:  102
Class  1  sample size:  118
(220, 102)
Class  0  sample size:  98
Class  1  sample size:  113
(211, 102)
Labeled:  220  :  211
Unlabeled:  1958  :  1457
labeled no citation link:  9
Unlabeled no citation link size:  501
(211, 102)
(211, 102)
(1457, 102)
(1457, 102)
(42, 100)
(1626, 100)
(1626, 100)
P:  1  N:  1
Initial L size:  169
Initial U size:  1457
Total Labeled number:  284  Still unlabeled number:  82
y1 disagree on 30  Proba:  [0.77687581 0.22312419]
y2 not aggreed on  30 Proba:  [0.02461106 0.97538894]
product probas: [0.019119737451884406, 0.21763286363933804]
result 1
y1 disagree on 34  Proba:  [0.87318177 0.12681823]
y2 not aggreed on  34 Proba:  [0.17399159 0.82600841]
product probas: [0.15192628453705231, 0.10475292332413016]
result 0
y1 disagree on 36  Proba:  [0.88003632 0.11996368]
y2 not aggreed on  36 Proba:  [0.49895165 0.50104835]
product probas: [0.4390955709908289, 0.06010760402837461]
result 0
F1:  0.9758481886141461
[1 1 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(217, 2)
total sample size before apply threshold:  217
Counter({'0000-0002-3261-6868': 98, '0000-0001-5372-7885': 63, '0000-0003-2084-2718': 35, '0000-0002-6740-2472': 12, '0000-0002-3652-7029': 7, '0000-0001-9951-4674': 2})
[]
m_vitale  pass
For name:  r_castro
(116, 2)
total sample size before apply threshold:  116
Counter({'0000-0002-0959-7363': 43, '0000-0002-7417-0091': 35, '0000-0002-1329-965X': 11, '0000-0002-1263-9034': 6, '0000-0002-4381-3605': 5, '0000-0002-0701-2528': 4, '0000-0002-8054-1469': 3, '0000-0001-6873-9854': 3, '0000-0002-9337-062X': 2, '0000-0002-4698-7993': 2, '0000-0002-3769-7660': 1, '0000-0002-7289-9081': 1})
[]
r_castro  pass
For name:  a_hassan
(16, 2)
total sample size before apply threshold:  16
Counter({'0000-0001-9509-9266': 7, '0000-0002-7719-0805': 4, '0000-0001-9346-3765': 2, '0000-0001-8842-1798': 1, '0000-0002-1853-7987': 1, '0000-0002-5574-8791': 1})
[]
a_hassan  pass
For name:  w_martin
(259, 2)
total sample size before apply threshold:  259
Cou

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(12, 2)
total sample size before apply threshold:  12
Counter({'0000-0002-9089-5367': 4, '0000-0002-0158-703X': 2, '0000-0002-7269-094X': 2, '0000-0001-5589-8741': 1, '0000-0001-7789-2842': 1, '0000-0003-4271-395X': 1, '0000-0001-6832-9150': 1})
[]
s_hasan  pass
For name:  m_teixeira
(313, 2)
total sample size before apply threshold:  313
Counter({'0000-0003-4124-6237': 149, '0000-0002-5676-6174': 51, '0000-0002-4896-5982': 48, '0000-0001-9355-2143': 17, '0000-0002-9466-7951': 17, '0000-0002-6944-3008': 13, '0000-0001-7456-5192': 7, '0000-0002-3338-8588': 4, '0000-0003-3989-9474': 3, '0000-0002-2228-2673': 2, '0000-0003-1205-3233': 2})
['0000-0003-4124-6237']
m_teixeira_0
Class  0  sample size:  149
Class  1  sample size:  164
(313, 102)
Class  0  sample size:  145
Class  1  sample size:  157
(302, 102)
Labeled:  313  :  302
Unlabeled:  2069  :  1460
labeled no citation link:  11
Unlabeled no citation link size:  609
(302, 102)
(302, 102)
(1460, 102)
(1460, 102)
(60, 100)
(1702, 100)
(

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



c_su_0
Class  0  sample size:  140
Class  1  sample size:  157
(297, 102)
Class  0  sample size:  128
Class  1  sample size:  155
(283, 102)
Labeled:  297  :  283
Unlabeled:  3631  :  2504
labeled no citation link:  14
Unlabeled no citation link size:  1127
(283, 102)
(283, 102)
(2504, 102)
(2504, 102)
(57, 100)
(2730, 100)
(2730, 100)
P:  1  N:  1
Initial L size:  226
Initial U size:  2504
Total Labeled number:  339  Still unlabeled number:  82
y1 disagree on 8  Proba:  [0.93575094 0.06424906]
y2 not aggreed on  8 Proba:  [0.45274905 0.54725095]
product probas: [0.4236603534990896, 0.03516035813421789]
result 0
y1 disagree on 30  Proba:  [0.27920002 0.72079998]
y2 not aggreed on  30 Proba:  [0.77779491 0.22220509]
product probas: [0.2171603526457733, 0.16016542656815996]
result 0
y1 disagree on 39  Proba:  [0.67544988 0.32455012]
y2 not aggreed on  39 Proba:  [0.3579578 0.6420422]
product probas: [0.24178255326251896, 0.20837487213186048]
result 0
y1 disagree on 52  Proba:  [0.564514

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(148, 2)
total sample size before apply threshold:  148
Counter({'0000-0002-5910-7625': 69, '0000-0002-2268-9748': 48, '0000-0001-6195-3284': 10, '0000-0001-7562-6049': 8, '0000-0002-4688-3414': 6, '0000-0002-6851-8899': 3, '0000-0002-5278-9045': 2, '0000-0002-9161-167X': 1, '0000-0002-3141-3567': 1})
[]
p_thompson  pass
For name:  a_castro
(126, 2)
total sample size before apply threshold:  126
Counter({'0000-0001-7526-6717': 39, '0000-0002-8311-0840': 17, '0000-0003-0428-9174': 15, '0000-0002-9253-7926': 14, '0000-0001-6964-6879': 13, '0000-0003-4035-3444': 11, '0000-0003-0524-156X': 7, '0000-0003-3052-6225': 4, '0000-0003-0328-1381': 3, '0000-0002-8025-4945': 2, '0000-0003-3327-967X': 1})
[]
a_castro  pass
For name:  j_zhang
(965, 2)
total sample size before apply threshold:  965
Counter({'0000-0002-4319-4285': 188, '0000-0002-6601-9180': 58, '0000-0003-2493-5209': 58, '0000-0003-3373-9621': 57, '0000-0002-0889-7057': 40, '0000-0001-8041-1608': 28, '0000-0002-9831-6796': 24, '0000-0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(117, 2)
total sample size before apply threshold:  117
Counter({'0000-0001-6699-1765': 45, '0000-0002-2558-3367': 38, '0000-0002-9818-7429': 12, '0000-0002-9643-5580': 8, '0000-0002-9042-2391': 6, '0000-0001-6063-7327': 4, '0000-0002-8112-9687': 4})
[]
s_watson  pass
For name:  c_barros
(34, 2)
total sample size before apply threshold:  34
Counter({'0000-0003-4666-5000': 16, '0000-0003-3244-7467': 13, '0000-0003-2330-398X': 2, '0000-0002-5863-2874': 2, '0000-0003-2236-4553': 1})
[]
c_barros  pass
For name:  f_cardoso
(178, 2)
total sample size before apply threshold:  178
Counter({'0000-0002-6692-2249': 139, '0000-0002-0068-9974': 18, '0000-0002-4391-1336': 9, '0000-0002-7042-1287': 7, '0000-0003-2249-9407': 5})
['0000-0002-6692-2249']
f_cardoso_0
Class  0  sample size:  139
Class  1  sample size:  39
(178, 102)
Class  0  sample size:  107
Class  1  sample size:  38
(145, 102)
Labeled:  178  :  145
Unlabeled:  503  :  319
labeled no citation link:  33
Unlabeled no citation link size: 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(441, 2)
total sample size before apply threshold:  441
Counter({'0000-0002-2775-3315': 98, '0000-0002-8439-6035': 42, '0000-0003-0018-8712': 25, '0000-0003-2379-2226': 23, '0000-0001-9760-9514': 21, '0000-0002-7491-6711': 21, '0000-0003-2206-4593': 20, '0000-0002-4850-8204': 19, '0000-0003-4897-3277': 15, '0000-0003-3257-2508': 15, '0000-0001-5408-9029': 14, '0000-0002-1161-6586': 13, '0000-0002-9663-4790': 13, '0000-0001-7348-9861': 12, '0000-0002-7532-5315': 10, '0000-0002-9210-9681': 10, '0000-0003-4805-7930': 9, '0000-0001-6121-5804': 8, '0000-0002-3864-9521': 7, '0000-0001-9302-7840': 6, '0000-0003-2891-8086': 4, '0000-0003-3179-6892': 4, '0000-0001-7938-8420': 3, '0000-0001-6336-6462': 3, '0000-0003-3284-9407': 3, '0000-0001-5007-7469': 2, '0000-0002-3280-1991': 2, '0000-0002-2894-3364': 2, '0000-0003-3940-8663': 2, '0000-0002-5086-7345': 2, '0000-0001-8047-9821': 2, '0000-0002-6639-8002': 1, '0000-0002-0723-5035': 1, '0000-0002-8328-4082': 1, '0000-0002-4663-3263': 1, '0000-000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  487
Class  1  sample size:  426
(913, 102)
Class  0  sample size:  456
Class  1  sample size:  404
(860, 102)
Labeled:  913  :  860
Unlabeled:  4208  :  2402
labeled no citation link:  53
Unlabeled no citation link size:  1806
(860, 102)
(860, 102)
(2402, 102)
(2402, 102)
(172, 100)
(3090, 100)
(3090, 100)
P:  1  N:  1
Initial L size:  688
Initial U size:  2402
Total Labeled number:  807  Still unlabeled number:  86
y1 disagree on 18  Proba:  [0.34515084 0.65484916]
y2 not aggreed on  18 Proba:  [0.99792455 0.00207545]
product probas: [0.3444345013755744, 0.0013591042619322997]
result 0
y1 disagree on 35  Proba:  [0.49098994 0.50901006]
y2 not aggreed on  35 Proba:  [0.53430544 0.46569456]
product probas: [0.2623385979992133, 0.23704321397574651]
result 0
y1 disagree on 43  Proba:  [0.3236334 0.6763666]
y2 not aggreed on  43 Proba:  [0.59190336 0.40809664]
product probas: [0.19155969782722154, 0.27602293573418435]
result 1
y1 disagree on 60  Proba:  [0.78781856 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0.5481553 0.4518447]
product probas: [0.03651679310195732, 0.42174388927097556]
result 1
y1 disagree on 145  Proba:  [0.47192099 0.52807901]
y2 not aggreed on  145 Proba:  [0.75055911 0.24944089]
product probas: [0.35420459756657546, 0.13172449762595279]
result 0
y1 disagree on 153  Proba:  [0.57093429 0.42906571]
y2 not aggreed on  153 Proba:  [0.05380634 0.94619366]
product probas: [0.030719886082244668, 0.4059792527482888]
result 1
y1 disagree on 158  Proba:  [0.45652889 0.54347111]
y2 not aggreed on  158 Proba:  [0.78859895 0.21140105]
product probas: [0.36001820115882216, 0.11489036419445932]
result 0
y1 disagree on 160  Proba:  [0.33588027 0.66411973]
y2 not aggreed on  160 Proba:  [0.99770602 0.00229398]
product probas: [0.3351097726363614, 0.0015234746913981019]
result 0
y1 disagree on 167  Proba:  [0.83511177 0.16488823]
y2 not aggreed on  167 Proba:  [0.01687787 0.98312213]
product probas: [0.014094911190802473, 0.1621052626771975]
result 1
F1:  0.9764866712235133
[1 1 1 0 0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(118, 2)
total sample size before apply threshold:  118
Counter({'0000-0003-1801-9386': 79, '0000-0002-7896-7655': 16, '0000-0002-6623-3806': 7, '0000-0002-4203-0457': 6, '0000-0001-8038-5876': 3, '0000-0002-7183-3400': 3, '0000-0003-4233-0174': 2, '0000-0001-5171-2890': 2})
[]
w_choi  pass
For name:  d_tavares
(13, 2)
total sample size before apply threshold:  13
Counter({'0000-0002-3196-7922': 4, '0000-0002-6811-9572': 3, '0000-0002-6807-8504': 3, '0000-0002-3358-9443': 2, '0000-0003-4646-5914': 1})
[]
d_tavares  pass
For name:  l_alves
(51, 2)
total sample size before apply threshold:  51
Counter({'0000-0001-6245-775X': 14, '0000-0001-5855-2754': 11, '0000-0001-5369-5019': 9, '0000-0002-1972-2658': 5, '0000-0002-7938-9850': 4, '0000-0002-7531-3648': 2, '0000-0002-8944-1851': 2, '0000-0001-8069-6527': 1, '0000-0003-4650-3140': 1, '0000-0002-8400-6148': 1, '0000-0001-6659-6431': 1})
[]
l_alves  pass
For name:  s_chan
(176, 2)
total sample size before apply threshold:  176
Counter({'00

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(514, 2)
total sample size before apply threshold:  514
Counter({'0000-0002-4328-8716': 71, '0000-0002-3476-3802': 53, '0000-0001-6312-3719': 48, '0000-0003-4082-420X': 42, '0000-0002-0352-3144': 41, '0000-0001-9138-3075': 32, '0000-0001-8558-062X': 25, '0000-0003-0475-8399': 18, '0000-0001-5990-1346': 17, '0000-0002-2792-6247': 17, '0000-0002-6238-2871': 17, '0000-0003-0561-2340': 15, '0000-0001-8144-8496': 13, '0000-0001-8109-4974': 13, '0000-0002-9147-3879': 12, '0000-0002-8613-3597': 12, '0000-0002-0521-4230': 11, '0000-0003-4927-4814': 10, '0000-0002-5542-7576': 6, '0000-0003-0760-9209': 5, '0000-0002-0487-0420': 5, '0000-0002-3527-6600': 5, '0000-0003-1163-321X': 3, '0000-0003-3368-3082': 3, '0000-0002-5527-6819': 3, '0000-0002-6815-3316': 3, '0000-0003-3456-0455': 2, '0000-0001-5463-6926': 2, '0000-0001-6067-7505': 2, '0000-0002-9579-4426': 2, '0000-0002-2196-6854': 2, '0000-0002-5682-8531': 1, '0000-0002-1380-9533': 1, '0000-0001-7768-4066': 1, '0000-0001-5615-2693': 1})
[]
c_y

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  120
Class  1  sample size:  243
(363, 102)
Class  0  sample size:  115
Class  1  sample size:  173
(288, 102)
Labeled:  363  :  288
Unlabeled:  2278  :  1346
labeled no citation link:  75
Unlabeled no citation link size:  932
(288, 102)
(288, 102)
(1346, 102)
(1346, 102)
(58, 100)
(1576, 100)
(1576, 100)
P:  1  N:  1
Initial L size:  230
Initial U size:  1346
Total Labeled number:  343  Still unlabeled number:  82
y1 disagree on 7  Proba:  [0.90711452 0.09288548]
y2 not aggreed on  7 Proba:  [0.20330969 0.79669031]
product probas: [0.18442516954778643, 0.0740009617966483]
result 0
y1 disagree on 22  Proba:  [0.71681217 0.28318783]
y2 not aggreed on  22 Proba:  [0.45403319 0.54596681]
product probas: [0.3254565147089952, 0.15461115772436623]
result 0
F1:  1.0
[1 0 1 1 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 0 1 0 0 0 1 0 1 1 0 0 0 1 0 1 0 1
 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1]
[1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  149
Class  1  sample size:  54
(203, 102)
Class  0  sample size:  143
Class  1  sample size:  51
(194, 102)
Labeled:  203  :  194
Unlabeled:  174  :  122
labeled no citation link:  9
Unlabeled no citation link size:  52
(194, 102)
(194, 102)
(122, 102)
(122, 102)
(39, 100)
(277, 100)
(277, 100)
P:  1  N:  1
Initial L size:  155
Initial U size:  122
Total Labeled number:  273  Still unlabeled number:  8
y1 disagree on 10  Proba:  [0.38768129 0.61231871]
y2 not aggreed on  10 Proba:  [0.78929688 0.21070312]
product probas: [0.30599563098941807, 0.12901746198215872]
result 0
y1 disagree on 12  Proba:  [0.56453992 0.43546008]
y2 not aggreed on  12 Proba:  [0.05639205 0.94360795]
product probas: [0.0318355635748728, 0.41090359689787137]
result 1
y1 disagree on 31  Proba:  [0.69526856 0.30473144]
y2 not aggreed on  31 Proba:  [0.07504799 0.92495201]
product probas: [0.05217851050385661, 0.2818619609189054]
result 1
y1 disagree on 33  Proba:  [0.19785463 0.80214537]
y2

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(97, 2)
total sample size before apply threshold:  97
Counter({'0000-0002-9495-6892': 82, '0000-0002-1694-3295': 11, '0000-0002-7906-3324': 2, '0000-0002-3476-395X': 1, '0000-0001-6389-1048': 1})
[]
r_wood  pass
For name:  y_ding
(106, 2)
total sample size before apply threshold:  106
Counter({'0000-0003-1352-1000': 21, '0000-0002-6823-4722': 21, '0000-0001-7772-6449': 19, '0000-0002-8845-4618': 15, '0000-0001-7461-0213': 8, '0000-0001-8161-2743': 7, '0000-0003-4761-5486': 4, '0000-0003-0465-7870': 4, '0000-0003-1176-6397': 3, '0000-0001-8312-8672': 2, '0000-0002-9713-5694': 1, '0000-0002-0010-8279': 1})
[]
y_ding  pass
For name:  j_rasmussen
(33, 2)
total sample size before apply threshold:  33
Counter({'0000-0002-3543-690X': 15, '0000-0001-6997-3773': 9, '0000-0003-2898-1771': 6, '0000-0002-8389-6935': 1, '0000-0003-3426-551X': 1, '0000-0003-3257-5653': 1})
[]
j_rasmussen  pass
For name:  n_lee
(108, 2)
total sample size before apply threshold:  108
Counter({'0000-0002-5011-7499': 74

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(135, 2)
total sample size before apply threshold:  135
Counter({'0000-0002-2975-7977': 52, '0000-0002-5778-4008': 16, '0000-0002-1947-4420': 15, '0000-0002-4388-6548': 13, '0000-0003-0561-5058': 10, '0000-0002-1156-9046': 8, '0000-0001-9892-4292': 4, '0000-0002-4577-2886': 4, '0000-0003-3187-2023': 3, '0000-0002-5840-007X': 3, '0000-0003-4173-8565': 3, '0000-0002-7827-0719': 1, '0000-0002-0962-902X': 1, '0000-0003-0951-0624': 1, '0000-0001-6460-408X': 1})
[]
h_jiang  pass
For name:  a_lewis
(98, 2)
total sample size before apply threshold:  98
Counter({'0000-0002-4075-3651': 41, '0000-0002-2519-7976': 37, '0000-0002-7986-0956': 8, '0000-0002-0756-7320': 6, '0000-0002-4195-1035': 4, '0000-0001-5373-7231': 1, '0000-0003-4737-2525': 1})
[]
a_lewis  pass
For name:  c_meyer
(136, 2)
total sample size before apply threshold:  136
Counter({'0000-0001-7599-3973': 34, '0000-0002-9877-1393': 29, '0000-0003-1334-2512': 27, '0000-0002-7214-9598': 18, '0000-0002-2268-3055': 14, '0000-0003-0851-276

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  125
Class  1  sample size:  73
(198, 102)
Class  0  sample size:  120
Class  1  sample size:  69
(189, 102)
Labeled:  198  :  189
Unlabeled:  2759  :  1523
labeled no citation link:  9
Unlabeled no citation link size:  1236
(189, 102)
(189, 102)
(1523, 102)
(1523, 102)
(38, 100)
(1674, 100)
(1674, 100)
P:  1  N:  1
Initial L size:  151
Initial U size:  1523
Total Labeled number:  270  Still unlabeled number:  76
y1 disagree on 0  Proba:  [0.9656465 0.0343535]
y2 not aggreed on  0 Proba:  [0.45947548 0.54052452]
product probas: [0.4436908934954977, 0.01856890727639427]
result 0
y1 disagree on 25  Proba:  [0.03200408 0.96799592]
y2 not aggreed on  25 Proba:  [0.74581794 0.25418206]
product probas: [0.02386921554971272, 0.24604719913627343]
result 1
F1:  0.9732205778717407
[0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0 1 1 1
 0]
[0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0]
F

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  141
Class  1  sample size:  173
(314, 102)
Class  0  sample size:  140
Class  1  sample size:  152
(292, 102)
Labeled:  314  :  292
Unlabeled:  3847  :  2272
labeled no citation link:  22
Unlabeled no citation link size:  1575
(292, 102)
(292, 102)
(2272, 102)
(2272, 102)
(58, 100)
(2506, 100)
(2506, 100)
P:  1  N:  1
Initial L size:  234
Initial U size:  2272
Total Labeled number:  349  Still unlabeled number:  82
y1 disagree on 27  Proba:  [0.94264522 0.05735478]
y2 not aggreed on  27 Proba:  [0.15425972 0.84574028]
product probas: [0.14541219057905447, 0.04850724580822798]
result 0
y1 disagree on 51  Proba:  [0.99029388 0.00970612]
y2 not aggreed on  51 Proba:  [0.48266838 0.51733162]
product probas: [0.4779835423122465, 0.005021285115258809]
result 0
F1:  0.9827534939042522
[1 0 1 0 1 1 1 0 1 1 0 1 0 1 1 1 0 0 1 0 0 1 0 1 0 1 1 0 1 0 0 1 0 0 1 0 1
 0 0 0 1 0 1 1 0 1 0 1 0 1 1 0 1 0 0 0 0 0]
[1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(867, 2)
total sample size before apply threshold:  867
Counter({'0000-0002-5555-9034': 244, '0000-0002-5981-2762': 71, '0000-0002-4044-2888': 67, '0000-0001-6508-8355': 51, '0000-0001-8791-7505': 30, '0000-0002-4115-3287': 30, '0000-0002-2497-020X': 26, '0000-0002-6200-1178': 26, '0000-0002-7844-8417': 22, '0000-0003-1359-5130': 21, '0000-0002-4793-0550': 20, '0000-0001-8718-2780': 18, '0000-0002-4446-2480': 16, '0000-0003-1568-1999': 14, '0000-0001-9814-0383': 13, '0000-0002-7646-1132': 13, '0000-0003-0724-0982': 12, '0000-0002-2510-2236': 11, '0000-0002-3828-0971': 10, '0000-0002-4675-5367': 10, '0000-0002-6646-0929': 8, '0000-0002-4350-3375': 7, '0000-0002-7939-5150': 7, '0000-0001-7038-5119': 6, '0000-0002-9121-7883': 5, '0000-0002-4828-4183': 5, '0000-0001-8184-3197': 5, '0000-0001-6449-1505': 5, '0000-0003-0606-434X': 5, '0000-0003-0220-9003': 5, '0000-0002-0046-2016': 5, '0000-0001-7111-8485': 4, '0000-0002-4230-5676': 4, '0000-0002-6007-5149': 4, '0000-0003-4514-0149': 4, '000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(46, 2)
total sample size before apply threshold:  46
Counter({'0000-0003-1176-7592': 34, '0000-0003-2817-7353': 6, '0000-0001-5267-1277': 4, '0000-0002-3205-8819': 2})
[]
j_burton  pass
For name:  x_feng
(102, 2)
total sample size before apply threshold:  102
Counter({'0000-0001-6894-7979': 37, '0000-0002-3212-3051': 25, '0000-0002-9057-1549': 17, '0000-0002-6920-1519': 9, '0000-0002-9523-6096': 8, '0000-0002-0443-0628': 2, '0000-0002-9473-2848': 2, '0000-0003-1945-1605': 1, '0000-0001-8226-3389': 1})
[]
x_feng  pass
For name:  w_hussein
(33, 2)
total sample size before apply threshold:  33
Counter({'0000-0001-5392-1880': 18, '0000-0002-7416-4521': 13, '0000-0001-5928-6240': 1, '0000-0002-7589-7479': 1})
[]
w_hussein  pass
For name:  c_santos
(293, 2)
total sample size before apply threshold:  293
Counter({'0000-0002-0405-3500': 68, '0000-0003-4129-6381': 41, '0000-0001-6074-7825': 38, '0000-0002-7014-8014': 37, '0000-0002-7109-1101': 25, '0000-0002-4575-1807': 22, '0000-0003-4681-094

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Counter({'0000-0001-8702-9946': 105, '0000-0003-4352-0914': 55, '0000-0002-5887-4937': 36, '0000-0003-1919-8646': 14, '0000-0001-7241-7110': 12, '0000-0002-1231-7932': 11, '0000-0002-0594-4516': 7, '0000-0002-5330-5700': 6, '0000-0003-1889-2513': 4, '0000-0002-4770-2849': 1, '0000-0001-9731-8071': 1, '0000-0001-9371-3402': 1})
['0000-0001-8702-9946']
l_martin_0
Class  0  sample size:  105
Class  1  sample size:  148
(253, 102)
Class  0  sample size:  102
Class  1  sample size:  138
(240, 102)
Labeled:  253  :  240
Unlabeled:  4572  :  2843
labeled no citation link:  13
Unlabeled no citation link size:  1729
(240, 102)
(240, 102)
(2843, 102)
(2843, 102)
(48, 100)
(3035, 100)
(3035, 100)
P:  1  N:  1
Initial L size:  192
Initial U size:  2843
Total Labeled number:  309  Still unlabeled number:  79
y1 disagree on 2  Proba:  [0.68874696 0.31125304]
y2 not aggreed on  2 Proba:  [0.27444012 0.72555988]
product probas: [0.18901980024773607, 0.22583271939105082]
result 1
y1 disagree on 3  Prob

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(29, 2)
total sample size before apply threshold:  29
Counter({'0000-0001-9471-4722': 21, '0000-0002-3600-0497': 6, '0000-0003-0544-6928': 1, '0000-0002-4392-3863': 1})
[]
s_chow  pass
For name:  m_simon
(66, 2)
total sample size before apply threshold:  66
Counter({'0000-0003-3655-6329': 44, '0000-0003-2349-7219': 12, '0000-0003-0611-495X': 5, '0000-0002-1509-2847': 3, '0000-0003-3080-3675': 1, '0000-0002-0065-6486': 1})
[]
m_simon  pass
For name:  s_kar
(36, 2)
total sample size before apply threshold:  36
Counter({'0000-0002-9411-2091': 20, '0000-0002-3788-372X': 7, '0000-0002-5032-4770': 4, '0000-0003-3702-6207': 3, '0000-0002-0498-812X': 2})
[]
s_kar  pass
For name:  d_vlachos
(101, 2)
total sample size before apply threshold:  101
Counter({'0000-0002-6795-8403': 80, '0000-0003-3740-2575': 19, '0000-0002-0430-2386': 1, '0000-0001-7225-2862': 1})
[]
d_vlachos  pass
For name:  e_law
(12, 2)
total sample size before apply threshold:  12
Counter({'0000-0002-4021-2150': 5, '0000-0001-5

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(611, 2)
total sample size before apply threshold:  611
Counter({'0000-0002-6469-8415': 108, '0000-0003-1301-3030': 94, '0000-0002-8835-5302': 43, '0000-0001-6795-8879': 36, '0000-0003-1751-4975': 33, '0000-0002-8572-4977': 31, '0000-0002-9394-9148': 26, '0000-0002-9879-0164': 25, '0000-0001-7892-7648': 21, '0000-0002-1726-0576': 20, '0000-0001-5684-6388': 19, '0000-0002-6888-7993': 17, '0000-0001-9170-2566': 17, '0000-0003-1809-2938': 14, '0000-0001-9282-2041': 14, '0000-0003-3408-2019': 12, '0000-0002-8244-3002': 12, '0000-0001-9947-2822': 10, '0000-0002-4409-3160': 10, '0000-0002-0281-5858': 9, '0000-0002-2068-7618': 5, '0000-0001-6129-627X': 5, '0000-0002-8200-9898': 5, '0000-0001-8727-7528': 5, '0000-0001-7727-9669': 4, '0000-0002-8002-5800': 4, '0000-0001-7222-4917': 3, '0000-0002-5990-8529': 2, '0000-0003-0338-4268': 2, '0000-0002-6880-8861': 1, '0000-0001-7207-4082': 1, '0000-0001-7522-1463': 1, '0000-0003-3742-9989': 1, '0000-0002-3888-3211': 1})
['0000-0002-6469-8415']
s_yang

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(38, 2)
total sample size before apply threshold:  38
Counter({'0000-0002-6192-259X': 20, '0000-0003-2048-4500': 15, '0000-0002-1497-1284': 2, '0000-0002-0658-8752': 1})
[]
d_huang  pass
For name:  h_kuo
(144, 2)
total sample size before apply threshold:  144
Counter({'0000-0002-3295-2984': 98, '0000-0003-1336-1203': 26, '0000-0001-6752-2231': 16, '0000-0002-0349-6983': 2, '0000-0001-9102-5104': 1, '0000-0002-0573-2636': 1})
[]
h_kuo  pass
For name:  a_santoro
(189, 2)
total sample size before apply threshold:  189
Counter({'0000-0002-0798-6816': 83, '0000-0003-1709-9492': 58, '0000-0002-5086-1453': 21, '0000-0003-2503-8219': 10, '0000-0002-1014-197X': 9, '0000-0002-6193-2050': 8})
[]
a_santoro  pass
For name:  q_lu
(35, 2)
total sample size before apply threshold:  35
Counter({'0000-0002-2804-0827': 22, '0000-0002-4261-5121': 5, '0000-0002-4514-0969': 4, '0000-0002-7952-2332': 3, '0000-0001-6234-4384': 1})
[]
q_lu  pass
For name:  s_kumar
(419, 2)
total sample size before apply thresh

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(681, 2)
total sample size before apply threshold:  681
Counter({'0000-0003-3622-9707': 63, '0000-0001-7016-8990': 58, '0000-0003-2336-4731': 56, '0000-0001-8816-4832': 51, '0000-0002-0703-0742': 46, '0000-0002-6109-5707': 29, '0000-0002-2521-924X': 29, '0000-0001-7433-2081': 27, '0000-0002-5723-177X': 25, '0000-0001-8063-7906': 23, '0000-0001-5546-3852': 19, '0000-0001-9918-1638': 18, '0000-0001-7888-9725': 17, '0000-0002-5323-6733': 16, '0000-0003-3410-445X': 16, '0000-0003-1882-3892': 16, '0000-0002-3151-7956': 16, '0000-0001-7343-3884': 14, '0000-0002-6202-0993': 13, '0000-0001-7364-8412': 12, '0000-0002-8582-912X': 11, '0000-0002-4693-5667': 11, '0000-0001-8918-5627': 10, '0000-0002-6439-8754': 10, '0000-0002-6650-6245': 7, '0000-0003-1028-2454': 7, '0000-0001-7954-0736': 5, '0000-0002-9780-9062': 5, '0000-0002-6257-0389': 5, '0000-0002-2106-202X': 4, '0000-0002-2145-5034': 4, '0000-0002-5586-4776': 3, '0000-0002-9210-2754': 3, '0000-0003-2544-7215': 3, '0000-0001-6049-2615': 3, '

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(193, 2)
total sample size before apply threshold:  193
Counter({'0000-0002-3756-8789': 48, '0000-0002-4351-2503': 30, '0000-0002-8654-4927': 23, '0000-0002-5964-3233': 23, '0000-0002-1698-6666': 11, '0000-0002-3681-2874': 9, '0000-0002-0192-4323': 8, '0000-0002-5392-558X': 8, '0000-0002-3294-0879': 6, '0000-0002-5421-9249': 5, '0000-0003-3894-873X': 5, '0000-0003-1057-9194': 5, '0000-0001-6573-6359': 2, '0000-0002-9937-1383': 2, '0000-0001-7965-2674': 2, '0000-0001-6497-1680': 2, '0000-0003-4294-9233': 1, '0000-0001-5709-6566': 1, '0000-0002-0639-0973': 1, '0000-0001-5396-7280': 1})
[]
l_yang  pass
For name:  h_hassan
(22, 2)
total sample size before apply threshold:  22
Counter({'0000-0002-6035-0040': 9, '0000-0002-2815-7996': 5, '0000-0002-9567-0896': 3, '0000-0001-5167-8063': 2, '0000-0002-6166-1342': 1, '0000-0001-7274-9414': 1, '0000-0003-0359-1208': 1})
[]
h_hassan  pass
For name:  f_chen
(40, 2)
total sample size before apply threshold:  40
Counter({'0000-0002-2191-0930': 8, '0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  138
Class  1  sample size:  304
(442, 102)
Class  0  sample size:  136
Class  1  sample size:  280
(416, 102)
Labeled:  442  :  416
Unlabeled:  4283  :  2891
labeled no citation link:  26
Unlabeled no citation link size:  1392
(416, 102)
(416, 102)
(2891, 102)
(2891, 102)
(83, 100)
(3224, 100)
(3224, 100)
P:  1  N:  1
Initial L size:  333
Initial U size:  2891
Total Labeled number:  448  Still unlabeled number:  80
y1 disagree on 31  Proba:  [0.02090908 0.97909092]
y2 not aggreed on  31 Proba:  [0.62843366 0.37156634]
product probas: [0.013139968467265188, 0.3637972327075039]
result 1
y1 disagree on 34  Proba:  [0.14874761 0.85125239]
y2 not aggreed on  34 Proba:  [0.65395366 0.34604634]
product probas: [0.09727404153391114, 0.29457277495383133]
result 1
y1 disagree on 39  Proba:  [0.84053778 0.15946222]
y2 not aggreed on  39 Proba:  [0.08353898 0.91646102]
product probas: [0.07021766726131745, 0.14614091109939842]
result 1
y1 disagree on 40  Proba:  [0.91067332

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  166
Class  1  sample size:  9
(175, 102)
Class  0  sample size:  122
Class  1  sample size:  8
(130, 102)
Labeled:  175  :  130
Unlabeled:  2242  :  1430
labeled no citation link:  45
Unlabeled no citation link size:  812
(130, 102)
(130, 102)
(1430, 102)
(1430, 102)
(26, 100)
(1534, 100)
(1534, 100)
P:  1  N:  1
Initial L size:  104
Initial U size:  1430
Total Labeled number:  222  Still unlabeled number:  79
y1 disagree on 4  Proba:  [0.34824432 0.65175568]
y2 not aggreed on  4 Proba:  [0.77937246 0.22062754]
product probas: [0.27141202997140274, 0.14379525166292856]
result 0
y1 disagree on 15  Proba:  [0.35804718 0.64195282]
y2 not aggreed on  15 Proba:  [0.69911095 0.30088905]
product probas: [0.2503147066642698, 0.19315657343158488]
result 0
F1:  0.7291666666666667
[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
For name:  c_ryan
(159, 2)
total sample size before apply thre

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  140
Class  1  sample size:  135
(275, 102)
Class  0  sample size:  135
Class  1  sample size:  118
(253, 102)
Labeled:  275  :  253
Unlabeled:  1912  :  1214
labeled no citation link:  22
Unlabeled no citation link size:  698
(253, 102)
(253, 102)
(1214, 102)
(1214, 102)
(51, 100)
(1416, 100)
(1416, 100)
P:  1  N:  1
Initial L size:  202
Initial U size:  1214
Total Labeled number:  320  Still unlabeled number:  77
y1 disagree on 22  Proba:  [0.33733494 0.66266506]
y2 not aggreed on  22 Proba:  [0.81880033 0.18119967]
product probas: [0.27620995733525733, 0.12007468890201901]
result 0
y1 disagree on 29  Proba:  [0.84069163 0.15930837]
y2 not aggreed on  29 Proba:  [0.36626192 0.63373808]
product probas: [0.3079133288630274, 0.10095977890209994]
result 0
y1 disagree on 37  Proba:  [0.32066543 0.67933457]
y2 not aggreed on  37 Proba:  [0.55996988 0.44003012]
product probas: [0.17956298094866086, 0.2989276725389506]
result 1
F1:  1.0
[0 1 0 1 1 0 0 0 0 1 0 1 1 0 1 1

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  110
Class  1  sample size:  257
(367, 102)
Class  0  sample size:  103
Class  1  sample size:  244
(347, 102)
Labeled:  367  :  347
Unlabeled:  4188  :  2651
labeled no citation link:  20
Unlabeled no citation link size:  1537
(347, 102)
(347, 102)
(2651, 102)
(2651, 102)
(69, 100)
(2929, 100)
(2929, 100)
P:  1  N:  1
Initial L size:  278
Initial U size:  2651
Total Labeled number:  391  Still unlabeled number:  82
y1 disagree on 22  Proba:  [0.66942197 0.33057803]
y2 not aggreed on  22 Proba:  [0.28698517 0.71301483]
product probas: [0.19211417645386158, 0.23570703871291615]
result 1
y1 disagree on 28  Proba:  [0.95353407 0.04646593]
y2 not aggreed on  28 Proba:  [0.40500943 0.59499057]
product probas: [0.38619029495046686, 0.02764678789938279]
result 0
y1 disagree on 32  Proba:  [0.7863883 0.2136117]
y2 not aggreed on  32 Proba:  [0.24038896 0.75961104]
product probas: [0.18903906290486042, 0.1622618068568945]
result 0
y1 disagree on 41  Proba:  [0.89323555 0.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  218
Class  1  sample size:  31
(249, 102)
Class  0  sample size:  186
Class  1  sample size:  28
(214, 102)
Labeled:  249  :  214
Unlabeled:  4869  :  3674
labeled no citation link:  35
Unlabeled no citation link size:  1195
(214, 102)
(214, 102)
(3674, 102)
(3674, 102)
(43, 100)
(3845, 100)
(3845, 100)
P:  1  N:  1
Initial L size:  171
Initial U size:  3674
Total Labeled number:  289  Still unlabeled number:  78
y1 disagree on 22  Proba:  [0.59893695 0.40106305]
y2 not aggreed on  22 Proba:  [0.38106175 0.61893825]
product probas: [0.2282319628362945, 0.2482332627142188]
result 1
y1 disagree on 24  Proba:  [0.50532553 0.49467447]
y2 not aggreed on  24 Proba:  [0.24377362 0.75622638]
product probas: [0.12318503330725716, 0.37408588448722563]
result 1
y1 disagree on 32  Proba:  [0.32018646 0.67981354]
y2 not aggreed on  32 Proba:  [0.77636348 0.22363652]
product probas: [0.24858107920862063, 0.1520311300188527]
result 0
F1:  0.9546891464699684
[0 0 0 1 0 0 0 0 0 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(161, 2)
total sample size before apply threshold:  161
Counter({'0000-0002-2652-5134': 66, '0000-0002-6654-9309': 33, '0000-0001-8700-583X': 23, '0000-0001-6327-9692': 15, '0000-0003-0108-3138': 11, '0000-0002-1031-7488': 7, '0000-0002-0089-7482': 6})
[]
c_pan  pass
For name:  x_cao
(74, 2)
total sample size before apply threshold:  74
Counter({'0000-0002-3004-7518': 25, '0000-0001-7222-5450': 14, '0000-0002-3476-9833': 12, '0000-0002-4782-853X': 11, '0000-0001-7571-6482': 10, '0000-0002-6771-0571': 1, '0000-0001-8124-7491': 1})
[]
x_cao  pass
For name:  j_yoo
(112, 2)
total sample size before apply threshold:  112
Counter({'0000-0001-8378-1583': 41, '0000-0001-7120-8464': 19, '0000-0002-3924-6919': 15, '0000-0002-3150-1727': 9, '0000-0002-5488-7925': 7, '0000-0001-7119-5421': 6, '0000-0003-3881-1995': 5, '0000-0003-2611-3399': 5, '0000-0002-0259-6237': 2, '0000-0003-0639-3944': 1, '0000-0002-2330-4053': 1, '0000-0002-9508-0757': 1})
[]
j_yoo  pass
For name:  l_wong
(131, 2)
total sam

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(425, 2)
total sample size before apply threshold:  425
Counter({'0000-0002-6557-211X': 117, '0000-0002-5234-4986': 48, '0000-0001-5357-0451': 36, '0000-0001-9521-5650': 33, '0000-0002-5824-3318': 17, '0000-0002-1486-741X': 15, '0000-0002-1956-6229': 14, '0000-0003-1019-784X': 14, '0000-0001-9993-8004': 13, '0000-0001-8736-9545': 10, '0000-0002-0266-3233': 9, '0000-0002-4390-6502': 9, '0000-0002-1941-1200': 8, '0000-0001-6052-0663': 8, '0000-0002-4704-548X': 8, '0000-0002-8062-8708': 8, '0000-0001-6946-2105': 7, '0000-0002-9174-7542': 7, '0000-0002-9769-8075': 5, '0000-0003-4187-2967': 4, '0000-0002-9020-305X': 4, '0000-0003-2981-4537': 4, '0000-0003-3905-8915': 3, '0000-0003-3331-181X': 2, '0000-0001-5958-5849': 2, '0000-0003-3445-4353': 2, '0000-0001-7266-3610': 2, '0000-0003-4378-4776': 2, '0000-0002-2921-4294': 2, '0000-0001-7865-1020': 1, '0000-0001-9824-5716': 1, '0000-0001-7392-6363': 1, '0000-0002-1366-5170': 1, '0000-0002-0425-8311': 1, '0000-0002-4803-1477': 1, '0000-0002-097

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(658, 2)
total sample size before apply threshold:  658
Counter({'0000-0002-1940-6428': 219, '0000-0002-7074-8087': 194, '0000-0002-1674-443X': 56, '0000-0003-3327-828X': 42, '0000-0001-6587-7055': 33, '0000-0002-8811-9203': 29, '0000-0002-7509-1643': 22, '0000-0003-2045-9372': 13, '0000-0002-9161-7940': 11, '0000-0003-3712-1554': 10, '0000-0001-7672-9357': 6, '0000-0003-2113-0245': 5, '0000-0001-6847-7065': 5, '0000-0003-0977-3600': 4, '0000-0003-1372-4764': 2, '0000-0003-1734-7994': 2, '0000-0002-3269-1681': 2, '0000-0002-0183-0490': 1, '0000-0001-6646-050X': 1, '0000-0002-6646-951X': 1})
['0000-0002-1940-6428', '0000-0002-7074-8087']
m_wu_0
Class  0  sample size:  194
Class  1  sample size:  464
(658, 102)
Class  0  sample size:  183
Class  1  sample size:  432
(615, 102)
Labeled:  658  :  615
Unlabeled:  10741  :  7482
labeled no citation link:  43
Unlabeled no citation link size:  3259
(615, 102)
(615, 102)
(7482, 102)
(7482, 102)
(123, 100)
(7974, 100)
(7974, 100)
P:  1  N:  1
In

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  219
Class  1  sample size:  439
(658, 102)
Class  0  sample size:  211
Class  1  sample size:  404
(615, 102)
Labeled:  658  :  615
Unlabeled:  10741  :  7482
labeled no citation link:  43
Unlabeled no citation link size:  3259
(615, 102)
(615, 102)
(7482, 102)
(7482, 102)
(123, 100)
(7974, 100)
(7974, 100)
P:  1  N:  1
Initial L size:  492
Initial U size:  7482
Total Labeled number:  610  Still unlabeled number:  80
y1 disagree on 11  Proba:  [0.53684759 0.46315241]
y2 not aggreed on  11 Proba:  [0.36372624 0.63627376]
product probas: [0.19526555592860248, 0.29469172337696564]
result 1
y1 disagree on 18  Proba:  [0.60986225 0.39013775]
y2 not aggreed on  18 Proba:  [0.25411825 0.74588175]
product probas: [0.15497712512615955, 0.290996630268177]
result 1
y1 disagree on 24  Proba:  [0.89716453 0.10283547]
y2 not aggreed on  24 Proba:  [0.45888131 0.54111869]
product probas: [0.41169203618548905, 0.05564619315331893]
result 0
y1 disagree on 26  Proba:  [0.03442875

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


F1:  0.9408369408369408
[1 0 0 0 1 0 0 1 1 0 0 1 1 1 1 0 0 0 1 1 0 1 0 1 0 0 1 1 0 1 0 1 0 1 1 1 1
 0 1 0 1 0 0 1 0 1 0 0 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 1 1 0 0 1 0 0 1 0 1
 0 0 0 1 1 0 1 1 0 1 0 1 1 1 1 0 1 1 0 0 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 1
 1 0 1 0 1 1 1 0 1 1 1 1]
[1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1]
For name:  e_lee
(300, 2)
total sample size before apply threshold:  300
Counter({'0000-0003-0232-7704': 81, '0000-0003-0418-1454': 48, '0000-0001-7494-1776': 48, '0000-0003-1255-9808': 40, '0000-0001-7188-3857': 29, '0000-0002-6369-7429': 16, '0000-0001-9670-3242': 10, '0000-0001-8131-6872': 8, '0000-0001-5144-2552': 3, '0000-0003-4725-4959': 3, '0000-

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  195
Class  1  sample size:  19
(214, 102)
Class  0  sample size:  184
Class  1  sample size:  17
(201, 102)
Labeled:  214  :  201
Unlabeled:  2828  :  1912
labeled no citation link:  13
Unlabeled no citation link size:  916
(201, 102)
(201, 102)
(1912, 102)
(1912, 102)
(40, 100)
(2073, 100)
(2073, 100)
P:  1  N:  1
Initial L size:  161
Initial U size:  1912
Total Labeled number:  277  Still unlabeled number:  82
F1:  0.9217221135029354
[0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
For name:  r_miranda
(81, 2)
total sample size before apply threshold:  81
Counter({'0000-0002-8467-5464': 69, '0000-0003-4798-314X': 7, '0000-0002-6551-9677': 4, '0000-0003-3222-7368': 1})
[]
r_miranda  pass
For name:  j_richardson
(84, 2)
total sample size before apply threshold:  84
Counter({'0000-0001-6521-610X': 70, '0000-000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  157
Class  1  sample size:  11
(168, 102)
Class  0  sample size:  155
Class  1  sample size:  9
(164, 102)
Labeled:  168  :  164
Unlabeled:  1851  :  1268
labeled no citation link:  4
Unlabeled no citation link size:  583
(164, 102)
(164, 102)
(1268, 102)
(1268, 102)
(33, 100)
(1399, 100)
(1399, 100)
P:  1  N:  1
Initial L size:  131
Initial U size:  1268
Total Labeled number:  243  Still unlabeled number:  83
y1 disagree on 10  Proba:  [0.32169414 0.67830586]
y2 not aggreed on  10 Proba:  [0.95039535 0.04960465]
product probas: [0.3057366196333545, 0.03364712224605414]
result 0
F1:  0.484375
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
For name:  s_mitra
(48, 2)
total sample size before apply threshold:  48
Counter({'0000-0001-7923-8887': 21, '0000-0001-7620-4809': 11, '0000-0002-0800-4626': 10, '0000-0001-6381-5344': 3, '0000-0003-3526-994

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)


Class  0  sample size:  114
Class  1  sample size:  12
(126, 102)
Class  0  sample size:  111
Class  1  sample size:  11
(122, 102)
Labeled:  126  :  122
Unlabeled:  1098  :  727
labeled no citation link:  4
Unlabeled no citation link size:  371
(122, 102)
(122, 102)
(727, 102)
(727, 102)
(24, 100)
(825, 100)
(825, 100)
P:  1  N:  1
Initial L size:  98
Initial U size:  727
Total Labeled number:  218  Still unlabeled number:  76
y1 disagree on 17  Proba:  [0.53691114 0.46308886]
y2 not aggreed on  17 Proba:  [0.3448906 0.6551094]
product probas: [0.1851756089129594, 0.3033738608460335]
result 1
y1 disagree on 18  Proba:  [0.90303493 0.09696507]
y2 not aggreed on  18 Proba:  [0.4077756 0.5922244]
product probas: [0.368235612580469, 0.057425079772359465]
result 0
F1:  1.0
[0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0]
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0]
For name:  c_jones
(354, 2)
total sample size before apply threshold:  354
Counter({'0000-0002-00

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  194
Class  1  sample size:  160
(354, 102)
Class  0  sample size:  164
Class  1  sample size:  142
(306, 102)
Labeled:  354  :  306
Unlabeled:  7169  :  4708
labeled no citation link:  48
Unlabeled no citation link size:  2461
(306, 102)
(306, 102)
(4708, 102)
(4708, 102)
(61, 100)
(4953, 100)
(4953, 100)
P:  1  N:  1
Initial L size:  245
Initial U size:  4708
Total Labeled number:  359  Still unlabeled number:  82
y1 disagree on 45  Proba:  [0.05721805 0.94278195]
y2 not aggreed on  45 Proba:  [0.50540503 0.49459497]
product probas: [0.028918290722863643, 0.466295212125914]
result 1
y1 disagree on 46  Proba:  [0.13025057 0.86974943]
y2 not aggreed on  46 Proba:  [0.6049851 0.3950149]
product probas: [0.07879965225315178, 0.3435639877509944]
result 1
y1 disagree on 47  Proba:  [0.55736933 0.44263067]
y2 not aggreed on  47 Proba:  [0.10569936 0.89430064]
product probas: [0.05891357967086875, 0.39584488938016704]
result 1
y1 disagree on 50  Proba:  [0.54449135 0.4

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  163
Class  1  sample size:  24
(187, 102)
Class  0  sample size:  155
Class  1  sample size:  21
(176, 102)
Labeled:  187  :  176
Unlabeled:  154  :  96
labeled no citation link:  11
Unlabeled no citation link size:  58
(176, 102)
(176, 102)
(96, 102)
(96, 102)
(35, 100)
(237, 100)
(237, 100)
P:  1  N:  1
Initial L size:  141
Initial U size:  96
Total Labeled number:  241  Still unlabeled number:  0
y1 disagree on 29  Proba:  [0.45639644 0.54360356]
y2 not aggreed on  29 Proba:  [0.67953637 0.32046363]
product probas: [0.31013797819663974, 0.17420517049101386]
result 0
y1 disagree on 32  Proba:  [0.99273839 0.00726161]
y2 not aggreed on  32 Proba:  [0.48326567 0.51673433]
product probas: [0.47975638498441825, 0.003752323437479588]
result 0
F1:  0.8923076923076924
[0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
For name:  r_ellis
(176,

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  158
Class  1  sample size:  18
(176, 102)
Class  0  sample size:  155
Class  1  sample size:  18
(173, 102)
Labeled:  176  :  173
Unlabeled:  1970  :  1055
labeled no citation link:  3
Unlabeled no citation link size:  915
(173, 102)
(173, 102)
(1055, 102)
(1055, 102)
(35, 100)
(1193, 100)
(1193, 100)
P:  1  N:  1
Initial L size:  138
Initial U size:  1055
Total Labeled number:  256  Still unlabeled number:  79
F1:  1.0
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
For name:  v_saini
(18, 2)
total sample size before apply threshold:  18
Counter({'0000-0002-0258-2871': 11, '0000-0002-9944-0262': 5, '0000-0003-2734-0120': 1, '0000-0002-6796-5881': 1})
[]
v_saini  pass
For name:  a_ellis
(168, 2)
total sample size before apply threshold:  168
Counter({'0000-0001-7456-9214': 47, '0000-0002-0725-2353': 41, '0000-0002-0417-0547': 40, '000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  111
Class  1  sample size:  111
(222, 102)
Class  0  sample size:  103
Class  1  sample size:  100
(203, 102)
Labeled:  222  :  203
Unlabeled:  348  :  174
labeled no citation link:  19
Unlabeled no citation link size:  174
(203, 102)
(203, 102)
(174, 102)
(174, 102)
(41, 100)
(336, 100)
(336, 100)
P:  1  N:  1
Initial L size:  162
Initial U size:  174
Total Labeled number:  279  Still unlabeled number:  58
y1 disagree on 14  Proba:  [0.83517776 0.16482224]
y2 not aggreed on  14 Proba:  [0.41081839 0.58918161]
product probas: [0.3431063776760284, 0.09711023515592179]
result 0
y1 disagree on 16  Proba:  [0.57174573 0.42825427]
y2 not aggreed on  16 Proba:  [0.02830261 0.97169739]
product probas: [0.01618189630807258, 0.41613355358310006]
result 1
y1 disagree on 21  Proba:  [0.55591641 0.44408359]
y2 not aggreed on  21 Proba:  [0.35360901 0.64639099]
product probas: [0.19657705359884603, 0.28705162975636317]
result 1
y1 disagree on 29  Proba:  [0.02061976 0.979380

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(77, 2)
total sample size before apply threshold:  77
Counter({'0000-0002-0286-6639': 54, '0000-0002-1046-4031': 19, '0000-0001-6585-3993': 2, '0000-0002-8193-6964': 2})
[]
c_reis  pass
For name:  f_scott
(26, 2)
total sample size before apply threshold:  26
Counter({'0000-0002-6021-0419': 15, '0000-0003-0229-3698': 8, '0000-0003-2041-4641': 3})
[]
f_scott  pass
For name:  l_han
(20, 2)
total sample size before apply threshold:  20
Counter({'0000-0003-4180-1288': 8, '0000-0002-0577-9661': 7, '0000-0002-2955-6307': 2, '0000-0003-3436-2811': 2, '0000-0003-3204-6313': 1})
[]
l_han  pass
For name:  c_martins
(121, 2)
total sample size before apply threshold:  121
Counter({'0000-0001-8634-6878': 38, '0000-0001-6600-6163': 18, '0000-0002-7901-7600': 17, '0000-0002-8488-5103': 16, '0000-0003-3506-674X': 11, '0000-0001-8710-1856': 4, '0000-0001-8561-5167': 4, '0000-0002-9335-6027': 3, '0000-0003-4341-1005': 3, '0000-0002-8269-9550': 2, '0000-0001-5953-3758': 2, '0000-0002-4886-9261': 1, '0000-

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  101
Class  1  sample size:  664
(765, 102)
Class  0  sample size:  96
Class  1  sample size:  624
(720, 102)
Labeled:  765  :  720
Unlabeled:  31020  :  21569
labeled no citation link:  45
Unlabeled no citation link size:  9451
(720, 102)
(720, 102)
(21569, 102)
(21569, 102)
(144, 100)
(22145, 100)
(22145, 100)
P:  1  N:  1
Initial L size:  576
Initial U size:  21569
Total Labeled number:  689  Still unlabeled number:  82
y1 disagree on 1  Proba:  [0.69577346 0.30422654]
y2 not aggreed on  1 Proba:  [0.46391852 0.53608148]
product probas: [0.3227821926880854, 0.1630902129073828]
result 0
y1 disagree on 3  Proba:  [0.78875449 0.21124551]
y2 not aggreed on  3 Proba:  [0.41553342 0.58446658]
product probas: [0.327753845780605, 0.12346594376032186]
result 0
y1 disagree on 7  Proba:  [0.77995861 0.22004139]
y2 not aggreed on  7 Proba:  [0.40466439 0.59533561]
product probas: [0.3156214786482496, 0.13099847206461962]
result 0
y1 disagree on 11  Proba:  [0.33060145 0.6

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


 51 Proba:  [0.4415084 0.5584916]
product probas: [0.3584118694512159, 0.1051139987031445]
result 0
y1 disagree on 52  Proba:  [0.22242224 0.77757776]
y2 not aggreed on  52 Proba:  [0.65399863 0.34600137]
product probas: [0.14546383961123663, 0.26904297098322694]
result 1
y1 disagree on 53  Proba:  [0.31774668 0.68225332]
y2 not aggreed on  53 Proba:  [0.50614307 0.49385693]
product probas: [0.16082528384245406, 0.3369355246444648]
result 1
y1 disagree on 55  Proba:  [0.04522297 0.95477703]
y2 not aggreed on  55 Proba:  [0.51641623 0.48358377]
product probas: [0.023353875520394172, 0.46171467384656145]
result 1
y1 disagree on 56  Proba:  [0.33642969 0.66357031]
y2 not aggreed on  56 Proba:  [0.64718465 0.35281535]
product probas: [0.2177321320788124, 0.23411779002870214]
result 1
y1 disagree on 80  Proba:  [0.5147882 0.4852118]
y2 not aggreed on  80 Proba:  [0.2754658 0.7245342]
product probas: [0.14180654434940676, 0.35155254167489974]
result 1
y1 disagree on 83  Proba:  [0.66916659 0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(268, 2)
total sample size before apply threshold:  268
Counter({'0000-0001-9523-9441': 128, '0000-0003-3977-7418': 28, '0000-0002-3696-3955': 22, '0000-0002-6725-5767': 14, '0000-0003-2583-9518': 13, '0000-0001-9959-4272': 8, '0000-0002-5656-0897': 7, '0000-0001-9487-4259': 6, '0000-0002-6041-1763': 6, '0000-0002-1520-0799': 4, '0000-0001-8055-8925': 3, '0000-0001-9708-1043': 3, '0000-0002-7268-6465': 3, '0000-0003-1224-1699': 2, '0000-0002-7211-1661': 2, '0000-0002-7206-0550': 2, '0000-0003-1244-6483': 2, '0000-0003-4180-565X': 2, '0000-0001-9522-6181': 2, '0000-0001-9554-8797': 2, '0000-0002-6455-9618': 1, '0000-0003-2863-8068': 1, '0000-0003-2318-3893': 1, '0000-0002-9230-7135': 1, '0000-0003-4773-6771': 1, '0000-0002-1134-1252': 1, '0000-0002-1956-5779': 1, '0000-0003-1778-3833': 1, '0000-0001-5722-0213': 1})
['0000-0001-9523-9441']
j_silva_0
Class  0  sample size:  128
Class  1  sample size:  140
(268, 102)
Class  0  sample size:  121
Class  1  sample size:  131
(252, 102)
Labele

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(14, 2)
total sample size before apply threshold:  14
Counter({'0000-0003-3893-1305': 6, '0000-0002-0850-3118': 4, '0000-0001-5425-5218': 3, '0000-0001-7615-7811': 1})
[]
a_pal  pass
For name:  v_costa
(141, 2)
total sample size before apply threshold:  141
Counter({'0000-0002-0471-2756': 33, '0000-0002-7868-4663': 32, '0000-0002-7294-6933': 27, '0000-0002-2113-7482': 18, '0000-0002-5412-8945': 18, '0000-0001-8188-831X': 7, '0000-0002-1513-0284': 2, '0000-0001-5786-633X': 2, '0000-0003-0122-3567': 1, '0000-0001-8801-5669': 1})
[]
v_costa  pass
For name:  j_allen
(111, 2)
total sample size before apply threshold:  111
Counter({'0000-0001-5219-4423': 36, '0000-0002-3829-066X': 27, '0000-0001-9974-4226': 12, '0000-0003-3566-3747': 12, '0000-0003-4740-9404': 11, '0000-0002-3894-4854': 5, '0000-0002-6576-2132': 3, '0000-0002-0950-0429': 3, '0000-0002-3084-7785': 1, '0000-0002-6717-8693': 1})
[]
j_allen  pass
For name:  y_dong
(76, 2)
total sample size before apply threshold:  76
Counter({'0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(69, 2)
total sample size before apply threshold:  69
Counter({'0000-0003-0676-6923': 34, '0000-0002-4475-2162': 10, '0000-0002-1725-4619': 6, '0000-0003-0659-4084': 6, '0000-0002-8554-0369': 6, '0000-0002-2195-268X': 3, '0000-0003-3290-025X': 2, '0000-0003-1039-494X': 1, '0000-0003-2698-3319': 1})
[]
x_kong  pass
For name:  w_cao
(126, 2)
total sample size before apply threshold:  126
Counter({'0000-0002-2447-1486': 91, '0000-0002-8952-9159': 27, '0000-0002-5369-9682': 7, '0000-0001-6209-3482': 1})
[]
w_cao  pass
For name:  c_ma
(126, 2)
total sample size before apply threshold:  126
Counter({'0000-0001-8818-6396': 31, '0000-0002-7480-5528': 28, '0000-0001-9245-0356': 18, '0000-0001-7092-7715': 16, '0000-0003-2054-0445': 15, '0000-0001-9612-7898': 9, '0000-0001-6478-5917': 3, '0000-0001-6507-2329': 2, '0000-0002-5936-789X': 2, '0000-0003-1073-4502': 1, '0000-0001-8942-3912': 1})
[]
c_ma  pass
For name:  j_chin
(27, 2)
total sample size before apply threshold:  27
Counter({'0000-0003-3

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  173
Class  1  sample size:  190
(363, 102)
Class  0  sample size:  166
Class  1  sample size:  163
(329, 102)
Labeled:  363  :  329
Unlabeled:  2181  :  1264
labeled no citation link:  34
Unlabeled no citation link size:  917
(329, 102)
(329, 102)
(1264, 102)
(1264, 102)
(66, 100)
(1527, 100)
(1527, 100)
P:  1  N:  1
Initial L size:  263
Initial U size:  1264
Total Labeled number:  379  Still unlabeled number:  81
y1 disagree on 1  Proba:  [0.59971995 0.40028005]
y2 not aggreed on  1 Proba:  [0.05123446 0.94876554]
product probas: [0.03072633058411437, 0.37977191869580834]
result 1
y1 disagree on 11  Proba:  [0.94727698 0.05272302]
y2 not aggreed on  11 Proba:  [0.12039127 0.87960873]
product probas: [0.11404387613074987, 0.04637562460853744]
result 0
y1 disagree on 24  Proba:  [0.50531971 0.49468029]
y2 not aggreed on  24 Proba:  [0.48269657 0.51730343]
product probas: [0.2439160913476697, 0.2558998101831293]
result 1
y1 disagree on 46  Proba:  [0.80763242 0.19

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(190, 2)
total sample size before apply threshold:  190
Counter({'0000-0003-0435-8651': 59, '0000-0001-8989-508X': 46, '0000-0001-6310-1472': 30, '0000-0002-7743-4515': 29, '0000-0003-2849-9096': 12, '0000-0002-5277-5487': 7, '0000-0002-3878-7684': 5, '0000-0002-3602-6849': 1, '0000-0002-4645-2593': 1})
[]
m_adams  pass
For name:  t_singh
(52, 2)
total sample size before apply threshold:  52
Counter({'0000-0001-7935-0457': 13, '0000-0001-7420-6739': 11, '0000-0001-7051-6529': 11, '0000-0002-0413-1935': 10, '0000-0003-1007-4540': 2, '0000-0002-5870-6204': 1, '0000-0003-0377-6122': 1, '0000-0002-9740-7776': 1, '0000-0002-7740-4826': 1, '0000-0003-1109-5626': 1})
[]
t_singh  pass
For name:  m_thompson
(150, 2)
total sample size before apply threshold:  150
Counter({'0000-0002-7764-4096': 80, '0000-0001-8958-0336': 29, '0000-0002-4933-009X': 11, '0000-0002-2865-9558': 10, '0000-0002-5649-1203': 6, '0000-0002-1789-312X': 6, '0000-0002-6910-4938': 4, '0000-0002-1194-1506': 1, '0000-0002-8551

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)


(42, 2)
total sample size before apply threshold:  42
Counter({'0000-0002-7860-0319': 21, '0000-0002-0021-5613': 9, '0000-0002-8643-4920': 2, '0000-0002-8577-6339': 2, '0000-0001-8439-5270': 2, '0000-0001-6790-1362': 2, '0000-0002-3964-2356': 1, '0000-0003-2404-1985': 1, '0000-0003-2860-1150': 1, '0000-0002-6317-1718': 1})
[]
l_williams  pass
For name:  h_young
(109, 2)
total sample size before apply threshold:  109
Counter({'0000-0002-0457-8710': 75, '0000-0003-1538-445X': 28, '0000-0002-4249-9060': 5, '0000-0002-8866-7648': 1})
[]
h_young  pass
For name:  a_vincent
(79, 2)
total sample size before apply threshold:  79
Counter({'0000-0002-4185-3267': 39, '0000-0001-6446-3846': 21, '0000-0002-3760-7266': 12, '0000-0002-0360-6644': 7})
[]
a_vincent  pass
For name:  a_monteiro
(132, 2)
total sample size before apply threshold:  132
Counter({'0000-0002-8448-4801': 76, '0000-0001-9696-459X': 35, '0000-0001-8182-3380': 7, '0000-0002-2185-0720': 5, '0000-0002-7839-2556': 4, '0000-0002-2322-3

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(57, 2)
total sample size before apply threshold:  57
Counter({'0000-0001-9927-3372': 24, '0000-0001-5854-0542': 16, '0000-0002-1964-2711': 15, '0000-0002-4268-9729': 2})
[]
e_zimmermann  pass
For name:  c_zhang
(321, 2)
total sample size before apply threshold:  321
Counter({'0000-0002-7784-1188': 120, '0000-0003-2349-3138': 52, '0000-0002-1581-5806': 25, '0000-0001-9042-4007': 13, '0000-0002-6502-288X': 10, '0000-0002-5957-2287': 9, '0000-0002-3721-8586': 8, '0000-0002-4067-2798': 7, '0000-0003-3435-0247': 7, '0000-0002-7687-0518': 7, '0000-0001-8663-3674': 6, '0000-0001-8222-4566': 5, '0000-0003-0679-7623': 4, '0000-0003-3212-4270': 4, '0000-0001-6885-1678': 4, '0000-0003-1616-4715': 4, '0000-0001-8206-5171': 4, '0000-0001-6685-0137': 3, '0000-0002-7913-4858': 3, '0000-0002-7167-0840': 3, '0000-0002-1207-4264': 3, '0000-0003-0399-1201': 3, '0000-0002-9461-1755': 2, '0000-0003-4968-8793': 2, '0000-0003-2693-6643': 2, '0000-0003-3871-0342': 2, '0000-0001-5249-141X': 2, '0000-0003-1095

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(615, 2)
total sample size before apply threshold:  615
Counter({'0000-0002-4295-6129': 423, '0000-0002-9639-7940': 113, '0000-0002-9872-9865': 27, '0000-0001-9689-4085': 21, '0000-0002-0681-4721': 10, '0000-0003-0328-1840': 7, '0000-0003-0937-8045': 7, '0000-0003-3746-6894': 4, '0000-0002-6618-2412': 2, '0000-0002-6935-3459': 1})
['0000-0002-9639-7940', '0000-0002-4295-6129']
r_reis_0
Class  0  sample size:  113
Class  1  sample size:  502
(615, 102)
Class  0  sample size:  92
Class  1  sample size:  480
(572, 102)
Labeled:  615  :  572
Unlabeled:  785  :  420
labeled no citation link:  43
Unlabeled no citation link size:  365
(572, 102)
(572, 102)
(420, 102)
(420, 102)
(114, 100)
(878, 100)
(878, 100)
P:  1  N:  1
Initial L size:  458
Initial U size:  420
Total Labeled number:  572  Still unlabeled number:  82
y1 disagree on 97  Proba:  [0.79530112 0.20469888]
y2 not aggreed on  97 Proba:  [0.1172768 0.8827232]
product probas: [0.09327036939975901, 0.18069245225155486]
result 1
y1 di

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  423
Class  1  sample size:  192
(615, 102)
Class  0  sample size:  408
Class  1  sample size:  164
(572, 102)
Labeled:  615  :  572
Unlabeled:  785  :  420
labeled no citation link:  43
Unlabeled no citation link size:  365
(572, 102)
(572, 102)
(420, 102)
(420, 102)
(114, 100)
(878, 100)
(878, 100)
P:  1  N:  1
Initial L size:  458
Initial U size:  420
Total Labeled number:  565  Still unlabeled number:  88
y1 disagree on 76  Proba:  [0.21539834 0.78460166]
y2 not aggreed on  76 Proba:  [0.86199028 0.13800972]
product probas: [0.1856712757297043, 0.10828265525309422]
result 0
y1 disagree on 80  Proba:  [0.39999706 0.60000294]
y2 not aggreed on  80 Proba:  [0.64786335 0.35213665]
product probas: [0.25914343836096, 0.21128302237856503]
result 0
F1:  1.0
[0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 0 1 1 0 1 0 1 0 0 1 1 0 0 1 0 0
 0 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(31, 2)
total sample size before apply threshold:  31
Counter({'0000-0001-7606-0331': 25, '0000-0001-7265-065X': 4, '0000-0002-4559-4637': 1, '0000-0003-4208-5076': 1})
[]
z_ren  pass
For name:  m_kumar
(104, 2)
total sample size before apply threshold:  104
Counter({'0000-0003-3769-052X': 22, '0000-0003-1656-1649': 16, '0000-0003-0970-4875': 14, '0000-0001-9173-3872': 10, '0000-0002-0855-3406': 9, '0000-0002-9049-2760': 6, '0000-0002-3554-0563': 4, '0000-0002-4198-5892': 4, '0000-0003-3490-5062': 3, '0000-0002-7630-7389': 2, '0000-0001-6657-1277': 2, '0000-0002-0141-5318': 2, '0000-0001-6745-7425': 2, '0000-0001-5606-401X': 2, '0000-0001-6389-2040': 2, '0000-0002-7936-9892': 1, '0000-0001-5545-3793': 1, '0000-0002-7728-5572': 1, '0000-0001-6578-9741': 1})
[]
m_kumar  pass
For name:  j_wong
(183, 2)
total sample size before apply threshold:  183
Counter({'0000-0003-2953-7728': 59, '0000-0003-2592-3226': 30, '0000-0002-7213-4898': 24, '0000-0001-5572-4143': 21, '0000-0002-8167-540X': 17

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(293, 2)
total sample size before apply threshold:  293
Counter({'0000-0001-7147-8237': 174, '0000-0002-5998-3270': 76, '0000-0003-3377-6692': 21, '0000-0002-5372-5457': 17, '0000-0002-9499-3538': 2, '0000-0002-6170-5077': 1, '0000-0002-2724-7017': 1, '0000-0001-6596-6437': 1})
['0000-0001-7147-8237']
t_johnson_0
Class  0  sample size:  174
Class  1  sample size:  119
(293, 102)
Class  0  sample size:  152
Class  1  sample size:  115
(267, 102)
Labeled:  293  :  267
Unlabeled:  4108  :  2836
labeled no citation link:  26
Unlabeled no citation link size:  1272
(267, 102)
(267, 102)
(2836, 102)
(2836, 102)
(53, 100)
(3050, 100)
(3050, 100)
P:  1  N:  1
Initial L size:  214
Initial U size:  2836
Total Labeled number:  330  Still unlabeled number:  79
y1 disagree on 4  Proba:  [0.23972642 0.76027358]
y2 not aggreed on  4 Proba:  [0.62729264 0.37270736]
product probas: [0.15037861503374741, 0.2833595633287243]
result 1
y1 disagree on 16  Proba:  [0.35143361 0.64856639]
y2 not aggreed on  16

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(118, 2)
total sample size before apply threshold:  118
Counter({'0000-0001-9391-9574': 60, '0000-0002-1840-616X': 28, '0000-0002-0765-474X': 7, '0000-0001-6533-4309': 6, '0000-0001-7969-2107': 3, '0000-0002-1206-1367': 3, '0000-0001-9239-1202': 3, '0000-0001-7536-2506': 3, '0000-0002-0051-3389': 2, '0000-0002-8009-7513': 2, '0000-0002-9556-7741': 1})
[]
m_fernandes  pass
For name:  l_cui
(25, 2)
total sample size before apply threshold:  25
Counter({'0000-0001-5549-8780': 18, '0000-0001-5706-9525': 3, '0000-0002-5546-5097': 2, '0000-0002-9818-4543': 1, '0000-0001-5907-0538': 1})
[]
l_cui  pass
For name:  s_monteiro
(50, 2)
total sample size before apply threshold:  50
Counter({'0000-0002-4026-5965': 19, '0000-0002-7069-0591': 14, '0000-0003-0059-9837': 7, '0000-0002-3037-9635': 4, '0000-0001-5040-6170': 2, '0000-0002-8784-7276': 2, '0000-0002-1389-3851': 1, '0000-0003-3507-9911': 1})
[]
s_monteiro  pass
For name:  m_hsieh
(35, 2)
total sample size before apply threshold:  35
Counter({

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(182, 2)
total sample size before apply threshold:  182
Counter({'0000-0002-1526-4303': 128, '0000-0002-7444-5550': 16, '0000-0002-4712-1921': 9, '0000-0003-1586-5595': 8, '0000-0002-3851-8257': 8, '0000-0001-6344-4141': 4, '0000-0001-7667-5918': 3, '0000-0002-8442-0349': 2, '0000-0001-5330-1438': 2, '0000-0002-5398-2496': 1, '0000-0002-3418-0260': 1})
['0000-0002-1526-4303']
r_patel_0
Class  0  sample size:  128
Class  1  sample size:  54
(182, 102)
Class  0  sample size:  122
Class  1  sample size:  52
(174, 102)
Labeled:  182  :  174
Unlabeled:  4470  :  3032
labeled no citation link:  8
Unlabeled no citation link size:  1438
(174, 102)
(174, 102)
(3032, 102)
(3032, 102)
(35, 100)
(3171, 100)
(3171, 100)
P:  1  N:  1
Initial L size:  139
Initial U size:  3032
Total Labeled number:  258  Still unlabeled number:  78
y1 disagree on 27  Proba:  [0.40404596 0.59595404]
y2 not aggreed on  27 Proba:  [0.95347199 0.04652801]
product probas: [0.3852465104717076, 0.02772855308340185]
result 0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(31, 2)
total sample size before apply threshold:  31
Counter({'0000-0001-6783-4382': 11, '0000-0003-2226-7913': 10, '0000-0002-2258-5738': 5, '0000-0001-9797-6322': 2, '0000-0002-6492-1621': 2, '0000-0001-7734-9428': 1})
[]
j_alexander  pass
For name:  j_schneider
(40, 2)
total sample size before apply threshold:  40
Counter({'0000-0001-8016-8687': 13, '0000-0002-6028-9956': 7, '0000-0003-1114-618X': 5, '0000-0001-7169-3973': 5, '0000-0003-1176-8309': 3, '0000-0001-5187-6756': 3, '0000-0002-5863-7747': 1, '0000-0001-6093-5404': 1, '0000-0001-5556-0919': 1, '0000-0001-9610-6501': 1})
[]
j_schneider  pass
For name:  g_russo
(58, 2)
total sample size before apply threshold:  58
Counter({'0000-0002-8764-7389': 22, '0000-0002-2716-369X': 11, '0000-0003-1493-1087': 7, '0000-0001-9321-1613': 5, '0000-0003-4687-7353': 5, '0000-0001-5001-3027': 4, '0000-0002-4565-3131': 2, '0000-0003-4215-1926': 1, '0000-0002-7779-6225': 1})
[]
g_russo  pass
For name:  j_carvalho
2
(136, 2)
total sample size b

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(83, 2)
total sample size before apply threshold:  83
Counter({'0000-0002-4028-4867': 43, '0000-0001-7165-9812': 28, '0000-0002-7872-4194': 7, '0000-0003-2800-5308': 2, '0000-0002-1609-078X': 1, '0000-0002-7302-1190': 1, '0000-0002-6242-6782': 1})
[]
t_martin  pass
For name:  t_o'brien
(262, 2)
total sample size before apply threshold:  262
Counter({'0000-0002-7198-8621': 202, '0000-0002-9161-8070': 39, '0000-0001-9028-5481': 20, '0000-0002-5031-736X': 1})
['0000-0002-7198-8621']
t_o'brien_0
Class  0  sample size:  202
Class  1  sample size:  60
(262, 102)
Class  0  sample size:  196
Class  1  sample size:  59
(255, 102)
Labeled:  262  :  255
Unlabeled:  2086  :  1466
labeled no citation link:  7
Unlabeled no citation link size:  620
(255, 102)
(255, 102)
(1466, 102)
(1466, 102)
(51, 100)
(1670, 100)
(1670, 100)
P:  1  N:  1
Initial L size:  204
Initial U size:  1466
Total Labeled number:  316  Still unlabeled number:  84
y1 disagree on 44  Proba:  [0.7654943 0.2345057]
y2 not aggreed 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


['0000-0002-8724-7684']
z_cai_0
Class  0  sample size:  200
Class  1  sample size:  44
(244, 102)
Class  0  sample size:  195
Class  1  sample size:  42
(237, 102)
Labeled:  244  :  237
Unlabeled:  2727  :  1854
labeled no citation link:  7
Unlabeled no citation link size:  873
(237, 102)
(237, 102)
(1854, 102)
(1854, 102)
(47, 100)
(2044, 100)
(2044, 100)
P:  1  N:  1
Initial L size:  190
Initial U size:  1854
Total Labeled number:  305  Still unlabeled number:  83
y1 disagree on 1  Proba:  [0.6773631 0.3226369]
y2 not aggreed on  1 Proba:  [0.32103003 0.67896997]
product probas: [0.21745389543176918, 0.21906076912073552]
result 1
y1 disagree on 23  Proba:  [0.83586775 0.16413225]
y2 not aggreed on  23 Proba:  [0.46981052 0.53018948]
product probas: [0.39269946668900085, 0.08702119018046786]
result 0
y1 disagree on 30  Proba:  [0.26487283 0.73512717]
y2 not aggreed on  30 Proba:  [0.7020816 0.2979184]
product probas: [0.18596234278764234, 0.21900790850398394]
result 1
y1 disagree on 3

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(154, 2)
total sample size before apply threshold:  154
Counter({'0000-0002-4125-4053': 64, '0000-0002-1411-9307': 37, '0000-0002-0873-3714': 29, '0000-0001-8523-0857': 9, '0000-0002-6174-6696': 4, '0000-0001-8454-6219': 3, '0000-0003-4573-932X': 2, '0000-0001-5655-1213': 2, '0000-0002-0023-4363': 2, '0000-0001-9274-7803': 1, '0000-0002-2002-622X': 1})
[]
a_james  pass
For name:  c_cao
(74, 2)
total sample size before apply threshold:  74
Counter({'0000-0003-2139-1648': 25, '0000-0003-2830-4383': 20, '0000-0001-8621-8403': 19, '0000-0002-0320-1110': 5, '0000-0002-3407-7837': 4, '0000-0001-6909-5739': 1})
[]
c_cao  pass
For name:  c_brown
(384, 2)
total sample size before apply threshold:  384
Counter({'0000-0002-0294-2419': 85, '0000-0002-8959-0101': 60, '0000-0003-2305-846X': 49, '0000-0002-9637-9355': 44, '0000-0003-2506-4871': 33, '0000-0003-0079-7067': 28, '0000-0003-4776-3403': 13, '0000-0002-7271-4091': 12, '0000-0002-0210-1820': 11, '0000-0003-2057-3976': 8, '0000-0002-1559-3238

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  129
Class  1  sample size:  87
(216, 102)
Class  0  sample size:  104
Class  1  sample size:  87
(191, 102)
Labeled:  216  :  191
Unlabeled:  1343  :  887
labeled no citation link:  25
Unlabeled no citation link size:  456
(191, 102)
(191, 102)
(887, 102)
(887, 102)
(38, 100)
(1040, 100)
(1040, 100)
P:  1  N:  1
Initial L size:  153
Initial U size:  887
Total Labeled number:  270  Still unlabeled number:  79
y1 disagree on 8  Proba:  [0.99413248 0.00586752]
y2 not aggreed on  8 Proba:  [0.49590622 0.50409378]
product probas: [0.4929964848867214, 0.002957778322952578]
result 0
y1 disagree on 14  Proba:  [0.43639772 0.56360228]
y2 not aggreed on  14 Proba:  [0.75430518 0.24569482]
product probas: [0.3291770627885772, 0.1384741598072667]
result 0
y1 disagree on 35  Proba:  [0.10526529 0.89473471]
y2 not aggreed on  35 Proba:  [0.57010624 0.42989376]
product probas: [0.060012399346837234, 0.3846408692070093]
result 1
F1:  0.9732205778717407
[0 1 0 1 0 0 0 1 0 1 0 0 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(568, 2)
total sample size before apply threshold:  568
Counter({'0000-0003-4982-4441': 109, '0000-0001-5193-1428': 95, '0000-0003-4087-8021': 48, '0000-0003-0806-8969': 39, '0000-0001-6568-2469': 34, '0000-0002-9578-5722': 27, '0000-0001-9827-7531': 27, '0000-0003-2920-9038': 23, '0000-0002-7975-2437': 23, '0000-0001-9802-0568': 22, '0000-0003-3950-7557': 22, '0000-0002-4032-1285': 17, '0000-0001-5328-0913': 15, '0000-0002-2116-4579': 14, '0000-0002-4375-8095': 11, '0000-0001-7071-1455': 10, '0000-0002-5239-3833': 9, '0000-0002-5104-6565': 4, '0000-0002-0691-9072': 4, '0000-0002-9355-7574': 3, '0000-0003-4835-0707': 3, '0000-0002-7683-7259': 2, '0000-0002-6944-4385': 2, '0000-0002-2225-1199': 2, '0000-0002-3594-826X': 1, '0000-0002-6494-1868': 1, '0000-0001-5162-5420': 1})
['0000-0003-4982-4441']
t_kim_0
Class  0  sample size:  109
Class  1  sample size:  459
(568, 102)
Class  0  sample size:  98
Class  1  sample size:  410
(508, 102)
Labeled:  568  :  508
Unlabeled:  14190  :  11319


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(80, 2)
total sample size before apply threshold:  80
Counter({'0000-0002-0465-4111': 38, '0000-0002-8251-8422': 13, '0000-0002-1662-3072': 10, '0000-0003-0368-9731': 9, '0000-0003-4537-1318': 7, '0000-0002-4591-4362': 3})
[]
a_cruz  pass
For name:  a_mora
(84, 2)
total sample size before apply threshold:  84
Counter({'0000-0002-0785-5795': 54, '0000-0002-6397-4836': 20, '0000-0003-1344-1131': 5, '0000-0003-1354-4739': 3, '0000-0002-9132-5622': 2})
[]
a_mora  pass
For name:  j_walker
(253, 2)
total sample size before apply threshold:  253
Counter({'0000-0002-8922-083X': 71, '0000-0002-5349-1689': 70, '0000-0002-2050-1641': 64, '0000-0002-2995-0398': 17, '0000-0002-8683-0026': 15, '0000-0001-6034-7514': 9, '0000-0002-9732-5738': 4, '0000-0001-5151-1693': 1, '0000-0003-1349-2633': 1, '0000-0002-8241-9424': 1})
[]
j_walker  pass
For name:  j_alves
(53, 2)
total sample size before apply threshold:  53
Counter({'0000-0001-5914-2087': 15, '0000-0001-7221-871X': 13, '0000-0001-7554-2419': 8, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(57, 2)
total sample size before apply threshold:  57
Counter({'0000-0001-5474-1451': 28, '0000-0002-4389-5516': 12, '0000-0001-8602-1248': 9, '0000-0001-6050-8699': 6, '0000-0003-2550-6764': 2})
[]
a_guerrero  pass
For name:  a_grant
(45, 2)
total sample size before apply threshold:  45
Counter({'0000-0002-1147-2375': 22, '0000-0001-6146-101X': 9, '0000-0001-7205-5869': 7, '0000-0002-7032-3716': 4, '0000-0001-9746-2989': 2, '0000-0002-1553-596X': 1})
[]
a_grant  pass
For name:  v_kumar
(98, 2)
total sample size before apply threshold:  98
Counter({'0000-0003-3522-1121': 18, '0000-0001-6643-7465': 15, '0000-0002-9795-5967': 15, '0000-0001-6477-8274': 9, '0000-0001-5559-0624': 8, '0000-0003-4937-7442': 7, '0000-0003-0910-233X': 7, '0000-0002-7335-0824': 6, '0000-0003-2121-3964': 4, '0000-0002-1583-7749': 3, '0000-0003-1988-2536': 3, '0000-0002-3834-1906': 1, '0000-0002-1513-5835': 1, '0000-0002-3980-1345': 1})
[]
v_kumar  pass
For name:  p_shah
(84, 2)
total sample size before apply thr

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(150, 2)
total sample size before apply threshold:  150
Counter({'0000-0002-3041-2917': 74, '0000-0002-7579-4031': 25, '0000-0002-2986-1272': 22, '0000-0001-6370-605X': 12, '0000-0003-3723-5957': 6, '0000-0001-8535-7348': 5, '0000-0002-7447-6146': 2, '0000-0003-0990-0403': 1, '0000-0003-0283-4263': 1, '0000-0001-7009-6552': 1, '0000-0002-3310-7715': 1})
[]
m_ferrari  pass
For name:  j_paredes
(68, 2)
total sample size before apply threshold:  68
Counter({'0000-0002-1076-1343': 44, '0000-0002-7788-8939': 9, '0000-0002-0974-8109': 7, '0000-0002-1566-9044': 5, '0000-0002-0620-0770': 3})
[]
j_paredes  pass
For name:  z_zhao
(186, 2)
total sample size before apply threshold:  186
Counter({'0000-0003-0654-1193': 79, '0000-0003-2743-9008': 28, '0000-0002-1279-2207': 15, '0000-0002-1876-1284': 15, '0000-0001-6079-1631': 14, '0000-0002-1701-3751': 7, '0000-0002-0862-8471': 6, '0000-0002-2901-5033': 6, '0000-0001-8978-8866': 5, '0000-0002-8679-3130': 4, '0000-0001-8979-844X': 3, '0000-0001-6529-

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


y1 disagree on 100  Proba:  [0.52951402 0.47048598]
y2 not aggreed on  100 Proba:  [0.00428902 0.99571098]
product probas: [0.002271094302013635, 0.4684680606323576]
result 1
y1 disagree on 107  Proba:  [0.07610933 0.92389067]
y2 not aggreed on  107 Proba:  [0.62061901 0.37938099]
product probas: [0.047234899705205405, 0.3505065588864438]
result 1
y1 disagree on 114  Proba:  [0.46477656 0.53522344]
y2 not aggreed on  114 Proba:  [0.58725313 0.41274687]
product probas: [0.27294148756751646, 0.22091180168514984]
result 0
y1 disagree on 121  Proba:  [0.3199548 0.6800452]
y2 not aggreed on  121 Proba:  [0.94692101 0.05307899]
product probas: [0.3029719194289026, 0.03609611339451496]
result 0
F1:  0.8924937915565168
[0 1 1 0 1 1 1 1 1 0 1 0 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0
 0 0 1 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 0 1 0 0 1 1 1 1 0 0 0 1 1 1 0 0 1
 1 1 0 1 0 0 1 0 0 1 0 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 0 0 1 1 1 1 0 1 0]
[0, 1, 1, 0, 1, 1, 1, 1, 1, 0,

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


54  Proba:  [0.43362122 0.56637878]
y2 not aggreed on  54 Proba:  [0.65337885 0.34662115]
product probas: [0.28331893485740833, 0.19631886258994846]
result 0
y1 disagree on 61  Proba:  [0.57500313 0.42499687]
y2 not aggreed on  61 Proba:  [0.01624759 0.98375241]
product probas: [0.009342414008182235, 0.41809169563272947]
result 1
y1 disagree on 63  Proba:  [0.14744213 0.85255787]
y2 not aggreed on  63 Proba:  [0.53154936 0.46845064]
product probas: [0.07837276942012607, 0.39938128117202276]
result 1
y1 disagree on 65  Proba:  [0.20460961 0.79539039]
y2 not aggreed on  65 Proba:  [0.98690296 0.01309704]
product probas: [0.20192982409966326, 0.010417262883305304]
result 0
y1 disagree on 71  Proba:  [0.90802871 0.09197129]
y2 not aggreed on  71 Proba:  [0.40296268 0.59703732]
product probas: [0.36590168593292405, 0.054910292758273395]
result 0
y1 disagree on 78  Proba:  [0.06751633 0.93248367]
y2 not aggreed on  78 Proba:  [0.84641064 0.15358936]
product probas: [0.05714653652076051, 0.14

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


['0000-0002-6875-7566']
j_parsons_0
Class  0  sample size:  212
Class  1  sample size:  43
(255, 102)
Class  0  sample size:  211
Class  1  sample size:  41
(252, 102)
Labeled:  255  :  252
Unlabeled:  1902  :  1130
labeled no citation link:  3
Unlabeled no citation link size:  772
(252, 102)
(252, 102)
(1130, 102)
(1130, 102)
(50, 100)
(1332, 100)
(1332, 100)
P:  1  N:  1
Initial L size:  202
Initial U size:  1130
Total Labeled number:  310  Still unlabeled number:  88
y1 disagree on 38  Proba:  [0.92545055 0.07454945]
y2 not aggreed on  38 Proba:  [0.47918993 0.52081007]
product probas: [0.44346658491603735, 0.03882610515673121]
result 0
F1:  1.0
[0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0
 0 0 1 0 1 0 0 0 0 1 0 0 0]
[0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0]
For name:  s_oliveira
(143, 2)
total sample size before apply threshold:  143
Counter({'0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(93, 2)
total sample size before apply threshold:  93
Counter({'0000-0002-8034-5513': 68, '0000-0003-3466-9995': 9, '0000-0002-9009-5183': 8, '0000-0002-0393-5712': 8})
[]
s_vogt  pass
For name:  d_garcia
(60, 2)
total sample size before apply threshold:  60
Counter({'0000-0002-8552-1475': 32, '0000-0003-3356-4454': 24, '0000-0002-2820-9151': 2, '0000-0001-6669-9457': 1, '0000-0001-6777-9184': 1})
[]
d_garcia  pass
For name:  w_xie
(115, 2)
total sample size before apply threshold:  115
Counter({'0000-0002-2768-3572': 44, '0000-0003-2410-2135': 17, '0000-0003-0493-062X': 15, '0000-0003-4655-6496': 10, '0000-0003-4504-8609': 7, '0000-0003-1762-7224': 6, '0000-0002-5500-8195': 6, '0000-0003-2546-2415': 5, '0000-0003-1501-896X': 2, '0000-0002-4887-3711': 1, '0000-0003-3856-9887': 1, '0000-0002-9983-7948': 1})
[]
w_xie  pass
For name:  m_cruz
(141, 2)
total sample size before apply threshold:  141
Counter({'0000-0001-9759-5466': 57, '0000-0001-9846-6754': 46, '0000-0003-1822-0514': 30, '00

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(2116, 2)
total sample size before apply threshold:  2116
Counter({'0000-0003-1835-9436': 200, '0000-0003-3477-1172': 146, '0000-0003-1232-5307': 124, '0000-0001-6537-0350': 78, '0000-0003-0934-3344': 73, '0000-0001-7964-106X': 56, '0000-0003-2337-6935': 52, '0000-0003-2068-7287': 51, '0000-0002-3573-638X': 46, '0000-0003-4085-293X': 41, '0000-0002-6349-6950': 41, '0000-0002-6931-8581': 38, '0000-0002-4171-3803': 38, '0000-0003-0373-5080': 36, '0000-0002-1299-4300': 36, '0000-0002-8383-8524': 33, '0000-0002-0087-1151': 32, '0000-0002-3500-7494': 32, '0000-0002-4687-6732': 31, '0000-0001-5979-5774': 30, '0000-0001-9660-6303': 29, '0000-0002-1903-8354': 28, '0000-0002-5390-8763': 27, '0000-0003-0767-1918': 26, '0000-0002-4747-9763': 25, '0000-0003-0103-7457': 24, '0000-0003-4035-0438': 23, '0000-0003-2841-147X': 23, '0000-0003-0693-1415': 23, '0000-0002-3566-3379': 19, '0000-0003-4978-1867': 18, '0000-0002-9570-4216': 18, '0000-0001-5080-7097': 17, '0000-0002-1672-5730': 17, '0000-0002-9

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


y1 disagree on 62  Proba:  [0.10728989 0.89271011]
y2 not aggreed on  62 Proba:  [0.53279029 0.46720971]
product probas: [0.05716301097269611, 0.4170828349741849]
result 1
y1 disagree on 68  Proba:  [0.29573871 0.70426129]
y2 not aggreed on  68 Proba:  [0.85658497 0.14341503]
product probas: [0.253325337450018, 0.10100165176685605]
result 0
y1 disagree on 80  Proba:  [0.76667429 0.23332571]
y2 not aggreed on  80 Proba:  [0.22574529 0.77425471]
product probas: [0.17307311297887112, 0.18065352858099942]
result 1
y1 disagree on 89  Proba:  [0.74046488 0.25953512]
y2 not aggreed on  89 Proba:  [0.25502882 0.74497118]
product probas: [0.18883988456765619, 0.1933461837205719]
result 1
y1 disagree on 195  Proba:  [0.70133364 0.29866636]
y2 not aggreed on  195 Proba:  [0.25433836 0.74566164]
product probas: [0.1783760471577394, 0.22270404861019813]
result 1
y1 disagree on 222  Proba:  [0.99541424 0.00458576]
y2 not aggreed on  222 Proba:  [0.26241712 0.73758288]
product probas: [0.261213742915

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


y1 disagree on 70  Proba:  [0.52423013 0.47576987]
y2 not aggreed on  70 Proba:  [0.01472727 0.98527273]
product probas: [0.007720480104755272, 0.46876307379833504]
result 1
y1 disagree on 89  Proba:  [0.16395002 0.83604998]
y2 not aggreed on  89 Proba:  [0.54475018 0.45524982]
product probas: [0.08931180277218786, 0.3806116059985044]
result 1
y1 disagree on 100  Proba:  [0.28272845 0.71727155]
y2 not aggreed on  100 Proba:  [0.63277872 0.36722128]
product probas: [0.17890454467050615, 0.26339737858175793]
result 1
y1 disagree on 104  Proba:  [0.91512918 0.08487082]
y2 not aggreed on  104 Proba:  [0.39239138 0.60760862]
product probas: [0.3590888020192864, 0.05156824088878096]
result 0
y1 disagree on 107  Proba:  [0.97730796 0.02269204]
y2 not aggreed on  107 Proba:  [0.18235058 0.81764942]
product probas: [0.17821267707959826, 0.01855413102165617]
result 0
y1 disagree on 122  Proba:  [0.70832967 0.29167033]
y2 not aggreed on  122 Proba:  [0.09335154 0.90664846]
product probas: [0.0661

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


 Proba:  [0.44057858 0.55942142]
product probas: [0.2586009999620777, 0.23106469798859042]
result 0
y1 disagree on 77  Proba:  [0.14857651 0.85142349]
y2 not aggreed on  77 Proba:  [0.85047076 0.14952924]
product probas: [0.1263599792660326, 0.1273127095186926]
result 1
y1 disagree on 78  Proba:  [0.21048126 0.78951874]
y2 not aggreed on  78 Proba:  [0.68134314 0.31865686]
product probas: [0.1434099621031263, 0.2515855619745561]
result 1
y1 disagree on 84  Proba:  [0.07750853 0.92249147]
y2 not aggreed on  84 Proba:  [0.53659044 0.46340956]
product probas: [0.0415903371376481, 0.427491363738879]
result 1
y1 disagree on 119  Proba:  [0.03620771 0.96379229]
y2 not aggreed on  119 Proba:  [0.57973078 0.42026922]
product probas: [0.020990726102247826, 0.40505222870846214]
result 1
y1 disagree on 149  Proba:  [0.16968011 0.83031989]
y2 not aggreed on  149 Proba:  [0.80960278 0.19039722]
product probas: [0.13737348703390606, 0.1580905989554502]
result 1
y1 disagree on 153  Proba:  [0.4554460

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(86, 2)
total sample size before apply threshold:  86
Counter({'0000-0001-6460-8136': 36, '0000-0003-2861-682X': 19, '0000-0002-3483-0219': 10, '0000-0002-8756-8445': 6, '0000-0001-5858-5126': 4, '0000-0001-8117-9695': 3, '0000-0003-2876-9199': 3, '0000-0002-2416-4101': 2, '0000-0001-7479-6206': 1, '0000-0001-9726-9943': 1, '0000-0001-7321-6927': 1})
[]
m_tang  pass
For name:  a_baranov
(42, 2)
total sample size before apply threshold:  42
Counter({'0000-0002-9976-8532': 20, '0000-0002-9112-0838': 14, '0000-0003-3987-8112': 7, '0000-0001-8810-9972': 1})
[]
a_baranov  pass
For name:  r_gray
(162, 2)
total sample size before apply threshold:  162
Counter({'0000-0001-9694-4206': 83, '0000-0002-9858-0191': 48, '0000-0002-2203-2703': 19, '0000-0001-9668-6497': 6, '0000-0002-5890-1819': 6})
[]
r_gray  pass
For name:  r_nunes
(46, 2)
total sample size before apply threshold:  46
Counter({'0000-0001-7425-5717': 28, '0000-0002-1377-9899': 13, '0000-0001-8633-4404': 3, '0000-0002-9014-0570': 2})

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(108, 2)
total sample size before apply threshold:  108
Counter({'0000-0003-1720-6526': 20, '0000-0002-8340-2739': 19, '0000-0003-2180-3091': 17, '0000-0003-4025-3160': 9, '0000-0001-9732-0833': 6, '0000-0002-1440-9902': 6, '0000-0002-3940-3283': 5, '0000-0002-0017-4276': 5, '0000-0003-3604-7145': 5, '0000-0002-6708-0223': 5, '0000-0002-0349-2181': 4, '0000-0002-9090-258X': 3, '0000-0002-5177-3391': 2, '0000-0002-6881-660X': 1, '0000-0002-9443-4031': 1})
[]
h_lu  pass
For name:  j_cordeiro
(30, 2)
total sample size before apply threshold:  30
Counter({'0000-0003-4656-6045': 14, '0000-0003-4605-1615': 9, '0000-0003-0902-9638': 5, '0000-0001-7876-0219': 1, '0000-0002-2118-1192': 1})
[]
j_cordeiro  pass
For name:  c_yu
(335, 2)
total sample size before apply threshold:  335
Counter({'0000-0001-5664-9392': 252, '0000-0002-2136-2444': 26, '0000-0002-8648-8419': 16, '0000-0002-8453-5023': 11, '0000-0002-1742-2344': 10, '0000-0001-8062-9498': 6, '0000-0003-0084-6746': 5, '0000-0002-2934-2122'

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(848, 2)
total sample size before apply threshold:  848
Counter({'0000-0002-0211-9000': 91, '0000-0003-0477-2908': 62, '0000-0002-5051-4929': 44, '0000-0002-7528-7494': 44, '0000-0002-9887-5555': 39, '0000-0001-5836-4120': 31, '0000-0002-8796-0367': 30, '0000-0002-7752-6217': 30, '0000-0003-1708-8734': 28, '0000-0002-6107-5095': 27, '0000-0001-8238-1641': 27, '0000-0003-4623-1878': 26, '0000-0001-9570-3611': 25, '0000-0001-9199-0721': 25, '0000-0002-7959-7377': 24, '0000-0003-2420-3147': 23, '0000-0002-8066-475X': 19, '0000-0003-1625-3400': 18, '0000-0002-1483-5135': 17, '0000-0001-6590-7736': 15, '0000-0002-9634-8778': 15, '0000-0001-7964-0809': 14, '0000-0001-6507-5503': 13, '0000-0003-4107-2062': 11, '0000-0002-3355-2448': 11, '0000-0002-6567-9144': 10, '0000-0003-4414-3372': 10, '0000-0002-6859-5683': 9, '0000-0002-2565-5543': 8, '0000-0003-0388-510X': 7, '0000-0003-1086-5318': 7, '0000-0002-4858-8195': 5, '0000-0002-2325-0120': 5, '0000-0003-4453-8059': 5, '0000-0001-9243-3935': 5

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(665, 2)
total sample size before apply threshold:  665
Counter({'0000-0002-8633-0873': 115, '0000-0002-6266-9864': 97, '0000-0003-1391-8040': 73, '0000-0001-8839-8161': 50, '0000-0002-6782-2813': 43, '0000-0001-7896-1184': 39, '0000-0002-3598-7218': 35, '0000-0003-4275-0515': 26, '0000-0002-0007-6481': 16, '0000-0003-3711-2842': 12, '0000-0003-0195-9478': 11, '0000-0001-8572-5155': 10, '0000-0001-8971-4648': 8, '0000-0003-2442-3713': 8, '0000-0002-0491-8295': 8, '0000-0002-7540-3301': 8, '0000-0002-2767-9354': 8, '0000-0002-5982-1706': 8, '0000-0001-7139-1254': 6, '0000-0001-6417-3654': 6, '0000-0001-5769-1795': 6, '0000-0003-0298-8641': 5, '0000-0002-5599-0975': 5, '0000-0002-1707-0633': 5, '0000-0003-1536-343X': 4, '0000-0002-8514-8228': 4, '0000-0001-9306-3227': 3, '0000-0002-5033-6210': 3, '0000-0003-3428-1587': 3, '0000-0002-7856-2009': 3, '0000-0002-5808-0109': 3, '0000-0002-8565-6214': 3, '0000-0002-1837-3628': 3, '0000-0002-6976-7416': 2, '0000-0002-3487-8730': 2, '0000-0001-8

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(166, 2)
total sample size before apply threshold:  166
Counter({'0000-0002-3188-8482': 88, '0000-0002-1879-4086': 65, '0000-0002-2716-4609': 9, '0000-0002-8475-7486': 3, '0000-0001-7617-8559': 1})
[]
s_hsieh  pass
For name:  c_baptista
(19, 2)
total sample size before apply threshold:  19
Counter({'0000-0002-1263-7880': 7, '0000-0002-8158-4743': 7, '0000-0003-4664-6766': 2, '0000-0002-7807-0995': 2, '0000-0002-9966-0708': 1})
[]
c_baptista  pass
For name:  d_kavanagh
(178, 2)
total sample size before apply threshold:  178
Counter({'0000-0001-9072-8828': 113, '0000-0003-4718-0072': 58, '0000-0003-1531-6617': 4, '0000-0003-2854-7270': 3})
['0000-0001-9072-8828']
d_kavanagh_0
Class  0  sample size:  113
Class  1  sample size:  65
(178, 102)
Class  0  sample size:  106
Class  1  sample size:  65
(171, 102)
Labeled:  178  :  171
Unlabeled:  216  :  166
labeled no citation link:  7
Unlabeled no citation link size:  50
(171, 102)
(171, 102)
(166, 102)
(166, 102)
(34, 100)
(303, 100)
(303, 10

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(828, 2)
total sample size before apply threshold:  828
Counter({'0000-0001-9783-4383': 98, '0000-0003-3870-3388': 64, '0000-0002-5947-306X': 63, '0000-0002-5773-1627': 56, '0000-0002-5859-2526': 53, '0000-0002-5126-1046': 48, '0000-0002-4344-8791': 40, '0000-0001-8927-6772': 31, '0000-0001-5813-9505': 31, '0000-0002-1709-9401': 30, '0000-0003-3463-0740': 27, '0000-0003-1382-9195': 25, '0000-0003-3075-6872': 22, '0000-0001-9556-2361': 19, '0000-0002-4809-3109': 17, '0000-0002-1919-9107': 17, '0000-0002-4747-0419': 17, '0000-0002-6156-9028': 16, '0000-0001-7302-4714': 15, '0000-0001-7124-2718': 14, '0000-0001-8412-2985': 9, '0000-0002-8208-7079': 7, '0000-0003-4276-0051': 7, '0000-0002-4165-4022': 6, '0000-0002-7300-9271': 6, '0000-0002-0933-2808': 5, '0000-0001-9355-1167': 5, '0000-0002-4930-8618': 5, '0000-0001-7383-934X': 5, '0000-0001-7324-2682': 5, '0000-0003-1117-1326': 5, '0000-0002-7579-0233': 5, '0000-0002-2753-0947': 4, '0000-0002-1869-9871': 4, '0000-0002-0543-5519': 4, '0000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(58, 2)
total sample size before apply threshold:  58
Counter({'0000-0002-1541-9627': 41, '0000-0002-7633-6227': 8, '0000-0002-9886-3570': 3, '0000-0001-9220-4219': 2, '0000-0002-1059-627X': 1, '0000-0002-7628-5431': 1, '0000-0002-6155-8548': 1, '0000-0002-1275-4171': 1})
[]
j_qiu  pass
For name:  m_antunes
(27, 2)
total sample size before apply threshold:  27
Counter({'0000-0001-5545-2520': 7, '0000-0001-5888-2278': 6, '0000-0002-8913-6136': 6, '0000-0002-1257-2829': 5, '0000-0001-8216-8066': 3})
[]
m_antunes  pass
For name:  m_andersen
(399, 2)
total sample size before apply threshold:  399
Counter({'0000-0002-3894-4811': 222, '0000-0003-4694-486X': 58, '0000-0003-4794-6808': 39, '0000-0001-7029-2860': 21, '0000-0003-1125-1553': 14, '0000-0002-0234-0266': 11, '0000-0001-8275-9472': 11, '0000-0003-4977-3031': 8, '0000-0003-3845-4465': 7, '0000-0002-4833-1867': 3, '0000-0002-4654-3946': 2, '0000-0002-6803-0981': 2, '0000-0002-8164-278X': 1})
['0000-0002-3894-4811']
m_andersen_0
Class  

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Class  0  sample size:  267
Class  1  sample size:  35
(302, 102)
Class  0  sample size:  260
Class  1  sample size:  33
(293, 102)
Labeled:  302  :  293
Unlabeled:  3127  :  2110
labeled no citation link:  9
Unlabeled no citation link size:  1017
(293, 102)
(293, 102)
(2110, 102)
(2110, 102)
(59, 100)
(2344, 100)
(2344, 100)
P:  1  N:  1
Initial L size:  234
Initial U size:  2110
Total Labeled number:  345  Still unlabeled number:  85
y1 disagree on 23  Proba:  [0.35286277 0.64713723]
y2 not aggreed on  23 Proba:  [0.71480891 0.28519109]
product probas: [0.25222945212115244, 0.1845577721209973]
result 0
y1 disagree on 54  Proba:  [9.99689968e-01 3.10032037e-04]
y2 not aggreed on  54 Proba:  [0.49343478 0.50656522]
product probas: [0.4932818001720896, 0.0001570514466598236]
result 0
F1:  1.0
[0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


211
Counter({'0000-0002-3009-3290': 117, '0000-0001-9424-4150': 28, '0000-0002-4524-2260': 28, '0000-0003-2305-4813': 9, '0000-0003-4957-7831': 8, '0000-0002-5269-3342': 5, '0000-0002-5397-4672': 4, '0000-0002-9946-4926': 4, '0000-0003-2238-1070': 4, '0000-0003-1687-942X': 2, '0000-0003-2575-3263': 1, '0000-0003-0012-7493': 1})
['0000-0002-3009-3290']
m_sousa_0
Class  0  sample size:  117
Class  1  sample size:  94
(211, 102)
Class  0  sample size:  106
Class  1  sample size:  91
(197, 102)
Labeled:  211  :  197
Unlabeled:  844  :  519
labeled no citation link:  14
Unlabeled no citation link size:  325
(197, 102)
(197, 102)
(519, 102)
(519, 102)
(39, 100)
(677, 100)
(677, 100)
P:  1  N:  1
Initial L size:  158
Initial U size:  519
Total Labeled number:  276  Still unlabeled number:  80
y1 disagree on 9  Proba:  [0.95490113 0.04509887]
y2 not aggreed on  9 Proba:  [0.31340477 0.68659523]
product probas: [0.29927056683766, 0.03096466627539192]
result 0
y1 disagree on 10  Proba:  [0.77001

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(290, 2)
total sample size before apply threshold:  290
Counter({'0000-0003-2417-0787': 108, '0000-0001-6391-7455': 72, '0000-0001-5341-1055': 44, '0000-0003-4175-3829': 21, '0000-0002-5777-6542': 13, '0000-0002-7780-7131': 11, '0000-0003-4309-5153': 7, '0000-0002-9754-0630': 4, '0000-0002-9214-2932': 4, '0000-0002-0458-3739': 3, '0000-0003-2770-9899': 1, '0000-0002-8449-476X': 1, '0000-0003-2064-8050': 1})
['0000-0003-2417-0787']
s_nielsen_0
Class  0  sample size:  108
Class  1  sample size:  182
(290, 102)
Class  0  sample size:  98
Class  1  sample size:  158
(256, 102)
Labeled:  290  :  256
Unlabeled:  2543  :  1641
labeled no citation link:  34
Unlabeled no citation link size:  902
(256, 102)
(256, 102)
(1641, 102)
(1641, 102)
(51, 100)
(1846, 100)
(1846, 100)
P:  1  N:  1
Initial L size:  205
Initial U size:  1641
Total Labeled number:  318  Still unlabeled number:  82
y1 disagree on 10  Proba:  [0.49669337 0.50330663]
y2 not aggreed on  10 Proba:  [0.90668537 0.09331463]
product

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(612, 2)
total sample size before apply threshold:  612
Counter({'0000-0002-3611-0258': 120, '0000-0002-1720-7863': 65, '0000-0003-3456-3373': 43, '0000-0002-2573-8736': 39, '0000-0002-1751-461X': 35, '0000-0001-5579-2197': 33, '0000-0002-2985-219X': 23, '0000-0002-8621-4098': 23, '0000-0003-0365-5590': 23, '0000-0001-9359-1863': 23, '0000-0003-0253-1625': 17, '0000-0001-9142-456X': 14, '0000-0002-0833-1205': 12, '0000-0003-3191-3163': 11, '0000-0002-4459-087X': 10, '0000-0002-7919-1107': 10, '0000-0002-9460-3579': 9, '0000-0002-9289-1271': 9, '0000-0002-3612-7818': 8, '0000-0001-5035-4577': 8, '0000-0002-3805-6515': 7, '0000-0002-5163-0884': 6, '0000-0003-1028-1785': 6, '0000-0003-3511-4270': 5, '0000-0001-7857-0247': 5, '0000-0003-3970-3160': 5, '0000-0001-7247-7404': 5, '0000-0002-1457-3681': 4, '0000-0002-8937-4417': 4, '0000-0003-0878-7605': 4, '0000-0002-8858-1289': 4, '0000-0001-5083-8950': 4, '0000-0003-4542-1741': 3, '0000-0002-1509-1721': 3, '0000-0003-2874-8267': 2, '0000-00

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


(60, 2)
total sample size before apply threshold:  60
Counter({'0000-0001-7612-3486': 32, '0000-0002-0082-1285': 16, '0000-0003-4463-1480': 4, '0000-0001-7379-4233': 4, '0000-0001-5818-025X': 2, '0000-0002-2971-5013': 1, '0000-0001-5557-9388': 1})
[]
d_sharma  pass
For name:  a_wilson
(252, 2)
total sample size before apply threshold:  252
Counter({'0000-0002-5045-2051': 61, '0000-0003-3679-9232': 48, '0000-0002-5016-4164': 36, '0000-0003-1098-8457': 35, '0000-0002-2000-2914': 27, '0000-0002-7696-1671': 10, '0000-0003-1325-8513': 9, '0000-0003-2352-5232': 7, '0000-0001-5865-6537': 7, '0000-0003-3362-7806': 4, '0000-0001-5775-6085': 3, '0000-0002-6473-7234': 2, '0000-0003-1461-6212': 2, '0000-0002-1015-3786': 1})
[]
a_wilson  pass
For name:  f_marini
(65, 2)
total sample size before apply threshold:  65
Counter({'0000-0001-8266-1117': 37, '0000-0002-9495-2349': 12, '0000-0003-0747-5060': 12, '0000-0003-3252-7758': 4})
[]
f_marini  pass
For name:  h_tsai
(93, 2)
total sample size before 

In [79]:
# pairwise f1
from statistics import mean 
cleaned_co_train_f1 = [x for x in all_co_train_f1 if isinstance(x, float)]
print(len(cleaned_co_train_f1))
print(mean(cleaned_co_train_f1))

187
0.9292662995237037


In [None]:
# %whos
del v1_all_features
del v2_all_features