In [1]:
import time
import sklearn
import numpy as np
import pandas as pd

import sys
sys.path.append("..")
from baggingPU import BaggingClassifierPU

from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import random

In [2]:
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    """Pretty-print a confusion matrix as an aligned text table on stdout.

    Parameters
    ----------
    cm : 2-D array indexable as ``cm[i, j]``
        Confusion matrix. Rows are printed with a ``true_`` prefix and columns
        with a ``pred_`` prefix.
    labels : sequence of str
        Class names, in the same order as the rows/columns of ``cm``.
    hide_zeroes : bool
        Blank out cells whose value is exactly zero.
    hide_diagonal : bool
        Blank out the diagonal cells.
    hide_threshold : number or None
        When not None, blank out cells whose value is not strictly greater
        than the threshold (a threshold of 0 is honoured).

    Returns
    -------
    None
        The table is only printed; do not wrap the call in ``print()``.
    """
    # Size every column for the *prefixed* label so headers and cells line up.
    columnwidth = max((len(x) for x in labels), default=0) + len("pred_")
    empty_cell = " " * columnwidth
    text_format = "%{0}s".format(columnwidth)
    value_format = "%{0}.1f".format(columnwidth)

    # Header row: a blank corner cell followed by one column per predicted class.
    print("    " + empty_cell, end=' ')
    for label in labels:
        # Parenthesised on purpose: `%` binds tighter than `+`, so without the
        # parentheses only the prefix would be padded and the label appended.
        print(text_format % ('pred_' + label), end=" ")
    print()

    # One row per true class.
    for i, label1 in enumerate(labels):
        print("    " + text_format % ('true_' + label1), end=" ")
        for j in range(len(labels)):
            cell = value_format % cm[i, j]
            if hide_zeroes and float(cm[i, j]) == 0:
                cell = empty_cell
            if hide_diagonal and i == j:
                cell = empty_cell
            if hide_threshold is not None and not cm[i, j] > hide_threshold:
                cell = empty_cell
            print(cell, end=" ")
        print()

# import data

In [3]:
# Load the dataset and force the label column to an integer dtype.
# Alternative inputs (swap the path to use one of them):
#   '../data/1place-dependence.csv'
#   '../data/w-related.csv'
df_raw = pd.read_csv('../data/w-independence.csv').astype({'label': 'int'})

# Class balance, then a quick check for missing values anywhere in the frame.
print(df_raw['label'].value_counts())
print('Has null values', df_raw.isna().to_numpy().any())

1    10000
0    10000
Name: label, dtype: int64
Has null values False


In [4]:
# Preview the first ten rows to confirm the column layout.
df_raw.head(10)

Unnamed: 0,p1,p2,p3,label
0,0,0,1,1
1,1,0,1,1
2,0,1,1,1
3,2,0,1,1
4,1,1,1,1
5,0,1,0,1
6,0,2,1,1
7,3,0,1,1
8,2,1,1,1
9,1,1,0,1


In [5]:
# Inspect the first 10000 rows of the raw data.
print(df_raw.iloc[:10000,:])
# Column-wise maxima of the first two columns, kept as a Series (one entry per
# column). DataFrame.max() is used instead of np.amax(): NumPy forwards
# axis=None to pandas, which collapses the frame to a single scalar on
# pandas >= 2.0 and would break the per-column lookups on df_max done later.
df_max = df_raw.iloc[:10000,[0,1]].max()
print(df_max)

      p1  p2  p3  label
0      0   0   1      1
1      1   0   1      1
2      0   1   1      1
3      2   0   1      1
4      1   1   1      1
...   ..  ..  ..    ...
9995  52  47   0      1
9996  52  48   1      1
9997  51  48   0      1
9998  51  49   1      1
9999  50  49   0      1

[10000 rows x 4 columns]
p1    100
p2     99
dtype: int64


In [6]:
# Inspect the rows from position 10000 onward and their per-column maxima.
print(df_raw.iloc[10000:,:])
# DataFrame.max() reduces column by column; np.amax(DataFrame) would return a
# single scalar on pandas >= 2.0 because NumPy passes axis=None through.
print(df_raw.iloc[10000:,[0,1]].max())

       p1  p2   p3  label
10000   8  41   52      0
10001  65  62  249      0
10002   4  21  509      0
10003  80  81  251      0
10004  75  12  919      0
...    ..  ..  ...    ...
19995  63  94  184      0
19996  43  65  209      0
19997  17  84  717      0
19998  69  83  269      0
19999  68  54  878      0

[10000 rows x 4 columns]
p1    100
p2     99
dtype: int64


In [7]:
# Independent copy of the rows from position 10000 onward, so the edits below
# never touch df_raw.
neg_process = df_raw.iloc[10000:,:].copy()
print(neg_process)

# For the first two columns, any value above the corresponding maximum stored
# in df_max is replaced by value / maximum, truncated to an integer.
for col_pos in (0, 1):
    # .iloc: df_max is labelled by column name, so a bare df_max[0] depends on
    # the deprecated integer-position fallback of Series.__getitem__.
    limit = df_max.iloc[col_pos]
    column = neg_process.iloc[:, col_pos]
    over_limit = (column > limit).to_numpy()
    # Only touch the frame when something actually exceeds the limit.
    if over_limit.any():
        neg_process.iloc[over_limit, col_pos] = (
            (column[over_limit] / limit).astype(int).to_numpy()
        )

       p1  p2   p3  label
10000   8  41   52      0
10001  65  62  249      0
10002   4  21  509      0
10003  80  81  251      0
10004  75  12  919      0
...    ..  ..  ...    ...
19995  63  94  184      0
19996  43  65  209      0
19997  17  84  717      0
19998  69  83  269      0
19999  68  54  878      0

[10000 rows x 4 columns]


In [8]:
# Drop exact duplicate rows. Rebinding the name (instead of inplace=True) keeps
# the step an ordinary expression with no in-place mutation.
neg_process = neg_process.drop_duplicates()
print(neg_process)
# Per-column maxima; DataFrame.max() rather than np.amax(), which returns one
# scalar for the whole frame on pandas >= 2.0.
print(neg_process.max())

       p1  p2   p3  label
10000   8  41   52      0
10001  65  62  249      0
10002   4  21  509      0
10003  80  81  251      0
10004  75  12  919      0
...    ..  ..  ...    ...
19995  63  94  184      0
19996  43  65  209      0
19997  17  84  717      0
19998  69  83  269      0
19999  68  54  878      0

[10000 rows x 4 columns]
p1       100
p2        99
p3       999
label      0
dtype: int64


In [9]:
# Reassemble the working frame: the first 10000 raw rows followed by the
# processed remainder. No ignore_index is passed, so original index labels are kept.
df_process = pd.concat([df_raw.iloc[:10000,:], neg_process])
print(df_process)

       p1  p2   p3  label
0       0   0    1      1
1       1   0    1      1
2       0   1    1      1
3       2   0    1      1
4       1   1    1      1
...    ..  ..  ...    ...
19995  63  94  184      0
19996  43  65  209      0
19997  17  84  717      0
19998  69  83  269      0
19999  68  54  878      0

[20000 rows x 4 columns]


In [10]:
# Features are every column except the last; the last column is the label.
x_data = df_process.iloc[:,:-1]
y_data = df_process.iloc[:,-1]

# Hold out 20% of the rows; random_state pins the partition.
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=7)

In [11]:
# Re-attach the label column to each split so features and label share one frame.
x_train_input = pd.concat([x_train.iloc[:,:], y_train],axis=1)
x_test_input = pd.concat([x_test.iloc[:,:],y_test],axis=1)

# Sanity checks: split sizes first...
print(len(x_train_input))
print(len(x_test_input))

# ...then class counts and a missing-value check for the training split...
print(x_train_input.label.value_counts())
print('Has null values', x_train_input.isnull().values.any())

# ...and the same two checks for the test split.
print(x_test_input.label.value_counts())
print('Has null values', x_test_input.isnull().values.any())

16000
4000
1    8077
0    7923
Name: label, dtype: int64
Has null values False
0    2077
1    1923
Name: label, dtype: int64
Has null values False


In [12]:
# Preview the first ten rows of the training split (features plus label).
x_train_input.head(10)

Unnamed: 0,p1,p2,p3,label
8118,36,54,1,1
10171,46,67,333,0
9399,53,43,0,1
12669,21,1,930,0
18809,29,96,858,0
13869,44,12,453,0
3320,51,7,1,1
14689,12,79,617,0
13087,21,66,519,0
15992,98,59,520,0


In [13]:
# Preview the first ten rows of the test split (features plus label).
x_test_input.head(10)

Unnamed: 0,p1,p2,p3,label
14356,39,20,653,0
3439,50,8,0,1
12153,82,97,899,0
15029,47,35,549,0
18549,80,83,15,0
15762,74,61,231,0
12313,61,17,739,0
16034,88,14,924,0
11496,16,39,993,0
5653,23,51,0,1


In [14]:
# Build a pool of random integer triples with a trailing 0 column, used further
# down as the label-0 rows of the PU training frame.
# (The count is no longer stored in a variable called `iter`, which shadowed
# the builtin of the same name.)
n_unlabeled = 13000

# Three independent uniform draws with replacement, one per feature column.
# NOTE(review): the second draw starts at 0 while the first and third start
# at 9 — confirm the differing lower bound is intended.
a = np.random.choice(range(9,999),size=n_unlabeled,replace=True)
b = np.random.choice(range(0,999),size=n_unlabeled,replace=True)
c = np.random.choice(range(9,999),size=n_unlabeled,replace=True)

# dtype=int replaces np.int, an alias of the builtin that NumPy 1.24 removed.
alllist = np.zeros([n_unlabeled,4],dtype=int)
alllist[:, 0] = a
alllist[:, 1] = b
alllist[:, 2] = c
# The fourth column stays 0.

# Drop duplicate rows; np.unique(axis=0) also returns the rows sorted.
unlabel = np.unique(alllist, axis = 0)

print(len(unlabel))

13000


In [15]:
# Draw 3000 row positions in [100, 9999). np.random.randint excludes `high` and
# samples with replacement, so the same position can be drawn more than once.
# NOTE(review): positions are drawn from all of df_process, which is also the
# source of x_test — confirm that overlap between these rows and the test
# split is acceptable.
pos_list = np.random.randint(low=100, high=9999, size=3000).tolist()

# Rows selected from df_process by position.
data_pos = df_process.iloc[pos_list,:]
# The random pool built earlier, given the same column names.
data_unl = pd.DataFrame(unlabel, columns=['p1','p2','p3','label'],)

# Stack both parts; ignore_index=True renumbers the rows 0..n-1, so index labels
# in `data` no longer correspond to index labels in df_process.
data =  pd.concat([data_pos, data_unl], axis = 0, ignore_index=True)
print(data)

        p1   p2   p3  label
0        5   41    1      1
1       30   40    1      1
2       23   15    0      1
3       12   18    0      1
4       20   69    0      1
...    ...  ...  ...    ...
15995  998  406  398      0
15996  998  534  431      0
15997  998  695  848      0
15998  998  727  186      0
15999  998  762  647      0

[16000 rows x 4 columns]


In [16]:
# Work on a copy so the assembled frame `data` is left untouched.
df = data.copy()

# Features: every column but the last; target: the last column (label).
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

# Separate copy of the labels, used as the reference when scoring below.
y_orig = y.copy()

print(X)
print(y)

        p1   p2   p3
0        5   41    1
1       30   40    1
2       23   15    0
3       12   18    0
4       20   69    0
...    ...  ...  ...
15995  998  406  398
15996  998  534  431
15997  998  695  848
15998  998  727  186
15999  998  762  647

[16000 rows x 3 columns]
0        1
1        1
2        1
3        1
4        1
        ..
15995    0
15996    0
15997    0
15998    0
15999    0
Name: label, Length: 16000, dtype: int64


In [17]:
# Count of rows per label value in the training target.
pd.Series(y).value_counts()

0    13000
1     3000
Name: label, dtype: int64

# Training directly

In [18]:
from sklearn.linear_model import LogisticRegression

# Baseline: fit a single classifier directly on (X, y), i.e. with every row
# labelled 0 treated as a negative example.
model = LogisticRegression()

# Alternatives (uncomment one to swap it in):
#   import xgboost as xgb
#   model = xgb.XGBClassifier()
#   from sklearn.neural_network import MLPClassifier
#   model = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(1), random_state=1,max_iter=100000)

# Report the estimator that is actually being trained; the message used to say
# "XGboost" regardless of which model was selected above.
print('Training {} model ...'.format(type(model).__name__))

model.fit(X, y)

print('Done')

Training XGboost model ...
Done


In [19]:
# Score the directly trained model on the test split. Predictions are computed
# once and reused by every metric.
direct_test_pred = model.predict(x_test_input.iloc[:,:-1])

# Header names the estimator actually held in `model`.
print('---- {} ----'.format(type(model).__name__ + ' model'))
# print_cm prints the table itself and returns None, so it is not wrapped in print().
print_cm(sklearn.metrics.confusion_matrix(y_test, direct_test_pred), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_test, direct_test_pred))
print('Recall: ', recall_score(y_test, direct_test_pred))
print('Accuracy: ', accuracy_score(y_test, direct_test_pred))
print('f1_score: ', f1_score(y_test, direct_test_pred))

---- XGboost model ----
                        pred_negative        pred_positive 
           true_negative       2062.0         15.0 
           true_positive          0.0       1923.0 
None

Precision:  0.9922600619195047
Recall:  1.0
Accuracy:  0.99625
f1_score:  0.9961149961149962


# Training by bagging

In [32]:
# Initialize the candidate base classifiers (one instance per model family).

from sklearn.linear_model import LogisticRegression
model1 = LogisticRegression()

from sklearn.tree import DecisionTreeClassifier
model2 = DecisionTreeClassifier()

from sklearn.neural_network import MLPClassifier
# NOTE: `(1)` is the integer 1, not a one-element tuple (that would be `(1,)`).
model3 = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(1), random_state=1,max_iter=100000)

from sklearn import svm
model4 = svm.LinearSVC()

from sklearn.naive_bayes import MultinomialNB
model5 = MultinomialNB()

from sklearn.ensemble import RandomForestClassifier
model6 = RandomForestClassifier(n_estimators = 50,n_jobs = -1)

import xgboost as xgb
model7 = xgb.XGBClassifier()


In [33]:
# Candidates in a fixed order; f1[k] will hold the score of model_list[k].
model_list = [model1, model2, model3, model4, model5, model6, model7]
f1 = np.zeros([len(model_list)],dtype=np.float32)

In [34]:
# Fit one PU-bagging ensemble per candidate base estimator and record its F1
# on the training rows (scored against y_orig).
n_positive = int(y.sum())  # labels are 0/1, so the sum is the count of 1s

for j, base_estimator in enumerate(model_list):
    model = BaggingClassifierPU(base_estimator,
                         n_estimators = 50,
                         n_jobs = -1,
                         max_samples = n_positive
                        )
    model.fit(X,y)
    # Predict once and reuse the score for both the log line and the array.
    score = f1_score(y_orig, model.predict(X))
    print(score)
    f1[j] = score

print(f1)

0.9996667777407531
1.0
1.0
1.0
0.823045267489712
1.0
1.0
[0.99966675 1.         1.         1.         0.82304525 1.
 1.        ]


In [35]:
# Positions of the candidates whose recorded F1 is at least 0.95.
f1_index = [pos for pos, score in enumerate(f1) if score >= 0.95]

print(f1_index)

# Refit every retained candidate and add up its predictions per sample.
predict_sum = np.zeros(len(X), dtype=np.float32)
for i in f1_index:
    model = BaggingClassifierPU(
        model_list[i],
        n_estimators=50,
        n_jobs=-1,
        max_samples=sum(y),
    )
    model.fit(X, y)
    predict_sum += model.predict(X)
print(predict_sum)

[0, 1, 2, 3, 5, 6]
[6. 6. 6. ... 0. 0. 0.]


In [36]:
# Majority vote over the retained models: a sample is labelled 1 when at least
# half of them predicted 1 (ties count as 1 because of `>=`).
threshold = len(f1_index) / 2
print(predict_sum)

# One vectorized comparison replaces the per-element loop and its two
# complementary `if` branches.
predict = (predict_sum >= threshold).astype(np.int64)


print(predict)
print(y_orig)

[6. 6. 6. ... 0. 0. 0.]
[1 1 1 ... 0 0 0]
0        1
1        1
2        1
3        1
4        1
        ..
15995    0
15996    0
15997    0
15998    0
15999    0
Name: label, Length: 16000, dtype: int64


In [37]:
#train data
# Score the majority-vote labels against y_orig on the training rows.
print('---- {} ----'.format('PU Bagging'))
# print_cm prints the table itself and returns None; wrapping it in print()
# emitted a stray "None" line.
print_cm(sklearn.metrics.confusion_matrix(y_orig,predict), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_orig, predict))
print('Recall: ', recall_score(y_orig,predict))
print('Accuracy: ', accuracy_score(y_orig, predict))
print('f1_score: ', f1_score(y_orig, predict))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative      13000.0          0.0 
           true_positive          0.0       3000.0 
None

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0


In [38]:
# Position of the highest recorded F1; np.argmax returns the first maximum
# when several candidates tie.
print(np.argmax(f1))

# Ensemble built around the selected base estimator; it is not fitted here.
best_model = BaggingClassifierPU(model_list[np.argmax(f1)],
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
print(best_model)

1
BaggingClassifierPU(base_estimator=DecisionTreeClassifier(), max_samples=3000,
                    n_estimators=50, n_jobs=-1)


In [39]:
# Fit the selected ensemble on (X, y) and report the wall-clock fit time.
print('Training bagging classifier...')

pu_start = time.perf_counter()

best_model.fit(X, y)
pu_end = time.perf_counter()
print('Done!')
# Elapsed seconds, measured around the fit call only.
print('Time:', pu_end - pu_start)


Training bagging classifier...
Done!
Time: 0.5954050199943595


In [40]:
#train data
# Score the fitted best_model on the training rows. Predictions are computed
# once and reused instead of re-running the ensemble for every metric.
train_pred = best_model.predict(X)

print('---- {} ----'.format('PU Bagging'))
# print_cm prints the table itself and returns None, so it is not wrapped in print().
print_cm(sklearn.metrics.confusion_matrix(y_orig, train_pred), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_orig, train_pred))
print('Recall: ', recall_score(y_orig, train_pred))
print('Accuracy: ', accuracy_score(y_orig, train_pred))
print('f1_score: ', f1_score(y_orig, train_pred))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative      13000.0          0.0 
           true_positive          0.0       3000.0 
None

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0


In [41]:
#print wrong predictions
# List the training rows that best_model gets wrong, compared with y_orig.
y_pre = np.asarray(best_model.predict(X))
y_orig_index = y_orig.index.tolist()

truth = y_orig.to_numpy()
# Index labels of the false negatives (truth 1, predicted 0) and of the false
# positives (truth 0, predicted 1), in row order.
FN_index = y_orig.index[(truth == 1) & (y_pre == 0)].tolist()
FT_index = y_orig.index[(truth == 0) & (y_pre == 1)].tolist()

# Look the rows up in `df`, the frame y_orig was taken from. Its index was
# renumbered when `data` was built with ignore_index=True, so the same labels
# in df_process would refer to unrelated rows.
print("False Negtive:")
print(df.loc[FN_index])
print("False Positive:")
print(df.loc[FT_index])

False Negtive:
Empty DataFrame
Columns: [p1, p2, p3, label]
Index: []
False Positive:
Empty DataFrame
Columns: [p1, p2, p3, label]
Index: []


In [42]:
#test data
# Score the fitted best_model on the test split. Predictions are computed once
# and reused instead of re-running the ensemble for every metric.
best_test_pred = best_model.predict(x_test_input.iloc[:,:-1])

print('---- {} ----'.format('PU Bagging'))
# print_cm prints the table itself and returns None, so it is not wrapped in print().
print_cm(sklearn.metrics.confusion_matrix(y_test, best_test_pred), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_test, best_test_pred))
print('Recall: ', recall_score(y_test, best_test_pred))
print('Accuracy: ', accuracy_score(y_test, best_test_pred))
print('f1_score: ', f1_score(y_test, best_test_pred))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative       2069.0          8.0 
           true_positive          0.0       1923.0 
None

Precision:  0.9958570688762299
Recall:  1.0
Accuracy:  0.998
f1_score:  0.9979242345614945


In [43]:
#print wrong predictions
# List the test rows that best_model gets wrong.
y_test_pre = best_model.predict(x_test_input.iloc[:,:-1])
y_test_index = y_test.index.tolist()

# Walk index label, true label and prediction together, in row order.
FN_test_index = [
    row_label
    for row_label, truth, guess in zip(y_test_index, y_test, y_test_pre)
    if truth == 1 and guess == 0
]
FT_test_index = [
    row_label
    for row_label, truth, guess in zip(y_test_index, y_test, y_test_pre)
    if truth == 0 and guess == 1
]

# y_test carries df_process index labels, so the rows are looked up there.
print("False Negtive:")
print(df_process.loc[FN_test_index])
print("False Positive:")
print(df_process.loc[FT_test_index])

False Negtive:
Empty DataFrame
Columns: [p1, p2, p3, label]
Index: []
False Positive:
       p1  p2  p3  label
18639  45  17   5      0
11312  57  38   4      0
18195  87  67   5      0
18601  48  22   4      0
18264   9  63   4      0
12803  46  70   2      0
10538  75  55   2      0
13339  22  53   5      0


In [44]:
# Feature columns of the full working frame (label column dropped), as a copy.
orig_data = df_process.iloc[:,:-1].copy()
# Labels for those rows as predicted by best_model (not the stored label column).
orig_label = best_model.predict(orig_data)

print(orig_data)
print(orig_label)

       p1  p2   p3
0       0   0    1
1       1   0    1
2       0   1    1
3       2   0    1
4       1   1    1
...    ..  ..  ...
19995  63  94  184
19996  43  65  209
19997  17  84  717
19998  69  83  269
19999  68  54  878

[20000 rows x 3 columns]
[1 1 1 ... 0 0 0]


In [45]:
import xgboost as xgb
# PU-bagging ensemble over XGBoost, fitted on the full feature frame using the
# labels predicted by best_model.
# NOTE(review): max_samples is still sum(y), i.e. derived from the earlier
# training target `y`, not from orig_label — confirm that is intended.
model = BaggingClassifierPU(xgb.XGBClassifier(),
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
model.fit(orig_data, orig_label)

BaggingClassifierPU(base_estimator=XGBClassifier(base_score=None, booster=None,
                                                 colsample_bylevel=None,
                                                 colsample_bynode=None,
                                                 colsample_bytree=None,
                                                 gamma=None, gpu_id=None,
                                                 importance_type='gain',
                                                 interaction_constraints=None,
                                                 learning_rate=None,
                                                 max_delta_step=None,
                                                 max_depth=None,
                                                 min_child_weight=None,
                                                 missing=nan,
                                                 monotone_constraints=None,
                                                 n_estimators=1

In [46]:
#verification
# Repeated check on shifted inputs. Each round samples 2000 row positions from
# [100, 9999) and 2000 from [10000, 19000), adds one random offset per column
# to the first two feature columns (the offset is at least the column maximum
# in df_max plus one), scores the current model on those rows, then refits the
# model on all data gathered so far using its own predictions as labels.
# NOTE: orig_data / orig_label grow on every round, so re-running this cell
# without re-running the cells that create them continues from the grown state.

# Largest value in the first two columns of the first 10000 rows, as a scalar.
# Reducing the NumPy array replaces max(np.amax(DataFrame)), which fails on
# pandas >= 2.0 where np.amax(DataFrame) already returns a scalar.
place_max = df_process.iloc[:10000, [0, 1]].to_numpy().max()  #w_dependence
# Per-column maxima read by position: df_max is labelled by column name, so
# df_max[0] would rely on the deprecated positional fallback.
p1_max = int(df_max.iloc[0])
p2_max = int(df_max.iloc[1])

name1 = ['min', 'max', 'Precision', 'Recall', 'Accuracy', 'f1_score','time']
test = pd.DataFrame(columns=name1)

for i in range(1, 100):

    pu_start = time.perf_counter()

    pos_list = np.random.randint(low=100, high=9999, size=2000).tolist()
    neg_list = np.random.randint(low=10000, high=19000, size=2000).tolist()
    all_list = pos_list + neg_list

    # Offset window for this round: [i-1, i] tenths of place_max.
    flag1 = int((0.1 * (i - 1)) * place_max)
    flag2 = int((0.1 * i) * place_max)
    print(flag2)

    temp_data = df_process.iloc[all_list, : -1].copy()
    temp_label = df_process.iloc[all_list, -1].copy()

    # One random offset per column, applied to every sampled row.
    temp_data.iloc[:, 0] = temp_data.iloc[:, 0] + random.randint(p1_max + flag1 + 1, p1_max + flag2 + 1)
    temp_data.iloc[:, 1] = temp_data.iloc[:, 1] + random.randint(p2_max + flag1 + 1, p2_max + flag2 + 1)


    print('======')
    print(temp_data.iloc[:, :])

    # Overall extremes of the two shifted columns (scalars).
    temp_max = temp_data.iloc[:, [0, 1]].to_numpy().max()
    temp_min = temp_data.iloc[:, [0, 1]].to_numpy().min()

    # Predict once with the model as it stands before this round's refit.
    temp_pred = np.asarray(model.predict(temp_data))

    print('---- {} ----'.format(i))
    print('Precision: ', precision_score(temp_label, temp_pred))
    print('Recall: ', recall_score(temp_label, temp_pred))
    print('Accuracy: ', accuracy_score(temp_label, temp_pred))
    print('f1_score: ', f1_score(temp_label, temp_pred))

    # Append the shifted rows with the model's own predictions as labels, then refit.
    orig_data = pd.concat([orig_data, temp_data], ignore_index=True)
    orig_label = pd.Series(orig_label.tolist() + temp_pred.tolist())
    model.fit(orig_data,orig_label)

    pu_end = time.perf_counter()

    # NOTE(review): the metrics stored below come from the refitted model, which
    # has just been trained on these same rows, whereas the metrics printed
    # above come from the model before the refit — confirm which is intended.
    refit_pred = model.predict(temp_data)
    test.loc[i] = [  temp_min, temp_max,
                        precision_score(temp_label, refit_pred),
                        recall_score(temp_label, refit_pred),
                        accuracy_score(temp_label, refit_pred),
                        f1_score(temp_label, refit_pred),
                       (pu_end - pu_start) ]

test.to_csv('../result/w_independence_unknown_result.csv')

10
        p1   p2   p3
574    119  121    1
9685   114  199    0
5888   165  128    1
5383   115  173    0
9100   116  195    1
...    ...  ...  ...
18922  165  151  825
10451  142  189  468
17943  189  179  101
16424  170  126  458
18850  179  130  883

[4000 rows x 3 columns]
---- 1 ----
Precision:  0.9945300845350572
Recall:  1.0
Accuracy:  0.99725
f1_score:  0.9972575417601595
20
        p1   p2   p3
1926   148  132    1
918    127  139    1
5552   195  116    1
1363   142  130    0
2307   143  140    0
...    ...  ...  ...
17816  145  166  380
13203  167  122  596
11495  211  189  679
14582  213  187  271
15844  211  125  961

[4000 rows x 3 columns]
---- 2 ----
Precision:  0.9945300845350572
Recall:  1.0
Accuracy:  0.99725
f1_score:  0.9972575417601595
30
        p1   p2   p3
3862   146  170    1
7862   198  145    1
4071   168  149    0
6769   142  193    0
1932   148  150    1
...    ...  ...  ...
14972  183  165  430
11733  185  197   76
13513  187  178  947
10388  132  133  

f1_score:  0.9992505620784412
220
        p1   p2   p3
1690   337  340    1
6401   360  355    0
2562   366  321    1
5891   378  334    0
5686   328  383    1
...    ...  ...  ...
18795  420  334  557
14326  387  371  123
18995  364  354  495
15985  333  399  727
13136  367  407   11

[4000 rows x 3 columns]
---- 22 ----
Precision:  0.9945300845350572
Recall:  1.0
Accuracy:  0.99725
f1_score:  0.9972575417601595
230
        p1   p2   p3
3702   363  348    1
5011   373  347    0
9605   371  376    0
8846   365  379    1
4261   337  377    0
...    ...  ...  ...
17036  395  366  904
13003  325  398  485
14473  354  339  830
12675  419  393  174
17040  364  403  822

[4000 rows x 3 columns]
---- 23 ----
Precision:  0.9970089730807578
Recall:  1.0
Accuracy:  0.9985
f1_score:  0.9985022466300548
240
        p1   p2   p3
6669   401  352    0
9614   377  393    1
6523   392  360    0
2623   347  375    0
6776   348  406    1
...    ...  ...  ...
13541  429  428  678
13665  427  356  963
1114

Recall:  1.0
Accuracy:  0.99725
f1_score:  0.9972575417601595
430
        p1   p2   p3
8642   575  566    1
1177   531  550    0
8841   569  572    0
1358   549  536    1
227    531  531    0
...    ...  ...  ...
16170  547  568  164
12047  574  548  207
17924  579  561   89
15286  554  583  124
11523  600  599   66

[4000 rows x 3 columns]
---- 43 ----
Precision:  0.9970089730807578
Recall:  1.0
Accuracy:  0.9985
f1_score:  0.9985022466300548
440
        p1   p2   p3
4368   562  573    1
4562   600  537    1
7226   577  577    1
299    538  547    0
3894   541  590    1
...    ...  ...  ...
12646  634  590  977
17258  559  597  352
14345  604  568  318
13478  574  536   71
10955  614  615  763

[4000 rows x 3 columns]
---- 44 ----
Precision:  0.9960159362549801
Recall:  1.0
Accuracy:  0.998
f1_score:  0.998003992015968
450
        p1   p2   p3
9078   568  620    1
6327   623  549    0
8670   583  603    1
5118   616  549    1
1665   575  558    0
...    ...  ...  ...
16059  619  567  

Precision:  0.998003992015968
Recall:  1.0
Accuracy:  0.999
f1_score:  0.9990009990009989
640
        p1   p2   p3
7681   806  747    0
1677   753  753    0
5448   782  758    1
8767   812  747    0
1917   762  747    0
...    ...  ...  ...
12233  757  828  491
10700  824  802  336
18961  802  750  804
17104  822  747  572
17568  763  820  345

[4000 rows x 3 columns]
---- 64 ----
Precision:  0.9950248756218906
Recall:  1.0
Accuracy:  0.9975
f1_score:  0.9975062344139651
650
        p1   p2   p3
3791   808  753    0
4730   801  768    1
5723   815  760    0
1152   770  764    1
8899   766  827    0
...    ...  ...  ...
10314  795  821  647
13008  795  784  434
12354  755  801  614
14706  839  818  881
14560  835  761  693

[4000 rows x 3 columns]
---- 65 ----
Precision:  0.9940357852882704
Recall:  1.0
Accuracy:  0.997
f1_score:  0.9970089730807578
660
        p1   p2   p3
7740   799  797    1
1738   787  763    1
8226   826  773    1
9139   839  764    0
2139   764  789    0
...    ..

Recall:  1.0
Accuracy:  0.99925
f1_score:  0.9992505620784412
850
         p1    p2   p3
926     944   969    1
2991    986   951    0
3488    968   974    1
729     955   954    0
8575   1025   950    0
...     ...   ...  ...
18311  1039  1024  683
16201   981  1010   69
17155   988  1016  895
17490   973  1007  959
17133  1010  1007  457

[4000 rows x 3 columns]
---- 85 ----
Precision:  0.9945300845350572
Recall:  1.0
Accuracy:  0.99725
f1_score:  0.9972575417601595
860
         p1    p2   p3
1374    970   976    1
2657   1003   957    0
2338    961   996    1
5759   1000   984    0
4668    966  1011    1
...     ...   ...  ...
11057   980  1007  478
10582  1027  1024  637
10799   955   979   58
14311  1025   959  285
16148   970   968  898

[4000 rows x 3 columns]
---- 86 ----
Precision:  0.9945300845350572
Recall:  1.0
Accuracy:  0.99725
f1_score:  0.9972575417601595
870
         p1    p2   p3
2004    995   982    1
1333    999   970    1
5489    992  1013    0
2095    995   982   