In [1]:
import time
import sklearn
import numpy as np
import pandas as pd

import sys
sys.path.append("..")
from baggingPU import BaggingClassifierPU

from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import random

In [2]:
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    """Print a confusion matrix as an aligned, fixed-width text table.

    Rows are prefixed with ``true_`` and columns with ``pred_``; every cell is
    formatted with one decimal place.

    Parameters
    ----------
    cm : 2-D array supporting ``cm[i, j]`` indexing
        The confusion matrix; ``cm[i, j]`` is printed in row ``i``, column ``j``.
    labels : sequence of str
        Class names, in the same order as the rows/columns of ``cm``.
    hide_zeroes : bool, optional
        Blank out cells whose value is exactly zero.
    hide_diagonal : bool, optional
        Blank out cells on the main diagonal.
    hide_threshold : number or None, optional
        When not ``None``, blank out cells whose value is ``<= hide_threshold``
        (a threshold of ``0`` is honoured).

    Returns
    -------
    None
        The table is written to stdout, so call this function directly
        rather than wrapping it in ``print``.
    """
    pred_prefix, true_prefix = "pred_", "true_"
    # The column must fit the prefixed label, otherwise headers overflow the
    # field and no longer line up with the numeric cells below them.
    columnwidth = max((len(x) for x in labels), default=0) + len(pred_prefix) + 1
    empty_cell = " " * columnwidth
    text_fmt = "%{0}s".format(columnwidth)
    cell_fmt = "%{0}.1f".format(columnwidth)

    # Header row: blank corner cell followed by one right-aligned title per column.
    print("    " + empty_cell, end=" ")
    for label in labels:
        # Parenthesised on purpose: '%' binds tighter than '+'.
        print(text_fmt % (pred_prefix + label), end=" ")
    print()

    for i, label1 in enumerate(labels):
        print("    " + text_fmt % (true_prefix + label1), end=" ")
        for j in range(len(labels)):
            value = cm[i, j]
            cell = cell_fmt % value
            if hide_zeroes and float(value) == 0:
                cell = empty_cell
            if hide_diagonal and i == j:
                cell = empty_cell
            if hide_threshold is not None and value <= hide_threshold:
                cell = empty_cell
            print(cell, end=" ")
        print()

# import data

In [3]:
# Load the data set for this run. Other inputs that have been used here
# (swap the path to switch):
#   '../data/1place-dependence.csv'
#   '../data/w-related.csv'
df_raw = pd.read_csv('../data/w-independence.csv')

# Force integer class labels, then report class balance and whether any
# value in the frame is missing.
df_raw = df_raw.assign(label=df_raw['label'].astype("int"))
print(df_raw['label'].value_counts())
print('Has null values', df_raw.isna().to_numpy().any())

1    10000
0    10000
Name: label, dtype: int64
Has null values False


In [4]:
# Preview the first ten rows of the raw frame.
df_raw.iloc[:10]

Unnamed: 0,p1,p2,p3,label
0,0,0,1,1
1,1,0,1,1
2,0,1,1,1
3,2,0,1,1
4,1,1,1,1
5,0,1,0,1
6,0,2,1,1
7,3,0,1,1
8,2,1,1,1
9,1,1,0,1


In [5]:
# The first 10000 rows are the samples labelled 1 in the printed frame.
print(df_raw.iloc[:10000, :])

# Per-column maxima of the first two columns (p1, p2) over those rows.
# DataFrame.max(axis=0) is used instead of np.amax(DataFrame): NumPy forwards
# axis=None to pandas, which reduces over both axes in pandas 2.x and would
# yield a single scalar, breaking the later per-column lookups on df_max.
df_max = df_raw.iloc[:10000, [0, 1]].max(axis=0)
print(df_max)

      p1  p2  p3  label
0      0   0   1      1
1      1   0   1      1
2      0   1   1      1
3      2   0   1      1
4      1   1   1      1
...   ..  ..  ..    ...
9995  52  47   0      1
9996  52  48   1      1
9997  51  48   0      1
9998  51  49   1      1
9999  50  49   0      1

[10000 rows x 4 columns]
p1    100
p2     99
dtype: int64


In [6]:
# Rows from position 10000 onwards are the samples labelled 0 in the printed frame.
print(df_raw.iloc[10000:, :])

# Per-column maxima of p1/p2 for those rows. An explicit axis=0 keeps the
# result per-column on every pandas version (np.amax(DataFrame) collapses to
# a scalar on pandas 2.x).
print(df_raw.iloc[10000:, [0, 1]].max(axis=0))

       p1  p2   p3  label
10000   8  41   52      0
10001  65  62  249      0
10002   4  21  509      0
10003  80  81  251      0
10004  75  12  919      0
...    ..  ..  ...    ...
19995  63  94  184      0
19996  43  65  209      0
19997  17  84  717      0
19998  69  83  269      0
19999  68  54  878      0

[10000 rows x 4 columns]
p1    100
p2     99
dtype: int64


In [7]:
# Work on an independent copy of the rows from position 10000 onwards.
neg_process = df_raw.iloc[10000:, :].copy()
print(neg_process)

# For each of the first two columns, any value above the corresponding maximum
# stored in df_max is replaced by int(value / maximum) (truncating division).
# This is done column-wise on every row of neg_process rather than with a
# hard-coded 10000-iteration loop, and df_max is read positionally via .iloc
# because its index holds column names, not integers.
for col_pos in (0, 1):
    col_max = df_max.iloc[col_pos]
    column = neg_process.iloc[:, col_pos]
    too_large = (column > col_max).to_numpy()
    neg_process.iloc[too_large, col_pos] = (
        (column[too_large] / col_max).astype(int).to_numpy()
    )

       p1  p2   p3  label
10000   8  41   52      0
10001  65  62  249      0
10002   4  21  509      0
10003  80  81  251      0
10004  75  12  919      0
...    ..  ..  ...    ...
19995  63  94  184      0
19996  43  65  209      0
19997  17  84  717      0
19998  69  83  269      0
19999  68  54  878      0

[10000 rows x 4 columns]


In [8]:
# Drop exact duplicate rows; rebinding the name (instead of inplace=True)
# keeps the step explicit.
neg_process = neg_process.drop_duplicates()
print(neg_process)

# Per-column maxima. axis=0 is explicit because np.amax(DataFrame) reduces
# over both axes (returning one scalar) on pandas 2.x.
print(neg_process.max(axis=0))

       p1  p2   p3  label
10000   8  41   52      0
10001  65  62  249      0
10002   4  21  509      0
10003  80  81  251      0
10004  75  12  919      0
...    ..  ..  ...    ...
19995  63  94  184      0
19996  43  65  209      0
19997  17  84  717      0
19998  69  83  269      0
19999  68  54  878      0

[10000 rows x 4 columns]
p1       100
p2        99
p3       999
label      0
dtype: int64


In [9]:
# Stack the first 10000 rows of df_raw on top of the processed rows in
# neg_process; original index labels are kept (no ignore_index).
df_process = pd.concat((df_raw.iloc[:10000], neg_process), axis=0)
print(df_process)

       p1  p2   p3  label
0       0   0    1      1
1       1   0    1      1
2       0   1    1      1
3       2   0    1      1
4       1   1    1      1
...    ..  ..  ...    ...
19995  63  94  184      0
19996  43  65  209      0
19997  17  84  717      0
19998  69  83  269      0
19999  68  54  878      0

[20000 rows x 4 columns]


In [10]:
# Features are every column but the last; the last column is the target.
x_data, y_data = df_process.iloc[:, :-1], df_process.iloc[:, -1]

# 80/20 split with a fixed seed so the partition is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=7,
)

In [11]:
# Re-attach the target column to each split so features and label travel together.
x_train_input = pd.concat([x_train, y_train], axis=1)
x_test_input = pd.concat([x_test, y_test], axis=1)

# Sizes first (train, then test) ...
for split in (x_train_input, x_test_input):
    print(len(split))

# ... then class balance and missing-value check per split.
for split in (x_train_input, x_test_input):
    print(split.label.value_counts())
    print('Has null values', split.isnull().values.any())

16000
4000
1    8077
0    7923
Name: label, dtype: int64
Has null values False
0    2077
1    1923
Name: label, dtype: int64
Has null values False


In [12]:
# Preview the first ten rows of the training split.
x_train_input.iloc[:10]

Unnamed: 0,p1,p2,p3,label
8118,36,54,1,1
10171,46,67,333,0
9399,53,43,0,1
12669,21,1,930,0
18809,29,96,858,0
13869,44,12,453,0
3320,51,7,1,1
14689,12,79,617,0
13087,21,66,519,0
15992,98,59,520,0


In [13]:
# Preview the first ten rows of the test split.
x_test_input.iloc[:10]

Unnamed: 0,p1,p2,p3,label
14356,39,20,653,0
3439,50,8,0,1
12153,82,97,899,0
15029,47,35,549,0
18549,80,83,15,0
15762,74,61,231,0
12313,61,17,739,0
16034,88,14,924,0
11496,16,39,993,0
5653,23,51,0,1


In [14]:
# Build the synthetic pool of unlabeled samples: random integer triples
# (p1, p2, p3) with a fourth "label" column fixed at 0.
# The size lives in one place and no longer rebinds the builtin name `iter`.
n_unlabeled = 13000

# np.int was removed in NumPy 1.24; the builtin int is the supported spelling.
alllist = np.zeros([n_unlabeled, 4], dtype=int)

# Whole-column assignment replaces the per-row Python loop.
# NOTE(review): columns 0 and 2 draw from [9, 999) while column 1 draws from
# [0, 999) -- confirm the differing lower bound is intentional.
alllist[:, 0] = np.random.choice(range(9, 999), size=n_unlabeled, replace=True)
alllist[:, 1] = np.random.choice(range(0, 999), size=n_unlabeled, replace=True)
alllist[:, 2] = np.random.choice(range(9, 999), size=n_unlabeled, replace=True)
# Column 3 (label) keeps its zero initialisation.

# Remove duplicate rows (this also sorts the rows lexicographically).
unlabel = np.unique(alllist, axis=0)

print(len(unlabel))

13000


In [15]:
# Draw 3000 row positions, with replacement, from [100, 9999) and use them to
# pick the labelled samples out of df_process.
# NOTE(review): df_process is also the frame the train/test split was taken
# from, so some of these rows may reappear in x_test -- confirm that overlap
# between training positives and the evaluation set is acceptable.
pos_list = np.random.randint(100, 9999, size=3000).tolist()
data_pos = df_process.iloc[pos_list]

# Wrap the unlabeled array in a frame with matching column names.
data_unl = pd.DataFrame(data=unlabel, columns=['p1', 'p2', 'p3', 'label'])

# Labelled rows first, unlabeled rows after, with a fresh 0..n-1 index.
data = pd.concat([data_pos, data_unl], axis=0, ignore_index=True)
print(data)

        p1   p2   p3  label
0       40   16    1      1
1       31    4    1      1
2       42   46    0      1
3       55   11    1      1
4       19   40    1      1
...    ...  ...  ...    ...
15995  998  327  536      0
15996  998  442  460      0
15997  998  583  258      0
15998  998  595  754      0
15999  998  879  304      0

[16000 rows x 4 columns]


In [16]:
# Work on a copy so `data` itself stays untouched.
df = data.copy()

# Features: all columns but the last; target: the last column.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Snapshot of the target taken before any later use of `y`.
y_orig = y.copy()

for shown in (X, y):
    print(shown)

        p1   p2   p3
0       40   16    1
1       31    4    1
2       42   46    0
3       55   11    1
4       19   40    1
...    ...  ...  ...
15995  998  327  536
15996  998  442  460
15997  998  583  258
15998  998  595  754
15999  998  879  304

[16000 rows x 3 columns]
0        1
1        1
2        1
3        1
4        1
        ..
15995    0
15996    0
15997    0
15998    0
15999    0
Name: label, Length: 16000, dtype: int64


In [17]:
# Class balance of the training target (y is already a Series).
y.value_counts()

0    13000
1     3000
Name: label, dtype: int64

# Training directly

In [18]:
# Baseline: fit one classifier directly on (X, y), i.e. with every unlabeled
# sample carrying label 0.
from sklearn.linear_model import LogisticRegression

# Alternatives that have been tried in this cell:
#   import xgboost as xgb
#   model = xgb.XGBClassifier()
#   from sklearn.neural_network import MLPClassifier
#   model = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(1), random_state=1,max_iter=100000)
model = LogisticRegression()

# The banner is derived from the estimator so it cannot drift from the model
# actually being trained (it used to say "XGboost" while fitting a
# LogisticRegression).
print('Training {} model ...'.format(type(model).__name__))

model.fit(X, y)

print('Done')

Training XGboost model ...
Done


In [19]:
# Evaluate the directly-trained baseline on the held-out split.
# Predict once and reuse the result for every metric.
baseline_pred = model.predict(x_test_input.iloc[:, :-1])

# Header names the estimator that was actually fitted.
print('---- {} ----'.format(type(model).__name__))
# print_cm writes the table itself and returns None, so it is not wrapped in
# print() (that only added a stray "None" line to the output).
print_cm(sklearn.metrics.confusion_matrix(y_test, baseline_pred), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_test, baseline_pred))
print('Recall: ', recall_score(y_test, baseline_pred))
print('Accuracy: ', accuracy_score(y_test, baseline_pred))
print('f1_score: ', f1_score(y_test, baseline_pred))

---- XGboost model ----
                        pred_negative        pred_positive 
           true_negative       2053.0         24.0 
           true_positive          0.0       1923.0 
None

Precision:  0.987673343605547
Recall:  1.0
Accuracy:  0.994
f1_score:  0.993798449612403


# Training by bagging

In [20]:
# Initialise the candidate base classifiers.
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
import xgboost as xgb

model1 = LogisticRegression()
model2 = DecisionTreeClassifier()
# Note: (1) is the plain integer 1, not a one-element tuple.
model3 = MLPClassifier(
    solver='adam',
    alpha=1e-5,
    hidden_layer_sizes=(1),
    random_state=1,
    max_iter=100000,
)
model4 = svm.LinearSVC()
model5 = MultinomialNB()
model6 = RandomForestClassifier(n_estimators=50, n_jobs=-1)
model7 = xgb.XGBClassifier()


In [21]:
# Candidates in a fixed order; f1[k] will hold the score of model_list[k].
model_list = [model1, model2, model3, model4, model5, model6, model7]
f1 = np.zeros(len(model_list), dtype=np.float32)

In [22]:
# Wrap each candidate in a PU-bagging ensemble, fit it on (X, y) and record
# its F1 score.
# NOTE(review): the score is computed on the same X the ensemble was fitted
# on, against y_orig -- confirm that an in-sample score is what should drive
# model selection.
n_positive = int(y.sum())  # each bag draws as many samples as there are label-1 rows

for idx, base_estimator in enumerate(model_list):
    model = BaggingClassifierPU(base_estimator,
                         n_estimators = 50,
                         n_jobs = -1,
                         max_samples = n_positive
                        )
    model.fit(X, y)
    # Predict and score once; the value is both printed and stored.
    score = f1_score(y_orig, model.predict(X))
    print(score)
    f1[idx] = score

print(f1)

0.9998333611064822
1.0
1.0
0.9966777408637874
0.8179959100204499
1.0
1.0
[0.99983335 1.         1.         0.99667776 0.8179959  1.
 1.        ]


In [23]:
# Keep the positions of the candidates whose F1 reached at least 0.95.
f1_index = [idx for idx, score in enumerate(f1) if score >= 0.95]

print(f1_index)

# Re-fit each retained candidate and accumulate its 0/1 predictions, so that
# predict_sum[k] counts how many of them predicted 1 for sample k.
predict_sum = np.zeros(len(X), dtype=np.float32)
for idx in f1_index:
    model = BaggingClassifierPU(
        model_list[idx],
        n_estimators=50,
        n_jobs=-1,
        max_samples=sum(y),
    )
    model.fit(X, y)
    predict_sum += model.predict(X)
print(predict_sum)

[0, 1, 2, 3, 5, 6]
[6. 6. 6. ... 0. 0. 0.]


In [24]:
# A sample is predicted 1 when at least half of the retained models voted 1.
threshold = len(f1_index) / 2
print(predict_sum)

# Element-wise comparison instead of an explicit index loop.
predict = np.where(predict_sum >= threshold, 1, 0).astype(np.int64)

print(predict)
print(y_orig)

[6. 6. 6. ... 0. 0. 0.]
[1 1 1 ... 0 0 0]
0        1
1        1
2        1
3        1
4        1
        ..
15995    0
15996    0
15997    0
15998    0
15999    0
Name: label, Length: 16000, dtype: int64


In [25]:
# Metrics of the majority-vote prediction on the training data.
print('---- {} ----'.format('PU Bagging'))
# print_cm prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line.
print_cm(sklearn.metrics.confusion_matrix(y_orig, predict), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_orig, predict))
print('Recall: ', recall_score(y_orig, predict))
print('Accuracy: ', accuracy_score(y_orig, predict))
print('f1_score: ', f1_score(y_orig, predict))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative      13000.0          0.0 
           true_positive          0.0       3000.0 
None

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0


In [26]:
# Position of the highest F1 score (the first one on ties).
best_idx = np.argmax(f1)
print(best_idx)

# Fresh, unfitted PU-bagging ensemble around the winning base classifier.
best_model = BaggingClassifierPU(
    model_list[best_idx],
    n_estimators=50,
    n_jobs=-1,
    max_samples=sum(y),
)
print(best_model)

1
BaggingClassifierPU(base_estimator=DecisionTreeClassifier(), max_samples=3000,
                    n_estimators=50, n_jobs=-1)


In [27]:
# Fit the selected ensemble and report how long the fit took.
print('Training bagging classifier...')

# Start the wall-clock timer immediately before fitting ...
pu_start = time.perf_counter()

best_model.fit(X, y)
# ... and stop it right after, so only the fit itself is measured.
pu_end = time.perf_counter()
print('Done!')
# Elapsed time in seconds (time.perf_counter returns fractional seconds).
print('Time:', pu_end - pu_start)


Training bagging classifier...
Done!
Time: 1.2189836839970667


In [28]:
# Metrics of best_model on the training data.
# Predict once and reuse the result for every metric.
train_pred = best_model.predict(X)

print('---- {} ----'.format('PU Bagging'))
# print_cm prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line.
print_cm(sklearn.metrics.confusion_matrix(y_orig, train_pred), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_orig, train_pred))
print('Recall: ', recall_score(y_orig, train_pred))
print('Accuracy: ', accuracy_score(y_orig, train_pred))
print('f1_score: ', f1_score(y_orig, train_pred))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative      13000.0          0.0 
           true_positive          0.0       3000.0 
None

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0


In [29]:
# Print the training samples that best_model misclassifies.
y_pre = np.asarray(best_model.predict(X))
y_orig_index = y_orig.index.tolist()
y_true = y_orig.to_numpy()

# Index labels of false negatives (true 1, predicted 0) and of false
# positives (true 0, predicted 1), selected with boolean masks.
FN_index = y_orig.index[(y_true == 1) & (y_pre == 0)].tolist()
FT_index = y_orig.index[(y_true == 0) & (y_pre == 1)].tolist()

# y_orig was taken from `df` (a copy of `data`, which was re-indexed with
# ignore_index=True), so these labels must be looked up in `df`. Looking them
# up in df_process, whose index comes from the raw file, would show
# unrelated rows.
print("False Negtive:")
print(df.loc[FN_index])
print("False Positive:")
print(df.loc[FT_index])

False Negtive:
Empty DataFrame
Columns: [p1, p2, p3, label]
Index: []
False Positive:
Empty DataFrame
Columns: [p1, p2, p3, label]
Index: []


In [30]:
# Metrics of best_model on the held-out test split.
# Predict once and reuse the result for every metric.
test_pred = best_model.predict(x_test_input.iloc[:, :-1])

print('---- {} ----'.format('PU Bagging'))
# print_cm prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line.
print_cm(sklearn.metrics.confusion_matrix(y_test, test_pred), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_test, test_pred))
print('Recall: ', recall_score(y_test, test_pred))
print('Accuracy: ', accuracy_score(y_test, test_pred))
print('f1_score: ', f1_score(y_test, test_pred))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative       2069.0          8.0 
           true_positive          0.0       1923.0 
None

Precision:  0.9958570688762299
Recall:  1.0
Accuracy:  0.998
f1_score:  0.9979242345614945


In [31]:
# Print the test samples that best_model misclassifies.
y_test_pre = best_model.predict(x_test_input.iloc[:, :-1])
y_test_index = y_test.index.tolist()

# Walk index label, true label and prediction in lock-step.
triples = list(zip(y_test_index, y_test.tolist(), y_test_pre))
FN_test_index = [lbl for lbl, truth, pred in triples if truth == 1 and pred == 0]
FT_test_index = [lbl for lbl, truth, pred in triples if truth == 0 and pred == 1]

# y_test keeps df_process's index labels, so the rows are looked up there.
print("False Negtive:")
print(df_process.loc[FN_test_index])
print("False Positive:")
print(df_process.loc[FT_test_index])

False Negtive:
Empty DataFrame
Columns: [p1, p2, p3, label]
Index: []
False Positive:
       p1  p2  p3  label
18639  45  17   5      0
11312  57  38   4      0
18195  87  67   5      0
18601  48  22   4      0
18264   9  63   4      0
12803  46  70   2      0
10538  75  55   2      0
13339  22  53   5      0


In [32]:
# Features of every row in df_process, and best_model's predicted label for
# each of them.
orig_data = df_process.iloc[:, :-1].copy()
orig_label = best_model.predict(orig_data)

for shown in (orig_data, orig_label):
    print(shown)

       p1  p2   p3
0       0   0    1
1       1   0    1
2       0   1    1
3       2   0    1
4       1   1    1
...    ..  ..  ...
19995  63  94  184
19996  43  65  209
19997  17  84  717
19998  69  83  269
19999  68  54  878

[20000 rows x 3 columns]
[1 1 1 ... 0 0 0]


In [33]:
import xgboost as xgb

# PU-bagging ensemble over XGBoost, fitted on orig_data with the labels that
# best_model predicted for it.
model = BaggingClassifierPU(
    xgb.XGBClassifier(),
    n_estimators=50,
    n_jobs=-1,
    max_samples=sum(y),
)
model.fit(orig_data, orig_label)

BaggingClassifierPU(base_estimator=XGBClassifier(base_score=None, booster=None,
                                                 colsample_bylevel=None,
                                                 colsample_bynode=None,
                                                 colsample_bytree=None,
                                                 gamma=None, gpu_id=None,
                                                 importance_type='gain',
                                                 interaction_constraints=None,
                                                 learning_rate=None,
                                                 max_delta_step=None,
                                                 max_depth=None,
                                                 min_child_weight=None,
                                                 missing=nan,
                                                 monotone_constraints=None,
                                                 n_estimators=1

In [34]:
# Verification: repeatedly shift fresh samples beyond the value range seen so
# far, score the current model on them, then extend the training set with the
# model's own predictions and re-fit.
import os
import random

# Per-column maxima of p1/p2 over the first 10000 rows of df_process (the
# rows df_raw.iloc[:10000] contributed). DataFrame.max(axis=0) is used
# instead of np.amax(DataFrame), which collapses to a scalar on pandas 2.x,
# and the values are read positionally with .iloc because the index holds
# column names.
pos_col_max = df_process.iloc[:10000, [0, 1]].max(axis=0)
p1_max = int(pos_col_max.iloc[0])
p2_max = int(pos_col_max.iloc[1])
place_max = max(p1_max, p2_max)  #w_dependence

name1 = ['min', 'max', 'Precision', 'Recall', 'Accuracy', 'f1_score','time']
# One record per iteration; the frame is built once after the loop instead of
# being grown row by row.
records = {}

for i in range(1, 100):

    pu_start = time.perf_counter()

    # 2000 positions from each half of df_process (drawn with replacement).
    pos_list = np.random.randint(low=100, high=9999, size=2000).tolist()
    neg_list = np.random.randint(low=10000, high=19000, size=2000).tolist()
    all_list = pos_list + neg_list

    # Offset window for this round: it moves up by 10% of place_max per iteration.
    flag1 = int((0.1 * (i - 1)) * place_max)
    flag2 = int((0.1 * i) * place_max)
    print(flag2)

    temp_data = df_process.iloc[all_list, : -1].copy()
    temp_label = df_process.iloc[all_list, -1].copy()

    # A single random offset per column is added to every sampled row.
    temp_data.iloc[:, 0] = temp_data.iloc[:, 0] + random.randint(p1_max + flag1 + 1, p1_max + flag2 + 1)
    temp_data.iloc[:, 1] = temp_data.iloc[:, 1] + random.randint(p2_max + flag1 + 1, p2_max + flag2 + 1)

    print('======')
    print(temp_data)

    # Overall extrema across the two shifted columns.
    shifted = temp_data.iloc[:, [0, 1]].to_numpy()
    temp_max = shifted.max()
    temp_min = shifted.min()

    # Score the current model before it sees this batch (predict once).
    pred_before = model.predict(temp_data)
    print('---- {} ----'.format(i))
    print('Precision: ', precision_score(temp_label, pred_before))
    print('Recall: ', recall_score(temp_label, pred_before))
    print('Accuracy: ', accuracy_score(temp_label, pred_before))
    print('f1_score: ', f1_score(temp_label, pred_before))

    # Extend the training set with the batch and the model's own predictions
    # for it, then re-fit.
    orig_data = pd.concat([orig_data, temp_data], ignore_index=True)
    orig_label = pd.Series(orig_label.tolist() + pred_before.tolist())
    model.fit(orig_data,orig_label)

    pu_end = time.perf_counter()

    # NOTE(review): the recorded metrics come from the re-fitted model, i.e.
    # after it was trained on this very batch, whereas the metrics printed
    # above are from before the re-fit. Behaviour kept as-is -- confirm which
    # of the two is meant to be saved.
    pred_after = model.predict(temp_data)
    records[i] = [temp_min, temp_max,
                  precision_score(temp_label, pred_after),
                  recall_score(temp_label, pred_after),
                  accuracy_score(temp_label, pred_after),
                  f1_score(temp_label, pred_after),
                  (pu_end - pu_start)]

test = pd.DataFrame.from_dict(records, orient='index', columns=name1)

# Make sure the output directory exists so a long run is not lost at the
# final write.
result_path = '../result/w_independence_unknown_result.csv'
os.makedirs(os.path.dirname(result_path), exist_ok=True)
test.to_csv(result_path)

10
        p1   p2   p3
8224   178  126    1
7307   105  192    0
2518   120  143    1
8219   180  123    0
5456   151  136    1
...    ...  ...  ...
10979  157  177  865
18121  174  176  116
15035  150  194  780
12367  111  173  548
13082  137  173  594

[4000 rows x 3 columns]
---- 1 ----
Precision:  0.9940357852882704
Recall:  1.0
Accuracy:  0.997
f1_score:  0.9970089730807578
20
        p1   p2   p3
5622   151  153    1
4938   128  171    1
2847   119  162    0
3294   118  168    1
3972   142  150    1
...    ...  ...  ...
12847  173  166  192
14502  206  191  306
12778  139  196   45
18373  188  195  351
14218  198  165  720

[4000 rows x 3 columns]
---- 2 ----
Precision:  0.9925558312655087
Recall:  1.0
Accuracy:  0.99625
f1_score:  0.9962640099626401
30
        p1   p2   p3
4973   200  124    0
9676   144  208    1
6290   146  187    1
9277   148  201    0
6901   166  170    0
...    ...  ...  ...
12792  187  176  868
17470  166  196   35
17730  220  184  135
18565  199  215  14

f1_score:  0.9987515605493134
220
        p1   p2   p3
3402   327  363    1
6506   385  328    1
5463   360  345    0
8875   344  381    0
9413   363  365    0
...    ...  ...  ...
10748  323  406  284
15832  378  386   49
15388  341  383  285
11562  377  343   69
11551  348  403  832

[4000 rows x 3 columns]
---- 22 ----
Precision:  0.9935419771485345
Recall:  1.0
Accuracy:  0.99675
f1_score:  0.99676052828308
230
        p1   p2   p3
7676   404  334    1
686    334  342    1
6751   353  378    0
6340   396  334    1
5509   346  377    0
...    ...  ...  ...
14720  363  374  527
10942  335  354  391
12859  356  421  581
17093  340  361  124
12380  365  347  368

[4000 rows x 3 columns]
---- 23 ----
Precision:  0.9975062344139651
Recall:  1.0
Accuracy:  0.99875
f1_score:  0.9987515605493134
240
        p1   p2   p3
7474   338  417    1
990    335  365    1
108    335  344    1
3787   393  337    0
167    341  340    0
...    ...  ...  ...
12224  394  401   90
17953  366  387  233
15706

Recall:  1.0
Accuracy:  0.99825
f1_score:  0.9982530571499876
430
        p1   p2   p3
7648   534  603    1
2777   572  530    0
789    541  536    0
864    533  546    1
8066   592  548    1
...    ...  ...  ...
15147  617  570  801
17664  609  593  209
13867  555  533  652
18864  596  524  648
17750  610  567  402

[4000 rows x 3 columns]
---- 43 ----
Precision:  0.9950248756218906
Recall:  1.0
Accuracy:  0.9975
f1_score:  0.9975062344139651
440
        p1   p2   p3
9937   615  553    0
7784   558  599    1
3201   586  539    0
799    540  556    0
7926   576  582    1
...    ...  ...  ...
13974  579  604  574
10656  571  606   86
13521  584  618  346
16772  621  617  170
14881  624  579  702

[4000 rows x 3 columns]
---- 44 ----
Precision:  0.9975062344139651
Recall:  1.0
Accuracy:  0.99875
f1_score:  0.9987515605493134
450
        p1   p2   p3
4493   572  577    0
1218   562  556    1
9925   628  554    0
9748   617  565    1
4813   549  602    0
...    ...  ...  ...
10247  605  63

Precision:  0.9985022466300549
Recall:  1.0
Accuracy:  0.99925
f1_score:  0.9992505620784412
640
        p1   p2   p3
4524   756  784    1
9105   747  820    0
1394   746  764    1
2935   757  769    0
506    740  755    1
...    ...  ...  ...
18063  763  750  343
16380  824  815  772
14035  836  750  368
13331  777  805  257
18697  792  759   52

[4000 rows x 3 columns]
---- 64 ----
Precision:  0.9960159362549801
Recall:  1.0
Accuracy:  0.998
f1_score:  0.998003992015968
650
        p1   p2   p3
1284   768  753    1
8226   817  759    1
1708   751  775    1
2027   765  764    0
1637   745  779    0
...    ...  ...  ...
15634  833  780  439
11751  784  823  648
14729  754  743   88
14978  761  748  955
12097  755  757  839

[4000 rows x 3 columns]
---- 65 ----
Precision:  0.9975062344139651
Recall:  1.0
Accuracy:  0.99875
f1_score:  0.9987515605493134
660
        p1   p2   p3
5033   795  781    0
8903   769  830    0
7484   842  751    1
299    759  763    0
4671   766  807    0
...   

Precision:  0.9960159362549801
Recall:  1.0
Accuracy:  0.998
f1_score:  0.998003992015968
850
         p1    p2   p3
518     963   947    1
6831   1016   953    0
9700    947  1038    1
9948   1022   965    1
8353    955  1022    0
...     ...   ...  ...
13540   984  1021  662
18187   966   945  842
11114  1020   984  985
16734   955   980   31
16221  1038  1037  817

[4000 rows x 3 columns]
---- 85 ----
Precision:  0.9995002498750625
Recall:  1.0
Accuracy:  0.99975
f1_score:  0.9997500624843789
860
         p1    p2   p3
4332   1004   974    1
633     967   969    0
8584   1038   967    1
7966    981  1020    1
1547    965   985    0
...     ...   ...  ...
17457  1055   956  738
12857   973   987  205
10976  1023   984  989
10773   981   957  345
12246   962   971  299

[4000 rows x 3 columns]
---- 86 ----
Precision:  0.9935419771485345
Recall:  1.0
Accuracy:  0.99675
f1_score:  0.99676052828308
870
         p1    p2   p3
7463    979  1036    0
3827   1009   982    0
8651   1015  1007