In [1]:
import time
import sklearn
import numpy as np
import pandas as pd

import sys
sys.path.append("..")
from baggingPU import BaggingClassifierPU

from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import random

In [2]:
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    """Print a confusion matrix as an aligned plain-text table.

    Parameters
    ----------
    cm : 2-D array indexable as ``cm[i, j]``
        Matrix to print. Row ``i`` is printed under the header
        ``true_<labels[i]>`` and column ``j`` under ``pred_<labels[j]>``.
    labels : sequence of str
        One name per class, in matrix order.
    hide_zeroes : bool, default False
        Blank out cells whose value equals zero.
    hide_diagonal : bool, default False
        Blank out cells on the main diagonal.
    hide_threshold : number or None, default None
        When given, blank out cells whose value is ``<= hide_threshold``.

    Returns
    -------
    None
        The table is written to stdout; do not wrap the call in ``print``.
    """
    # Columns must fit the longest *prefixed* header ("pred_"/"true_" + label),
    # otherwise headers overflow and no longer line up with the numbers.
    columnwidth = max((len(x) for x in labels), default=0) + len('pred_') + 2
    empty_cell = " " * columnwidth
    header_fmt = "%{0}s".format(columnwidth)
    cell_fmt = "%{0}.1f".format(columnwidth)

    # Header row: a blank corner cell followed by one column per predicted class.
    print("    " + empty_cell, end=' ')
    for label in labels:
        # The concatenation is parenthesised on purpose: `%` binds tighter
        # than `+`, so without it only the prefix would be padded.
        print(header_fmt % ('pred_' + label), end=" ")
    print()

    # One row per true class.
    for i, label1 in enumerate(labels):
        print("    " + (header_fmt % ('true_' + label1)), end=" ")
        for j in range(len(labels)):
            value = cm[i, j]
            hidden = (
                (hide_zeroes and float(value) == 0)
                or (hide_diagonal and i == j)
                # `is not None` so that a threshold of 0 is honoured.
                or (hide_threshold is not None and value <= hide_threshold)
            )
            print(empty_cell if hidden else cell_fmt % value, end=" ")
        print()

# import data

In [3]:
# Read the raw dataset and make sure the target column is stored as integers.
df_raw = pd.read_csv('../data/cannery.csv').astype({'label': 'int'})

# Quick sanity checks: class balance and presence of missing values.
print(df_raw['label'].value_counts())
print('Has null values', df_raw.isna().to_numpy().any())

1    10000
0    10000
Name: label, dtype: int64
Has null values False


In [4]:
# Preview the first ten rows of the raw frame.
df_raw.head(10)

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,...,p29,p30,p31,p32,p33,p34,p35,p36,p37,label
0,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,1,0,1
1,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,1,0,1
2,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,1,0,1
3,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,2,1,0,1,0,1
4,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,2,1,0,1,0,1
5,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,2,1,0,1,0,1
6,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,2,1,0,1,0,1
7,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,2,1,0,1,0,1
8,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,2,1,0,1,0,1
9,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,2,1,0,1,0,1


In [5]:
# Inspect the first 10000 rows (presumably the positive block; later cells
# sample "pos" rows from this range -- confirm against the data file).
print(df_raw.iloc[:10000,:])
# Per-column maxima of positional columns 31 and 32 over those rows.
# DataFrame.max() is used instead of np.amax(): np.amax forwards axis=None to
# pandas, which on pandas 2.x reduces over both axes and yields one scalar,
# so the later positional lookups df_max[0] / df_max[1] would fail.
df_max = df_raw.iloc[:10000,[31,32]].max()
print(df_max)

      p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p29  p30  p31  p32  p33  \
0      0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    1   
1      0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    1   
2      0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    1   
3      0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    2   
4      0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    2   
...   ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
9995   0   0   0   0   0   0   1   0   0    0  ...    0    1    0    8    2   
9996   0   0   0   0   0   1   0   0   0    1  ...    0    1    0    8    2   
9997   0   0   0   0   0   1   0   0   1    0  ...    0    1    0    8    2   
9998   0   0   0   0   0   0   1   0   0    0  ...    0    1    0    8    2   
9999   0   0   0   0   1   0   0   0   0    1  ...    0    1    0    8    2   

      p34  p35  p36  p37  label  
0       1    0   

In [6]:
# NOTE: this binds a second name to the same DataFrame object; it is not a
# copy, so any in-place change made through df_process is visible in df_raw.
df_process = df_raw
print(df_process)

       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p29  p30  p31  p32  p33  \
0       0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    1   
1       0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    1   
2       0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    1   
3       0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    2   
4       0   0   1   0   0   0   0   0   0    0  ...    0    1    0    0    2   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
19995  23   4  16   7  14   2   0  11   9    9  ...   18   20   18   23   18   
19996   8   3   6  25  19   6  14  18  14   17  ...    8   10   10   18   10   
19997  20  15   8  20  11   0   7  18  16   15  ...   16   12    6   17   21   
19998   3  12  10   4  15  25   6  19  17   12  ...   20   12    7   17   19   
19999   7  15   3  18   4  14  17   8  10   25  ...   10   20   18    9   19   

       p34  p35  p36  p37  label  
0   

In [7]:
# Features are every column except the last one; the last column is the target.
x_data = df_process.iloc[:,:-1]
y_data = df_process.iloc[:,-1]

# 80/20 train/test split; random_state is fixed so the partition is repeatable.
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=7)

In [8]:
# Re-attach the target column to each split so features and label travel together.
x_train_input = pd.concat([x_train, y_train], axis=1)
x_test_input = pd.concat([x_test, y_test], axis=1)

# Split sizes first ...
print(len(x_train_input))
print(len(x_test_input))

# ... then class balance and a missing-value check, train split before test split.
for frame in (x_train_input, x_test_input):
    print(frame.label.value_counts())
    print('Has null values', frame.isnull().values.any())

16000
4000
1    8077
0    7923
Name: label, dtype: int64
Has null values False
0    2077
1    1923
Name: label, dtype: int64
Has null values False


In [9]:
# Preview the first ten rows of the training split (features + label).
x_train_input.head(10)

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,...,p29,p30,p31,p32,p33,p34,p35,p36,p37,label
8118,1,0,0,0,0,0,0,0,1,0,...,0,1,0,6,3,1,0,1,0,1
10171,14,1,14,16,4,25,22,13,1,2,...,16,6,11,8,21,12,21,17,0,0
9399,0,0,0,0,1,0,0,0,0,0,...,0,1,0,7,4,1,0,1,0,1
12669,23,0,0,9,19,19,18,18,10,18,...,25,7,24,5,8,4,19,16,2,0
18809,2,10,8,13,17,21,6,7,8,2,...,8,13,1,16,13,18,11,8,22,0
13869,4,3,7,24,11,23,5,0,21,17,...,11,11,0,2,23,13,11,13,5,0
3320,0,0,1,0,0,0,0,0,0,0,...,0,1,0,3,2,1,0,1,0,1
14689,10,20,18,1,24,19,6,15,7,25,...,13,2,22,2,22,14,22,21,10,0
13087,25,20,4,24,1,4,23,16,2,16,...,17,25,17,15,22,9,11,21,2,0
15992,25,12,10,19,2,6,9,18,17,13,...,16,7,11,10,18,22,12,16,6,0


In [10]:
# Preview the first ten rows of the test split (features + label).
x_test_input.head(10)

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,...,p29,p30,p31,p32,p33,p34,p35,p36,p37,label
14356,15,23,5,7,5,22,19,1,4,1,...,25,13,2,8,19,13,8,11,3,0
3439,0,0,0,1,0,0,0,0,0,0,...,0,0,1,3,2,1,0,0,1,1
12153,0,16,17,7,20,7,2,11,20,7,...,9,17,12,12,17,22,10,18,12,0
15029,23,10,20,16,0,3,1,14,22,23,...,12,9,22,8,18,0,11,11,18,0
18549,21,13,5,4,19,24,10,12,0,18,...,8,10,21,5,4,12,5,24,9,0
15762,14,17,20,6,25,20,3,14,3,0,...,14,13,7,3,20,19,9,7,19,0
12313,7,15,14,14,20,9,10,14,12,20,...,21,17,25,18,8,6,5,23,16,0
16034,3,0,6,3,25,10,20,8,22,5,...,24,2,7,10,18,17,7,7,14,0
11496,25,17,8,9,5,16,15,20,9,14,...,9,20,20,2,17,22,6,13,13,0
5653,0,0,0,0,1,0,0,0,0,1,...,0,1,0,4,4,1,0,1,0,1


In [11]:
# Build the positive-unlabeled (PU) training set from the labeled train split.
df = x_train_input.copy()

NON_LBL = [c for c in df.columns if c != 'label']
X = df[NON_LBL]
# Explicit copy: the relabeling below must not write through to `df`
# (and must not trigger pandas' chained-assignment warnings).
y = df['label'].copy()

# Ground-truth labels, kept untouched for evaluation.
y_orig = y.copy()

# Hide `hidden_size` known positives by relabeling them as 0 ("unlabeled").
# np.random.choice raises ValueError if fewer positives than that exist.
hidden_size = 5000
# Local seeded generator: the same positives are hidden on every run and the
# global NumPy random state is left alone. The seed value matches the
# random_state used for the train/test split.
rng = np.random.RandomState(7)
hidden_idx = rng.choice(
    y[y == 1].index,
    replace = False,
    size = hidden_size
)
y.loc[hidden_idx] = 0

print(X)
print(y)

       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
8118    1   0   0   0   0   0   0   0   1    0  ...    1    0    1    0    6   
10171  14   1  14  16   4  25  22  13   1    2  ...   19   16    6   11    8   
9399    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0    7   
12669  23   0   0   9  19  19  18  18  10   18  ...   21   25    7   24    5   
18809   2  10   8  13  17  21   6   7   8    2  ...   14    8   13    1   16   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
13927  14  13   8  14  16   0   4  16  20   15  ...    8    6   25   23    8   
919     0   0   0   1   0   0   0   0   1    0  ...    1    0    1    0    1   
5699    0   0   0   0   0   0   1   0   0    0  ...    0    1    1    0    4   
10742  13   5  18  14   8  17   1  19  12    3  ...   18    9   15   22   21   
16921  22  11  18  21   0  16   1  13  18   25  ...   10   24    3   23    6   

       p33  p34  p35  p36  p37  
8118  

In [12]:
# Class counts after hiding: label 0 now covers both the original 0 rows and
# the positives that were relabeled in the previous cell.
pd.Series(y).value_counts()

0    12923
1     3077
Name: label, dtype: int64

# Training directly

In [13]:
# Baseline: fit XGBoost directly on the PU labels `y`, i.e. with the hidden
# positives still marked as 0.
print('Training XGboost model ...')

import xgboost as xgb

# Classifier with library-default hyper-parameters.
model = xgb.XGBClassifier()
model.fit(X, y)

print('Done')

Training XGboost model ...
Done


In [14]:
# Evaluate the directly-trained baseline on the held-out test split.
print('---- {} ----'.format('XGboost model'))
# Predict once and reuse the result for the matrix and every metric.
xgb_test_pred = model.predict(x_test_input.iloc[:,:-1])
# print_cm writes the table itself and returns None; wrapping it in print()
# used to emit a stray "None" line after the table.
print_cm(sklearn.metrics.confusion_matrix(y_test, xgb_test_pred), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_test, xgb_test_pred))
print('Recall: ', recall_score(y_test, xgb_test_pred))
print('Accuracy: ', accuracy_score(y_test, xgb_test_pred))
print('f1_score: ', f1_score(y_test, xgb_test_pred))

---- XGboost model ----
                        pred_negative        pred_positive 
           true_negative       2077.0          0.0 
           true_positive       1463.0        460.0 
None

Precision:  1.0
Recall:  0.2392095683827353
Accuracy:  0.63425
f1_score:  0.3860679815358791


# Training by bagging

In [15]:
# Candidate base estimators for the PU-bagging comparison. Imports are grouped
# first so the cell's dependencies are visible at a glance.
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn import svm
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

model1 = LogisticRegression()

model2 = DecisionTreeClassifier()

# hidden_layer_sizes is a tuple of layer widths. The previous `(1)` is just the
# integer 1 in parentheses; `(1,)` states the intended single hidden layer
# with one unit explicitly.
model3 = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(1,), random_state=1,max_iter=100000)

model4 = svm.LinearSVC()

model5 = MultinomialNB()

model6 = RandomForestClassifier(n_estimators = 50,n_jobs = -1)

model7 = xgb.XGBClassifier()


In [16]:
# Candidate base estimators, in the order they are evaluated.
model_list = [model1, model2, model3, model4, model5, model6, model7]
# One F1 slot per candidate (same order as model_list), initialised to zero.
f1 = np.zeros([len(model_list)],dtype=np.float32)

In [None]:
# Wrap every candidate in a PU-bagging ensemble, fit it on the PU labels `y`
# and score it against the ground-truth training labels `y_orig`.
for j, base_estimator in enumerate(model_list):
    model = BaggingClassifierPU(base_estimator,
                         n_estimators = 50,
                         n_jobs = -1,
                         # y holds 0/1, so its sum is the number of rows still labeled positive.
                         max_samples = sum(y)
                        )
    model.fit(X,y)
    # Predict once; the score is both printed and stored.
    score = f1_score(y_orig, model.predict(X))
    print(score)
    f1[j] = score

print(f1)

1.0
0.9039218347129868
1.0
1.0
0.9977664722670307


In [None]:
# Positions of the candidates whose F1 reached the 0.95 cut-off.
f1_index = [idx for idx, score in enumerate(f1) if score >= 0.95]

print(f1_index)

# Accumulate the 0/1 predictions of every selected ensemble; entry k ends up
# holding the number of selected ensembles that predicted 1 for row k.
predict_sum = np.zeros([len(X)],dtype=np.float32)
for i in f1_index:
    model = BaggingClassifierPU(
        model_list[i],
        n_estimators = 50,
        n_jobs = -1,
        max_samples = sum(y),
    )
    model.fit(X,y)
    predict_sum += model.predict(X)
print(predict_sum)

In [None]:
# Majority vote: a row is predicted positive when at least half of the
# selected ensembles voted 1 for it.
# NOTE(review): if f1_index is empty the threshold is 0 and every row is
# predicted positive -- confirm whether that case needs explicit handling.
threshold = len(f1_index) / 2
print(predict_sum)

# Vectorized replacement for the per-row loop with two complementary ifs.
predict = (predict_sum >= threshold).astype(np.int64)

print(predict)
print(y_orig)

In [None]:
#train data
# Score the majority-vote predictions against the ground-truth training labels.
print('---- {} ----'.format('PU Bagging'))
# print_cm prints the table itself and returns None, so it must not be wrapped
# in print() (that printed a stray "None").
print_cm(sklearn.metrics.confusion_matrix(y_orig,predict), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_orig, predict))
print('Recall: ', recall_score(y_orig,predict))
print('Accuracy: ', accuracy_score(y_orig, predict))
print('f1_score: ', f1_score(y_orig, predict))

In [None]:
# New, not-yet-fitted PU-bagging ensemble around the candidate with the
# highest recorded F1 (np.argmax returns the first one on ties).
best_model = BaggingClassifierPU(model_list[np.argmax(f1)],
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
print(best_model)

In [None]:
# Fit the selected ensemble on the PU labels and report the wall-clock time.
print('Training bagging classifier...')

pu_start = time.perf_counter()

best_model.fit(X, y)
pu_end = time.perf_counter()
print('Done!')
# Elapsed seconds for the fit call only.
print('Time:', pu_end - pu_start)


In [None]:
#train data
# Score best_model on the training features against the ground-truth labels.
print('---- {} ----'.format('PU Bagging'))
# Predict once and reuse the result for the matrix and every metric.
best_train_pred = best_model.predict(X)
# print_cm returns None; calling it bare avoids printing a stray "None".
print_cm(sklearn.metrics.confusion_matrix(y_orig, best_train_pred), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_orig, best_train_pred))
print('Recall: ', recall_score(y_orig, best_train_pred))
print('Accuracy: ', accuracy_score(y_orig, best_train_pred))
print('f1_score: ', f1_score(y_orig, best_train_pred))

In [None]:
#print wrong predictions
y_pre = best_model.predict(X)
y_orig_index = y_orig.index.tolist()

# Boolean masks over the training rows, in row order.
train_truth = y_orig.to_numpy()
train_guess = np.asarray(y_pre)

# Original row labels of the misclassified samples:
# FN = truly 1 but predicted 0, FT = truly 0 but predicted 1.
FN_index = y_orig.index[(train_truth == 1) & (train_guess == 0)].tolist()
FT_index = y_orig.index[(train_truth == 0) & (train_guess == 1)].tolist()

print("False Negtive:")
print(df_process.loc[FN_index])
print("False Positive:")
print(df_process.loc[FT_index])

In [None]:
#test data
# Score best_model on the held-out test split.
print('---- {} ----'.format('PU Bagging'))
# Predict once and reuse the result for the matrix and every metric.
best_test_pred = best_model.predict(x_test_input.iloc[:,:-1])
# print_cm returns None; calling it bare avoids printing a stray "None".
print_cm(sklearn.metrics.confusion_matrix(y_test, best_test_pred), labels=['negative', 'positive'])
print('')
print('Precision: ', precision_score(y_test, best_test_pred))
print('Recall: ', recall_score(y_test, best_test_pred))
print('Accuracy: ', accuracy_score(y_test, best_test_pred))
print('f1_score: ', f1_score(y_test, best_test_pred))

In [None]:
#print wrong predictions
y_test_pre = best_model.predict(x_test_input.iloc[:,:-1])
y_test_index = y_test.index.tolist()

# Boolean masks over the test rows, in row order.
test_truth = y_test.to_numpy()
test_guess = np.asarray(y_test_pre)

# Original row labels of the misclassified samples:
# FN = truly 1 but predicted 0, FT = truly 0 but predicted 1.
FN_test_index = y_test.index[(test_truth == 1) & (test_guess == 0)].tolist()
FT_test_index = y_test.index[(test_truth == 0) & (test_guess == 1)].tolist()

print("False Negtive:")
print(df_process.loc[FN_test_index])
print("False Positive:")
print(df_process.loc[FT_test_index])

In [None]:
#without iterative strategy
# The already-fitted `best_model` is scored on 100 resampled batches in which
# positional columns 31 and 32 are shifted by an offset that grows each round.
# The model is never refitted here.
import random

# Per-column maxima of positional columns 31/32 over the first 10000 rows.
# They are computed here instead of reading `df_max` from a much earlier cell,
# and with DataFrame.max() rather than np.amax(): on pandas 2.x
# np.amax(DataFrame) collapses to a single scalar, which breaks both
# max(np.amax(...)) and positional lookups such as df_max[0].
col_max = df_process.iloc[:10000, [31, 32]].max().to_numpy()
place_max = col_max.max()
name1 = ['min', 'max', 'Precision', 'Recall', 'Accuracy', 'f1_score','time']
test = pd.DataFrame(columns=name1)

for i in range(1, 101):
    pu_start = time.perf_counter()

    # 2000 row positions from each half of the frame, drawn with replacement.
    # NOTE(review): the first range starts at 100, not 0 -- confirm intent.
    pos_list = np.random.randint(low=100, high=10000, size=2000).tolist()
    neg_list = np.random.randint(low=10000, high=20000, size=2000).tolist()
    all_list = pos_list + neg_list

    # Offset window for round i: 10% of place_max wider than the previous one.
    flag1 = int((0.1 * (i - 1)) * place_max)
    flag2 = int((0.1 * i) * place_max)
    print(flag2)

    temp_data = df_process.iloc[all_list, : -1].copy()
    temp_label = df_process.iloc[all_list, -1].copy()

    # One random offset per column, applied to every row of the batch.
    temp_data.iloc[:, 31] = temp_data.iloc[:, 31] + random.randint(int(col_max[0]) + flag1 + 1, int(col_max[0]) + flag2 + 1)
    temp_data.iloc[:, 32] = temp_data.iloc[:, 32] + random.randint(int(col_max[1]) + flag1 + 1, int(col_max[1]) + flag2 + 1)

    print('======')
    print(temp_data.iloc[:, :])

    # Overall extremes of the two shifted columns.
    shifted = temp_data.iloc[:, [31, 32]].to_numpy()
    temp_max = shifted.max()
    temp_min = shifted.min()

    # Predict once per round; the same predictions feed the printed and the
    # stored metrics (previously the batch was predicted eight times).
    batch_pred = best_model.predict(temp_data)
    batch_precision = precision_score(temp_label, batch_pred)
    batch_recall = recall_score(temp_label, batch_pred)
    batch_accuracy = accuracy_score(temp_label, batch_pred)
    batch_f1 = f1_score(temp_label, batch_pred)

    print('---- {} ----'.format(i))
    print('Precision: ', batch_precision)
    print('Recall: ', batch_recall)
    print('Accuracy: ', batch_accuracy)
    print('f1_score: ', batch_f1)

    pu_end = time.perf_counter()

    test.loc[i] = [temp_min, temp_max,
                   batch_precision,
                   batch_recall,
                   batch_accuracy,
                   batch_f1,
                   (pu_end - pu_start)]

test.to_csv('../result/cannery_result_without.csv')

In [27]:
# Starting point for the iterative experiment: every feature row of the
# dataset, labeled with best_model's own predictions instead of the
# ground-truth column.
orig_data = df_process.iloc[:,:-1].copy()
orig_label = best_model.predict(orig_data)

print(orig_data)
print(orig_label)

       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
0       0   0   1   0   0   0   0   0   0    0  ...    1    0    1    0    0   
1       0   0   1   0   0   0   0   0   0    0  ...    1    0    1    0    0   
2       0   0   1   0   0   0   0   0   0    0  ...    1    0    1    0    0   
3       0   0   1   0   0   0   0   0   0    0  ...    1    0    1    0    0   
4       0   0   1   0   0   0   0   0   0    0  ...    1    0    1    0    0   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
19995  23   4  16   7  14   2   0  11   9    9  ...   14   18   20   18   23   
19996   8   3   6  25  19   6  14  18  14   17  ...    6    8   10   10   18   
19997  20  15   8  20  11   0   7  18  16   15  ...   15   16   12    6   17   
19998   3  12  10   4  15  25   6  19  17   12  ...    0   20   12    7   17   
19999   7  15   3  18   4  14  17   8  10   25  ...   16   10   20   18    9   

       p33  p34  p35  p36  p37  
0     

In [28]:
# PU-bagging ensemble over XGBoost, fitted on the predicted labels from the
# previous cell.
import xgboost as xgb
# NOTE(review): max_samples is still derived from `y` (the PU training labels),
# not from `orig_label`, which is what the model is fitted on -- confirm intent.
model = BaggingClassifierPU(xgb.XGBClassifier(),
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
model.fit(orig_data, orig_label)

BaggingClassifierPU(base_estimator=XGBClassifier(base_score=None, booster=None,
                                                 colsample_bylevel=None,
                                                 colsample_bynode=None,
                                                 colsample_bytree=None,
                                                 gamma=None, gpu_id=None,
                                                 importance_type='gain',
                                                 interaction_constraints=None,
                                                 learning_rate=None,
                                                 max_delta_step=None,
                                                 max_depth=None,
                                                 min_child_weight=None,
                                                 missing=nan,
                                                 monotone_constraints=None,
                                                 n_estimators=1

In [29]:
#verification
# Iterative strategy: each round scores `model` on a shifted batch, appends the
# batch (labeled with the model's own predictions) to the training pool,
# refits, and records the metrics of the refitted model on that same batch.
# NOTE: the cell is not idempotent -- orig_data / orig_label grow and `model`
# is refitted on every run, so re-run the two preceding cells first.
import random

# Per-column maxima of positional columns 31/32 over the first 10000 rows.
# They are computed here instead of reading `df_max` from a much earlier cell,
# and with DataFrame.max() rather than np.amax(): on pandas 2.x
# np.amax(DataFrame) collapses to a single scalar, which breaks both
# max(np.amax(...)) and positional lookups such as df_max[0].
col_max = df_process.iloc[:10000, [31, 32]].max().to_numpy()
place_max = col_max.max()
name1 = ['min', 'max', 'Precision', 'Recall', 'Accuracy', 'f1_score','time']
test = pd.DataFrame(columns=name1)

for i in range(1, 101):
    pu_start = time.perf_counter()

    # 2000 row positions from each half of the frame, drawn with replacement.
    # NOTE(review): the first range starts at 100, not 0 -- confirm intent.
    pos_list = np.random.randint(low=100, high=10000, size=2000).tolist()
    neg_list = np.random.randint(low=10000, high=20000, size=2000).tolist()
    all_list = pos_list + neg_list

    # Offset window for round i: 10% of place_max wider than the previous one.
    flag1 = int((0.1 * (i - 1)) * place_max)
    flag2 = int((0.1 * i) * place_max)
    print(flag2)

    temp_data = df_process.iloc[all_list, : -1].copy()
    temp_label = df_process.iloc[all_list, -1].copy()

    # One random offset per column, applied to every row of the batch.
    temp_data.iloc[:, 31] = temp_data.iloc[:, 31] + random.randint(int(col_max[0]) + flag1 + 1, int(col_max[0]) + flag2 + 1)
    temp_data.iloc[:, 32] = temp_data.iloc[:, 32] + random.randint(int(col_max[1]) + flag1 + 1, int(col_max[1]) + flag2 + 1)

    print('======')
    print(temp_data.iloc[:, :])

    # Overall extremes of the two shifted columns.
    shifted = temp_data.iloc[:, [31, 32]].to_numpy()
    temp_max = shifted.max()
    temp_min = shifted.min()

    # Predictions of the model as it stands BEFORE this round's refit; reused
    # for the printed metrics and as pseudo-labels for the new batch.
    pred_before = model.predict(temp_data)

    print('---- {} ----'.format(i))
    print('Precision: ', precision_score(temp_label, pred_before))
    print('Recall: ', recall_score(temp_label, pred_before))
    print('Accuracy: ', accuracy_score(temp_label, pred_before))
    print('f1_score: ', f1_score(temp_label, pred_before))

    # Grow the training pool with the pseudo-labeled batch and refit.
    orig_data = pd.concat([orig_data, temp_data], ignore_index=True)
    orig_label = pd.Series(orig_label.tolist() + pred_before.tolist())
    model.fit(orig_data,orig_label)

    pu_end = time.perf_counter()

    # Metrics written to the CSV come from the model AFTER the refit, so they
    # can differ from the values printed above.
    # NOTE(review): confirm that recording post-refit scores is intended.
    pred_after = model.predict(temp_data)
    test.loc[i] = [temp_min, temp_max,
                   precision_score(temp_label, pred_after),
                   recall_score(temp_label, pred_after),
                   accuracy_score(temp_label, pred_after),
                   f1_score(temp_label, pred_after),
                   (pu_end - pu_start)]

test.to_csv('../result/cannery_result.csv')

0
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
9624    0   0   0   1   0   0   0   0   0    1  ...    1    0    1    0   17   
2043    0   0   0   0   0   1   0   0   1    0  ...    1    0    1    0   11   
1301    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   10   
7597    0   0   0   0   0   1   0   0   0    1  ...    1    0    1    0   15   
9507    0   0   0   1   0   0   0   0   0    0  ...    1    0    1    0   17   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
19416   2  10   4   1  12  13  12  22   2   23  ...    8   22   21   11   10   
18438  12   1   5   1  20  17   0   4  15   22  ...    8   17   24   18   11   
12121  20  22   5  22   3  11  22  23  19   17  ...   20    6    0   16   26   
12431   9   8  25  25  12  12  22  18   8   21  ...    2   10   13   13   12   
13973   9   8   7   3  25   8   2  17   9   25  ...   24    1    7   12   31   

       p33  p34  p35  p36  p37  
9624

Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
5
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
6936    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   19   
9589    0   0   0   0   0   1   0   0   1    0  ...    1    0    1    0   22   
8381    0   0   0   1   0   0   0   0   1    0  ...    1    0    1    0   21   
6855    0   0   1   0   0   0   0   0   0    0  ...    1    0    1    0   19   
2735    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   16   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
15964  18   5  25   5   1  11   4   5   8    7  ...    2    7   18   11   36   
15185   9  17  17   1   2  16  10  20  13   13  ...   10    7   10   25   23   
10669  11  20  19   0   6   0  13  16  13    0  ...    5   24    6    4   25   
15825   6   8   1  13  12   2  18  15   9    2  ...    4    9   11    3   23   
18721   0  16   7  14  25  13  16  25   5    3  ...    8   17   10    6   3

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
10
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
6072    0   1   0   0   0   0   0   0   0    0  ...    1    0    1    0   24   
6986    0   0   0   0   1   0   0   0   0    0  ...    1    0    0    1   24   
5603    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   23   
7661    0   0   0   0   0   0   1   0   1    0  ...    1    0    1    0   25   
4725    0   1   0   0   0   0   0   0   0    0  ...    1    0    1    0   23   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
12319   6  13  15   4  21  11   7  11  13   20  ...   18   13   14   15   34   
18760   4  22  10  10  12  20  25  20   6   24  ...    4    4    3    3   31   
10815  20  22  16   5   7  10  19   8   7   16  ...    6    1    3    1   25   
17051   2  17   4   9  25  18  23  13   5   25  ...   20    9    4   16   27   
19238  14   1  17  23  11  12  23  17  22   22  ...    5  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
15
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
1377    0   0   0   0   0   0   1   0   0    1  ...    1    0    1    0   24   
9202    0   0   0   0   0   1   0   0   0    1  ...    1    0    1    0   30   
6688    0   0   0   0   1   0   0   0   1    0  ...    1    0    1    0   28   
2578    1   0   0   0   0   0   0   0   0    0  ...    1    0    1    0   25   
493     0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   24   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
16834  10   1   6   2  11  13  23   4  14   19  ...    2   12   22   21   42   
19574   5   5   9   4  21   8  23   4   7    4  ...   20    3   16   15   28   
18139   1   8  16  17  23  21   3  24   4   24  ...    2   10    2   25   35   
18678  24  15   7  19   1   4  11  15   5    9  ...   18   19    0   15   37   
11155  18   4   4  24  22  10  19  23  12   12  ...   21  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
20
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
3650    1   0   0   0   0   0   0   0   1    0  ...    1    0    1    0   31   
5151    0   0   0   0   0   1   0   0   0    0  ...    1    0    1    0   32   
9627    0   0   0   0   0   1   0   0   1    0  ...    1    0    1    0   36   
1480    0   0   1   0   0   0   0   0   0    0  ...    1    0    1    0   29   
5621    0   0   0   0   0   1   0   0   0    0  ...    1    0    1    0   32   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
13021  16   3   5  10   1   8   3   5  11   23  ...   15   23   16    6   38   
18920  24  25   4   4   5  11  14  15  13    1  ...    5    2    5   24   28   
12080  13  10  13  17  12  18  20  14   9    0  ...   11   12   18   17   39   
14921  17  18  18   8   0  16   1   2   3    0  ...    2    1   18   13   40   
16725   2   0  10  20  12  15   9   2  18   23  ...   24  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
24
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
4785    0   0   0   1   0   0   0   0   0    0  ...    1    0    1    0   37   
5729    0   0   0   0   0   0   1   0   1    0  ...    1    0    1    0   37   
3989    0   0   0   0   0   0   0   1   0    1  ...    1    0    1    0   36   
661     0   0   0   0   0   0   0   1   0    0  ...    1    0    1    0   34   
2057    0   0   0   0   1   0   0   0   1    0  ...    1    0    1    0   35   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
12544   9   3  18   6  25  12  24  10  11    2  ...   14    3   11   16   35   
15966   2   2  17  23  20  21   6   1   5   25  ...   24    0   24   21   39   
10446  14  17   2  22  24   2  15  13   5    7  ...   15    2   12    4   43   
13292  17  23  16   6   7  18  14   3  21    0  ...    5    3    0    9   57   
13054  12  12   1  18  24  18  20   5   7   13  ...   17  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
29
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
2938    0   1   0   0   0   0   0   0   1    0  ...    1    0    1    0   40   
7111    0   0   0   1   0   0   0   0   0    0  ...    1    0    1    0   44   
7209    0   0   0   0   0   1   0   0   0    0  ...    1    0    1    0   44   
5011    1   0   0   0   0   0   0   0   0    0  ...    1    0    1    0   42   
4893    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   42   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
13566  19   5   0  19  11   4   4  24   9   23  ...    4   11   10   11   53   
19404  16   7   9  11  23  12   6   4  13   25  ...   14   23    6   15   52   
18632  20  16   1  22  21   1   6   7   7    2  ...   25   23   21   15   42   
10533  21  19  22  25  13  10  24   6  18    3  ...   25   16    6   15   53   
14612   7  13  13   8  20   4   7  22  15   19  ...   15  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
34
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
8139    1   0   0   0   0   0   0   0   0    1  ...    1    0    1    0   49   
4008    0   1   0   0   0   0   0   0   1    0  ...    1    0    1    0   46   
3932    0   0   0   0   0   0   1   0   0    0  ...    0    1    1    0   46   
3953    1   0   0   0   0   0   0   0   0    0  ...    1    0    1    0   46   
4801    0   0   0   1   0   0   0   0   0    0  ...    1    0    1    0   47   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
10775   7   0   4   8  25   3  12  24  23   12  ...   21   21   22    4   45   
16097  13  20  11  14   3   4   0  21  18   21  ...    9    9    0   16   52   
11579   4   4  12  13  20   3  23  20  13   21  ...   21   15   19   24   47   
10679   0  14   3  25  17   5  20   6  24   10  ...   13   16   18   22   50   
14357   1   7  21   1  15   9   6  17   1   18  ...   23  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
39
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
5710    0   0   0   0   0   1   0   0   0    1  ...    1    0    1    0   52   
3731    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   51   
9961    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   56   
2826    0   0   0   0   0   1   0   0   1    0  ...    1    0    1    0   50   
4984    0   0   0   0   0   1   0   0   0    1  ...    1    0    1    0   52   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
14161  19  16  14   4   3   0  19  22   2   20  ...    6    3    1   19   62   
13726  25  17   2   1   2   9  16  21   2   25  ...    5    7   25   10   50   
11422  25  16  11   6   2  24   1   6   4   10  ...    9    9   25   16   49   
15108   8  23   0  24  17  18   3  15  17   21  ...    1   14   25   20   72   
12868   2  17   0   3  11   6   1  21   7   13  ...   12  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
44
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
8916    0   0   0   0   0   0   0   1   0    1  ...    1    0    1    0   59   
2543    0   0   0   0   0   0   0   1   0    1  ...    1    0    1    0   54   
2278    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   54   
5797    0   0   1   0   0   0   0   0   0    0  ...    1    0    1    0   57   
1932    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   54   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
18204  11   1  19  23  23  19   7  22  19   15  ...    9    5    9   11   56   
13089   8  16  14  14  19  23  23  11   6    9  ...   16   25    7    6   74   
12181   6  17  12  13  21  14  12   9  13   11  ...   16   10    1   23   56   
10083  23   4  23  16  11   2  13  24  17   17  ...   12    3    1    6   64   
19691  22  17  13  11   5   1  25  14  12    2  ...   16  

Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
48
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
5835    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   62   
4183    0   0   0   0   0   1   0   0   0    0  ...    1    0    0    1   60   
7639    0   0   0   0   0   0   1   0   0    0  ...    0    1    1    0   63   
1369    0   0   0   0   0   0   0   1   0    0  ...    1    0    1    0   58   
9029    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   64   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
15772  21  16   4  18   6   6  20  25  21   22  ...   21   18    2   19   75   
16655  12  20  23   5  23  12  13   0   4   19  ...   19    6    6   20   65   
11633  14  17  25   2   0   3  15  21   1    2  ...    7   17    9    8   72   
11565   8   8   6  10  20   3   6  15  18   21  ...    7    2    9   23   79   
11747   8   8  21  20  19  24   1  12  20   22  ...   10   17    8   14   

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
53
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
4169    0   0   0   0   1   0   0   0   0    0  ...    1    0    0    1   64   
591     0   0   0   0   0   1   0   0   0    1  ...    1    0    1    0   62   
9959    0   0   0   0   0   1   0   0   0    0  ...    1    0    1    0   69   
2855    0   0   0   0   1   0   0   0   0    1  ...    1    0    1    0   63   
6121    0   0   1   0   0   0   0   0   0    0  ...    1    0    1    0   66   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
16621   3  14  23  21  11   3  10   6  14    2  ...   17    0   14    5   62   
15537   3   5  25   1   5  18   0  21   3    1  ...   21   18   24   18   64   
14349  13  17  16  12  23   7   9  19  18    6  ...   15   12   12   17   64   
13505  15   6   9  23  12  24  25  22  21    9  ...   19   13   17   22   75   
10355   5  17  25  13  11  22  22  22   9   20  ...    6  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
58
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
2644    0   0   0   1   0   0   0   0   0    0  ...    1    0    1    0   69   
1246    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   68   
9184    0   0   0   0   1   0   0   0   1    0  ...    1    0    1    0   74   
9507    0   0   0   1   0   0   0   0   0    0  ...    1    0    1    0   75   
361     0   1   0   0   0   0   0   0   1    0  ...    1    0    1    0   68   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
12870   2  18  22  14  21  11   4  12  18    3  ...   13   17   17   25   67   
11230  16  19  11  20   8  23  18   2   1   19  ...   21    1   11    0   70   
10150   3  23  10  10   3   6   9  12   5    5  ...   22   17   25   20   88   
12185  22  14  17   5  14   6   0  22  16    7  ...   13   16   23   10   77   
12669  23   0   0   9  19  19  18  18  10   18  ...   21  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
63
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
967     0   0   0   0   0   0   1   0   0    0  ...    0    1    1    0   73   
610     0   0   0   0   0   0   1   0   1    0  ...    1    0    1    0   73   
4726    1   0   0   0   0   0   0   0   0    1  ...    1    0    1    0   76   
412     0   0   0   0   0   1   0   0   0    0  ...    1    0    1    0   73   
9066    0   0   0   0   0   1   0   0   0    0  ...    1    0    1    0   79   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
13865  10   4   7  19  21  21  14  15  17   17  ...    6    7   20    2   75   
10509   1   8  20  20  18  23  24   8   8   14  ...    0    1    0   23   92   
13664  24   8  20  13  17  18  20  20  18   12  ...   17   22    6   14   72   
11585  25   4   2   4   9   2   3   5  22    4  ...   22   14   21   15   85   
19788  15  19   1   0   8  12  17  13  11   12  ...   25  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
68
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
727     1   0   0   0   0   0   0   0   0    1  ...    1    0    1    0   77   
8646    0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   83   
9575    0   0   0   0   1   0   0   0   1    0  ...    1    0    1    0   84   
3485    0   0   0   0   1   0   0   0   0    1  ...    1    0    1    0   79   
936     0   0   0   0   1   0   0   0   0    0  ...    1    0    1    0   77   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
17645   1   1  14   3  12  19  13   3  10    0  ...    3    4   18    6   79   
13350  18  12   7  11  20   1  24  21   9    8  ...    9   24   20   19   90   
17447  11   4   9   9   2   1  22   6   6   25  ...    6    9   25    2   96   
15666  21  11   6  22   7  10  15  16   2   25  ...    7    5   23    6   96   
19678   4   3  17  16  11  23   5  12  10   15  ...    0  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
72
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
1637    0   0   0   1   0   0   0   0   0    0  ...    1    0    1    0   83   
6297    0   0   0   0   0   1   0   0   0    0  ...    1    0    1    0   86   
8010    0   0   0   0   0   0   1   0   0    1  ...    1    0    1    0   87   
8181    0   0   0   0   0   1   0   0   0    0  ...    1    0    1    0   87   
5364    0   0   0   0   0   0   1   0   0    1  ...    0    1    1    0   85   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
15811  13  25  20  14   7  13  22   6  12   23  ...    5   14   24   11   98   
10360  12  11   3  17   8  20   1   3  18   19  ...   22   16   25    7   83   
14117   6  24  14  14   5   7  22   3   0   20  ...   21    3    7   24   95   
17040  19   9  11  18   9   6  13  23   6   23  ...    5    8   13    2   81   
11633  14  17  25   2   0   3  15  21   1    2  ...    7  

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
77
       p1  p2  p3  p4  p5  p6  p7  p8  p9  p10  ...  p28  p29  p30  p31  p32  \
9608    0   0   0   0   0   1   0   0   1    0  ...    1    0    1    0   94   
3360    0   0   0   1   0   0   0   0   0    0  ...    1    0    1    0   89   
337     0   1   0   0   0   0   0   0   0    0  ...    1    0    1    0   87   
8274    0   0   0   0   0   1   0   0   0    0  ...    1    0    0    1   92   
1397    0   0   0   0   0   0   0   1   0    0  ...    1    0    1    0   87   
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ...  ...  ...   
10749  17  10  15  20  13  14  25  17  25    3  ...   13   15    9   14   89   
12880  24   5   5  13  22  10   9  17  17   20  ...   18    4    9    1   89   
11866   7  23  21   2   3  16   3  18  20   23  ...   25    5   24    4  110   
13483  25   4  20   8  16   6  11   9  17   22  ...   15    1   22   11   94   
11267  23  14  21   5   7   4  13  15  25   10  ...    9  