In [1]:
import time
import sklearn
import numpy as np
import pandas as pd

import sys
sys.path.append("..")
from baggingPU import BaggingClassifierPU

from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import random

In [2]:
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    columnwidth = max([len(x) for x in labels]) + 4
    empty_cell = " " * columnwidth
    print("    " + empty_cell, end=' ')
    for label in labels:
        print("%{0}s".format(columnwidth) % 'pred_' + label, end=" ")
    print()

    for i, label1 in enumerate(labels):
        print("    %{0}s".format(columnwidth) % 'true_' + label1, end=" ")
        for j in range(len(labels)):
            cell = "%{0}.1f".format(columnwidth) % cm[i, j]
            if hide_zeroes:
                cell = cell if float(cm[i, j]) != 0 else empty_cell
            if hide_diagonal:
                cell = cell if i != j else empty_cell
            if hide_threshold:
                cell = cell if cm[i, j] > hide_threshold else empty_cell
            if cell:
                print(cell, end=" ")
        print()

# import data

In [3]:
#df_raw = pd.read_csv('../data/w-dependence.csv')

df_raw = pd.read_csv('../data/1place-independence.csv')

#df_raw = pd.read_csv('../data/w-related.csv')


df_raw['label'] = df_raw['label'].astype("int")
print(df_raw.label.value_counts())
print('Has null values', df_raw.isnull().values.any())

0    10000
1    10000
Name: label, dtype: int64
Has null values False


In [4]:
df_raw.head(10)

Unnamed: 0,p1,p2,p3,p4,label
0,1,0,0,0,1
1,0,1,1,0,1
2,1,0,1,0,1
3,0,0,0,1,1
4,0,1,2,0,1
5,1,0,2,0,1
6,0,0,1,1,1
7,0,1,3,0,1
8,1,0,3,0,1
9,0,0,2,1,1


In [5]:
print(df_raw.iloc[:10000,:])
df_max = np.amax(df_raw.iloc[:10000,[2]])
print(df_max)

      p1  p2    p3  p4  label
0      1   0     0   0      1
1      0   1     1   0      1
2      1   0     1   0      1
3      0   0     0   1      1
4      0   1     2   0      1
...   ..  ..   ...  ..    ...
9995   1   0  3332   0      1
9996   0   0  3331   1      1
9997   0   1  3333   0      1
9998   1   0  3333   0      1
9999   0   0  3332   1      1

[10000 rows x 5 columns]
p3    3333
dtype: int64


In [6]:
print(df_raw.iloc[10000:,:])
print(np.amax(df_raw.iloc[10000:,[2]]))

         p1    p2    p3    p4  label
10000     0     0     0     0      0
10001     0     0     1     0      0
10002     0     1     0     0      0
10003     0     1     0     1      0
10004     0     1     1     1      0
...     ...   ...   ...   ...    ...
19995  1948  3267  3248    22      0
19996   678  2207  1707  1330      0
19997   796  3271  1813  1477      0
19998  2163   614  2392  2879      0
19999  1420  2443  2561  1150      0

[10000 rows x 5 columns]
p3    3333
dtype: int64


In [7]:
neg_process = df_raw.iloc[10000:,:].copy()
print(neg_process)

for i in range(10000):
    if neg_process.iloc[i,2] > df_max[0]:
        neg_process.iloc[i,2]  = int(neg_process.iloc[i,2] / df_max[0])

         p1    p2    p3    p4  label
10000     0     0     0     0      0
10001     0     0     1     0      0
10002     0     1     0     0      0
10003     0     1     0     1      0
10004     0     1     1     1      0
...     ...   ...   ...   ...    ...
19995  1948  3267  3248    22      0
19996   678  2207  1707  1330      0
19997   796  3271  1813  1477      0
19998  2163   614  2392  2879      0
19999  1420  2443  2561  1150      0

[10000 rows x 5 columns]


In [8]:
neg_process.drop_duplicates(inplace=True)
print(neg_process)
print(len(neg_process))
print(np.amax(neg_process))

         p1    p2    p3    p4  label
10000     0     0     0     0      0
10001     0     0     1     0      0
10002     0     1     0     0      0
10003     0     1     0     1      0
10004     0     1     1     1      0
...     ...   ...   ...   ...    ...
19995  1948  3267  3248    22      0
19996   678  2207  1707  1330      0
19997   796  3271  1813  1477      0
19998  2163   614  2392  2879      0
19999  1420  2443  2561  1150      0

[10000 rows x 5 columns]
10000
p1       3333
p2       3332
p3       3333
p4       3333
label       0
dtype: int64


In [9]:
df_process = pd.concat([df_raw.iloc[:10000,:], neg_process])
print(df_process)

         p1    p2    p3    p4  label
0         1     0     0     0      1
1         0     1     1     0      1
2         1     0     1     0      1
3         0     0     0     1      1
4         0     1     2     0      1
...     ...   ...   ...   ...    ...
19995  1948  3267  3248    22      0
19996   678  2207  1707  1330      0
19997   796  3271  1813  1477      0
19998  2163   614  2392  2879      0
19999  1420  2443  2561  1150      0

[20000 rows x 5 columns]


In [10]:
x_data = df_process.iloc[:,:-1]
y_data = df_process.iloc[:,-1]

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=7)

In [11]:
x_train_input = pd.concat([x_train.iloc[:,:], y_train],axis=1)
x_test_input = pd.concat([x_test.iloc[:,:],y_test],axis=1)

print(len(x_train_input))
print(len(x_test_input))

print(x_train_input.label.value_counts())
print('Has null values', x_train_input.isnull().values.any())

print(x_test_input.label.value_counts())
print('Has null values', x_test_input.isnull().values.any())

16000
4000
1    8077
0    7923
Name: label, dtype: int64
Has null values False
0    2077
1    1923
Name: label, dtype: int64
Has null values False


In [12]:
x_train_input.head(10)

Unnamed: 0,p1,p2,p3,p4,label
8118,0,0,2705,1,1
10171,0,0,2141,0,0
9399,0,0,3132,1,1
12669,1,1,2336,0,0
18809,1457,3115,942,1111,0
13869,0,0,1609,0,0
3320,1,0,1107,0,1
14689,1,1,1319,0,0
13087,1,1,2692,1,0
15992,515,833,2507,1884,0


In [13]:
x_test_input.head(10)

Unnamed: 0,p1,p2,p3,p4,label
14356,1,1,3189,1,0
3439,0,1,1147,0,1
12153,1,0,2606,1,0
15029,2966,1336,754,1574,0
18549,3088,1378,1096,3034,0
15762,622,795,773,1953,0
12313,0,1,1788,1,0
16034,704,1577,717,2627,0
11496,1,0,1418,1,0
5653,0,1,1885,0,1


In [14]:
df = x_train_input.copy()

NON_LBL = [c for c in df.columns if c != 'label']
X = df[NON_LBL]
y = df['label']

y_orig = y.copy()

hidden_size = 5000
y.loc[
    np.random.choice(
        y[y == 1].index, 
        replace = False, 
        size = hidden_size
    )
] = 0

print(X)
print(y)

         p1    p2    p3    p4
8118      0     0  2705     1
10171     0     0  2141     0
9399      0     0  3132     1
12669     1     1  2336     0
18809  1457  3115   942  1111
...     ...   ...   ...   ...
13927     1     1  1066     0
919       0     1   307     0
5699      1     0  1900     0
10742     0     1  2930     1
16921  2113   604  1171  1381

[16000 rows x 4 columns]
8118     1
10171    0
9399     1
12669    0
18809    0
        ..
13927    0
919      1
5699     0
10742    0
16921    0
Name: label, Length: 16000, dtype: int64


In [15]:
pd.Series(y).value_counts()

0    12923
1     3077
Name: label, dtype: int64

In [16]:
print('- %d samples and %d features' % (X.shape))
print('- %d positive out of %d total before hiding labels' % (sum(df_process.label), len(df_process.label)))
print('- %d positive out of %d total after hiding labels' % (sum(y), len(y)))

- 16000 samples and 4 features
- 10000 positive out of 20000 total before hiding labels
- 3077 positive out of 16000 total after hiding labels


# Trainning directly

In [17]:
print('Training XGboost model ...')

import xgboost as xgb

model = xgb.XGBClassifier()

#from sklearn.linear_model import LogisticRegression
#model = LogisticRegression()


#from sklearn.neural_network import MLPClassifier
#model = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(1), random_state=1,max_iter=100000)

model.fit(X, y)

print('Done')

Training XGboost model ...
Done


In [18]:
print('---- {} ----'.format('XGboost model'))
print(print_cm(sklearn.metrics.confusion_matrix(y_test, model.predict(x_test_input.iloc[:,:-1])), labels=['negative', 'positive']))
print('')
print('Precision: ', precision_score(y_test, model.predict(x_test_input.iloc[:,:-1])))
print('Recall: ', recall_score(y_test, model.predict(x_test_input.iloc[:,:-1])))
print('Accuracy: ', accuracy_score(y_test, model.predict(x_test_input.iloc[:,:-1])))
print('f1_score: ', f1_score(y_test, model.predict(x_test_input.iloc[:,:-1])))

---- XGboost model ----
                        pred_negative        pred_positive 
           true_negative       2077.0          0.0 
           true_positive       1763.0        160.0 
None

Precision:  1.0
Recall:  0.08320332813312532
Accuracy:  0.55925
f1_score:  0.15362457993278925


# Training by bagging

In [19]:
from sklearn.linear_model import LogisticRegression
model1 = LogisticRegression()

from sklearn.tree import DecisionTreeClassifier
model2 = DecisionTreeClassifier()

from sklearn.neural_network import MLPClassifier
model3 = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(1), random_state=1,max_iter=100000)

from sklearn import svm
model4 = svm.LinearSVC()

from sklearn.naive_bayes import GaussianNB
model5 = GaussianNB()

from sklearn.ensemble import RandomForestClassifier
model6 = RandomForestClassifier(n_estimators = 50,n_jobs = -1)

import xgboost as xgb
model7 = xgb.XGBClassifier()


In [20]:
model_list = [model1, model2, model3, model4, model5, model6, model7]
f1 = np.zeros([len(model_list)],dtype=np.float32)

In [21]:
for i,j in zip(model_list,range(len(model_list))):
    model = BaggingClassifierPU(i,
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
    model.fit(X,y)
    print(f1_score(y_orig, model.predict(X)))
    f1[j] = f1_score(y_orig, model.predict(X))

print(f1)

0.9542205682556559
0.8810084499238121
0.8046423590356645
0.8828287244507597
0.8044820717131473
0.9354243542435424
0.9959604747995774
[0.9542206  0.88100845 0.8046424  0.8828287  0.80448204 0.9354243
 0.9959605 ]


In [22]:
f1_index = []
for i in range(len(f1)):
    if f1[i] >= 0.95:
        f1_index.append(i)
        
print(f1_index)
        
predict_sum = np.zeros([len(X)],dtype=np.float32)
for i in f1_index:
    model = BaggingClassifierPU(model_list[i],
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
    model.fit(X,y)
    predict_sum += model.predict(X)
print(predict_sum)

[0, 6]
[2. 1. 2. ... 2. 0. 0.]


In [23]:
threshold = len(f1_index) / 2
print(predict_sum)

predict = np.zeros([len(y)],dtype=np.int64)

for i in range(len(X)):
    if predict_sum[i] >= threshold :
        predict[i] = 1
    if predict_sum[i] < threshold :
        predict[i] = 0


print(predict)
print(y_orig)

[2. 1. 2. ... 2. 0. 0.]
[1 1 1 ... 1 0 0]
8118     1
10171    0
9399     1
12669    0
18809    0
        ..
13927    0
919      1
5699     1
10742    0
16921    0
Name: label, Length: 16000, dtype: int64


In [24]:
#train data
print('---- {} ----'.format('PU Bagging'))
print(print_cm(sklearn.metrics.confusion_matrix(y_orig,predict), labels=['negative', 'positive']))
print('')
print('Precision: ', precision_score(y_orig, predict))
print('Recall: ', recall_score(y_orig,predict))
print('Accuracy: ', accuracy_score(y_orig, predict))
print('f1_score: ', f1_score(y_orig, predict))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative       7148.0        775.0 
           true_positive          0.0       8077.0 
None

Precision:  0.9124491640307275
Recall:  1.0
Accuracy:  0.9515625
f1_score:  0.9542205682556559


In [25]:
best_model = BaggingClassifierPU(model_list[np.argmax(f1)],
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
print(best_model)

BaggingClassifierPU(base_estimator=XGBClassifier(base_score=None, booster=None,
                                                 colsample_bylevel=None,
                                                 colsample_bynode=None,
                                                 colsample_bytree=None,
                                                 gamma=None, gpu_id=None,
                                                 importance_type='gain',
                                                 interaction_constraints=None,
                                                 learning_rate=None,
                                                 max_delta_step=None,
                                                 max_depth=None,
                                                 min_child_weight=None,
                                                 missing=nan,
                                                 monotone_constraints=None,
                                                 n_estimators=1

In [26]:
print('Training bagging classifier...')

pu_start = time.perf_counter()

best_model.fit(X, y)
pu_end = time.perf_counter()
print('Done!')
print('Time:', pu_end - pu_start)


Training bagging classifier...
Done!
Time: 21.021124048973434


In [27]:
#train data
print('---- {} ----'.format('PU Bagging'))
print(print_cm(sklearn.metrics.confusion_matrix(y_orig, best_model.predict(X)), labels=['negative', 'positive']))
print('')
print('Precision: ', precision_score(y_orig, best_model.predict(X)))
print('Recall: ', recall_score(y_orig, best_model.predict(X)))
print('Accuracy: ', accuracy_score(y_orig, best_model.predict(X)))
print('f1_score: ', f1_score(y_orig, best_model.predict(X)))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative       7922.0          1.0 
           true_positive         56.0       8021.0 
None

Precision:  0.99987534280728
Recall:  0.9930667326977838
Accuracy:  0.9964375
f1_score:  0.9964594074166097


In [28]:
#print wrong predictions
y_pre = best_model.predict(X)
y_orig_index = y_orig.index.tolist()

FN_index = []
FT_index = []

for i in range(len(y_orig)):
    if y_orig.iloc[i] == 1 and y_pre[i] == 0 :
        FN_index.append(y_orig_index[i])
    if y_orig.iloc[i] == 0 and y_pre[i] == 1 :
        FT_index.append(y_orig_index[i])
        
print("False Negtive:")
print(df_process.loc[FN_index])
print("False Positive:")
print(df_process.loc[FT_index])

False Negtive:
      p1  p2    p3  p4  label
917    1   0   306   0      1
390    0   0   129   1      1
21     0   0     6   1      1
9357   0   0  3118   1      1
9964   0   1  3322   0      1
387    0   0   128   1      1
405    0   0   134   1      1
9342   0   0  3113   1      1
1079   1   0   360   0      1
9356   1   0  3119   0      1
9354   0   0  3117   1      1
9347   1   0  3116   0      1
9335   1   0  3112   0      1
213    0   0    70   1      1
911    1   0   304   0      1
9744   0   0  3247   1      1
902    1   0   301   0      1
9969   0   0  3322   1      1
950    1   0   317   0      1
920    1   0   307   0      1
45     0   0    14   1      1
1088   1   0   363   0      1
42     0   0    13   1      1
1085   1   0   362   0      1
500    1   0   167   0      1
18     0   0     5   1      1
396    0   0   131   1      1
632    1   0   211   0      1
944    1   0   315   0      1
905    1   0   302   0      1
9351   0   0  3116   1      1
240    0   0    79   1   

In [29]:
#test data
print('---- {} ----'.format('PU Bagging'))
print(print_cm(sklearn.metrics.confusion_matrix(y_test, best_model.predict(x_test_input.iloc[:,:-1])), labels=['negative', 'positive']))
print('')
print('Precision: ', precision_score(y_test, best_model.predict(x_test_input.iloc[:,:-1])))
print('Recall: ', recall_score(y_test, best_model.predict(x_test_input.iloc[:,:-1])))
print('Accuracy: ', accuracy_score(y_test, best_model.predict(x_test_input.iloc[:,:-1])))
print('f1_score: ', f1_score(y_test, best_model.predict(x_test_input.iloc[:,:-1])))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative       2077.0          0.0 
           true_positive         15.0       1908.0 
None

Precision:  1.0
Recall:  0.9921996879875195
Accuracy:  0.99625
f1_score:  0.9960845732184808


In [30]:
#print wrong predictions
y_test_pre = best_model.predict(x_test_input.iloc[:,:-1])
y_test_index = y_test.index.tolist()

FN_test_index = []
FT_test_index = []

for i in range(len(y_test)):
    if y_test.iloc[i] == 1 and y_test_pre[i] == 0 :
        FN_test_index.append(y_test_index[i])
    if y_test.iloc[i] == 0 and y_test_pre[i] == 1 :
        FT_test_index.append(y_test_index[i])

print("False Negtive:")
print(df_process.loc[FN_test_index])
print("False Positive:")
print(df_process.loc[FT_test_index])

False Negtive:
      p1  p2    p3  p4  label
402    0   0   133   1      1
923    1   0   308   0      1
9348   0   0  3115   1      1
9344   1   0  3115   0      1
210    0   0    69   1      1
9360   0   0  3119   1      1
414    0   0   137   1      1
231    0   0    76   1      1
908    1   0   303   0      1
9961   0   1  3321   0      1
1076   1   0   359   0      1
9966   0   0  3321   1      1
9345   0   0  3114   1      1
417    0   0   138   1      1
399    0   0   132   1      1
False Positive:
Empty DataFrame
Columns: [p1, p2, p3, p4, label]
Index: []


In [32]:
#without interative strategy
import random

place_max = max(np.amax(df_process.iloc[:10000, [2]]))  #1-place
name1 = ['min', 'max', 'Precision', 'Recall', 'Accuracy', 'f1_score','time']
test = pd.DataFrame(columns=name1)


for i in range(1, 101):

    pu_start = time.perf_counter()
    
    pos_list = np.random.randint(low=0, high=10000, size=2000).tolist()
    neg_list = np.random.randint(low=10000, high=20000, size=2000).tolist()
    all_list = pos_list + neg_list
    
    flag1 = int((0.1 * (i - 1)) * place_max)
    flag2 = int((0.1 * i) * place_max)
    print(flag2)
    
    temp_data = df_process.iloc[all_list, : -1].copy()
    temp_label = df_process.iloc[all_list, -1].copy()
    
    temp_data.iloc[:, 2] = temp_data.iloc[:, 2] + random.randint((df_max[0] + flag1 + 1), ((df_max[0] + flag2 + 1)))
    
    print('======')
    print(temp_data.iloc[:, :])

    temp_max = max(np.amax(temp_data.iloc[:, [2]]))
    temp_min = min(np.amin(temp_data.iloc[:, [2]]))

    print('---- {} ----'.format(i))
    print('Precision: ', precision_score(temp_label, best_model.predict(temp_data)))
    print('Recall: ', recall_score(temp_label, best_model.predict(temp_data)))
    print('Accuracy: ', accuracy_score(temp_label, best_model.predict(temp_data)))
    print('f1_score: ', f1_score(temp_label, best_model.predict(temp_data)))
    
    pu_end = time.perf_counter()

    test.loc[i] = [temp_min, temp_max,
                    precision_score(temp_label, best_model.predict(temp_data)),
                    recall_score(temp_label, best_model.predict(temp_data)),
                    accuracy_score(temp_label, best_model.predict(temp_data)),
                    f1_score(temp_label, best_model.predict(temp_data)),
                    (pu_end - pu_start)]
      
test.to_csv('../result/1_place_unknown_result_without.csv')

333
         p1    p2    p3    p4
1328      1     0  3866     0
2189      1     0  4153     0
1406      1     0  3892     0
7335      0     0  5867     1
6361      0     1  5544     0
...     ...   ...   ...   ...
19371   887   304  3946  2797
16349  2639   922  4575  3236
12673     0     1  5577     1
15688  2561  1364  3482  2680
15282   439   237  3492  3118

[4000 rows x 4 columns]
---- 1 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
666
         p1    p2    p3    p4
4165      0     1  5337     0
9080      1     0  6975     0
4889      1     0  5578     0
3440      1     0  5095     0
5651      1     0  5832     0
...     ...   ...   ...   ...
18953  2511  2986  4400  2719
16152  2930  2463  5560   341
15235  2382  1898  4569   339
13354     0     1  6860     1
10778     1     1  7280     0

[4000 rows x 4 columns]
---- 2 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
999
         p1    p2    p3    p4
7243      0     1  6599     0
8126      1     

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
6332
         p1    p2     p3    p4
1838      1     0  10173     0
9709      0     1  12797     0
3232      0     1  10638     0
8501      1     0  12394     0
4398      0     0  11025     1
...     ...   ...    ...   ...
13519     1     1  10956     0
18610  1388  1930  11709  2692
17574  1967  2906  12477   862
19507   749   757  12884  2929
11250     1     1  12704     0

[4000 rows x 4 columns]
---- 19 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
6666
         p1    p2     p3    p4
1164      0     0  10181     1
3323      1     0  10902     0
5965      0     1  11783     0
4115      1     0  11166     0
2670      0     0  10683     1
...     ...   ...    ...   ...
10641     0     1  10898     1
11030     1     1  12429     0
11080     1     1  11106     1
19814  2219  3228  12429  3305
10099     1     1  10814     1

[4000 rows x 4 columns]
---- 20 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score

KeyboardInterrupt: 

In [33]:
orig_data = df_process.iloc[:,:-1].copy()
orig_label = best_model.predict(orig_data)

print(orig_data)
print(orig_label)

         p1    p2    p3    p4
0         1     0     0     0
1         0     1     1     0
2         1     0     1     0
3         0     0     0     1
4         0     1     2     0
...     ...   ...   ...   ...
19995  1948  3267  3248    22
19996   678  2207  1707  1330
19997   796  3271  1813  1477
19998  2163   614  2392  2879
19999  1420  2443  2561  1150

[20000 rows x 4 columns]
[1 1 1 ... 0 0 0]


In [34]:
import xgboost as xgb
model = BaggingClassifierPU(xgb.XGBClassifier(),
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
model.fit(orig_data, orig_label)

BaggingClassifierPU(base_estimator=XGBClassifier(base_score=None, booster=None,
                                                 colsample_bylevel=None,
                                                 colsample_bynode=None,
                                                 colsample_bytree=None,
                                                 gamma=None, gpu_id=None,
                                                 importance_type='gain',
                                                 interaction_constraints=None,
                                                 learning_rate=None,
                                                 max_delta_step=None,
                                                 max_depth=None,
                                                 min_child_weight=None,
                                                 missing=nan,
                                                 monotone_constraints=None,
                                                 n_estimators=1

In [35]:
#verification

import random

place_max = max(np.amax(df_process.iloc[:10000, [2]]))  #1-place
name1 = ['min', 'max', 'Precision', 'Recall', 'Accuracy', 'f1_score','time']
test = pd.DataFrame(columns=name1)


for i in range(1, 101):

    pu_start = time.perf_counter()
    
    pos_list = np.random.randint(low=0, high=10000, size=2000).tolist()
    neg_list = np.random.randint(low=10000, high=20000, size=2000).tolist()
    all_list = pos_list + neg_list
    
    flag1 = int((0.1 * (i - 1)) * place_max)
    flag2 = int((0.1 * i) * place_max)
    print(flag2)
    
    temp_data = df_process.iloc[all_list, : -1].copy()
    temp_label = df_process.iloc[all_list, -1].copy()
    
    temp_data.iloc[:, 2] = temp_data.iloc[:, 2] + random.randint((df_max[0] + flag1 + 1), ((df_max[0] + flag2 + 1)))
    
    print('======')
    print(temp_data.iloc[:, :])

    temp_max = max(np.amax(temp_data.iloc[:, [2]]))
    temp_min = min(np.amin(temp_data.iloc[:, [2]]))

    print('---- {} ----'.format(i))
    print('Precision: ', precision_score(temp_label, model.predict(temp_data)))
    print('Recall: ', recall_score(temp_label, model.predict(temp_data)))
    print('Accuracy: ', accuracy_score(temp_label, model.predict(temp_data)))
    print('f1_score: ', f1_score(temp_label, model.predict(temp_data)))
    
    
    orig_data = pd.concat([orig_data, temp_data], ignore_index=True)
    orig_label = pd.Series(orig_label.tolist() + model.predict(temp_data).tolist())
    model.fit(orig_data,orig_label)
    
    
    pu_end = time.perf_counter()

    test.loc[i] = [temp_min, temp_max,
                    precision_score(temp_label, model.predict(temp_data)),
                    recall_score(temp_label, model.predict(temp_data)),
                    accuracy_score(temp_label, model.predict(temp_data)),
                    f1_score(temp_label, model.predict(temp_data)),
                    (pu_end - pu_start)]
      
test.to_csv('../result/1_place_unknown_result.csv')

333
         p1    p2    p3    p4
4702      0     1  5194     0
5665      0     1  5515     0
3913      0     1  4931     0
1530      0     0  4135     1
3382      0     1  4754     0
...     ...   ...   ...   ...
18399  2759   510  5154  2303
18752  3229   473  5058  3121
14558     1     1  4578     0
17247  1990  1394  4943  1676
14393     0     1  4497     1

[4000 rows x 4 columns]
---- 1 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
666
         p1   p2    p3    p4
7515      0    0  6503     1
6306      0    0  6100     1
5226      0    0  5740     1
1357      0    1  4452     0
9575      1    0  7191     0
...     ...  ...   ...   ...
11772     0    1  6474     1
18437  3133  787  5258  1306
10066     1    1  6162     0
12143     1    1  5300     1
10369     1    0  7150     1

[4000 rows x 4 columns]
---- 2 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
999
         p1    p2    p3    p4
4754      1     0  5612     0
899       1     0  4327     

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
6332
         p1    p2     p3    p4
5434      0     1  11308     0
3329      1     0  10606     0
8497      0     1  12329     0
3428      1     0  10639     0
8668      0     1  12386     0
...     ...   ...    ...   ...
11010     0     0  12422     0
19225  2232    32  12286  1270
18656  1856   457  11705   494
10398     0     1  10209     1
15431   769  3018   9694   600

[4000 rows x 4 columns]
---- 19 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
6666
        p1    p2     p3    p4
6588     0     0  12106     1
7533     0     0  12421     1
535      0     1  10090     0
8449     0     1  12728     0
13       0     1   9916     0
...    ...   ...    ...   ...
13133    1     0  12719     1
11940    1     1  11365     1
15213  170  2539  10832  1499
15002   22   108  10553  2155
12297    1     0  11056     1

[4000 rows x 4 columns]
---- 20 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
6999


Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
11998
         p1    p2     p3    p4
4029      0     0  16669     1
6708      0     0  17562     1
9213      0     0  18397     1
633       0     0  15537     1
4160      1     0  16714     0
...     ...   ...    ...   ...
10095     0     1  17180     1
10373     1     1  18050     1
18671  1650   598  16745   370
18057  2463  2750  16580  3150
17236  2209  2552  18071  2717

[4000 rows x 4 columns]
---- 36 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
12332
         p1    p2     p3    p4
8637      0     0  18442     1
1403      1     0  16032     0
9909      0     0  18866     1
4488      0     0  17059     1
4568      1     0  17087     0
...     ...   ...    ...   ...
14003     0     1  17288     1
19844  2131  2226  16783   560
19612  2413  2337  16792  2994
10546     1     1  18261     1
16362  3241  1333  16083   505

[4000 rows x 4 columns]
---- 37 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_sco

Precision:  0.9995002498750625
Recall:  1.0
Accuracy:  0.99975
f1_score:  0.9997500624843789
17664
         p1    p2     p3    p4
4166      1     0  22104     0
6326      1     0  22824     0
1521      0     0  21221     1
8235      0     0  23459     1
8059      0     1  23402     0
...     ...   ...    ...   ...
19796   132  2198  21366  2016
17657  2822  1116  21805  2831
11659     1     1  22542     0
16396  1061  2031  22571  2566
16321  1958   553  22236    46

[4000 rows x 4 columns]
---- 53 ----
Precision:  0.9995002498750625
Recall:  1.0
Accuracy:  0.99975
f1_score:  0.9997500624843789
17998
         p1    p2     p3    p4
2402      1     0  21982     0
9768      0     0  24436     1
2286      0     0  21942     1
7221      0     0  23587     1
2860      0     1  22135     0
...     ...   ...    ...   ...
12831     1     0  22926     1
18814  2859  1687  23555  2668
11538     0     1  23429     1
15846  1479   157  23715  1734
14295     1     1  24431     0

[4000 rows x 4 colu

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
23331
         p1    p2     p3    p4
8487      0     0  29176     1
5733      0     0  28258     1
5067      0     0  28036     1
2220      0     0  27087     1
9460      0     1  29502     0
...     ...   ...    ...   ...
18541  3245  3139  28520   911
18715  3183   151  28510  3185
13357     1     1  27135     0
19913  2978   850  27370   438
17245  3320  2270  28702  1359

[4000 rows x 4 columns]
---- 70 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
23664
         p1    p2     p3    p4
2762      1     0  27657     0
1921      0     1  27377     0
173       1     0  26794     0
8193      0     0  29466     1
6417      0     0  28874     1
...     ...   ...    ...   ...
16110   199  2750  27529  3039
11812     0     0  28207     0
14051     1     1  27768     0
17752  2998  2673  28726  1455
13654     0     1  28714     1

[4000 rows x 4 columns]
---- 71 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_sco

Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
28997
         p1    p2     p3    p4
1868      1     0  32632     0
9322      0     1  35117     0
3694      0     1  33241     0
4776      0     0  33600     1
248       1     0  32092     0
...     ...   ...    ...   ...
17849  2795  1439  32105   340
15635  3104  3145  32797  1576
17487  3329   751  32660  2481
10324     0     1  32940     1
18563    78  1249  35287  2106

[4000 rows x 4 columns]
---- 87 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_score:  1.0
29330
         p1    p2     p3    p4
4442      1     0  33901     0
909       0     0  32722     1
1165      0     1  32809     0
1173      0     0  32810     1
8480      1     0  35247     0
...     ...   ...    ...   ...
18204  2354  2942  33101  2114
15106  2031  1026  34658   497
18795  1335  1705  33841   490
18695   218  2321  34493  3317
17513  2111  1582  34871  2603

[4000 rows x 4 columns]
---- 88 ----
Precision:  1.0
Recall:  1.0
Accuracy:  1.0
f1_sco