In [1]:
import time
import sklearn
import numpy as np
import pandas as pd

import sys
sys.path.append("..")
from baggingPU import BaggingClassifierPU

from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import random

In [2]:
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    columnwidth = max([len(x) for x in labels]) + 4
    empty_cell = " " * columnwidth
    print("    " + empty_cell, end=' ')
    for label in labels:
        print("%{0}s".format(columnwidth) % 'pred_' + label, end=" ")
    print()

    for i, label1 in enumerate(labels):
        print("    %{0}s".format(columnwidth) % 'true_' + label1, end=" ")
        for j in range(len(labels)):
            cell = "%{0}.1f".format(columnwidth) % cm[i, j]
            if hide_zeroes:
                cell = cell if float(cm[i, j]) != 0 else empty_cell
            if hide_diagonal:
                cell = cell if i != j else empty_cell
            if hide_threshold:
                cell = cell if cm[i, j] > hide_threshold else empty_cell
            if cell:
                print(cell, end=" ")
        print()

# import data

In [3]:
#df_raw = pd.read_csv('../data/w-dependence.csv')

df_raw = pd.read_csv('../data/1place-dependence.csv')

#df_raw = pd.read_csv('../data/w-related.csv')


df_raw['label'] = df_raw['label'].astype("int")
print(df_raw.label.value_counts())
print('Has null values', df_raw.isnull().values.any())

1    10000
0    10000
Name: label, dtype: int64
Has null values False


In [4]:
df_raw.head(10)

Unnamed: 0,p1,p2,p3,p4,label
0,1,0,0,0,1
1,0,1,1,0,1
2,1,0,1,0,1
3,0,0,0,1,1
4,0,1,2,0,1
5,1,0,2,0,1
6,0,0,1,1,1
7,0,1,3,0,1
8,1,0,3,0,1
9,0,0,2,1,1


In [5]:
print(df_raw.iloc[:10000,:])
df_max = np.amax(df_raw.iloc[:10000,[2]])
print(df_max)

      p1  p2    p3  p4  label
0      1   0     0   0      1
1      0   1     1   0      1
2      1   0     1   0      1
3      0   0     0   1      1
4      0   1     2   0      1
...   ..  ..   ...  ..    ...
9995   1   0  3332   0      1
9996   0   0  3331   1      1
9997   0   1  3333   0      1
9998   1   0  3333   0      1
9999   0   0  3332   1      1

[10000 rows x 5 columns]
p3    3333
dtype: int64


In [6]:
print(df_raw.iloc[10000:,:])
print(np.amax(df_raw.iloc[10000:,[2]]))

       p1  p2    p3  p4  label
10000   0   0     1   0      0
10001   0   1     0   0      0
10002   0   1     0   1      0
10003   0   1     1   1      0
10004   1   0     0   1      0
...    ..  ..   ...  ..    ...
19995   1   1  2663   0      0
19996   1   0  7313   1      0
19997   1   1   999   1      0
19998   0   1   999   1      0
19999   0   1   350   1      0

[10000 rows x 5 columns]
p3    19998
dtype: int64


In [7]:
neg_process = df_raw.iloc[10000:,:].copy()
print(neg_process)

for i in range(10000):
    if neg_process.iloc[i,2] > df_max[0]:
        neg_process.iloc[i,2]  = int(neg_process.iloc[i,2] / df_max[0])

       p1  p2    p3  p4  label
10000   0   0     1   0      0
10001   0   1     0   0      0
10002   0   1     0   1      0
10003   0   1     1   1      0
10004   1   0     0   1      0
...    ..  ..   ...  ..    ...
19995   1   1  2663   0      0
19996   1   0  7313   1      0
19997   1   1   999   1      0
19998   0   1   999   1      0
19999   0   1   350   1      0

[10000 rows x 5 columns]


In [36]:
neg_process.drop_duplicates(inplace=True)
print(neg_process)
print(len(neg_process))
print(np.amax(neg_process))

       p1  p2    p3  p4  label
10000   0   0     1   0      0
10001   0   1     0   0      0
10002   0   1     0   1      0
10003   0   1     1   1      0
10004   1   0     0   1      0
...    ..  ..   ...  ..    ...
19991   0   1  2748   1      0
19995   1   1  2663   0      0
19997   1   1   999   1      0
19998   0   1   999   1      0
19999   0   1   350   1      0

[5871 rows x 5 columns]
5871
p1        999
p2        999
p3       3329
p4        999
label       0
dtype: int64


In [9]:
df_process = pd.concat([df_raw.iloc[:10000,:], neg_process])
print(df_process)

       p1  p2    p3  p4  label
0       1   0     0   0      1
1       0   1     1   0      1
2       1   0     1   0      1
3       0   0     0   1      1
4       0   1     2   0      1
...    ..  ..   ...  ..    ...
19991   0   1  2748   1      0
19995   1   1  2663   0      0
19997   1   1   999   1      0
19998   0   1   999   1      0
19999   0   1   350   1      0

[15871 rows x 5 columns]


In [10]:
x_data = df_process.iloc[:,:-1]
y_data = df_process.iloc[:,-1]

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=7)

In [11]:
x_train_input = pd.concat([x_train.iloc[:,:], y_train],axis=1)
x_test_input = pd.concat([x_test.iloc[:,:],y_test],axis=1)

print(len(x_train_input))
print(len(x_test_input))

print(x_train_input.label.value_counts())
print('Has null values', x_train_input.isnull().values.any())

print(x_test_input.label.value_counts())
print('Has null values', x_test_input.isnull().values.any())

12696
3175
1    7992
0    4704
Name: label, dtype: int64
Has null values False
1    2008
0    1167
Name: label, dtype: int64
Has null values False


In [12]:
x_train_input.head(10)

Unnamed: 0,p1,p2,p3,p4,label
247,0,1,83,0,1
1457,1,0,486,0,1
7087,0,1,2363,0,1
9116,1,0,3039,0,1
14323,30,619,665,231,0
2496,0,0,831,1,1
8823,0,0,2940,1,1
13598,225,596,411,325,0
4122,0,0,1373,1,1
6867,0,0,2288,1,1


In [13]:
x_test_input.head(10)

Unnamed: 0,p1,p2,p3,p4,label
5660,1,0,1887,0,1
8898,0,0,2965,1,1
11431,359,820,957,109,0
2155,0,1,719,0,1
7068,0,0,2355,1,1
6720,0,0,2239,1,1
18138,0,1,139,1,0
6138,0,0,2045,1,1
14933,636,464,931,255,0
3787,0,1,1263,0,1


In [14]:
df = x_train_input.copy()

NON_LBL = [c for c in df.columns if c != 'label']
X = df[NON_LBL]
y = df['label']

y_orig = y.copy()

hidden_size = 5000
y.loc[
    np.random.choice(
        y[y == 1].index, 
        replace = False, 
        size = hidden_size
    )
] = 0

print(X)
print(y)

        p1   p2    p3   p4
247      0    1    83    0
1457     1    0   486    0
7087     0    1  2363    0
9116     1    0  3039    0
14323   30  619   665  231
...    ...  ...   ...  ...
5699     1    0  1900    0
10742  130  942   901  200
537      0    0   178    1
9412     0    1  3138    0
12463  271  823   928  567

[12696 rows x 4 columns]
247      0
1457     0
7087     0
9116     1
14323    0
        ..
5699     1
10742    0
537      0
9412     0
12463    0
Name: label, Length: 12696, dtype: int64


In [15]:
pd.Series(y).value_counts()

0    9704
1    2992
Name: label, dtype: int64

In [16]:
print('- %d samples and %d features' % (X.shape))
print('- %d positive out of %d total before hiding labels' % (sum(df_process.label), len(df_process.label)))
print('- %d positive out of %d total after hiding labels' % (sum(y), len(y)))

- 12696 samples and 4 features
- 10000 positive out of 15871 total before hiding labels
- 2992 positive out of 12696 total after hiding labels


# Trainning directly

In [17]:
print('Training XGboost model ...')

import xgboost as xgb

model = xgb.XGBClassifier()

#from sklearn.linear_model import LogisticRegression
#model = LogisticRegression()


#from sklearn.neural_network import MLPClassifier

#model = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(1), random_state=1,max_iter=100000)

model.fit(X, y)

print('Done')

Training XGboost model ...
Done


In [18]:
print('---- {} ----'.format('XGboost model'))
print(print_cm(sklearn.metrics.confusion_matrix(y_test, model.predict(x_test_input.iloc[:,:-1])), labels=['negative', 'positive']))
print('')
print('Precision: ', precision_score(y_test, model.predict(x_test_input.iloc[:,:-1])))
print('Recall: ', recall_score(y_test, model.predict(x_test_input.iloc[:,:-1])))
print('Accuracy: ', accuracy_score(y_test, model.predict(x_test_input.iloc[:,:-1])))
print('f1_score: ', f1_score(y_test, model.predict(x_test_input.iloc[:,:-1])))

---- XGboost model ----
                        pred_negative        pred_positive 
           true_negative       1166.0          1.0 
           true_positive       1844.0        164.0 
None

Precision:  0.9939393939393939
Recall:  0.08167330677290836
Accuracy:  0.4188976377952756
f1_score:  0.1509433962264151


# Training by bagging

In [19]:
#初始化多个分类器

from sklearn.linear_model import LogisticRegression
model1 = LogisticRegression()

from sklearn.tree import DecisionTreeClassifier
model2 = DecisionTreeClassifier()

from sklearn.neural_network import MLPClassifier
model3 = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(1), random_state=1,max_iter=100000)

from sklearn import svm
model4 = svm.LinearSVC()

from sklearn.naive_bayes import MultinomialNB
model5 = MultinomialNB()

from sklearn.ensemble import RandomForestClassifier
model6 = RandomForestClassifier(n_estimators = 50,n_jobs = -1)

import xgboost as xgb
model7 = xgb.XGBClassifier()


In [20]:
model_list = [model1, model2, model3, model4, model5, model6, model7]
f1 = np.zeros([len(model_list)],dtype=np.float32)

In [21]:
for i,j in zip(model_list,range(len(model_list))):
    model = BaggingClassifierPU(i,
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
    model.fit(X,y)
    print(f1_score(y_orig, model.predict(X)))
    f1[j] = f1_score(y_orig, model.predict(X))

print(f1)

0.9911328827432256
0.8339408076979151
0.9577566061477619
0.987520079080687
0.9567047504509921
0.8929214572655493
0.9775589796048846
[0.99113286 0.8339408  0.9577566  0.9875201  0.95670474 0.89292145
 0.977559  ]


In [22]:
f1_index = []
for i in range(len(f1)):
    if f1[i] >= 0.95:
        f1_index.append(i)
        
print(f1_index)
        
predict_sum = np.zeros([len(X)],dtype=np.float32)
for i in f1_index:
    model = BaggingClassifierPU(model_list[i],
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
    model.fit(X,y)
    predict_sum += model.predict(X)
print(predict_sum)

[0, 2, 3, 4, 6]
[5. 5. 5. ... 5. 5. 0.]


In [23]:
threshold = len(f1_index) / 2
print(predict_sum)

predict = np.zeros([len(y)],dtype=np.int64)

for i in range(len(X)):
    if predict_sum[i] >= threshold :
        predict[i] = 1
    if predict_sum[i] < threshold :
        predict[i] = 0


print(predict)
print(y_orig)

[5. 5. 5. ... 5. 5. 0.]
[1 1 1 ... 1 1 0]
247      1
1457     1
7087     1
9116     1
14323    0
        ..
5699     1
10742    0
537      1
9412     1
12463    0
Name: label, Length: 12696, dtype: int64


In [24]:
#train data
print('---- {} ----'.format('PU Bagging'))
print(print_cm(sklearn.metrics.confusion_matrix(y_orig,predict), labels=['negative', 'positive']))
print('')
print('Precision: ', precision_score(y_orig, predict))
print('Recall: ', recall_score(y_orig,predict))
print('Accuracy: ', accuracy_score(y_orig, predict))
print('f1_score: ', f1_score(y_orig, predict))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative       4294.0        410.0 
           true_positive          3.0       7989.0 
None

Precision:  0.9511846648410525
Recall:  0.9996246246246246
Accuracy:  0.9674700693131695
f1_score:  0.9748032456836068


In [25]:
best_model = BaggingClassifierPU(model_list[np.argmax(f1)],
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
print(best_model)

BaggingClassifierPU(base_estimator=LogisticRegression(), max_samples=2992,
                    n_estimators=50, n_jobs=-1)


In [26]:
print('Training bagging classifier...')

pu_start = time.perf_counter()

best_model.fit(X, y)
pu_end = time.perf_counter()
print('Done!')
print('Time:', pu_end - pu_start)


Training bagging classifier...
Done!
Time: 2.0458659247960895


In [27]:
#train data
print('---- {} ----'.format('PU Bagging'))
print(print_cm(sklearn.metrics.confusion_matrix(y_orig, best_model.predict(X)), labels=['negative', 'positive']))
print('')
print('Precision: ', precision_score(y_orig, best_model.predict(X)))
print('Recall: ', recall_score(y_orig, best_model.predict(X)))
print('Accuracy: ', accuracy_score(y_orig, best_model.predict(X)))
print('f1_score: ', f1_score(y_orig, best_model.predict(X)))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative       4561.0        143.0 
           true_positive          0.0       7992.0 
None

Precision:  0.982421634910879
Recall:  1.0
Accuracy:  0.9887366099558916
f1_score:  0.9911328827432256


In [28]:
#print wrong predictions
y_pre = best_model.predict(X)
y_orig_index = y_orig.index.tolist()

FN_index = []
FT_index = []

for i in range(len(y_orig)):
    if y_orig.iloc[i] == 1 and y_pre[i] == 0 :
        FN_index.append(y_orig_index[i])
    if y_orig.iloc[i] == 0 and y_pre[i] == 1 :
        FT_index.append(y_orig_index[i])
        
print("False Negtive:")
print(df_process.loc[FN_index])
print("False Positive:")
print(df_process.loc[FT_index])

False Negtive:
Empty DataFrame
Columns: [p1, p2, p3, p4, label]
Index: []
False Positive:
       p1  p2    p3  p4  label
16895   0   0  3261   0      0
18608   0   0  1700   0      0
18362   0   0  2973   0      0
15471   0   0  2069   0      0
18811   0   0   979   0      0
...    ..  ..   ...  ..    ...
16340   0   0   343   0      0
18181   0   0  1832   0      0
19053   0   0   784   0      0
19936   0   0  2756   0      0
19107   0   0  2892   0      0

[143 rows x 5 columns]


In [29]:
#test data
print('---- {} ----'.format('PU Bagging'))
print(print_cm(sklearn.metrics.confusion_matrix(y_test, best_model.predict(x_test_input.iloc[:,:-1])), labels=['negative', 'positive']))
print('')
print('Precision: ', precision_score(y_test, best_model.predict(x_test_input.iloc[:,:-1])))
print('Recall: ', recall_score(y_test, best_model.predict(x_test_input.iloc[:,:-1])))
print('Accuracy: ', accuracy_score(y_test, best_model.predict(x_test_input.iloc[:,:-1])))
print('f1_score: ', f1_score(y_test, best_model.predict(x_test_input.iloc[:,:-1])))

---- PU Bagging ----
                        pred_negative        pred_positive 
           true_negative       1129.0         38.0 
           true_positive          0.0       2008.0 
None

Precision:  0.9814271749755621
Recall:  1.0
Accuracy:  0.9880314960629921
f1_score:  0.9906265416872225


In [30]:
#print wrong predictions
y_test_pre = best_model.predict(x_test_input.iloc[:,:-1])
y_test_index = y_test.index.tolist()

FN_test_index = []
FT_test_index = []

for i in range(len(y_test)):
    if y_test.iloc[i] == 1 and y_test_pre[i] == 0 :
        FN_test_index.append(y_test_index[i])
    if y_test.iloc[i] == 0 and y_test_pre[i] == 1 :
        FT_test_index.append(y_test_index[i])

print("False Negtive:")
print(df_process.loc[FN_test_index])
print("False Positive:")
print(df_process.loc[FT_test_index])

False Negtive:
Empty DataFrame
Columns: [p1, p2, p3, p4, label]
Index: []
False Positive:
       p1  p2    p3  p4  label
17056   0   0   393   0      0
17597   0   0  3039   0      0
15745   0   0   710   0      0
19906   0   0  2171   0      0
18619   0   0  2418   0      0
17474   0   0  2501   0      0
16186   0   0  2852   0      0
18743   0   0   453   0      0
17565   0   0  1706   0      0
19743   0   0  2932   0      0
17660   0   0  2651   0      0
15549   0   0  2352   0      0
19049   0   0   333   0      0
16215   0   0   608   0      0
16787   0   0  1484   0      0
18881   0   0  3060   0      0
16493   0   0   801   0      0
15124   0   0  3150   0      0
16270   0   0  1860   0      0
18748   0   0  1928   0      0
19822   0   0  1583   0      0
15122   0   0  2127   0      0
19123   0   0   318   0      0
17816   0   0  3129   0      0
15645   0   0  1428   0      0
18517   0   0   315   0      0
15630   0   0   279   0      0
10001   0   1     0   0      0
19857   0  

In [31]:
orig_data = df_process.iloc[:,:-1].copy()
orig_label = best_model.predict(orig_data)

print(orig_data)
print(orig_label)

       p1  p2    p3  p4
0       1   0     0   0
1       0   1     1   0
2       1   0     1   0
3       0   0     0   1
4       0   1     2   0
...    ..  ..   ...  ..
19991   0   1  2748   1
19995   1   1  2663   0
19997   1   1   999   1
19998   0   1   999   1
19999   0   1   350   1

[15871 rows x 4 columns]
[1 1 1 ... 0 0 0]


In [33]:
import xgboost as xgb
model = BaggingClassifierPU(xgb.XGBClassifier(),
                         n_estimators = 50, 
                         n_jobs = -1, 
                         max_samples = sum(y)  
                        )
print(model)
model.fit(orig_data, orig_label)

BaggingClassifierPU(base_estimator=XGBClassifier(base_score=None, booster=None,
                                                 colsample_bylevel=None,
                                                 colsample_bynode=None,
                                                 colsample_bytree=None,
                                                 gamma=None, gpu_id=None,
                                                 importance_type='gain',
                                                 interaction_constraints=None,
                                                 learning_rate=None,
                                                 max_delta_step=None,
                                                 max_depth=None,
                                                 min_child_weight=None,
                                                 missing=nan,
                                                 monotone_constraints=None,
                                                 n_estimators=1

BaggingClassifierPU(base_estimator=XGBClassifier(base_score=None, booster=None,
                                                 colsample_bylevel=None,
                                                 colsample_bynode=None,
                                                 colsample_bytree=None,
                                                 gamma=None, gpu_id=None,
                                                 importance_type='gain',
                                                 interaction_constraints=None,
                                                 learning_rate=None,
                                                 max_delta_step=None,
                                                 max_depth=None,
                                                 min_child_weight=None,
                                                 missing=nan,
                                                 monotone_constraints=None,
                                                 n_estimators=1

In [35]:
#verification

import random

place_max = max(np.amax(df_process.iloc[:10000, [2]]))  #w_dependence
name1 = ['min', 'max', 'Precision', 'Recall', 'Accuracy', 'f1_score','time']
test = pd.DataFrame(columns=name1)


for i in range(1, 100):
   
    '''
    flag = int((0.1 * i) * place_max)
    print(flag)
    temp_data = df_process.iloc[(10000 - int(0.1 * place_max)) : (10000 + int(0.1 * place_max)), : -1].copy()
    temp_label = df_process.iloc[(10000 - int(0.1 * place_max)) : (10000 + int(0.1 * place_max)), -1].copy()
    temp_data.iloc[:, 2] = temp_data.iloc[:, 2] + flag + 1
    '''
    pu_start = time.perf_counter()
    
    pos_list = np.random.randint(low=0, high=9999, size=2000).tolist()
    neg_list = np.random.randint(low=10000, high=15000, size=2000).tolist()
    all_list = pos_list + neg_list
    
    flag1 = int((0.1 * (i - 1)) * place_max)
    flag2 = int((0.1 * i) * place_max)
    print(flag2)
    
    temp_data = df_process.iloc[all_list, : -1].copy()
    temp_label = df_process.iloc[all_list, -1].copy()
    
    temp_data.iloc[:, 2] = temp_data.iloc[:, 2] + random.randint((df_max[0] + flag1 + 1), ((df_max[0] + flag2 + 1)))
    
    print('======')
    print(temp_data.iloc[:, :])

    temp_max = max(np.amax(temp_data.iloc[:, [2]]))
    temp_min = min(np.amin(temp_data.iloc[:, [2]]))

    print('---- {} ----'.format(i))
    print('Precision: ', precision_score(temp_label, model.predict(temp_data)))
    print('Recall: ', recall_score(temp_label, model.predict(temp_data)))
    print('Accuracy: ', accuracy_score(temp_label, model.predict(temp_data)))
    print('f1_score: ', f1_score(temp_label, model.predict(temp_data)))
    
    pu_end = time.perf_counter()

    test.loc[i] = [temp_min, temp_max,
                    precision_score(temp_label, model.predict(temp_data)),
                    recall_score(temp_label, model.predict(temp_data)),
                    accuracy_score(temp_label, model.predict(temp_data)),
                    f1_score(temp_label, model.predict(temp_data)),
                    (pu_end - pu_start)]
    
    orig_data = pd.concat([orig_data, temp_data], ignore_index=True)
    orig_label = pd.Series(orig_label.tolist() + model.predict(temp_data).tolist())

    model.fit(orig_data,orig_label)
      
test.to_csv('../result/1_place_unknown_processdata_result.csv')

333
        p1   p2    p3   p4
3770     1    0  4827    0
1696     0    1  4136    0
8368     0    1  6360    0
2951     1    0  4554    0
918      0    0  3875    1
...    ...  ...   ...  ...
12167  599  761  3637  212
11127  264  293  3664  597
10581  892  797  4164  557
14933  636  464  4501  255
13541    9  637  3746  887

[4000 rows x 4 columns]
---- 1 ----
Precision:  0.0
Recall:  0.0
Accuracy:  0.5
f1_score:  0.0
666
        p1   p2    p3   p4
5121     0    0  5419    1
4272     0    0  5136    1
7454     1    0  6198    0
1634     1    0  4258    0
4299     0    0  5145    1
...    ...  ...   ...  ...
12865  515  638  4651  750
12677  470  773  4299  639
11613  354   95  4308  889
11871  655  909  4001  502
10037  970  615  3827  332

[4000 rows x 4 columns]
---- 2 ----
Precision:  0.9975062344139651
Recall:  1.0
Accuracy:  0.99875
f1_score:  0.9987515605493134
999
        p1   p2    p3   p4
2376     0    0  4976    1
6521     1    0  6359    0
1915     0    1  4824    0
7049  

Precision:  0.9970089730807578
Recall:  1.0
Accuracy:  0.9985
f1_score:  0.9985022466300548
6332
        p1   p2     p3   p4
5872     0    1  11436    0
9803     1    0  12746    0
1328     1    0   9921    0
173      1    0   9536    0
2710     0    1  10382    0
...    ...  ...    ...  ...
13700  289  906   9623  368
14135  907  399   9889  170
12674  172  845  10458  682
10947  426  227   9907  813
11184   28  418   9625  343

[4000 rows x 4 columns]
---- 19 ----
Precision:  0.9960159362549801
Recall:  1.0
Accuracy:  0.998
f1_score:  0.998003992015968
6666
        p1   p2     p3   p4
4101     0    0  11145    1
8541     0    0  12625    1
6924     0    0  12086    1
1261     0    1  10200    0
6619     0    1  11986    0
...    ...  ...    ...  ...
12873  477  914  10285  776
10352  327  214  10326  131
13531  898  274   9909  759
10072  733  367  10585  207
11296  907  737  10553  678

[4000 rows x 4 columns]
---- 20 ----
Precision:  0.998003992015968
Recall:  1.0
Accuracy:  0.999


KeyboardInterrupt: 