In [1]:
import tensorflow as tf
import numpy as np
%load_ext autoreload
%autoreload 2
import os
import sys
import time
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras import Model
import pandas as pd

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
from sklearn import metrics
import sklearn


In [3]:
class DataHandler():
    def __init__(self,dataset,labels,weights,batch_size = 64,shuffle=False):
        self.dataset = dataset
        self.current = 0
        self.len = len(dataset)
        self.batch_size = batch_size
        self.labels = labels
        self.do_shuffle = shuffle
        self.inds = np.arange(self.len)
        self.weights = weights
        assert self.len>=batch_size
        assert len(labels)==len(dataset)
        if self.do_shuffle:
            self.shuffle()
    def shuffle(self):
        if self.do_shuffle:
            p = np.random.permutation(len(self.dataset))
            self.inds = p
    def next_batch(self,batch_size = None):
        if batch_size is None:
            batch_size = self.batch_size
        current_inds = self.inds[self.current:self.current+batch_size]
        batch = self.dataset[current_inds]
        y_batch = self.labels[current_inds]
        if self.weights is not None:
            w_batch = self.weights[current_inds]
        self.current +=batch_size
        if self.current>=self.len:
            new_inds = self.inds[:batch_size-len(batch)]
#             batch = torch.cat((batch,self.dataset[new_inds]))
            batch = np.concatenate((batch,self.dataset[new_inds]))
#             y_batch = torch.cat((y_batch,self.labels[new_inds]))
            y_batch = np.concatenate((y_batch,self.labels[new_inds]))
            if self.weights is not None:
#                 w_batch =  torch.cat((w_batch,self.weights[new_inds]))
                w_batch =  np.concatenate((w_batch,self.weights[new_inds]))
            self.current=0
            self.shuffle()
        if self.weights is not None:
            return batch,y_batch,w_batch
        return batch,y_batch

In [4]:
class PacketModel(Model):
    def __init__(self):
        super(PacketModel, self).__init__()
        self.flatten = Flatten()
        self.d1 = Dense(2048, activation='relu')
        self.d2 = Dense(2)

    def call(self, x):
        x = self.flatten(x)
        dense1_out = self.d1(x)
        dense2_out = self.d2(dense1_out)
        return dense2_out

In [5]:
def adjust_learning_rate_inv(lr, optimizer, iters, alpha=0.001, beta=0.75):
    lr = lr / pow(1.0 + alpha * iters, beta)
    optimizer.lr.assign(lr)

In [6]:
class Solver():
    def __init__(self,optimizer,net,base_lr):
        self.iters = 0
        self.optimizer = optimizer
        self.net = net
        self.base_lr = base_lr
    def update_lr(self):
        adjust_learning_rate_inv(self.base_lr,self.optimizer,self.iters)

In [7]:
def get_files(day,prefix = '../../../anomaly_datasets/IDS2017/packet_based/'):
    all_files = []
    prefix = prefix+day
    for file in os.listdir(prefix):
        if file.endswith(".npy") and file.startswith('part'):
            all_files.append(os.path.join(prefix, file))
    all_files = sorted(all_files)
    return all_files

In [8]:
def preproces_datasets(src_day,trg_day):
    src_files = [
        src_day,
    ]
    trg_files = [
       trg_day,
    ]
    def get_ds(day):
        all_files = get_files(day)
        x_test = []
        for f in all_files:
            print (f)
            x_test.append(np.load(f))
        x_test = np.concatenate(x_test,axis=0)
#         x_test = (x_test - train_min)/(train_max - train_min+0.000001)
        a = timesteps -  len(x_test) % timesteps
        temp = x_test[:a]
        x_test = np.concatenate((x_test,temp),axis=0)
        x_test = x_test.reshape(-1,timesteps*num_input)
        x_test = x_test.astype(np.float32)
        if day.find('monday')>=0:
            y_test = np.zeros(len(x_test),dtype=np.float32)
        else:
            yt = np.load('../../../anomaly_datasets/IDS2017/packet_based/'+day+'/labels.npy')
            a = timesteps -  len(yt) % timesteps
            temp = yt[:a]
            y_test = np.concatenate((yt,temp),axis=0)
            y_test = y_test.reshape(-1,timesteps)
            y_test = y_test[:,-1]
        return x_test,y_test
    x_src = []
    y_src = []
    for day in src_files:
        t1,t2 = get_ds(day)
        x_src.append(t1)
        y_src.append(t2)
    x_src = np.concatenate(x_src)
    y_src = np.concatenate(y_src)

    x_trg = []
    y_trg = []
    for day in trg_files:
        t1,t2 = get_ds(day)
        x_trg.append(t1)
        y_trg.append(t2)
    x_trg = np.concatenate(x_trg)
    y_trg = np.concatenate(y_trg)
    
    print(x_src.shape,y_src.shape,x_trg.shape,y_trg.shape)
    
    x_all = np.concatenate((x_src,x_trg),axis=0)
#     x_all = x_src
    
    train_min = np.min(x_all,axis=0)
    train_max = np.max(x_all,axis=0)
    # train_min = np.min(x_train,axis=0)
    # train_max = np.max(x_train,axis=0)

    x_src  = (x_src - train_min)/(train_max - train_min + 1e-6)
    x_trg  = (x_trg - train_min)/(train_max - train_min + 1e-6)

    
#     print('mal rate in x_test2',np.sum(y_test2==1)/len(y_test2))
    
    return x_src,y_src,x_trg,y_trg,train_min,train_max

    
    
    
    

In [9]:
@tf.function
def train_step(x, y, optimizer,net):
    with tf.GradientTape() as tape:
        y_pred = net(x, training=True)
        mse_loss = tf.reduce_sum((y - y_pred)**2,axis=1)
        mse_loss = tf.reduce_mean(mse_loss)
    gradients = tape.gradient(mse_loss, net.trainable_variables)
    optimizer.apply_gradients(zip(gradients, net.trainable_variables))
    train_loss(mse_loss)


In [10]:
def train_model2(nb_iters,dhandler,tsolver):
    st = time.time()
    train_loss.reset_states()
    for i in range(nb_iters):
        x_batch,y_batch = dhandler.next_batch()

        if i%5==0:
            tsolver.iters+=1
            tsolver.update_lr()

        train_step(x_batch,y_batch,tsolver.optimizer,tsolver.net)
        if i % 50 == 49 or i == nb_iters - 1:
            remained_iters = nb_iters - i
            passed_time = time.time() - st
            ETA = int(passed_time * remained_iters / i)
            ETA_min, ETA_sec = ETA // 60, ETA % 60
            mean_loss = train_loss.result().numpy()
            print ('\r' + \
                  ' iter: ' + str(i + 1) + '/' + str(nb_iters) + \
                  ' ETA: ' + str(ETA_min) + ':' + "{0:02d}".format(ETA_sec) + \
                  ' loss: ' + "{0:0.4f}".format(mean_loss),end=" ")
            sys.stdout.flush()
    print(' ')



In [11]:
@tf.function
def test_step(x,net):
    return net(x,training=False)

In [12]:
def test(x_ds,y_ds,net,ret=False):
    all_y_pred = np.zeros_like(y_ds)
    all_scores = np.zeros((len(y_ds),2))
    for i in range(0,len(x_ds),batch_size):
        x_batch = x_ds[i:i+batch_size]
        y_batch = y_ds[i:i+batch_size]
        y_pred = test_step(x_batch,net)
        y_pred = y_pred.numpy()
        all_scores[i:i+batch_size] = y_pred
        y_pred = y_pred.argmax(axis=1)
    #             return np.sum(Preds==Labels)/len(Preds)
        all_y_pred[i:i+batch_size] = y_pred
    print('accuracy:',np.sum(all_y_pred==y_ds)/len(y_ds))
    if ret:
        return all_y_pred,all_scores


In [13]:
def calc_scores(x_ds,y_ds,net,draw=True):
    all_y_pred,probs_np = test(x_ds,y_ds,net,ret=True)
    gt_labels = y_ds
    acc = sklearn.metrics.accuracy_score(y_true=gt_labels,y_pred=probs_np.argmax(axis=1))
    print('accuracy:',acc)
    f1 = sklearn.metrics.f1_score(y_true=gt_labels,y_pred=probs_np.argmax(axis=1))
    print('f1 score:',f1)

    precision, recall, thresholds = sklearn.metrics.precision_recall_curve(y_true=gt_labels,probas_pred=probs_np[:,1],pos_label=1)
    auprc = sklearn.metrics.auc(recall,precision)
    print('AUPRC:',auprc)
    
    
    if draw:
        print('balanced accuracy:',sklearn.metrics.balanced_accuracy_score(y_true=gt_labels,y_pred=probs_np.argmax(axis=1)))
        print('precision:',sklearn.metrics.precision_score(y_true=gt_labels,y_pred=probs_np.argmax(axis=1)))
        print('recall:',sklearn.metrics.recall_score(y_true=gt_labels,y_pred=probs_np.argmax(axis=1)))

        
        plt.figure(figsize=(15,10))
        plt.subplot(2,2,1)
        plt.step(recall, precision, color='b', alpha=0.2, where='post')
        plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        
        fpr,tpr,thr = sklearn.metrics.roc_curve(y_true=gt_labels,y_score=probs_np[:,1])
        print('ROC-AUC',sklearn.metrics.auc(fpr,tpr))
        
        plt.subplot(2,2,2)
        plt.step(fpr, tpr, color='b', alpha=0.2, where='post')
        plt.fill_between(fpr, tpr, step='post', alpha=0.2, color='b')
        plt.xlabel('FPR')
        plt.ylabel('TPR')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        return precision,recall,fpr,tpr
    return f1,auprc,acc


def calc_f1_score(x_ds,y_ds):
    all_y_pred,probs_np = test(x_ds,y_ds,net,ret=True)
    f1_score = metrics.f1_score(y_true=y_ds,y_pred=all_y_pred)
    print('f1 score:',f1_score)
    return f1_score


    

# train_model

In [14]:
@tf.function
def train_step_DA(x, y, w, optimizer,net):
    with tf.GradientTape() as tape:
        y_pred = net(x, training=True)
        mse_loss = tf.reduce_sum((y - y_pred)**2,axis=1)*w
        mse_loss = tf.reduce_mean(mse_loss)
    gradients = tape.gradient(mse_loss, net.trainable_variables)
    optimizer.apply_gradients(zip(gradients, net.trainable_variables))
    train_loss(mse_loss)


In [15]:
def train_model(nb_epochs,dhandler,tsolver):
    total_batch = dhandler.len // dhandler.batch_size
    if dhandler.len % dhandler.batch_size != 0:
        total_batch += 1
    st = time.time()
    for ep in range(nb_epochs):
        train_loss.reset_states()
        for i in range(total_batch):
            x_batch,y_batch_t,w_batch = dhandler.next_batch()
            y_batch = np.zeros((dhandler.batch_size,2),dtype=np.float32)
            y_batch[range(dhandler.batch_size),y_batch_t] = 1
#             if i%5==0:
#                 tsolver.iters+=1
#                 tsolver.update_lr()
            train_step_DA(x_batch,y_batch,w_batch,tsolver.optimizer,tsolver.net)
#             train_step(x_batch,y_batch,tsolver.optimizer,tsolver.net)


            # update the network
        passed_time = time.time() - st
        remained_epochs = nb_epochs - ep
        ETA = int(passed_time * remained_epochs)
        ETA_min, ETA_sec = ETA // 60, ETA % 60
        print ('\r' + 'epoch: ' + str(ep + 1) + '/' + str(nb_epochs) + \
                      ' ETA: ' + str(ETA_min) + ':' + "{0:02d}".format(ETA_sec) + \
                      ' loss: ' + "{0:0.4f}".format(train_loss.result().numpy()),end=" ")
        sys.stdout.flush()
    print(' ')



In [16]:
# def recalc_labels(trg_probs_np,trg_gt_class_percentage):
#     n_classes = 2
#     labels_w_class_rates = trg_probs_np.argmax(axis=1)
#     current_class_percentage = np.zeros(n_classes)
#     for i2 in range(n_classes):
#         current_class_percentage[i2] = np.sum(labels_w_class_rates==i2)/len(trg_probs_np)

#     diff_class_rates =  current_class_percentage - trg_gt_class_percentage
#     for i in range(len(diff_class_rates)):
#         if diff_class_rates[i]<=0:
#             continue
#         predicted_as_c = labels_w_class_rates==i
#         current_class = i
#         current_diff = diff_class_rates[i]
#         current_num = np.round(current_diff*len(trg_probs_np)).astype(np.int32)

#         current_probs = trg_probs_np[labels_w_class_rates==current_class]
#         avg_score_sorted = np.sort(current_probs,axis=1)
#         diff_probs = avg_score_sorted[:,-1] - avg_score_sorted[:,-2]
        
#         diff_probs_sorted_inds = np.argsort(diff_probs)
#         y_val = np.ones(len(diff_probs))*current_class
#         for i in range(current_num):
#             y_val[i]=-1
#         weights_trg2 = np.zeros(len(diff_probs))
#         weights_trg2[diff_probs_sorted_inds] = y_val
#         labels_w_class_rates[predicted_as_c] = weights_trg2
    
#     return labels_w_class_rates,trg_probs_np

In [17]:
# trg_probs_np = af.calculate_with_small_batch([my_model.probs],[last_fixed_layer],[global_pool_trg],sess,batch_size=128)
def recalc_labels(trg_probs_np,trg_gt_class_percentage):

#     diff_class_rates =  trg_gt_class_percentage - current_class_percentage
    # print(diff_class_rates*len(trg_probs_np))
#     sorted_ind_class_rates = np.argsort(diff_class_rates)
    
    visited = []
    labels_w_class_rates = trg_probs_np.argmax(axis=1)
    for i in range(n_classes):

        current_class_percentage = np.zeros(n_classes)
        for i2 in range(n_classes):
#             current_class_percentage[i2] = np.sum(trg_probs_np.argmax(axis=1)==i2)/len(trg_probs_np)
            current_class_percentage[i2] = np.sum(labels_w_class_rates==i2)/len(trg_probs_np)

        diff_class_rates =  trg_gt_class_percentage - current_class_percentage
        
#         print(diff_class_rates*len(trg_probs_np))
    #     sorted_ind_class_rates = np.argsort(diff_class_rates)
    #     current_class = sorted_ind_class_rates[i]
        current_class = np.argmin(diff_class_rates)
        current_diff = -1*diff_class_rates[current_class]
        current_num = np.round(current_diff*len(trg_probs_np)).astype(np.int32)
#         print(current_num,current_class)
        visited.append(current_class)
        if current_num <= 0:
            continue
        ### take the samples which are predicted as current_class
#         current_probs = trg_probs_np[trg_probs_np.argmax(axis=1)==current_class]
        current_probs = trg_probs_np[labels_w_class_rates==current_class]
#         print(current_probs.shape)
        current_probs_sorted = np.argsort(current_probs.max(axis=1))
        ### need to change their labels:
        tobe_changed_indx = current_probs_sorted[:current_num]
        tobe_changed_current_probs = current_probs[tobe_changed_indx]
        #### sorted value of probs
        as_probs = np.argsort(tobe_changed_current_probs)

        o2 = as_probs
        cond = np.ones_like(o2).astype(np.bool)
        for e in visited:
        #     o2 = o2[o2!=e]
            cond = cond & (o2!=e)
    #         print(cond.shape)
        o2 = o2[cond]
        if len(o2)==0:
            continue
        o2 = o2.reshape(len(as_probs),-1)
    #     print (o2.shape)
        new_labels = o2[:,-1]
        res = np.ones(len(current_probs))*current_class
        temp = tobe_changed_current_probs[range(len(tobe_changed_current_probs)),current_class]
        tobe_changed_current_probs[range(len(tobe_changed_current_probs)),current_class]=tobe_changed_current_probs[range(len(tobe_changed_current_probs)),new_labels]
        tobe_changed_current_probs[range(len(tobe_changed_current_probs)),new_labels]=temp
#         tobe_changed_current_probs[range(len(tobe_changed_current_probs)),current_class]=0
#         tobe_changed_current_probs[range(len(tobe_changed_current_probs)),new_labels]+=temp

    #     print(res.shape)
        res[tobe_changed_indx] = new_labels
        current_probs[tobe_changed_indx] = tobe_changed_current_probs
#         print(current_probs.shape,res.shape)
        trg_probs_np[labels_w_class_rates==current_class] = current_probs
        labels_w_class_rates[labels_w_class_rates==current_class] = res

    #     print(labels_w_class_rates[50],'rrrrr')
    #     print(res)
    #     print(np.sort(tobe_changed_indx))
    #     if i==80:
    #         break
    return labels_w_class_rates,trg_probs_np


In [18]:
#### for some iterations:
def DA(tsolver,trg_data,trg_gts,src_data,src_gts,trg_gt_class_percentage,st_nnn = 0,mode=1):
#     tsolver.net.eval()
    begin_time = time.time()
    my_coef = 0.05
#     for nnn in range(0,90,2):
    for nnn in range(st_nnn,90,2):
        print('#########################################',nnn)
        inner_loop = 1
        if nnn>=88:
            inner_loop=1
        for jjj in range(inner_loop):
            print ('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch:',jjj,
               'Elapsed Time(m): {0:0.2f}'.format((time.time()-begin_time)/60))
#             weights_src3 = torch.ones(len(src_gts))
            weights_src3 = np.ones(len(src_gts))

            y_pred_trg,logits_trg = test(trg_data,trg_gts,net,ret=True)
            big_avg_score = logits_trg
            avg_pred_labels = big_avg_score.argmax(axis=1)
            
            if mode==2:
                if nnn<60:
                    #do something
                    for i in range(2):
                        temp = np.sum(avg_pred_labels==i)/len(avg_pred_labels)
                        trg_gt_class_percentage[i] = 0.7*trg_gt_class_percentage[i] + 0.3*temp
    #                 trg_gt_class_percentage[i] = temp
    #             else:
    #             trg_gt_class_percentage_current = 0.9*trg_gt_class_percentage_current + 0.1*trg_gt_class_percentage
                
            print('%%%%%%%',np.sum(np.abs(trg_gt_class_percentage - trg_gt_class_percentage0)))
            
            avg_pred_labels2, big_avg_score2 = recalc_labels(np.copy(big_avg_score),trg_gt_class_percentage)
#             big_avg_score = big_avg_score2
            
            
        
            if nnn<60:
                big_avg_score = big_avg_score2
                avg_pred_labels = avg_pred_labels2

            
            
    #         y_trg = trg_gts.cpu()
#             y_trg = trg_labels.cpu()
            y_trg = trg_gts

            avg_score_sorted = np.sort(big_avg_score,axis=1)
            diff_probs = avg_score_sorted[:,-1] - avg_score_sorted[:,-2]


#             weights_trg3 = np.ones(len(diff_probs))
            
            weights_trg3 = np.zeros(len(diff_probs))
            for c in range(n_classes):
                predicted_as_c = avg_pred_labels==c
                size_c = np.sum(predicted_as_c)
                if size_c>1:
    #                 print(c,size_c,end=' ')
    #                 if nnn<=15:
                    left_size = int(np.ceil(((nnn+1)*0.01+0.1)*size_c))
                    x_val_left = 1+(10/2 - 1)/left_size*(np.arange(left_size))
                    right_size = size_c - left_size
    #                 x_val_right = 5 + (size_c - 5)/right_size*(np.arange(0,right_size))
                    x_val_right = 10000*(np.arange(1,right_size+1))
                    x_val = np.concatenate((x_val_left,x_val_right))
                    y_val = np.power(x_val,-1)
                    y_val = y_val[::-1]
    #                 x_val = np.arange(size_c)*1+1 + my_coef*nnn
    #                 y_val = np.power(x_val,-.99)
    #                 y_val = (y_val - y_val.min())/(y_val.max() - y_val.min())
    #                 y_val = y_val[::-1]

                    diff_c = diff_probs[predicted_as_c]
                    diff_probs_sorted_inds_c = np.argsort(diff_c)
                    weights_trg2 = np.zeros(len(diff_c))
                    weights_trg2[diff_probs_sorted_inds_c] = y_val
                    weights_trg3[predicted_as_c] = weights_trg2

            coef = (avg_pred_labels==avg_pred_labels2)*1
            weights_trg3*=coef
            weights_trg3 = weights_trg3.astype(np.float32)
#             plt.scatter(np.arange(len(weights_trg3)),np.sort(weights_trg3))
#             weights_trg3 = torch.tensor(weights_trg3)


            #     big_cond = (diff_probs>=thr)
#             big_cond = np.zeros(len(big_avg_score))==0
            big_cond = weights_trg3>=0.001
            print('len of big_cond',np.sum(big_cond),'len of trg weights',len(weights_trg3))
#             correct_indices = avg_pred_labels == y_trg.numpy()
            correct_indices = avg_pred_labels == y_trg
            mal_inds = y_trg==1    
            print('len of correcT_indices',np.sum(correct_indices),'correct mals:',np.sum(correct_indices&mal_inds))
            
            

            new_ratio = np.sum(~correct_indices&big_cond)/np.sum(big_cond)
            #         new_ratio = (len(big_cond) - len(correct_indices)) / len(big_cond)
            #         print(new_ratio*100,np.sum(big_cond),thr)
            print(">>>>>>>>>>>>>>>>>>>>>>>>>>{0:0.4f}".format(new_ratio*100),'iter:',jjj,'elapssed time:',time.time() - begin_time)

#             big_cond = np.argwhere(big_cond)
#             big_cond = big_cond.reshape(-1)
#             correct_indices = np.argwhere(correct_indices)
#             correct_indices = correct_indices.reshape(-1)
            
            correct_x_trg = trg_data[big_cond]
            correct_pseudo_labels = avg_pred_labels[big_cond].astype(np.int32)
            weights_trg3 = weights_trg3[big_cond]

        
#             correct_x_trg = trg_data[big_cond&correct_indices]
#             correct_pseudo_labels = avg_pred_labels[big_cond&correct_indices].astype(np.int32)
#             weights_trg3 = weights_trg3[big_cond&correct_indices]

#             correct_pseudo_labels = torch.tensor(correct_pseudo_labels,dtype=torch.long)

            p = np.random.permutation(len(src_data))
            p = p[:len(trg_data)]
            x_temp = src_data[p]
            y_temp = src_gts[p]
            w_temp = weights_src3[:len(trg_data)]

            m1 = np.concatenate((x_temp,correct_x_trg))
            m2 = np.concatenate((y_temp,correct_pseudo_labels)).astype(np.int32)
            m3 = np.concatenate((w_temp,weights_trg3)).astype(np.float32)
            
                                        
            if nnn<60:
                pos_inds = m2==1
                x_train_pos = m1[pos_inds]
                y_train_pos = m2[pos_inds]
                w_train_pos = m3[pos_inds]
                x_train_neg = m1[~pos_inds]
                y_train_neg = m2[~pos_inds]
                w_train_neg = m3[~pos_inds]
                p = np.random.permutation(len(x_train_neg))
                x_train_neg = x_train_neg[p]
                y_train_neg = y_train_neg[p]
                w_train_neg = w_train_neg[p]
                pos_len = len(x_train_pos)

                correct_dhandler = DataHandler(np.concatenate((x_train_pos,x_train_neg[:pos_len]))
                                                   ,np.concatenate((y_train_pos,y_train_neg[:pos_len]))
                                              ,np.concatenate((w_train_pos,w_train_neg[:pos_len])),batch_size=256,shuffle=True)

            else:
                correct_dhandler = DataHandler(m1, m2, m3, 256,shuffle=True)                


            
            
            print(m1.shape,m2.shape,m3.shape)

            print('------------',correct_dhandler.len)
    #         correct_dhandler_src = DataHandler(x_temp, y_temp, w_temp, 32,shuffle=True)
    #         correct_dhandler_trg = DataHandler(trg_data, correct_pseudo_labels, weights_trg3, 32,shuffle=True)
    #         ep = 2 if nnn == 0 else 3
            ep = 1
            train_model(ep,correct_dhandler,tsolver)

            if nnn%8==0:
    #             train_solver.net.module.set_bn_domain(train_solver.bn_domain_map[train_solver.target_name])
#                 with torch.no_grad():
                print('current accuracy on target domain: ',end=' ')
                test(trg_data,trg_gts,net)
#                 print('current accuracy on target domain: ',current_acc)
#             else:
#                 net.eval()


In [19]:
timesteps = 20
num_input = 29
label_names = ['Benign','FTP-Patator','SSH-Patator','Slowloris','Slowhttptest','Hulk','GoldenEye','Heartbleed', 
               'Web-Attack', 'Infiltration','Botnet','PortScan','DDoS']

In [20]:
days = ['tuesday','wednesday','thursday']

In [21]:
# onlysrc_res = {}
# pppl_res = {}
# for src_day in days:
#     for trg_day in days:
#         if src_day==trg_day:
#             continue
#         print('**************************************************',src_day,trg_day)
#         x_src,y_src,x_trg,y_trg,train_min,train_max = preproces_datasets(src_day,trg_day)
#         real_labels = y_src!=-1
#         x_src = x_src[real_labels]
#         y_src = y_src[real_labels]

#         real_labels = y_trg!=-1
#         x_trg = x_trg[real_labels]
#         y_trg = y_trg[real_labels]

#         print(x_src.shape,y_src.shape,x_trg.shape,y_trg.shape)
        
#         y_src[y_src!=0] = 1
#         y_trg[y_trg!=0] = 1
        
#         print('mal rate in src',np.sum(y_src==1)/len(y_src))
#         print('mal rate in trg',np.sum(y_trg==1)/len(y_trg))
        
#         y_src_onehot = np.zeros((len(y_src),2),np.float32)
#         y_src_onehot[range(len(y_src)),y_src.astype(np.int32)] = 1.

#         y_trg_onehot = np.zeros((len(y_trg),2),np.float32)
#         y_trg_onehot[range(len(y_trg)),y_trg.astype(np.int32)] = 1.
        
#         input_size = x_src.shape[1]
#         net = PacketModel()
#         net._set_inputs(tf.TensorSpec([None,timesteps*num_input]))
        
        
#         base_lr = 0.0001
#         optimizer = tf.keras.optimizers.Adam(learning_rate=base_lr)
#         grads = []
#         for v in net.trainable_variables:
#             grads.append(np.zeros(v.shape))
#         optimizer.apply_gradients(zip(grads,net.trainable_variables))

#         tsolver = Solver(optimizer,net,base_lr)
#         train_loss = tf.keras.metrics.Mean(name='train_loss')
        
        
#         nb_epochs = 6
#         batch_size = 256
#         total_batch = len(x_src)//batch_size
#         if len(x_src) % batch_size!=0:
#             total_batch+=1
#         RANDOM_SEED = 2020
#         rng = np.random.RandomState(RANDOM_SEED)
        
        
#         for i in range(nb_epochs):
#             pos_inds = y_src==1
#             x_src_pos = x_src[pos_inds]
#             y_src_pos = y_src_onehot[pos_inds]
#             x_src_neg = x_src[~pos_inds]
#             y_src_neg = y_src_onehot[~pos_inds]
#             p = np.random.permutation(len(x_src_neg))
#             x_src_neg = x_src_neg[p]
#             y_src_neg = y_src_neg[p]
#             pos_len = len(x_src_pos)

#             src_dhandler = DataHandler(np.concatenate((x_src_pos,x_src_neg[:pos_len]))
#                                                ,np.concatenate((y_src_pos,y_src_neg[:pos_len])),None,batch_size=256,shuffle=True)

#             train_model2(total_batch,src_dhandler,tsolver)
            
#         name = src_day[:2]+'_'+trg_day[:2]
#         net.save('../../../saved_models/IDS/'+name+'_only_src')
        
#         x_trg = x_trg[::2]
#         y_trg = y_trg[::2]
        
        
        
        
#         f1,auprc,acc = calc_scores(x_trg,y_trg,net,draw=False)
#         onlysrc_res[name] = [('f1',f1),('auprc',auprc),('acc',acc)]
        
#         y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
#         for fpr in [0.001,0.01,0.1]:
#             scores = scores_trg[:,1]
#         #     fpr = 0.01
#             benign_scores = scores[y_trg==0]
#             benign_scores_sorted = np.sort(benign_scores)
#             thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
#             thr = benign_scores_sorted[-thr_ind]
#             print (thr)
#             mal_scores = scores[y_trg==1]
#             tpr = np.sum(mal_scores>thr)/len(mal_scores)
#             print('TPR:',tpr)
#             onlysrc_res[name].append((fpr,tpr))
            
            
        
        
#         ########################### DA
#         base_lr = 0.0001
#         # optimizer = torch.optim.Adam(net.parameters(),lr=base_lr)
#         optimizer = tf.keras.optimizers.Adam(lr=base_lr)
#         grads = []
#         for v in net.trainable_variables:
#             grads.append(np.zeros(v.shape))
#         optimizer.apply_gradients(zip(grads,net.trainable_variables))

#         tsolver = Solver(optimizer,net,base_lr)

#         # trg_preds = get_logits(x_target,y_target,tsolver)
#         trg_gts = y_trg
#         trg_data = x_trg
#         src_gts = y_src
#         src_data = x_src
#         t_labels = np.array(trg_gts)
#         l_trg = t_labels
#         trg_size = len(trg_data)
#         n_classes = 2
#         trg_gt_class_percentage = np.zeros(n_classes)
#         for i in range(n_classes):
#             trg_gt_class_percentage[i] = np.sum(t_labels==i)/len(t_labels)
        
#         DA(tsolver,trg_data,trg_gts,src_data,src_gts,trg_gt_class_percentage=trg_gt_class_percentage)
        
#         net.save('../../saved_models/IDS/'+name+'_pppl')
        
#         f1,auprc,acc = calc_scores(x_trg,y_trg,net,draw=False)
#         pppl_res[name] = [('f1',f1),('auprc',auprc),('acc',acc)]
        
#         y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
#         for fpr in [0.001,0.01,0.1]:
#             scores = scores_trg[:,1]
#         #     fpr = 0.01
#             benign_scores = scores[y_trg==0]
#             benign_scores_sorted = np.sort(benign_scores)
#             thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
#             thr = benign_scores_sorted[-thr_ind]
#             print (thr)
#             mal_scores = scores[y_trg==1]
#             tpr = np.sum(mal_scores>thr)/len(mal_scores)
#             print('TPR:',tpr)
#             pppl_res[name].append((fpr,tpr))
#         print('--------------------------------------------------------------------------------------------')
#         print('--------------------------------------------------------------------------------------------')
#         print(onlysrc_res)
#         print('--------------------------------------------------------------------------------------------')
#         print('--------------------------------------------------------------------------------------------')
#         print(pppl_res)
#         print('--------------------------------------------------------------------------------------------')
#         print('--------------------------------------------------------------------------------------------')
        
        

        

In [22]:
# print('name','f1_only_src','f1_pppl','fpr0.01_only_src','fpr0.01_pppl')
# for src_day in days:
#     for trg_day in days:
#         if src_day == trg_day:
#             continue
#         name = src_day[:2]+'_'+trg_day[:2]
#         print(name,onlysrc_res[name][0][1],pppl_res[name][0][1],onlysrc_res[name][4][1],pppl_res[name][4][1])

In [23]:
import pickle

In [24]:
# with open('onlysrc_res','wb') as f:
#     pickle.dump(onlysrc_res,f)
    
# with open('pppl_res','wb') as f:
#     pickle.dump(pppl_res,f)

In [25]:
# def print_res(ht=None):
#     for src_day in days:
#         for trg_day in days:
#             if src_day == trg_day:
#                 continue
#             name = src_day[:2]+'_'+trg_day[:2]
#             if ht is None:
#                 print(name,end=' ')
#             else:
# #                 temp = ht[name][0][1] #f1 score
#                 temp = ht[name][4][1] #f1 score
#                 print(temp,end=' ')
#     print()
# print_res()
# print_res(onlysrc_res)
# print_res(pppl_res)

# Params like other DS

In [26]:
days = ['tuesday','wednesday','thursday']

In [27]:
# onlysrc_res = {}
# pppl_res = {}
# for src_day in days:
#     for trg_day in days:
# # for src_day in ['tuesday']:
# #     for trg_day in ['thursday']:
#         if src_day==trg_day:
#             continue
#         print('**************************************************',src_day,trg_day)
#         x_src,y_src,x_trg,y_trg,train_min,train_max = preproces_datasets(src_day,trg_day)
#         real_labels = y_src!=-1
#         x_src = x_src[real_labels]
#         y_src = y_src[real_labels]

#         real_labels = y_trg!=-1
#         x_trg = x_trg[real_labels]
#         y_trg = y_trg[real_labels]

#         print(x_src.shape,y_src.shape,x_trg.shape,y_trg.shape)
        
#         y_src[y_src!=0] = 1
#         y_trg[y_trg!=0] = 1
        
#         x_src = x_src[::2]
#         y_src = y_src[::2]
        
#         x_trg = x_trg[::2]
#         y_trg = y_trg[::2]
        
#         print('mal rate in src',np.sum(y_src==1)/len(y_src))
#         print('mal rate in trg',np.sum(y_trg==1)/len(y_trg))
        
#         y_src_onehot = np.zeros((len(y_src),2),np.float32)
#         y_src_onehot[range(len(y_src)),y_src.astype(np.int32)] = 1.

#         y_trg_onehot = np.zeros((len(y_trg),2),np.float32)
#         y_trg_onehot[range(len(y_trg)),y_trg.astype(np.int32)] = 1.
        
#         input_size = x_src.shape[1]
#         net = PacketModel()
#         net._set_inputs(tf.TensorSpec([None,timesteps*num_input]))
        
        
#         base_lr = 0.0001
#         optimizer = tf.keras.optimizers.Adam(learning_rate=base_lr)
#         grads = []
#         for v in net.trainable_variables:
#             grads.append(np.zeros(v.shape))
#         optimizer.apply_gradients(zip(grads,net.trainable_variables))

#         tsolver = Solver(optimizer,net,base_lr)
#         train_loss = tf.keras.metrics.Mean(name='train_loss')
        
        
#         nb_epochs = 6
#         batch_size = 256
#         total_batch = len(x_src)//batch_size
#         if len(x_src) % batch_size!=0:
#             total_batch+=1
#         RANDOM_SEED = 2020
#         rng = np.random.RandomState(RANDOM_SEED)
# #         src_dhandler = DataHandler(x_src,y_src_onehot,None,batch_size=256,shuffle=True)
        
#         for i in range(nb_epochs):
#             pos_inds = y_src==1
#             x_src_pos = x_src[pos_inds]
#             y_src_pos = y_src_onehot[pos_inds]
#             x_src_neg = x_src[~pos_inds]
#             y_src_neg = y_src_onehot[~pos_inds]
#             p = np.random.permutation(len(x_src_neg))
#             x_src_neg = x_src_neg[p]
#             y_src_neg = y_src_neg[p]
#             pos_len = len(x_src_pos)

#             src_dhandler = DataHandler(np.concatenate((x_src_pos,x_src_neg[:pos_len]))
#                                                ,np.concatenate((y_src_pos,y_src_neg[:pos_len])),None,batch_size=256,shuffle=True)

#             train_model2(total_batch,src_dhandler,tsolver)
            
#         name = src_day[:2]+'_'+trg_day[:2]
#         net.save('../../saved_models/IDS/'+name+'_only_src3')
        

        
        
        
#         f1,auprc,acc = calc_scores(x_trg,y_trg,net,draw=False)
#         onlysrc_res[name] = [('f1',f1),('auprc',auprc),('acc',acc)]
        
#         y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
#         for fpr in [0.001,0.01,0.1]:
#             scores = scores_trg[:,1]
#         #     fpr = 0.01
#             benign_scores = scores[y_trg==0]
#             benign_scores_sorted = np.sort(benign_scores)
#             thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
#             thr = benign_scores_sorted[-thr_ind]
#             print (thr)
#             mal_scores = scores[y_trg==1]
#             tpr = np.sum(mal_scores>thr)/len(mal_scores)
#             print('TPR:',tpr)
#             onlysrc_res[name].append((fpr,tpr))
            
            
        
        
#         ########################### DA
#         base_lr = 0.0001
#         # optimizer = torch.optim.Adam(net.parameters(),lr=base_lr)
#         optimizer = tf.keras.optimizers.Adam(lr=base_lr)
#         grads = []
#         for v in net.trainable_variables:
#             grads.append(np.zeros(v.shape))
#         optimizer.apply_gradients(zip(grads,net.trainable_variables))

#         tsolver = Solver(optimizer,net,base_lr)

#         # trg_preds = get_logits(x_target,y_target,tsolver)
#         trg_gts = y_trg
#         trg_data = x_trg
#         src_gts = y_src
#         src_data = x_src
#         t_labels = np.array(trg_gts)
#         l_trg = t_labels
#         trg_size = len(trg_data)
#         n_classes = 2
#         trg_gt_class_percentage = np.zeros(n_classes)
#         for i in range(n_classes):
#             trg_gt_class_percentage[i] = np.sum(t_labels==i)/len(t_labels)
        
#         DA(tsolver,trg_data,trg_gts,src_data,src_gts,trg_gt_class_percentage=trg_gt_class_percentage)
        
#         net.save('../../saved_models/IDS/'+name+'_pppl3')
        
#         f1,auprc,acc = calc_scores(x_trg,y_trg,net,draw=False)
#         pppl_res[name] = [('f1',f1),('auprc',auprc),('acc',acc)]
        
#         y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
#         for fpr in [0.001,0.01,0.1]:
#             scores = scores_trg[:,1]
#         #     fpr = 0.01
#             benign_scores = scores[y_trg==0]
#             benign_scores_sorted = np.sort(benign_scores)
#             thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
#             thr = benign_scores_sorted[-thr_ind]
#             print (thr)
#             mal_scores = scores[y_trg==1]
#             tpr = np.sum(mal_scores>thr)/len(mal_scores)
#             print('TPR:',tpr)
#             pppl_res[name].append((fpr,tpr))
#         print('--------------------------------------------------------------------------------------------')
#         print('--------------------------------------------------------------------------------------------')
#         print(onlysrc_res)
#         print('--------------------------------------------------------------------------------------------')
#         print('--------------------------------------------------------------------------------------------')
#         print(pppl_res)
#         print('--------------------------------------------------------------------------------------------')
#         print('--------------------------------------------------------------------------------------------')
        
        

        

In [28]:
onlysrc_res = {}
pppl_res = {}
# for src_day in days:
#     for trg_day in days:
for src_day in ['thursday']:
    for trg_day in ['tuesday']:
        if src_day==trg_day:
            continue
        print('**************************************************',src_day,trg_day)
        x_src,y_src,x_trg,y_trg,train_min,train_max = preproces_datasets(src_day,trg_day)


        print(x_src.shape,y_src.shape,x_trg.shape,y_trg.shape)
        
        y_src[y_src!=0] = 1
        y_trg[y_trg!=0] = 1
        
        print('mal rate in src',np.sum(y_src==1)/len(y_src))
        print('mal rate in trg',np.sum(y_trg==1)/len(y_trg))
        
        y_src_onehot = np.zeros((len(y_src),2),np.float32)
        y_src_onehot[range(len(y_src)),y_src.astype(np.int32)] = 1.

        y_trg_onehot = np.zeros((len(y_trg),2),np.float32)
        y_trg_onehot[range(len(y_trg)),y_trg.astype(np.int32)] = 1.
        
        input_size = x_src.shape[1]
        net = PacketModel()
        net._set_inputs(tf.TensorSpec([None,timesteps*num_input]))
        
        
        base_lr = 0.0001
        optimizer = tf.keras.optimizers.Adam(learning_rate=base_lr)
        grads = []
        for v in net.trainable_variables:
            grads.append(np.zeros(v.shape))
        optimizer.apply_gradients(zip(grads,net.trainable_variables))

        tsolver = Solver(optimizer,net,base_lr)
        train_loss = tf.keras.metrics.Mean(name='train_loss')
        
        
        nb_epochs = 6
        batch_size = 256
        total_batch = len(x_src)//batch_size
        if len(x_src) % batch_size!=0:
            total_batch+=1
        RANDOM_SEED = 2020
        rng = np.random.RandomState(RANDOM_SEED)
#         src_dhandler = DataHandler(x_src,y_src_onehot,None,batch_size=256,shuffle=True)
        
        for i in range(nb_epochs):
            pos_inds = y_src==1
            x_src_pos = x_src[pos_inds]
            y_src_pos = y_src_onehot[pos_inds]
            x_src_neg = x_src[~pos_inds]
            y_src_neg = y_src_onehot[~pos_inds]
            p = np.random.permutation(len(x_src_neg))
            x_src_neg = x_src_neg[p]
            y_src_neg = y_src_neg[p]
            pos_len = len(x_src_pos)

            src_dhandler = DataHandler(np.concatenate((x_src_pos,x_src_neg[:pos_len]))
                                               ,np.concatenate((y_src_pos,y_src_neg[:pos_len])),None,batch_size=256,shuffle=True)

            train_model2(total_batch,src_dhandler,tsolver)
            
        name = src_day[:2]+'_'+trg_day[:2]
#         net.save('../../saved_models/IDS/'+name+'_only_src4')
        
        x_trg = x_trg[::2]
        y_trg = y_trg[::2]
        
        
        
        
        f1,auprc,acc = calc_scores(x_trg,y_trg,net,draw=False)
        onlysrc_res[name] = [('f1',f1),('auprc',auprc),('acc',acc)]
        
        y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
        for fpr in [0.001,0.01,0.1]:
            scores = scores_trg[:,1]
        #     fpr = 0.01
            benign_scores = scores[y_trg==0]
            benign_scores_sorted = np.sort(benign_scores)
            thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
            thr = benign_scores_sorted[-thr_ind]
            print (thr)
            mal_scores = scores[y_trg==1]
            tpr = np.sum(mal_scores>thr)/len(mal_scores)
            print('TPR:',tpr)
            onlysrc_res[name].append((fpr,tpr))
            
            
        
        
        ########################### DA
        base_lr = 0.0001
        # optimizer = torch.optim.Adam(net.parameters(),lr=base_lr)
        optimizer = tf.keras.optimizers.Adam(lr=base_lr)
        grads = []
        for v in net.trainable_variables:
            grads.append(np.zeros(v.shape))
        optimizer.apply_gradients(zip(grads,net.trainable_variables))

        tsolver = Solver(optimizer,net,base_lr)

        # trg_preds = get_logits(x_target,y_target,tsolver)
        trg_gts = y_trg
        trg_data = x_trg
        src_gts = y_src
        src_data = x_src
        t_labels = np.array(trg_gts)
        l_trg = t_labels
        
        s_labels = np.array(src_gts)
        l_src = s_labels
        trg_size = len(trg_data)
        n_classes = 2
        
        trg_gt_class_percentage0 = np.zeros(n_classes)
        for i in range(n_classes):
            trg_gt_class_percentage0[i] = np.sum(t_labels==i)/len(t_labels)
        
        trg_gt_class_percentage = np.zeros(n_classes)
        for i in range(n_classes):
            trg_gt_class_percentage[i] = np.sum(t_labels==i)/len(t_labels)
#             trg_gt_class_percentage[i] = np.sum(s_labels==i)/len(s_labels)
        
        DA(tsolver,trg_data,trg_gts,src_data,src_gts,trg_gt_class_percentage=trg_gt_class_percentage)
        
#         net.save('../../saved_models/IDS/'+name+'_pppl4')
        
        f1,auprc,acc = calc_scores(x_trg,y_trg,net,draw=False)
        pppl_res[name] = [('f1',f1),('auprc',auprc),('acc',acc)]
        
        y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
        for fpr in [0.001,0.01,0.1]:
            scores = scores_trg[:,1]
        #     fpr = 0.01
            benign_scores = scores[y_trg==0]
            benign_scores_sorted = np.sort(benign_scores)
            thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
            thr = benign_scores_sorted[-thr_ind]
            print (thr)
            mal_scores = scores[y_trg==1]
            tpr = np.sum(mal_scores>thr)/len(mal_scores)
            print('TPR:',tpr)
            pppl_res[name].append((fpr,tpr))
        print('--------------------------------------------------------------------------------------------')
        print('--------------------------------------------------------------------------------------------')
        print(onlysrc_res)
        print('--------------------------------------------------------------------------------------------')
        print('--------------------------------------------------------------------------------------------')
        print(pppl_res)
        print('--------------------------------------------------------------------------------------------')
        print('--------------------------------------------------------------------------------------------')
        
        

        

In [84]:
onlysrc_res = {}
pppl_res = {}
# for src_day in days:
#     for trg_day in days:
for src_day in ['wednesday']:
    for trg_day in ['tuesday']:
        if src_day==trg_day:
            continue
        print('**************************************************',src_day,trg_day)
        x_src,y_src,x_trg,y_trg,train_min,train_max = preproces_datasets(src_day,trg_day)
        real_labels = y_src!=-1
        x_src = x_src[real_labels]
        y_src = y_src[real_labels]

        real_labels = y_trg!=-1
        x_trg = x_trg[real_labels]
        y_trg = y_trg[real_labels]

        print(x_src.shape,y_src.shape,x_trg.shape,y_trg.shape)
        
        y_src[y_src!=0] = 1
        y_trg[y_trg!=0] = 1
        
        print('mal rate in src',np.sum(y_src==1)/len(y_src))
        print('mal rate in trg',np.sum(y_trg==1)/len(y_trg))
        
        y_src_onehot = np.zeros((len(y_src),2),np.float32)
        y_src_onehot[range(len(y_src)),y_src.astype(np.int32)] = 1.

        y_trg_onehot = np.zeros((len(y_trg),2),np.float32)
        y_trg_onehot[range(len(y_trg)),y_trg.astype(np.int32)] = 1.
        
        input_size = x_src.shape[1]
        net = PacketModel()
        net._set_inputs(tf.TensorSpec([None,timesteps*num_input]))
        
        
        base_lr = 0.0001
        optimizer = tf.keras.optimizers.Adam(learning_rate=base_lr)
        grads = []
        for v in net.trainable_variables:
            grads.append(np.zeros(v.shape))
        optimizer.apply_gradients(zip(grads,net.trainable_variables))

        tsolver = Solver(optimizer,net,base_lr)
        train_loss = tf.keras.metrics.Mean(name='train_loss')
        
        
        nb_epochs = 6
        batch_size = 256
        total_batch = len(x_src)//batch_size
        if len(x_src) % batch_size!=0:
            total_batch+=1
        RANDOM_SEED = 2020
        rng = np.random.RandomState(RANDOM_SEED)
#         src_dhandler = DataHandler(x_src,y_src_onehot,None,batch_size=256,shuffle=True)
        
        for i in range(nb_epochs):
            pos_inds = y_src==1
            x_src_pos = x_src[pos_inds]
            y_src_pos = y_src_onehot[pos_inds]
            x_src_neg = x_src[~pos_inds]
            y_src_neg = y_src_onehot[~pos_inds]
            p = np.random.permutation(len(x_src_neg))
            x_src_neg = x_src_neg[p]
            y_src_neg = y_src_neg[p]
            pos_len = len(x_src_pos)

            src_dhandler = DataHandler(np.concatenate((x_src_pos,x_src_neg[:pos_len]))
                                               ,np.concatenate((y_src_pos,y_src_neg[:pos_len])),None,batch_size=256,shuffle=True)

            train_model2(total_batch,src_dhandler,tsolver)
            
        name = src_day[:2]+'_'+trg_day[:2]
        net.save('../../saved_models/IDS/'+name+'_only_src4')
        
        x_trg = x_trg[::2]
        y_trg = y_trg[::2]
        
        net = tf.keras.models.load_model('../../saved_models/IDS/'+name+'_only_src4')
        
        
        
        
        f1,auprc,acc = calc_scores(x_trg,y_trg,net,draw=False)
        onlysrc_res[name] = [('f1',f1),('auprc',auprc),('acc',acc)]
        
        y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
        for fpr in [0.001,0.01,0.1]:
            scores = scores_trg[:,1]
        #     fpr = 0.01
            benign_scores = scores[y_trg==0]
            benign_scores_sorted = np.sort(benign_scores)
            thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
            thr = benign_scores_sorted[-thr_ind]
            print (thr)
            mal_scores = scores[y_trg==1]
            tpr = np.sum(mal_scores>thr)/len(mal_scores)
            print('TPR:',tpr)
            onlysrc_res[name].append((fpr,tpr))
            
            
        
        
        ########################### DA
        base_lr = 0.0001
        # optimizer = torch.optim.Adam(net.parameters(),lr=base_lr)
        optimizer = tf.keras.optimizers.Adam(lr=base_lr)
        grads = []
        for v in net.trainable_variables:
            grads.append(np.zeros(v.shape))
        optimizer.apply_gradients(zip(grads,net.trainable_variables))

        tsolver = Solver(optimizer,net,base_lr)

        # trg_preds = get_logits(x_target,y_target,tsolver)
        trg_gts = y_trg
        trg_data = x_trg
        src_gts = y_src
        src_data = x_src
        t_labels = np.array(trg_gts)
        l_trg = t_labels
        
        s_labels = np.array(src_gts)
        l_src = s_labels
        trg_size = len(trg_data)
        n_classes = 2
        
        trg_gt_class_percentage0 = np.zeros(n_classes)
        for i in range(n_classes):
            trg_gt_class_percentage0[i] = np.sum(t_labels==i)/len(t_labels)
        
        trg_gt_class_percentage = np.zeros(n_classes)
        for i in range(n_classes):
#             trg_gt_class_percentage[i] = np.sum(t_labels==i)/len(t_labels)
            trg_gt_class_percentage[i] = np.sum(s_labels==i)/len(s_labels)
        
        DA(tsolver,trg_data,trg_gts,src_data,src_gts,trg_gt_class_percentage=trg_gt_class_percentage,mode=1)
        
#         logits_target1 = get_logits(x_target,y_target,tsolver)
#         yhat_target1 = logits_target1.argmax(axis=1)
        
        yhat_target1,logits_target1 = test(x_trg,y_trg,net,ret=True)
                                         
        
#         net.save('../../saved_models/IDS/'+name+'_pppl4')
        print('-----___________________________________________===== MODE 1')
        f1_m1,auprc_m1, acc_m1 = calc_scores(x_trg,y_trg,net,draw=False)
#         pppl_res[name] = [('f1',f1),('auprc',auprc),('acc',acc)]
        
        y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
        for fpr in [0.001,0.01,0.1]:
            scores = scores_trg[:,1]
        #     fpr = 0.01
            benign_scores = scores[y_trg==0]
            benign_scores_sorted = np.sort(benign_scores)
            thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
            thr = benign_scores_sorted[-thr_ind]
            print (thr)
            mal_scores = scores[y_trg==1]
            tpr = np.sum(mal_scores>thr)/len(mal_scores)
            print('TPR:',tpr)
#             pppl_res[name].append((fpr,tpr))
            
            
        net = tf.keras.models.load_model('../../saved_models/IDS/'+name+'_only_src4')
        
        
        base_lr = 0.0001
        # optimizer = torch.optim.Adam(net.parameters(),lr=base_lr)
        optimizer = tf.keras.optimizers.Adam(lr=base_lr)
        grads = []
        for v in net.trainable_variables:
            grads.append(np.zeros(v.shape))
        optimizer.apply_gradients(zip(grads,net.trainable_variables))

        tsolver = Solver(optimizer,net,base_lr)

        DA(tsolver,trg_data,trg_gts,src_data,src_gts,trg_gt_class_percentage=trg_gt_class_percentage,mode=2)
        
#         logits_target1 = get_logits(x_target,y_target,tsolver)
#         yhat_target1 = logits_target1.argmax(axis=1)
        
        yhat_target2,logits_target2 = test(x_trg,y_trg,net,ret=True)
                                         
        
#         net.save('../../saved_models/IDS/'+name+'_pppl4')
        print('-----___________________________________________===== MODE 2')
        f1_m2,auprc_m2,acc_m2 = calc_scores(x_trg,y_trg,net,draw=False)
#         pppl_res[name] = [('f1',f1),('auprc',auprc),('acc',acc)]
        
        y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
        for fpr in [0.001,0.01,0.1]:
            scores = scores_trg[:,1]
        #     fpr = 0.01
            benign_scores = scores[y_trg==0]
            benign_scores_sorted = np.sort(benign_scores)
            thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
            thr = benign_scores_sorted[-thr_ind]
            print (thr)
            mal_scores = scores[y_trg==1]
            tpr = np.sum(mal_scores>thr)/len(mal_scores)
            print('TPR:',tpr)
#             pppl_res[name].append((fpr,tpr))
            
        
        
        print('--------------------------------------------------------------------------------------------')
        print('--------------------------------------------------------------------------------------------')
        def model_selection(x_trg,yhat_target1,x_src,y_src,y_src_onehot):
            net1 = PacketModel()
            net1._set_inputs(tf.TensorSpec([None,timesteps*num_input]))

            base_lr = 0.0001
            optimizer = tf.keras.optimizers.Adam(learning_rate=base_lr)
            grads = []
            for v in net1.trainable_variables:
                grads.append(np.zeros(v.shape))
            optimizer.apply_gradients(zip(grads,net1.trainable_variables))

            tsolver1 = Solver(optimizer,net1,base_lr)

            yhat_target1_onehot = np.zeros((len(yhat_target1),2),np.float32)
            yhat_target1_onehot[range(len(yhat_target1)),yhat_target1.astype(np.int32)] = 1.

            dhandler = DataHandler(x_trg,yhat_target1_onehot,None,batch_size=256,shuffle=True)


            nb_epochs = 6
            batch_size = 256
            total_batch = len(x_trg)//batch_size
            if len(x_trg) % batch_size!=0:
                total_batch+=1

            for i in range(nb_epochs):
                pos_inds = yhat_target1==1
                x_src_pos = x_trg[pos_inds]
                y_src_pos = yhat_target1_onehot[pos_inds]
                x_src_neg = x_trg[~pos_inds]
                y_src_neg = yhat_target1_onehot[~pos_inds]
                p = np.random.permutation(len(x_src_neg))
                x_src_neg = x_src_neg[p]
                y_src_neg = y_src_neg[p]
                pos_len = len(x_src_pos)

                src_dhandler = DataHandler(np.concatenate((x_src_pos,x_src_neg[:pos_len]))
                                                   ,np.concatenate((y_src_pos,y_src_neg[:pos_len])),None,batch_size=256,shuffle=True)

                train_model2(total_batch,src_dhandler,tsolver1)

    #         for i in range(5):
    #             train_model2(100,dhandler,tsolver1)

            _,logits_train_1 = test(x_src,y_src,net1,ret=True)
            err1 = np.sum(np.abs(logits_train_1 - y_src_onehot)**2,axis=1)
            err1 = np.mean(err1)
            return err1
        err1 = model_selection(x_trg,yhat_target1,x_src,y_src,y_src_onehot)
        err2 = model_selection(x_trg,yhat_target2,x_src,y_src,y_src_onehot)
        print('err1: ',err1, 'err2: ',err2)
        print('f1_m1: ',f1_m1, 'f1_m2: ',f1_m2)
        final_f1 = f1_m1 if err1<err2 else f1_m2
        final_auprc = auprc_m1 if err1<err2 else auprc_m2
        final_acc = acc_m1 if err1<err2 else acc_m2
        pppl_res[name] = [('f1',final_f1),('auprc',final_auprc),('acc',final_acc)]
        print('+++++++++++++++++++++++++++++++')
        print(name,pppl_res[name])
        print('+++++++++++++++++++++++++++++++')

        
        

        

************************************************** wednesday tuesday
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00000.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00001.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00002.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00003.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00004.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00005.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00006.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00007.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00008.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00009.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00010.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00011.npy
../../../anomaly_datasets/IDS2017/packet_based/wednesday/part_00012

epoch: 1/1 ETA: 0:00 loss: 0.0452  
######################################### 6
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 0.15
accuracy: 0.9623673161954445
%%%%%%% 0.3195864581557018
len of big_cond 48752 len of trg weights 286772
len of correcT_indices 237750 correct mals: 4635
>>>>>>>>>>>>>>>>>>>>>>>>>>16.9490 iter: 0 elapssed time: 10.23215103149414
(335524, 580) (335524,) (335524,)
------------ 121320
epoch: 1/1 ETA: 0:00 loss: 0.0456  
######################################### 8
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 0.19
accuracy: 0.9443878760827417
%%%%%%% 0.3195864581557018
len of big_cond 54488 len of trg weights 286772
len of correcT_indices 239000 correct mals: 5260
>>>>>>>>>>>>>>>>>>>>>>>>>>16.8955 iter: 0 elapssed time: 12.582035779953003
(341260, 580) (341260,) (341260,)
------------ 124022
epoch: 1/1 ETA: 0:00 loss: 0.0403  
current accuracy on targe

(427291, 580) (427291,) (427291,)
------------ 155392
epoch: 1/1 ETA: 0:00 loss: 0.0158  
######################################### 40
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 0.91
accuracy: 0.9104619697878454
%%%%%%% 0.3195864581557018
len of big_cond 146255 len of trg weights 286772
len of correcT_indices 240236 correct mals: 5878
>>>>>>>>>>>>>>>>>>>>>>>>>>15.8566 iter: 0 elapssed time: 55.8347270488739
(433027, 580) (433027,) (433027,)
------------ 157660
epoch: 1/1 ETA: 0:00 loss: 0.0168  
current accuracy on target domain:  accuracy: 0.8860209504414657
######################################### 42
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 0.97
accuracy: 0.8860209504414657
%%%%%%% 0.3195864581557018
len of big_cond 151990 len of trg weights 286772
len of correcT_indices 240116 correct mals: 5818
>>>>>>>>>>>>>>>>>>>>>>>>>>15.6872 iter: 0 elapssed time: 59.519971609

(524794, 580) (524794,) (524794,)
------------ 524794
epoch: 1/1 ETA: 0:02 loss: 0.0041  
current accuracy on target domain:  accuracy: 0.9145802240107124
######################################### 74
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 1.91
accuracy: 0.9145802240107124
%%%%%%% 0.3195864581557018
len of big_cond 243756 len of trg weights 286772
len of correcT_indices 262276 correct mals: 4109
>>>>>>>>>>>>>>>>>>>>>>>>>>8.0790 iter: 0 elapssed time: 115.78389048576355
(530528, 580) (530528,) (530528,)
------------ 530528
epoch: 1/1 ETA: 0:02 loss: 0.0038  
######################################### 76
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 1.98
accuracy: 0.915291590531851
%%%%%%% 0.3195864581557018
len of big_cond 249490 len of trg weights 286772
len of correcT_indices 262480 correct mals: 4335
>>>>>>>>>>>>>>>>>>>>>>>>>>8.0805 iter: 0 elapssed time: 119.781073093

(358466, 580) (358466,) (358466,)
------------ 107448
epoch: 1/1 ETA: 0:00 loss: 0.0100  
######################################### 16
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 0.35
accuracy: 0.9750603266706652
%%%%%%% 0.0009461116875614482
len of big_cond 77429 len of trg weights 286772
len of correcT_indices 278698 correct mals: 2129
>>>>>>>>>>>>>>>>>>>>>>>>>>1.2566 iter: 0 elapssed time: 22.090769052505493
(364201, 580) (364201,) (364201,)
------------ 107510
epoch: 1/1 ETA: 0:00 loss: 0.0097  
current accuracy on target domain:  accuracy: 0.9751091459417237
######################################### 18
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 0.40
accuracy: 0.9751091459417237
%%%%%%% 0.004415782707536786
len of big_cond 83165 len of trg weights 286772
len of correcT_indices 279213 correct mals: 2138
>>>>>>>>>>>>>>>>>>>>>>>>>>1.0846 iter: 0 elapssed time: 25.429884

(455968, 580) (455968,) (455968,)
------------ 115082
epoch: 1/1 ETA: 0:00 loss: 0.0066  
current accuracy on target domain:  accuracy: 0.9588837124963385
######################################### 50
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 1.09
accuracy: 0.9588837124963385
%%%%%%% 0.02875462029942836
len of big_cond 174932 len of trg weights 286772
len of correcT_indices 275409 correct mals: 2614
>>>>>>>>>>>>>>>>>>>>>>>>>>2.9183 iter: 0 elapssed time: 66.4229691028595
(461704, 580) (461704,) (461704,)
------------ 116530
epoch: 1/1 ETA: 0:00 loss: 0.0063  
######################################### 52
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 1.13
accuracy: 0.9586954095936842
%%%%%%% 0.0324704433513571
len of big_cond 180667 len of trg weights 286772
len of correcT_indices 275290 correct mals: 2821
>>>>>>>>>>>>>>>>>>>>>>>>>>3.0542 iter: 0 elapssed time: 68.7588362693

(553471, 580) (553471,) (553471,)
------------ 553471
epoch: 1/1 ETA: 0:03 loss: 0.0031  
######################################### 84
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 2.15
accuracy: 0.9563381362197146
%%%%%%% 0.039301701684261746
len of big_cond 272264 len of trg weights 286772
len of correcT_indices 274251 correct mals: 3192
>>>>>>>>>>>>>>>>>>>>>>>>>>3.4948 iter: 0 elapssed time: 130.22098398208618
(559036, 580) (559036,) (559036,)
------------ 559036
epoch: 1/1 ETA: 0:03 loss: 0.0031  
######################################### 86
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 2.23
accuracy: 0.957094834921122
%%%%%%% 0.039301701684261746
len of big_cond 277444 len of trg weights 286772
len of correcT_indices 274468 correct mals: 3457
>>>>>>>>>>>>>>>>>>>>>>>>>>3.4389 iter: 0 elapssed time: 135.23776841163635
(564216, 580) (564216,) (564216,)
------------ 564216
e

In [42]:
def model_selection(x_trg,yhat_target1,x_src,y_src,y_src_onehot):
    net1 = PacketModel()
    net1._set_inputs(tf.TensorSpec([None,timesteps*num_input]))

    base_lr = 0.0001
    optimizer = tf.keras.optimizers.Adam(learning_rate=base_lr)
    grads = []
    for v in net1.trainable_variables:
        grads.append(np.zeros(v.shape))
    optimizer.apply_gradients(zip(grads,net1.trainable_variables))

    tsolver1 = Solver(optimizer,net1,base_lr)

    yhat_target1_onehot = np.zeros((len(yhat_target1),2),np.float32)
    yhat_target1_onehot[range(len(yhat_target1)),yhat_target1.astype(np.int32)] = 1.

    dhandler = DataHandler(x_trg,yhat_target1_onehot,None,batch_size=256,shuffle=True)


    nb_epochs = 6
    batch_size = 256
    total_batch = len(x_trg)//batch_size
    if len(x_trg) % batch_size!=0:
        total_batch+=1

    for i in range(nb_epochs):
        pos_inds = yhat_target1==1
        x_src_pos = x_trg[pos_inds]
        y_src_pos = yhat_target1_onehot[pos_inds]
        x_src_neg = x_trg[~pos_inds]
        y_src_neg = yhat_target1_onehot[~pos_inds]
        p = np.random.permutation(len(x_src_neg))
        x_src_neg = x_src_neg[p]
        y_src_neg = y_src_neg[p]
        pos_len = len(x_src_pos)

        src_dhandler = DataHandler(np.concatenate((x_src_pos,x_src_neg[:pos_len]))
                                           ,np.concatenate((y_src_pos,y_src_neg[:pos_len])),None,batch_size=256,shuffle=True)

        train_model2(total_batch,src_dhandler,tsolver1)

#         for i in range(5):
#             train_model2(100,dhandler,tsolver1)
    return net1

#     _,logits_train_1 = test(x_src,y_src,net1,ret=True)
#     err1 = np.sum(np.abs(logits_train_1 - y_src_onehot)**2,axis=1)
#     err1 = np.mean(err1)
#     return err1

In [75]:
def model_selection_v2(x_trg,logits_hat,yhat,x_src,y_src,y_src_onehot):
    net1 = PacketModel()
    net1._set_inputs(tf.TensorSpec([None,timesteps*num_input]))

    base_lr = 0.0001
    optimizer = tf.keras.optimizers.Adam(learning_rate=base_lr)
    grads = []
    for v in net1.trainable_variables:
        grads.append(np.zeros(v.shape))
    optimizer.apply_gradients(zip(grads,net1.trainable_variables))

    tsolver1 = Solver(optimizer,net1,base_lr)

#     yhat_target1_onehot = np.zeros((len(yhat_target1),2),np.float32)
#     yhat_target1_onehot[range(len(yhat_target1)),yhat_target1.astype(np.int32)] = 1.

    dhandler = DataHandler(x_trg,logits_hat,None,batch_size=256,shuffle=True)


    nb_epochs = 6
    batch_size = 256
    total_batch = len(x_trg)//batch_size
    if len(x_trg) % batch_size!=0:
        total_batch+=1

    for i in range(nb_epochs):
        pos_inds = yhat==1
        x_src_pos = x_trg[pos_inds]
        y_src_pos = logits_hat[pos_inds]
        x_src_neg = x_trg[~pos_inds]
        y_src_neg = logits_hat[~pos_inds]
        p = np.random.permutation(len(x_src_neg))
        x_src_neg = x_src_neg[p]
        y_src_neg = y_src_neg[p]
        pos_len = len(x_src_pos)

        dhandler = DataHandler(np.concatenate((x_src_pos,x_src_neg[:pos_len]))
                                           ,np.concatenate((y_src_pos,y_src_neg[:pos_len])),None,batch_size=256,shuffle=True)

        train_model2(total_batch,dhandler,tsolver1)

#         for i in range(5):
#             train_model2(100,dhandler,tsolver1)
    return net1

#     _,logits_train_1 = test(x_src,y_src,net1,ret=True)
#     err1 = np.sum(np.abs(logits_train_1 - y_src_onehot)**2,axis=1)
#     err1 = np.mean(err1)
#     return err1

In [43]:
# err2 = model_selection(x_trg,yhat_target2,x_src,y_src,y_src_onehot)
# err1 = model_selection(x_trg,yhat_target1,x_src,y_src,y_src_onehot)
# print('err1: ',err1, 'err2: ',err2)
# print('f1_m1: ',f1_m1, 'f1_m2: ',f1_m2)

In [62]:
x_trg.shape,logits_target1.shape,logits_target2.shape

((342621, 580), (342621, 2), (342621, 2))

In [66]:
logits_target1 = logits_target1.astype(np.float32)
logits_target2 = logits_target2.astype(np.float32)

In [76]:
# net1 = model_selection(x_trg,yhat_target1,x_src,y_src,y_src_onehot)
# net2 = model_selection(x_trg,yhat_target2,x_src,y_src,y_src_onehot)

net2 = model_selection_v2(x_trg,logits_target2,yhat_target2,x_src,y_src,y_src_onehot)
net1 = model_selection_v2(x_trg,logits_target1,yhat_target1,x_src,y_src,y_src_onehot)


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
 iter: 1339/1339 ETA: 0:00 loss: 0.0314  
 iter: 1339/1339 ETA: 0:00 loss: 0.0101  
 iter: 1339/1339 ETA: 0:00 loss: 0.0072  
 iter: 1339/1339 ETA: 0:00

In [77]:
_ = calc_scores(x_src,y_src,net1,draw=False)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
accuracy: 0.9854239604982356
accuracy: 0.9854239604982356
f1 score: 0.536482590374806
AUPRC: 0.8266618602978608


In [78]:
_ = calc_scores(x_src,y_src,net2,draw=False)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
accuracy: 0.9782667066519743
accuracy: 0.9782667066519743
f1 score: 0.022889394058164144
AUPRC: 0.658245866078367


In [79]:
_,logits_train_1 = test(x_src,y_src,net1,ret=True)
err1 = np.sum(np.abs(logits_train_1 - y_src_onehot)**2,axis=1)
err1 = np.mean(err1)

_,logits_train_2 = test(x_src,y_src,net2,ret=True)
err2 = np.sum(np.abs(logits_train_2 - y_src_onehot)**2,axis=1)
err2 = np.mean(err2)

print('err1',err1,'err2',err2)

accuracy: 0.9854239604982356
accuracy: 0.9782667066519743
err1 0.01976915089786419 err2 0.02774438800610258


In [80]:
np.sum(y_src==1)

12468

In [47]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

In [None]:
#     _,logits_train_1 = test(x_src,y_src,net1,ret=True)
#     err1 = np.sum(np.abs(logits_train_1 - y_src_onehot)**2,axis=1)
#     err1 = np.mean(err1)

In [30]:
def print_res(ht=None):
    for src_day in days:
        for trg_day in days:
            if src_day == trg_day:
                continue
            name = src_day[:2]+'_'+trg_day[:2]
            if ht is None:
                print(name,end=' ')
            else:
                temp = ht[name][0][1] #f1 score
#                 temp = ht[name][4][1] #f1 score
                print(temp,end=' ')
    print()
print_res()
print_res(onlysrc_res)
print_res(pppl_res)

tu_we tu_th we_tu we_th th_tu th_we 
0.0610450699568687 0.19323986523204 0.01647961358147464 0.1789714285714286 0.011385199240986715 0.013176276352552703 
0.2731995179320949 0.707464813825049 0.27819971870604787 0.33883058470764615 0.7761930883159627 0.3229934135204666 


In [40]:
a.reshape(1,-1)

array([[  1, 100]])

In [41]:
a = np.array((1,100))
a = a.reshape(1,-1)

In [1]:
a

NameError: name 'a' is not defined

In [43]:
tf.keras.layers.Softmax()(a)

NotFoundError: Could not find valid device for node.
Node:{{node Softmax}}
All kernels registered for op Softmax :
  device='XLA_GPU'; T in [DT_FLOAT, DT_DOUBLE, DT_BFLOAT16, DT_HALF]
  device='XLA_CPU'; T in [DT_FLOAT, DT_DOUBLE, DT_BFLOAT16, DT_HALF]
  device='XLA_CPU_JIT'; T in [DT_FLOAT, DT_DOUBLE, DT_BFLOAT16, DT_HALF]
  device='XLA_GPU_JIT'; T in [DT_FLOAT, DT_DOUBLE, DT_BFLOAT16, DT_HALF]
  device='CPU'; T in [DT_DOUBLE]
  device='CPU'; T in [DT_FLOAT]
  device='CPU'; T in [DT_HALF]
  device='GPU'; T in [DT_DOUBLE]
  device='GPU'; T in [DT_FLOAT]
  device='GPU'; T in [DT_HALF]
 [Op:Softmax]

In [44]:
tf.nn.softmax(a)

NotFoundError: Could not find valid device for node.
Node:{{node Softmax}}
All kernels registered for op Softmax :
  device='XLA_GPU'; T in [DT_FLOAT, DT_DOUBLE, DT_BFLOAT16, DT_HALF]
  device='XLA_CPU'; T in [DT_FLOAT, DT_DOUBLE, DT_BFLOAT16, DT_HALF]
  device='XLA_CPU_JIT'; T in [DT_FLOAT, DT_DOUBLE, DT_BFLOAT16, DT_HALF]
  device='XLA_GPU_JIT'; T in [DT_FLOAT, DT_DOUBLE, DT_BFLOAT16, DT_HALF]
  device='CPU'; T in [DT_DOUBLE]
  device='CPU'; T in [DT_FLOAT]
  device='CPU'; T in [DT_HALF]
  device='GPU'; T in [DT_DOUBLE]
  device='GPU'; T in [DT_FLOAT]
  device='GPU'; T in [DT_HALF]
 [Op:Softmax]

In [27]:
x_src.shape,x_trg.shape

((462031, 580), (342621, 580))

In [25]:
with open('onlysrc_res2','wb') as f:
    pickle.dump(onlysrc_res,f)
    
with open('pppl_res2','wb') as f:
    pickle.dump(pppl_res,f)

In [52]:
DA(tsolver,trg_data,trg_gts,src_data,src_gts,trg_gt_class_percentage=trg_gt_class_percentage)


######################################### 0
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 0.00
accuracy: 0.9755450322904607
len of big_cond 31546 len of trg weights 286772
len of correcT_indices 279759 correct mals: 94
>>>>>>>>>>>>>>>>>>>>>>>>>>1.1539 iter: 0 elapssed time: 1.274892807006836
(318318, 580) (318318,) (318318,)
------------ 104732
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 3, expecting 4
epoch: 1/1 ETA: 0:00 loss: 0.0108  
current accuracy on target domain:  accuracy: 0.9758274866444423
######################################### 2
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

(410085, 580) (410085,) (410085,)
------------ 104550
epoch: 1/1 ETA: 0:00 loss: 0.0070  
current accuracy on target domain:  accuracy: 0.9758553833707614
######################################### 34
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 0.74
accuracy: 0.9758553833707614
len of big_cond 129049 len of trg weights 286772
len of correcT_indices 279848 correct mals: 284
>>>>>>>>>>>>>>>>>>>>>>>>>>0.5014 iter: 0 elapssed time: 45.70130109786987
(415821, 580) (415821,) (415821,)
------------ 106080
epoch: 1/1 ETA: 0:00 loss: 0.0068  
######################################### 36
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 0.78
accuracy: 0.9763331148089772
len of big_cond 134784 len of trg weights 286772
len of correcT_indices 279985 correct mals: 104
>>>>>>>>>>>>>>>>>>>>>>>>>>0.4563 iter: 0 elapssed time: 47.95612287521362
(421556, 580) (421556,) (421556,)
------------ 1050

(513322, 580) (513322,) (513322,)
------------ 513322
epoch: 1/1 ETA: 0:02 loss: 0.0039  
######################################### 70
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 1.61
accuracy: 0.987380218431367
len of big_cond 232286 len of trg weights 286772
len of correcT_indices 283153 correct mals: 3288
>>>>>>>>>>>>>>>>>>>>>>>>>>0.2686 iter: 0 elapssed time: 97.71690154075623
(519058, 580) (519058,) (519058,)
------------ 519058
epoch: 1/1 ETA: 0:02 loss: 0.0038  
######################################### 72
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 0 Elapsed Time(m): 1.67
accuracy: 0.9868501806313029
len of big_cond 238021 len of trg weights 286772
len of correcT_indices 283001 correct mals: 3076
>>>>>>>>>>>>>>>>>>>>>>>>>>0.2605 iter: 0 elapssed time: 101.39702701568604
(524793, 580) (524793,) (524793,)
------------ 524793
epoch: 1/1 ETA: 0:02 loss: 0.0036  
current accuracy on targ

(570677, 580) (570677,) (570677,)
------------ 570677
epoch: 1/1 ETA: 0:02 loss: 0.0025  
current accuracy on target domain:  accuracy: 0.9922726068095908
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ epoch: 9 Elapsed Time(m): 2.98
accuracy: 0.9922726068095908
len of big_cond 283905 len of trg weights 286772
len of correcT_indices 284556 correct mals: 4686
>>>>>>>>>>>>>>>>>>>>>>>>>>0.4089 iter: 9 elapssed time: 179.99253344535828
(570677, 580) (570677,) (570677,)
------------ 570677
epoch: 1/1 ETA: 0:03 loss: 0.0024  
current accuracy on target domain:  accuracy: 0.99263875134253


In [58]:
f1,auprc,acc = calc_scores(x_trg,y_trg,net,draw=False)

accuracy: 0.99330827277419
accuracy: 0.99330827277419
f1 score: 0.8377715783244568
AUPRC: 0.8846457023598123


In [59]:
y_pred_trg,scores_trg = test(x_trg,y_trg,net,ret=True)
for fpr in [0.001,0.01,0.1]:
    scores = scores_trg[:,1]
#     fpr = 0.01
    benign_scores = scores[y_trg==0]
    benign_scores_sorted = np.sort(benign_scores)
    thr_ind = int(np.ceil(len(benign_scores_sorted)*fpr))
    thr = benign_scores_sorted[-thr_ind]
    print (thr)
    mal_scores = scores[y_trg==1]
    tpr = np.sum(mal_scores>thr)/len(mal_scores)
    print('TPR:',tpr)
    pppl_res[name].append((fpr,tpr))

accuracy: 0.99330827277419
0.8530188798904419
TPR: 0.47112608277189605
0.13573472201824188
TPR: 0.9619826756496631
0.01047850213944912
TPR: 0.9955085017645172
