In [1]:
import os, sys
# Locate the 'OCLog' project root by climbing at most five directory levels
# from the current working directory, and make it importable via sys.path.
cur_dir = os.getcwd()
basename = os.path.basename(cur_dir)
for _ in range(5):
    if basename == 'OCLog':
        # Root found: register it once; nothing further up needs checking.
        if cur_dir not in sys.path:
            sys.path.append(cur_dir)
        break
    # Not the project root yet: step up to the parent directory.
    cur_dir = os.path.dirname(cur_dir)
    basename = os.path.basename(cur_dir)
import os
from openpyxl import Workbook
from openpyxl import load_workbook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tqdm import trange, tqdm, tnrange
from oclog.BGL.bglogUKC import BGLog, get_embedding_layer
from oclog.openset.boundary_loss import euclidean_metric, BoundaryLoss
from oclog.openset.pretraining import LogLineEncoder, LogSeqEncoder, LogClassifier
# from oclog.openset.openset import OpenSet
from oclog.openset.opensetv6 import OpenSet
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
import sklearn.metrics as m
import warnings
warnings.filterwarnings('ignore')
def update_tracker(file_name, data):
    """Append rows to an Excel tracker file and return the combined table.

    Parameters
    ----------
    file_name : str
        Path of the Excel workbook. It is created if it does not exist yet.
    data : dict
        Mapping of column name to value(s); passed unchanged to
        ``pd.DataFrame``, so it must satisfy that constructor's rules.

    Returns
    -------
    pandas.DataFrame
        The rows already stored in ``file_name`` followed by the new rows,
        exactly as written back to the file.
    """
    if os.path.exists(file_name):
        orig_df = pd.read_excel(file_name)
        # Files written with the DataFrame index included come back with
        # "Unnamed: N" columns; drop them so they do not pile up on each call.
        stale_cols = [col for col in orig_df.columns if str(col).startswith('Unnamed:')]
        orig_df = orig_df.drop(columns=stale_cols)
    else:
        # No tracker yet: start from an empty table; to_excel creates the file.
        orig_df = pd.DataFrame()
    new_df = pd.DataFrame(data)
    # ignore_index gives the combined table a clean 0..n-1 index instead of
    # repeating the index labels of every appended frame.
    concat_df = pd.concat([orig_df, new_df], axis=0, ignore_index=True)
    # index=False keeps the index out of the sheet, so the next read_excel
    # sees only the real tracker columns.
    concat_df.to_excel(file_name, index=False)
    return concat_df

def oset_train(ablation=5000, designated_ukc_cls=3, num_classes=2, embedding_size=128, lr_rate=3, optimizer='sgd',
               pretrain_epochs=3, octrain_epochs=200, wait_patience=3, debug=False, tracker='tracker.xlsx', comment='',
               tracker_update=True, pretrain_patience=None):
    """Pretrain a log classifier, train the open-set model on top of it, and log the run.

    Steps: seed TensorFlow and NumPy, load the BGL data through ``BGLog``,
    pretrain a ``LogClassifier``, train an ``OpenSet`` model using it,
    evaluate on the test split, and optionally append one row with the
    settings and scores to the tracker workbook.

    Parameters
    ----------
    ablation, designated_ukc_cls :
        Forwarded to ``BGLog.get_tensor_train_val_test``;
        ``designated_ukc_cls`` is also passed to ``OpenSet.evaluate`` as
        ``ukc_label``.
    num_classes : int
        Number of classes for ``LogClassifier`` and ``OpenSet``; also the
        number of radius values written to the tracker.
    embedding_size : int
        ``dense_neurons`` of ``LogSeqEncoder`` and ``embedding_size`` of ``OpenSet``.
    lr_rate, optimizer, wait_patience :
        Forwarded to ``OpenSet.train``. Pretraining always uses ``'adam'``.
    pretrain_epochs : int
        Maximum number of pretraining epochs.
    octrain_epochs : int
        ``epochs`` argument of ``OpenSet.train``.
    debug : bool
        Forwarded to ``BGLog``.
    tracker : str
        Path of the Excel tracker file.
    comment : str
        Free-text note stored in the tracker row.
    tracker_update : bool
        When true, append the run to ``tracker`` via ``update_tracker``.
    pretrain_patience : int or None
        When given, pretraining stops early once ``val_loss`` has not improved
        for this many epochs (Keras ``EarlyStopping``). ``None`` (default)
        always runs all ``pretrain_epochs``. The tracker still records the
        requested ``pretrain_epochs``.

    Returns
    -------
    tuple
        ``(test_data, oset, hist)``: the test split, the trained ``OpenSet``
        instance, and the pretraining history returned by ``fit``.
    """
    # Fixed seeds so repeated runs with the same settings are comparable.
    tf.random.set_seed(1234)
    np.random.seed(1234)

    bglog = BGLog(save_padded_num_sequences=False, debug=debug, load_from_pkl=True)
    train_data, val_data, test_data = bglog.get_tensor_train_val_test(
        ablation=ablation, designated_ukc_cls=designated_ukc_cls)

    # --- Pretraining: closed-set classifier on the known classes ---
    line_encoder = LogLineEncoder(bglog, chars_in_line=64)
    logSeqencer = LogSeqEncoder(line_in_seq=32, dense_neurons=embedding_size)
    ptmodel = LogClassifier(line_encoder=line_encoder, seq_encoder=logSeqencer, num_classes=num_classes)
    ptmodel.compile(optimizer='adam', loss='categorical_crossentropy',
                    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

    # Only pass `callbacks` when early stopping is requested so the default
    # call to fit() is exactly the same as before.
    fit_kwargs = {}
    if pretrain_patience is not None:
        fit_kwargs['callbacks'] = [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=pretrain_patience)
        ]
    hist = ptmodel.fit(train_data, validation_data=val_data, epochs=pretrain_epochs, **fit_kwargs)

    # Accuracy of the last pretraining epoch that actually ran.
    pre_tr_acc = hist.history.get('accuracy')[-1]
    pre_val_acc = hist.history.get('val_accuracy')[-1]

    # --- Open-set training and evaluation ---
    oset = OpenSet(num_classes, ptmodel, embedding_size=embedding_size)
    _, _ = oset.train(train_data, data_val=val_data, epochs=octrain_epochs,
                      lr_rate=lr_rate, wait_patience=wait_patience, optimizer=optimizer,
                      pretrain_hist=hist)
    _, _, f1_weighted, f_measure = oset.evaluate(test_data, ukc_label=designated_ukc_cls)

    # Flatten the per-class radii into one comma-separated string for the sheet.
    radius_values = tf.reshape(oset.radius, (1, num_classes)).numpy()[0]
    radius = ','.join(str(value) for value in radius_values)
    loss = oset.losses[-1].numpy()

    tracker_data = {'ablation': [ablation], 'designated_ukc_cls': [designated_ukc_cls], 'num_classes': [num_classes],
                    'embedding_size': [embedding_size], 'lr_rate': [lr_rate], 'optimizer': [optimizer],
                    'pretrain_epochs': [pretrain_epochs], 'octrain_epochs': [oset.epoch], 'wait_patience': [wait_patience],
                    'f1_weighted': [f1_weighted], 'f1_macro': f_measure.get('F1-score'),
                    'F1Known': f_measure.get('Known'), 'F1Open': f_measure.get('Open'), 'loss': [loss], 'Radius': radius,
                    'pre_tr_acc': pre_tr_acc, 'pre_val_acc': pre_val_acc, 'comment': comment}

    if tracker_update:
        update_tracker(tracker, tracker_data)
    return test_data, oset, hist

In [None]:
# Single experiment run: ablation=3000, class 5 designated as the unknown
# class (ukc), three known classes, Adam for the open-set training stage.
comment = "data 3000"
test_data, oset, hist = oset_train(
    ablation=3000,
    designated_ukc_cls=5,
    num_classes=3,
    embedding_size=12,
    lr_rate=2.6,
    optimizer='adam',
    pretrain_epochs=10,
    wait_patience=8,
    comment=comment,
)

padded_num_seq_df loaded from C:\Users\Bhujay_ROG\MyDev\OCLog\oclog\BGL\data\bgl_ukc.pkl
trained tokenizer, tk, loaded from C:\Users\Bhujay_ROG\MyDev\OCLog\oclog\BGL\data\bgltkukc.pkl
train_0:, 2400
val_0:, 300
test_0:, 300
train_1:, 2400
val_1:, 300
test_1:, 300
train_2:, 2400
val_2:, 300
test_2:, 300
class 5 is added as ukc
ukc_5:, 165
vocab_size: 50
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:31<00:00,  7.17it/s]


epoch: 1/200, train_loss: 7.153515815734863, F1_train: 0.6808977858129492 F1_val: 0.6890074211502782


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:31<00:00,  7.14it/s]


epoch: 2/200, train_loss: 7.153472900390625, F1_train: 0.699450319871884 F1_val: 0.6931595622119816


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:30<00:00,  7.37it/s]


epoch: 3/200, train_loss: 7.153521537780762, F1_train: 0.7066927224284855 F1_val: 0.692991214057508


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:30<00:00,  7.40it/s]


epoch: 4/200, train_loss: 7.15346622467041, F1_train: 0.7298765734563775 F1_val: 0.692991214057508


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:30<00:00,  7.30it/s]


epoch: 5/200, train_loss: 7.153414726257324, F1_train: 0.7359988020365378 F1_val: 0.6952156063389986


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:30<00:00,  7.33it/s]


epoch: 6/200, train_loss: 7.15341854095459, F1_train: 0.7392765386639238 F1_val: 0.6952156063389986


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:30<00:00,  7.34it/s]


epoch: 7/200, train_loss: 7.153411865234375, F1_train: 0.7431094411521186 F1_val: 0.692991214057508


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:31<00:00,  7.07it/s]


epoch: 8/200, train_loss: 7.1534857749938965, F1_train: 0.7447045774057043 F1_val: 0.6952156063389986


 70%|████████████████████████████████████████████████████████▏                       | 158/225 [00:21<00:09,  7.14it/s]

In [None]:
# TODO: pretraining should stop early based on wait_patience instead of running a fixed number of epochs.