# install 하기

In [None]:
!pip install torch==1.12.1

In [None]:
!pip install transformers==4.24.0

In [None]:
!pip install datasets==2.6.1

In [None]:
!pip install sentencepiece==0.1.97

In [None]:
!pip install scikit-learn==1.0.2

# git clone 및 파일 다운

In [None]:
!git clone https://github.com/HappyBusDay/Korean_ABSA.git

In [None]:
!gdown --id 1AyyTWobLFLqNeeNGHYyZGUb0LNPVnfju

In [None]:
!unzip -qq "/content/pt_files.zip" -d "/content/Korean_ABSA/"

# import

In [None]:
import json
import os

import torch
import torch.nn as nn
from tqdm import trange
from transformers import AutoTokenizer
from torch.utils.data import DataLoader, TensorDataset
from transformers import get_linear_schedule_with_warmup
from transformers import AdamW
from datasets import load_metric
from sklearn.metrics import f1_score
import pandas as pd
import copy
import numpy as np

from transformers import ElectraModel, ElectraTokenizer
from transformers import AutoModel, ElectraTokenizer
from collections import Counter

import re

# 모델 구축

## 기본설정

In [None]:

# Integer token ids. They are not referenced in the visible part of this file;
# presumably pad / sentence-open / sentence-close ids — confirm against the tokenizer.
PADDING_TOKEN = 1
S_OPEN_TOKEN = 0
S_CLOSE_TOKEN = 2

do_eval=True

# Directories where model checkpoints are saved during training
category_extraction_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/category_extraction/'
polarity_classification_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/polarity_classification/'


# Paths to the saved model weight files
test_category_extraction_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/category_extraction/category_sample.pt'
test_polarity_classification_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/polarity_classification/polarity_sample.pt'

# Data file paths
train_cate_data_path = '/content/drive/MyDrive/train_sample(category).jsonl'
train_pola_data_path = '/content/drive/MyDrive/train_sample(polarity).jsonl'
test_data_path = '/content/drive/MyDrive/test_sample.jsonl'

# Maximum tokenized sequence length per backbone (ELECTRA / DeBERTa / RoBERTa).
# NOTE(review): xlm-roberta-base reports max_position_embeddings=514, but the usable
# input length is presumably 512 — confirm before tokenizing to max_len_robe.
max_len_elec = 256
max_len_debe = 256
max_len_robe = 514

# When running RoBERTa on Colab Pro, batch_size needs to be lowered (Out of Memory issue)
batch_size = 32

#ELECTRA
base_model_elec = 'kykim/electra-kor-base'
#RoBERTa
base_model_roberta = 'xlm-roberta-base'
#DeBERTa
base_model_deberta = "lighthouse/mdeberta-v3-base-kor-further"

# Optimizer / schedule hyper-parameters
learning_rate = 3e-6
eps = 1e-8
num_train_epochs = 30

# Widths used by the SimpleClassifier_* heads
classifier_hidden_size_base = 768
classifier_hidden_size_down = 384   # hidden_size down
classifier_hidden_size_up = 1000    # hidden_size up

# Dropout probabilities used by the SimpleClassifier_* heads
classifier_dropout_prob_base = 0.1  # dropout = 0.1
classifier_dropout_prob_up = 0.5    # dropout = 0.5

# Number of categories = 25
entity_property_pair = [
     '패키지/구성품#다양성','본품#인지도','브랜드#디자인',
     '패키지/구성품#편의성','제품 전체#디자인', '제품 전체#품질',
     '패키지/구성품#품질','패키지/구성품#일반','본품#일반',
     '패키지/구성품#디자인','본품#편의성','브랜드#품질',
     '브랜드#인지도','본품#다양성','본품#디자인',
     '제품 전체#다양성','본품#품질','제품 전체#인지도',
     '패키지/구성품#가격','본품#가격','제품 전체#가격',
     '브랜드#가격','브랜드#일반','제품 전체#일반','제품 전체#편의성'
     ]

# Binary labels for category extraction: class id 0 is 'True', class id 1 is 'False'
tf_id_to_name = ['True', 'False']
tf_name_to_id = {tf_id_to_name[i]: i for i in range(len(tf_id_to_name))}

# Polarity labels: the list index is the class id
polarity_id_to_name = ['positive', 'negative', 'neutral']
polarity_name_to_id = {polarity_id_to_name[i]: i for i in range(len(polarity_id_to_name))}

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Placeholder tokens; presumably registered with the tokenizer via add_special_tokens
# (that call is not visible in this part of the file — confirm).
special_tokens_dict = {
    'additional_special_tokens': ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']
}

In [None]:
def jsonload(fname, encoding="utf-8"):
    """Parse the JSON document stored in ``fname`` and return the resulting object.

    Args:
        fname: Path of the JSON file to read.
        encoding: Text encoding used to open the file.

    Returns:
        The Python object produced by ``json.load``.
    """
    with open(fname, encoding=encoding) as handle:
        return json.load(handle)

# Save a JSON-serializable object to the given file name
def jsondump(j, fname):
    """Write ``j`` to ``fname`` as JSON, keeping non-ASCII characters unescaped.

    Args:
        j: Object to serialize with ``json.dump``.
        fname: Destination path; the file is opened in write mode with UTF-8 encoding.
    """
    with open(fname, mode="w", encoding="UTF8") as sink:
        json.dump(j, sink, ensure_ascii=False)

# Read a JSONL file and collect its records into a list
def jsonlload(fname, encoding="utf-8"):
    """Load a JSON Lines file: one JSON document per line.

    Blank or whitespace-only lines (for example a trailing empty line) are
    skipped instead of raising ``json.JSONDecodeError``.

    Args:
        fname: Path of the ``.jsonl`` file to read.
        encoding: Text encoding used to open the file.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(fname, encoding=encoding) as f:
        # Iterate the file object directly so the whole file is not buffered twice.
        return [json.loads(line) for line in f if line.strip()]

In [None]:
def tokenize_and_align_labels(tokenizer, form, annotations, max_len):
    """Build category-extraction and polarity examples for one sentence.

    The sentence is paired with every entry of ``entity_property_pair`` and
    tokenized as a two-segment input padded/truncated to ``max_len``.

    Args:
        tokenizer: Callable tokenizer returning ``input_ids`` and ``attention_mask``.
        form: The sentence text; a missing value (as judged by ``pd.isna``) yields
            no examples.
        annotations: Iterable of annotations where index 0 is the category pair and
            index 2 is the polarity name.
        max_len: Sequence length passed to the tokenizer as ``max_length``.

    Returns:
        A tuple ``(category_rows, polarity_rows)`` of dicts with the keys
        ``input_ids``, ``attention_mask`` and ``label``. ``category_rows`` gets one
        row per pair (labelled True/False); ``polarity_rows`` only gets rows for
        pairs that appear in ``annotations``.
    """
    feature_keys = ('input_ids', 'attention_mask')
    category_rows = {'input_ids': [], 'attention_mask': [], 'label': []}
    polarity_rows = {'input_ids': [], 'attention_mask': [], 'label': []}

    # A missing sentence produces no examples at all.
    if pd.isna(form):
        return category_rows, polarity_rows

    for pair in entity_property_pair:
        encoded = tokenizer(form, pair, padding='max_length', max_length=max_len, truncation=True)

        # First annotation for this pair, ignoring entries whose polarity is the
        # '------------' placeholder.
        matched = next(
            (ann for ann in annotations if ann[2] != '------------' and ann[0] == pair),
            None,
        )

        for key in feature_keys:
            category_rows[key].append(encoded[key])

        if matched is None:
            category_rows['label'].append(tf_name_to_id['False'])
            continue

        category_rows['label'].append(tf_name_to_id['True'])
        for key in feature_keys:
            polarity_rows[key].append(encoded[key])
        polarity_rows['label'].append(polarity_name_to_id[matched[2]])

    return category_rows, polarity_rows


def get_dataset(raw_data, tokenizer, max_len):
    """Convert raw utterances into category and polarity ``TensorDataset`` objects.

    Args:
        raw_data: Iterable of dicts with the keys ``sentence_form`` and ``annotation``.
        tokenizer: Tokenizer forwarded to ``tokenize_and_align_labels``.
        max_len: Sequence length forwarded to ``tokenize_and_align_labels``.

    Returns:
        A tuple ``(category_dataset, polarity_dataset)``; each dataset holds the
        tensors ``(input_ids, attention_mask, label)`` in that order.
    """
    field_names = ('input_ids', 'attention_mask', 'label')
    category_columns = {name: [] for name in field_names}
    polarity_columns = {name: [] for name in field_names}

    for utterance in raw_data:
        category_part, polarity_part = tokenize_and_align_labels(
            tokenizer, utterance['sentence_form'], utterance['annotation'], max_len
        )
        for name in field_names:
            category_columns[name].extend(category_part[name])
            polarity_columns[name].extend(polarity_part[name])

    category_dataset = TensorDataset(
        torch.tensor(category_columns['input_ids']),
        torch.tensor(category_columns['attention_mask']),
        torch.tensor(category_columns['label']),
    )
    polarity_dataset = TensorDataset(
        torch.tensor(polarity_columns['input_ids']),
        torch.tensor(polarity_columns['attention_mask']),
        torch.tensor(polarity_columns['label']),
    )
    return category_dataset, polarity_dataset



In [None]:
def evaluation(y_true, y_pred, label_len):
    """Print per-class accuracy, overall/macro accuracy and F1 scores.

    Args:
        y_true: Sequence of gold class ids in ``range(label_len)``.
        y_pred: Sequence of predicted class ids, aligned with ``y_true``.
        label_len: Number of classes.

    Notes:
        A class that never occurs in ``y_true`` gets a per-class accuracy of 0.0
        instead of raising ``ZeroDivisionError``. Macro accuracy is averaged over
        ``label_len`` classes (it used to be divided by a hard-coded 3).
    """
    count_list = [0] * label_len
    hit_list = [0] * label_len
    for i, truth in enumerate(y_true):
        count_list[truth] += 1
        if truth == y_pred[i]:
            hit_list[truth] += 1

    # Guard against classes without any gold sample.
    acc_list = [
        hits / count if count else 0.0
        for hits, count in zip(hit_list, count_list)
    ]

    total = sum(count_list)
    print(count_list)
    print(hit_list)
    print(acc_list)
    print('accuracy: ', (sum(hit_list) / total) if total else 0.0)
    print('macro_accuracy: ', (sum(acc_list) / label_len) if label_len else 0.0)

    # scikit-learn expects plain integers rather than tensor/array scalars.
    y_true = list(map(int, y_true))
    y_pred = list(map(int, y_pred))

    print('f1_score: ', f1_score(y_true, y_pred, average=None))
    print('f1_score_micro: ', f1_score(y_true, y_pred, average='micro'))
    print('f1_score_macro: ', f1_score(y_true, y_pred, average='macro'))

In [None]:
def _match_annotation(category, polarity, candidates, polarity_index):
    """Return ``(category_found, polarity_matched)`` using the first candidate whose category matches."""
    for candidate in candidates:
        if candidate[0] == category:
            return True, candidate[polarity_index] == polarity
    return False, False


def _precision_recall_f1(counts):
    """Turn TP/FP/FN counts into a Precision/Recall/F1 dict, using 0.0 when a denominator is zero."""
    tp, fp, fn = counts['TP'], counts['FP'], counts['FN']
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    denominator = precision + recall
    f1 = 2 * recall * precision / denominator if denominator else 0.0
    return {'Precision': precision, 'Recall': recall, 'F1': f1}


def evaluation_f1(true_data, pred_data):
    """Score predicted annotations against gold annotations.

    Two results are computed: category extraction only, and the entire pipeline
    (category and polarity must both match).

    Args:
        true_data: List of dicts whose ``annotation`` entries hold the category
            at index 0 and the polarity at index 2.
        pred_data: List of dicts, aligned with ``true_data``, whose ``annotation``
            entries hold the category at index 0 and the polarity at index 1.

    Returns:
        ``{'category extraction result': {...}, 'entire pipeline result': {...}}``
        where each inner dict has ``Precision``, ``Recall`` and ``F1``. A metric
        whose denominator is zero (e.g. no predictions at all) is reported as 0.0
        instead of raising ``ZeroDivisionError``.
    """
    ce_eval = {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}
    pipeline_eval = {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}

    for i, true_item in enumerate(true_data):
        true_annotations = true_item['annotation']
        pred_annotations = pred_data[i]['annotation']

        # TP / FN: every gold annotation is either recovered or missed.
        for y_ano in true_annotations:
            ce_found, pipeline_found = _match_annotation(y_ano[0], y_ano[2], pred_annotations, 1)
            ce_eval['TP' if ce_found else 'FN'] += 1
            pipeline_eval['TP' if pipeline_found else 'FN'] += 1

        # FP: every prediction without a gold counterpart.
        for p_ano in pred_annotations:
            ce_found, pipeline_found = _match_annotation(p_ano[0], p_ano[1], true_annotations, 2)
            if not ce_found:
                ce_eval['FP'] += 1
            if not pipeline_found:
                pipeline_eval['FP'] += 1

    return {
        'category extraction result': _precision_recall_f1(ce_eval),
        'entire pipeline result': _precision_recall_f1(pipeline_eval)
    }

## SimpleClassifier

In [None]:
# baseline
class SimpleClassifier_Base(nn.Module):
    """Baseline head: dropout -> dense -> tanh -> dropout -> output.

    The hidden width is ``classifier_hidden_size_base`` and the dropout
    probability is ``classifier_dropout_prob_base``.
    """

    def __init__(self, num_label):
        super().__init__()
        width = classifier_hidden_size_base
        self.dense = nn.Linear(width, width)
        self.dropout = nn.Dropout(classifier_dropout_prob_base)
        self.output = nn.Linear(width, num_label)

    def forward(self, features):
        """Return logits computed from the slice ``features[:, 0, :]``."""
        first_token = features[:, 0, :]
        hidden = torch.tanh(self.dense(self.dropout(first_token)))
        return self.output(self.dropout(hidden))

# hidden_size raised to 1000
class SimpleClassifier_Hidden_up(nn.Module):
    """Head with a wider hidden layer (``classifier_hidden_size_up``).

    Layout: dropout -> dense -> tanh -> dropout -> output, using the dropout
    probability ``classifier_dropout_prob_base``.
    """

    def __init__(self, num_label):
        super().__init__()
        hidden_width = classifier_hidden_size_up
        self.dense = nn.Linear(classifier_hidden_size_base, hidden_width)
        self.dropout = nn.Dropout(classifier_dropout_prob_base)
        self.output = nn.Linear(hidden_width, num_label)

    def forward(self, features):
        """Return logits computed from the slice ``features[:, 0, :]``."""
        first_token = features[:, 0, :]
        hidden = torch.tanh(self.dense(self.dropout(first_token)))
        return self.output(self.dropout(hidden))

# hidden_size lowered to 384
class SimpleClassifier_Hidden_down(nn.Module):
    """Head with a narrower hidden layer (``classifier_hidden_size_down``).

    Layout: dropout -> dense -> tanh -> dropout -> output, using the dropout
    probability ``classifier_dropout_prob_base``.
    """

    def __init__(self, num_label):
        super().__init__()
        hidden_width = classifier_hidden_size_down
        self.dense = nn.Linear(classifier_hidden_size_base, hidden_width)
        self.dropout = nn.Dropout(classifier_dropout_prob_base)
        self.output = nn.Linear(hidden_width, num_label)

    def forward(self, features):
        """Return logits computed from the slice ``features[:, 0, :]``."""
        first_token = features[:, 0, :]
        hidden = torch.tanh(self.dense(self.dropout(first_token)))
        return self.output(self.dropout(hidden))

# dropout 0.5
class SimpleClassifier_dr05(nn.Module):
    """Baseline-width head that uses the higher dropout ``classifier_dropout_prob_up``.

    Layout: dropout -> dense -> tanh -> dropout -> output.
    """

    def __init__(self, num_label):
        super().__init__()
        width = classifier_hidden_size_base
        self.dense = nn.Linear(width, width)
        self.dropout = nn.Dropout(classifier_dropout_prob_up)
        self.output = nn.Linear(width, num_label)

    def forward(self, features):
        """Return logits computed from the slice ``features[:, 0, :]``."""
        first_token = features[:, 0, :]
        hidden = torch.tanh(self.dense(self.dropout(first_token)))
        return self.output(self.dropout(hidden))

# hidden_size lowered to 384 + dropout 0.5
class SimpleClassifier_Hidden_down_dr05(nn.Module):
    """Narrow head (``classifier_hidden_size_down``) with dropout ``classifier_dropout_prob_up``.

    Layout: dropout -> dense -> tanh -> dropout -> output.
    """

    def __init__(self, num_label):
        super().__init__()
        hidden_width = classifier_hidden_size_down
        self.dense = nn.Linear(classifier_hidden_size_base, hidden_width)
        self.dropout = nn.Dropout(classifier_dropout_prob_up)
        self.output = nn.Linear(hidden_width, num_label)

    def forward(self, features):
        """Return logits computed from the slice ``features[:, 0, :]``."""
        first_token = features[:, 0, :]
        hidden = torch.tanh(self.dense(self.dropout(first_token)))
        return self.output(self.dropout(hidden))

# one additional dense layer (dropout 0.1)
class SimpleClassifier_Layer(nn.Module):
    """Head with two stacked dense layers that halve the width twice.

    Layout: dropout -> dense1 -> dropout -> dense2 -> tanh -> dropout -> output,
    using the dropout probability ``classifier_dropout_prob_base``.
    """

    def __init__(self, num_label):
        super().__init__()
        half_width = classifier_hidden_size_base // 2
        quarter_width = classifier_hidden_size_base // 4
        self.dense1 = nn.Linear(classifier_hidden_size_base, half_width)
        self.dense2 = nn.Linear(half_width, quarter_width)
        self.dropout = nn.Dropout(classifier_dropout_prob_base)
        self.output = nn.Linear(quarter_width, num_label)

    def forward(self, features):
        """Return logits computed from the slice ``features[:, 0, :]``."""
        first_token = features[:, 0, :]

        # NOTE(review): there is no activation between dense1 and dense2; tanh is
        # applied only after dense2.
        projected = self.dense1(self.dropout(first_token))
        narrowed = self.dense2(self.dropout(projected))

        activated = torch.tanh(narrowed)
        return self.output(self.dropout(activated))

# one additional dense layer + dropout 0.5
class SimpleClassifier_Layer_dr05(nn.Module):
    """Two-dense-layer head that uses the higher dropout ``classifier_dropout_prob_up``.

    Layout: dropout -> dense1 -> dropout -> dense2 -> tanh -> dropout -> output.
    """

    def __init__(self, num_label):
        super().__init__()
        half_width = classifier_hidden_size_base // 2
        quarter_width = classifier_hidden_size_base // 4
        self.dense1 = nn.Linear(classifier_hidden_size_base, half_width)
        self.dense2 = nn.Linear(half_width, quarter_width)
        self.dropout = nn.Dropout(classifier_dropout_prob_up)
        self.output = nn.Linear(quarter_width, num_label)

    def forward(self, features):
        """Return logits computed from the slice ``features[:, 0, :]``."""
        first_token = features[:, 0, :]

        # NOTE(review): there is no activation between dense1 and dense2; tanh is
        # applied only after dense2.
        projected = self.dense1(self.dropout(first_token))
        narrowed = self.dense2(self.dropout(projected))

        activated = torch.tanh(narrowed)
        return self.output(self.dropout(activated))


## ELECTRA

#### baseline

In [None]:
# category baseline
class ElectraBaseClassifier_Cate_Base(nn.Module):
    """Category model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Base``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Base(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

# polarity baseline
class ElectraBaseClassifier_Pola_Base(nn.Module):
    """Polarity model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Base``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Base(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits


#### only layer 추가

In [None]:
# category: extra dense layer
class ElectraBaseClassifier_Cate_Layer(nn.Module):
    """Category model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Layer``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Layer(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

# polarity: extra dense layer
class ElectraBaseClassifier_Pola_Layer(nn.Module):
    """Polarity model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Layer``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Layer(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits


#### only dropout 0.5

In [None]:
# category dropout 0.5
class ElectraBaseClassifier_Cate_dr05(nn.Module):
    """Category model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_dr05``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

# polarity dropout 0.5
class ElectraBaseClassifier_Pola_dr05(nn.Module):
    """Polarity model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_dr05``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits


#### layer + dropout 0.5

In [None]:
# category: extra dense layer + dropout 0.5
class ElectraBaseClassifier_Cate_Layer_Dropout05(nn.Module):
    """Category model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Layer_dr05``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Layer_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

# polarity: extra dense layer + dropout 0.5
class ElectraBaseClassifier_Pola_Layer_Dropout05(nn.Module):
    """Polarity model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Layer_dr05``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Layer_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

#### hidden_size_up

In [None]:
# category hidden_size_up
class ElectraBaseClassifier_Cate_hiddenup(nn.Module):
    """Category model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Hidden_up``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_up(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

# polarity hidden_size_up
class ElectraBaseClassifier_Pola_hiddenup(nn.Module):
    """Polarity model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Hidden_up``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_up(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits


#### only hidden_size_down

In [None]:
# category hidden down
class ElectraBaseClassifier_Cate_hiddendown(nn.Module):
    """Category model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Hidden_down``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_down(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

# polarity hidden down
class ElectraBaseClassifier_Pola_hiddendown(nn.Module):
    """Polarity model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Hidden_down``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_down(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits


#### hidden_size_down + dropout 0.5

In [None]:
# category hidden down + dropout 0.5
class ElectraBaseClassifier_Cate_hiddendown_dr05(nn.Module):
    """Category model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Hidden_down_dr05``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_down_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

# polarity hidden down + dropout 0.5
class ElectraBaseClassifier_Pola_hiddendown_dr05(nn.Module):
    """Polarity model: encoder loaded from ``base_model_elec`` plus ``SimpleClassifier_Hidden_down_dr05``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_down_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits


## RoBERTa

In [None]:
# category baseline(roberta)
class RobertaBaseClassifier(nn.Module):
    """Encoder loaded from ``base_model_roberta`` plus ``SimpleClassifier_Base``.

    The encoder is stored under the attribute name ``electra`` even though it is
    loaded from the RoBERTa checkpoint; the name is kept because parameter names
    in saved state dicts derive from it.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.electra = AutoModel.from_pretrained(base_model_roberta)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Base(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

## DeBERTa

In [None]:
# category baseline(deberta)
class DebertaBaseClassifier(nn.Module):
    """Encoder loaded from ``base_model_deberta`` plus ``SimpleClassifier_Base``."""

    def __init__(self, num_label, len_tokenizer):
        super().__init__()
        self.num_label = num_label

        # Pretrained encoder whose token embeddings are resized to ``len_tokenizer``.
        self.deberta = AutoModel.from_pretrained(base_model_deberta)
        self.deberta.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Base(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given."""
        encoded = self.deberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, self.num_label)
        return criterion(flat_logits, labels.view(-1)), logits

# Model Test

## Inference

### base(ELECTRA)

In [None]:

def predict_from_korean_form_kelec(tokenizer_kelec, ce_model, pc_model, data):
    """Annotate each sentence with ``[pair, polarity]`` entries using ELECTRA models.

    For every sentence and every candidate in the global
    ``entity_property_pair``, the category model decides whether the pair
    applies; when it does, the polarity model classifies the same encoded
    input and ``[pair, polarity]`` is appended to ``sentence['annotation']``.

    Args:
        tokenizer_kelec: tokenizer shared by both models.
        ce_model: category model, called as ``ce_model(input_ids, attention_mask)``
            and expected to return ``(loss, logits)``.
        pc_model: polarity model with the same call convention.
        data: list of dicts with a ``'sentence_form'`` key. Modified in place:
            each item's ``'annotation'`` is reset to a list and refilled.

    Returns:
        The same ``data`` list.
    """
    # Both models are used for inference only; previously just ce_model was
    # moved to the device and switched to eval mode.
    for model in (ce_model, pc_model):
        model.to(device)
        model.eval()

    for idx, sentence in enumerate(data):
        if idx % 10 == 0:
            print(idx, "/", len(data))

        form = sentence['sentence_form']
        sentence['annotation'] = []
        if not isinstance(form, str):
            # Non-string forms are reported and left with an empty annotation.
            print("form type is arong: ", form)
            continue

        for pair in entity_property_pair:
            encoded = tokenizer_kelec(form, pair, padding='max_length', max_length=256, truncation=True)
            input_ids = torch.tensor([encoded['input_ids']]).to(device)
            attention_mask = torch.tensor([encoded['attention_mask']]).to(device)

            with torch.no_grad():
                _, ce_logits = ce_model(input_ids, attention_mask)

            ce_result = tf_id_to_name[torch.argmax(ce_logits, dim=-1)[0]]
            if ce_result != 'True':
                continue

            with torch.no_grad():
                _, pc_logits = pc_model(input_ids, attention_mask)

            pc_result = polarity_id_to_name[torch.argmax(pc_logits, dim=-1)[0]]
            sentence['annotation'].append([pair, pc_result])

    return data


### ELECTRA + Force

Force : 예측된 annotation이 빈칸( '[ ]' )인 문장에 대해서 가장 높은 확률의 카테고리를 강제로 뽑아내는 방법

In [None]:

def predict_from_korean_form_kelec_forcing(tokenizer_kelec, ce_model, pc_model, data):
    """ELECTRA prediction that never leaves a sentence without an annotation.

    Works like the plain ELECTRA predictor, but when the category model
    accepts no pair for a sentence, the pair whose category logit at index 0
    is highest is forced and its polarity is predicted.

    Bug fix: the fallback used to run the polarity model on the tensors left
    over from the *last* pair of the loop rather than on the forced pair. The
    inputs of the best-scoring pair are now kept and reused. A sentence with
    no candidate pairs at all is left unannotated instead of raising.

    Args:
        tokenizer_kelec: tokenizer shared by both models.
        ce_model: category model, called as ``ce_model(input_ids, attention_mask)``
            and expected to return ``(loss, logits)``.
        pc_model: polarity model with the same call convention.
        data: list of dicts with a ``'sentence_form'`` key; modified in place.

    Returns:
        The same ``data`` list.
    """
    for model in (ce_model, pc_model):
        model.to(device)
        model.eval()

    def _polarity(input_ids, attention_mask):
        # Polarity label for one already-encoded (sentence, pair) input.
        with torch.no_grad():
            _, pc_logits = pc_model(input_ids, attention_mask)
        return polarity_id_to_name[torch.argmax(pc_logits, dim=-1)[0]]

    for idx, sentence in enumerate(data):
        if idx % 10 == 0:
            print(idx, "/", len(data))

        form = sentence['sentence_form']
        sentence['annotation'] = []
        if not isinstance(form, str):
            print("form type is arong: ", form)
            continue

        # Best candidate seen so far, used only if no pair is accepted.
        best_score = None
        best_pair = None
        best_inputs = None

        for pair in entity_property_pair:
            encoded = tokenizer_kelec(form, pair, padding='max_length', max_length=256, truncation=True)
            input_ids = torch.tensor([encoded['input_ids']]).to(device)
            attention_mask = torch.tensor([encoded['attention_mask']]).to(device)

            with torch.no_grad():
                _, ce_logits = ce_model(input_ids, attention_mask)

            # Strict '>' keeps the first maximum, matching torch.argmax on ties.
            score = ce_logits[0][0].item()
            if best_score is None or score > best_score:
                best_score = score
                best_pair = pair
                best_inputs = (input_ids, attention_mask)

            ce_result = tf_id_to_name[torch.argmax(ce_logits, dim=-1)[0]]
            if ce_result == 'True':
                sentence['annotation'].append([pair, _polarity(input_ids, attention_mask)])

        if not sentence['annotation'] and best_pair is not None:
            # Nothing was accepted: force the highest-scoring pair and predict
            # polarity on that pair's own encoding.
            sentence['annotation'].append([best_pair, _polarity(*best_inputs)])

    return data

### category : RoBERTa / polarity : ELECTRA

In [None]:

def predict_from_korean_form_roberta(tokenizer_roberta, tokenizer_kelec, ce_model, pc_model, data):
    """Annotate sentences using a RoBERTa category model and an ELECTRA polarity model.

    For every sentence and every candidate in the global
    ``entity_property_pair``, the category model (fed RoBERTa-tokenized input)
    decides whether the pair applies; when it does, the polarity model (fed
    ELECTRA-tokenized input) classifies it and ``[pair, polarity]`` is appended
    to ``sentence['annotation']``.

    Args:
        tokenizer_roberta: tokenizer for ``ce_model``.
        tokenizer_kelec: tokenizer for ``pc_model``.
        ce_model: category model, called as ``ce_model(input_ids, attention_mask)``
            and expected to return ``(loss, logits)``.
        pc_model: polarity model with the same call convention.
        data: list of dicts with a ``'sentence_form'`` key; modified in place.

    Returns:
        The same ``data`` list.
    """
    # Both models are used for inference only; previously just ce_model was
    # moved to the device and switched to eval mode.
    for model in (ce_model, pc_model):
        model.to(device)
        model.eval()

    def _encode(tokenizer, form, pair, max_length):
        # Tokenize one (sentence, pair) input into batch-of-one tensors on `device`.
        encoded = tokenizer(form, pair, padding='max_length', max_length=max_length, truncation=True)
        input_ids = torch.tensor([encoded['input_ids']]).to(device)
        attention_mask = torch.tensor([encoded['attention_mask']]).to(device)
        return input_ids, attention_mask

    for idx, sentence in enumerate(data):
        if idx % 10 == 0:
            print(idx, "/", len(data))

        form = sentence['sentence_form']
        sentence['annotation'] = []
        if not isinstance(form, str):
            print("form type is arong: ", form)
            continue

        for pair in entity_property_pair:
            # 514 is the same value as max_len_robe in the notebook settings.
            input_ids_roberta, attention_mask_roberta = _encode(tokenizer_roberta, form, pair, 514)

            with torch.no_grad():
                _, ce_logits = ce_model(input_ids_roberta, attention_mask_roberta)

            ce_result = tf_id_to_name[torch.argmax(ce_logits, dim=-1)[0]]
            if ce_result != 'True':
                continue

            # The ELECTRA encoding is only needed once the category is accepted.
            input_ids_kelec, attention_mask_kelec = _encode(tokenizer_kelec, form, pair, 256)
            with torch.no_grad():
                _, pc_logits = pc_model(input_ids_kelec, attention_mask_kelec)

            pc_result = polarity_id_to_name[torch.argmax(pc_logits, dim=-1)[0]]
            sentence['annotation'].append([pair, pc_result])

    return data


### category : DeBERTa / polarity : ELECTRA 

In [None]:
def predict_from_korean_form_deberta(tokenizer_deberta, tokenizer_kelec, ce_model, pc_model, data):
    """Annotate sentences using a DeBERTa category model and an ELECTRA polarity model.

    For every sentence and every candidate in the global
    ``entity_property_pair``, the category model (fed DeBERTa-tokenized input)
    decides whether the pair applies; when it does, the polarity model (fed
    ELECTRA-tokenized input) classifies it and ``[pair, polarity]`` is appended
    to ``sentence['annotation']``.

    Args:
        tokenizer_deberta: tokenizer for ``ce_model``.
        tokenizer_kelec: tokenizer for ``pc_model``.
        ce_model: category model, called as ``ce_model(input_ids, attention_mask)``
            and expected to return ``(loss, logits)``.
        pc_model: polarity model with the same call convention.
        data: list of dicts with a ``'sentence_form'`` key; modified in place.

    Returns:
        The same ``data`` list.
    """
    # Both models are used for inference only; previously just ce_model was
    # moved to the device and switched to eval mode.
    for model in (ce_model, pc_model):
        model.to(device)
        model.eval()

    def _encode(tokenizer, form, pair, max_length):
        # Tokenize one (sentence, pair) input into batch-of-one tensors on `device`.
        encoded = tokenizer(form, pair, padding='max_length', max_length=max_length, truncation=True)
        input_ids = torch.tensor([encoded['input_ids']]).to(device)
        attention_mask = torch.tensor([encoded['attention_mask']]).to(device)
        return input_ids, attention_mask

    for idx, sentence in enumerate(data):
        if idx % 10 == 0:
            print(idx, "/", len(data))

        form = sentence['sentence_form']
        sentence['annotation'] = []
        if not isinstance(form, str):
            print("form type is arong: ", form)
            continue

        for pair in entity_property_pair:
            input_ids_deberta, attention_mask_deberta = _encode(tokenizer_deberta, form, pair, 256)

            with torch.no_grad():
                _, ce_logits = ce_model(input_ids_deberta, attention_mask_deberta)

            ce_result = tf_id_to_name[torch.argmax(ce_logits, dim=-1)[0]]
            if ce_result != 'True':
                continue

            # The ELECTRA encoding is only needed once the category is accepted.
            input_ids_kelec, attention_mask_kelec = _encode(tokenizer_kelec, form, pair, 256)
            with torch.no_grad():
                _, pc_logits = pc_model(input_ids_kelec, attention_mask_kelec)

            pc_result = polarity_id_to_name[torch.argmax(pc_logits, dim=-1)[0]]
            sentence['annotation'].append([pair, pc_result])

    return data


### category : DeBERTa / polarity : ELECTRA + Force

Force : 예측된 annotation이 빈칸( '[ ]' )인 문장에 대해서 가장 높은 확률의 카테고리를 강제로 뽑아내는 방법

In [None]:
def predict_from_korean_form_deberta_forcing(tokenizer_deberta, tokenizer_kelec, ce_model, pc_model, data):
    """DeBERTa/ELECTRA prediction that never leaves a sentence without an annotation.

    The category model (DeBERTa-tokenized input) is evaluated for every
    candidate in the global ``entity_property_pair``; accepted pairs get a
    polarity from the ELECTRA polarity model. When no pair is accepted, the
    pair whose category logit at index 0 is highest is forced and its polarity
    is predicted.

    Bug fix: the fallback used to run the polarity model on the ELECTRA
    tensors left over from the *last* pair of the loop rather than on the
    forced pair. The forced pair is now encoded itself. A sentence with no
    candidate pairs at all is left unannotated instead of raising.

    Args:
        tokenizer_deberta: tokenizer for ``ce_model``.
        tokenizer_kelec: tokenizer for ``pc_model``.
        ce_model: category model, called as ``ce_model(input_ids, attention_mask)``
            and expected to return ``(loss, logits)``.
        pc_model: polarity model with the same call convention.
        data: list of dicts with a ``'sentence_form'`` key; modified in place.

    Returns:
        The same ``data`` list.
    """
    for model in (ce_model, pc_model):
        model.to(device)
        model.eval()

    def _encode(tokenizer, form, pair, max_length):
        # Tokenize one (sentence, pair) input into batch-of-one tensors on `device`.
        encoded = tokenizer(form, pair, padding='max_length', max_length=max_length, truncation=True)
        input_ids = torch.tensor([encoded['input_ids']]).to(device)
        attention_mask = torch.tensor([encoded['attention_mask']]).to(device)
        return input_ids, attention_mask

    def _polarity(form, pair):
        # Polarity label for (form, pair) from the ELECTRA polarity model.
        input_ids_kelec, attention_mask_kelec = _encode(tokenizer_kelec, form, pair, 256)
        with torch.no_grad():
            _, pc_logits = pc_model(input_ids_kelec, attention_mask_kelec)
        return polarity_id_to_name[torch.argmax(pc_logits, dim=-1)[0]]

    for idx, sentence in enumerate(data):
        if idx % 10 == 0:
            print(idx, "/", len(data))

        form = sentence['sentence_form']
        sentence['annotation'] = []
        if not isinstance(form, str):
            print("form type is arong: ", form)
            continue

        # Best candidate seen so far, used only if no pair is accepted.
        best_score = None
        best_pair = None

        for pair in entity_property_pair:
            # NOTE(review): 514 differs from the 256 used by
            # predict_from_korean_form_deberta and from max_len_debe - confirm
            # which length is intended. Left unchanged here.
            input_ids_deberta, attention_mask_deberta = _encode(tokenizer_deberta, form, pair, 514)

            with torch.no_grad():
                _, ce_logits = ce_model(input_ids_deberta, attention_mask_deberta)

            # Strict '>' keeps the first maximum, matching torch.argmax on ties.
            score = ce_logits[0][0].item()
            if best_score is None or score > best_score:
                best_score = score
                best_pair = pair

            ce_result = tf_id_to_name[torch.argmax(ce_logits, dim=-1)[0]]
            if ce_result == 'True':
                sentence['annotation'].append([pair, _polarity(form, pair)])

        if not sentence['annotation'] and best_pair is not None:
            # Nothing was accepted: force the highest-scoring pair and predict
            # polarity on that pair's own encoding.
            sentence['annotation'].append([best_pair, _polarity(form, best_pair)])

    return data

### ELECTRA + Threshold
카테고리 모델의 로짓 값이 임계값(1.75)을 넘는, 확실한 annotation만 가져온다.

In [None]:

def predict_from_korean_form_kelec_threshold(tokenizer_kelec, ce_model, pc_model, data, threshold=1.75):
    """ELECTRA prediction that keeps only confidently accepted pairs.

    A pair from the global ``entity_property_pair`` is annotated only when the
    category logit at index 0 exceeds ``threshold`` *and* the arg-max class is
    ``'True'``; its polarity is then predicted on the same encoded input.

    Args:
        tokenizer_kelec: tokenizer shared by both models.
        ce_model: category model, called as ``ce_model(input_ids, attention_mask)``
            and expected to return ``(loss, logits)``.
        pc_model: polarity model with the same call convention.
        data: list of dicts with a ``'sentence_form'`` key; modified in place.
        threshold: minimum raw category logit (not a probability) required to
            keep a pair. Defaults to 1.75, the value previously hard-coded.

    Returns:
        The same ``data`` list.
    """
    # Both models are used for inference only; previously just ce_model was
    # moved to the device and switched to eval mode.
    for model in (ce_model, pc_model):
        model.to(device)
        model.eval()

    for sentence in data:
        form = sentence['sentence_form']
        sentence['annotation'] = []
        if not isinstance(form, str):
            print("form type is arong: ", form)
            continue

        for pair in entity_property_pair:
            encoded = tokenizer_kelec(form, pair, padding='max_length', max_length=256, truncation=True)
            input_ids = torch.tensor([encoded['input_ids']]).to(device)
            attention_mask = torch.tensor([encoded['attention_mask']]).to(device)

            with torch.no_grad():
                _, ce_logits = ce_model(input_ids, attention_mask)

            # Skip pairs whose index-0 logit does not clear the threshold.
            if not ce_logits[0][0] > threshold:
                continue

            ce_result = tf_id_to_name[torch.argmax(ce_logits, dim=-1)[0]]
            if ce_result != 'True':
                continue

            with torch.no_grad():
                _, pc_logits = pc_model(input_ids, attention_mask)

            pc_result = polarity_id_to_name[torch.argmax(pc_logits, dim=-1)[0]]
            sentence['annotation'].append([pair, pc_result])

    return data


# Pipeline(Prediction)

재현용 코드 파일로, 위에서부터 차례대로 실행시키면 된다.

## roberta_Gpu_16 가45

In [None]:
#roberta_Gpu_16 가45
# Notebook-global batch size; this pipeline predicts one (sentence, pair) at a
# time and does not read it.
batch_size = 32
def test_sentiment_analysis():
    """Predict the test set with RoBERTa (category, epoch 16) + ELECTRA (polarity, epoch 45).

    Loads both checkpoints, runs ``predict_from_korean_form_roberta`` on a deep
    copy of the test data and writes one JSON object per line to
    ``/content/Korean_ABSA/jsonl_files/roberta_Gpu_16.jsonl``. The ids in the
    file are regenerated sequentially (``nikluge-sa-2022-test-00001``, ...);
    the returned DataFrame keeps the ids found in the test data.

    Notes:
        * Lines are serialised with ``json.dumps`` so quotes, backslashes,
          apostrophes or the text "None" inside a sentence no longer corrupt
          the output.
        * The polarity checkpoint is loaded with ``map_location=device``, like
          the category checkpoint, so it also loads on a machine without the
          device it was saved from.
        * The datasets/DataLoaders that were built but never iterated are no
          longer created; prediction tokenizes the sentences itself.

    Returns:
        pandas.DataFrame with columns ``id``, ``sentence_form``, ``annotation``.
    """
    tokenizer_roberta = AutoTokenizer.from_pretrained(base_model_roberta)
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)

    # add_special_tokens updates the tokenizer in place; the returned count is unused.
    tokenizer_roberta.add_special_tokens(special_tokens_dict)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = RobertaBaseClassifier(len(tf_id_to_name), len(tokenizer_roberta))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/roberta_Gpuragi_epoch_16.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_roberta(tokenizer_roberta, tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    output_path = '/content/Korean_ABSA/jsonl_files/roberta_Gpu_16.jsonl'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for i, row in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-' + str(i).zfill(5),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            # ensure_ascii=False keeps the Korean text readable in the file.
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the pipeline defined in the cell above: it writes the JSONL prediction file and returns the DataFrame.
test_sentiment_analysis() 

## roberta_Gpu_15 가45

In [None]:
#roberta_Gpu_15 가45
# Notebook-global batch size; this pipeline predicts one (sentence, pair) at a
# time and does not read it.
batch_size = 32
def test_sentiment_analysis():
    """Predict the test set with RoBERTa (category, epoch 15) + ELECTRA (polarity, epoch 45).

    Loads both checkpoints, runs ``predict_from_korean_form_roberta`` on a deep
    copy of the test data and writes one JSON object per line to
    ``/content/Korean_ABSA/jsonl_files/roberta_Gpu_15.jsonl``. The ids in the
    file are regenerated sequentially (``nikluge-sa-2022-test-00001``, ...);
    the returned DataFrame keeps the ids found in the test data.

    Notes:
        * Lines are serialised with ``json.dumps`` so quotes, backslashes,
          apostrophes or the text "None" inside a sentence no longer corrupt
          the output.
        * The polarity checkpoint is loaded with ``map_location=device``, like
          the category checkpoint, so it also loads on a machine without the
          device it was saved from.
        * The datasets/DataLoaders that were built but never iterated are no
          longer created; prediction tokenizes the sentences itself.

    Returns:
        pandas.DataFrame with columns ``id``, ``sentence_form``, ``annotation``.
    """
    tokenizer_roberta = AutoTokenizer.from_pretrained(base_model_roberta)
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)

    # add_special_tokens updates the tokenizer in place; the returned count is unused.
    tokenizer_roberta.add_special_tokens(special_tokens_dict)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = RobertaBaseClassifier(len(tf_id_to_name), len(tokenizer_roberta))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/roberta_Gpuragi_epoch_15.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_roberta(tokenizer_roberta, tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    output_path = '/content/Korean_ABSA/jsonl_files/roberta_Gpu_15.jsonl'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for i, row in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-' + str(i).zfill(5),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            # ensure_ascii=False keeps the Korean text readable in the file.
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the pipeline defined in the cell above: it writes the JSONL prediction file and returns the DataFrame.
test_sentiment_analysis() 

## deberta_Gpu_7 가45

In [None]:
#deberta_Gpu_7 가45
# Notebook-global batch size; this pipeline predicts one (sentence, pair) at a
# time and does not read it.
batch_size = 32
def test_sentiment_analysis():
    """Predict the test set with DeBERTa (category, epoch 7) + ELECTRA (polarity, epoch 45).

    Loads both checkpoints, runs ``predict_from_korean_form_deberta`` on a deep
    copy of the test data and writes one JSON object per line to
    ``/content/Korean_ABSA/jsonl_files/deberta_Gpu_7.jsonl``. The ids in the
    file are regenerated sequentially (``nikluge-sa-2022-test-00001``, ...);
    the returned DataFrame keeps the ids found in the test data.

    Notes:
        * Lines are serialised with ``json.dumps`` so quotes, backslashes,
          apostrophes or the text "None" inside a sentence no longer corrupt
          the output.
        * The polarity checkpoint is loaded with ``map_location=device``, like
          the category checkpoint, so it also loads on a machine without the
          device it was saved from.
        * The datasets/DataLoaders that were built but never iterated are no
          longer created; prediction tokenizes the sentences itself.

    Returns:
        pandas.DataFrame with columns ``id``, ``sentence_form``, ``annotation``.
    """
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)

    # add_special_tokens updates the tokenizer in place; the returned count is unused.
    tokenizer_deberta.add_special_tokens(special_tokens_dict)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = DebertaBaseClassifier(len(tf_id_to_name), len(tokenizer_deberta))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/deberta_Gpu_epoch_7.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_deberta(tokenizer_deberta, tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    output_path = '/content/Korean_ABSA/jsonl_files/deberta_Gpu_7.jsonl'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for i, row in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-' + str(i).zfill(5),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            # ensure_ascii=False keeps the Korean text readable in the file.
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the pipeline defined in the cell above: it writes the JSONL prediction file and returns the DataFrame.
test_sentiment_analysis() 

## deberta_Gpu_14 가45

In [None]:
#deberta_Gpu_14 가45
# Notebook-global batch size; this pipeline predicts one (sentence, pair) at a
# time and does not read it.
batch_size = 32
def test_sentiment_analysis():
    """Predict the test set with DeBERTa (category, epoch 14) + ELECTRA (polarity, epoch 45).

    Loads both checkpoints, runs ``predict_from_korean_form_deberta`` on a deep
    copy of the test data and writes one JSON object per line to
    ``/content/Korean_ABSA/jsonl_files/deberta_Gpu_14.jsonl``. The ids in the
    file are regenerated sequentially (``nikluge-sa-2022-test-00001``, ...);
    the returned DataFrame keeps the ids found in the test data.

    Notes:
        * Lines are serialised with ``json.dumps`` so quotes, backslashes,
          apostrophes or the text "None" inside a sentence no longer corrupt
          the output.
        * The polarity checkpoint is loaded with ``map_location=device``, like
          the category checkpoint, so it also loads on a machine without the
          device it was saved from.
        * The datasets/DataLoaders that were built but never iterated are no
          longer created; prediction tokenizes the sentences itself.

    Returns:
        pandas.DataFrame with columns ``id``, ``sentence_form``, ``annotation``.
    """
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)

    # add_special_tokens updates the tokenizer in place; the returned count is unused.
    tokenizer_deberta.add_special_tokens(special_tokens_dict)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = DebertaBaseClassifier(len(tf_id_to_name), len(tokenizer_deberta))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/deberta_Gpu_epoch_14.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_deberta(tokenizer_deberta, tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    output_path = '/content/Korean_ABSA/jsonl_files/deberta_Gpu_14.jsonl'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for i, row in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-' + str(i).zfill(5),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            # ensure_ascii=False keeps the Korean text readable in the file.
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the pipeline defined in the cell above: it writes the JSONL prediction file and returns the DataFrame.
test_sentiment_analysis() 

## deberta 7F 가45

In [None]:
#deberta 7F 가45
# Notebook-global batch size; this pipeline predicts one (sentence, pair) at a
# time and does not read it.
batch_size = 32
def test_sentiment_analysis():
    """Predict the test set with DeBERTa (category, epoch 7, forcing) + ELECTRA (polarity, epoch 45).

    Loads both checkpoints, runs ``predict_from_korean_form_deberta_forcing``
    on a deep copy of the test data and writes one JSON object per line to
    ``/content/Korean_ABSA/jsonl_files/deberta_Gpu_7F.jsonl``. The ids in the
    file are regenerated sequentially (``nikluge-sa-2022-test-00001``, ...);
    the returned DataFrame keeps the ids found in the test data.

    Notes:
        * Lines are serialised with ``json.dumps`` so quotes, backslashes,
          apostrophes or the text "None" inside a sentence no longer corrupt
          the output.
        * The polarity checkpoint is loaded with ``map_location=device``, like
          the category checkpoint, so it also loads on a machine without the
          device it was saved from.
        * The datasets/DataLoaders that were built but never iterated are no
          longer created; prediction tokenizes the sentences itself.

    Returns:
        pandas.DataFrame with columns ``id``, ``sentence_form``, ``annotation``.
    """
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)

    # add_special_tokens updates the tokenizer in place; the returned count is unused.
    tokenizer_deberta.add_special_tokens(special_tokens_dict)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = DebertaBaseClassifier(len(tf_id_to_name), len(tokenizer_deberta))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/deberta_Gpu_epoch_7.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_deberta_forcing(tokenizer_deberta, tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    output_path = '/content/Korean_ABSA/jsonl_files/deberta_Gpu_7F.jsonl'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for i, row in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-' + str(i).zfill(5),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            # ensure_ascii=False keeps the Korean text readable in the file.
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the pipeline defined in the cell above: it writes the JSONL prediction file and returns the DataFrame.
test_sentiment_analysis() 

## deberta 20F 가45

In [None]:
#deberta 20F 가45
# Notebook-global batch size; this pipeline predicts one (sentence, pair) at a
# time and does not read it.
batch_size = 32
def test_sentiment_analysis():
    """Predict the test set with DeBERTa (category, epoch 20, forcing) + ELECTRA (polarity, epoch 45).

    Loads both checkpoints, runs ``predict_from_korean_form_deberta_forcing``
    on a deep copy of the test data and writes one JSON object per line to
    ``/content/Korean_ABSA/jsonl_files/deberta_Gpu_20F.jsonl``. The ids in the
    file are regenerated sequentially (``nikluge-sa-2022-test-00001``, ...);
    the returned DataFrame keeps the ids found in the test data.

    Notes:
        * Lines are serialised with ``json.dumps`` so quotes, backslashes,
          apostrophes or the text "None" inside a sentence no longer corrupt
          the output.
        * The polarity checkpoint is loaded with ``map_location=device``, like
          the category checkpoint, so it also loads on a machine without the
          device it was saved from.
        * The datasets/DataLoaders that were built but never iterated are no
          longer created; prediction tokenizes the sentences itself.

    Returns:
        pandas.DataFrame with columns ``id``, ``sentence_form``, ``annotation``.
    """
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)

    # add_special_tokens updates the tokenizer in place; the returned count is unused.
    tokenizer_deberta.add_special_tokens(special_tokens_dict)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = DebertaBaseClassifier(len(tf_id_to_name), len(tokenizer_deberta))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/deberta_Gpu_epoch_20.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_deberta_forcing(tokenizer_deberta, tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    output_path = '/content/Korean_ABSA/jsonl_files/deberta_Gpu_20F.jsonl'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for i, row in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-' + str(i).zfill(5),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            # ensure_ascii=False keeps the Korean text readable in the file.
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the pipeline defined in the cell above: it writes the JSONL prediction file and returns the DataFrame.
test_sentiment_analysis() 

## deberta8 가45

In [None]:
#deberta8 가45
# Notebook-global batch size; this pipeline predicts one (sentence, pair) at a
# time and does not read it.
batch_size = 32
def test_sentiment_analysis():
    """Predict the test set with DeBERTa (category, epoch 8) + ELECTRA (polarity, epoch 45).

    Loads both checkpoints, runs ``predict_from_korean_form_deberta`` on a deep
    copy of the test data and writes one JSON object per line to
    ``/content/Korean_ABSA/jsonl_files/deberta_Gpu_8.jsonl``. The ids in the
    file are regenerated sequentially (``nikluge-sa-2022-test-00001``, ...);
    the returned DataFrame keeps the ids found in the test data.

    Notes:
        * Lines are serialised with ``json.dumps`` so quotes, backslashes,
          apostrophes or the text "None" inside a sentence no longer corrupt
          the output.
        * The polarity checkpoint is loaded with ``map_location=device``, like
          the category checkpoint, so it also loads on a machine without the
          device it was saved from.
        * The datasets/DataLoaders that were built but never iterated are no
          longer created; prediction tokenizes the sentences itself.

    Returns:
        pandas.DataFrame with columns ``id``, ``sentence_form``, ``annotation``.
    """
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)

    # add_special_tokens updates the tokenizer in place; the returned count is unused.
    tokenizer_deberta.add_special_tokens(special_tokens_dict)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = DebertaBaseClassifier(len(tf_id_to_name), len(tokenizer_deberta))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/deberta_Gpu_epoch_8.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_deberta(tokenizer_deberta, tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    output_path = '/content/Korean_ABSA/jsonl_files/deberta_Gpu_8.jsonl'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for i, row in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-' + str(i).zfill(5),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            # ensure_ascii=False keeps the Korean text readable in the file.
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the pipeline defined in the cell above: it writes the JSONL prediction file and returns the DataFrame.
test_sentiment_analysis() 

## roberta_gpu9 가45

In [None]:
#roberta_gpu9 가45
# Notebook-global batch size; this pipeline predicts one (sentence, pair) at a
# time and does not read it.
batch_size = 32
def test_sentiment_analysis():
    """Predict the test set with RoBERTa (category, epoch 9) + ELECTRA (polarity, epoch 45).

    Loads both checkpoints, runs ``predict_from_korean_form_roberta`` on a deep
    copy of the test data and writes one JSON object per line to
    ``/content/Korean_ABSA/jsonl_files/roberta_Gpu_9.jsonl``. The ids in the
    file are regenerated sequentially (``nikluge-sa-2022-test-00001``, ...);
    the returned DataFrame keeps the ids found in the test data.

    Notes:
        * Lines are serialised with ``json.dumps`` so quotes, backslashes,
          apostrophes or the text "None" inside a sentence no longer corrupt
          the output.
        * The polarity checkpoint is loaded with ``map_location=device``, like
          the category checkpoint, so it also loads on a machine without the
          device it was saved from.
        * The datasets/DataLoaders that were built but never iterated are no
          longer created; prediction tokenizes the sentences itself.

    Returns:
        pandas.DataFrame with columns ``id``, ``sentence_form``, ``annotation``.
    """
    tokenizer_roberta = AutoTokenizer.from_pretrained(base_model_roberta)
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)

    # add_special_tokens updates the tokenizer in place; the returned count is unused.
    tokenizer_roberta.add_special_tokens(special_tokens_dict)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = RobertaBaseClassifier(len(tf_id_to_name), len(tokenizer_roberta))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/roberta_Gpuragi_epoch_9.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_roberta(tokenizer_roberta, tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    output_path = '/content/Korean_ABSA/jsonl_files/roberta_Gpu_9.jsonl'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for i, row in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-' + str(i).zfill(5),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            # ensure_ascii=False keeps the Korean text readable in the file.
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the pipeline defined in the cell above: it writes the JSONL prediction file and returns the DataFrame.
test_sentiment_analysis() 

## kelec Gpu dr05 7 가45

In [None]:
# kelec Gpu dr05 7 가45
# Notebook-global batch size; this pipeline predicts one (sentence, pair) at a
# time and does not read it.
batch_size = 32
def test_sentiment_analysis():
    """Predict the test set with ELECTRA for both category (dropout 0.5, epoch 7) and polarity (epoch 45).

    Loads both checkpoints, runs ``predict_from_korean_form_kelec`` on a deep
    copy of the test data and writes one JSON object per line to
    ``/content/Korean_ABSA/jsonl_files/kelec_Gpu_drop05_7.jsonl``. The ids in
    the file are regenerated sequentially (``nikluge-sa-2022-test-00001``,
    ...); the returned DataFrame keeps the ids found in the test data.

    Notes:
        * Lines are serialised with ``json.dumps`` so quotes, backslashes,
          apostrophes or the text "None" inside a sentence no longer corrupt
          the output.
        * Both checkpoints are loaded with ``map_location=device`` so they
          also load on a machine without the device they were saved from.
        * The datasets/DataLoaders that were built but never iterated are no
          longer created; prediction tokenizes the sentences itself.

    Returns:
        pandas.DataFrame with columns ``id``, ``sentence_form``, ``annotation``.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # add_special_tokens updates the tokenizer in place; the returned count is unused.
    tokenizer.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_dr05(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelec_cate_Gpu_data_drop0_5_epoch_7.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_drop05_7.jsonl'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for i, row in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-' + str(i).zfill(5),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            # ensure_ascii=False keeps the Korean text readable in the file.
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the pipeline defined in the cell above: it writes the JSONL prediction file and returns the DataFrame.
test_sentiment_analysis() 

## gtgr layer dr05 12 가45

In [None]:
#gtgr layer dr05 12 가45
batch_size = 32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Layer_Dropout05``)
    and the polarity model (``ElectraBaseClassifier_Pola_Base``) from their
    checkpoints, and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/G_Integral_kElectra_layer_dropout_epoch_12.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_drop_layer_12.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # saved on a GPU can still be restored on a CPU-only runtime.
    model = ElectraBaseClassifier_Cate_Layer_Dropout05(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec_Gpu_itgr_drop05_8_layer

In [None]:
#kelec_Gpu_itgr_drop05_8_layer
batch_size = 32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Layer_Dropout05``,
    epoch-8 checkpoint) and the polarity model
    (``ElectraBaseClassifier_Pola_Base``) and annotates a deep copy of the test
    records with ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/G_Integral_kElectra_layer_dropout_epoch_8.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_drop05_8_layer.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # saved on a GPU can still be restored on a CPU-only runtime.
    model = ElectraBaseClassifier_Cate_Layer_Dropout05(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec_Gpu_itgr_drop05_9_layer

In [None]:
#kelec_Gpu_itgr_drop05_9_layer
batch_size = 32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Layer_Dropout05``,
    epoch-9 checkpoint) and the polarity model
    (``ElectraBaseClassifier_Pola_Base``) and annotates a deep copy of the test
    records with ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/G_Integral_kElectra_layer_dropout_epoch_9.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_drop05_9_layer.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # saved on a GPU can still be restored on a CPU-only runtime.
    model = ElectraBaseClassifier_Cate_Layer_Dropout05(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec gpu 16

In [None]:
# kelec gpu 16 (batch 32)
batch_size = 32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``, epoch-16
    checkpoint) and the polarity model (``ElectraBaseClassifier_Pola_Base``)
    and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_16.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_16.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec gpu 11

In [None]:
# kelec gpu 11 (batch 32)
batch_size=32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``, epoch-11
    checkpoint) and the polarity model (``ElectraBaseClassifier_Pola_Base``)
    and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_11.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_11.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

##  kelec gpu 19

In [None]:
# kelec gpu 19 (batch 32)
batch_size=32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``, epoch-19
    checkpoint) and the polarity model (``ElectraBaseClassifier_Pola_Base``)
    and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_19.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_19.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

##  kelec gpu 15 

In [None]:
# kelec gpu 15 (batch 32)
batch_size=32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``, epoch-15
    checkpoint) and the polarity model (``ElectraBaseClassifier_Pola_Base``)
    and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_15.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_15.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred


In [None]:
test_sentiment_analysis() 

##  kelec gpu 20

In [None]:
# kelec gpu 20 (batch 32)
batch_size=32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``, epoch-20
    checkpoint) and the polarity model (``ElectraBaseClassifier_Pola_Base``)
    and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_20.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_20.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

##  kelec gpu 6

In [None]:
# kelec gpu 6 (batch 32)
batch_size=32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``, epoch-6
    checkpoint) and the polarity model (``ElectraBaseClassifier_Pola_Base``)
    and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_6.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_6.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

##  kelec gpu0.5_10 

In [None]:
# kelec gpu0.5_10 (batch 16) 
batch_size=16
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_dr05``) and the
    polarity model (``ElectraBaseClassifier_Pola_Base``) from their
    checkpoints, and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_Gpu_drop05_10.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_drop05_10.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_dr05(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred


In [None]:
test_sentiment_analysis() 

##  kelec gpu6F

In [None]:
# kelec gpu6F (batch 32)
batch_size=32
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``, epoch-6
    checkpoint) and the polarity model (``ElectraBaseClassifier_Pola_Base``)
    and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec_forcing`` (the "forcing" predictor, as
    opposed to the plain ``predict_from_korean_form_kelec``).

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_6.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_6F.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec_forcing(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec_Gpu_20F

In [None]:
# kelec_Gpu_20F  (batch 16)
batch_size=16
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``, epoch-20
    checkpoint) and the polarity model (``ElectraBaseClassifier_Pola_Base``)
    and annotates a deep copy of the test records with
    ``predict_from_korean_form_kelec_forcing`` (the "forcing" predictor, as
    opposed to the plain ``predict_from_korean_form_kelec``).

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_20.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_Gpu_20F.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec_forcing(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec K_9

In [None]:
# kelec K_9 (batch 16)
batch_size=16
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``,
    ``kelec_K_data_epoch_9`` checkpoint) and the polarity model
    (``ElectraBaseClassifier_Pola_Base``) and annotates a deep copy of the test
    records with ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_K_data_epoch_9.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_K_9.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

##  kelec K_8

In [None]:
# kelec K_8 
batch_size=16
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_Base``,
    ``kelec_K_data_epoch_8`` checkpoint) and the polarity model
    (``ElectraBaseClassifier_Pola_Base``) and annotates a deep copy of the test
    records with ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_K_data_epoch_8.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_K_8.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

##  kelec_K_dr05_7_가45

In [None]:
# kelec_K_dr05_7_가45 (batch16)
batch_size=16
def test_sentiment_analysis():
    """Run inference on the test set and export the predictions as JSONL.

    Builds the tokenizer from ``base_model_elec`` (extended with
    ``special_tokens_dict``), loads the test records from ``test_data_path``,
    restores the category model (``ElectraBaseClassifier_Cate_dr05``,
    ``kelec_K_dr0.5_epoch_7`` checkpoint) and the polarity model
    (``ElectraBaseClassifier_Pola_Base``) and annotates a deep copy of the test
    records with ``predict_from_korean_form_kelec``.

    One JSON object is written per line. The ``id`` written to the file is
    regenerated from the 1-based row position (``nikluge-sa-2022-test-00001``,
    ...), whereas the returned frame keeps the ids of the predicted records.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    category_ckpt = "/content/Korean_ABSA/pt_files/kelec_K_dr0.5_epoch_7.pt"
    polarity_ckpt = "/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt"
    output_path = '/content/Korean_ABSA/jsonl_files/kelec_K_drop_05_7.jsonl'

    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Both models are sized with len(tokenizer), so the special tokens have to
    # be registered before the models are instantiated.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_dr05(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(category_ckpt, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(polarity_ckpt, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    columns = ('id', 'sentence_form', 'annotation')
    df_pred = pd.DataFrame({col: [item[col] for item in pred_data] for col in columns})

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        for row_number, item in enumerate(pred_data, start=1):
            record = {
                'id': 'nikluge-sa-2022-test-{0}'.format(str(row_number).zfill(5)),
                'sentence_form': item['sentence_form'],
                'annotation': item['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence and
            # renders None as null, so every line is valid JSON.
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## Original_8_Ma32

In [None]:
# Original_8_Ma32
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=8
def test_sentiment_analysis():
    """Predict with the Original_8_Ma32 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/Original_8_Ma32.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load('/content/Korean_ABSA/pt_files/elec_realfinaldata_epoch_8.pt', map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load('/content/Korean_ABSA/pt_files/kelec_pola_Ma_Standard_epoch_32.pt', map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/Original_8_Ma32.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## Original_9_Ma32

In [None]:
# Original_9_Ma32
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=8
def test_sentiment_analysis():
    """Predict with the Original_9_Ma32 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/Original_9_Ma32.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load('/content/Korean_ABSA/pt_files/elec_realfinaldata_epoch_9.pt', map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load('/content/Korean_ABSA/pt_files/kelec_pola_Ma_Standard_epoch_32.pt', map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/Original_9_Ma32.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred


In [None]:
test_sentiment_analysis() 

## Original_10_Ma32

In [None]:
# Original_10_Ma32
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=8
def test_sentiment_analysis():
    """Predict with the Original_10_Ma32 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/Original_10_Ma32.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load('/content/Korean_ABSA/pt_files/elec_realfinaldata_epoch_10.pt', map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load('/content/Korean_ABSA/pt_files/kelec_pola_Ma_Standard_epoch_32.pt', map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/Original_10_Ma32.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred


In [None]:
test_sentiment_analysis() 

## kelec_Gpu_6_Ma32

In [None]:
# kelec_Gpu_6_Ma32
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=8
def test_sentiment_analysis():
    """Predict with the kelec_Gpu_6_Ma32 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/kelec_Gpu_6_Ma32.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load('/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_6.pt', map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load('/content/Korean_ABSA/pt_files/kelec_pola_Ma_Standard_epoch_32.pt', map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_Ma32.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred


In [None]:
test_sentiment_analysis() 

## kelectra D_30

In [None]:
# kelectra D_30
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelectra D_30 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/kelec_D_30.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load('/content/Korean_ABSA/pt_files/electra_kky_category_dataD_epoch_30.pt', map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_D_30.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred


In [None]:
test_sentiment_analysis() 

## kelec_D_drop05_20

In [None]:
# kelec_D_drop05_20
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_D_drop05_20 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category
    (``ElectraBaseClassifier_Cate_dr05``) and polarity checkpoints, runs
    ``predict_from_korean_form_kelec`` over the test file and writes one JSON
    object per sentence to ``jsonl_files/kelec_D_drop05_20.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_dr05(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/catekelec_D_0.5_epoch_20.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_D_drop05_20.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred


In [None]:
test_sentiment_analysis() 

## kelec_Gputrl_9

In [None]:
#  kelec_Gputrl_9
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_Gputrl_9 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/kelec_Gputrl_9.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Kelectra_GpuraGI_Itgr_Data_epoch_9.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gputrl_9.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec_Gputrl_11


In [None]:
#kelec_Gputrl_11
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_Gputrl_11 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/kelec_Gputrl_11.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Kelectra_GpuraGI_Itgr_Data_epoch_11.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gputrl_11.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

##  kelec_K_15


In [None]:
#  kelec_K_15
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_K_15 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/kelec_K_15.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelec_K_data_epoch_15.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_K_15.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec_Gpu_6_58

In [None]:
# kelec_Gpu_6_58
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_Gpu_6_58 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/kelec_Gpu_6_58.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer) is passed to both constructors.
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/g_prime_dataelectra_kky_category_datag_prime_epoch_6.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_58.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null; swapping
    # ' for " on str(list) yields invalid JSON for such text. ensure_ascii=False
    # keeps the Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_58.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred


In [None]:
test_sentiment_analysis() 

## kelec_K_drop_05_7_Ma32_LR2

In [None]:
# kelec_K_drop_05_7_Ma32_LR2
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_K_drop_05_7_Ma32_LR2 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category
    (``ElectraBaseClassifier_Cate_dr05``) and polarity checkpoints, runs
    ``predict_from_korean_form_kelec`` over the test file and writes one JSON
    object per sentence to ``jsonl_files/kelec_K_drop_05_7_Ma32_LR2.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer_kelec) is passed to both constructors.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_dr05(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelec_K_dr0.5_epoch_7.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Ma_Deep_epoch_2.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null without
    # touching the substring "None" inside real text, which the textual
    # replace('None', 'null') would rewrite. ensure_ascii=False keeps the
    # Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_K_drop_05_7_Ma32_LR2.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec_K_drop_05_7_LR_1_Ma32_LR_2

In [None]:
# kelec_K_drop_05_7_LR_1_Ma32_LR_2
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_K_drop_05_7_LR_1_Ma32_LR_2 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category
    (``ElectraBaseClassifier_Cate_dr05``) and polarity checkpoints, runs
    ``predict_from_korean_form_kelec`` over the test file and writes one JSON
    object per sentence to
    ``jsonl_files/kelec_K_drop_05_7_LR_1_Ma32_LR_2.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer_kelec) is passed to both constructors.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_dr05(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/K7_Lr_epoch_1.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Ma_Deep_epoch_2.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null without
    # touching the substring "None" inside real text, which the textual
    # replace('None', 'null') would rewrite. ensure_ascii=False keeps the
    # Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_K_drop_05_7_LR_1_Ma32_LR_2.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

##  kelec_K_drop_05_7_LR_1_Ma32_LR_2F

In [None]:
#  kelec_K_drop_05_7_LR_1_Ma32_LR_2F
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_K_drop_05_7_LR_1_Ma32_LR_2F setup and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category
    (``ElectraBaseClassifier_Cate_dr05``) and polarity checkpoints, runs
    ``predict_from_korean_form_kelec_forcing`` (the "forcing" predictor rather
    than the plain one) over the test file and writes one JSON object per
    sentence to ``jsonl_files/kelec_K_drop_05_7_LR_1_Ma32_LR_2F.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer_kelec) is passed to both constructors.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_dr05(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/K7_Lr_epoch_1.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Ma_Deep_epoch_2.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec_forcing(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null without
    # touching the substring "None" inside real text, which the textual
    # replace('None', 'null') would rewrite. ensure_ascii=False keeps the
    # Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_K_drop_05_7_LR_1_Ma32_LR_2F.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec_Gputrl_7

In [None]:
#kelec_Gputrl_7
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_Gputrl_7 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/kelec_Gputrl_7.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer_kelec) is passed to both constructors.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Kelectra_GpuraGI_Itgr_Data_epoch_7.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null without
    # touching the substring "None" inside real text, which the textual
    # replace('None', 'null') would rewrite. ensure_ascii=False keeps the
    # Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gputrl_7.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec_Gputrl_10

In [None]:
#kelec_Gputrl_10
# Notebook-level global; not read inside this function, kept in case other cells use it.
batch_size=32
def test_sentiment_analysis():
    """Predict with the kelec_Gputrl_10 checkpoint pair and save the result as JSON Lines.

    Loads the tokenizer named by ``base_model_elec``, restores the category and
    polarity checkpoints, runs ``predict_from_korean_form_kelec`` over the test
    file and writes one JSON object per sentence to
    ``jsonl_files/kelec_Gputrl_10.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted sentence.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Must run before the models are built: len(tokenizer_kelec) is passed to both constructors.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # NOTE(review): the returned datasets are not used by this function.
    # The call is kept in case get_dataset has effects on test_data --
    # confirm and remove if it does not.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Kelectra_GpuraGI_Itgr_Data_epoch_10.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_45.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data stays untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    # json.dumps escapes quotes and backslashes and maps None to null without
    # touching the substring "None" inside real text, which the textual
    # replace('None', 'null') would rewrite. ensure_ascii=False keeps the
    # Korean text readable, so the file is opened as UTF-8 explicitly.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gputrl_10.jsonl', 'w', encoding='utf-8') as file:
        for idx, row in enumerate(pred_data, start=1):
            record = {
                # Ids are regenerated from the row position, zero-padded to 5 digits.
                'id': 'nikluge-sa-2022-test-{0}'.format(str(idx).zfill(5)),
                'sentence_form': row['sentence_form'],
                'annotation': row['annotation'],
            }
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

    return df_pred

In [None]:
test_sentiment_analysis() 

## kelec_refind_8_Ma32

In [None]:
# kelec_refind_8_Ma32
# Notebook-level batch size setting; this function itself does not read it.
batch_size=32
def test_sentiment_analysis(
    category_checkpoint="/content/Korean_ABSA/pt_files/kelec_train+dev_refined_data_epoch_8.pt",
    polarity_checkpoint='/content/Korean_ABSA/pt_files/kelec_pola_Ma_Standard_epoch_32.pt',
    output_path='/content/Korean_ABSA/jsonl_files/kelec_refind_8_Ma32.jsonl',
):
    """Predict annotations for the test set and save them as a JSONL submission file.

    The sentences are read from ``test_data_path``, annotated by
    ``predict_from_korean_form_kelec`` using a category classifier and a
    polarity classifier, and written to ``output_path`` with one JSON object
    per line.

    Args:
        category_checkpoint: State-dict file loaded into
            ``ElectraBaseClassifier_Cate_Base``.
        polarity_checkpoint: State-dict file loaded into
            ``ElectraBaseClassifier_Pola_Base``.
        output_path: Destination of the JSONL file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``id``, ``sentence_form`` and
        ``annotation`` taken from the prediction records. Its ``id`` column
        keeps the ids of the prediction records, whereas the file uses
        sequentially renumbered ids.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Register the special tokens before measuring the vocabulary so that the
    # size handed to the classifiers includes them.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    vocab_size = len(tokenizer_kelec)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), vocab_size)
    model.load_state_dict(torch.load(category_checkpoint, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), vocab_size)
    polarity_model.load_state_dict(torch.load(polarity_checkpoint, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a copy so the loaded test records stay untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [record['id'] for record in pred_data],
        'sentence_form': [record['sentence_form'] for record in pred_data],
        'annotation': [record['annotation'] for record in pred_data],
    })

    with open(output_path, 'w', encoding='utf-8') as file:
        # Ids in the file are renumbered 1..N in prediction order.
        for position, record in enumerate(pred_data, start=1):
            line = {
                "id": "nikluge-sa-2022-test-{0}".format(str(position).zfill(5)),
                "sentence_form": record['sentence_form'],
                "annotation": record['annotation'],
            }
            # json.dumps escapes quotes and backslashes inside the sentence and
            # the annotation, which manual string formatting cannot do.
            file.write(json.dumps(line, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the inference function defined in the previous cell. It writes the
# kelec_refind_8_Ma32.jsonl submission file and returns the predictions as a
# DataFrame (shown as the cell output when run in a notebook).
test_sentiment_analysis() 

## kelec_refind_7_Ma32

In [None]:
# kelec_refind_7_Ma32
# Notebook-level batch size setting; this function itself does not read it.
batch_size=32
def test_sentiment_analysis(
    category_checkpoint="/content/Korean_ABSA/pt_files/kelec_train+dev_refined_data_epoch_7.pt",
    polarity_checkpoint='/content/Korean_ABSA/pt_files/kelec_pola_Ma_Standard_epoch_32.pt',
    output_path='/content/Korean_ABSA/jsonl_files/kelec_refind_7_Ma32.jsonl',
):
    """Predict annotations for the test set and save them as a JSONL submission file.

    The sentences are read from ``test_data_path``, annotated by
    ``predict_from_korean_form_kelec`` using a category classifier and a
    polarity classifier, and written to ``output_path`` with one JSON object
    per line.

    Args:
        category_checkpoint: State-dict file loaded into
            ``ElectraBaseClassifier_Cate_Base``.
        polarity_checkpoint: State-dict file loaded into
            ``ElectraBaseClassifier_Pola_Base``.
        output_path: Destination of the JSONL file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``id``, ``sentence_form`` and
        ``annotation`` taken from the prediction records. Its ``id`` column
        keeps the ids of the prediction records, whereas the file uses
        sequentially renumbered ids.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Register the special tokens before measuring the vocabulary so that the
    # size handed to the classifiers includes them.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    vocab_size = len(tokenizer_kelec)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), vocab_size)
    model.load_state_dict(torch.load(category_checkpoint, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), vocab_size)
    polarity_model.load_state_dict(torch.load(polarity_checkpoint, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a copy so the loaded test records stay untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [record['id'] for record in pred_data],
        'sentence_form': [record['sentence_form'] for record in pred_data],
        'annotation': [record['annotation'] for record in pred_data],
    })

    with open(output_path, 'w', encoding='utf-8') as file:
        # Ids in the file are renumbered 1..N in prediction order.
        for position, record in enumerate(pred_data, start=1):
            line = {
                "id": "nikluge-sa-2022-test-{0}".format(str(position).zfill(5)),
                "sentence_form": record['sentence_form'],
                "annotation": record['annotation'],
            }
            # json.dumps escapes quotes and backslashes inside the sentence and
            # the annotation, which manual string formatting cannot do.
            file.write(json.dumps(line, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the inference function defined in the previous cell. It writes the
# kelec_refind_7_Ma32.jsonl submission file and returns the predictions as a
# DataFrame (shown as the cell output when run in a notebook).
test_sentiment_analysis() 

## kelec_refind_6_Ma32

In [None]:
# kelec_refind_6_Ma32
# Notebook-level batch size setting; this function itself does not read it.
batch_size=32
def test_sentiment_analysis(
    category_checkpoint="/content/Korean_ABSA/pt_files/kelec_train+dev_refined_data_epoch_6.pt",
    polarity_checkpoint='/content/Korean_ABSA/pt_files/kelec_pola_Ma_Standard_epoch_32.pt',
    output_path='/content/Korean_ABSA/jsonl_files/kelec_refind_6_Ma32.jsonl',
):
    """Predict annotations for the test set and save them as a JSONL submission file.

    The sentences are read from ``test_data_path``, annotated by
    ``predict_from_korean_form_kelec`` using a category classifier and a
    polarity classifier, and written to ``output_path`` with one JSON object
    per line.

    Args:
        category_checkpoint: State-dict file loaded into
            ``ElectraBaseClassifier_Cate_Base``.
        polarity_checkpoint: State-dict file loaded into
            ``ElectraBaseClassifier_Pola_Base``.
        output_path: Destination of the JSONL file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``id``, ``sentence_form`` and
        ``annotation`` taken from the prediction records. Its ``id`` column
        keeps the ids of the prediction records, whereas the file uses
        sequentially renumbered ids.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Register the special tokens before measuring the vocabulary so that the
    # size handed to the classifiers includes them.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    vocab_size = len(tokenizer_kelec)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), vocab_size)
    model.load_state_dict(torch.load(category_checkpoint, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), vocab_size)
    polarity_model.load_state_dict(torch.load(polarity_checkpoint, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a copy so the loaded test records stay untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [record['id'] for record in pred_data],
        'sentence_form': [record['sentence_form'] for record in pred_data],
        'annotation': [record['annotation'] for record in pred_data],
    })

    with open(output_path, 'w', encoding='utf-8') as file:
        # Ids in the file are renumbered 1..N in prediction order.
        for position, record in enumerate(pred_data, start=1):
            line = {
                "id": "nikluge-sa-2022-test-{0}".format(str(position).zfill(5)),
                "sentence_form": record['sentence_form'],
                "annotation": record['annotation'],
            }
            # json.dumps escapes quotes and backslashes inside the sentence and
            # the annotation, which manual string formatting cannot do.
            file.write(json.dumps(line, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the inference function defined in the previous cell. It writes the
# kelec_refind_6_Ma32.jsonl submission file and returns the predictions as a
# DataFrame (shown as the cell output when run in a notebook).
test_sentiment_analysis() 

## kelec_refind_5_Ma32

In [None]:
# kelec_refind_5_Ma32
# Notebook-level batch size setting; this function itself does not read it.
batch_size=32
def test_sentiment_analysis(
    category_checkpoint="/content/Korean_ABSA/pt_files/kelec_train+dev_refined_data_epoch_5.pt",
    polarity_checkpoint='/content/Korean_ABSA/pt_files/kelec_pola_Ma_Standard_epoch_32.pt',
    output_path='/content/Korean_ABSA/jsonl_files/kelec_refind_5_Ma32.jsonl',
):
    """Predict annotations for the test set and save them as a JSONL submission file.

    The sentences are read from ``test_data_path``, annotated by
    ``predict_from_korean_form_kelec`` using a category classifier and a
    polarity classifier, and written to ``output_path`` with one JSON object
    per line.

    Args:
        category_checkpoint: State-dict file loaded into
            ``ElectraBaseClassifier_Cate_Base``.
        polarity_checkpoint: State-dict file loaded into
            ``ElectraBaseClassifier_Pola_Base``.
        output_path: Destination of the JSONL file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``id``, ``sentence_form`` and
        ``annotation`` taken from the prediction records. Its ``id`` column
        keeps the ids of the prediction records, whereas the file uses
        sequentially renumbered ids.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Register the special tokens before measuring the vocabulary so that the
    # size handed to the classifiers includes them.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    vocab_size = len(tokenizer_kelec)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), vocab_size)
    model.load_state_dict(torch.load(category_checkpoint, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), vocab_size)
    polarity_model.load_state_dict(torch.load(polarity_checkpoint, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a copy so the loaded test records stay untouched.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [record['id'] for record in pred_data],
        'sentence_form': [record['sentence_form'] for record in pred_data],
        'annotation': [record['annotation'] for record in pred_data],
    })

    with open(output_path, 'w', encoding='utf-8') as file:
        # Ids in the file are renumbered 1..N in prediction order.
        for position, record in enumerate(pred_data, start=1):
            line = {
                "id": "nikluge-sa-2022-test-{0}".format(str(position).zfill(5)),
                "sentence_form": record['sentence_form'],
                "annotation": record['annotation'],
            }
            # json.dumps escapes quotes and backslashes inside the sentence and
            # the annotation, which manual string formatting cannot do.
            file.write(json.dumps(line, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
# Run the inference function defined in the previous cell. It writes the
# kelec_refind_5_Ma32.jsonl submission file and returns the predictions as a
# DataFrame (shown as the cell output when run in a notebook).
test_sentiment_analysis() 

## Deberta_Kup_GPU_Kdr_Original_Forcing

In [None]:
# Paths of the intermediate JSONL files through which Win() passes the
# still-unannotated sentences from one cascade stage to the next.
(
    test_data_path_blank_first,
    test_data_path_blank_second,
    test_data_path_blank_third,
    test_data_path_blank_fourth,
    test_data_path_blank_fifth,
    test_data_path_blank_sixth,
) = (
    './Blank1.jsonl',
    './Blank2.jsonl',
    './Blank3.jsonl',
    './Blank4.jsonl',
    './Blank5.jsonl',
    './Blank6.jsonl',
)

In [None]:
#Deberta_Kup_GPU_Kdr_Original_Forcing
# Notebook-level batch size setting; this function itself does not read it.
batch_size=32
def Win(output_path='/content/Korean_ABSA/jsonl_files/Deberta_Kup_GPU_Kdr_Original_Forcing.jsonl'):
    """Annotate the test set with a six-stage model cascade and save the merged result.

    Every stage runs one category classifier together with the shared polarity
    classifier. Sentences for which a stage returns an empty annotation are
    written to ``./BlankN.jsonl`` and become the input of the following stage.
    Stage order: DeBERTa -> K up -> Gpu LR -> K dr -> Original -> forcing,
    where the last stage calls ``predict_from_korean_form_kelec_forcing``.
    The predictions of all stages are concatenated and sorted by ``id``; rows
    with a non-empty annotation are written to ``output_path``.

    Args:
        output_path: Destination of the merged JSONL file.

    Returns:
        A ``pandas.DataFrame`` built from the records read back from
        ``output_path`` with ``jsonlload``.
    """
    checkpoint_dir = "/content/Korean_ABSA/pt_files/"
    # Fixed placeholder annotation stored with every hand-off record.
    placeholder_annotation = [["제품 전체#일반", [None, 0, 0], "positive"]]

    def load_classifier(classifier_cls, num_labels, vocab_size, checkpoint_name):
        """Build a classifier, load its weights, move it to ``device`` and set eval mode."""
        classifier = classifier_cls(num_labels, vocab_size)
        classifier.load_state_dict(torch.load(checkpoint_dir + checkpoint_name, map_location=device))
        classifier.to(device)
        classifier.eval()
        return classifier

    def write_jsonl(path, records):
        """Write ``records`` to ``path`` as UTF-8 JSON, one object per line."""
        with open(path, 'w', encoding='utf-8') as file:
            for record in records:
                # json.dumps escapes quotes and backslashes in the text, so the
                # file can always be parsed again by the next stage.
                file.write(json.dumps(record, ensure_ascii=False) + "\n")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    # Register the special tokens before measuring the vocabularies so that the
    # sizes handed to the classifiers include them.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    tokenizer_deberta.add_special_tokens(special_tokens_dict)
    kelec_vocab = len(tokenizer_kelec)
    deberta_vocab = len(tokenizer_deberta)

    # All stages use the same polarity checkpoint, so it is loaded only once.
    polarity_model = load_classifier(
        ElectraBaseClassifier_Pola_Base, len(polarity_id_to_name), kelec_vocab, "Pola_Lr_epoch_2.pt")

    def predict_deberta(category_model, sentences):
        return predict_from_korean_form_deberta(
            tokenizer_deberta, tokenizer_kelec, category_model, polarity_model, sentences)

    def predict_kelec(category_model, sentences):
        return predict_from_korean_form_kelec(tokenizer_kelec, category_model, polarity_model, sentences)

    def predict_forcing(category_model, sentences):
        return predict_from_korean_form_kelec_forcing(tokenizer_kelec, category_model, polarity_model, sentences)

    # (progress label, category classifier class, vocabulary size, checkpoint file, predictor)
    stages = [
        ("Deberta!!", DebertaBaseClassifier, deberta_vocab,
         "deberta_Gpu_epoch_5.pt", predict_deberta),
        ("K up!!", ElectraBaseClassifier_Cate_hiddenup, kelec_vocab,
         "kelec_cate_k_data_uphidden_epoch_8.pt", predict_kelec),
        ("Gpu LR!!", ElectraBaseClassifier_Cate_Base, kelec_vocab,
         "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_kelec),
        ("K dr!!!", ElectraBaseClassifier_Cate_Base, kelec_vocab,
         "kelec_K_dr0.5_epoch_7.pt", predict_kelec),
        ("Original!!", ElectraBaseClassifier_Cate_Base, kelec_vocab,
         "kelec_train+dev_refined_data_epoch_8.pt", predict_kelec),
        ("The Last Forcing!!", ElectraBaseClassifier_Cate_Base, kelec_vocab,
         "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_forcing),
    ]

    # (file written after stage N, path from which stage N+1 reads its input).
    # The read side goes through the notebook-level path variables, which the
    # preceding cell sets to the same files.
    handoffs = [
        ('./Blank1.jsonl', test_data_path_blank_first),
        ('./Blank2.jsonl', test_data_path_blank_second),
        ('./Blank3.jsonl', test_data_path_blank_third),
        ('./Blank4.jsonl', test_data_path_blank_fourth),
        ('./Blank5.jsonl', test_data_path_blank_fifth),
    ]

    stage_frames = []
    input_path = test_data_path
    for stage_index, (label, classifier_cls, vocab_size, checkpoint_name, predict) in enumerate(stages):
        print(label)

        sentences = jsonlload(input_path)
        category_model = load_classifier(classifier_cls, len(tf_id_to_name), vocab_size, checkpoint_name)
        # Predict on a copy so the loaded records stay untouched.
        frame = pd.DataFrame(predict(category_model, copy.deepcopy(sentences)))
        stage_frames.append(frame)

        # The final stage has no successor and therefore no hand-off file.
        if stage_index < len(handoffs):
            blank_path, next_input_path = handoffs[stage_index]
            write_jsonl(blank_path, (
                {
                    # The file format stores ids as strings.
                    "id": str(record['id']),
                    "sentence_form": record['sentence_form'],
                    "annotation": placeholder_annotation,
                }
                for record in frame.to_dict('records')
                if len(record['annotation']) == 0
            ))
            input_path = next_input_path

    df_final = pd.concat(stage_frames).sort_values(by=['id'], axis=0).reset_index(drop=True)

    # Rows left blank by one stage reappear with an annotation from a later
    # stage, so only annotated rows are kept.
    write_jsonl(output_path, (
        {
            "id": str(record['id']),
            "sentence_form": record['sentence_form'],
            "annotation": record['annotation'],
        }
        for record in df_final.to_dict('records')
        if len(record['annotation']) != 0
    ))

    return pd.DataFrame(jsonlload(output_path))

In [None]:
# Run the cascade defined in the previous cell. It writes the merged
# Deberta_Kup_GPU_Kdr_Original_Forcing.jsonl file and returns its content as a
# DataFrame (shown as the cell output when run in a notebook).
Win()

## Deberta_Kdr_GPU_Kup_Original_Forcing

In [None]:
# Paths of the intermediate JSONL files through which Win() passes the
# still-unannotated sentences from one cascade stage to the next.
(
    test_data_path_blank_first,
    test_data_path_blank_second,
    test_data_path_blank_third,
    test_data_path_blank_fourth,
    test_data_path_blank_fifth,
    test_data_path_blank_sixth,
) = (
    './Blank1.jsonl',
    './Blank2.jsonl',
    './Blank3.jsonl',
    './Blank4.jsonl',
    './Blank5.jsonl',
    './Blank6.jsonl',
)

In [None]:
#Deberta_Kdr_GPU_Kup_Original_Forcing
# Notebook-level batch size setting; this function itself does not read it.
batch_size=32
def Win(output_path='/content/Korean_ABSA/jsonl_files/Deberta_Kdr_GPU_Kup_Original_Forcing.jsonl'):
    """Annotate the test set with a six-stage model cascade and save the merged result.

    Every stage runs one category classifier together with the shared polarity
    classifier. Sentences for which a stage returns an empty annotation are
    written to ``./BlankN.jsonl`` and become the input of the following stage.
    Stage order: DeBERTa -> K Dr -> Gpu -> K UP -> Original -> forcing,
    where the last stage calls ``predict_from_korean_form_kelec_forcing``.
    The predictions of all stages are concatenated and sorted by ``id``; rows
    with a non-empty annotation are written to ``output_path``.

    Args:
        output_path: Destination of the merged JSONL file.

    Returns:
        A ``pandas.DataFrame`` built from the records read back from
        ``output_path`` with ``jsonlload``.
    """
    checkpoint_dir = "/content/Korean_ABSA/pt_files/"
    # Fixed placeholder annotation stored with every hand-off record.
    placeholder_annotation = [["제품 전체#일반", [None, 0, 0], "positive"]]

    def load_classifier(classifier_cls, num_labels, vocab_size, checkpoint_name):
        """Build a classifier, load its weights, move it to ``device`` and set eval mode."""
        classifier = classifier_cls(num_labels, vocab_size)
        classifier.load_state_dict(torch.load(checkpoint_dir + checkpoint_name, map_location=device))
        classifier.to(device)
        classifier.eval()
        return classifier

    def write_jsonl(path, records):
        """Write ``records`` to ``path`` as UTF-8 JSON, one object per line."""
        with open(path, 'w', encoding='utf-8') as file:
            for record in records:
                # json.dumps escapes quotes and backslashes in the text, so the
                # file can always be parsed again by the next stage.
                file.write(json.dumps(record, ensure_ascii=False) + "\n")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    # Register the special tokens before measuring the vocabularies so that the
    # sizes handed to the classifiers include them.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    tokenizer_deberta.add_special_tokens(special_tokens_dict)
    kelec_vocab = len(tokenizer_kelec)
    deberta_vocab = len(tokenizer_deberta)

    # All stages use the same polarity checkpoint, so it is loaded only once.
    polarity_model = load_classifier(
        ElectraBaseClassifier_Pola_Base, len(polarity_id_to_name), kelec_vocab, "Pola_Lr_epoch_2.pt")

    def predict_deberta(category_model, sentences):
        return predict_from_korean_form_deberta(
            tokenizer_deberta, tokenizer_kelec, category_model, polarity_model, sentences)

    def predict_kelec(category_model, sentences):
        return predict_from_korean_form_kelec(tokenizer_kelec, category_model, polarity_model, sentences)

    def predict_forcing(category_model, sentences):
        return predict_from_korean_form_kelec_forcing(tokenizer_kelec, category_model, polarity_model, sentences)

    # (progress label, category classifier class, vocabulary size, checkpoint file, predictor)
    stages = [
        ("Deberta!!", DebertaBaseClassifier, deberta_vocab,
         "deberta_Gpu_epoch_5.pt", predict_deberta),
        ("K Dr!!", ElectraBaseClassifier_Cate_Base, kelec_vocab,
         "kelec_K_dr0.5_epoch_7.pt", predict_kelec),
        ("Gpu!!", ElectraBaseClassifier_Cate_Base, kelec_vocab,
         "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_kelec),
        ("K UP!!!", ElectraBaseClassifier_Cate_hiddenup, kelec_vocab,
         "kelec_cate_k_data_uphidden_epoch_8.pt", predict_kelec),
        ("Original!!", ElectraBaseClassifier_Cate_Base, kelec_vocab,
         "kelec_train+dev_refined_data_epoch_8.pt", predict_kelec),
        ("The Last Forcing!!", ElectraBaseClassifier_Cate_Base, kelec_vocab,
         "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_forcing),
    ]

    # (file written after stage N, path from which stage N+1 reads its input).
    # The read side goes through the notebook-level path variables, which the
    # preceding cell sets to the same files.
    handoffs = [
        ('./Blank1.jsonl', test_data_path_blank_first),
        ('./Blank2.jsonl', test_data_path_blank_second),
        ('./Blank3.jsonl', test_data_path_blank_third),
        ('./Blank4.jsonl', test_data_path_blank_fourth),
        ('./Blank5.jsonl', test_data_path_blank_fifth),
    ]

    stage_frames = []
    input_path = test_data_path
    for stage_index, (label, classifier_cls, vocab_size, checkpoint_name, predict) in enumerate(stages):
        print(label)

        sentences = jsonlload(input_path)
        category_model = load_classifier(classifier_cls, len(tf_id_to_name), vocab_size, checkpoint_name)
        # Predict on a copy so the loaded records stay untouched.
        frame = pd.DataFrame(predict(category_model, copy.deepcopy(sentences)))
        stage_frames.append(frame)

        # The final stage has no successor and therefore no hand-off file.
        if stage_index < len(handoffs):
            blank_path, next_input_path = handoffs[stage_index]
            write_jsonl(blank_path, (
                {
                    # The file format stores ids as strings.
                    "id": str(record['id']),
                    "sentence_form": record['sentence_form'],
                    "annotation": placeholder_annotation,
                }
                for record in frame.to_dict('records')
                if len(record['annotation']) == 0
            ))
            input_path = next_input_path

    df_final = pd.concat(stage_frames).sort_values(by=['id'], axis=0).reset_index(drop=True)

    # Rows left blank by one stage reappear with an annotation from a later
    # stage, so only annotated rows are kept.
    write_jsonl(output_path, (
        {
            "id": str(record['id']),
            "sentence_form": record['sentence_form'],
            "annotation": record['annotation'],
        }
        for record in df_final.to_dict('records')
        if len(record['annotation']) != 0
    ))

    return pd.DataFrame(jsonlload(output_path))

In [None]:
# Run the cascade defined in the previous cell. It writes the merged
# Deberta_Kdr_GPU_Kup_Original_Forcing.jsonl file and returns its content as a
# DataFrame (shown as the cell output when run in a notebook).
Win()

## Original_Kdr_Gpu_Kup8_Deberta_Forcing

In [None]:
# Paths of the intermediate JSONL files of the multi-stage Win() cascade
# (presumably read back by the Win() defined in the next cell, which writes
# ./Blank1.jsonl -- confirm against the full definition).
(
    test_data_path_blank_first,
    test_data_path_blank_second,
    test_data_path_blank_third,
    test_data_path_blank_fourth,
    test_data_path_blank_fifth,
    test_data_path_blank_sixth,
) = (
    './Blank1.jsonl',
    './Blank2.jsonl',
    './Blank3.jsonl',
    './Blank4.jsonl',
    './Blank5.jsonl',
    './Blank6.jsonl',
)

In [None]:
#Original_Kdr_Gpu_Kup8_Deberta_Forcing
batch_size=32
def Win():
    """Run the six-stage category/polarity cascade and write the merged result.

    Stages run in the order Original -> K dr -> Gpu LR -> K up -> Deberta ->
    forcing. Each of the first five stages predicts with its own category
    checkpoint plus the shared polarity checkpoint, then writes every sentence
    that came back with an empty ``annotation`` to the next ``BlankN.jsonl``
    file (carrying a placeholder annotation) so the following stage can retry
    it. The last stage runs ``predict_from_korean_form_kelec_forcing`` on the
    sentences left in ``test_data_path_blank_fifth``.

    All stage outputs are concatenated and sorted by ``id``; rows with a
    non-empty annotation are written to
    ``/content/Korean_ABSA/jsonl_files/Original_Kdr_Gpu_Kup8_Deberta_Forcing.jsonl``.

    Returns:
        pandas.DataFrame: the final JSONL file loaded back via ``jsonlload``.
    """
    checkpoint_dir = "/content/Korean_ABSA/pt_files/"
    output_path = '/content/Korean_ABSA/jsonl_files/Original_Kdr_Gpu_Kup8_Deberta_Forcing.jsonl'
    # Placeholder stored with every sentence that is still unlabelled.
    placeholder_annotation = [["제품 전체#일반", [None, 0, 0], "positive"]]

    def as_builtin(value):
        # numpy/torch scalars are not JSON-serialisable; unwrap them via .item().
        if hasattr(value, "item"):
            return value.item()
        raise TypeError(f"Cannot serialise {type(value).__name__} to JSON")

    def to_json_line(record_id, sentence, annotation):
        # json.dumps escapes quotes, backslashes and newlines, which the
        # hand-built format string did not; for clean input the text is the
        # same `{"id": "...", "sentence_form": "...", "annotation": [...]}`.
        record = {
            "id": str(record_id),
            "sentence_form": str(sentence),
            "annotation": annotation,
        }
        return json.dumps(record, ensure_ascii=False, default=as_builtin) + "\n"

    def load_checkpoint(net, checkpoint_name):
        # Restore weights, move to the target device and switch to eval mode.
        net.load_state_dict(torch.load(checkpoint_dir + checkpoint_name, map_location=device))
        net.to(device)
        net.eval()
        return net

    def run_stage(model_cls, tokenizer, checkpoint_name, predict, records, blank_path=None):
        # Predict one stage; optionally dump its still-unlabelled sentences.
        category_model = load_checkpoint(
            model_cls(len(tf_id_to_name), len(tokenizer)), checkpoint_name)
        frame = pd.DataFrame(predict(category_model, copy.deepcopy(records)))
        if blank_path is not None:
            # Encoding is left at the platform default, as before, so it keeps
            # matching whatever jsonlload reads with.
            with open(blank_path, 'w') as file:
                for row in frame.to_dict('records'):
                    if len(row['annotation']) == 0:
                        file.write(to_json_line(row['id'], row['sentence_form'],
                                                placeholder_annotation))
        return frame

    print("Original!!")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    # The special tokens change len(tokenizer), which sizes the models below.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    tokenizer_deberta.add_special_tokens(special_tokens_dict)

    # NOTE(review): the per-stage get_dataset()/DataLoader objects built by the
    # earlier version were never handed to the predict_* helpers, so they are
    # not built here. This assumes get_dataset() has no side effects - confirm.

    # Every stage uses the same polarity checkpoint, so it is loaded once.
    polarity_model = load_checkpoint(
        ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec)),
        "Pola_Lr_epoch_2.pt")

    def predict_kelec(category_model, records):
        return predict_from_korean_form_kelec(
            tokenizer_kelec, category_model, polarity_model, records)

    def predict_deberta(category_model, records):
        return predict_from_korean_form_deberta(
            tokenizer_deberta, tokenizer_kelec, category_model, polarity_model, records)

    def predict_forcing(category_model, records):
        return predict_from_korean_form_kelec_forcing(
            tokenizer_kelec, category_model, polarity_model, records)

    df_pred_first = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "kelec_train+dev_refined_data_epoch_8.pt", predict_kelec,
        jsonlload(test_data_path), test_data_path_blank_first)

    print("K dr!!")
    df_pred_second = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "kelec_K_dr0.5_epoch_7.pt", predict_kelec,
        jsonlload(test_data_path_blank_first), test_data_path_blank_second)

    print("Gpu LR!!")
    df_pred_third = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_kelec,
        jsonlload(test_data_path_blank_second), test_data_path_blank_third)

    print("K up!!!")
    df_pred_fourth = run_stage(
        ElectraBaseClassifier_Cate_hiddenup, tokenizer_kelec,
        "kelec_cate_k_data_uphidden_epoch_8.pt", predict_kelec,
        jsonlload(test_data_path_blank_third), test_data_path_blank_fourth)

    print("Deberta!!")
    df_pred_fifth = run_stage(
        DebertaBaseClassifier, tokenizer_deberta,
        "deberta_Gpu_epoch_5.pt", predict_deberta,
        jsonlload(test_data_path_blank_fourth), test_data_path_blank_fifth)

    print("The Last Forcing!!")
    df_pred_final = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_forcing,
        jsonlload(test_data_path_blank_fifth))

    df_final = pd.concat(
        [df_pred_first, df_pred_second, df_pred_third,
         df_pred_fourth, df_pred_fifth, df_pred_final]
    ).sort_values(by=['id'], axis=0).reset_index(drop=True)

    # A sentence appears once per stage that saw it; only the row that finally
    # received an annotation is written out.
    with open(output_path, 'w') as file:
        for row in df_final.to_dict('records'):
            if len(row['annotation']) != 0:
                file.write(to_json_line(row['id'], row['sentence_form'], row['annotation']))

    return pd.DataFrame(jsonlload(output_path))

In [None]:
# Run the cascade defined in the cell above. That Win() writes
# /content/Korean_ABSA/jsonl_files/Original_Kdr_Gpu_Kup8_Deberta_Forcing.jsonl
# and returns the file loaded back as a DataFrame.
# Later cells redefine Win(), so run this right after its defining cell.
Win()

## gpu_kdr_kup8_deberta_forcing

In [None]:
# Hand-off files between the stages of Win(): the sentences a stage leaves
# without an annotation are written to BlankN.jsonl and read back by the
# next stage. Blank5 and Blank6 are not referenced by the Win() defined in
# the next cell.
(
    test_data_path_blank_first,
    test_data_path_blank_second,
    test_data_path_blank_third,
    test_data_path_blank_fourth,
    test_data_path_blank_fifth,
    test_data_path_blank_sixth,
) = ('./Blank{}.jsonl'.format(stage_no) for stage_no in range(1, 7))

In [None]:
#gpu_kdr_kup8_deberta_forcing
batch_size=32
def Win():
    """Run the five-stage category/polarity cascade and write the merged result.

    Stages run in the order Gpu lr -> K dr -> K up -> Deberta -> forcing. Each
    of the first four stages predicts with its own category checkpoint plus
    the shared polarity checkpoint, then writes every sentence that came back
    with an empty ``annotation`` to the next ``BlankN.jsonl`` file (carrying a
    placeholder annotation) so the following stage can retry it. The last
    stage runs ``predict_from_korean_form_kelec_forcing`` on the sentences
    left in ``test_data_path_blank_fourth``.

    All stage outputs are concatenated and sorted by ``id``; rows with a
    non-empty annotation are written to
    ``/content/Korean_ABSA/jsonl_files/gpu_kdr_kup8_deberta_forcing.jsonl``.

    Returns:
        pandas.DataFrame: the final JSONL file loaded back via ``jsonlload``.
    """
    checkpoint_dir = "/content/Korean_ABSA/pt_files/"
    output_path = '/content/Korean_ABSA/jsonl_files/gpu_kdr_kup8_deberta_forcing.jsonl'
    # Placeholder stored with every sentence that is still unlabelled.
    placeholder_annotation = [["제품 전체#일반", [None, 0, 0], "positive"]]

    def as_builtin(value):
        # numpy/torch scalars are not JSON-serialisable; unwrap them via .item().
        if hasattr(value, "item"):
            return value.item()
        raise TypeError(f"Cannot serialise {type(value).__name__} to JSON")

    def to_json_line(record_id, sentence, annotation):
        # json.dumps escapes quotes, backslashes and newlines, which the
        # hand-built format string did not; for clean input the text is the
        # same `{"id": "...", "sentence_form": "...", "annotation": [...]}`.
        record = {
            "id": str(record_id),
            "sentence_form": str(sentence),
            "annotation": annotation,
        }
        return json.dumps(record, ensure_ascii=False, default=as_builtin) + "\n"

    def load_checkpoint(net, checkpoint_name):
        # Restore weights, move to the target device and switch to eval mode.
        net.load_state_dict(torch.load(checkpoint_dir + checkpoint_name, map_location=device))
        net.to(device)
        net.eval()
        return net

    def run_stage(model_cls, tokenizer, checkpoint_name, predict, records, blank_path=None):
        # Predict one stage; optionally dump its still-unlabelled sentences.
        category_model = load_checkpoint(
            model_cls(len(tf_id_to_name), len(tokenizer)), checkpoint_name)
        frame = pd.DataFrame(predict(category_model, copy.deepcopy(records)))
        if blank_path is not None:
            # Encoding is left at the platform default, as before, so it keeps
            # matching whatever jsonlload reads with.
            with open(blank_path, 'w') as file:
                for row in frame.to_dict('records'):
                    if len(row['annotation']) == 0:
                        file.write(to_json_line(row['id'], row['sentence_form'],
                                                placeholder_annotation))
        return frame

    print("Gpu lr!!")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    # The special tokens change len(tokenizer), which sizes the models below.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    tokenizer_deberta.add_special_tokens(special_tokens_dict)

    # NOTE(review): the per-stage get_dataset()/DataLoader objects built by the
    # earlier version were never handed to the predict_* helpers, so they are
    # not built here. This assumes get_dataset() has no side effects - confirm.

    # Every stage uses the same polarity checkpoint, so it is loaded once.
    polarity_model = load_checkpoint(
        ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec)),
        "Pola_Lr_epoch_2.pt")

    def predict_kelec(category_model, records):
        return predict_from_korean_form_kelec(
            tokenizer_kelec, category_model, polarity_model, records)

    def predict_deberta(category_model, records):
        return predict_from_korean_form_deberta(
            tokenizer_deberta, tokenizer_kelec, category_model, polarity_model, records)

    def predict_forcing(category_model, records):
        return predict_from_korean_form_kelec_forcing(
            tokenizer_kelec, category_model, polarity_model, records)

    df_pred_first = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_kelec,
        jsonlload(test_data_path), test_data_path_blank_first)

    print("K dr!!")
    df_pred_second = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "kelec_K_dr0.5_epoch_7.pt", predict_kelec,
        jsonlload(test_data_path_blank_first), test_data_path_blank_second)

    print("K up!!")
    df_pred_third = run_stage(
        ElectraBaseClassifier_Cate_hiddenup, tokenizer_kelec,
        "kelec_cate_k_data_uphidden_epoch_8.pt", predict_kelec,
        jsonlload(test_data_path_blank_second), test_data_path_blank_third)

    print("Deberta!!!")
    df_pred_fourth = run_stage(
        DebertaBaseClassifier, tokenizer_deberta,
        "deberta_Gpu_epoch_5.pt", predict_deberta,
        jsonlload(test_data_path_blank_third), test_data_path_blank_fourth)

    print("The Last Forcing!!")
    df_pred_final = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_forcing,
        jsonlload(test_data_path_blank_fourth))

    df_final = pd.concat(
        [df_pred_first, df_pred_second, df_pred_third, df_pred_fourth, df_pred_final]
    ).sort_values(by=['id'], axis=0).reset_index(drop=True)

    # A sentence appears once per stage that saw it; only the row that finally
    # received an annotation is written out.
    with open(output_path, 'w') as file:
        for row in df_final.to_dict('records'):
            if len(row['annotation']) != 0:
                file.write(to_json_line(row['id'], row['sentence_form'], row['annotation']))

    return pd.DataFrame(jsonlload(output_path))

In [None]:
# Run the cascade defined in the cell above. That Win() writes
# /content/Korean_ABSA/jsonl_files/gpu_kdr_kup8_deberta_forcing.jsonl
# and returns the file loaded back as a DataFrame.
# Later cells redefine Win(), so run this right after its defining cell.
Win()

## kup8_gpu_kdr_deberta_forcing

In [None]:
# Hand-off files between the stages of Win(): the sentences a stage leaves
# without an annotation are written to BlankN.jsonl and read back by the
# next stage. Blank5 and Blank6 are not referenced by the Win() defined in
# the next cell.
(
    test_data_path_blank_first,
    test_data_path_blank_second,
    test_data_path_blank_third,
    test_data_path_blank_fourth,
    test_data_path_blank_fifth,
    test_data_path_blank_sixth,
) = ('./Blank{}.jsonl'.format(stage_no) for stage_no in range(1, 7))

In [None]:
#kup8_gpu_kdr_deberta_forcing
batch_size=32
def Win():
    """Run the five-stage category/polarity cascade and write the merged result.

    Stages run in the order K up -> Gpu LR -> K dr -> Deberta -> forcing. Each
    of the first four stages predicts with its own category checkpoint plus
    the shared polarity checkpoint, then writes every sentence that came back
    with an empty ``annotation`` to the next ``BlankN.jsonl`` file (carrying a
    placeholder annotation) so the following stage can retry it. The last
    stage runs ``predict_from_korean_form_kelec_forcing`` on the sentences
    left in ``test_data_path_blank_fourth``.

    All stage outputs are concatenated and sorted by ``id``; rows with a
    non-empty annotation are written to
    ``/content/Korean_ABSA/jsonl_files/kup8_gpu_kdr_deberta_forcing.jsonl``.

    Returns:
        pandas.DataFrame: the final JSONL file loaded back via ``jsonlload``.
    """
    checkpoint_dir = "/content/Korean_ABSA/pt_files/"
    output_path = '/content/Korean_ABSA/jsonl_files/kup8_gpu_kdr_deberta_forcing.jsonl'
    # Placeholder stored with every sentence that is still unlabelled.
    placeholder_annotation = [["제품 전체#일반", [None, 0, 0], "positive"]]

    def as_builtin(value):
        # numpy/torch scalars are not JSON-serialisable; unwrap them via .item().
        if hasattr(value, "item"):
            return value.item()
        raise TypeError(f"Cannot serialise {type(value).__name__} to JSON")

    def to_json_line(record_id, sentence, annotation):
        # json.dumps escapes quotes, backslashes and newlines, which the
        # hand-built format string did not; for clean input the text is the
        # same `{"id": "...", "sentence_form": "...", "annotation": [...]}`.
        record = {
            "id": str(record_id),
            "sentence_form": str(sentence),
            "annotation": annotation,
        }
        return json.dumps(record, ensure_ascii=False, default=as_builtin) + "\n"

    def load_checkpoint(net, checkpoint_name):
        # Restore weights, move to the target device and switch to eval mode.
        net.load_state_dict(torch.load(checkpoint_dir + checkpoint_name, map_location=device))
        net.to(device)
        net.eval()
        return net

    def run_stage(model_cls, tokenizer, checkpoint_name, predict, records, blank_path=None):
        # Predict one stage; optionally dump its still-unlabelled sentences.
        category_model = load_checkpoint(
            model_cls(len(tf_id_to_name), len(tokenizer)), checkpoint_name)
        frame = pd.DataFrame(predict(category_model, copy.deepcopy(records)))
        if blank_path is not None:
            # Encoding is left at the platform default, as before, so it keeps
            # matching whatever jsonlload reads with.
            with open(blank_path, 'w') as file:
                for row in frame.to_dict('records'):
                    if len(row['annotation']) == 0:
                        file.write(to_json_line(row['id'], row['sentence_form'],
                                                placeholder_annotation))
        return frame

    print("K up!!")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    # The special tokens change len(tokenizer), which sizes the models below.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    tokenizer_deberta.add_special_tokens(special_tokens_dict)

    # NOTE(review): the per-stage get_dataset()/DataLoader objects built by the
    # earlier version were never handed to the predict_* helpers, so they are
    # not built here. This assumes get_dataset() has no side effects - confirm.

    # Every stage uses the same polarity checkpoint, so it is loaded once.
    polarity_model = load_checkpoint(
        ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec)),
        "Pola_Lr_epoch_2.pt")

    def predict_kelec(category_model, records):
        return predict_from_korean_form_kelec(
            tokenizer_kelec, category_model, polarity_model, records)

    def predict_deberta(category_model, records):
        return predict_from_korean_form_deberta(
            tokenizer_deberta, tokenizer_kelec, category_model, polarity_model, records)

    def predict_forcing(category_model, records):
        return predict_from_korean_form_kelec_forcing(
            tokenizer_kelec, category_model, polarity_model, records)

    df_pred_first = run_stage(
        ElectraBaseClassifier_Cate_hiddenup, tokenizer_kelec,
        "kelec_cate_k_data_uphidden_epoch_8.pt", predict_kelec,
        jsonlload(test_data_path), test_data_path_blank_first)

    print("Gpu LR!!")
    df_pred_second = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_kelec,
        jsonlload(test_data_path_blank_first), test_data_path_blank_second)

    print("K dr!!")
    df_pred_third = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "kelec_K_dr0.5_epoch_7.pt", predict_kelec,
        jsonlload(test_data_path_blank_second), test_data_path_blank_third)

    print("Deberta!!!")
    df_pred_fourth = run_stage(
        DebertaBaseClassifier, tokenizer_deberta,
        "deberta_Gpu_epoch_5.pt", predict_deberta,
        jsonlload(test_data_path_blank_third), test_data_path_blank_fourth)

    print("The Last Forcing!!")
    df_pred_final = run_stage(
        ElectraBaseClassifier_Cate_Base, tokenizer_kelec,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_forcing,
        jsonlload(test_data_path_blank_fourth))

    df_final = pd.concat(
        [df_pred_first, df_pred_second, df_pred_third, df_pred_fourth, df_pred_final]
    ).sort_values(by=['id'], axis=0).reset_index(drop=True)

    # A sentence appears once per stage that saw it; only the row that finally
    # received an annotation is written out.
    with open(output_path, 'w') as file:
        for row in df_final.to_dict('records'):
            if len(row['annotation']) != 0:
                file.write(to_json_line(row['id'], row['sentence_form'], row['annotation']))

    return pd.DataFrame(jsonlload(output_path))

In [None]:
# Run the cascade defined in the cell above. That Win() writes
# /content/Korean_ABSA/jsonl_files/kup8_gpu_kdr_deberta_forcing.jsonl
# and returns the file loaded back as a DataFrame.
# Later cells redefine Win(), so run this right after its defining cell.
Win()

## Original_Kup8_Gpu_Kdr_Deberta_Forcing

In [None]:
# Paths of the BlankN.jsonl intermediate files; presumably consumed by the
# Win() cascade defined in the next cell, as in the earlier variants of it.
(
    test_data_path_blank_first,
    test_data_path_blank_second,
    test_data_path_blank_third,
    test_data_path_blank_fourth,
    test_data_path_blank_fifth,
    test_data_path_blank_sixth,
) = ('./Blank{}.jsonl'.format(stage_no) for stage_no in range(1, 7))

In [None]:
#Original_Kup8_Gpu_Kdr_Deberta_Forcing
# Notebook-level setting; Win() below does not read it.
batch_size=32
def Win():
    """Run the Original -> Gpu lr -> K dr -> K up -> Deberta -> Forcing cascade.

    The test set is first predicted with the "Original" category model.  Rows
    whose ``annotation`` comes back empty are written to a ``Blank*.jsonl``
    hand-off file and re-predicted by the next category model in the chain.
    The last stage calls ``predict_from_korean_form_kelec_forcing``
    (presumably it always emits an annotation -- confirm in its definition).
    The six stage outputs are concatenated, sorted by ``id``, and the rows
    that carry an annotation are written to the submission file.

    Returns:
        pandas.DataFrame: the submission file read back through ``jsonlload``.
    """
    pt_dir = "/content/Korean_ABSA/pt_files/"
    final_path = '/content/Korean_ABSA/jsonl_files/Original_Kup8_Gpu_Kdr_Deberta_Forcing.jsonl'
    # Placeholder annotation stored with every still-unlabelled sentence.
    blank_placeholder = [["제품 전체#일반", [None, 0, 0], "positive"]]

    def as_builtin(value):
        """json.dumps fallback: unwrap NumPy scalars, reject anything else."""
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"{type(value).__name__} is not JSON serializable")

    def load_weights(network, weights_path):
        """Load a checkpoint into ``network``, move it to ``device``, set eval mode."""
        network.load_state_dict(torch.load(weights_path, map_location=device))
        network.to(device)
        network.eval()
        return network

    def dump_rows(path, frame, keep_blank):
        """Write rows of ``frame`` to ``path`` as JSON lines.

        With ``keep_blank`` true only rows with an empty annotation are
        written (carrying the placeholder); otherwise only annotated rows are
        written, unchanged.  The file is always (re)created.
        """
        with open(path, 'w', encoding='utf-8') as file:
            if frame.empty:
                # A stage that received no sentences yields a column-less frame.
                return
            rows = zip(frame['id'], frame['sentence_form'], frame['annotation'])
            for row_id, sentence, annotation in rows:
                is_blank = len(annotation) == 0
                if is_blank != keep_blank:
                    continue
                record = {
                    'id': str(row_id),
                    'sentence_form': str(sentence),
                    'annotation': blank_placeholder if is_blank else annotation,
                }
                # json.dumps escapes quotes/backslashes inside the sentence
                # text, which hand-assembled JSON would leave unescaped.
                file.write(json.dumps(record, ensure_ascii=False, default=as_builtin))
                file.write("\n")

    print("Original!")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    tokenizer_deberta.add_special_tokens(special_tokens_dict)

    # Every stage uses the same polarity checkpoint, so it is loaded once.
    polarity_model = load_weights(
        ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec)),
        pt_dir + "Pola_Lr_epoch_2.pt")

    def run_kelec(data, category_cls, weights_file, predict):
        """Predict ``data`` with a category model built on the kelec tokenizer."""
        category_model = load_weights(
            category_cls(len(tf_id_to_name), len(tokenizer_kelec)),
            pt_dir + weights_file)
        return pd.DataFrame(
            predict(tokenizer_kelec, category_model, polarity_model, copy.deepcopy(data)))

    # Each hand-off file is written to the same path variable the next stage
    # reads from, so writer and reader cannot drift apart.
    df_pred_first = run_kelec(
        jsonlload(test_data_path), ElectraBaseClassifier_Cate_Base,
        "kelec_train+dev_refined_data_epoch_8.pt", predict_from_korean_form_kelec)
    dump_rows(test_data_path_blank_first, df_pred_first, keep_blank=True)

    print("Gpu lr!!")
    df_pred_second = run_kelec(
        jsonlload(test_data_path_blank_first), ElectraBaseClassifier_Cate_Base,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_from_korean_form_kelec)
    dump_rows(test_data_path_blank_second, df_pred_second, keep_blank=True)

    print("K dr!!")
    df_pred_third = run_kelec(
        jsonlload(test_data_path_blank_second), ElectraBaseClassifier_Cate_Base,
        "kelec_K_dr0.5_epoch_7.pt", predict_from_korean_form_kelec)
    dump_rows(test_data_path_blank_third, df_pred_third, keep_blank=True)

    print("K up!!!")
    df_pred_fourth = run_kelec(
        jsonlload(test_data_path_blank_third), ElectraBaseClassifier_Cate_hiddenup,
        "kelec_cate_k_data_uphidden_epoch_8.pt", predict_from_korean_form_kelec)
    dump_rows(test_data_path_blank_fourth, df_pred_fourth, keep_blank=True)

    print("Deberta!!")
    # The DeBERTa category model needs its own tokenizer; polarity still uses kelec.
    deberta_model = load_weights(
        DebertaBaseClassifier(len(tf_id_to_name), len(tokenizer_deberta)),
        pt_dir + "deberta_Gpu_epoch_5.pt")
    df_pred_fifth = pd.DataFrame(predict_from_korean_form_deberta(
        tokenizer_deberta, tokenizer_kelec, deberta_model, polarity_model,
        copy.deepcopy(jsonlload(test_data_path_blank_fourth))))
    dump_rows(test_data_path_blank_fifth, df_pred_fifth, keep_blank=True)

    print("The Last Forcing!!")
    df_pred_final = run_kelec(
        jsonlload(test_data_path_blank_fifth), ElectraBaseClassifier_Cate_Base,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_from_korean_form_kelec_forcing)

    stage_frames = [df_pred_first, df_pred_second, df_pred_third,
                    df_pred_fourth, df_pred_fifth, df_pred_final]
    df_final = pd.concat(stage_frames).sort_values(by=['id'], axis=0).reset_index(drop=True)

    # Rows that were still blank at some stage also appear (annotated) in a
    # later stage's frame, so only the annotated copies are kept.
    dump_rows(final_path, df_final, keep_blank=False)

    return pd.DataFrame(jsonlload(final_path))

In [None]:
# Run the cascade defined in the cell above (writes
# Original_Kup8_Gpu_Kdr_Deberta_Forcing.jsonl); the returned DataFrame is not stored.
Win()

## kup_kdr_gpu_deberta_forcing

In [None]:
# Hand-off files for the cascade in the next cell: Win() writes the sentences
# a stage left without an annotation to one of these and the following stage
# reads them back.
(
    test_data_path_blank_first,
    test_data_path_blank_second,
    test_data_path_blank_third,
    test_data_path_blank_fourth,
    test_data_path_blank_fifth,
    test_data_path_blank_sixth,
) = (
    './Blank1.jsonl',
    './Blank2.jsonl',
    './Blank3.jsonl',
    './Blank4.jsonl',
    './Blank5.jsonl',
    './Blank6.jsonl',
)

In [None]:
#kup_kdr_gpu_deberta_forcing
# Notebook-level setting; Win() below does not read it.
batch_size=32
def Win():
    """Run the K up -> K dr -> Gpu -> Deberta -> Forcing cascade.

    The test set is first predicted with the "K up" (hidden-up) category
    model.  Rows whose ``annotation`` comes back empty are written to a
    ``Blank*.jsonl`` hand-off file and re-predicted by the next category model
    in the chain.  The last stage calls
    ``predict_from_korean_form_kelec_forcing`` (presumably it always emits an
    annotation -- confirm in its definition).  The five stage outputs are
    concatenated, sorted by ``id``, and the rows that carry an annotation are
    written to the submission file.

    Returns:
        pandas.DataFrame: the submission file read back through ``jsonlload``.
    """
    pt_dir = "/content/Korean_ABSA/pt_files/"
    final_path = '/content/Korean_ABSA/jsonl_files/kup_kdr_gpu_deberta_forcing.jsonl'
    # Placeholder annotation stored with every still-unlabelled sentence.
    blank_placeholder = [["제품 전체#일반", [None, 0, 0], "positive"]]

    def as_builtin(value):
        """json.dumps fallback: unwrap NumPy scalars, reject anything else."""
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"{type(value).__name__} is not JSON serializable")

    def load_weights(network, weights_path):
        """Load a checkpoint into ``network``, move it to ``device``, set eval mode."""
        network.load_state_dict(torch.load(weights_path, map_location=device))
        network.to(device)
        network.eval()
        return network

    def dump_rows(path, frame, keep_blank):
        """Write rows of ``frame`` to ``path`` as JSON lines.

        With ``keep_blank`` true only rows with an empty annotation are
        written (carrying the placeholder); otherwise only annotated rows are
        written, unchanged.  The file is always (re)created.
        """
        with open(path, 'w', encoding='utf-8') as file:
            if frame.empty:
                # A stage that received no sentences yields a column-less frame.
                return
            rows = zip(frame['id'], frame['sentence_form'], frame['annotation'])
            for row_id, sentence, annotation in rows:
                is_blank = len(annotation) == 0
                if is_blank != keep_blank:
                    continue
                record = {
                    'id': str(row_id),
                    'sentence_form': str(sentence),
                    'annotation': blank_placeholder if is_blank else annotation,
                }
                # json.dumps escapes quotes/backslashes inside the sentence
                # text, which hand-assembled JSON would leave unescaped.
                file.write(json.dumps(record, ensure_ascii=False, default=as_builtin))
                file.write("\n")

    print("K up!!")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_deberta = AutoTokenizer.from_pretrained(base_model_deberta)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)
    tokenizer_deberta.add_special_tokens(special_tokens_dict)

    # Every stage uses the same polarity checkpoint, so it is loaded once.
    polarity_model = load_weights(
        ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec)),
        pt_dir + "Pola_Lr_epoch_2.pt")

    def run_kelec(data, category_cls, weights_file, predict):
        """Predict ``data`` with a category model built on the kelec tokenizer."""
        category_model = load_weights(
            category_cls(len(tf_id_to_name), len(tokenizer_kelec)),
            pt_dir + weights_file)
        return pd.DataFrame(
            predict(tokenizer_kelec, category_model, polarity_model, copy.deepcopy(data)))

    # Each hand-off file is written to the same path variable the next stage
    # reads from, so writer and reader cannot drift apart.
    df_pred_first = run_kelec(
        jsonlload(test_data_path), ElectraBaseClassifier_Cate_hiddenup,
        "kelec_cate_k_data_uphidden_epoch_8.pt", predict_from_korean_form_kelec)
    dump_rows(test_data_path_blank_first, df_pred_first, keep_blank=True)

    print("K dr!!")
    df_pred_second = run_kelec(
        jsonlload(test_data_path_blank_first), ElectraBaseClassifier_Cate_Base,
        "kelec_K_dr0.5_epoch_7.pt", predict_from_korean_form_kelec)
    dump_rows(test_data_path_blank_second, df_pred_second, keep_blank=True)

    print("Gpu!!")
    df_pred_third = run_kelec(
        jsonlload(test_data_path_blank_second), ElectraBaseClassifier_Cate_Base,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_from_korean_form_kelec)
    dump_rows(test_data_path_blank_third, df_pred_third, keep_blank=True)

    print("Deberta!!")
    # The DeBERTa category model needs its own tokenizer; polarity still uses kelec.
    deberta_model = load_weights(
        DebertaBaseClassifier(len(tf_id_to_name), len(tokenizer_deberta)),
        pt_dir + "deberta_Gpu_epoch_5.pt")
    df_pred_fifth = pd.DataFrame(predict_from_korean_form_deberta(
        tokenizer_deberta, tokenizer_kelec, deberta_model, polarity_model,
        copy.deepcopy(jsonlload(test_data_path_blank_third))))
    # This variant has no fourth hand-off file: the DeBERTa leftovers go
    # straight to the "fifth" path that the forcing stage reads.
    dump_rows(test_data_path_blank_fifth, df_pred_fifth, keep_blank=True)

    print("The Last Forcing!!")
    df_pred_final = run_kelec(
        jsonlload(test_data_path_blank_fifth), ElectraBaseClassifier_Cate_Base,
        "G_kElectra_LeariningRate_epoch_1_3e-7.pt", predict_from_korean_form_kelec_forcing)

    stage_frames = [df_pred_first, df_pred_second, df_pred_third,
                    df_pred_fifth, df_pred_final]
    df_final = pd.concat(stage_frames).sort_values(by=['id'], axis=0).reset_index(drop=True)

    # Rows that were still blank at some stage also appear (annotated) in a
    # later stage's frame, so only the annotated copies are kept.
    dump_rows(final_path, df_final, keep_blank=False)

    return pd.DataFrame(jsonlload(final_path))

In [None]:
# Run the cascade defined in the cell above (writes
# kup_kdr_gpu_deberta_forcing.jsonl); the returned DataFrame is not stored.
Win()

## GpuLR_175

In [None]:
# Gpu 175 needs to be put in (translated from the original Korean note).
# Notebook-level setting; test_sentiment_analysis() below does not read it.
batch_size=32
def test_sentiment_analysis():
    """Predict the test set and save the result as ``GpuLR_175.jsonl``.

    Uses the ``kelec_Gpuragi_epoch_35.pt`` category checkpoint and the
    ``Pola_Lr_epoch_2.pt`` polarity checkpoint through
    ``predict_from_korean_form_kelec_threshold``.  Every predicted sentence is
    written, including those whose annotation list is empty.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and ``annotation``,
        one row per predicted sentence.
    """
    def as_builtin(value):
        """json.dumps fallback: unwrap NumPy scalars, reject anything else."""
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"{type(value).__name__} is not JSON serializable")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelec_Gpuragi_epoch_35.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Pola_Lr_epoch_2.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec_threshold(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    with open('/content/Korean_ABSA/jsonl_files/GpuLR_175.jsonl', 'w', encoding='utf-8') as file:
        for row in pred_data:
            record = {
                'id': str(row['id']),
                'sentence_form': str(row['sentence_form']),
                'annotation': row['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence text,
            # which hand-assembled JSON would leave unescaped.
            file.write(json.dumps(record, ensure_ascii=False, default=as_builtin))
            file.write("\n")

    return df_pred

In [None]:
# Run the function defined in the cell above (writes GpuLR_175.jsonl);
# the returned DataFrame is not stored.
test_sentiment_analysis()

## Original_175

In [None]:
# Original_175 needs to be put in (translated from the original Korean note).
# Notebook-level setting; test_sentiment_analysis() below does not read it.
batch_size=32
def test_sentiment_analysis():
    """Predict the test set and save the result as ``Original_175.jsonl``.

    Uses the ``kelec_train+dev_refined_data_epoch_30.pt`` category checkpoint
    and the ``Pola_Lr_epoch_2.pt`` polarity checkpoint through
    ``predict_from_korean_form_kelec_threshold``.  Every predicted sentence is
    written, including those whose annotation list is empty.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and ``annotation``,
        one row per predicted sentence.
    """
    def as_builtin(value):
        """json.dumps fallback: unwrap NumPy scalars, reject anything else."""
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"{type(value).__name__} is not JSON serializable")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelec_train+dev_refined_data_epoch_30.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Pola_Lr_epoch_2.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec_threshold(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    with open('/content/Korean_ABSA/jsonl_files/Original_175.jsonl', 'w', encoding='utf-8') as file:
        for row in pred_data:
            record = {
                'id': str(row['id']),
                'sentence_form': str(row['sentence_form']),
                'annotation': row['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence text,
            # which hand-assembled JSON would leave unescaped.
            file.write(json.dumps(record, ensure_ascii=False, default=as_builtin))
            file.write("\n")

    return df_pred

In [None]:
# Run the function defined in the cell above (writes Original_175.jsonl);
# the returned DataFrame is not stored.
test_sentiment_analysis()

## Kup_175

In [None]:
# Kup_175 needs to be put in (translated from the original Korean note).
# Notebook-level setting; test_sentiment_analysis() below does not read it.
batch_size=32
def test_sentiment_analysis():
    """Predict the test set and save the result as ``Kup_175.jsonl``.

    Uses the hidden-up category classifier with the
    ``kelec_cate_k_data_uphidden_epoch_25.pt`` checkpoint and the
    ``Pola_Lr_epoch_2.pt`` polarity checkpoint through
    ``predict_from_korean_form_kelec_threshold``.  Every predicted sentence is
    written, including those whose annotation list is empty.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and ``annotation``,
        one row per predicted sentence.
    """
    def as_builtin(value):
        """json.dumps fallback: unwrap NumPy scalars, reject anything else."""
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"{type(value).__name__} is not JSON serializable")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_hiddenup(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelec_cate_k_data_uphidden_epoch_25.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Pola_Lr_epoch_2.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec_threshold(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    with open('/content/Korean_ABSA/jsonl_files/Kup_175.jsonl', 'w', encoding='utf-8') as file:
        for row in pred_data:
            record = {
                'id': str(row['id']),
                'sentence_form': str(row['sentence_form']),
                'annotation': row['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence text,
            # which hand-assembled JSON would leave unescaped.
            file.write(json.dumps(record, ensure_ascii=False, default=as_builtin))
            file.write("\n")

    return df_pred

In [None]:
# Run the function defined in the cell above (writes Kup_175.jsonl);
# the returned DataFrame is not stored.
test_sentiment_analysis()

## K_175

In [None]:
# K_175 needs to be put in (translated from the original Korean note).
# Notebook-level setting; test_sentiment_analysis() below does not read it.
batch_size=32
def test_sentiment_analysis():
    """Predict the test set and save the result as ``K_175.jsonl``.

    Uses the ``kelec_K_dr0.5_epoch_7.pt`` category checkpoint and the
    ``Pola_Lr_epoch_2.pt`` polarity checkpoint through
    ``predict_from_korean_form_kelec_threshold``.  Every predicted sentence is
    written, including those whose annotation list is empty.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and ``annotation``,
        one row per predicted sentence.
    """
    def as_builtin(value):
        """json.dumps fallback: unwrap NumPy scalars, reject anything else."""
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"{type(value).__name__} is not JSON serializable")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelec_K_dr0.5_epoch_7.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Pola_Lr_epoch_2.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec_threshold(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    with open('/content/Korean_ABSA/jsonl_files/K_175.jsonl', 'w', encoding='utf-8') as file:
        for row in pred_data:
            record = {
                'id': str(row['id']),
                'sentence_form': str(row['sentence_form']),
                'annotation': row['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence text,
            # which hand-assembled JSON would leave unescaped.
            file.write(json.dumps(record, ensure_ascii=False, default=as_builtin))
            file.write("\n")

    return df_pred

In [None]:
# Run the function defined in the cell above (writes K_175.jsonl);
# the returned DataFrame is not stored.
test_sentiment_analysis()

## kelec_Gpu_itgr_LR2 가 58

In [None]:
# kelec_Gpu_itgr_LR2 is "58" (translated from the original Korean note;
# presumably refers to the epoch-58 polarity checkpoint loaded below -- confirm).
# Notebook-level setting; test_sentiment_analysis() below does not read it.
batch_size=32
def test_sentiment_analysis():
    """Predict the test set and save the result as ``kelec_Gpu_itgr_LR2.jsonl``.

    Uses the ``G_Integral_kElectra_Lr_epoch_2.pt`` category checkpoint and the
    ``kelectra_polarity_epoch_58.pt`` polarity checkpoint through
    ``predict_from_korean_form_kelec``.  Every predicted sentence is written,
    including those whose annotation list is empty.

    Returns:
        pandas.DataFrame: columns ``id``, ``sentence_form`` and ``annotation``,
        one row per predicted sentence.
    """
    def as_builtin(value):
        """json.dumps fallback: unwrap NumPy scalars, reject anything else."""
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"{type(value).__name__} is not JSON serializable")

    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/G_Integral_kElectra_Lr_epoch_2.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_58.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    df_pred = pd.DataFrame({
        'id': [row['id'] for row in pred_data],
        'sentence_form': [row['sentence_form'] for row in pred_data],
        'annotation': [row['annotation'] for row in pred_data],
    })

    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_LR2.jsonl', 'w', encoding='utf-8') as file:
        for row in pred_data:
            record = {
                'id': str(row['id']),
                'sentence_form': str(row['sentence_form']),
                'annotation': row['annotation'],
            }
            # json.dumps escapes quotes/backslashes inside the sentence text,
            # which hand-assembled JSON would leave unescaped.
            file.write(json.dumps(record, ensure_ascii=False, default=as_builtin))
            file.write("\n")

    return df_pred

In [None]:
# Run the function defined in the cell above (writes kelec_Gpu_itgr_LR2.jsonl);
# the returned DataFrame is not stored.
test_sentiment_analysis()

## kelec_Gpu_itgr_LR1

In [None]:
#kelec_Gpu_itgr_LR1 가 58
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_itgr_LR1 inference pass and save it as JSONL.

    Loads the category checkpoint ``G_Integral_kElectra_Lr_epoch_1.pt`` and the
    polarity checkpoint ``kelectra_polarity_epoch_58.pt``, predicts annotations
    for every record read from ``test_data_path`` with
    ``predict_from_korean_form_kelec`` and writes one JSON object per line to
    ``jsonl_files/kelec_Gpu_itgr_LR1.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/G_Integral_kElectra_Lr_epoch_1.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_58.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_LR1.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

## kelec_Gpu_6_LR_1_step3more_Ma32_LR_2

In [None]:
#kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_6_LR_1_step3more_Ma32_LR_2 inference pass and save it as JSONL.

    Loads the category checkpoint ``ThirdStep_More_epoch_1.pt`` and the
    polarity checkpoint ``Ma_Deep_epoch_2.pt``, predicts annotations for every
    record read from ``test_data_path`` with
    ``predict_from_korean_form_kelec_forcing`` and writes one JSON object per
    line to ``jsonl_files/kelec_Gpu_6_LR_1_step3more_Ma32_LR_2.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/ThirdStep_More_epoch_1.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Ma_Deep_epoch_2.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec_forcing(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_LR_1_step3more_Ma32_LR_2.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

## kelec_Gpu_6_LR_1_step3more_Ma32F

In [None]:
#kelec_Gpu_6_LR_1_step3more_Ma32F
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_6_LR_1_step3more_Ma32F inference pass and save it as JSONL.

    Loads the category checkpoint ``ThirdStep_More_epoch_1.pt`` and the
    polarity checkpoint ``kelec_pola_Ma_Standard_epoch_32.pt``, predicts
    annotations for every record read from ``test_data_path`` with
    ``predict_from_korean_form_kelec_forcing`` and writes one JSON object per
    line to ``jsonl_files/kelec_Gpu_6_LR_1_step3more_Ma32F.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/ThirdStep_More_epoch_1.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelec_pola_Ma_Standard_epoch_32.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec_forcing(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_LR_1_step3more_Ma32F.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

## kelec_Gpu_itgr_LR2F

In [None]:
#kelec_Gpu_itgr_LR2F
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_itgr_LR2F inference pass and save it as JSONL.

    Loads the category checkpoint ``G_Integral_kElectra_Lr_epoch_2.pt`` and the
    polarity checkpoint ``kelectra_polarity_epoch_58.pt``, predicts annotations
    for every record read from ``test_data_path`` with
    ``predict_from_korean_form_kelec_forcing`` and writes one JSON object per
    line to ``jsonl_files/kelec_Gpu_itgr_LR2F.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/G_Integral_kElectra_Lr_epoch_2.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_58.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec_forcing(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_LR2F.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

## kelec_Gpu_LR6_attemp2_LR3

In [None]:
#kelec_Gpu_LR6_attemp2_LR3
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_LR6_attemp2_LR3 inference pass and save it as JSONL.

    Loads the category checkpoint ``SecondAttempt_epoch_4.pt`` and the polarity
    checkpoint ``Pola_Lr_epoch_3.pt``, predicts annotations for every record
    read from ``test_data_path`` with ``predict_from_korean_form_kelec`` and
    writes one JSON object per line to
    ``jsonl_files/kelec_Gpu_LR6_attemp2_LR3.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/SecondAttempt_epoch_4.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Pola_Lr_epoch_3.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_LR6_attemp2_LR3.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

## kelec_Gpu_6_LR1_Ma32_LR_2F

In [None]:
#kelec_Gpu_6_LR1_Ma32_LR_2F
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_6_LR1_Ma32_LR_2F inference pass and save it as JSONL.

    Loads the category checkpoint
    ``G_kElectra_LeariningRate_epoch_1_3e-7.pt`` and the polarity checkpoint
    ``Ma_Deep_epoch_2.pt``, predicts annotations for every record read from
    ``test_data_path`` with ``predict_from_korean_form_kelec_forcing`` and
    writes one JSON object per line to
    ``jsonl_files/kelec_Gpu_6_LR1_Ma32_LR_2F.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/G_kElectra_LeariningRate_epoch_1_3e-7.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/Ma_Deep_epoch_2.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec_forcing(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_LR1_Ma32_LR_2F.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

## kelec_Gpu_6F_LR1

In [None]:
#kelec_Gpu_6F_LR1
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_6F_LR1 inference pass and save it as JSONL.

    Loads the category checkpoint ``G_Integral_kElectra_Lr_epoch_1.pt`` and the
    polarity checkpoint ``kelectra_polarity_epoch_58.pt``, predicts annotations
    for every record read from ``test_data_path`` with
    ``predict_from_korean_form_kelec_forcing`` and writes one JSON object per
    line to ``jsonl_files/kelec_Gpu_6F_LR1.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/G_Integral_kElectra_Lr_epoch_1.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_58.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec_forcing(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_6F_LR1.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

## kelec_Gpu_6F_LR2

In [None]:
#kelec_Gpu_6F_LR2
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_6F_LR2 inference pass and save it as JSONL.

    Loads the category checkpoint ``G_Integral_kElectra_Lr_epoch_2.pt`` and the
    polarity checkpoint ``kelectra_polarity_epoch_58.pt``, predicts annotations
    for every record read from ``test_data_path`` with
    ``predict_from_korean_form_kelec_forcing`` and writes one JSON object per
    line to ``jsonl_files/kelec_Gpu_6F_LR2.jsonl``.

    NOTE(review): the kelec_Gpu_itgr_LR2F cell in this notebook uses the same
    two checkpoints and the same predict function; only the output file name
    differs. Confirm this duplication is intended.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/G_Integral_kElectra_Lr_epoch_2.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_58.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec_forcing(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_6F_LR2.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

## kelec_Gpu_6F_LR3

In [None]:
#kelec_Gpu_6F_LR3
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_6F_LR3 inference pass and save it as JSONL.

    Loads the category checkpoint ``G_Integral_kElectra_Lr_epoch_3.pt`` and the
    polarity checkpoint ``kelectra_polarity_epoch_58.pt``, predicts annotations
    for every record read from ``test_data_path`` with
    ``predict_from_korean_form_kelec_forcing`` and writes one JSON object per
    line to ``jsonl_files/kelec_Gpu_6F_LR3.jsonl``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/G_Integral_kElectra_Lr_epoch_3.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_58.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec_forcing(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_6F_LR3.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

## kelec_Gpu_6_LR2

In [None]:
#kelec_Gpu_6_LR2
batch_size=32
def test_sentiment_analysis():
    """Run the kelec_Gpu_6_LR2 inference pass and save it as JSONL.

    Loads the category checkpoint ``G_Integral_kElectra_Lr_epoch_2.pt`` and the
    polarity checkpoint ``kelectra_polarity_epoch_58.pt``, predicts annotations
    for every record read from ``test_data_path`` with
    ``predict_from_korean_form_kelec`` and writes one JSON object per line to
    ``jsonl_files/kelec_Gpu_6_LR2.jsonl``.

    NOTE(review): the kelec_Gpu_itgr_LR2 cell in this notebook loads the same
    two checkpoints and calls the same predict function; only the output file
    name differs. Confirm this duplication is intended.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``, one row per predicted record.
    """
    tokenizer_kelec = AutoTokenizer.from_pretrained(base_model_elec)
    # Called for its effect on the tokenizer; the returned count was never used.
    tokenizer_kelec.add_special_tokens(special_tokens_dict)

    test_data = jsonlload(test_data_path)

    # NOTE(review): the datasets returned here are not consumed below (the
    # unused, shuffled DataLoaders were dropped). The call itself is kept
    # because get_dataset is defined outside this view - confirm it is pure
    # before removing it.
    get_dataset(test_data, tokenizer_kelec, max_len_elec)

    model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer_kelec))
    model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/G_Integral_kElectra_Lr_epoch_2.pt", map_location=device))
    model.to(device)
    model.eval()

    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer_kelec))
    polarity_model.load_state_dict(torch.load("/content/Korean_ABSA/pt_files/kelectra_polarity_epoch_58.pt", map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a deep copy so the loaded test_data is not modified.
    pred_data = predict_from_korean_form_kelec(tokenizer_kelec, model, polarity_model, copy.deepcopy(test_data))

    rows = [(rec['id'], rec['sentence_form'], rec['annotation']) for rec in pred_data]
    df_pred = pd.DataFrame({
        'id': [row[0] for row in rows],
        'sentence_form': [row[1] for row in rows],
        'annotation': [row[2] for row in rows],
    })

    # json.dumps escapes quotes, backslashes and newlines, which the previous
    # hand-built string (quote replacement on str(list)) turned into invalid JSON.
    with open('/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_LR2.jsonl', 'w', encoding='utf-8') as file:
        for rec_id, sentence, annotation in rows:
            record = {'id': str(rec_id), 'sentence_form': str(sentence), 'annotation': annotation}
            file.write(json.dumps(record, ensure_ascii=False) + "\n")

    return df_pred

In [None]:
test_sentiment_analysis()

# Ensemble

## excute_ensemble, save_jsonl

In [None]:
def excute_ensemble(model_list):
    """Combine per-model predictions row by row with a majority vote.

    For every row index, each model's ``annotation`` is converted with
    ``str()`` and the most frequent string wins. When the winner contains
    ``'[]'`` but at least one model predicted something with content, the first
    model (in ``model_list`` order) whose vote is not exactly ``'[]'`` is used
    instead, so a non-empty prediction is preferred over an empty majority.

    Args:
        model_list: sequence of DataFrame-like objects that can be indexed as
            ``m[column][i]`` for the columns ``id``, ``sentence_form`` and
            ``annotation``. The row count is taken from the first element.

    Returns:
        pandas.DataFrame with the columns ``id``, ``sentence_form`` and
        ``annotation``. ``annotation`` holds the winning prediction in its
        ``str()`` form; ``id`` and ``sentence_form`` are copied from the last
        model. An empty ``model_list`` yields an empty frame.
    """
    dic_ae = {
        'id' : [],
        'sentence_form' : [],
        'annotation' : []
    }

    models = list(model_list)
    if not models:
        return pd.DataFrame(dic_ae)

    # The original read id/sentence_form through the leaked inner-loop
    # variable, i.e. from the last model; that choice is now explicit.
    reference = models[-1]
    # Characters that carry no content in the str() form of a list of votes.
    strip_table = str.maketrans('', '', "[],'\" ")

    for i in range(len(models[0])):
        votes = [str(m['annotation'][i]) for m in models]
        answer = Counter(votes).most_common(n=1)[0][0]

        # Empty only when every vote consists solely of brackets/quotes/commas.
        check = str(votes).translate(strip_table)

        # Value comparison: `is not ""` tested object identity, which is
        # implementation-dependent for strings.
        if '[]' in answer and check != "":
            non_empty = [vote for vote in votes if vote != '[]']
            dic_ae['annotation'].append(non_empty[0])
        else:
            dic_ae['annotation'].append(answer)
        dic_ae['id'].append(reference['id'][i])
        dic_ae['sentence_form'].append(reference['sentence_form'][i])

    return pd.DataFrame(dic_ae)

In [None]:
def _annotation_to_json(annos):
    """Return the JSON text for one annotation value."""
    import ast

    if isinstance(annos, str):
        # excute_ensemble stores annotations as str(list); parse that back into
        # Python objects so it can be serialized with correct escaping.
        try:
            annos = ast.literal_eval(annos)
        except (ValueError, SyntaxError):
            # Not a Python literal: keep the legacy textual conversion.
            return annos.replace("None", "null").replace("\'", "\"")
    try:
        return json.dumps(annos, ensure_ascii=False)
    except (TypeError, ValueError):
        # Value json cannot encode: keep the legacy textual conversion.
        return str(annos).replace("None", "null").replace("\'", "\"")


def save_jsonl(df_ae, name, path):
    """Write an ensemble result to ``{path}/{name}.jsonl``, one JSON object per line.

    Args:
        df_ae: DataFrame-like object indexable as ``df_ae[column][i]`` with the
            columns ``sentence_form`` and ``annotation``. ``annotation`` may be
            a list or the ``str()`` form of one.
        name: file stem; it is also embedded in every generated id.
        path: directory that receives the file.

    Each line has the keys ``id``, ``sentence_form`` and ``annotation``. Ids are
    regenerated as ``nikluge-sa-2022-{name}-{row number, 1-based, 5 digits}``.
    """
    with open("{1}/{0}.jsonl" .format(name, path), 'w', encoding='utf-8') as file:
        for i in range( len(df_ae) ):
            record_id = "nikluge-sa-2022-{0}-{1}".format(name, str(i+1).zfill(5))
            sentence = str(df_ae['sentence_form'][i])

            # json.dumps escapes quotes, backslashes and newlines that the
            # previous hand-built string passed through, yielding invalid JSON.
            file.write('{"id": ' + json.dumps(record_id, ensure_ascii=False)
                       + ', "sentence_form": ' + json.dumps(sentence, ensure_ascii=False)
                       + ', "annotation": ' + _annotation_to_json(df_ae['annotation'][i])
                       + '}')
            file.write("\n")

# Example of one written line:
#{"id": "nikluge-sa-2022-dev-00001", "sentence_form": "깔끔하게 부직포 포장으로 되어 있어서 그냥 뜨거운 물에 풍덩 넣어놓고 좀 휘젓어주면 금방 우러난다.", "annotation": [["본품#편의성", ["부직포 포장", 5, 11], "positive"]]}
# The id is built from the file name and the 1-based row number.

## Happy Bus SOTA ensemble file 만들기

In [None]:
# Directory passed to save_jsonl for the ensemble outputs; the ensemble files
# are reloaded from the same directory afterwards.
path = '/content/Korean_ABSA/jsonl_files'
# path = '/content'

### kel5
* 총 사용 모델 : 5개
    * 단일 모델 : 5개

In [None]:
# 5 single models: load each model's saved JSONL predictions into a DataFrame.
kelec_Gpu_19 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_19.jsonl"))
kelec_Gpu_16 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_16.jsonl"))
kelec_Gpu_11 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_11.jsonl"))
kelec_Gpu_15 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_15.jsonl"))
kelec_Gpu_20 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_20.jsonl"))


In [None]:
# Members of the 5-model ensemble, in voting order.
esb_1030_kel5_list  = [kelec_Gpu_19 ,  kelec_Gpu_16,  kelec_Gpu_11, kelec_Gpu_15,  kelec_Gpu_20]

# Vote, then write the result to <path>/<name>.jsonl.
name = 'esb_1030_kel5'
df_model = excute_ensemble(esb_1030_kel5_list)
save_jsonl(df_model, name, path)

# Reload the saved ensemble; it is used as a member of a later ensemble.
esb_1030_kel5 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### kel7D
* 총 사용 모델 : 7개
    * 단일 모델 : 7개

In [None]:
# 7 single models. The bare names below only reference frames loaded in an
# earlier cell (they perform no work); the two assignments load the new members.
kelec_Gpu_19 
kelec_Gpu_16
kelec_Gpu_11
kelec_Gpu_15
kelec_Gpu_20
kelec_D_30        = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_D_30.jsonl"))
kelec_D_drop05_20 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_D_drop05_20.jsonl"))

In [None]:
# Members of the 7-model ensemble, in voting order.
esb_1030_kel7D_list = [kelec_Gpu_19 ,  kelec_Gpu_16,  kelec_Gpu_11, kelec_Gpu_15,  kelec_Gpu_20, kelec_D_30, kelec_D_drop05_20]

# Vote, then write the result to <path>/<name>.jsonl.
name = 'esb_1030_kel7D'
df_model = excute_ensemble(esb_1030_kel7D_list)
save_jsonl(df_model, name, path)

# Reload the saved ensemble; it is used as a member of a later ensemble.
esb_1030_kel7D = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### many10s
* 총 사용 모델 : 10개
    * 단일 모델 : 10개

In [None]:
# 10 single models. Bare names reference frames loaded in earlier cells; the
# assignments load the members that are new to this ensemble.
kelec_Gpu_19
kelec_Gpu_16
roberta_Gpu_16  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/roberta_Gpu_16.jsonl"))
roberta_Gpu_15  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/roberta_Gpu_15.jsonl"))
deberta_Gpu_7   = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/deberta_Gpu_7.jsonl"))
deberta_Gpu_14  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/deberta_Gpu_14.jsonl"))
kelec_Gputrl_9  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gputrl_9.jsonl"))
kelec_Gputrl_11 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gputrl_11.jsonl"))
kelec_D_30
kelec_D_drop05_20

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.",[]


In [None]:
# Members of the 10-model ensemble, in voting order.
esb_1030_many10s_list = [kelec_Gpu_19 ,  kelec_Gpu_16, roberta_Gpu_16, roberta_Gpu_15, 
                   deberta_Gpu_7, deberta_Gpu_14, kelec_Gputrl_9, kelec_Gputrl_11, kelec_D_30, kelec_D_drop05_20]

# Vote, then write the result to <path>/<name>.jsonl.
name = 'esb_1030_many10s'
df_model = excute_ensemble(esb_1030_many10s_list)
save_jsonl(df_model, name, path)

# Reload the saved ensemble; it is used as a member of later ensembles.
esb_1030_many10s = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### kelGpu_Gputgr8
* 총 사용 모델 : 8개
    * 단일 모델 : 8개

In [None]:
# 8 single models. Bare names reference frames loaded in earlier cells; the
# two assignments load the members that are new to this ensemble.
kelec_Gpu_19
kelec_Gpu_16
kelec_Gpu_11
kelec_Gpu_15
kelec_Gputrl_9
kelec_Gputrl_11
kelec_Gputrl_10 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gputrl_10.jsonl"))
kelec_Gputrl_7  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gputrl_7.jsonl"))

In [None]:
# Members of the 8-model ensemble, in voting order.
esb_1030_kelGpu_Gputgr8_list = [kelec_Gpu_19 ,  kelec_Gpu_16,  kelec_Gpu_11, kelec_Gpu_15,
                                kelec_Gputrl_9, kelec_Gputrl_11, kelec_Gputrl_10, kelec_Gputrl_7]

# Vote, then write the result to <path>/<name>.jsonl.
name = 'esb_1030_kelGpu_Gputgr8'
df_model = excute_ensemble(esb_1030_kelGpu_Gputgr8_list)
save_jsonl(df_model, name, path)

# Reload the saved ensemble; it is used as a member of later ensembles.
esb_1030_kelGpu_Gputgr8 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### max 23_3
* 총 사용 모델 : 22개
    * 단일 모델 : 19개
    * 앙상블 모델 : 3개

In [None]:
# 19 single models. Bare names reference frames loaded in earlier cells; the
# assignments load the members that are new to this ensemble.
kelec_Gpu_6 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6.jsonl"))
kelec_Gpu_11
kelec_D_30
kelec_D_drop05_20
kelec_Gputrl_9
kelec_Gputrl_11
deberta_Gpu_7F      = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/deberta_Gpu_7F.jsonl"))
deberta_Gpu_20F     = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/deberta_Gpu_20F.jsonl"))
kelec_Gpu_drop05_10 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_drop05_10.jsonl"))
kelec_Gpu_drop05_7  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_drop05_7.jsonl"))
kelec_Gpu_6F        = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6F.jsonl"))
kelec_Gpu_20F       = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_20F.jsonl"))
kelec_Gpu_itgr_LR2F = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_LR2F.jsonl"))
kelec_K_9           = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_K_9.jsonl"))
kelec_K_8           = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_K_8.jsonl"))
kelec_Gpu_itgr_LR2  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_LR2.jsonl"))
kelec_Gpu_itgr_LR1  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_LR1.jsonl"))
kelec_Gpu_itgr_drop05_9_layer = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_drop05_9_layer.jsonl"))
kelec_Gpu_itgr_drop05_8_layer = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_drop05_8_layer.jsonl"))

In [None]:
# 앙상블 모델 3개
esb_1030_kelGpu_Gputgr8
esb_1030_kel7D
esb_1030_many10s


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 22 members in voting order: the 3 earlier ensemble results first, then the
# 19 single models.
esb_1101_max23_3_list = [esb_1030_kelGpu_Gputgr8, esb_1030_kel7D, esb_1030_many10s,
                    kelec_Gpu_6, kelec_Gpu_11, kelec_D_30, kelec_D_drop05_20, kelec_Gputrl_9, kelec_Gputrl_11,
                    deberta_Gpu_7F, deberta_Gpu_20F, kelec_Gpu_drop05_10, kelec_Gpu_drop05_7,
                    kelec_Gpu_6F, kelec_Gpu_20F, kelec_Gpu_itgr_LR2F, kelec_K_9, kelec_K_8,
                    kelec_Gpu_itgr_LR2, kelec_Gpu_itgr_LR1, kelec_Gpu_itgr_drop05_9_layer, kelec_Gpu_itgr_drop05_8_layer]

# Vote, then write the result to <path>/<name>.jsonl.
name = 'esb_1101_max23_3'
df_model = excute_ensemble(esb_1101_max23_3_list)
save_jsonl(df_model, name, path)

# Reload the saved ensemble from disk as a DataFrame.
esb_1101_max23_3 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### max 23_2
* 총 사용 모델 : 22개
    * 단일 모델 : 19개
    * 앙상블 모델 : 3개

In [None]:
# 단일 모델 19개
kelec_Gpu_6
kelec_Gpu_11
kelec_D_30
kelec_D_drop05_20
kelec_Gputrl_9
kelec_Gputrl_11
deberta_Gpu_7F
deberta_Gpu_20F
kelec_Gpu_drop05_10
kelec_Gpu_drop05_7
kelec_Gpu_6F
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_9
kelec_K_8
kelec_Gpu_itgr_LR2
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer
kelec_Gpu_itgr_drop05_8_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,[]
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Ensemble models: 3
# Bare names only enumerate the inputs; the last one is what the notebook shows.
esb_1030_kelGpu_Gputgr8
esb_1030_kel5
esb_1030_many10s


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 22 frames for the "max 23_2" ensemble: 3 ensemble outputs followed by
# 19 single-model outputs. Element order is exactly that of the original cell.
esb_1101_max23_2_list = [
    # ensemble outputs
    esb_1030_kelGpu_Gputgr8,
    esb_1030_kel5,
    esb_1030_many10s,
    # single-model outputs
    kelec_Gpu_6,
    kelec_Gpu_11,
    kelec_D_30,
    kelec_D_drop05_20,
    kelec_Gputrl_9,
    kelec_Gputrl_11,
    deberta_Gpu_7F,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_10,
    kelec_Gpu_drop05_7,
    kelec_Gpu_6F,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_9,
    kelec_K_8,
    kelec_Gpu_itgr_LR2,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
    kelec_Gpu_itgr_drop05_8_layer,
]

# Build the ensemble and hand it to save_jsonl (presumably written as
# <path>/<name>.jsonl, since that is the file reloaded below).
name = 'esb_1101_max23_2'
df_model = excute_ensemble(esb_1101_max23_2_list)
save_jsonl(df_model, name, path)

# Reload the saved result so it can be used as an input to later ensembles.
esb_1101_max23_2 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### max 27_13
* 총 사용 모델 : 26개
    * 단일 모델 : 23개
    * 앙상블 모델 : 3개

In [None]:
# Single models: 23
# The first five are loaded here from the cloned repository's jsonl_files
# directory; the remaining names are bare expressions that only enumerate
# frames expected to exist already (the notebook renders the last one).
kelec_Gpu_6F_LR1 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6F_LR1.jsonl"))
kelec_Gpu_6F_LR2 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6F_LR2.jsonl"))
kelec_Gpu_6F_LR3 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6F_LR3.jsonl"))
kelec_Gpu_6_LR2  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_LR2.jsonl"))
kelec_Gpu_6_58   = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_58.jsonl"))
kelec_Gpu_6
kelec_Gpu_11
kelec_D_30
kelec_D_drop05_20
kelec_Gputrl_11
deberta_Gpu_7F
deberta_Gpu_20F
kelec_Gpu_drop05_10
kelec_Gpu_drop05_7
kelec_Gpu_6F
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_9
kelec_K_8
kelec_Gpu_itgr_LR2
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer
kelec_Gpu_itgr_drop05_8_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,[]
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Ensemble models: 3
# Bare names only enumerate the inputs; the last one is what the notebook shows.
esb_1030_kelGpu_Gputgr8
esb_1030_kel5
esb_1030_many10s


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 26 frames for the "max 27_13" ensemble: 23 single-model outputs and
# 3 ensemble outputs (the esb_* names), interleaved exactly as in the
# original cell.
esb_1102_max27_13_list = [
    kelec_Gpu_6F_LR1,
    kelec_Gpu_6F_LR2,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_kel5,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6,
    kelec_Gpu_11,
    kelec_D_30,
    kelec_D_drop05_20,
    kelec_Gputrl_11,
    deberta_Gpu_7F,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_10,
    kelec_Gpu_drop05_7,
    kelec_Gpu_6F,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_9,
    kelec_K_8,
    kelec_Gpu_itgr_LR2,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
    kelec_Gpu_itgr_drop05_8_layer,
]

# Build the ensemble and hand it to save_jsonl (presumably written as
# <path>/<name>.jsonl, since that is the file reloaded below).
name = 'esb_1102_max27_13'
df_model = excute_ensemble(esb_1102_max27_13_list)
save_jsonl(df_model, name, path)

# Reload the saved result so it can be used as an input to later ensembles.
esb_1102_max27_13 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### max 27_6
* 총 사용 모델 : 26개
    * 단일 모델 : 24개
    * 앙상블 모델 : 2개

In [None]:
# Single models: 24
# Bare names only enumerate the frames used by this ensemble (they must
# already be defined); by default a notebook renders just the last expression.
kelec_Gpu_6F_LR1
kelec_Gpu_6F_LR2
kelec_Gpu_6F_LR3
kelec_Gpu_6_LR2
kelec_Gpu_6_58
kelec_Gpu_6
kelec_Gpu_11
kelec_D_30
kelec_D_drop05_20
kelec_Gputrl_9
kelec_Gputrl_11
deberta_Gpu_7F
deberta_Gpu_20F
kelec_Gpu_drop05_10
kelec_Gpu_drop05_7
kelec_Gpu_6F
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_9
kelec_K_8
kelec_Gpu_itgr_LR2
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer
kelec_Gpu_itgr_drop05_8_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,[]
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Ensemble models: 2
# Bare names only enumerate the inputs; the last one is what the notebook shows.
esb_1030_kelGpu_Gputgr8
esb_1030_many10s


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 26 frames for the "max 27_6" ensemble: 24 single-model outputs and
# 2 ensemble outputs (the esb_* names), interleaved exactly as in the
# original cell.
esb_1102_max27_6_list = [
    kelec_Gpu_6F_LR1,
    kelec_Gpu_6F_LR2,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6,
    kelec_Gpu_11,
    kelec_D_30,
    kelec_D_drop05_20,
    kelec_Gputrl_9,
    kelec_Gputrl_11,
    deberta_Gpu_7F,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_10,
    kelec_Gpu_drop05_7,
    kelec_Gpu_6F,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_9,
    kelec_K_8,
    kelec_Gpu_itgr_LR2,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
    kelec_Gpu_itgr_drop05_8_layer,
]

# Build the ensemble and hand it to save_jsonl (presumably written as
# <path>/<name>.jsonl, since that is the file reloaded below).
name = 'esb_1102_max27_6'
df_model = excute_ensemble(esb_1102_max27_6_list)
save_jsonl(df_model, name, path)

# Reload the saved result so it can be used as an input to later ensembles.
esb_1102_max27_6 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### max 26_x2
* 총 사용 모델 26개
    * 단일 모델 19개 (1개 모델 중복 사용 : 총 20개)
    * 앙상블 모델 4개 (2개 모델 중복 사용 : 총 6개)

In [None]:
# Single models: 19 (one is used twice, so 20 entries in total)
# kelec_Gpu_itgr_LR2 is the repeated entry. Bare names only enumerate the
# frames; by default a notebook renders just the last expression.
kelec_Gpu_6
kelec_Gpu_11
kelec_D_30
kelec_D_drop05_20
kelec_Gputrl_9
kelec_Gputrl_11
deberta_Gpu_7F
deberta_Gpu_20F
kelec_Gpu_drop05_10
kelec_Gpu_drop05_7
kelec_Gpu_6F
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_9
kelec_K_8
kelec_Gpu_itgr_LR2
kelec_Gpu_itgr_LR2
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer
kelec_Gpu_itgr_drop05_8_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,[]
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Ensemble models: 4 (two are used twice, so 6 entries in total)
# esb_1030_kelGpu_Gputgr8 and esb_1030_kel7D are the repeated entries.
esb_1030_kelGpu_Gputgr8
esb_1030_kelGpu_Gputgr8
esb_1030_kel7D
esb_1030_kel7D
esb_1030_kel5
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 26 entries for the "max 26_x2" ensemble: 6 ensemble entries followed by
# 20 single-model entries. Three frames appear twice on purpose
# (esb_1030_kelGpu_Gputgr8, esb_1030_kel7D, kelec_Gpu_itgr_LR2); do not
# de-duplicate. Element order is exactly that of the original cell.
esb_1101_max26_x2_list = [
    # ensemble outputs
    esb_1030_kelGpu_Gputgr8,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_kel7D,
    esb_1030_kel7D,
    esb_1030_kel5,
    esb_1030_many10s,
    # single-model outputs
    kelec_Gpu_6,
    kelec_Gpu_11,
    kelec_D_30,
    kelec_D_drop05_20,
    kelec_Gputrl_9,
    kelec_Gputrl_11,
    deberta_Gpu_7F,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_10,
    kelec_Gpu_drop05_7,
    kelec_Gpu_6F,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_9,
    kelec_K_8,
    kelec_Gpu_itgr_LR2,
    kelec_Gpu_itgr_LR2,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
    kelec_Gpu_itgr_drop05_8_layer,
]

# Build the ensemble and hand it to save_jsonl (presumably written as
# <path>/<name>.jsonl, since that is the file reloaded below).
name = 'esb_1101_max26_x2'
df_model = excute_ensemble(esb_1101_max26_x2_list)
save_jsonl(df_model, name, path)

# Reload the saved result so it can be used as an input to later ensembles.
esb_1101_max26_x2 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### desire 36_18
* 총 사용 모델 : 35개
    * 단일 모델 : 31개
    * 앙상블 모델 : 4개

In [None]:
# Single models: 31
# Ten frames are loaded here from the cloned repository's jsonl_files
# directory; the remaining names are bare expressions that only enumerate
# frames expected to exist already (the notebook renders the last one).
kelec_Gpu_6F_LR1
kelec_Gpu_6F_LR2
kelec_K_drop_05_7_Ma32_LR2           = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_K_drop_05_7_Ma32_LR2.jsonl"))
kelec_K_drop_05_7_LR_1_Ma32_LR_2     = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_K_drop_05_7_LR_1_Ma32_LR_2.jsonl"))
kelec_K_drop_05_7_LR_1_Ma32_LR_2F    = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_K_drop_05_7_LR_1_Ma32_LR_2F.jsonl"))
kelec_Gpu_6_LR1_Ma32_LR_2F           = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_LR1_Ma32_LR_2F.jsonl"))
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_LR_1_step3more_Ma32_LR_2.jsonl"))
kelec_Gpu_6_LR_1_step3more_Ma32F     = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_LR_1_step3more_Ma32F.jsonl"))
kelec_Gpu_LR6_attemp2_LR3            = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_LR6_attemp2_LR3.jsonl"))
kelec_Gpu_6_Ma32                     = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_6_Ma32.jsonl"))
kelec_Gpu_itgr_drop_layer_12         = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_Gpu_itgr_drop_layer_12.jsonl"))
kelec_K_drop_05_7                    = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_K_drop_05_7.jsonl"))
kelec_Gpu_6F_LR3
kelec_Gpu_6_LR2
kelec_Gpu_6_58
kelec_Gpu_6
kelec_Gpu_11
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_10
kelec_Gpu_drop05_7
kelec_Gpu_6F
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_9
kelec_K_8
kelec_Gpu_itgr_LR2
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer
kelec_Gpu_itgr_drop05_8_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,[]
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Ensemble models: 4
# Bare names only enumerate the inputs; the last one is what the notebook shows.
esb_1101_max23_2
esb_1101_max23_3
esb_1101_max26_x2
esb_1030_many10s


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 35 frames for the "desire 36_18" ensemble: 31 single-model outputs and
# 4 ensemble outputs (the esb_* names), in exactly the original cell's order.
esb_1103_desire36_18_list = [
    esb_1101_max23_2,
    esb_1101_max23_3,
    esb_1101_max26_x2,
    kelec_Gpu_6F_LR1,
    kelec_Gpu_6F_LR2,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2F,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_Gpu_LR6_attemp2_LR3,
    kelec_Gpu_6_Ma32,
    kelec_Gpu_itgr_drop_layer_12,
    kelec_K_drop_05_7,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6,
    kelec_Gpu_11,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_10,
    kelec_Gpu_drop05_7,
    kelec_Gpu_6F,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_9,
    kelec_K_8,
    kelec_Gpu_itgr_LR2,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
    kelec_Gpu_itgr_drop05_8_layer,
]

# Build the ensemble and hand it to save_jsonl (presumably written as
# <path>/<name>.jsonl, since that is the file reloaded below).
name = 'esb_1103_desire36_18'
df_model = excute_ensemble(esb_1103_desire36_18_list)
save_jsonl(df_model, name, path)

# Reload the saved result into its own frame.
esb_1103_desire36_18 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### max D 3_2
* 총 사용 모델 : 29개
    * 단일 모델 : 25개
    * 앙상블 모델 : 4개

In [None]:
# Single models: 25
# Bare names only enumerate the frames used by this ensemble (they must
# already be defined); by default a notebook renders just the last expression.
kelec_Gpu_6F_LR1
kelec_Gpu_6F_LR2
kelec_Gpu_LR6_attemp2_LR3
kelec_Gpu_6_Ma32
kelec_Gpu_itgr_drop_layer_12
kelec_K_drop_05_7  
kelec_Gpu_6F_LR3
kelec_Gpu_6_LR2
kelec_Gpu_6_58
kelec_Gpu_6
kelec_Gpu_11
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_10
kelec_Gpu_drop05_7
kelec_Gpu_6F
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_9
kelec_K_8
kelec_Gpu_itgr_LR2
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer
kelec_Gpu_itgr_drop05_8_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,[]
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Ensemble models: 4
# Bare names only enumerate the inputs; the last one is what the notebook shows.
esb_1101_max23_2
esb_1101_max26_x2
esb_1030_kelGpu_Gputgr8
esb_1030_many10s


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 29 frames for the "max D 3_2" ensemble: 25 single-model outputs and
# 4 ensemble outputs (the esb_* names), in exactly the original cell's order.
esb_1103_maxD3_2_list = [
    esb_1101_max23_2,
    esb_1101_max26_x2,
    kelec_Gpu_6F_LR1,
    kelec_Gpu_6F_LR2,
    kelec_Gpu_LR6_attemp2_LR3,
    kelec_Gpu_6_Ma32,
    kelec_Gpu_itgr_drop_layer_12,
    kelec_K_drop_05_7,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6,
    kelec_Gpu_11,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_10,
    kelec_Gpu_drop05_7,
    kelec_Gpu_6F,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_9,
    kelec_K_8,
    kelec_Gpu_itgr_LR2,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
    kelec_Gpu_itgr_drop05_8_layer,
]

# Build the ensemble and hand it to save_jsonl (presumably written as
# <path>/<name>.jsonl, since that is the file reloaded below).
name = 'esb_1103_maxD3_2'
df_model = excute_ensemble(esb_1103_maxD3_2_list)
save_jsonl(df_model, name, path)

# Reload the saved result into its own frame.
esb_1103_maxD3_2 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### hh 30_14
* 총 사용 모델 : 29개
    * 단일 모델 : 23개
    * 앙상블 모델 : 6개

In [None]:
# Single models: 23
# Bare names only enumerate the frames used by this ensemble; by default a
# notebook renders just the last expression. The stray trailing commas that
# were copied over from the list literal have been dropped: `name ,` built
# and discarded a 1-tuple, and would have changed the rendered output to a
# tuple had it ever ended up on the last line.
kelec_Gpu_6F_LR1
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_K_drop_05_7_LR_1_Ma32_LR_2F
kelec_Gpu_6_LR1_Ma32_LR_2F
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_LR_1_step3more_Ma32F
kelec_Gpu_LR6_attemp2_LR3
kelec_Gpu_6_Ma32
kelec_K_drop_05_7
kelec_Gpu_6F_LR3
kelec_Gpu_6_LR2
kelec_Gpu_6_58
kelec_Gpu_6
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
# The only frame loaded in this cell; the others must already be defined.
kelec_K_15 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_K_15.jsonl"))
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Ensemble models: 6
# Bare names only enumerate the inputs; the last one is what the notebook shows.
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 29 frames for the "hh 30_14" ensemble: 23 single-model outputs and
# 6 ensemble outputs (the esb_* names), in exactly the original cell's order.
esb_1103_hh30_14_list = [
    esb_1102_max27_6,
    esb_1102_max27_13,
    esb_1101_max23_2,
    esb_1101_max23_3,
    kelec_Gpu_6F_LR1,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2F,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_Gpu_LR6_attemp2_LR3,
    kelec_Gpu_6_Ma32,
    kelec_K_drop_05_7,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble and hand it to save_jsonl (presumably written as
# <path>/<name>.jsonl, since that is the file reloaded below).
name = 'esb_1103_hh30_14'
df_model = excute_ensemble(esb_1103_hh30_14_list)
save_jsonl(df_model, name, path)

# Reload the saved result into its own frame.
esb_1103_hh30_14 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### hh 30_5
* 총 사용 모델 : 29개
    * 단일 모델 : 23개
    * 앙상블 모델 : 6개

In [None]:
# Single models: 23
# Bare names only enumerate the frames used by this ensemble (they must
# already be defined); by default a notebook renders just the last expression.
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_K_drop_05_7_LR_1_Ma32_LR_2F
kelec_Gpu_6_LR1_Ma32_LR_2F
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_LR_1_step3more_Ma32F
kelec_Gpu_LR6_attemp2_LR3
kelec_Gpu_6_Ma32
kelec_Gpu_itgr_drop_layer_12
kelec_K_drop_05_7 
kelec_Gpu_6F_LR3
kelec_Gpu_6_LR2
kelec_Gpu_6_58
kelec_Gpu_6
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Ensemble models: 6
# Bare names only enumerate the inputs; the last one is what the notebook shows.
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 29 frames for the "hh 30_5" ensemble: 23 single-model outputs and
# 6 ensemble outputs (the esb_* names), in exactly the original cell's order.
esb_1103_hh30_5_list = [
    esb_1102_max27_6,
    esb_1102_max27_13,
    esb_1101_max23_2,
    esb_1101_max23_3,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2F,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_Gpu_LR6_attemp2_LR3,
    kelec_Gpu_6_Ma32,
    kelec_Gpu_itgr_drop_layer_12,
    kelec_K_drop_05_7,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble and hand it to save_jsonl (presumably written as
# <path>/<name>.jsonl, since that is the file reloaded below).
name = 'esb_1103_hh30_5'
df_model = excute_ensemble(esb_1103_hh30_5_list)
save_jsonl(df_model, name, path)

# Reload the saved result into its own frame.
esb_1103_hh30_5 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### hh 30_8
* 총 사용 모델 : 29개
    * 단일 모델 : 23개
    * 앙상블 모델 : 6개

In [None]:
# Single models: 23
# Bare names only enumerate the frames used by this ensemble (they must
# already be defined); by default a notebook renders just the last expression.
kelec_Gpu_6F_LR1
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_Gpu_6_LR1_Ma32_LR_2F
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_LR_1_step3more_Ma32F
kelec_Gpu_LR6_attemp2_LR3
kelec_Gpu_6_Ma32
kelec_Gpu_itgr_drop_layer_12
kelec_K_drop_05_7 
kelec_Gpu_6F_LR3
kelec_Gpu_6_LR2
kelec_Gpu_6_58
kelec_Gpu_6
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Ensemble models: 6
# Bare names only enumerate the inputs; the last one is what the notebook shows.
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 29 frames for the "hh 30_8" ensemble: 23 single-model outputs and
# 6 ensemble outputs (the esb_* names), in exactly the original cell's order.
esb_1103_hh30_8_list = [
    esb_1102_max27_6,
    esb_1102_max27_13,
    esb_1101_max23_2,
    esb_1101_max23_3,
    kelec_Gpu_6F_LR1,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_Gpu_LR6_attemp2_LR3,
    kelec_Gpu_6_Ma32,
    kelec_Gpu_itgr_drop_layer_12,
    kelec_K_drop_05_7,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble and hand it to save_jsonl (presumably written as
# <path>/<name>.jsonl, since that is the file reloaded below).
name = 'esb_1103_hh30_8'
df_model = excute_ensemble(esb_1103_hh30_8_list)
save_jsonl(df_model, name, path)

# Reload the saved result into its own frame.
esb_1103_hh30_8 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### refine4
* 총 사용 모델 : 4개
    * 단일 모델 : 4개

In [None]:
# Load the four kelec_refind_*_Ma32 prediction files from the cloned
# repository's jsonl_files directory, one DataFrame per file.
kelec_refind_8_Ma32 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_refind_8_Ma32.jsonl"))
kelec_refind_7_Ma32 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_refind_7_Ma32.jsonl"))
kelec_refind_6_Ma32 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_refind_6_Ma32.jsonl"))
kelec_refind_5_Ma32 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kelec_refind_5_Ma32.jsonl"))

In [None]:
# Members of the esb_1107_refine4 ensemble, in the original order.
esb_1107_refine4_list = [
    kelec_refind_8_Ma32,
    kelec_refind_7_Ma32,
    kelec_refind_6_Ma32,
    kelec_refind_5_Ma32,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1107_refine4'
df_model = excute_ensemble(esb_1107_refine4_list)
save_jsonl(df_model, name, path)
esb_1107_refine4 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### stroke4deb
* 총 사용 모델 : 4개
    * 앙상블 모델 : 4개


In [None]:
# Load the four prediction files that make up the stroke4deb ensemble,
# each wrapped in a DataFrame.
# NOTE(review): the section heading calls these "ensemble models", i.e. each
# file is presumably itself the output of an earlier ensemble - confirm.
Deberta_Kdr_GPU_Kup_Original_Forcing  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/Deberta_Kdr_GPU_Kup_Original_Forcing.jsonl"))
Deberta_Kup_GPU_Kdr_Original_Forcing  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/Deberta_Kup_GPU_Kdr_Original_Forcing.jsonl"))
Original_Kup8_Gpu_Kdr_Deberta_Forcing = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/Original_Kup8_Gpu_Kdr_Deberta_Forcing.jsonl"))
Original_Kdr_Gpu_Kup8_Deberta_Forcing = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/Original_Kdr_Gpu_Kup8_Deberta_Forcing.jsonl"))

In [None]:
# Members of the esb_1107_stroke4deb ensemble, in the original order.
esb_1107_stroke4deb_list = [
    Deberta_Kdr_GPU_Kup_Original_Forcing,
    Deberta_Kup_GPU_Kdr_Original_Forcing,
    Original_Kup8_Gpu_Kdr_Deberta_Forcing,
    Original_Kdr_Gpu_Kup8_Deberta_Forcing,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1107_stroke4deb'
df_model = excute_ensemble(esb_1107_stroke4deb_list)
save_jsonl(df_model, name, path)
esb_1107_stroke4deb = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### stroke6
* 총 사용 모델 : 6개
    * 앙상블 모델 : 6개


In [None]:
# 6 ensemble models
# The first four names are bare lookups of DataFrames loaded in the stroke4deb
# section (no side effect); the last two files are loaded here.
Deberta_Kdr_GPU_Kup_Original_Forcing
Deberta_Kup_GPU_Kdr_Original_Forcing
Original_Kup8_Gpu_Kdr_Deberta_Forcing
Original_Kdr_Gpu_Kup8_Deberta_Forcing
gpu_kdr_kup8_deberta_forcing = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/gpu_kdr_kup8_deberta_forcing.jsonl"))
kup8_gpu_kdr_deberta_forcing = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kup8_gpu_kdr_deberta_forcing.jsonl"))

In [None]:
# Members of the esb_1107_stroke6 ensemble, in the original order.
esb_1107_stroke6_list = [
    Deberta_Kdr_GPU_Kup_Original_Forcing,
    Deberta_Kup_GPU_Kdr_Original_Forcing,
    Original_Kup8_Gpu_Kdr_Deberta_Forcing,
    Original_Kdr_Gpu_Kup8_Deberta_Forcing,
    gpu_kdr_kup8_deberta_forcing,
    kup8_gpu_kdr_deberta_forcing,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1107_stroke6'
df_model = excute_ensemble(esb_1107_stroke6_list)
save_jsonl(df_model, name, path)
esb_1107_stroke6 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### force threshold 1.75 4
* 총 사용 모델 : 4개
    * 단일 모델 : 4개


In [None]:
# 4 single models
# Load the four prediction files of the "force threshold 1.75" section,
# each wrapped in a DataFrame.
GpuLR_175    = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/GpuLR_175.jsonl"))
Original_175 = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/Original_175.jsonl"))
Kup_175      = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/Kup_175.jsonl"))
K_175        = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/K_175.jsonl"))

In [None]:
# Members of the esb_175_4 ensemble, in the original order.
esb_175_4_list = [
    GpuLR_175,
    Original_175,
    Kup_175,
    K_175,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_175_4'
df_model = excute_ensemble(esb_175_4_list)
save_jsonl(df_model, name, path)
esb_175_4 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### YD 32_23
* 총 사용 모델 : 31개
    * 단일 모델 : 20개
    * 앙상블 모델 : 11개

In [None]:
# 20 single models
# Each line is a bare name lookup with no side effect: it only requires that
# the variable was defined by an earlier cell. The notebook displays the value
# of the last line.
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_Gpu_6_LR1_Ma32_LR_2F 
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2 
kelec_Gpu_6_LR_1_step3more_Ma32F 
kelec_Gpu_LR6_attemp2_LR3 
kelec_Gpu_6_Ma32
kelec_K_drop_05_7  
kelec_Gpu_6F_LR3
kelec_Gpu_6_LR2
kelec_Gpu_6_58
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 11 ensemble models
# Bare name lookups only (no side effect); the notebook displays the value of
# the last line.
esb_1103_hh30_8
esb_1103_hh30_14
esb_1103_hh30_5
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the esb_1103_YD32_23 ensemble (31 entries), in the original order.
esb_1103_YD32_23_list = [
    esb_1103_hh30_8,
    esb_1103_hh30_14,
    esb_1103_hh30_5,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    esb_1102_max27_13,
    esb_1101_max23_2,
    esb_1101_max23_3,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_Gpu_LR6_attemp2_LR3,
    kelec_Gpu_6_Ma32,
    kelec_K_drop_05_7,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1103_YD32_23'
df_model = excute_ensemble(esb_1103_YD32_23_list)
save_jsonl(df_model, name, path)
esb_1103_YD32_23 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### YD 32_16
* 총 사용 모델 : 31개
    * 단일 모델 : 20개
    * 앙상블 모델 : 11개

In [None]:
# 20 single models
# Each line is a bare name lookup with no side effect: it only requires that
# the variable was defined by an earlier cell. The notebook displays the value
# of the last line.
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_Gpu_6_LR1_Ma32_LR_2F 
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2 
kelec_Gpu_6_LR_1_step3more_Ma32F 
kelec_Gpu_LR6_attemp2_LR3 
kelec_K_drop_05_7  
kelec_Gpu_6F_LR3
kelec_Gpu_6_LR2
kelec_Gpu_6_58
kelec_Gpu_6
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 11 ensemble models
# Bare name lookups only (no side effect); the notebook displays the value of
# the last line.
esb_1103_hh30_8
esb_1103_hh30_14
esb_1103_hh30_5
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the esb_1103_YD32_16 ensemble (31 entries), in the original order.
esb_1103_YD32_16_list = [
    esb_1103_hh30_8,
    esb_1103_hh30_14,
    esb_1103_hh30_5,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    esb_1102_max27_13,
    esb_1101_max23_2,
    esb_1101_max23_3,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_Gpu_LR6_attemp2_LR3,
    kelec_K_drop_05_7,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1103_YD32_16'
df_model = excute_ensemble(esb_1103_YD32_16_list)
save_jsonl(df_model, name, path)
esb_1103_YD32_16 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### GB 30_2
* 총 사용 모델 : 29개
    * 단일 모델 : 19개
    * 앙상블 모델 : 10개

In [None]:
# 19 single models
# Each line is a bare name lookup with no side effect: it only requires that
# the variable was defined by an earlier cell. The notebook displays the value
# of the last line.
kelec_Gpu_6F_LR1
kelec_K_drop_05_7_Ma32_LR2 
kelec_K_drop_05_7_LR_1_Ma32_LR_2 
kelec_Gpu_6_LR1_Ma32_LR_2F 
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_LR2
kelec_Gpu_6_Ma32 
kelec_K_drop_05_7
kelec_Gpu_6_58
kelec_Gpu_6_LR_1_step3more_Ma32F 
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 10 ensemble models
# Bare name lookups only (no side effect); the notebook displays the value of
# the last line.
esb_1103_hh30_8
esb_1103_hh30_5
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the esb_1103_GB30_2 ensemble (29 entries), in the original order.
esb_1103_GB30_2_list = [
    esb_1103_hh30_8,
    esb_1103_hh30_5,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    esb_1102_max27_13,
    esb_1101_max23_2,
    esb_1101_max23_3,
    kelec_Gpu_6F_LR1,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    kelec_Gpu_6_Ma32,
    kelec_K_drop_05_7,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1103_GB30_2'
df_model = excute_ensemble(esb_1103_GB30_2_list)
save_jsonl(df_model, name, path)
esb_1103_GB30_2 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### GB 30_3
* 총 사용 모델 : 29개
    * 단일 모델 : 19개
    * 앙상블 모델 : 10개

In [None]:
# 19 single models
# Each line is a bare name lookup with no side effect: it only requires that
# the variable was defined by an earlier cell. The notebook displays the value
# of the last line.
kelec_Gpu_6F_LR1
kelec_K_drop_05_7_Ma32_LR2 
kelec_K_drop_05_7_LR_1_Ma32_LR_2 
kelec_Gpu_6_LR1_Ma32_LR_2F 
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_LR2
kelec_Gpu_6_Ma32 
kelec_K_drop_05_7
kelec_Gpu_6_58
kelec_Gpu_6_LR_1_step3more_Ma32F 
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 10 ensemble models
# Bare name lookups only (no side effect); the notebook displays the value of
# the last line.
esb_1103_hh30_8
esb_1103_hh30_14
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the esb_1103_GB30_3 ensemble (29 entries), in the original order.
esb_1103_GB30_3_list = [
    esb_1103_hh30_8,
    esb_1103_hh30_14,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    esb_1102_max27_13,
    esb_1101_max23_2,
    esb_1101_max23_3,
    kelec_Gpu_6F_LR1,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    kelec_Gpu_6_Ma32,
    kelec_K_drop_05_7,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1103_GB30_3'
df_model = excute_ensemble(esb_1103_GB30_3_list)
save_jsonl(df_model, name, path)
esb_1103_GB30_3 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### GB 30_15
* 총 사용 모델 : 29개
    * 단일 모델 : 18개
    * 앙상블 모델 : 11개

In [None]:
# 18 single models
# Each line is a bare name lookup with no side effect: it only requires that
# the variable was defined by an earlier cell. The notebook displays the value
# of the last line.
kelec_Gpu_6F_LR1
kelec_K_drop_05_7_Ma32_LR2 
kelec_K_drop_05_7_LR_1_Ma32_LR_2 
kelec_Gpu_6_LR1_Ma32_LR_2F 
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_Ma32 
kelec_K_drop_05_7
kelec_Gpu_6_58
kelec_Gpu_6_LR_1_step3more_Ma32F 
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 11 ensemble models
# Bare name lookups only (no side effect); the notebook displays the value of
# the last line.
esb_1103_hh30_8
esb_1103_hh30_14
esb_1103_hh30_5
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the esb_1103_GB30_15 ensemble (29 entries), in the original order.
esb_1103_GB30_15_list = [
    esb_1103_hh30_8,
    esb_1103_hh30_14,
    esb_1103_hh30_5,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    esb_1102_max27_13,
    esb_1101_max23_2,
    esb_1101_max23_3,
    kelec_Gpu_6F_LR1,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    esb_1030_kelGpu_Gputgr8,
    kelec_Gpu_6_Ma32,
    kelec_K_drop_05_7,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1103_GB30_15'
df_model = excute_ensemble(esb_1103_GB30_15_list)
save_jsonl(df_model, name, path)
esb_1103_GB30_15 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### OB33_6
* 총 사용 모델 : 32개
    * 단일 모델 : 16개
    * 앙상블 모델 : 16개

In [None]:
# 16 single models
# Most lines are bare name lookups of variables defined by earlier cells
# (no side effect). deberta_Gpu_8 and roberta_Gpu_9 are loaded here from
# their prediction files. The notebook displays the value of the last line.
kelec_K_drop_05_7_Ma32_LR2
kelec_Gpu_6_LR1_Ma32_LR_2F
roberta_Gpu_16
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_LR2
kelec_K_drop_05_7
kelec_Gpu_6_58
deberta_Gpu_8    = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/deberta_Gpu_8.jsonl"))
roberta_Gpu_9    = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/roberta_Gpu_9.jsonl"))
kelec_D_30
kelec_D_drop05_20
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 16 ensemble models
# Bare name lookups only (no side effect); the notebook displays the value of
# the last line.
esb_1103_GB30_15
esb_1103_GB30_2
esb_1103_GB30_3
esb_1103_YD32_16
esb_1103_YD32_23
esb_1103_hh30_8
esb_1103_hh30_14
esb_1103_hh30_5
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the esb_1106_OB33_6 ensemble: 32 entries (16 single models and
# 16 earlier ensembles), as documented in this section's heading.
# esb_1103_hh30_14 is part of the documented 16 ensembles and of the member
# listing cell above, so it belongs in this list as well.
# NOTE(review): the list previously had 31 entries and omitted
# esb_1103_hh30_14; confirm against the originally submitted run, since the
# membership changes the ensemble output.
esb_1106_OB33_6_list = [esb_1103_GB30_15, esb_1103_GB30_2, esb_1103_GB30_3, esb_1103_YD32_16, esb_1103_YD32_23,
                        esb_1103_hh30_8, esb_1103_hh30_14, esb_1103_hh30_5, esb_1103_maxD3_2, esb_1103_desire36_18,
                        esb_1102_max27_6, esb_1102_max27_13, esb_1101_max23_2, esb_1101_max23_3,
                        kelec_K_drop_05_7_Ma32_LR2, kelec_Gpu_6_LR1_Ma32_LR_2F, roberta_Gpu_16,
                        kelec_Gpu_6_LR_1_step3more_Ma32_LR_2, kelec_Gpu_6_LR2, esb_1030_kelGpu_Gputgr8,
                        kelec_K_drop_05_7, esb_1030_many10s, kelec_Gpu_6_58, deberta_Gpu_8, roberta_Gpu_9,
                        kelec_D_30, kelec_D_drop05_20, kelec_Gpu_drop05_7, kelec_Gpu_20F,
                        kelec_Gpu_itgr_LR2F, kelec_K_15, kelec_Gpu_itgr_drop05_9_layer]

# Build the ensemble, save it under `name`, then read the saved file back
# so later cells can use the result as a DataFrame.
name = 'esb_1106_OB33_6'
df_model = excute_ensemble(esb_1106_OB33_6_list)
save_jsonl(df_model, name, path)

esb_1106_OB33_6 = pd.DataFrame(jsonlload(path + "/" + name + ".jsonl"))

### X_MAS 28_12
* 총 사용 모델 : 27개
    * 단일 모델 : 16개
    * 앙상블 모델 : 11개

In [None]:
# 16 single models
# Each line is a bare name lookup with no side effect: it only requires that
# the variable was defined by an earlier cell. The notebook displays the value
# of the last line.
kelec_K_drop_05_7_Ma32_LR2
kelec_Gpu_6_LR1_Ma32_LR_2F 
roberta_Gpu_16
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_LR2
kelec_K_drop_05_7
kelec_Gpu_6_58
deberta_Gpu_8
roberta_Gpu_9
kelec_D_30
kelec_D_drop05_20
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 11 ensemble models
# Bare name lookups only (no side effect); the notebook displays the value of
# the last line.
esb_1103_hh30_8
esb_1103_hh30_14
esb_1103_hh30_5
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1102_max27_13
esb_1101_max23_2
esb_1101_max23_3
esb_1030_kelGpu_Gputgr8
esb_1030_many10s


Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the esb_1105_X_MAS28_12 ensemble (27 entries), in the original order.
esb_1105_X_MAS28_12_list = [
    esb_1103_hh30_8,
    esb_1103_hh30_14,
    esb_1103_hh30_5,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    esb_1102_max27_13,
    esb_1101_max23_2,
    esb_1101_max23_3,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    roberta_Gpu_16,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    kelec_K_drop_05_7,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    deberta_Gpu_8,
    roberta_Gpu_9,
    kelec_D_30,
    kelec_D_drop05_20,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1105_X_MAS28_12'
df_model = excute_ensemble(esb_1105_X_MAS28_12_list)
save_jsonl(df_model, name, path)
esb_1105_X_MAS28_12 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### FULLSTACK 33_2
* 총 사용 모델 : 32개
    * 단일 모델 : 19개
    * 앙상블 모델 : 13개

In [None]:
# 19 single models
# Each line is a bare name lookup with no side effect: it only requires that
# the variable was defined by an earlier cell. The notebook displays the value
# of the last line.
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_Gpu_6_LR1_Ma32_LR_2F
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_LR_1_step3more_Ma32F
kelec_Gpu_LR6_attemp2_LR3
kelec_K_drop_05_7
kelec_Gpu_6F_LR3
kelec_Gpu_6_LR2
kelec_Gpu_6_58
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 13 ensemble models
# Most lines are bare name lookups of variables defined by earlier cells
# (no side effect). kup_kdr_gpu_deberta_forcing is loaded here from its
# prediction file. The notebook displays the value of the last line.
esb_1103_GB30_15
esb_1103_YD32_16
esb_1103_YD32_23
esb_1105_X_MAS28_12
kup_kdr_gpu_deberta_forcing = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/kup_kdr_gpu_deberta_forcing.jsonl"))
esb_1103_hh30_8
esb_1103_hh30_14
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1101_max23_2
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the esb_1106_FULLSTACK33_2 ensemble (32 entries), in the original order.
esb_1106_FULLSTACK33_2_list = [
    esb_1103_GB30_15,
    esb_1103_YD32_16,
    esb_1103_YD32_23,
    esb_1105_X_MAS28_12,
    kup_kdr_gpu_deberta_forcing,
    esb_1103_hh30_8,
    esb_1103_hh30_14,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    esb_1101_max23_2,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_Gpu_LR6_attemp2_LR3,
    kelec_K_drop_05_7,
    kelec_Gpu_6F_LR3,
    kelec_Gpu_6_LR2,
    esb_1030_kelGpu_Gputgr8,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Build the ensemble, save it under `name`, then read the saved file back.
name = 'esb_1106_FULLSTACK33_2'
df_model = excute_ensemble(esb_1106_FULLSTACK33_2_list)
save_jsonl(df_model, name, path)
esb_1106_FULLSTACK33_2 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### NUCLEAR 30_4
* 총 사용 모델 : 29개
    * 단일 모델 : 18개
    * 앙상블 모델 : 11개

In [None]:
# 18 single models
# Each line is a bare name lookup with no side effect: it only requires that
# the variable was defined by an earlier cell. The notebook displays the value
# of the last line.
kelec_Gpu_6F_LR1
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_Gpu_6_LR1_Ma32_LR_2F
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_Ma32
kelec_K_drop_05_7
kelec_Gpu_6_58
kelec_Gpu_6_LR_1_step3more_Ma32F
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 11 ensemble models used by NUCLEAR 30_4.
# These are bare name expressions; the notebook shows only the last one as the cell output.
esb_1103_GB30_15
esb_1103_GB30_2
esb_1103_YD32_16
esb_1103_hh30_8
esb_1103_hh30_14
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1101_max23_2 
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the NUCLEAR30_4 ensemble (29 prediction frames), one per line.
# The order is kept exactly as originally listed.
esb_1106_NUCLEAR30_4_list = [
    esb_1103_GB30_15,
    esb_1103_GB30_2,
    esb_1103_YD32_16,
    esb_1103_hh30_8,
    esb_1103_hh30_14,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    esb_1101_max23_2,
    kelec_Gpu_6F_LR1,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    esb_1030_kelGpu_Gputgr8,
    kelec_Gpu_6_Ma32,
    kelec_K_drop_05_7,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Combine the members, save the result under `name`, then read it back from
# "<path>/<name>.jsonl" (presumably the file save_jsonl just wrote — confirm).
name = 'esb_1106_NUCLEAR30_4'
df_model = excute_ensemble(esb_1106_NUCLEAR30_4_list)
save_jsonl(df_model, name, path)

esb_1106_NUCLEAR30_4 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### terarosa 39_2
* 총 사용 모델 : 38개
    * 단일 모델 : 21개
    * 앙상블 모델 : 17개

In [None]:
# 21 single models used by terarosa 39_2.
# These are bare name expressions; the notebook shows only the last one as the cell output.
kelec_refind_8_Ma32
kelec_refind_7_Ma32
kelec_refind_6_Ma32
kelec_Gpu_6F_LR1
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_Gpu_6_LR1_Ma32_LR_2F
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_Ma32
kelec_K_drop_05_7
kelec_Gpu_6_58
kelec_Gpu_6_LR_1_step3more_Ma32F
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 17 ensemble models used by terarosa 39_2.
# These are bare name expressions; the notebook shows only the last one as the cell output.
esb_1106_NUCLEAR30_4
esb_1106_FULLSTACK33_2
esb_1106_OB33_6
gpu_kdr_kup8_deberta_forcing
kup8_gpu_kdr_deberta_forcing
Original_Kup8_Gpu_Kdr_Deberta_Forcing
esb_1107_refine4
esb_1103_GB30_15
esb_1103_GB30_2
esb_1103_YD32_16
esb_1103_hh30_8
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1101_max23_2
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the terarosa39_2 ensemble (38 prediction frames), one per line.
# The order is kept exactly as originally listed.
esb_1107_terarosa39_2_list = [
    esb_1106_NUCLEAR30_4,
    esb_1106_FULLSTACK33_2,
    esb_1106_OB33_6,
    gpu_kdr_kup8_deberta_forcing,
    kup8_gpu_kdr_deberta_forcing,
    Original_Kup8_Gpu_Kdr_Deberta_Forcing,
    esb_1107_refine4,
    kelec_refind_8_Ma32,
    kelec_refind_7_Ma32,
    kelec_refind_6_Ma32,
    esb_1103_GB30_15,
    esb_1103_GB30_2,
    esb_1103_YD32_16,
    esb_1103_hh30_8,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    esb_1101_max23_2,
    kelec_Gpu_6F_LR1,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    esb_1030_kelGpu_Gputgr8,
    kelec_Gpu_6_Ma32,
    kelec_K_drop_05_7,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Combine the members, save the result under `name`, then read it back from
# "<path>/<name>.jsonl" (presumably the file save_jsonl just wrote — confirm).
name = 'esb_1107_terarosa39_2'
df_model = excute_ensemble(esb_1107_terarosa39_2_list)
save_jsonl(df_model, name, path)

esb_1107_terarosa39_2 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### icon 36_11
* 총 사용 모델 : 35개
    * 단일 모델 : 20개
    * 앙상블 모델 : 15개

In [None]:
# 20 single models used by icon 36_11.
# These are bare name expressions; the notebook shows only the last one as the cell output.
kelec_refind_8_Ma32
kelec_refind_7_Ma32
kelec_Gpu_6F_LR1
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_Gpu_6_LR1_Ma32_LR_2F
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_Ma32
kelec_K_drop_05_7
kelec_Gpu_6_58
kelec_Gpu_6_LR_1_step3more_Ma32F
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 15 ensemble models used by icon 36_11.
# These are bare name expressions; the notebook shows only the last one as the cell output.
esb_1106_NUCLEAR30_4
esb_1106_FULLSTACK33_2
esb_1106_OB33_6
esb_1107_stroke6
esb_1107_stroke4deb
Deberta_Kdr_GPU_Kup_Original_Forcing
Original_Kup8_Gpu_Kdr_Deberta_Forcing
esb_1107_refine4
esb_1103_YD32_16
esb_1103_hh30_8
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the icon36_11 ensemble (35 prediction frames), one per line.
# The order is kept exactly as originally listed.
esb_1107_icon36_11_list = [
    esb_1106_NUCLEAR30_4,
    esb_1106_FULLSTACK33_2,
    esb_1106_OB33_6,
    esb_1107_stroke6,
    esb_1107_stroke4deb,
    Deberta_Kdr_GPU_Kup_Original_Forcing,
    Original_Kup8_Gpu_Kdr_Deberta_Forcing,
    esb_1107_refine4,
    kelec_refind_8_Ma32,
    kelec_refind_7_Ma32,
    esb_1103_YD32_16,
    esb_1103_hh30_8,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    kelec_Gpu_6F_LR1,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    esb_1030_kelGpu_Gputgr8,
    kelec_Gpu_6_Ma32,
    kelec_K_drop_05_7,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Combine the members, save the result under `name`, then read it back from
# "<path>/<name>.jsonl" (presumably the file save_jsonl just wrote — confirm).
name = 'esb_1107_icon36_11'
df_model = excute_ensemble(esb_1107_icon36_11_list)
save_jsonl(df_model, name, path)

esb_1107_icon36_11 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

### joy of victory 45_8
* 총 사용 모델 : 44개
    * 단일 모델 : 25개
    * 앙상블 모델 : 17개 (2개 모델 중복 사용 : 총 19개)

In [None]:
# 25 single models used by joy of victory 45_8.
# Most lines are bare name expressions; the notebook shows only the last one as the cell output.
GpuLR_175
Original_175
# These three frames are loaded in this cell from jsonl files under the cloned repository.
Original_8_Ma32   = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/Original_8_Ma32.jsonl"))
Original_9_Ma32   = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/Original_9_Ma32.jsonl"))
Original_10_Ma32  = pd.DataFrame(jsonlload("/content/Korean_ABSA/jsonl_files/Original_10_Ma32.jsonl"))
kelec_refind_7_Ma32
kelec_refind_8_Ma32
kelec_Gpu_6F_LR1
kelec_K_drop_05_7_Ma32_LR2
kelec_K_drop_05_7_LR_1_Ma32_LR_2
kelec_Gpu_6_LR1_Ma32_LR_2F
kelec_Gpu_6_LR_1_step3more_Ma32_LR_2
kelec_Gpu_6_Ma32
kelec_K_drop_05_7
kelec_Gpu_6_58
kelec_Gpu_6_LR_1_step3more_Ma32F
kelec_D_30
kelec_D_drop05_20
deberta_Gpu_20F
kelec_Gpu_drop05_7
kelec_Gpu_20F
kelec_Gpu_itgr_LR2F
kelec_K_15
kelec_Gpu_itgr_LR1
kelec_Gpu_itgr_drop05_9_layer

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-test-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-test-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive]]"
2,nikluge-sa-2022-test-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-test-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,[]
4,nikluge-sa-2022-test-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-test-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-test-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-test-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-test-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# 17 ensemble models used by joy of victory 45_8 (2 of them are used twice: 19 entries in total).
# These are bare name expressions; the notebook shows only the last one as the cell output.
esb_1107_icon36_11
esb_1107_terarosa39_2
esb_175_4
esb_1106_NUCLEAR30_4
esb_1106_FULLSTACK33_2
esb_1106_OB33_6
esb_1107_stroke6
esb_1107_stroke4deb
Deberta_Kdr_GPU_Kup_Original_Forcing
Original_Kup8_Gpu_Kdr_Deberta_Forcing
esb_1107_refine4
esb_1103_hh30_8
esb_1103_maxD3_2
esb_1103_desire36_18
esb_1102_max27_6
esb_1030_kelGpu_Gputgr8
esb_1030_many10s

Unnamed: 0,id,sentence_form,annotation
0,nikluge-sa-2022-esb_1030_many10s-00001,하나 사려고 알아보는 중인데 맘에드는거 발견,"[[제품 전체#일반, positive]]"
1,nikluge-sa-2022-esb_1030_many10s-00002,동양인 피부톤과 잘 어울리고 우아한 분위기를 풍긴다네?,"[[제품 전체#디자인, positive], [본품#품질, positive]]"
2,nikluge-sa-2022-esb_1030_many10s-00003,근데 이건 마르살라보다 더 지나친 색 같은데..,"[[본품#일반, negative]]"
3,nikluge-sa-2022-esb_1030_many10s-00004,나스 색조가 다 그렇지만서도 어데이셔스 라인은 진짜 색 기막히게 뽑는것 같다,"[[본품#일반, positive]]"
4,nikluge-sa-2022-esb_1030_many10s-00005,색상만 보면 이걸 어떻게 발라.. 싶겠지만 의외로 너무너무 괜찮다,"[[본품#일반, positive]]"
...,...,...,...
2122,nikluge-sa-2022-esb_1030_many10s-02123,간단한 충전으로 간편한 사용이 가능한거죠.,"[[본품#편의성, positive]]"
2123,nikluge-sa-2022-esb_1030_many10s-02124,"눈을 가린 상태에서도 간편하게 조작이 가능하구요,","[[본품#편의성, positive]]"
2124,nikluge-sa-2022-esb_1030_many10s-02125,다양한 마사지로 관자놀이부터 눈주변까지 부드럽고 강력한 마사지가 진행됩니다.,"[[본품#품질, positive]]"
2125,nikluge-sa-2022-esb_1030_many10s-02126,"본체부터 케이블, 설명서까지 깔끔하게 정리되어 보관이 가능하니 더더 맘에 쏙 들어요.","[[패키지/구성품#일반, positive]]"


In [None]:
# Members of the joyofvictory45_8 ensemble (44 entries), one per line.
# The order is kept exactly as originally listed.
# esb_1106_NUCLEAR30_4 and esb_1106_FULLSTACK33_2 each appear twice; the
# section header records this duplicate use, so both occurrences are kept.
esb_1108_joyofvictory45_8_list = [
    esb_1107_icon36_11,
    esb_1107_terarosa39_2,
    esb_175_4,
    GpuLR_175,
    Original_175,
    esb_1106_NUCLEAR30_4,
    esb_1106_FULLSTACK33_2,
    Original_8_Ma32,
    Original_9_Ma32,
    Original_10_Ma32,
    esb_1106_NUCLEAR30_4,
    esb_1106_FULLSTACK33_2,
    esb_1106_OB33_6,
    esb_1107_stroke6,
    esb_1107_stroke4deb,
    Deberta_Kdr_GPU_Kup_Original_Forcing,
    Original_Kup8_Gpu_Kdr_Deberta_Forcing,
    esb_1107_refine4,
    kelec_refind_8_Ma32,
    kelec_refind_7_Ma32,
    esb_1103_hh30_8,
    esb_1103_maxD3_2,
    esb_1103_desire36_18,
    esb_1102_max27_6,
    kelec_Gpu_6F_LR1,
    kelec_K_drop_05_7_Ma32_LR2,
    kelec_K_drop_05_7_LR_1_Ma32_LR_2,
    kelec_Gpu_6_LR1_Ma32_LR_2F,
    kelec_Gpu_6_LR_1_step3more_Ma32_LR_2,
    esb_1030_kelGpu_Gputgr8,
    kelec_Gpu_6_Ma32,
    kelec_K_drop_05_7,
    esb_1030_many10s,
    kelec_Gpu_6_58,
    kelec_Gpu_6_LR_1_step3more_Ma32F,
    kelec_D_30,
    kelec_D_drop05_20,
    deberta_Gpu_20F,
    kelec_Gpu_drop05_7,
    kelec_Gpu_20F,
    kelec_Gpu_itgr_LR2F,
    kelec_K_15,
    kelec_Gpu_itgr_LR1,
    kelec_Gpu_itgr_drop05_9_layer,
]

# Combine the members, save the result under `name`, then read it back from
# "<path>/<name>.jsonl" (presumably the file save_jsonl just wrote — confirm).
name = 'esb_1108_joyofvictory45_8'
df_model = excute_ensemble(esb_1108_joyofvictory45_8_list)
save_jsonl(df_model, name, path)

esb_1108_joyofvictory45_8 = pd.DataFrame(jsonlload(f"{path}/{name}.jsonl"))

# SOTA model 확인 (esb_1108_joyofvictory45_8)

In [None]:
# Display the final ensemble frame as the cell output.
esb_1108_joyofvictory45_8

In [None]:
# Load the copy of this ensemble's predictions stored in the cloned repository
# and count the rows whose 'annotation' differs from the frame built above.
esb_1108_joyofvictory45_8_test = pd.DataFrame(
    jsonlload("/content/Korean_ABSA/jsonl_files/esb_1108_joyofvictory45_8.jsonl")
)

a = esb_1108_joyofvictory45_8_test
b = esb_1108_joyofvictory45_8

# Rows are looked up by index label over the length of `a`; each mismatch adds one.
count = sum(
    1
    for i in range(len(a))
    if b['annotation'][i] != a['annotation'][i]
)

count

0