# install 하기

In [None]:
!pip install transformers

In [None]:
!pip install datasets

In [None]:
!pip install sentencepiece

# import

In [None]:
import json
import os

import torch
import torch.nn as nn
from tqdm import trange
from transformers import AutoTokenizer
from torch.utils.data import DataLoader, TensorDataset
from transformers import get_linear_schedule_with_warmup
from transformers import AdamW
from datasets import load_metric
from sklearn.metrics import f1_score
import pandas as pd
import copy
import numpy as np

from transformers import ElectraModel, ElectraTokenizer
from transformers import AutoModel, ElectraTokenizer


# 모델 구축

## 기본설정

In [None]:

# Special-token ids kept as named constants.
PADDING_TOKEN = 1
S_OPEN_TOKEN = 0
S_CLOSE_TOKEN = 2

do_eval = True

# Directories where model checkpoints are written during training.
category_extraction_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/category_extraction/'
polarity_classification_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/polarity_classification/'


# Paths of already-saved model weight files.
test_category_extraction_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/category_extraction/category_sample.pt'
test_polarity_classification_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/polarity_classification/polarity_sample.pt'

# Data file paths.
train_cate_data_path = '/content/drive/MyDrive/train_sample(category).jsonl'
train_pola_data_path = '/content/drive/MyDrive/train_sample(polarity).jsonl'
test_data_path = '/content/drive/MyDrive/test_sample.jsonl'

# Maximum tokenized sequence length per backbone.
max_len_elec = 256
max_len_debe = 256
# xlm-roberta-base reports max_position_embeddings=514, but RoBERTa position ids
# start at padding_idx + 1, so only 512 tokens fit. Padding/truncating to 514
# can index past the position-embedding table on long inputs.
max_len_robe = 512

# When running RoBERTa on Colab Pro, adjust batch_size (out-of-memory issue).
batch_size = 32

# ELECTRA
base_model_elec = 'kykim/electra-kor-base'
# RoBERTa
base_model_roberta = 'xlm-roberta-base'
# DeBERTa
base_model_deberta = "lighthouse/mdeberta-v3-base-kor-further"

learning_rate = 3e-6
eps = 1e-8
num_train_epochs = 30

classifier_hidden_size_base = 768
classifier_hidden_size_down = 384   # reduced hidden size
classifier_hidden_size_up = 1000    # enlarged hidden size

classifier_dropout_prob_base = 0.1  # dropout = 0.1
classifier_dropout_prob_up = 0.5    # dropout = 0.5

# Number of categories = 25.
entity_property_pair = [
    '패키지/구성품#다양성', '본품#인지도', '브랜드#디자인',
    '패키지/구성품#편의성', '제품 전체#디자인', '제품 전체#품질',
    '패키지/구성품#품질', '패키지/구성품#일반', '본품#일반',
    '패키지/구성품#디자인', '본품#편의성', '브랜드#품질',
    '브랜드#인지도', '본품#다양성', '본품#디자인',
    '제품 전체#다양성', '본품#품질', '제품 전체#인지도',
    '패키지/구성품#가격', '본품#가격', '제품 전체#가격',
    '브랜드#가격', '브랜드#일반', '제품 전체#일반', '제품 전체#편의성',
]

# Label vocabularies and their name -> id lookups (id is the list position).
tf_id_to_name = ['True', 'False']
tf_name_to_id = {name: idx for idx, name in enumerate(tf_id_to_name)}

polarity_id_to_name = ['positive', 'negative', 'neutral']
polarity_name_to_id = {name: idx for idx, name in enumerate(polarity_id_to_name)}

# Use the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Placeholder tokens registered with the tokenizer as additional special tokens.
special_tokens_dict = {
    'additional_special_tokens': [
        '&name&', '&affiliation&', '&social-security-num&', '&tel-num&',
        '&card-num&', '&bank-account&', '&num&', '&online-account&',
    ]
}

In [None]:
def jsonload(fname, encoding="utf-8"):
    """Parse the JSON document stored in `fname` and return the decoded object.

    The file is opened in text mode using `encoding`.
    """
    with open(fname, encoding=encoding) as handle:
        return json.load(handle)

# Save a JSON-serialisable object to the given file name.
def jsondump(j, fname):
    """Serialise `j` as JSON into `fname` (UTF-8, non-ASCII characters written unescaped)."""
    with open(fname, mode="w", encoding="UTF8") as sink:
        json.dump(j, sink, ensure_ascii=False)

# Read a JSONL file and collect the parsed records in a list.
def jsonlload(fname, encoding="utf-8"):
    """Return the list of JSON objects stored one per line in `fname`.

    The file is streamed line by line instead of being read in full first.
    Blank or whitespace-only lines are skipped rather than raising
    `json.JSONDecodeError`.
    """
    with open(fname, encoding=encoding) as f:
        return [json.loads(line) for line in f if line.strip()]

In [None]:
def tokenize_and_align_labels(tokenizer, form, annotations, max_len):
    """Build category-detection and polarity training rows for one sentence.

    For every entry of `entity_property_pair`, `form` is tokenized together
    with that pair (padded/truncated to `max_len`). The pair is labelled
    'True' when some annotation names it, otherwise 'False'. Matched pairs
    also produce a polarity row labelled with the annotation's polarity.

    Args:
        tokenizer: callable used as `tokenizer(form, pair, padding=..., max_length=..., truncation=...)`;
            its result is indexed with 'input_ids' and 'attention_mask'.
        form: the sentence text; when `pd.isna(form)` is true no rows are produced.
        annotations: iterable of annotations where index 0 is the
            entity#property string and index 2 is the polarity name.
        max_len: padding/truncation length passed to the tokenizer.

    Returns:
        (category_rows, polarity_rows): two dicts with 'input_ids',
        'attention_mask' and 'label' lists.
    """
    category_rows = {'input_ids': [], 'attention_mask': [], 'label': []}
    polarity_rows = {'input_ids': [], 'attention_mask': [], 'label': []}

    def _push(rows, encoded, label):
        # Append one encoded example and its label to a row dict.
        rows['input_ids'].append(encoded['input_ids'])
        rows['attention_mask'].append(encoded['attention_mask'])
        rows['label'].append(label)

    for pair in entity_property_pair:
        if pd.isna(form):
            break
        encoded = tokenizer(form, pair, padding='max_length', max_length=max_len, truncation=True)

        for annotation in annotations:
            category = annotation[0]
            polarity = annotation[2]

            # Annotations carrying this placeholder polarity are ignored.
            if polarity == '------------':
                continue

            if category == pair:
                _push(category_rows, encoded, tf_name_to_id['True'])
                _push(polarity_rows, encoded, polarity_name_to_id[polarity])
                break
        else:
            # No annotation mentioned this pair: negative category example.
            _push(category_rows, encoded, tf_name_to_id['False'])

    return category_rows, polarity_rows


def get_dataset(raw_data, tokenizer, max_len):
    """Convert raw utterances into category and polarity `TensorDataset`s.

    Each item of `raw_data` is read through its 'sentence_form' and
    'annotation' keys and expanded by `tokenize_and_align_labels`; the rows
    of all utterances are concatenated.

    Returns:
        (category_dataset, polarity_dataset), each a TensorDataset of
        (input_ids, attention_mask, label) tensors.
    """
    columns = ('input_ids', 'attention_mask', 'label')
    category_columns = {name: [] for name in columns}
    polarity_columns = {name: [] for name in columns}

    for utterance in raw_data:
        category_rows, polarity_rows = tokenize_and_align_labels(
            tokenizer, utterance['sentence_form'], utterance['annotation'], max_len
        )
        for name in columns:
            category_columns[name].extend(category_rows[name])
        for name in columns:
            polarity_columns[name].extend(polarity_rows[name])

    category_dataset = TensorDataset(
        torch.tensor(category_columns['input_ids']),
        torch.tensor(category_columns['attention_mask']),
        torch.tensor(category_columns['label']),
    )
    polarity_dataset = TensorDataset(
        torch.tensor(polarity_columns['input_ids']),
        torch.tensor(polarity_columns['attention_mask']),
        torch.tensor(polarity_columns['label']),
    )
    return category_dataset, polarity_dataset



In [None]:
def evaluation(y_true, y_pred, label_len):
    """Print per-class accuracy, overall/macro accuracy and F1 scores.

    Args:
        y_true: sequence of gold label ids, each in ``range(label_len)``.
        y_pred: sequence of predicted label ids aligned with ``y_true``.
        label_len: number of distinct label ids.

    Returns:
        None; every metric is printed.

    A class without gold samples gets per-class accuracy 0.0 instead of
    raising ZeroDivisionError. The macro accuracy is averaged over
    ``label_len`` classes rather than a fixed 3.
    """
    # Convert once so the values can index the counters and be passed to sklearn.
    y_true = list(map(int, y_true))
    y_pred = list(map(int, y_pred))

    count_list = [0] * label_len
    hit_list = [0] * label_len
    for i, gold in enumerate(y_true):
        count_list[gold] += 1
        if gold == y_pred[i]:
            hit_list[gold] += 1

    acc_list = [
        hit / count if count else 0.0
        for hit, count in zip(hit_list, count_list)
    ]
    total = sum(count_list)

    print(count_list)
    print(hit_list)
    print(acc_list)
    print('accuracy: ', (sum(hit_list) / total) if total else 0.0)
    print('macro_accuracy: ', (sum(acc_list) / label_len) if label_len else 0.0)

    print('f1_score: ', f1_score(y_true, y_pred, average=None))
    print('f1_score_micro: ', f1_score(y_true, y_pred, average='micro'))
    print('f1_score_macro: ', f1_score(y_true, y_pred, average='macro'))

In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _precision_recall_f1(counts):
    """Turn a {'TP', 'FP', 'FN'} counter into a Precision/Recall/F1 dict."""
    precision = _safe_ratio(counts['TP'], counts['TP'] + counts['FP'])
    recall = _safe_ratio(counts['TP'], counts['TP'] + counts['FN'])
    return {
        'Precision': precision,
        'Recall': recall,
        'F1': _safe_ratio(2 * recall * precision, recall + precision),
    }


def _find_match(category, polarity, candidates, polarity_index):
    """Compare one annotation with the first candidate sharing its category.

    Returns (category_found, category_and_polarity_found). Only the first
    candidate with the same category is inspected.
    """
    for candidate in candidates:
        if candidate[0] == category:
            return True, candidate[polarity_index] == polarity
    return False, False


def evaluation_f1(true_data, pred_data):
    """Score predicted annotations against gold annotations.

    Args:
        true_data: list of dicts whose 'annotation' entries hold the category
            at index 0 and the polarity at index 2.
        pred_data: list of dicts aligned with `true_data` whose 'annotation'
            entries hold the category at index 0 and the polarity at index 1.

    Returns:
        {'category extraction result': {...}, 'entire pipeline result': {...}}
        where each inner dict has 'Precision', 'Recall' and 'F1'. Category
        extraction counts a hit when the category matches; the pipeline
        score also requires the polarity to match. A ratio whose denominator
        is zero (for example, no predictions at all) is reported as 0.0
        instead of raising ZeroDivisionError.
    """
    ce_eval = {'TP': 0, 'FP': 0, 'FN': 0}
    pipeline_eval = {'TP': 0, 'FP': 0, 'FN': 0}

    for i, true_item in enumerate(true_data):
        gold_annotations = true_item['annotation']
        pred_annotations = pred_data[i]['annotation']

        # TP / FN: every gold annotation is either recovered or missed.
        for gold in gold_annotations:
            ce_found, pipeline_found = _find_match(gold[0], gold[2], pred_annotations, 1)
            ce_eval['TP' if ce_found else 'FN'] += 1
            pipeline_eval['TP' if pipeline_found else 'FN'] += 1

        # FP: every prediction without a gold counterpart.
        for pred in pred_annotations:
            ce_found, pipeline_found = _find_match(pred[0], pred[1], gold_annotations, 2)
            if not ce_found:
                ce_eval['FP'] += 1
            if not pipeline_found:
                pipeline_eval['FP'] += 1

    return {
        'category extraction result': _precision_recall_f1(ce_eval),
        'entire pipeline result': _precision_recall_f1(pipeline_eval),
    }

## SimpleClassifier

In [None]:
# Baseline classification head.
class SimpleClassifier_Base(nn.Module):
    """Head applied to position 0 of an encoder output.

    dropout -> linear(base -> base) -> tanh -> dropout -> linear(base -> num_label),
    with sizes and dropout probability taken from `classifier_hidden_size_base`
    and `classifier_dropout_prob_base`.
    """

    def __init__(self, num_label):
        super().__init__()
        width = classifier_hidden_size_base
        self.dense = nn.Linear(width, width)
        self.dropout = nn.Dropout(classifier_dropout_prob_base)
        self.output = nn.Linear(width, num_label)

    def forward(self, features):
        """Return logits computed from `features[:, 0, :]`."""
        first_token = self.dropout(features[:, 0, :])
        hidden = torch.tanh(self.dense(first_token))
        return self.output(self.dropout(hidden))

# Head whose hidden layer is widened to classifier_hidden_size_up.
class SimpleClassifier_Hidden_up(nn.Module):
    """Head applied to position 0 of an encoder output, with a wider hidden layer.

    dropout -> linear(base -> up) -> tanh -> dropout -> linear(up -> num_label).
    """

    def __init__(self, num_label):
        super().__init__()
        in_width, hidden_width = classifier_hidden_size_base, classifier_hidden_size_up
        self.dense = nn.Linear(in_width, hidden_width)
        self.dropout = nn.Dropout(classifier_dropout_prob_base)
        self.output = nn.Linear(hidden_width, num_label)

    def forward(self, features):
        """Return logits computed from `features[:, 0, :]`."""
        first_token = self.dropout(features[:, 0, :])
        hidden = torch.tanh(self.dense(first_token))
        return self.output(self.dropout(hidden))

# Head whose hidden layer is narrowed to classifier_hidden_size_down.
class SimpleClassifier_Hidden_down(nn.Module):
    """Head applied to position 0 of an encoder output, with a narrower hidden layer.

    dropout -> linear(base -> down) -> tanh -> dropout -> linear(down -> num_label).
    """

    def __init__(self, num_label):
        super().__init__()
        in_width, hidden_width = classifier_hidden_size_base, classifier_hidden_size_down
        self.dense = nn.Linear(in_width, hidden_width)
        self.dropout = nn.Dropout(classifier_dropout_prob_base)
        self.output = nn.Linear(hidden_width, num_label)

    def forward(self, features):
        """Return logits computed from `features[:, 0, :]`."""
        first_token = self.dropout(features[:, 0, :])
        hidden = torch.tanh(self.dense(first_token))
        return self.output(self.dropout(hidden))

# Baseline-shaped head using the higher dropout probability.
class SimpleClassifier_dr05(nn.Module):
    """Head applied to position 0 of an encoder output, with `classifier_dropout_prob_up`.

    dropout -> linear(base -> base) -> tanh -> dropout -> linear(base -> num_label).
    """

    def __init__(self, num_label):
        super().__init__()
        width = classifier_hidden_size_base
        self.dense = nn.Linear(width, width)
        self.dropout = nn.Dropout(classifier_dropout_prob_up)
        self.output = nn.Linear(width, num_label)

    def forward(self, features):
        """Return logits computed from `features[:, 0, :]`."""
        first_token = self.dropout(features[:, 0, :])
        hidden = torch.tanh(self.dense(first_token))
        return self.output(self.dropout(hidden))

# Narrowed hidden layer combined with the higher dropout probability.
class SimpleClassifier_Hidden_down_dr05(nn.Module):
    """Head applied to position 0 of an encoder output: narrower hidden layer, higher dropout.

    dropout -> linear(base -> down) -> tanh -> dropout -> linear(down -> num_label),
    using `classifier_dropout_prob_up`.
    """

    def __init__(self, num_label):
        super().__init__()
        in_width, hidden_width = classifier_hidden_size_base, classifier_hidden_size_down
        self.dense = nn.Linear(in_width, hidden_width)
        self.dropout = nn.Dropout(classifier_dropout_prob_up)
        self.output = nn.Linear(hidden_width, num_label)

    def forward(self, features):
        """Return logits computed from `features[:, 0, :]`."""
        first_token = self.dropout(features[:, 0, :])
        hidden = torch.tanh(self.dense(first_token))
        return self.output(self.dropout(hidden))

# Head with one additional dense layer (base dropout probability).
class SimpleClassifier_Layer(nn.Module):
    """Head applied to position 0 of an encoder output, with two dense layers.

    dropout -> linear(base -> base//2) -> dropout -> linear(base//2 -> base//4)
    -> tanh -> dropout -> linear(base//4 -> num_label).
    """

    def __init__(self, num_label):
        super().__init__()
        half = classifier_hidden_size_base // 2
        quarter = classifier_hidden_size_base // 4
        self.dense1 = nn.Linear(classifier_hidden_size_base, half)
        self.dense2 = nn.Linear(half, quarter)
        self.dropout = nn.Dropout(classifier_dropout_prob_base)
        self.output = nn.Linear(quarter, num_label)

    def forward(self, features):
        """Return logits computed from `features[:, 0, :]`."""
        first_token = features[:, 0, :]
        # NOTE(review): there is no activation between dense1 and dense2.
        layer1 = self.dense1(self.dropout(first_token))
        layer2 = self.dense2(self.dropout(layer1))
        return self.output(self.dropout(torch.tanh(layer2)))

# Head with one additional dense layer and the higher dropout probability.
class SimpleClassifier_Layer_dr05(nn.Module):
    """Head applied to position 0 of an encoder output: two dense layers, higher dropout.

    dropout -> linear(base -> base//2) -> dropout -> linear(base//2 -> base//4)
    -> tanh -> dropout -> linear(base//4 -> num_label), using
    `classifier_dropout_prob_up`.
    """

    def __init__(self, num_label):
        super().__init__()
        half = classifier_hidden_size_base // 2
        quarter = classifier_hidden_size_base // 4
        self.dense1 = nn.Linear(classifier_hidden_size_base, half)
        self.dense2 = nn.Linear(half, quarter)
        self.dropout = nn.Dropout(classifier_dropout_prob_up)
        self.output = nn.Linear(quarter, num_label)

    def forward(self, features):
        """Return logits computed from `features[:, 0, :]`."""
        first_token = features[:, 0, :]
        # NOTE(review): there is no activation between dense1 and dense2.
        layer1 = self.dense1(self.dropout(first_token))
        layer2 = self.dense2(self.dropout(layer1))
        return self.output(self.dropout(torch.tanh(layer2)))


## ELECTRA

#### baseline

In [None]:
# Category classifier, baseline head.
class ElectraBaseClassifier_Cate_Base(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Base`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Base(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

# Polarity classifier, baseline head.
class ElectraBaseClassifier_Pola_Base(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Base`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Base(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits


#### only layer 추가

In [None]:
# Category classifier with the extra-dense-layer head.
class ElectraBaseClassifier_Cate_Layer(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Layer`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Layer(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

# Polarity classifier with the extra-dense-layer head.
class ElectraBaseClassifier_Pola_Layer(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Layer`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Layer(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits


#### only dropout 0.5

In [None]:
# Category classifier with the higher-dropout head.
class ElectraBaseClassifier_Cate_dr05(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_dr05`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

# Polarity classifier with the higher-dropout head.
class ElectraBaseClassifier_Pola_dr05(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_dr05`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits


#### layer + dropout 0.5

In [None]:
# Category classifier with the extra-dense-layer, higher-dropout head.
class ElectraBaseClassifier_Cate_Layer_Dropout05(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Layer_dr05`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Layer_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

# Polarity classifier with the extra-dense-layer, higher-dropout head.
class ElectraBaseClassifier_Pola_Layer_Dropout05(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Layer_dr05`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Layer_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

#### hidden_size_up

In [None]:
# Category classifier with the widened-hidden-layer head.
class ElectraBaseClassifier_Cate_hiddenup(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Hidden_up`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_up(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

# Polarity classifier with the widened-hidden-layer head.
class ElectraBaseClassifier_Pola_hiddenup(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Hidden_up`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_up(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits


#### only hidden_size_down

In [None]:
# Category classifier with the narrowed-hidden-layer head.
class ElectraBaseClassifier_Cate_hiddendown(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Hidden_down`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_down(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

# Polarity classifier with the narrowed-hidden-layer head.
class ElectraBaseClassifier_Pola_hiddendown(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Hidden_down`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_down(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits


#### hidden_size_down + dropout 0.5

In [None]:
# Category classifier with the narrowed-hidden-layer, higher-dropout head.
class ElectraBaseClassifier_Cate_hiddendown_dr05(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Hidden_down_dr05`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_down_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

# Polarity classifier with the narrowed-hidden-layer, higher-dropout head.
class ElectraBaseClassifier_Pola_hiddendown_dr05(nn.Module):
    """Encoder loaded from `base_model_elec` followed by `SimpleClassifier_Hidden_down_dr05`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.electra = AutoModel.from_pretrained(base_model_elec)
        self.electra.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Hidden_down_dr05(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits


## RoBERTa

In [None]:
# Baseline classifier on the RoBERTa encoder (originally noted as the category baseline).
class RobertaBaseClassifier(nn.Module):
    """Encoder loaded from `base_model_roberta` followed by `SimpleClassifier_Base`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.roberta = AutoModel.from_pretrained(base_model_roberta)
        self.roberta.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Base(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

## DeBERTa

In [None]:
# Baseline classifier on the DeBERTa encoder (originally noted as the category baseline).
class DebertaBaseClassifier(nn.Module):
    """Encoder loaded from `base_model_deberta` followed by `SimpleClassifier_Base`.

    Args:
        num_label: number of output classes.
        len_tokenizer: tokenizer size the encoder's token embeddings are resized to.
    """

    def __init__(self, num_label, len_tokenizer):
        super().__init__()

        self.num_label = num_label
        self.deberta = AutoModel.from_pretrained(base_model_deberta)
        self.deberta.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier_Base(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return `(loss, logits)`; `loss` is None unless `labels` is given."""
        encoded = self.deberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )
        # The head consumes the first element of the encoder output.
        logits = self.labels_classifier(encoded[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_label), labels.view(-1)), logits

# 모델 학습


## category(entity) 학습

* 자신이 원하는 파라미터를 수정한 목차로 넘어가서 함수를 실행한다.
* **모든 함수 명이 같으니 실수 하지 않도록 주의하도록 하자.**
* 함수를 실행한 후, "cate 학습 시작" 목차로 넘어가서 train을 시작하면 된다! 

### ELECTRA model

#### base category

In [None]:
# base electra
def train_entity_analysis():
    """Fine-tune ``ElectraBaseClassifier_Cate_Base`` and save it after every epoch.

    Reads the training set from ``train_cate_data_path``, tokenizes it with the
    ``base_model_elec`` tokenizer (after registering ``special_tokens_dict``),
    and trains for ``num_train_epochs`` epochs with AdamW, a linear
    learning-rate schedule without warmup, and gradient-norm clipping. After
    each epoch the model ``state_dict`` is written to
    ``category_extraction_model_path`` as ``elec_cate_base_epoch_<n>.pt``.

    Takes no arguments and returns nothing; every setting comes from the
    notebook-level configuration variables. Several notebook cells define a
    function with this same name, so the most recently executed cell decides
    which variant is trained.
    """
    # No validation set is built because data is scarce, so the learning rate
    # is handled manually. Uncomment the next line to rebind the notebook-level
    # value from inside this function.
    # global learning_rate

    print('train_entity_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)

    # Create the checkpoint directory up front so the first save cannot fail
    # after a whole epoch of training has already been spent.
    os.makedirs(category_extraction_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_cate_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # Only the first dataset returned by get_dataset is used here.
    entity_property_train_data, _ = get_dataset(train_data, tokenizer, max_len_elec)
    entity_property_train_dataloader = DataLoader(entity_property_train_data, shuffle=True,
                                  batch_size=batch_size)

    print('loading model')
    entity_property_model = ElectraBaseClassifier_Cate_Base(len(tf_id_to_name), len(tokenizer))

    # ====================================================================================================== #
    # To resume training from a saved epoch, uncomment the next line and put the .pt path in torch.load(...).
    # entity_property_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    entity_property_model.to(device)

    print('end loading')

    # Optimizer setup.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        entity_property_param_optimizer = list(entity_property_model.named_parameters())
        # Biases and LayerNorm weights are exempt from weight decay.
        # 'gamma'/'beta' are legacy LayerNorm parameter names kept for safety.
        no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
        entity_property_optimizer_grouped_parameters = [
            # AdamW reads the per-group key 'weight_decay'; any other key
            # (e.g. 'weight_decay_rate') is silently ignored.
            {'params': [p for n, p in entity_property_param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in entity_property_param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
    else:
        # NOTE(review): assumes the model exposes a `classifier` attribute; the
        # RoBERTa/DeBERTa classifiers in this notebook name their head
        # `labels_classifier` - confirm before setting FULL_FINETUNING = False.
        entity_property_param_optimizer = list(entity_property_model.classifier.named_parameters())
        entity_property_optimizer_grouped_parameters = [{"params": [p for n, p in entity_property_param_optimizer]}]

    entity_property_optimizer = AdamW(
        entity_property_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(entity_property_train_dataloader)

    entity_property_scheduler = get_linear_schedule_with_warmup(
        entity_property_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    # epoch_step is 1-based because it is used in the checkpoint file name.
    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        entity_property_model.train()
        print("epoch_step ==>", epoch_step)

        # Running sum of batch losses for this epoch.
        entity_property_total_loss = 0

        # Print the training data path and the model being trained.
        print(train_cate_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Cate_Base")

        # ====================================================================================================== #
        for step, batch in enumerate(entity_property_train_dataloader):
            # Progress indicator every 1000 batches.
            if step % 1000 == 0:
                print(step, "/", len(entity_property_train_dataloader))

            # Uncomment the next line to handle the learning rate manually.
            # NOTE(review): rebinding this name does not change the optimizer
            # that was already built above - confirm the intended mechanism.
            # learning_rate = learning_rate * 0.1

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            entity_property_model.zero_grad()

            loss, _ = entity_property_model(b_input_ids, b_input_mask, b_labels)

            loss.backward()

            entity_property_total_loss += loss.item()

            # Clip the gradient norm before the optimizer consumes the gradients.
            torch.nn.utils.clip_grad_norm_(parameters=entity_property_model.parameters(), max_norm=max_grad_norm)
            entity_property_optimizer.step()
            entity_property_scheduler.step()

        avg_train_loss = entity_property_total_loss / len(entity_property_train_dataloader)
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # os.path.join keeps the path valid whether or not the directory ends with '/'.
        model_saved_path = os.path.join(category_extraction_model_path,
                                        'elec_cate_base_epoch_' + str(epoch_step) + '.pt')
        torch.save(entity_property_model.state_dict(), model_saved_path)

    print("training is done")

#### only layer 추가 category

In [None]:
# only_layer electra
def train_entity_analysis():
    """Fine-tune ``ElectraBaseClassifier_Cate_Layer`` and save it after every epoch.

    Reads the training set from ``train_cate_data_path``, tokenizes it with the
    ``base_model_elec`` tokenizer (after registering ``special_tokens_dict``),
    and trains for ``num_train_epochs`` epochs with AdamW, a linear
    learning-rate schedule without warmup, and gradient-norm clipping. After
    each epoch the model ``state_dict`` is written to
    ``category_extraction_model_path`` as ``elec_cate_layer_epoch_<n>.pt``.

    Takes no arguments and returns nothing; every setting comes from the
    notebook-level configuration variables. Several notebook cells define a
    function with this same name, so the most recently executed cell decides
    which variant is trained.
    """
    print('train_entity_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)

    # Create the checkpoint directory up front so the first save cannot fail
    # after a whole epoch of training has already been spent.
    os.makedirs(category_extraction_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_cate_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # Only the first dataset returned by get_dataset is used here.
    entity_property_train_data, _ = get_dataset(train_data, tokenizer, max_len_elec)
    entity_property_train_dataloader = DataLoader(entity_property_train_data, shuffle=True,
                                  batch_size=batch_size)

    print('loading model')
    entity_property_model = ElectraBaseClassifier_Cate_Layer(len(tf_id_to_name), len(tokenizer))

    # ====================================================================================================== #
    # To resume training from a saved epoch, uncomment the next line and put the .pt path in torch.load(...).
    # entity_property_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    entity_property_model.to(device)

    print('end loading')

    # Optimizer setup.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        entity_property_param_optimizer = list(entity_property_model.named_parameters())
        # Biases and LayerNorm weights are exempt from weight decay.
        # 'gamma'/'beta' are legacy LayerNorm parameter names kept for safety.
        no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
        entity_property_optimizer_grouped_parameters = [
            # AdamW reads the per-group key 'weight_decay'; any other key
            # (e.g. 'weight_decay_rate') is silently ignored.
            {'params': [p for n, p in entity_property_param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in entity_property_param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
    else:
        # NOTE(review): assumes the model exposes a `classifier` attribute; the
        # RoBERTa/DeBERTa classifiers in this notebook name their head
        # `labels_classifier` - confirm before setting FULL_FINETUNING = False.
        entity_property_param_optimizer = list(entity_property_model.classifier.named_parameters())
        entity_property_optimizer_grouped_parameters = [{"params": [p for n, p in entity_property_param_optimizer]}]

    entity_property_optimizer = AdamW(
        entity_property_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(entity_property_train_dataloader)

    entity_property_scheduler = get_linear_schedule_with_warmup(
        entity_property_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    # epoch_step is 1-based because it is used in the checkpoint file name.
    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        entity_property_model.train()
        print("epoch_step ==>", epoch_step)

        # Running sum of batch losses for this epoch.
        entity_property_total_loss = 0

        # Print the training data path and the model being trained.
        print(train_cate_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Cate_Layer")

        # ====================================================================================================== #
        for step, batch in enumerate(entity_property_train_dataloader):
            # Progress indicator every 1000 batches.
            if step % 1000 == 0:
                print(step, "/", len(entity_property_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            entity_property_model.zero_grad()

            loss, _ = entity_property_model(b_input_ids, b_input_mask, b_labels)

            loss.backward()

            entity_property_total_loss += loss.item()

            # Clip the gradient norm before the optimizer consumes the gradients.
            torch.nn.utils.clip_grad_norm_(parameters=entity_property_model.parameters(), max_norm=max_grad_norm)
            entity_property_optimizer.step()
            entity_property_scheduler.step()

        avg_train_loss = entity_property_total_loss / len(entity_property_train_dataloader)
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # os.path.join keeps the path valid whether or not the directory ends with '/'.
        model_saved_path = os.path.join(category_extraction_model_path,
                                        'elec_cate_layer_epoch_' + str(epoch_step) + '.pt')
        torch.save(entity_property_model.state_dict(), model_saved_path)

    print("training is done")

#### only dropout 0.5 category

In [None]:
# only_dropout 0.5 electra
def train_entity_analysis():
    """Fine-tune ``ElectraBaseClassifier_Cate_dr05`` and save it after every epoch.

    Reads the training set from ``train_cate_data_path``, tokenizes it with the
    ``base_model_elec`` tokenizer (after registering ``special_tokens_dict``),
    and trains for ``num_train_epochs`` epochs with AdamW, a linear
    learning-rate schedule without warmup, and gradient-norm clipping. After
    each epoch the model ``state_dict`` is written to
    ``category_extraction_model_path`` as ``elec_cate_dr05_epoch_<n>.pt``.

    Takes no arguments and returns nothing; every setting comes from the
    notebook-level configuration variables. Several notebook cells define a
    function with this same name, so the most recently executed cell decides
    which variant is trained.
    """
    print('train_entity_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)

    # Create the checkpoint directory up front so the first save cannot fail
    # after a whole epoch of training has already been spent.
    os.makedirs(category_extraction_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_cate_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # Only the first dataset returned by get_dataset is used here.
    entity_property_train_data, _ = get_dataset(train_data, tokenizer, max_len_elec)
    entity_property_train_dataloader = DataLoader(entity_property_train_data, shuffle=True,
                                  batch_size=batch_size)

    print('loading model')
    entity_property_model = ElectraBaseClassifier_Cate_dr05(len(tf_id_to_name), len(tokenizer))

    # ====================================================================================================== #
    # To resume training from a saved epoch, uncomment the next line and put the .pt path in torch.load(...).
    # entity_property_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    entity_property_model.to(device)

    print('end loading')

    # Optimizer setup.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        entity_property_param_optimizer = list(entity_property_model.named_parameters())
        # Biases and LayerNorm weights are exempt from weight decay.
        # 'gamma'/'beta' are legacy LayerNorm parameter names kept for safety.
        no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
        entity_property_optimizer_grouped_parameters = [
            # AdamW reads the per-group key 'weight_decay'; any other key
            # (e.g. 'weight_decay_rate') is silently ignored.
            {'params': [p for n, p in entity_property_param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in entity_property_param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
    else:
        # NOTE(review): assumes the model exposes a `classifier` attribute; the
        # RoBERTa/DeBERTa classifiers in this notebook name their head
        # `labels_classifier` - confirm before setting FULL_FINETUNING = False.
        entity_property_param_optimizer = list(entity_property_model.classifier.named_parameters())
        entity_property_optimizer_grouped_parameters = [{"params": [p for n, p in entity_property_param_optimizer]}]

    entity_property_optimizer = AdamW(
        entity_property_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(entity_property_train_dataloader)

    entity_property_scheduler = get_linear_schedule_with_warmup(
        entity_property_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    # epoch_step is 1-based because it is used in the checkpoint file name.
    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        entity_property_model.train()
        print("epoch_step ==>", epoch_step)

        # Running sum of batch losses for this epoch.
        entity_property_total_loss = 0

        # Print the training data path and the model being trained.
        print(train_cate_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Cate_dr05")

        # ====================================================================================================== #
        for step, batch in enumerate(entity_property_train_dataloader):
            # Progress indicator every 1000 batches.
            if step % 1000 == 0:
                print(step, "/", len(entity_property_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            entity_property_model.zero_grad()

            loss, _ = entity_property_model(b_input_ids, b_input_mask, b_labels)

            loss.backward()

            entity_property_total_loss += loss.item()

            # Clip the gradient norm before the optimizer consumes the gradients.
            torch.nn.utils.clip_grad_norm_(parameters=entity_property_model.parameters(), max_norm=max_grad_norm)
            entity_property_optimizer.step()
            entity_property_scheduler.step()

        avg_train_loss = entity_property_total_loss / len(entity_property_train_dataloader)
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # os.path.join keeps the path valid whether or not the directory ends with '/'.
        model_saved_path = os.path.join(category_extraction_model_path,
                                        'elec_cate_dr05_epoch_' + str(epoch_step) + '.pt')
        torch.save(entity_property_model.state_dict(), model_saved_path)

    print("training is done")

#### layer + dropout 0.5 category

In [None]:
# layer + dropout 0.5 electra
def train_entity_analysis():
    """Fine-tune ``ElectraBaseClassifier_Cate_Layer_Dropout05`` and save it after every epoch.

    Reads the training set from ``train_cate_data_path``, tokenizes it with the
    ``base_model_elec`` tokenizer (after registering ``special_tokens_dict``),
    and trains for ``num_train_epochs`` epochs with AdamW, a linear
    learning-rate schedule without warmup, and gradient-norm clipping. After
    each epoch the model ``state_dict`` is written to
    ``category_extraction_model_path`` as ``elec_cate_layer_dr05_epoch_<n>.pt``.

    Takes no arguments and returns nothing; every setting comes from the
    notebook-level configuration variables. Several notebook cells define a
    function with this same name, so the most recently executed cell decides
    which variant is trained.
    """
    print('train_entity_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)

    # Create the checkpoint directory up front so the first save cannot fail
    # after a whole epoch of training has already been spent.
    os.makedirs(category_extraction_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_cate_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # Only the first dataset returned by get_dataset is used here.
    entity_property_train_data, _ = get_dataset(train_data, tokenizer, max_len_elec)
    entity_property_train_dataloader = DataLoader(entity_property_train_data, shuffle=True,
                                  batch_size=batch_size)

    print('loading model')
    entity_property_model = ElectraBaseClassifier_Cate_Layer_Dropout05(len(tf_id_to_name), len(tokenizer))

    # ====================================================================================================== #
    # To resume training from a saved epoch, uncomment the next line and put the .pt path in torch.load(...).
    # entity_property_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    entity_property_model.to(device)

    print('end loading')

    # Optimizer setup.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        entity_property_param_optimizer = list(entity_property_model.named_parameters())
        # Biases and LayerNorm weights are exempt from weight decay.
        # 'gamma'/'beta' are legacy LayerNorm parameter names kept for safety.
        no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
        entity_property_optimizer_grouped_parameters = [
            # AdamW reads the per-group key 'weight_decay'; any other key
            # (e.g. 'weight_decay_rate') is silently ignored.
            {'params': [p for n, p in entity_property_param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in entity_property_param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
    else:
        # NOTE(review): assumes the model exposes a `classifier` attribute; the
        # RoBERTa/DeBERTa classifiers in this notebook name their head
        # `labels_classifier` - confirm before setting FULL_FINETUNING = False.
        entity_property_param_optimizer = list(entity_property_model.classifier.named_parameters())
        entity_property_optimizer_grouped_parameters = [{"params": [p for n, p in entity_property_param_optimizer]}]

    entity_property_optimizer = AdamW(
        entity_property_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(entity_property_train_dataloader)

    entity_property_scheduler = get_linear_schedule_with_warmup(
        entity_property_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    # epoch_step is 1-based because it is used in the checkpoint file name.
    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        entity_property_model.train()
        print("epoch_step ==>", epoch_step)

        # Running sum of batch losses for this epoch.
        entity_property_total_loss = 0

        # Print the training data path and the model being trained.
        print(train_cate_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Cate_Layer_Dropout05")

        # ====================================================================================================== #
        for step, batch in enumerate(entity_property_train_dataloader):
            # Progress indicator every 1000 batches.
            if step % 1000 == 0:
                print(step, "/", len(entity_property_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            entity_property_model.zero_grad()

            loss, _ = entity_property_model(b_input_ids, b_input_mask, b_labels)

            loss.backward()

            entity_property_total_loss += loss.item()

            # Clip the gradient norm before the optimizer consumes the gradients.
            torch.nn.utils.clip_grad_norm_(parameters=entity_property_model.parameters(), max_norm=max_grad_norm)
            entity_property_optimizer.step()
            entity_property_scheduler.step()

        avg_train_loss = entity_property_total_loss / len(entity_property_train_dataloader)
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # os.path.join keeps the path valid whether or not the directory ends with '/'.
        model_saved_path = os.path.join(category_extraction_model_path,
                                        'elec_cate_layer_dr05_epoch_' + str(epoch_step) + '.pt')
        torch.save(entity_property_model.state_dict(), model_saved_path)

    print("training is done")

#### hidden_size_up category

In [None]:
# hidden_size_up electra
def train_entity_analysis():
    """Fine-tune ``ElectraBaseClassifier_Cate_hiddenup`` and save it after every epoch.

    Reads the training set from ``train_cate_data_path``, tokenizes it with the
    ``base_model_elec`` tokenizer (after registering ``special_tokens_dict``),
    and trains for ``num_train_epochs`` epochs with AdamW, a linear
    learning-rate schedule without warmup, and gradient-norm clipping. After
    each epoch the model ``state_dict`` is written to
    ``category_extraction_model_path`` as ``elec_cate_hiddenup_epoch_<n>.pt``.

    Takes no arguments and returns nothing; every setting comes from the
    notebook-level configuration variables. Several notebook cells define a
    function with this same name, so the most recently executed cell decides
    which variant is trained.
    """
    print('train_entity_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)

    # Create the checkpoint directory up front so the first save cannot fail
    # after a whole epoch of training has already been spent.
    os.makedirs(category_extraction_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_cate_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # Only the first dataset returned by get_dataset is used here.
    entity_property_train_data, _ = get_dataset(train_data, tokenizer, max_len_elec)
    entity_property_train_dataloader = DataLoader(entity_property_train_data, shuffle=True,
                                  batch_size=batch_size)

    print('loading model')
    entity_property_model = ElectraBaseClassifier_Cate_hiddenup(len(tf_id_to_name), len(tokenizer))

    # ====================================================================================================== #
    # To resume training from a saved epoch, uncomment the next line and put the .pt path in torch.load(...).
    # entity_property_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    entity_property_model.to(device)

    print('end loading')

    # Optimizer setup.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        entity_property_param_optimizer = list(entity_property_model.named_parameters())
        # Biases and LayerNorm weights are exempt from weight decay.
        # 'gamma'/'beta' are legacy LayerNorm parameter names kept for safety.
        no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
        entity_property_optimizer_grouped_parameters = [
            # AdamW reads the per-group key 'weight_decay'; any other key
            # (e.g. 'weight_decay_rate') is silently ignored.
            {'params': [p for n, p in entity_property_param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in entity_property_param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
    else:
        # NOTE(review): assumes the model exposes a `classifier` attribute; the
        # RoBERTa/DeBERTa classifiers in this notebook name their head
        # `labels_classifier` - confirm before setting FULL_FINETUNING = False.
        entity_property_param_optimizer = list(entity_property_model.classifier.named_parameters())
        entity_property_optimizer_grouped_parameters = [{"params": [p for n, p in entity_property_param_optimizer]}]

    entity_property_optimizer = AdamW(
        entity_property_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(entity_property_train_dataloader)

    entity_property_scheduler = get_linear_schedule_with_warmup(
        entity_property_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    # epoch_step is 1-based because it is used in the checkpoint file name.
    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        entity_property_model.train()
        print("epoch_step ==>", epoch_step)

        # Running sum of batch losses for this epoch.
        entity_property_total_loss = 0

        # Print the training data path and the model being trained.
        print(train_cate_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Cate_hiddenup")

        # ====================================================================================================== #
        for step, batch in enumerate(entity_property_train_dataloader):
            # Progress indicator every 1000 batches.
            if step % 1000 == 0:
                print(step, "/", len(entity_property_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            entity_property_model.zero_grad()

            loss, _ = entity_property_model(b_input_ids, b_input_mask, b_labels)

            loss.backward()

            entity_property_total_loss += loss.item()

            # Clip the gradient norm before the optimizer consumes the gradients.
            torch.nn.utils.clip_grad_norm_(parameters=entity_property_model.parameters(), max_norm=max_grad_norm)
            entity_property_optimizer.step()
            entity_property_scheduler.step()

        avg_train_loss = entity_property_total_loss / len(entity_property_train_dataloader)
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # os.path.join keeps the path valid whether or not the directory ends with '/'.
        model_saved_path = os.path.join(category_extraction_model_path,
                                        'elec_cate_hiddenup_epoch_' + str(epoch_step) + '.pt')
        torch.save(entity_property_model.state_dict(), model_saved_path)

    print("training is done")

#### hidden_size down category

In [None]:
# hidden_size_down
def train_entity_analysis():
    """Fine-tune ``ElectraBaseClassifier_Cate_hiddendown`` and save it after every epoch.

    Reads the training set from ``train_cate_data_path``, tokenizes it with the
    ``base_model_elec`` tokenizer (after registering ``special_tokens_dict``),
    and trains for ``num_train_epochs`` epochs with AdamW, a linear
    learning-rate schedule without warmup, and gradient-norm clipping. After
    each epoch the model ``state_dict`` is written to
    ``category_extraction_model_path`` as ``elec_cate_hiddendown_epoch_<n>.pt``.

    Takes no arguments and returns nothing; every setting comes from the
    notebook-level configuration variables. Several notebook cells define a
    function with this same name, so the most recently executed cell decides
    which variant is trained.
    """
    print('train_entity_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)

    # Create the checkpoint directory up front so the first save cannot fail
    # after a whole epoch of training has already been spent.
    os.makedirs(category_extraction_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_cate_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # Only the first dataset returned by get_dataset is used here.
    entity_property_train_data, _ = get_dataset(train_data, tokenizer, max_len_elec)
    entity_property_train_dataloader = DataLoader(entity_property_train_data, shuffle=True,
                                  batch_size=batch_size)

    print('loading model')
    entity_property_model = ElectraBaseClassifier_Cate_hiddendown(len(tf_id_to_name), len(tokenizer))

    # ====================================================================================================== #
    # To resume training from a saved epoch, uncomment the next line and put the .pt path in torch.load(...).
    # entity_property_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    entity_property_model.to(device)

    print('end loading')

    # Optimizer setup.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        entity_property_param_optimizer = list(entity_property_model.named_parameters())
        # Biases and LayerNorm weights are exempt from weight decay.
        # 'gamma'/'beta' are legacy LayerNorm parameter names kept for safety.
        no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
        entity_property_optimizer_grouped_parameters = [
            # AdamW reads the per-group key 'weight_decay'; any other key
            # (e.g. 'weight_decay_rate') is silently ignored.
            {'params': [p for n, p in entity_property_param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in entity_property_param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
    else:
        # NOTE(review): assumes the model exposes a `classifier` attribute; the
        # RoBERTa/DeBERTa classifiers in this notebook name their head
        # `labels_classifier` - confirm before setting FULL_FINETUNING = False.
        entity_property_param_optimizer = list(entity_property_model.classifier.named_parameters())
        entity_property_optimizer_grouped_parameters = [{"params": [p for n, p in entity_property_param_optimizer]}]

    entity_property_optimizer = AdamW(
        entity_property_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(entity_property_train_dataloader)

    entity_property_scheduler = get_linear_schedule_with_warmup(
        entity_property_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    # epoch_step is 1-based because it is used in the checkpoint file name.
    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        entity_property_model.train()
        print("epoch_step ==>", epoch_step)

        # Running sum of batch losses for this epoch.
        entity_property_total_loss = 0

        # Print the training data path and the model being trained.
        print(train_cate_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Cate_hiddendown")

        # ====================================================================================================== #
        for step, batch in enumerate(entity_property_train_dataloader):
            # Progress indicator every 1000 batches.
            if step % 1000 == 0:
                print(step, "/", len(entity_property_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            entity_property_model.zero_grad()

            loss, _ = entity_property_model(b_input_ids, b_input_mask, b_labels)

            loss.backward()

            entity_property_total_loss += loss.item()

            # Clip the gradient norm before the optimizer consumes the gradients.
            torch.nn.utils.clip_grad_norm_(parameters=entity_property_model.parameters(), max_norm=max_grad_norm)
            entity_property_optimizer.step()
            entity_property_scheduler.step()

        avg_train_loss = entity_property_total_loss / len(entity_property_train_dataloader)
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # os.path.join keeps the path valid whether or not the directory ends with '/'.
        model_saved_path = os.path.join(category_extraction_model_path,
                                        'elec_cate_hiddendown_epoch_' + str(epoch_step) + '.pt')
        torch.save(entity_property_model.state_dict(), model_saved_path)

    print("training is done")

#### hidden_size_down + dropout 0.5 category

In [None]:
# hidden_size_down + dropout 0.5
def train_entity_analysis():
    """Fine-tune ``ElectraBaseClassifier_Cate_hiddendown_dr05`` and save it after every epoch.

    Reads the training set from ``train_cate_data_path``, tokenizes it with the
    ``base_model_elec`` tokenizer (after registering ``special_tokens_dict``),
    and trains for ``num_train_epochs`` epochs with AdamW, a linear
    learning-rate schedule without warmup, and gradient-norm clipping. After
    each epoch the model ``state_dict`` is written to
    ``category_extraction_model_path`` as
    ``elec_cate_hiddendown_dr05_epoch_<n>.pt``.

    Takes no arguments and returns nothing; every setting comes from the
    notebook-level configuration variables. Several notebook cells define a
    function with this same name, so the most recently executed cell decides
    which variant is trained.
    """
    print('train_entity_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)

    # Create the checkpoint directory up front so the first save cannot fail
    # after a whole epoch of training has already been spent.
    os.makedirs(category_extraction_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_cate_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # Only the first dataset returned by get_dataset is used here.
    entity_property_train_data, _ = get_dataset(train_data, tokenizer, max_len_elec)
    entity_property_train_dataloader = DataLoader(entity_property_train_data, shuffle=True,
                                  batch_size=batch_size)

    print('loading model')
    entity_property_model = ElectraBaseClassifier_Cate_hiddendown_dr05(len(tf_id_to_name), len(tokenizer))

    # ====================================================================================================== #
    # To resume training from a saved epoch, uncomment the next line and put the .pt path in torch.load(...).
    # entity_property_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    entity_property_model.to(device)

    print('end loading')

    # Optimizer setup.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        entity_property_param_optimizer = list(entity_property_model.named_parameters())
        # Biases and LayerNorm weights are exempt from weight decay.
        # 'gamma'/'beta' are legacy LayerNorm parameter names kept for safety.
        no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
        entity_property_optimizer_grouped_parameters = [
            # AdamW reads the per-group key 'weight_decay'; any other key
            # (e.g. 'weight_decay_rate') is silently ignored.
            {'params': [p for n, p in entity_property_param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in entity_property_param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
    else:
        # NOTE(review): assumes the model exposes a `classifier` attribute; the
        # RoBERTa/DeBERTa classifiers in this notebook name their head
        # `labels_classifier` - confirm before setting FULL_FINETUNING = False.
        entity_property_param_optimizer = list(entity_property_model.classifier.named_parameters())
        entity_property_optimizer_grouped_parameters = [{"params": [p for n, p in entity_property_param_optimizer]}]

    entity_property_optimizer = AdamW(
        entity_property_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(entity_property_train_dataloader)

    entity_property_scheduler = get_linear_schedule_with_warmup(
        entity_property_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    # epoch_step is 1-based because it is used in the checkpoint file name.
    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        entity_property_model.train()
        print("epoch_step ==>", epoch_step)

        # Running sum of batch losses for this epoch.
        entity_property_total_loss = 0

        # Print the training data path and the model being trained.
        print(train_cate_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Cate_hiddendown_dr05")

        # ====================================================================================================== #
        for step, batch in enumerate(entity_property_train_dataloader):
            # Progress indicator every 1000 batches.
            if step % 1000 == 0:
                print(step, "/", len(entity_property_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            entity_property_model.zero_grad()

            loss, _ = entity_property_model(b_input_ids, b_input_mask, b_labels)

            loss.backward()

            entity_property_total_loss += loss.item()

            # Clip the gradient norm before the optimizer consumes the gradients.
            torch.nn.utils.clip_grad_norm_(parameters=entity_property_model.parameters(), max_norm=max_grad_norm)
            entity_property_optimizer.step()
            entity_property_scheduler.step()

        avg_train_loss = entity_property_total_loss / len(entity_property_train_dataloader)
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # os.path.join keeps the path valid whether or not the directory ends with '/'.
        model_saved_path = os.path.join(category_extraction_model_path,
                                        'elec_cate_hiddendown_dr05_epoch_' + str(epoch_step) + '.pt')
        torch.save(entity_property_model.state_dict(), model_saved_path)

    print("training is done")

### RoBERTa category

In [None]:
# base roberta
def train_entity_analysis():
    """Fine-tune the RoBERTa category (entity-property) classifier.

    Reads the category training set from ``train_cate_data_path``, tokenizes
    it with the ``base_model_roberta`` tokenizer (extended with
    ``special_tokens_dict``), and trains ``RobertaBaseClassifier`` for
    ``num_train_epochs`` epochs using AdamW with a linear schedule
    (no warmup). After every epoch the model ``state_dict`` is written to
    ``category_extraction_model_path`` as ``robe_cate_base_epoch_<n>.pt``.

    Returns:
        None. Progress is printed and checkpoints are written to disk.
    """
    print('train_entity_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)

    # Create the checkpoint directory up front so the first torch.save cannot
    # fail after a whole epoch of training has already been spent.
    os.makedirs(category_extraction_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_cate_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_roberta)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # get_dataset returns a pair; only the first element is used for category training.
    entity_property_train_data, _ = get_dataset(train_data, tokenizer, max_len_robe)
    entity_property_train_dataloader = DataLoader(
        entity_property_train_data,
        shuffle=True,
        batch_size=batch_size,
    )

    print('loading model')
    # The tokenizer length is passed in because special tokens were added above.
    entity_property_model = RobertaBaseClassifier(len(tf_id_to_name), len(tokenizer))

    # To resume training from a specific epoch, uncomment the next line and
    # point torch.load at that epoch's .pt file.
    # entity_property_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    entity_property_model.to(device)

    print('end loading')

    # Optimizer parameter groups.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        entity_property_param_optimizer = list(entity_property_model.named_parameters())
        # PyTorch names LayerNorm scale/shift "LayerNorm.weight"/"LayerNorm.bias";
        # the legacy "gamma"/"beta" names are kept for older checkpoints.
        no_decay = ('bias', 'LayerNorm.weight', 'gamma', 'beta')
        # AdamW reads the per-group key "weight_decay"; the former
        # "weight_decay_rate" key was not recognized, so no decay was applied.
        entity_property_optimizer_grouped_parameters = [
            {'params': [p for n, p in entity_property_param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in entity_property_param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
    else:
        # Train only the classification head.
        entity_property_param_optimizer = list(entity_property_model.classifier.named_parameters())
        entity_property_optimizer_grouped_parameters = [
            {'params': [p for _, p in entity_property_param_optimizer]}
        ]

    entity_property_optimizer = AdamW(
        entity_property_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(entity_property_train_dataloader)

    entity_property_scheduler = get_linear_schedule_with_warmup(
        entity_property_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        entity_property_model.train()
        print("epoch_step ==>", epoch_step)

        entity_property_total_loss = 0

        # Log the training data and the model being trained.
        print(train_cate_data_path)
        print(base_model_roberta)
        print("RobertaBaseClassifier")

        for step, batch in enumerate(entity_property_train_dataloader):
            # Periodic progress report.
            if step % 1000 == 0:
                print(step, "/", len(entity_property_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            entity_property_model.zero_grad()

            loss, _ = entity_property_model(b_input_ids, b_input_mask, b_labels)
            loss.backward()
            entity_property_total_loss += loss.item()

            # Clip gradients before the parameter update.
            torch.nn.utils.clip_grad_norm_(
                parameters=entity_property_model.parameters(),
                max_norm=max_grad_norm,
            )
            entity_property_optimizer.step()
            entity_property_scheduler.step()

        avg_train_loss = entity_property_total_loss / len(entity_property_train_dataloader)
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # One checkpoint per epoch.
        model_saved_path = os.path.join(
            category_extraction_model_path,
            'robe_cate_base_epoch_' + str(epoch_step) + '.pt',
        )
        torch.save(entity_property_model.state_dict(), model_saved_path)

    print("training is done")

### DeBERTa category

In [None]:
# base deberta
def train_entity_analysis():
    """Fine-tune the DeBERTa category (entity-property) classifier.

    Reads the category training set from ``train_cate_data_path``, tokenizes
    it with the ``base_model_deberta`` tokenizer (extended with
    ``special_tokens_dict``), and trains ``DebertaBaseClassifier`` for
    ``num_train_epochs`` epochs using AdamW with a linear schedule
    (no warmup). After every epoch the model ``state_dict`` is written to
    ``category_extraction_model_path`` as ``debe_cate_base_epoch_<n>.pt``.

    Returns:
        None. Progress is printed and checkpoints are written to disk.
    """
    print('train_entity_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)

    # Create the checkpoint directory up front so the first torch.save cannot
    # fail after a whole epoch of training has already been spent.
    os.makedirs(category_extraction_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_cate_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_deberta)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # get_dataset returns a pair; only the first element is used for category training.
    entity_property_train_data, _ = get_dataset(train_data, tokenizer, max_len_debe)
    entity_property_train_dataloader = DataLoader(
        entity_property_train_data,
        shuffle=True,
        batch_size=batch_size,
    )

    print('loading model')
    # The tokenizer length is passed in because special tokens were added above.
    entity_property_model = DebertaBaseClassifier(len(tf_id_to_name), len(tokenizer))

    # To resume training from a specific epoch, uncomment the next line and
    # point torch.load at that epoch's .pt file.
    # entity_property_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    entity_property_model.to(device)

    print('end loading')

    # Optimizer parameter groups.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        entity_property_param_optimizer = list(entity_property_model.named_parameters())
        # PyTorch names LayerNorm scale/shift "LayerNorm.weight"/"LayerNorm.bias";
        # the legacy "gamma"/"beta" names are kept for older checkpoints.
        no_decay = ('bias', 'LayerNorm.weight', 'gamma', 'beta')
        # AdamW reads the per-group key "weight_decay"; the former
        # "weight_decay_rate" key was not recognized, so no decay was applied.
        entity_property_optimizer_grouped_parameters = [
            {'params': [p for n, p in entity_property_param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in entity_property_param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
    else:
        # Train only the classification head.
        entity_property_param_optimizer = list(entity_property_model.classifier.named_parameters())
        entity_property_optimizer_grouped_parameters = [
            {'params': [p for _, p in entity_property_param_optimizer]}
        ]

    entity_property_optimizer = AdamW(
        entity_property_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(entity_property_train_dataloader)

    entity_property_scheduler = get_linear_schedule_with_warmup(
        entity_property_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        entity_property_model.train()
        print("epoch_step ==>", epoch_step)

        entity_property_total_loss = 0

        # Log the training data and the model being trained.
        print(train_cate_data_path)
        print(base_model_deberta)
        print("DebertaBaseClassifier")

        for step, batch in enumerate(entity_property_train_dataloader):
            # Periodic progress report.
            if step % 1000 == 0:
                print(step, "/", len(entity_property_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            entity_property_model.zero_grad()

            loss, _ = entity_property_model(b_input_ids, b_input_mask, b_labels)
            loss.backward()
            entity_property_total_loss += loss.item()

            # Clip gradients before the parameter update.
            torch.nn.utils.clip_grad_norm_(
                parameters=entity_property_model.parameters(),
                max_norm=max_grad_norm,
            )
            entity_property_optimizer.step()
            entity_property_scheduler.step()

        avg_train_loss = entity_property_total_loss / len(entity_property_train_dataloader)
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # One checkpoint per epoch.
        model_saved_path = os.path.join(
            category_extraction_model_path,
            'debe_cate_base_epoch_' + str(epoch_step) + '.pt',
        )
        torch.save(entity_property_model.state_dict(), model_saved_path)

    print("training is done")

## category 학습 시작

In [None]:
# Start category training. Every category trainer in this notebook is named
# train_entity_analysis, so this runs whichever definition was executed last.
train_entity_analysis()

## polarity 학습

* 수정하고 싶은 파라미터에 해당하는 목차로 이동한 뒤, 그 셀의 함수를 실행(정의)한다.
* **모든 함수명이 동일하므로, 가장 마지막에 실행한 셀의 함수가 사용된다는 점에 주의하자.**
* 함수를 실행한 후, "pola 학습 시작" 목차로 넘어가서 train을 시작하면 된다!

### ELECTRA model

#### base polarity

In [None]:
# base electra
def train_polarity_analysis():
    """Fine-tune the base ELECTRA polarity classifier.

    Reads the polarity training set from ``train_pola_data_path``, tokenizes
    it with the ``base_model_elec`` tokenizer (extended with
    ``special_tokens_dict``), and trains ``ElectraBaseClassifier_Pola_Base``
    for ``num_train_epochs`` epochs using AdamW with a linear schedule
    (no warmup). After every epoch the model ``state_dict`` is written to
    ``polarity_classification_model_path`` as ``elec_pola_base_epoch_<n>.pt``.

    Returns:
        None. Progress is printed and checkpoints are written to disk.
    """
    # No validation split was made (too little data), so the learning rate
    # was sometimes adjusted by hand; uncomment to rebind the global.
    # global learning_rate

    print('train_polarity_analysis')
    print('polarity model would be saved at ', polarity_classification_model_path)

    # Create the checkpoint directory up front so the first torch.save cannot
    # fail after a whole epoch of training has already been spent.
    os.makedirs(polarity_classification_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_pola_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # get_dataset returns a pair; only the second element is used for polarity training.
    _, polarity_train_data = get_dataset(train_data, tokenizer, max_len_elec)
    polarity_train_dataloader = DataLoader(
        polarity_train_data,
        shuffle=True,
        batch_size=batch_size,
    )

    print('loading model')
    # The tokenizer length is passed in because special tokens were added above.
    polarity_model = ElectraBaseClassifier_Pola_Base(len(polarity_id_to_name), len(tokenizer))

    # To resume training from a specific epoch, uncomment the next line and
    # point torch.load at that epoch's .pt file.
    # polarity_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    polarity_model.to(device)

    print('end loading')

    # Optimizer parameter groups.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        polarity_param_optimizer = list(polarity_model.named_parameters())
        # PyTorch names LayerNorm scale/shift "LayerNorm.weight"/"LayerNorm.bias";
        # the legacy "gamma"/"beta" names are kept for older checkpoints.
        no_decay = ('bias', 'LayerNorm.weight', 'gamma', 'beta')
        # AdamW reads the per-group key "weight_decay"; the former
        # "weight_decay_rate" key was not recognized, so no decay was applied.
        polarity_optimizer_grouped_parameters = [
            {'params': [p for n, p in polarity_param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in polarity_param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
    else:
        # Train only the classification head.
        polarity_param_optimizer = list(polarity_model.classifier.named_parameters())
        polarity_optimizer_grouped_parameters = [
            {'params': [p for _, p in polarity_param_optimizer]}
        ]

    polarity_optimizer = AdamW(
        polarity_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(polarity_train_dataloader)

    polarity_scheduler = get_linear_schedule_with_warmup(
        polarity_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        polarity_model.train()
        print("epoch_step ==>", epoch_step)

        polarity_total_loss = 0

        # Log the training data and the model being trained.
        print(train_pola_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Pola_Base")

        for step, batch in enumerate(polarity_train_dataloader):
            # Periodic progress report.
            if step % 1000 == 0:
                print(step, "/", len(polarity_train_dataloader))

            # Uncomment to adjust the learning rate by hand.
            # NOTE(review): the optimizer above already captured the value of
            # learning_rate, so rebinding the name here would not change its
            # step size; update polarity_optimizer.param_groups instead.
            # learning_rate = learning_rate * 0.1

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            polarity_model.zero_grad()

            loss, _ = polarity_model(b_input_ids, b_input_mask, b_labels)
            loss.backward()
            polarity_total_loss += loss.item()

            # Clip gradients before the parameter update.
            torch.nn.utils.clip_grad_norm_(
                parameters=polarity_model.parameters(),
                max_norm=max_grad_norm,
            )
            polarity_optimizer.step()
            polarity_scheduler.step()

        avg_train_loss = polarity_total_loss / len(polarity_train_dataloader)
        print("Polarity_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # One checkpoint per epoch.
        model_saved_path = os.path.join(
            polarity_classification_model_path,
            'elec_pola_base_epoch_' + str(epoch_step) + '.pt',
        )
        torch.save(polarity_model.state_dict(), model_saved_path)

    print("training is done")

#### only layer 추가 polarity

In [None]:
# layer electra
def train_polarity_analysis():
    """Fine-tune the ELECTRA polarity classifier with the extra-layer head.

    Reads the polarity training set from ``train_pola_data_path``, tokenizes
    it with the ``base_model_elec`` tokenizer (extended with
    ``special_tokens_dict``), and trains ``ElectraBaseClassifier_Pola_Layer``
    for ``num_train_epochs`` epochs using AdamW with a linear schedule
    (no warmup). After every epoch the model ``state_dict`` is written to
    ``polarity_classification_model_path`` as ``elec_pola_layer_epoch_<n>.pt``.

    Returns:
        None. Progress is printed and checkpoints are written to disk.
    """
    print('train_polarity_analysis')
    print('polarity model would be saved at ', polarity_classification_model_path)

    # Create the checkpoint directory up front so the first torch.save cannot
    # fail after a whole epoch of training has already been spent.
    os.makedirs(polarity_classification_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_pola_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # get_dataset returns a pair; only the second element is used for polarity training.
    _, polarity_train_data = get_dataset(train_data, tokenizer, max_len_elec)
    polarity_train_dataloader = DataLoader(
        polarity_train_data,
        shuffle=True,
        batch_size=batch_size,
    )

    print('loading model')
    # The tokenizer length is passed in because special tokens were added above.
    polarity_model = ElectraBaseClassifier_Pola_Layer(len(polarity_id_to_name), len(tokenizer))

    # To resume training from a specific epoch, uncomment the next line and
    # point torch.load at that epoch's .pt file.
    # polarity_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    polarity_model.to(device)

    print('end loading')

    # Optimizer parameter groups.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        polarity_param_optimizer = list(polarity_model.named_parameters())
        # PyTorch names LayerNorm scale/shift "LayerNorm.weight"/"LayerNorm.bias";
        # the legacy "gamma"/"beta" names are kept for older checkpoints.
        no_decay = ('bias', 'LayerNorm.weight', 'gamma', 'beta')
        # AdamW reads the per-group key "weight_decay"; the former
        # "weight_decay_rate" key was not recognized, so no decay was applied.
        polarity_optimizer_grouped_parameters = [
            {'params': [p for n, p in polarity_param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in polarity_param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
    else:
        # Train only the classification head.
        polarity_param_optimizer = list(polarity_model.classifier.named_parameters())
        polarity_optimizer_grouped_parameters = [
            {'params': [p for _, p in polarity_param_optimizer]}
        ]

    polarity_optimizer = AdamW(
        polarity_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(polarity_train_dataloader)

    polarity_scheduler = get_linear_schedule_with_warmup(
        polarity_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        polarity_model.train()
        print("epoch_step ==>", epoch_step)

        polarity_total_loss = 0

        # Log the training data and the model being trained.
        print(train_pola_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Pola_Layer")

        for step, batch in enumerate(polarity_train_dataloader):
            # Periodic progress report.
            if step % 1000 == 0:
                print(step, "/", len(polarity_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            polarity_model.zero_grad()

            loss, _ = polarity_model(b_input_ids, b_input_mask, b_labels)
            loss.backward()
            polarity_total_loss += loss.item()

            # Clip gradients before the parameter update.
            torch.nn.utils.clip_grad_norm_(
                parameters=polarity_model.parameters(),
                max_norm=max_grad_norm,
            )
            polarity_optimizer.step()
            polarity_scheduler.step()

        avg_train_loss = polarity_total_loss / len(polarity_train_dataloader)
        print("Polarity_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # One checkpoint per epoch.
        model_saved_path = os.path.join(
            polarity_classification_model_path,
            'elec_pola_layer_epoch_' + str(epoch_step) + '.pt',
        )
        torch.save(polarity_model.state_dict(), model_saved_path)

    print("training is done")

#### dropout 0.5 polarity

In [None]:
# dropout 0.5 electra
def train_polarity_analysis():
    """Fine-tune the ELECTRA polarity classifier with the dropout-0.5 head.

    Reads the polarity training set from ``train_pola_data_path``, tokenizes
    it with the ``base_model_elec`` tokenizer (extended with
    ``special_tokens_dict``), and trains ``ElectraBaseClassifier_Pola_dr05``
    for ``num_train_epochs`` epochs using AdamW with a linear schedule
    (no warmup). After every epoch the model ``state_dict`` is written to
    ``polarity_classification_model_path`` as ``elec_pola_dr05_epoch_<n>.pt``.

    Returns:
        None. Progress is printed and checkpoints are written to disk.
    """
    print('train_polarity_analysis')
    print('polarity model would be saved at ', polarity_classification_model_path)

    # Create the checkpoint directory up front so the first torch.save cannot
    # fail after a whole epoch of training has already been spent.
    os.makedirs(polarity_classification_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_pola_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # get_dataset returns a pair; only the second element is used for polarity training.
    _, polarity_train_data = get_dataset(train_data, tokenizer, max_len_elec)
    polarity_train_dataloader = DataLoader(
        polarity_train_data,
        shuffle=True,
        batch_size=batch_size,
    )

    print('loading model')
    # The tokenizer length is passed in because special tokens were added above.
    polarity_model = ElectraBaseClassifier_Pola_dr05(len(polarity_id_to_name), len(tokenizer))

    # To resume training from a specific epoch, uncomment the next line and
    # point torch.load at that epoch's .pt file.
    # polarity_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    polarity_model.to(device)

    print('end loading')

    # Optimizer parameter groups.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        polarity_param_optimizer = list(polarity_model.named_parameters())
        # PyTorch names LayerNorm scale/shift "LayerNorm.weight"/"LayerNorm.bias";
        # the legacy "gamma"/"beta" names are kept for older checkpoints.
        no_decay = ('bias', 'LayerNorm.weight', 'gamma', 'beta')
        # AdamW reads the per-group key "weight_decay"; the former
        # "weight_decay_rate" key was not recognized, so no decay was applied.
        polarity_optimizer_grouped_parameters = [
            {'params': [p for n, p in polarity_param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in polarity_param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
    else:
        # Train only the classification head.
        polarity_param_optimizer = list(polarity_model.classifier.named_parameters())
        polarity_optimizer_grouped_parameters = [
            {'params': [p for _, p in polarity_param_optimizer]}
        ]

    polarity_optimizer = AdamW(
        polarity_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(polarity_train_dataloader)

    polarity_scheduler = get_linear_schedule_with_warmup(
        polarity_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        polarity_model.train()
        print("epoch_step ==>", epoch_step)

        polarity_total_loss = 0

        # Log the training data and the model being trained.
        print(train_pola_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Pola_dr05")

        for step, batch in enumerate(polarity_train_dataloader):
            # Periodic progress report.
            if step % 1000 == 0:
                print(step, "/", len(polarity_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            polarity_model.zero_grad()

            loss, _ = polarity_model(b_input_ids, b_input_mask, b_labels)
            loss.backward()
            polarity_total_loss += loss.item()

            # Clip gradients before the parameter update.
            torch.nn.utils.clip_grad_norm_(
                parameters=polarity_model.parameters(),
                max_norm=max_grad_norm,
            )
            polarity_optimizer.step()
            polarity_scheduler.step()

        avg_train_loss = polarity_total_loss / len(polarity_train_dataloader)
        print("Polarity_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # One checkpoint per epoch.
        model_saved_path = os.path.join(
            polarity_classification_model_path,
            'elec_pola_dr05_epoch_' + str(epoch_step) + '.pt',
        )
        torch.save(polarity_model.state_dict(), model_saved_path)

    print("training is done")

#### layer + dropout 0.5 polarity

In [None]:
# layer + dropout 0.5 electra
def train_polarity_analysis():
    """Fine-tune the ELECTRA polarity classifier (extra layer + dropout 0.5).

    Reads the polarity training set from ``train_pola_data_path``, tokenizes
    it with the ``base_model_elec`` tokenizer (extended with
    ``special_tokens_dict``), and trains
    ``ElectraBaseClassifier_Pola_Layer_Dropout05`` for ``num_train_epochs``
    epochs using AdamW with a linear schedule (no warmup). After every epoch
    the model ``state_dict`` is written to
    ``polarity_classification_model_path`` as
    ``elec_pola_layer_dr05_epoch_<n>.pt``.

    Returns:
        None. Progress is printed and checkpoints are written to disk.
    """
    print('train_polarity_analysis')
    print('polarity model would be saved at ', polarity_classification_model_path)

    # Create the checkpoint directory up front so the first torch.save cannot
    # fail after a whole epoch of training has already been spent.
    os.makedirs(polarity_classification_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_pola_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # get_dataset returns a pair; only the second element is used for polarity training.
    _, polarity_train_data = get_dataset(train_data, tokenizer, max_len_elec)
    polarity_train_dataloader = DataLoader(
        polarity_train_data,
        shuffle=True,
        batch_size=batch_size,
    )

    print('loading model')
    # The tokenizer length is passed in because special tokens were added above.
    polarity_model = ElectraBaseClassifier_Pola_Layer_Dropout05(len(polarity_id_to_name), len(tokenizer))

    # To resume training from a specific epoch, uncomment the next line and
    # point torch.load at that epoch's .pt file.
    # polarity_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    polarity_model.to(device)

    print('end loading')

    # Optimizer parameter groups.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        polarity_param_optimizer = list(polarity_model.named_parameters())
        # PyTorch names LayerNorm scale/shift "LayerNorm.weight"/"LayerNorm.bias";
        # the legacy "gamma"/"beta" names are kept for older checkpoints.
        no_decay = ('bias', 'LayerNorm.weight', 'gamma', 'beta')
        # AdamW reads the per-group key "weight_decay"; the former
        # "weight_decay_rate" key was not recognized, so no decay was applied.
        polarity_optimizer_grouped_parameters = [
            {'params': [p for n, p in polarity_param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in polarity_param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
    else:
        # Train only the classification head.
        polarity_param_optimizer = list(polarity_model.classifier.named_parameters())
        polarity_optimizer_grouped_parameters = [
            {'params': [p for _, p in polarity_param_optimizer]}
        ]

    polarity_optimizer = AdamW(
        polarity_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(polarity_train_dataloader)

    polarity_scheduler = get_linear_schedule_with_warmup(
        polarity_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        polarity_model.train()
        print("epoch_step ==>", epoch_step)

        polarity_total_loss = 0

        # Log the training data and the model being trained.
        print(train_pola_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Pola_Layer_Dropout05")

        for step, batch in enumerate(polarity_train_dataloader):
            # Periodic progress report.
            if step % 1000 == 0:
                print(step, "/", len(polarity_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            polarity_model.zero_grad()

            loss, _ = polarity_model(b_input_ids, b_input_mask, b_labels)
            loss.backward()
            polarity_total_loss += loss.item()

            # Clip gradients before the parameter update.
            torch.nn.utils.clip_grad_norm_(
                parameters=polarity_model.parameters(),
                max_norm=max_grad_norm,
            )
            polarity_optimizer.step()
            polarity_scheduler.step()

        avg_train_loss = polarity_total_loss / len(polarity_train_dataloader)
        print("Polarity_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # One checkpoint per epoch.
        model_saved_path = os.path.join(
            polarity_classification_model_path,
            'elec_pola_layer_dr05_epoch_' + str(epoch_step) + '.pt',
        )
        torch.save(polarity_model.state_dict(), model_saved_path)

    print("training is done")

#### hidden_size_down polarity

In [None]:
# hidden_size_down electra
def train_polarity_analysis():
    """Fine-tune the ELECTRA polarity classifier with the reduced hidden size.

    Reads the polarity training set from ``train_pola_data_path``, tokenizes
    it with the ``base_model_elec`` tokenizer (extended with
    ``special_tokens_dict``), and trains
    ``ElectraBaseClassifier_Pola_hiddendown`` for ``num_train_epochs`` epochs
    using AdamW with a linear schedule (no warmup). After every epoch the
    model ``state_dict`` is written to ``polarity_classification_model_path``
    as ``elec_pola_hiddendown_epoch_<n>.pt``.

    Returns:
        None. Progress is printed and checkpoints are written to disk.
    """
    print('train_polarity_analysis')
    print('polarity model would be saved at ', polarity_classification_model_path)

    # Create the checkpoint directory up front so the first torch.save cannot
    # fail after a whole epoch of training has already been spent.
    os.makedirs(polarity_classification_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_pola_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # get_dataset returns a pair; only the second element is used for polarity training.
    _, polarity_train_data = get_dataset(train_data, tokenizer, max_len_elec)
    polarity_train_dataloader = DataLoader(
        polarity_train_data,
        shuffle=True,
        batch_size=batch_size,
    )

    print('loading model')
    # The tokenizer length is passed in because special tokens were added above.
    polarity_model = ElectraBaseClassifier_Pola_hiddendown(len(polarity_id_to_name), len(tokenizer))

    # To resume training from a specific epoch, uncomment the next line and
    # point torch.load at that epoch's .pt file.
    # polarity_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    polarity_model.to(device)

    print('end loading')

    # Optimizer parameter groups.
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        polarity_param_optimizer = list(polarity_model.named_parameters())
        # PyTorch names LayerNorm scale/shift "LayerNorm.weight"/"LayerNorm.bias";
        # the legacy "gamma"/"beta" names are kept for older checkpoints.
        no_decay = ('bias', 'LayerNorm.weight', 'gamma', 'beta')
        # AdamW reads the per-group key "weight_decay"; the former
        # "weight_decay_rate" key was not recognized, so no decay was applied.
        polarity_optimizer_grouped_parameters = [
            {'params': [p for n, p in polarity_param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in polarity_param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
    else:
        # Train only the classification head.
        polarity_param_optimizer = list(polarity_model.classifier.named_parameters())
        polarity_optimizer_grouped_parameters = [
            {'params': [p for _, p in polarity_param_optimizer]}
        ]

    polarity_optimizer = AdamW(
        polarity_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so it spans every batch of every epoch.
    total_steps = epochs * len(polarity_train_dataloader)

    polarity_scheduler = get_linear_schedule_with_warmup(
        polarity_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        polarity_model.train()
        print("epoch_step ==>", epoch_step)

        polarity_total_loss = 0

        # Log the training data and the model being trained.
        print(train_pola_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Pola_hiddendown")

        for step, batch in enumerate(polarity_train_dataloader):
            # Periodic progress report.
            if step % 1000 == 0:
                print(step, "/", len(polarity_train_dataloader))

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            polarity_model.zero_grad()

            loss, _ = polarity_model(b_input_ids, b_input_mask, b_labels)
            loss.backward()
            polarity_total_loss += loss.item()

            # Clip gradients before the parameter update.
            torch.nn.utils.clip_grad_norm_(
                parameters=polarity_model.parameters(),
                max_norm=max_grad_norm,
            )
            polarity_optimizer.step()
            polarity_scheduler.step()

        avg_train_loss = polarity_total_loss / len(polarity_train_dataloader)
        print("Polarity_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # One checkpoint per epoch.
        model_saved_path = os.path.join(
            polarity_classification_model_path,
            'elec_pola_hiddendown_epoch_' + str(epoch_step) + '.pt',
        )
        torch.save(polarity_model.state_dict(), model_saved_path)

    print("training is done")

#### hidden_size_down + dropout 0.5 polarity

In [None]:
# hidden_size_down + dropout 0.5 electra
def train_polarity_analysis():
    """Fine-tune the ELECTRA polarity classifier (hidden-size-down, dropout 0.5 variant).

    Loads the polarity training set from ``train_pola_data_path``, tokenizes it
    with the ``base_model_elec`` tokenizer (extended with
    ``special_tokens_dict``), and trains
    ``ElectraBaseClassifier_Pola_hiddendown_dr05`` for ``num_train_epochs``
    epochs using AdamW, a linear learning-rate decay without warmup, and
    gradient-norm clipping at 1.0.

    Side effects:
        Prints progress to stdout and, after every epoch, writes the model
        ``state_dict`` into ``polarity_classification_model_path`` as
        ``elec_pola_hiddendown_dr05_epoch_<epoch>.pt``. The directory is
        created up front if it does not exist.

    Relies on names defined elsewhere in the notebook: ``jsonlload``,
    ``get_dataset``, ``special_tokens_dict``, ``polarity_id_to_name``,
    ``device`` and the configuration constants from the settings cell.

    Returns:
        None.
    """
    print('train_polarity_analysis')
    print('polarity model would be saved at ', polarity_classification_model_path)

    # Create the checkpoint directory now so that a missing folder is detected
    # before training instead of at the first torch.save after a full epoch.
    os.makedirs(polarity_classification_model_path, exist_ok=True)

    print('loading train data')
    train_data = jsonlload(train_pola_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model_elec)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    # get_dataset returns two datasets; only the second (polarity) one is used here.
    _, polarity_train_data = get_dataset(train_data, tokenizer, max_len_elec)
    polarity_train_dataloader = DataLoader(polarity_train_data, shuffle=True,
                                           batch_size=batch_size)
    num_batches = len(polarity_train_dataloader)

    print('loading model')
    # len(tokenizer) is passed so the model can account for the added special tokens.
    polarity_model = ElectraBaseClassifier_Pola_hiddendown_dr05(len(polarity_id_to_name), len(tokenizer))

    # ====================================================================================================== #
    # To resume training from a specific epoch's checkpoint, load its .pt file
    # here / torch.load("<path to the .pt file>")
    # polarity_model.load_state_dict(torch.load("/content/drive/MyDrive/sample.pt"))
    polarity_model.to(device)

    print('end loading')

    # polarity_model_optimizer_setting
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        polarity_param_optimizer = list(polarity_model.named_parameters())
        # Parameters that must not be weight-decayed. 'gamma'/'beta' are legacy
        # LayerNorm names kept for compatibility; PyTorch modules expose the
        # LayerNorm scale as 'LayerNorm.weight' (its bias is caught by 'bias').
        no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight']
        # The optimizer reads the per-group key 'weight_decay'. The previous
        # key 'weight_decay_rate' is not a recognised option, so the intended
        # 0.01 decay was silently never applied.
        polarity_optimizer_grouped_parameters = [
            {'params': [p for n, p in polarity_param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in polarity_param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
    else:
        # Train only the classifier head.
        polarity_param_optimizer = list(polarity_model.classifier.named_parameters())
        polarity_optimizer_grouped_parameters = [{"params": [p for n, p in polarity_param_optimizer]}]

    polarity_optimizer = AdamW(
        polarity_optimizer_grouped_parameters,
        lr=learning_rate,
        eps=eps
    )
    epochs = num_train_epochs
    max_grad_norm = 1.0
    # The scheduler is stepped once per batch, so the schedule spans every batch of every epoch.
    total_steps = epochs * num_batches

    polarity_scheduler = get_linear_schedule_with_warmup(
        polarity_optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    # epoch_step is 1-based so that log lines and checkpoint names start at epoch 1.
    for epoch_step in trange(1, epochs + 1, desc="Epoch"):
        polarity_model.train()
        print("epoch_step ==>", epoch_step)

        # polarity train
        polarity_total_loss = 0

        # Print the training data path and the name of the model being trained.
        # This function trains on the polarity data set, so log that path
        # (the category path was printed here before).
        print(train_pola_data_path)
        print(base_model_elec)
        print("ElectraBaseClassifier_Pola_hiddendown_dr05")

        # ====================================================================================================== #
        # step: used to report training progress every 1000 batches
        for step, batch in enumerate(polarity_train_dataloader):
            if step % 1000 == 0:
                print(step, "/", num_batches)

            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            polarity_model.zero_grad()

            # The model returns a pair; only the first element (the loss) is needed for training.
            loss, _ = polarity_model(b_input_ids, b_input_mask, b_labels)

            loss.backward()

            polarity_total_loss += loss.item()

            # Clip after backward and before the optimizer step to bound the update size.
            torch.nn.utils.clip_grad_norm_(parameters=polarity_model.parameters(), max_norm=max_grad_norm)
            polarity_optimizer.step()
            polarity_scheduler.step()

        avg_train_loss = polarity_total_loss / num_batches
        # NOTE(review): the label says "Entity_Property" although this loop
        # trains the polarity model; left unchanged to keep existing log output.
        print("Entity_Property_Epoch: ", epoch_step)
        print("Average train loss: {}".format(avg_train_loss))

        # Save a checkpoint after every epoch so any epoch can be evaluated or resumed later.
        model_saved_path = os.path.join(
            polarity_classification_model_path,
            'elec_pola_hiddendown_dr05_epoch_' + str(epoch_step) + '.pt'
        )
        torch.save(polarity_model.state_dict(), model_saved_path)

    print("training is done")

## polarity 학습 시작

In [None]:
# Start polarity training.
# NOTE(review): this runs whichever definition of train_polarity_analysis was
# executed last in the session; several cells appear to reuse this name, so
# confirm the intended variant's cell was run before this one.
train_polarity_analysis()