<a href="https://colab.research.google.com/github/NoisyStudents/NoisyABSA/blob/main/NoisyABSA_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Detect whether the notebook runs on Google Colab by probing for the
# google.colab package.
try:
    import google.colab  # noqa: F401  (imported only to probe for Colab)
except ImportError:
    # google.colab is not importable, so treat this as a local run.
    IN_COLAB = False
else:
    from google.colab import drive

    # Mount Google Drive. A mount failure is raised to the user instead of
    # being silently mistaken for "not running on Colab".
    drive.mount('/content/drive', force_remount = True)
    IN_COLAB = True

In [None]:
# Install the notebook's dependencies with pip, but only when running on Colab.
# The "!" lines are IPython shell commands, so this cell only runs inside a notebook.
if IN_COLAB:
  !pip install transformers==4.28.0
  !pip install datasets
  !pip install evaluate
  !pip install sentencepiece

In [None]:
import os
import torch

# Project root. Replace the placeholder with a real directory before running:
# os.chdir below raises if the path does not exist.
if IN_COLAB:
    root_path = 'Enter colab path'
else:
    root_path = 'Enter local path'

# torch.has_mps is deprecated; query the MPS backend instead. The hasattr
# guard covers PyTorch versions that have no torch.backends.mps module.
use_mps = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
os.chdir(root_path)

In [None]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd

from InstructABSA.data_prep import DatasetLoader
from InstructABSA.utils import T5Generator
from instructions import InstructionsHandler

데이터셋 가져오기

In [None]:
# CSV files used for training.
labeled_dataset = ['preped_Laptop_train.csv']
unlabeled_dataset = ['Laptop_unlabeled.csv']  # unlabeled sentences only
# CSV files used for testing.
labeled_to_test = ['preped_Restaurant_trial.csv']
# Several files may be listed, e.g. labeled_dataset = ['a.csv', 'b.csv', 'c.csv']

data_path = root_path+'/Dataset/'


def _read_and_concat(file_names):
    """Read every CSV in file_names from data_path and stack them into one DataFrame.

    ignore_index=True gives the result a fresh 0..n-1 index, so rows that come
    from different files never share an index label.
    """
    frames = [pd.read_csv(data_path+file_name) for file_name in file_names]
    return pd.concat(frames, ignore_index=True)


df_labeled = _read_and_concat(labeled_dataset)
df_unlabeled = _read_and_concat(unlabeled_dataset)
df_test = _read_and_concat(labeled_to_test)
# The unlabeled CSV names its text column 'Sentence'; align it with the labeled data.
df_unlabeled.rename(columns={'Sentence':'raw_text'}, inplace=True)

# df_labeled: DataFrame with raw_text and aspectTerms ([{'term': 'cord', 'polarity': 'neutral'}])
# df_test: same layout as df_labeled
# df_unlabeled: DataFrame with raw_text only

print('df_labeled Form')
print(df_labeled.shape)
print('df_unlabeled Form')
print(df_unlabeled.shape)
print('df_test Form')
print(df_test.shape)

##!! Checkpoint 이름 설정

In [None]:
# Names that identify this run.
task_name = 'joint_task_FinalExperiment'
experiment_name = 'CrossDomain_iteration_Rest_Default'  # choose a distinct name per experiment

model_checkpoint = 'allenai/tk-instruct-base-def-pos'
print('Experiment Name: ', experiment_name)

# Output directory: ./Models/<task_name>/<checkpoint without '/'>-<experiment_name>
model_out_path = os.path.join(
    './Models',
    task_name,
    ''.join(model_checkpoint.split('/')) + '-' + experiment_name,
)
print('Model output path: ', model_out_path)

#### 여기서부터 수행

In [None]:
def convert_labels_to_aspect_terms(labels):
    """Parse a label string into a list of aspect-term dicts.

    The expected format is ``'term:polarity, term:polarity, ...'``.

    Args:
        labels: Label string, e.g. ``'cord:neutral, screen:positive'``.

    Returns:
        A list of ``{'term': ..., 'polarity': ...}`` dicts, one per non-blank
        pair, in input order. Surrounding whitespace is stripped from terms
        and polarities. An empty or whitespace-only string gives ``[]``.
    """
    aspect_terms = []
    for pair in labels.split(', '):
        pair = pair.strip()
        if not pair:
            # Blank pieces come from empty input or a trailing separator;
            # they carry no aspect term, so they are dropped.
            continue
        # Split on the first ':' only; any later ':' stays in the polarity.
        term, _, polarity = pair.partition(':')
        aspect_terms.append({'term': term.strip(), 'polarity': polarity.strip()})

    return aspect_terms

### Iteration 1

In [None]:
# MPS flag that the training cells forward to the trainer as 'use_mps_device'.
# torch.has_mps is deprecated; checking is_available() avoids requesting an
# MPS device that cannot actually be used on this machine.
use_mps_ = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Get the input text into the required format using Instructions
instruct_handler = InstructionsHandler()
# Set instruction_set1 for InstructABSA-1 and instruction_set2 for InstructABSA-2
instruct_handler.load_instruction_set2()


############################################### Vanilla Model ##############################################
# Instantiate the vanilla (not yet fine-tuned) model
t5_exp = T5Generator(model_checkpoint) # tk-instruct

# Convert the data to the InstructABSA input format
# Set bos_instruct1 for lapt14 and bos_instruct2 for rest14. For other datasets, modify the instructions.py file.
# (This cell uses bos_instruct3 for both the train and the test split.)
loader = DatasetLoader(df_labeled, df_test)
if loader.train_df_id is not None:
    loader.train_df_id = loader.create_data_in_joint_task_format(loader.train_df_id, 'term', 'polarity', 'raw_text', 'aspectTerms', instruct_handler.joint['bos_instruct3'], instruct_handler.joint['eos_instruct'])
if loader.test_df_id is not None:
    loader.test_df_id = loader.create_data_in_joint_task_format(loader.test_df_id, 'term', 'polarity', 'raw_text', 'aspectTerms', instruct_handler.joint['bos_instruct3'], instruct_handler.joint['eos_instruct'])

# Keep a copy of loader.train_df_id; later cells rebuild the training set from it
train_data_save = loader.train_df_id.copy()

# Tokenize Dataset
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_exp.tokenize_function_inputs)

# Move the model parameters to the device
t5_exp.model = t5_exp.model.to(device)

# Evaluate the vanilla model
# Get prediction labels - Training set
id_tr_pred_labels = t5_exp.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'train', batch_size = 16)
id_tr_labels = [i.strip() for i in id_ds['train']['labels']]
# Get prediction labels - Testing set
id_te_pred_labels = t5_exp.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'test', batch_size = 16)
id_te_labels = [i.strip() for i in id_ds['test']['labels']]

print('First iteration train, test metrics for Vanilla model')
print('when equal terms')
p, r, f1, _ = t5_exp.get_metrics_eq(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_exp.get_metrics_eq(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)

print(' ')
print('when include terms')
p, r, f1, _ = t5_exp.get_metrics_in(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_exp.get_metrics_in(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)



In [None]:

############################################### Teacher Model ##############################################

# Checkpoint path for the teacher model of the first iteration.
# NOTE(review): model_out_path is overwritten with a path joined onto its previous value, so the
# directories created from here on are nested inside each other, and re-running this cell nests again.
model_out_path = os.path.join(model_out_path, 'joint_task', f"{model_checkpoint.replace('/', '')}-first_training")
print('First Trained Model output path: ', model_out_path)

# Training arguments
training_args = {
  'output_dir':model_out_path,
  'evaluation_strategy':"epoch",
  'learning_rate':5e-5,
  'lr_scheduler_type':'cosine',
  'per_device_train_batch_size':8,
  'per_device_eval_batch_size':16,
  'num_train_epochs':4,
  'weight_decay':0.01,
  'warmup_ratio':0.1,
  'save_strategy':'no',
  'load_best_model_at_end':False,
  'push_to_hub':False,
  'eval_accumulation_steps':1,
  'predict_with_generate':True,
  'use_mps_device':use_mps_
}

# Train the teacher model; the original note says the trained model is stored under model_out_path
# (it is reloaded from that path just below)
t5_trainer = t5_exp.train(id_tokenized_ds, **training_args)

# teacher inference
# Instantiate the teacher model from the path used as output_dir above
t5_teacher = T5Generator(model_out_path)
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_teacher.tokenize_function_inputs)

# Move the model parameters to the device
t5_teacher.model = t5_teacher.model.to(device)

# inference train, test dataset
# Get prediction labels - Training set
id_tr_pred_labels = t5_teacher.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'train', batch_size = 16)
id_tr_labels = [i.strip() for i in id_ds['train']['labels']]
# Get prediction labels - Testing set
id_te_pred_labels = t5_teacher.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'test', batch_size = 16)
id_te_labels = [i.strip() for i in id_ds['test']['labels']]

print('First iteration train, test metrics for teacher model')
print('when equal terms')
p, r, f1, _ = t5_teacher.get_metrics_eq(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_teacher.get_metrics_eq(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)

print(' ')
print('when include terms')
p, r, f1, _ = t5_teacher.get_metrics_in(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_teacher.get_metrics_in(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)


# Run the trained teacher model over the unlabeled dataset to produce labels for it
#inference unlabeled dataset
tokenizer = t5_teacher.tokenizer
model = t5_teacher.model

# Build the prompts the same way as the InstructABSA inputs (bos instruction + sentence + eos instruction)
df_unlabeled_inferenced = pd.DataFrame()
df_unlabeled_inferenced['raw_text'] = df_unlabeled['raw_text']
df_unlabeled_inferenced['text'] = input_text = instruct_handler.joint['bos_instruct3'] + df_unlabeled_inferenced['raw_text'] + '' + instruct_handler.joint['eos_instruct']
output_list = []
# One generate() call per sentence.
# NOTE(review): generate() receives only input_ids and no explicit max_length / max_new_tokens, so the
# model's default generation length applies - confirm that long label strings are not being truncated.
for text in df_unlabeled_inferenced['text']:
    tokenized_text = tokenizer(text,return_tensors="pt")
    tokenized_text.to(device)
    output = tokenizer.decode(model.generate(tokenized_text.input_ids)[0].to(device), skip_special_tokens=True)
    output_list.append(output)
df_unlabeled_inferenced['labels'] = output_list
df_unlabeled_inferenced['aspectTerms'] = df_unlabeled_inferenced['labels'].apply(convert_labels_to_aspect_terms)
# Presumably mirrors an 'Unnamed: 0' column present in the labeled data - TODO confirm
df_unlabeled_inferenced['Unnamed: 0'] = df_unlabeled_inferenced.index

############################################### Student Model 1 ##############################################
# student training
# Build the student training set by concatenating the saved labeled training data (train_data_save)
# with the unlabeled data labeled by the teacher (df_unlabeled_inferenced)
loader.train_df_id = pd.concat([train_data_save, df_unlabeled_inferenced])

# Instantiate the student from the base checkpoint
t5_exp = T5Generator(model_checkpoint) # tk-instruct

# Tokenize Dataset
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_exp.tokenize_function_inputs)

# Move the model parameters to the device
t5_exp.model = t5_exp.model.to(device)

# NOTE(review): joined onto the teacher path set above, so the student directory sits inside the teacher directory
model_out_path = os.path.join(model_out_path, 'joint_task', f"{model_checkpoint.replace('/', '')}-second_training")
print('Model output path: ', model_out_path)

# Training arguments
training_args = {
  'output_dir':model_out_path,
  'evaluation_strategy':"epoch",
  'learning_rate':5e-5,
  'lr_scheduler_type':'cosine',
  'per_device_train_batch_size':8,
  'per_device_eval_batch_size':16,
  'num_train_epochs':4,
  'weight_decay':0.01,
  'warmup_ratio':0.1,
  'save_strategy':'no',
  'load_best_model_at_end':False,
  'push_to_hub':False,
  'eval_accumulation_steps':1,
  'predict_with_generate':True,
  'use_mps_device':use_mps_
}

# Train the student model
t5_trainer = t5_exp.train(id_tokenized_ds, **training_args)

# student inference: reload the student from the path used as output_dir above
t5_student = T5Generator(model_out_path)
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_student.tokenize_function_inputs)


# Move the model parameters to the device
t5_student.model = t5_student.model.to(device)

# inference train, test dataset
# Get prediction labels - Training set

id_tr_pred_labels = t5_student.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'train', batch_size = 16)
id_tr_labels = [i.strip() for i in id_ds['train']['labels']]
# Get prediction labels - Testing set
id_te_pred_labels = t5_student.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'test', batch_size = 16)
id_te_labels = [i.strip() for i in id_ds['test']['labels']]

print('First iteration train, test metrics for student model')
print('when equal terms')
p, r, f1, _ = t5_student.get_metrics_eq(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_student.get_metrics_eq(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)

print(' ')
print('when include terms')
p, r, f1, _ = t5_student.get_metrics_in(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_student.get_metrics_in(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)



In [None]:
# Inference with student model 1

# Run the trained student model over the unlabeled dataset to produce labels for it
#inference unlabeled dataset
tokenizer = t5_student.tokenizer
model = t5_student.model

# Build the prompts the same way as the InstructABSA inputs (bos instruction + sentence + eos instruction)
df_unlabeled_inferenced = pd.DataFrame()
df_unlabeled_inferenced['raw_text'] = df_unlabeled['raw_text']
df_unlabeled_inferenced['text'] = input_text = instruct_handler.joint['bos_instruct3'] + df_unlabeled_inferenced['raw_text'] + '' + instruct_handler.joint['eos_instruct']
output_list = []
# One generate() call per sentence.
# NOTE(review): generate() receives only input_ids and no explicit max_length / max_new_tokens, so the
# model's default generation length applies - confirm that long label strings are not being truncated.
for text in df_unlabeled_inferenced['text']:
    tokenized_text = tokenizer(text,return_tensors="pt")
    tokenized_text.to(device)
    output = tokenizer.decode(model.generate(tokenized_text.input_ids)[0].to(device), skip_special_tokens=True)
    output_list.append(output)
df_unlabeled_inferenced['labels'] = output_list
df_unlabeled_inferenced['aspectTerms'] = df_unlabeled_inferenced['labels'].apply(convert_labels_to_aspect_terms)
# Presumably mirrors an 'Unnamed: 0' column present in the labeled data - TODO confirm
df_unlabeled_inferenced['Unnamed: 0'] = df_unlabeled_inferenced.index



### Iteration 2

In [None]:
############################################### Student Model 2 ##############################################

# Build the student-2 training set by concatenating the saved labeled training data (train_data_save)
# with df_unlabeled_inferenced, the unlabeled data labeled by student 1
loader.train_df_id = pd.concat([train_data_save, df_unlabeled_inferenced])

# Start from the student model trained in the previous iteration (model_out_path still points at it)
t5_exp = T5Generator(model_out_path) # load the new teacher baseline


# Tokenize Dataset
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_exp.tokenize_function_inputs)


# Move the model parameters to the device
t5_exp.model = t5_exp.model.to(device)

# Output path for the model trained in this iteration.
# NOTE(review): joined onto the previous model_out_path, so this directory is nested inside the
# previous model's directory, and re-running this cell nests again.
model_out_path = os.path.join(model_out_path, 'joint_task', f"{model_checkpoint.replace('/', '')}-third_training")
print('Third Trained Model output path: ', model_out_path)


# Training arguments
training_args = {
  'output_dir':model_out_path,
  'evaluation_strategy':"epoch",
  'learning_rate':5e-5,
  'lr_scheduler_type':'cosine',
  'per_device_train_batch_size':8,
  'per_device_eval_batch_size':16,
  'num_train_epochs':4,
  'weight_decay':0.01,
  'warmup_ratio':0.1,
  'save_strategy':'no',
  'load_best_model_at_end':False,
  'push_to_hub':False,
  'eval_accumulation_steps':1,
  'predict_with_generate':True,
  'use_mps_device':use_mps_
}

# Train the student model
t5_trainer = t5_exp.train(id_tokenized_ds, **training_args)

# student inference: reload the student from the path used as output_dir above
t5_student2 = T5Generator(model_out_path)
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_student2.tokenize_function_inputs)


# Move the model parameters to the device
t5_student2.model = t5_student2.model.to(device)

# inference train, test dataset
# Get prediction labels - Training set

id_tr_pred_labels = t5_student2.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'train', batch_size = 16)
id_tr_labels = [i.strip() for i in id_ds['train']['labels']]
# Get prediction labels - Testing set
id_te_pred_labels = t5_student2.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'test', batch_size = 16)
id_te_labels = [i.strip() for i in id_ds['test']['labels']]

print('Second iteration train, test metrics for student model')
print('when equal terms')
p, r, f1, _ = t5_student2.get_metrics_eq(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_student2.get_metrics_eq(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)

print(' ')
print('when include terms')
p, r, f1, _ = t5_student2.get_metrics_in(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_student2.get_metrics_in(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)


In [None]:
# Inference with student model 2

# Run the trained student model over the unlabeled dataset to produce labels for it
#inference unlabeled dataset
tokenizer = t5_student2.tokenizer
model = t5_student2.model

# Build the prompts the same way as the InstructABSA inputs (bos instruction + sentence + eos instruction)
df_unlabeled_inferenced = pd.DataFrame()
df_unlabeled_inferenced['raw_text'] = df_unlabeled['raw_text']
df_unlabeled_inferenced['text'] = input_text = instruct_handler.joint['bos_instruct3'] + df_unlabeled_inferenced['raw_text'] + '' + instruct_handler.joint['eos_instruct']
output_list = []
# One generate() call per sentence.
# NOTE(review): generate() receives only input_ids and no explicit max_length / max_new_tokens, so the
# model's default generation length applies - confirm that long label strings are not being truncated.
for text in df_unlabeled_inferenced['text']:
    tokenized_text = tokenizer(text,return_tensors="pt")
    tokenized_text.to(device)
    output = tokenizer.decode(model.generate(tokenized_text.input_ids)[0].to(device), skip_special_tokens=True)
    output_list.append(output)
df_unlabeled_inferenced['labels'] = output_list
df_unlabeled_inferenced['aspectTerms'] = df_unlabeled_inferenced['labels'].apply(convert_labels_to_aspect_terms)
# Presumably mirrors an 'Unnamed: 0' column present in the labeled data - TODO confirm
df_unlabeled_inferenced['Unnamed: 0'] = df_unlabeled_inferenced.index



### Iteration 3

In [None]:
############################################### Student Model 3 ##############################################

# Build the student-3 training set by concatenating the saved labeled training data (train_data_save)
# with df_unlabeled_inferenced, the unlabeled data labeled in the preceding inference cell (student 2)
loader.train_df_id = pd.concat([train_data_save, df_unlabeled_inferenced])

# Start from the student model trained in the previous iteration (model_out_path still points at it)
t5_exp = T5Generator(model_out_path) # load the new teacher baseline


# Tokenize Dataset
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_exp.tokenize_function_inputs)


# Move the model parameters to the device
t5_exp.model = t5_exp.model.to(device)

# Output path for the model trained in this iteration.
# NOTE(review): joined onto the previous model_out_path, so this directory is nested inside the
# previous model's directory, and re-running this cell nests again.
model_out_path = os.path.join(model_out_path, 'joint_task', f"{model_checkpoint.replace('/', '')}-fourth_training")
print('Fourth Trained Model output path: ', model_out_path)


# Training arguments
training_args = {
  'output_dir':model_out_path,
  'evaluation_strategy':"epoch",
  'learning_rate':5e-5,
  'lr_scheduler_type':'cosine',
  'per_device_train_batch_size':8,
  'per_device_eval_batch_size':16,
  'num_train_epochs':4,
  'weight_decay':0.01,
  'warmup_ratio':0.1,
  'save_strategy':'no',
  'load_best_model_at_end':False,
  'push_to_hub':False,
  'eval_accumulation_steps':1,
  'predict_with_generate':True,
  'use_mps_device':use_mps_
}

# Train the student model
t5_trainer = t5_exp.train(id_tokenized_ds, **training_args)

# student inference: reload the student from the path used as output_dir above
t5_student3 = T5Generator(model_out_path)
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_student3.tokenize_function_inputs)


# Move the model parameters to the device
t5_student3.model = t5_student3.model.to(device)

# inference train, test dataset
# Get prediction labels - Training set

id_tr_pred_labels = t5_student3.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'train', batch_size = 16)
id_tr_labels = [i.strip() for i in id_ds['train']['labels']]
# Get prediction labels - Testing set
id_te_pred_labels = t5_student3.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'test', batch_size = 16)
id_te_labels = [i.strip() for i in id_ds['test']['labels']]

print('Third iteration train, test metrics for student model')
print('when equal terms')
p, r, f1, _ = t5_student3.get_metrics_eq(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_student3.get_metrics_eq(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)

print(' ')
print('when include terms')
p, r, f1, _ = t5_student3.get_metrics_in(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_student3.get_metrics_in(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)


### Iteration 4

In [None]:
# Inference with student model 3

# Run the trained student model over the unlabeled dataset to produce labels for it
#inference unlabeled dataset
tokenizer = t5_student3.tokenizer
model = t5_student3.model

# Build the prompts the same way as the InstructABSA inputs (bos instruction + sentence + eos instruction)
df_unlabeled_inferenced = pd.DataFrame()
df_unlabeled_inferenced['raw_text'] = df_unlabeled['raw_text']
df_unlabeled_inferenced['text'] = input_text = instruct_handler.joint['bos_instruct3'] + df_unlabeled_inferenced['raw_text'] + '' + instruct_handler.joint['eos_instruct']
output_list = []
# One generate() call per sentence.
# NOTE(review): generate() receives only input_ids and no explicit max_length / max_new_tokens, so the
# model's default generation length applies - confirm that long label strings are not being truncated.
for text in df_unlabeled_inferenced['text']:
    tokenized_text = tokenizer(text,return_tensors="pt")
    tokenized_text.to(device)
    output = tokenizer.decode(model.generate(tokenized_text.input_ids)[0].to(device), skip_special_tokens=True)
    output_list.append(output)
df_unlabeled_inferenced['labels'] = output_list
df_unlabeled_inferenced['aspectTerms'] = df_unlabeled_inferenced['labels'].apply(convert_labels_to_aspect_terms)
# Presumably mirrors an 'Unnamed: 0' column present in the labeled data - TODO confirm
df_unlabeled_inferenced['Unnamed: 0'] = df_unlabeled_inferenced.index



In [None]:
############################################### Student Model 4 ##############################################

# Build the student-4 training set by concatenating the saved labeled training data (train_data_save)
# with df_unlabeled_inferenced, the unlabeled data labeled in the preceding inference cell (student 3)
loader.train_df_id = pd.concat([train_data_save, df_unlabeled_inferenced])

# Start from the student model trained in the previous iteration (model_out_path still points at it)
t5_exp = T5Generator(model_out_path) # load the new teacher baseline


# Tokenize Dataset
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_exp.tokenize_function_inputs)


# Move the model parameters to the device
t5_exp.model = t5_exp.model.to(device)

# Output path for the model trained in this iteration.
# NOTE(review): joined onto the previous model_out_path, so this directory is nested inside the
# previous model's directory, and re-running this cell nests again.
model_out_path = os.path.join(model_out_path, 'joint_task', f"{model_checkpoint.replace('/', '')}-fifth_training")
print('Fifth Trained Model output path: ', model_out_path)


# Training arguments
training_args = {
  'output_dir':model_out_path,
  'evaluation_strategy':"epoch",
  'learning_rate':5e-5,
  'lr_scheduler_type':'cosine',
  'per_device_train_batch_size':8,
  'per_device_eval_batch_size':16,
  'num_train_epochs':4,
  'weight_decay':0.01,
  'warmup_ratio':0.1,
  'save_strategy':'no',
  'load_best_model_at_end':False,
  'push_to_hub':False,
  'eval_accumulation_steps':1,
  'predict_with_generate':True,
  'use_mps_device':use_mps_
}

# Train the student model
t5_trainer = t5_exp.train(id_tokenized_ds, **training_args)

# student inference: reload the student from the path used as output_dir above
t5_student4 = T5Generator(model_out_path)
id_ds, id_tokenized_ds, ood_ds, ood_tokenized_ds = loader.set_data_for_training_semeval(t5_student4.tokenize_function_inputs)


# Move the model parameters to the device
t5_student4.model = t5_student4.model.to(device)

# inference train, test dataset
# Get prediction labels - Training set

id_tr_pred_labels = t5_student4.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'train', batch_size = 16)
id_tr_labels = [i.strip() for i in id_ds['train']['labels']]
# Get prediction labels - Testing set
id_te_pred_labels = t5_student4.get_labels(tokenized_dataset = id_tokenized_ds, sample_set = 'test', batch_size = 16)
id_te_labels = [i.strip() for i in id_ds['test']['labels']]

print('Fourth iteration train, test metrics for student model')
print('when equal terms')
p, r, f1, _ = t5_student4.get_metrics_eq(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_student4.get_metrics_eq(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)

print(' ')
print('when include terms')
p, r, f1, _ = t5_student4.get_metrics_in(id_tr_labels, id_tr_pred_labels)
print('Train Precision: ', p)
print('Train Recall: ', r)
print('Train F1: ', f1)
p, r, f1, _ = t5_student4.get_metrics_in(id_te_labels, id_te_pred_labels)
print('Test Precision: ', p)
print('Test Recall: ', r)
print('Test F1: ', f1)
