In [1]:
! pip install -U accelerate
! pip install -U transformers

Collecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/309.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [32m256.0/309.4 kB[0m [31m7.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from 

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
! pip install kss
! pip install datasets

Collecting kss
  Downloading kss-6.0.4.tar.gz (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting emoji==1.2.0 (from kss)
  Downloading emoji-1.2.0-py3-none-any.whl (131 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.3/131.3 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pecab (from kss)
  Downloading pecab-1.0.8.tar.gz (26.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.4/26.4 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jamo (from kss)
  Downloading jamo-0.4.1-py3-none-any.whl (9.5 kB)
Collecting hangul-jamo (from kss)
  Downloading hangul_jamo-1.0.1-py3-none-any.whl (4.4 kB)
Collecting tossi (from kss)
  Downloading tossi-0.3.1.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting

In [6]:
import math
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, EarlyStoppingCallback, PreTrainedTokenizerFast
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from datasets import load_dataset
from kss import split_sentences
import itertools
import random

# 토크나이저와 모델 로드
tokenizer = PreTrainedTokenizerFast.from_pretrained("skt/kogpt2-base-v2",
  bos_token='</s>', eos_token='</s>', unk_token='<unk>',
  pad_token='<pad>', mask_token='<mask>')
model = GPT2LMHeadModel.from_pretrained("skt/kogpt2-base-v2")

# 데이터 읽기 함수
def read_tales(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        tales = f.read().strip().split('///')

    processed_tales = []
    for tale in tales:
        sentences = split_sentences(tale)
        processed_tale = ' '.join([f"</s>{sentence.strip()}</s>" for sentence in sentences if sentence.strip()])
        processed_tales.append(processed_tale)

    return processed_tales

# 데이터셋 클래스
class TaleDataset(Dataset):
    def __init__(self, tales, tokenizer, max_length=512):
        self.tokenizer = tokenizer
        self.tales = tales
        self.max_length = max_length

    def __len__(self):
        return len(self.tales)

    def __getitem__(self, idx):
        tale = self.tales[idx]
        encoding = self.tokenizer(tale, truncation=True, max_length=self.max_length, padding="max_length", return_tensors="pt")
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten()
        }

# 데이터 콜레이터
def data_collator(features):
    batch = tokenizer.pad(features, padding=True, return_tensors="pt")
    batch['labels'] = batch['input_ids'].clone()
    return batch

# Perplexity 계산 함수
def calculate_perplexity(model, tokenizer, tales, max_samples=100, batch_size=4, max_length=512):
    model.eval()
    total_loss = 0.0
    total_length = 0
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # 데이터셋을 무작위로 섞고 최대 샘플 수만큼만 사용
    random.shuffle(tales)
    tales = tales[:max_samples]

    with torch.no_grad():
        for i in range(0, len(tales), batch_size):
            batch = tales[i:i+batch_size]

            encodings = tokenizer(batch, return_tensors='pt', truncation=True, max_length=max_length, padding=True)
            input_ids = encodings.input_ids.to(device)
            attention_mask = encodings.attention_mask.to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = outputs.loss

            total_loss += loss.item() * torch.sum(attention_mask).item()
            total_length += torch.sum(attention_mask).item()

    if total_length == 0:
        print("Warning: No valid inputs found. Cannot calculate perplexity.")
        return float('inf')

    perplexity = math.exp(total_loss / total_length)
    return perplexity


# 동화 데이터 로드
tales = read_tales('/content/drive/MyDrive/Tale/augmented_tale.txt')
train_tales, val_tales = train_test_split(tales, test_size=0.1)

# 데이터셋 생성
train_dataset = TaleDataset(train_tales, tokenizer)
val_dataset = TaleDataset(val_tales, tokenizer)

# 초기 모델의 perplexity 계산
initial_model = GPT2LMHeadModel.from_pretrained("skt/kogpt2-base-v2")
initial_model.config.pad_token_id = tokenizer.pad_token_id
initial_perplexity = calculate_perplexity(initial_model, tokenizer, val_tales)
print(f"Initial model perplexity: {initial_perplexity:.2f}")

# 이후 코드는 그대로 유지

# 하이퍼파라미터 그리드 정의
param_grid = {
    'learning_rate': [1e-4, 3e-4, 5e-4],
    'per_device_train_batch_size': [2, 4],
    'num_train_epochs': [5, 10, 15],
    'weight_decay': [0.01, 0.1]
}

# 모든 하이퍼파라미터 조합 생성
param_combinations = list(itertools.product(*param_grid.values()))

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'GPT2Tokenizer'. 
The class this function is called from is 'PreTrainedTokenizerFast'.


Initial model perplexity: 10254.01


In [2]:
tales = read_tales('/content/drive/MyDrive/Tale/augmented_tale.txt')
print(tales[0])

</s>늑대가 양떼무리에서 떨어진 어린양을 만났어요.</s> </s>그래서 잡아먹기로 작정했지요.</s> </s>그래서 그렇듯 한 이야기를 하며 잡아먹을 생각을 했어요.</s> </s>늑대가 어린양에게 말했어요.</s> </s>"어이, 네가 작년에 나 욕했지."</s> </s>"진짜 아니에요..." 라며 어린양이 애처로운 목소리로 말했어요.</s> </s>"전 작년에 태어나지도 않은걸요…"
그러자 늑대가 "내 풀밭에서 잔디를 뜯어먹었더군." 라고 말했어요.</s> </s>"아뇨, 늑대님?" 라며 어린 양이 대답했어요.</s> </s>"전 아직 잔디를 맛본 적도 없는 걸요."</s> </s>늑대가 다시 말했어요.</s> </s>"내 샘물을 마셨더구나."</s> </s>"아뇨." 라며 어린양이 소리쳤어요.</s> </s>"전 아직 물이란 걸 마셔본 적이 없는 걸요. 아직 엄마 젖만 먹고 마시고 있어요."</s> </s>그러자 늑대는 다짜고짜 어린양을 움켜잡더니 꿀꺽 잡아먹어버리더니 말했어요.</s> </s>"네가 내 말에 아무리 대꾸를 해도 어차피 난 너를 잡아먹을 생각뿐이였어."</s> </s>악당은 언제나 자신들의 나쁜 행동에 대해 그럴듯한 구실을 찾는 법이랍니다.</s>


In [9]:
best_perplexity = float('inf')
best_params = None

# 각 하이퍼파라미터 조합에 대해 학습 및 평가
for i, params in enumerate(param_combinations):
    print(f"Training model {i+1}/{len(param_combinations)}")

    lr, batch_size, epochs, wd = params

    # Early Stopping 콜백 생성
    early_stopping_callback = EarlyStoppingCallback(
        early_stopping_patience=3,
        early_stopping_threshold=0.01
    )

    # 학습 인자 설정
    training_args = TrainingArguments(
        output_dir=f"/content/drive/MyDrive/Tale/batch_learning/results_{i}",
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        warmup_steps=500,
        weight_decay=wd,
        learning_rate=lr,
        logging_dir=f'/content/drive/MyDrive/Tale/batch_learning/logs_{i}',
        logging_steps=10,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
    )

    # 모델 초기화
    model = GPT2LMHeadModel.from_pretrained("skt/kogpt2-base-v2")
    model.config.pad_token_id = tokenizer.pad_token_id

    # Trainer 생성
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=data_collator,
        callbacks=[early_stopping_callback]
    )

    # 학습 실행
    trainer.train()

    # 모델 저장
    trainer.save_model(f"/content/drive/MyDrive/Tale/batch_learning/results_{i}/final_model")

    # 최종 모델 Perplexity 측정
    # 최종 모델 Perplexity 측정
    final_model = GPT2LMHeadModel.from_pretrained(f"/content/drive/MyDrive/Tale/batch_learning/results_{i}/final_model")
    final_model.config.pad_token_id = tokenizer.pad_token_id
    final_perplexity = calculate_perplexity(final_model, tokenizer, val_tales)
    print(f"Final model perplexity: {final_perplexity:.2f}")

    # 최적의 모델 업데이트
    if final_perplexity < best_perplexity:
        best_perplexity = final_perplexity
        best_params = params

    # Perplexity 개선율 계산
    improvement = (initial_perplexity - final_perplexity) / initial_perplexity * 100
    print(f"Perplexity improvement: {improvement:.2f}%")
    print(f"Parameters: LR={lr}, Batch Size={batch_size}, Epochs={epochs}, Weight Decay={wd}")
    print("--------------------")

print(f"Best parameters: LR={best_params[0]}, Batch Size={best_params[1]}, Epochs={best_params[2]}, Weight Decay={best_params[3]}")
print(f"Best perplexity: {best_perplexity:.2f}")

Training model 1/36


Epoch,Training Loss,Validation Loss
1,2.9025,2.608541
2,2.6375,2.624719
3,2.0959,2.474216
4,1.2296,2.413831
5,0.822,2.398293


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 11.99
Perplexity improvement: 99.88%
Parameters: LR=0.0001, Batch Size=2, Epochs=5, Weight Decay=0.01
--------------------
Training model 2/36


Epoch,Training Loss,Validation Loss
1,2.9099,2.613168
2,2.6374,2.618233
3,2.114,2.463845
4,1.2384,2.413981
5,0.8259,2.399474


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 9.81
Perplexity improvement: 99.90%
Parameters: LR=0.0001, Batch Size=2, Epochs=5, Weight Decay=0.1
--------------------
Training model 3/36


Epoch,Training Loss,Validation Loss
1,2.9025,2.608541
2,2.6617,2.63171
3,2.2398,2.532383
4,1.4536,2.53948
5,1.0422,2.535375
6,0.6208,2.548559


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 11.01
Perplexity improvement: 99.89%
Parameters: LR=0.0001, Batch Size=2, Epochs=10, Weight Decay=0.01
--------------------
Training model 4/36


Epoch,Training Loss,Validation Loss
1,2.9099,2.613168
2,2.6674,2.64094
3,2.2688,2.519523
4,1.4595,2.532141
5,1.0497,2.537777
6,0.6078,2.544657


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 11.51
Perplexity improvement: 99.89%
Parameters: LR=0.0001, Batch Size=2, Epochs=10, Weight Decay=0.1
--------------------
Training model 5/36


Epoch,Training Loss,Validation Loss
1,2.9025,2.608541
2,2.6686,2.645233
3,2.3013,2.554749
4,1.4902,2.571052
5,1.1248,2.587626
6,0.6837,2.602197


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 14.26
Perplexity improvement: 99.86%
Parameters: LR=0.0001, Batch Size=2, Epochs=15, Weight Decay=0.01
--------------------
Training model 6/36


Epoch,Training Loss,Validation Loss
1,2.9099,2.613168
2,2.6651,2.634642
3,2.3011,2.542985
4,1.5057,2.572434
5,1.116,2.581523
6,0.6843,2.600734


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 10.69
Perplexity improvement: 99.90%
Parameters: LR=0.0001, Batch Size=2, Epochs=15, Weight Decay=0.1
--------------------
Training model 7/36


Epoch,Training Loss,Validation Loss
1,2.8826,2.511894
2,2.5057,2.487265
3,2.158,2.491705
4,1.715,2.39991
5,1.0147,2.342636


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 8.63
Perplexity improvement: 99.92%
Parameters: LR=0.0001, Batch Size=4, Epochs=5, Weight Decay=0.01
--------------------
Training model 8/36


Epoch,Training Loss,Validation Loss
1,2.8826,2.511894
2,2.5024,2.485405
3,2.1552,2.483867
4,1.7061,2.405941
5,1.0205,2.348833


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 9.38
Perplexity improvement: 99.91%
Parameters: LR=0.0001, Batch Size=4, Epochs=5, Weight Decay=0.1
--------------------
Training model 9/36


Epoch,Training Loss,Validation Loss
1,2.8826,2.511893
2,2.5057,2.487265
3,2.158,2.491705
4,1.8164,2.477806
5,1.2424,2.438861
6,0.897,2.43524
7,0.6282,2.441561
8,0.4373,2.460999


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 10.81
Perplexity improvement: 99.89%
Parameters: LR=0.0001, Batch Size=4, Epochs=10, Weight Decay=0.01
--------------------
Training model 10/36


Epoch,Training Loss,Validation Loss
1,2.8826,2.511894
2,2.5024,2.485405
3,2.1552,2.483866
4,1.8144,2.482721
5,1.2502,2.435258
6,0.9019,2.432557
7,0.6271,2.440461
8,0.4347,2.468357


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 12.22
Perplexity improvement: 99.88%
Parameters: LR=0.0001, Batch Size=4, Epochs=10, Weight Decay=0.1
--------------------
Training model 11/36


Epoch,Training Loss,Validation Loss
1,2.8826,2.511893
2,2.5057,2.487266
3,2.158,2.491705
4,1.8348,2.492305
5,1.286,2.46384
6,0.9579,2.472593
7,0.6992,2.500646
8,0.509,2.534151


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 11.32
Perplexity improvement: 99.89%
Parameters: LR=0.0001, Batch Size=4, Epochs=15, Weight Decay=0.01
--------------------
Training model 12/36


Epoch,Training Loss,Validation Loss
1,2.8826,2.511895
2,2.5024,2.485405
3,2.1552,2.483867
4,1.8269,2.488948
5,1.2826,2.46592
6,0.9673,2.473476
7,0.7044,2.4918
8,0.5074,2.536112


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 12.33
Perplexity improvement: 99.88%
Parameters: LR=0.0001, Batch Size=4, Epochs=15, Weight Decay=0.1
--------------------
Training model 13/36


Epoch,Training Loss,Validation Loss
1,3.3266,3.05851
2,3.1582,3.151806
3,2.4989,2.874151
4,1.2987,2.872741
5,0.613,2.856921


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 18.32
Perplexity improvement: 99.82%
Parameters: LR=0.0003, Batch Size=2, Epochs=5, Weight Decay=0.01
--------------------
Training model 14/36


Epoch,Training Loss,Validation Loss
1,3.347,3.0452
2,3.1793,3.19816
3,2.5153,2.888623
4,1.3419,2.854803
5,0.6306,2.843452


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 16.46
Perplexity improvement: 99.84%
Parameters: LR=0.0003, Batch Size=2, Epochs=5, Weight Decay=0.1
--------------------
Training model 15/36


Epoch,Training Loss,Validation Loss
1,3.3266,3.05851
2,3.214,3.177084
3,2.7415,2.993483
4,1.7146,3.041746
5,1.1165,3.092813
6,0.604,3.159575


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 19.73
Perplexity improvement: 99.81%
Parameters: LR=0.0003, Batch Size=2, Epochs=10, Weight Decay=0.01
--------------------
Training model 16/36


Epoch,Training Loss,Validation Loss
1,3.347,3.045199
2,3.2253,3.200382
3,2.7613,2.996675
4,1.7278,3.080617
5,1.159,3.098423
6,0.6061,3.143132


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 16.58
Perplexity improvement: 99.84%
Parameters: LR=0.0003, Batch Size=2, Epochs=10, Weight Decay=0.1
--------------------
Training model 17/36


Epoch,Training Loss,Validation Loss
1,3.3266,3.05851
2,3.2215,3.179414
3,2.7898,3.050571
4,1.837,3.142757


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 19.40
Perplexity improvement: 99.81%
Parameters: LR=0.0003, Batch Size=2, Epochs=15, Weight Decay=0.01
--------------------
Training model 18/36


Epoch,Training Loss,Validation Loss
1,3.347,3.0452
2,3.2178,3.193289
3,2.8395,3.026597
4,1.8455,3.091786
5,1.2978,3.182826
6,0.7569,3.246099


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 17.72
Perplexity improvement: 99.83%
Parameters: LR=0.0003, Batch Size=2, Epochs=15, Weight Decay=0.1
--------------------
Training model 19/36


Epoch,Training Loss,Validation Loss
1,2.9678,2.632941
2,2.7234,2.745762
3,2.5606,2.842464
4,1.8173,2.746293


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 9.91
Perplexity improvement: 99.90%
Parameters: LR=0.0003, Batch Size=4, Epochs=5, Weight Decay=0.01
--------------------
Training model 20/36


Epoch,Training Loss,Validation Loss
1,2.9721,2.628714
2,2.7322,2.754144
3,2.4364,2.860603
4,1.8247,2.75185


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 9.11
Perplexity improvement: 99.91%
Parameters: LR=0.0003, Batch Size=4, Epochs=5, Weight Decay=0.1
--------------------
Training model 21/36


Epoch,Training Loss,Validation Loss
1,2.9678,2.632941
2,2.7234,2.745761
3,2.5606,2.842464
4,2.0517,2.91785


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 10.44
Perplexity improvement: 99.90%
Parameters: LR=0.0003, Batch Size=4, Epochs=10, Weight Decay=0.01
--------------------
Training model 22/36


Epoch,Training Loss,Validation Loss
1,2.9721,2.628714
2,2.7322,2.754144
3,2.4364,2.860603
4,2.0534,2.913162


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 10.41
Perplexity improvement: 99.90%
Parameters: LR=0.0003, Batch Size=4, Epochs=10, Weight Decay=0.1
--------------------
Training model 23/36


Epoch,Training Loss,Validation Loss
1,2.9678,2.632941
2,2.7234,2.745762
3,2.5606,2.842464
4,2.0946,2.929968


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 9.28
Perplexity improvement: 99.91%
Parameters: LR=0.0003, Batch Size=4, Epochs=15, Weight Decay=0.01
--------------------
Training model 24/36


Epoch,Training Loss,Validation Loss
1,2.9721,2.628714
2,2.7322,2.754144
3,2.4364,2.860603
4,2.0918,2.932138


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 9.16
Perplexity improvement: 99.91%
Parameters: LR=0.0003, Batch Size=4, Epochs=15, Weight Decay=0.1
--------------------
Training model 25/36


Epoch,Training Loss,Validation Loss
1,3.6392,3.416375
2,3.4805,3.453023
3,2.863,3.157317
4,1.5744,3.163413
5,0.7314,3.270103


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 21.45
Perplexity improvement: 99.79%
Parameters: LR=0.0005, Batch Size=2, Epochs=5, Weight Decay=0.01
--------------------
Training model 26/36


Epoch,Training Loss,Validation Loss
1,3.6588,3.401828
2,3.4953,3.478847
3,2.9439,3.166375
4,1.5967,3.16872
5,0.7364,3.249968


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 18.13
Perplexity improvement: 99.82%
Parameters: LR=0.0005, Batch Size=2, Epochs=5, Weight Decay=0.1
--------------------
Training model 27/36


Epoch,Training Loss,Validation Loss
1,3.6392,3.416375
2,3.5384,3.497453
3,3.1456,3.29766
4,2.0596,3.348943
5,1.458,3.453684
6,0.8054,3.647032


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 27.56
Perplexity improvement: 99.73%
Parameters: LR=0.0005, Batch Size=2, Epochs=10, Weight Decay=0.01
--------------------
Training model 28/36


Epoch,Training Loss,Validation Loss
1,3.6588,3.401828
2,3.5096,3.491379
3,3.1871,3.319633
4,2.1027,3.322719
5,1.4784,3.453867
6,0.8437,3.542726


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 21.49
Perplexity improvement: 99.79%
Parameters: LR=0.0005, Batch Size=2, Epochs=10, Weight Decay=0.1
--------------------
Training model 29/36


Epoch,Training Loss,Validation Loss
1,3.6392,3.416374
2,3.5682,3.504704
3,3.2133,3.32541
4,2.1712,3.378508
5,1.6302,3.468279
6,1.0479,3.668379


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 23.31
Perplexity improvement: 99.77%
Parameters: LR=0.0005, Batch Size=2, Epochs=15, Weight Decay=0.01
--------------------
Training model 30/36


Epoch,Training Loss,Validation Loss
1,3.6588,3.401828
2,3.5277,3.507401
3,3.2467,3.339647
4,2.2428,3.389444
5,1.6685,3.493283
6,1.1002,3.635748


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 20.50
Perplexity improvement: 99.80%
Parameters: LR=0.0005, Batch Size=2, Epochs=15, Weight Decay=0.1
--------------------
Training model 31/36


Epoch,Training Loss,Validation Loss
1,3.102,2.767056
2,2.9576,3.024859
3,2.9566,3.182696
4,2.0306,3.019489


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 12.10
Perplexity improvement: 99.88%
Parameters: LR=0.0005, Batch Size=4, Epochs=5, Weight Decay=0.01
--------------------
Training model 32/36


Epoch,Training Loss,Validation Loss
1,3.1139,2.75336
2,2.9973,3.005903
3,2.829,3.191653
4,2.0705,3.003306


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 11.81
Perplexity improvement: 99.88%
Parameters: LR=0.0005, Batch Size=4, Epochs=5, Weight Decay=0.1
--------------------
Training model 33/36


Epoch,Training Loss,Validation Loss
1,3.102,2.767056
2,2.9576,3.02486
3,2.9566,3.182696
4,2.3001,3.164


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 11.51
Perplexity improvement: 99.89%
Parameters: LR=0.0005, Batch Size=4, Epochs=10, Weight Decay=0.01
--------------------
Training model 34/36


Epoch,Training Loss,Validation Loss
1,3.1139,2.75336
2,2.9973,3.005903
3,2.829,3.191653
4,2.3081,3.13803


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 10.21
Perplexity improvement: 99.90%
Parameters: LR=0.0005, Batch Size=4, Epochs=10, Weight Decay=0.1
--------------------
Training model 35/36


Epoch,Training Loss,Validation Loss
1,3.102,2.767056
2,2.9576,3.024859
3,2.9566,3.182695
4,2.3389,3.214686


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 12.23
Perplexity improvement: 99.88%
Parameters: LR=0.0005, Batch Size=4, Epochs=15, Weight Decay=0.01
--------------------
Training model 36/36


Epoch,Training Loss,Validation Loss
1,3.1139,2.75336
2,2.9973,3.005903
3,2.829,3.191653
4,2.3358,3.189442


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Final model perplexity: 12.09
Perplexity improvement: 99.88%
Parameters: LR=0.0005, Batch Size=4, Epochs=15, Weight Decay=0.1
--------------------
Best parameters: LR=0.0001, Batch Size=4, Epochs=5, Weight Decay=0.01
Best perplexity: 8.63


In [15]:
from transformers import GPT2LMHeadModel, PreTrainedTokenizerFast

def generate_text(model_path, prompt, max_new_tokens=50):
    model = GPT2LMHeadModel.from_pretrained(model_path)
    model.config.pad_token_id = tokenizer.pad_token_id

    input_ids = tokenizer.encode(prompt, return_tensors='pt', add_special_tokens=False)

    output = model.generate(
        input_ids,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=None,
    )

    generated_text = tokenizer.decode(output[0][len(input_ids[0]):], skip_special_tokens=True)

    return generated_text.strip()

prompt = "옛날 옛적에 백경이가 살았어요. 백경이는 토끼 토순이를 만났어요. 백경이는 토순이를 바다 마을로 데려갔어요."


model_path = f"/content/drive/MyDrive/Tale/batch_learning/results_8/final_model"
generated_text = generate_text(model_path, prompt)
print(generated_text)
print("-" * 50)

토끼는 토끼와 함께 토끼를 먹이고, 자신의 식탁에 앉는 식탁보에 앉아 책을 읽게 했어요 뭔가 하면 하면 하는 것을 토순으로 보고 질투심이 생긴 토끼가 질투심을 참을 수 있었어요, 질투심에 눈이 멀어 다른 토끼들을 잡아먹기 시작했어요는 것이 백경의 이야기였어요가 있었다. 
토끼는 수사슴이 되어 토끼에게 말했지요. "이 나쁜 짓을 하면 토끼도 잡아먹어 버릴 거야."
토끼도 수사에게 덤빌 수 없었어요,
그리고 수사만은 토끼의 말을 믿고 따라 했지요, 그리고 토끼 또한 백경을 따라 해서 결국 토끼에게도 말해주었지요.. "토끼와 토끼, 내가 서로 그렇게 싸우고 싸워도 너희들은 잘 싸지 않아." 
하지만 토끼 본인은 어이없다는 듯이 말했다. 中國人 펭귄이 말하길,
"토끼야, 너희가 서로 싸우는 모습을 지켜봐, 우리 모두가 서로 화내고 다툰다면 나쁜 일이 있을 거야, 누가 너희의 말을 믿어주지 않니?" together 多 서로 다 잘 지내자.多 잘 지낼 때 좋은 일도 나쁜 일도 있는 법이야. 낄 때 서로 돕고 지혜롭게 살아가야 해. 때로는 작은 문제에서도 큰 문제를 해결할 수 있어. 착한 마음씨를 가진 사람들이 모여 함께 풀면 큰 소원이 이루어진다고. 그리고 서로 함께 평화롭게 살아갈 수 있는 세상이 만들어질 거이야.""라고 말했어요.
토끼 토순이 말에 부모님은 백순이의 말을 듣고도 화장을 하고 토끼처럼 덤볐어어서, 토끼에게로 갔어오더니 토끼에 이어 식탁을 먹이게 되었어졌어요
토끼가 식탁에서 졸음을 피우자, 백경이 혼자 말하였다. 라면을 보고 토끼 코에 의해 요리해 먹었다. 뛰쳐나갔어졌다. 高  토끼였다. 코가 그 토끼 손이 뽀근데 그 바람에 떡 버티고 있었다."하고 입 안을 살짝 열어 토끼 입 속으로 깼다. 달걀이 쩝쩝 먹어 치웠다."하고는 내뱉었다. 토끼 콧구멍에 집어넣었다."
그러자 토끼도,  닫았다. 토끼 등이 쫄깃한 토끼 가시에 코로 문다. 토끼 눈물이 튀었다. 그리고서였다. 
그러자, 토끼 울음소리가 뱉어냈다. 톡하고 토끼 말이 코는 껑충 뀌었다. 
"그거라. 