In [2]:
import gc
import torch

def cleanup_globals(vars_to_keep: list):
    global_vars = list(globals().keys())
    protected_vars = ['In', 'Out', 'get_ipython', 'exit', 'quit', 'gc', 'torch', 'cleanup_globals']

    for var in global_vars:
        if var not in vars_to_keep and not var.startswith('_') and var not in protected_vars:
            try:
                del globals()[var]
                print(f"{var} 삭제됨")
            except:
                continue
    torch.cuda.empty_cache()
    gc.collect()

In [3]:
# 데이터 셋 불러오기
import boto3
import pandas as pd
import io
from datasets import Dataset

# AWS S3 에 업로드 했던 데이터셋 불러오기
# 엑세스키가 필요함
try:
    credentials_df = pd.read_csv('./ganghyun-dev_accessKeys.csv')

    if not credentials_df.empty:
        aws_access_key_id = credentials_df['Access key ID'].iloc[0].strip()
        aws_secret_access_key = credentials_df['Secret access key'].iloc[0].strip()
    else:
        print("Error: 'aws_credentials.csv' is empty.")
        exit()

except FileNotFoundError:
    print("Error: 'aws_credentials.csv' not found in Drive.")
    print("Please create a file named 'aws_credentials.csv' in your Google Drive with your AWS credentials.")
    exit()
except KeyError:
    print("Error: 'Access key ID' or 'Secret access key' column not found in 'aws_credentials.csv'.")
    print("Please ensure your CSV file has these columns.")
    exit()
except Exception as e:
    print(f"Error loading AWS credentials from CSV: {e}")
    exit()

bucket_name = "dr.hong-s3"

file_key = "dataset/template_generation_dataset_with_temp_policy.xlsx"

s3_client = boto3.client('s3',
                         aws_access_key_id=aws_access_key_id,
                         aws_secret_access_key=aws_secret_access_key)

try:
    file_content = s3_client.get_object(Bucket=bucket_name, Key=file_key)['Body'].read()
    print("파일을 성공적으로 메모리로 불러왔습니다.")

except Exception as e:
    print(f"S3에서 파일을 불러오는 중 오류가 발생했습니다: {e}")
    exit()

# 엑셀 파일을 pandas datafrome 으로 변환
print("파일을 pandas dataframe 로 변환")
df = pd.read_excel(io.BytesIO(file_content))

print("원본 데이터 상위 5개")
print(df.head)
print()

print("Hugging Face Dataset 으로 변환")
gen_dataset = Dataset.from_pandas(df)

gen_train_test_dataset = gen_dataset.train_test_split(test_size=0.2, shuffle=True)
gen_train_dataset = gen_train_test_dataset["train"]
gen_test_dataset = gen_train_test_dataset["test"]

print("최종 분할된 데이터 셋")
print(gen_train_dataset)
print(gen_test_dataset)

cleanup_globals(["gen_train_dataset", "gen_test_dataset"])

파일을 성공적으로 메모리로 불러왔습니다.
파일을 pandas dataframe 로 변환
원본 데이터 상위 5개
<bound method NDFrame.head of                                                template  \
0     {"title": "회사소개서 발송", "text": "안녕하세요 #{수신자명}님,...   
1     {"title": "서비스 소개서 발송", "text": "안녕하세요 #{수신자명}...   
2     {"title": "(전용) 강의 일정 안내 / 화케터", "text": "안녕하세...   
3     {"title": "(공용) 후기 작성 요청_이미지형_01", "text": "[템...   
4     {"title": "(공용) 인보이스 알림_이미지형_01", "text": "■ #...   
...                                                 ...   
1167  {"title": "재입고 알림", "text": "안녕하세요 #{수신자명}님,\n...   
1168  {"title": "사전 구매 예약", "text": "안녕하세요 #{수신자명}님,...   
1169  {"title": "정기구독 결제 안내", "text": "안녕하세요 #{수신자명}...   
1170  {"title": "자동 결제 정보 등록 완료", "text": "안녕하세요 #{수...   
1171  {"title": "자동 결제 안내", "text": "안녕하세요 #{수신자명}님,...   

                                user_input  \
0             안녕하세요, 회사소개서 발송 템플릿 제작 부탁드려요   
1                           서비스 소개서 발송 템플릿   
2        (전용) 강의 일정 안내 / 화케터용 알림톡을 만들고 싶어요   
3             (

In [4]:
import os
import gc
import torch
from huggingface_hub import snapshot_download
from huggingface_hub.utils import RepositoryNotFoundError
from transformers import AutoModelForCausalLM, AutoTokenizer

def download_model_snapshot(model_id: str, local_dir: str) -> str:
    print(f"'{model_id}' 모델을 '{local_dir}' 경로에 다운로드합니다...")
    try:
        # snapshot_download는 알아서 기존 파일을 체크하고 필요한 것만 다운로드합니다.
        model_path = snapshot_download(
            repo_id=model_id,
            local_dir=local_dir,
            local_dir_use_symlinks=False,
            # resume_download=True, # 기본값이 True이므로 명시하지 않아도 됨
        )
        print("✅ 모델 준비 완료!")
        return model_path
    except RepositoryNotFoundError:
        print(f"❌ 오류: 모델 ID '{model_id}'를 찾을 수 없습니다.")
        return None
    except Exception as e:
        print(f"❌ 다운로드 중 오류가 발생했습니다: {e}")
        return None

def load_llama3_model_and_tokenizer_from_local(model_path: str) -> tuple:

    print("토크나이저를 로드합니다...")
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    print("모델을 메모리로 로드합니다...")
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        attn_implementation="flash_attention_2"
    )
    return model, tokenizer
    
# 실행
checkpoint = "MLP-KTLim/llama-3-Korean-Bllossom-8B"
model_path = download_model_snapshot(checkpoint, "./downloaded_model/" + checkpoint.replace("/", "--"))
model, tokenizer = load_llama3_model_and_tokenizer_from_local(model_path)

cleanup_globals(["model", "tokenizer", "gen_train_dataset", "gen_test_dataset"])

For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


'MLP-KTLim/llama-3-Korean-Bllossom-8B' 모델을 './downloaded_model/MLP-KTLim/llama-3-Korean-Bllossom-8B' 경로에 다운로드합니다...


Fetching 12 files: 100%|██████████| 12/12 [03:29<00:00, 17.43s/it]


✅ 모델 준비 완료!
토크나이저를 로드합니다...
모델을 메모리로 로드합니다...


ImportError: FlashAttention2 has been toggled on, but it cannot be used due to the following error: the package flash_attn seems to be not installed. Please refer to the documentation of https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2 to install Flash Attention 2.

In [20]:
# 데이터 셋 전처리
from transformers import AutoTokenizer, DataCollatorForSeq2Seq
from datasets import Dataset
import torch

tokenizer = AutoTokenizer.from_pretrained("MLP-KTLim/llama-3-Korean-Bllossom-8B")

# 토큰화 함수 정의
def tokenize_function(elements):
    texts = [
        f"{tokenizer.bos_token}user_input: {user_input}\npolicy: {policy}\ntemplate: {template}{tokenizer.eos_token}"
        for user_input, policy, template in zip(elements['user_input'], elements['policy'], elements['template'])
    ]

    tokenized = tokenizer(
        texts, 
        truncation=True, 
        max_length=8192,
        padding=False,  # DataCollator에서 처리
    )
    
    tokenized['labels'] = tokenized['input_ids'].copy() # AutoRegressive
    return tokenized

# 데이터셋에 토큰화 함수 적용
print("\nApplying tokenization function to the dataset...")
tokenized_gen_train_datasets = gen_train_dataset.map(
    tokenize_function, 
    batched=True,
    remove_columns=gen_train_dataset.column_names
)
tokenized_gen_eval_datasets = gen_test_dataset.map(
    tokenize_function, 
    batched=True,
    remove_columns=gen_test_dataset.column_names
)

print("Tokenized train dataset features:", tokenized_gen_train_datasets.features)
print("\nTokenized test dataset features:", tokenized_gen_eval_datasets.features)
print(f"\nTrain dataset size: {len(tokenized_gen_train_datasets)}")
print(f"Test dataset size: {len(tokenized_gen_eval_datasets)}")

cleanup_globals(["tokenizer", "tokenize_function", "tokenized_gen_train_datasets", "tokenized_gen_eval_datasets"])


Applying tokenization function to the dataset...


Map: 100%|██████████| 937/937 [00:00<00:00, 2070.45 examples/s]
Map: 100%|██████████| 235/235 [00:00<00:00, 1946.03 examples/s]

Tokenized train dataset features: {'input_ids': List(Value('int32')), 'attention_mask': List(Value('int8')), 'labels': List(Value('int64'))}

Tokenized test dataset features: {'input_ids': List(Value('int32')), 'attention_mask': List(Value('int8')), 'labels': List(Value('int64'))}

Train dataset size: 937
Test dataset size: 235
gen_train_dataset 삭제됨
gen_test_dataset 삭제됨
AutoTokenizer 삭제됨
DataCollatorForSeq2Seq 삭제됨
Dataset 삭제됨





In [None]:
import torch
import os
from transformers import (
    Trainer, 
    TrainingArguments,
    DataCollatorForSeq2Seq
)
from peft import LoraConfig, get_peft_model

# 데이터셋을 torch 형식으로 변환하는 함수
def convert_to_features(batch):
    return {
        'input_ids': torch.tensor(batch['input_ids'], dtype=torch.long),
        'attention_mask': torch.tensor(batch['attention_mask'], dtype=torch.long),
        'labels': torch.tensor(batch['labels'], dtype=torch.long)
    }

# set_transform 사용하여 변환 설정
tokenized_gen_train_datasets.set_transform(convert_to_features)
tokenized_gen_eval_datasets.set_transform(convert_to_features)

# DataCollator 정의
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True,
    return_tensors="pt"
)

# 학습 인자 정의
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    learning_rate=1e-5,
    fp16=True,
    save_strategy="steps",
    save_steps=500,
    eval_strategy="steps",
    eval_steps=500,
    load_best_model_at_end=True,
    save_total_limit=3,
    metric_for_best_model="eval_loss",
    remove_unused_columns=False,
    gradient_checkpointing=True
)

# Trainer 설정
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_gen_train_datasets,
    eval_dataset=tokenized_gen_eval_datasets,
    data_collator=data_collator,
    tokenizer=tokenizer
)

# 학습 시작
print("\n모델 학습 시작...")
try:
    trainer.train()
    print("모델 학습 완료!")
except Exception as e:
    print(f"학습 중 오류 발생: {str(e)}")
    print("\n데이터셋 상세 정보:")
    print(f"Train dataset size: {len(tokenized_gen_train_datasets)}")
    print(f"Sample from train dataset:")
    print(tokenized_gen_train_datasets[0])
    raise e

# 최종 모델 저장
model.save_pretrained("./final_model")

기존 로드된 모델을 PEFT 모델로 변환 중...
trainable params: 3,407,872 || all params: 8,033,669,120 || trainable%: 0.0424

Re-tokenizing datasets...


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]


모델 학습 시작...


Step,Training Loss,Validation Loss


In [None]:
# 밑에는 wandb 적용 버전 코드

In [13]:
import torch
import os
import wandb
from transformers import (
    Trainer, 
    TrainingArguments,
    DataCollatorForSeq2Seq,
    AutoTokenizer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# wandb 초기화
wandb.init(
    project="dr-hong",
    name="llama-3-Korean-Bllossom-8B",
    config={
        "learning_rate": 1e-5,
        "epochs": 3,
        "batch_size": 4,
        "model_name": "MLP-KTLim/llama-3-Korean-Bllossom-8B",
        "lora_r": 8,
        "lora_alpha": 32,
    }
)

# 모델을 kbit 학습을 위해 준비
model = prepare_model_for_kbit_training(model)

# LoRA 설정
print("LoRA 어댑터 초기화 중...")
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # 타겟 모듈 추가
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    inference_mode=False  # 학습 모드 활성화
)

# 모델을 PEFT 모델로 변환
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# gradient 계산 활성화 확인
for name, param in model.named_parameters():
    if 'lora' in name:
        param.requires_grad = True

# 데이터셋을 torch 형식으로 변환하는 함수
def convert_to_features(batch):
    return {
        'input_ids': torch.tensor(batch['input_ids'], dtype=torch.long),
        'attention_mask': torch.tensor(batch['attention_mask'], dtype=torch.long),
        'labels': torch.tensor(batch['labels'], dtype=torch.long)
    }

# set_transform 사용하여 변환 설정
tokenized_gen_train_datasets.set_transform(convert_to_features)
tokenized_gen_eval_datasets.set_transform(convert_to_features)

# DataCollator 정의
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True,
    return_tensors="pt"
)

# 학습 인자 정의
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    learning_rate=1e-5,
    fp16=True,
    save_strategy="steps",
    save_steps=500,
    eval_strategy="steps",
    eval_steps=100,
    load_best_model_at_end=True,
    save_total_limit=3,
    metric_for_best_model="eval_loss",
    remove_unused_columns=False,
    gradient_checkpointing=True,
    report_to="wandb",
    run_name="lora-finetuning",
    # 추가 설정
    ddp_find_unused_parameters=False,
    torch_compile=False  # 일부 환경에서 문제가 될 수 있음
)

# Trainer 설정
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_gen_train_datasets,
    eval_dataset=tokenized_gen_eval_datasets,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)


# 학습 시작
print("\n모델 학습 시작...")
try:
    trainer.train()
    print("모델 학습 완료!")
    
    # 최종 평가
    final_eval_results = trainer.evaluate()
    print("\n최종 평가 결과:")
    print(final_eval_results)
    
    # wandb에 최종 결과 로깅
    wandb.log({"final_eval": final_eval_results})
    
except Exception as e:
    print(f"학습 중 오류 발생: {str(e)}")
    raise e

# 최종 모델 저장
model.save_pretrained("./final_model")

# wandb 종료
wandb.finish()

LoRA 어댑터 초기화 중...
trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



모델 학습 시작...


Step,Training Loss,Validation Loss


모델 학습 완료!


학습 중 오류 발생: CUDA out of memory. Tried to allocate 5.43 GiB. GPU 0 has a total capacity of 21.98 GiB of which 4.84 GiB is free. Process 7849 has 17.13 GiB memory in use. Of the allocated memory 13.08 GiB is allocated by PyTorch, and 3.74 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


OutOfMemoryError: CUDA out of memory. Tried to allocate 5.43 GiB. GPU 0 has a total capacity of 21.98 GiB of which 4.84 GiB is free. Process 7849 has 17.13 GiB memory in use. Of the allocated memory 13.08 GiB is allocated by PyTorch, and 3.74 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [14]:
model.save_pretrained("./finetuned-v1")
print("모델 저장 완료")

모델 저장 완료


In [None]:
# 커널 재시작

In [8]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig, DataCollatorForSeq2Seq
from peft import PeftModel
import wandb
import evaluate
import numpy as np
import os

# CUDA 메모리 단편화 방지
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# wandb 초기화 설정에 더 자세한 설정 추가
wandb.init(
    project="dr-hong",
    name="llama-3-Korean-Bllossom-8B",
    config={
        "model": "llama-3-Korean-Bllossom-8B",
        "batch_size": 16,
        "quantization": "4bit",
        "learning_rate": 1e-4,
        "epochs": 1,
    }
)

# compute_metrics 함수 수정
def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    
    # 패딩 토큰 무시
    mask = labels != -100
    predictions = predictions[mask]
    labels = labels[mask]
    
    # 평가 지표 계산
    accuracy = accuracy_score(labels, predictions)
    
    # wandb에 메트릭 로깅
    wandb.log({
        "eval/accuracy": accuracy,
        "eval/samples": len(predictions)
    })
    
    return {
        "accuracy": accuracy,
        "num_samples": len(predictions)
    }

# 4비트 양자화 설정
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,  # 더블 양자화 활성화
)

# 모델 로드
base_model_path = "./downloaded_model/models--MLP-KTLim--llama-3-Korean-Bllossom-8B/snapshots/ed9647c18477ee09a03690c613c859eddca24362"
max_memory = {0: "10GB", "cpu": "30GB"}

# CUDA 캐시 정리
torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained(base_model_path, local_files_only=True)

model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    local_files_only=True,
    max_memory=max_memory,
    low_cpu_mem_usage=True  # CPU 메모리 사용량 최적화
)

# LoRA 가중치 로드
model = PeftModel.from_pretrained(model, "./finetuned-v1")
model.gradient_checkpointing_enable()  # gradient checkpointing 활성화

# DataCollator 정의
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True,
    return_tensors="pt"
)

# 배치 단위로 평가하는 함수
def evaluate_in_batches(dataset, batch_size=16):
    all_metrics = []
    total_samples = 0
    
    for i in range(0, len(dataset), batch_size):
        torch.cuda.empty_cache()
        
        batch_end = min(i + batch_size, len(dataset))
        current_batch = dataset.select(range(i, batch_end))
        
        batch_training_args = TrainingArguments(
            output_dir="./results",
            per_device_eval_batch_size=1,
            fp16=True,
            remove_unused_columns=False,
            no_cuda=False,
            report_to="wandb",
            gradient_checkpointing=True,
            dataloader_num_workers=1,
            dataloader_pin_memory=False,
            logging_steps=1  # 매 스텝마다 로깅
        )
        
        batch_trainer = Trainer(
            model=model,
            args=batch_training_args,
            eval_dataset=current_batch,
            data_collator=data_collator,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics
        )
        
        try:
            print(f"\n배치 평가 중... ({i} ~ {batch_end}/{len(dataset)})")
            with torch.no_grad():
                batch_results = batch_trainer.evaluate()
            
            # 배치별 결과 로깅
            wandb.log({
                f"batch_{i}/accuracy": batch_results["eval_accuracy"],
                "current_batch": i,
                "total_batches": len(dataset) // batch_size
            })
            
            all_metrics.append(batch_results)
            total_samples += batch_end - i
            
        except Exception as e:
            print(f"배치 평가 중 오류 발생: {str(e)}")
            continue
    
    if all_metrics:
        final_metrics = {}
        for key in all_metrics[0].keys():
            weighted_avg = sum(m[key] * (len(current_batch) / total_samples) 
                             for m, current_batch in zip(all_metrics, range(0, len(dataset), batch_size)))
            final_metrics[key] = weighted_avg
        
        # 최종 결과 로깅
        wandb.log({
            "final/accuracy": final_metrics["eval_accuracy"],
            "final/total_samples": total_samples
        })
        
        return final_metrics
    return None

# 환경 변수 설정
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # tokenizer 경고 제거

# wandb 초기화 및 평가 실행
try:
    print("새로 로드한 모델로 평가 시작...")
    print(f"평가 데이터셋 크기: {len(tokenized_gen_eval_datasets)}")
    
    eval_results = evaluate_in_batches(tokenized_gen_eval_datasets, batch_size=16)
    
    if eval_results:
        print("\n최종 평가 결과:")
        print(eval_results)
        wandb.log({"evaluation": eval_results})
    
except Exception as e:
    print(f"평가 중 오류 발생: {str(e)}")
finally:
    wandb.finish()

Loading checkpoint shards: 100%|██████████| 4/4 [01:55<00:00, 28.87s/it]


새로 로드한 모델로 평가 시작...
평가 데이터셋 크기: 235

배치 평가 중... (0 ~ 16/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 1.77 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.51 GiB is free. Process 8093 has 20.46 GiB memory in use. Of the allocated memory 17.74 GiB is allocated by PyTorch, and 2.42 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (16 ~ 32/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 1.95 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.25 GiB is free. Process 8093 has 20.72 GiB memory in use. Of the allocated memory 17.95 GiB is allocated by PyTorch, and 2.47 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (32 ~ 48/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 2.07 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.19 GiB is free. Process 8093 has 20.78 GiB memory in use. Of the allocated memory 18.07 GiB is allocated by PyTorch, and 2.41 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (48 ~ 64/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 1.79 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.42 GiB is free. Process 8093 has 20.55 GiB memory in use. Of the allocated memory 17.80 GiB is allocated by PyTorch, and 2.45 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (64 ~ 80/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 1.97 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.19 GiB is free. Process 8093 has 20.78 GiB memory in use. Of the allocated memory 17.98 GiB is allocated by PyTorch, and 2.50 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (80 ~ 96/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 1.93 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.41 GiB is free. Process 8093 has 20.55 GiB memory in use. Of the allocated memory 17.93 GiB is allocated by PyTorch, and 2.33 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (96 ~ 112/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 2.27 GiB. GPU 0 has a total capacity of 21.98 GiB of which 992.44 MiB is free. Process 8093 has 21.00 GiB memory in use. Of the allocated memory 18.25 GiB is allocated by PyTorch, and 2.45 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (112 ~ 128/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 2.29 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.95 GiB is free. Process 8093 has 20.02 GiB memory in use. Of the allocated memory 18.13 GiB is allocated by PyTorch, and 1.59 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (128 ~ 144/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 1.92 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.42 GiB is free. Process 8093 has 20.55 GiB memory in use. Of the allocated memory 17.91 GiB is allocated by PyTorch, and 2.35 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (144 ~ 160/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 2.10 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.09 GiB is free. Process 8093 has 20.87 GiB memory in use. Of the allocated memory 18.11 GiB is allocated by PyTorch, and 2.47 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (160 ~ 176/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 1.72 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.50 GiB is free. Process 8093 has 20.47 GiB memory in use. Of the allocated memory 17.73 GiB is allocated by PyTorch, and 2.44 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (176 ~ 192/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 2.04 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.19 GiB is free. Process 8093 has 20.78 GiB memory in use. Of the allocated memory 18.05 GiB is allocated by PyTorch, and 2.43 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (192 ~ 208/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 1.79 GiB. GPU 0 has a total capacity of 21.98 GiB of which 942.44 MiB is free. Process 8093 has 21.05 GiB memory in use. Of the allocated memory 17.79 GiB is allocated by PyTorch, and 2.96 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (208 ~ 224/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 2.34 GiB. GPU 0 has a total capacity of 21.98 GiB of which 882.44 MiB is free. Process 8093 has 21.11 GiB memory in use. Of the allocated memory 18.32 GiB is allocated by PyTorch, and 2.49 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

배치 평가 중... (224 ~ 235/235)


  batch_trainer = Trainer(


배치 평가 중 오류 발생: CUDA out of memory. Tried to allocate 1.80 GiB. GPU 0 has a total capacity of 21.98 GiB of which 1.43 GiB is free. Process 8093 has 20.53 GiB memory in use. Of the allocated memory 17.82 GiB is allocated by PyTorch, and 2.42 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
