In [1]:
import torch
print(torch.cuda.is_available())
print(torch.__version__)

True
2.0.1+cu118


In [1]:
import os
import json
import torch
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler
from transformers import (
    AutoConfig,
    BartForConditionalGeneration,
    PreTrainedTokenizerFast,
    get_linear_schedule_with_warmup
)
import evaluate
from torch.optim import AdamW
from tqdm.auto import tqdm

# -------------------------
# 1. 데이터 로드 (단일 JSON 파일에서)
# -------------------------
with open("train_data_all.json", "r", encoding="utf-8") as f:
    train_data = json.load(f)
with open("val_data_all.json", "r", encoding="utf-8") as f:
    val_data = json.load(f)

train_input_texts = [d["passage"] for d in train_data]
train_target_summaries = [d["summary"] for d in train_data]

val_input_texts = [d["passage"] for d in val_data]
val_target_summaries = [d["summary"] for d in val_data]

print(f"Train 데이터 개수: {len(train_input_texts)}")
print(f"Validation 데이터 개수: {len(val_input_texts)}")

batch_size = 4
num_epochs = 3

# -------------------------
# 2. 모델 및 토크나이저 로드 (KoBART)
# -------------------------
model_name = "gogamza/kobart-base-v1"
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_name)

model_config = AutoConfig.from_pretrained(model_name, num_labels=2)
model = BartForConditionalGeneration.from_pretrained(model_name, config=model_config)

# -------------------------
# 3. GPU 사용 설정 (단일 GPU)
# -------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# -------------------------
# 4. 토큰화 함수
# -------------------------
def preprocess_for_model(
    input_texts, 
    target_summaries, 
    tokenizer, 
    max_input_length=1024, 
    max_target_length=128
):
    inputs = tokenizer(
        input_texts,
        max_length=max_input_length,
        truncation=True,
        padding="longest",
        return_tensors="pt"
    )
    targets = tokenizer(
        target_summaries,
        max_length=max_target_length,
        truncation=True,
        padding="longest",
        return_tensors="pt"
    )
    return inputs, targets

# -------------------------
# 5. 실제 토큰화 실행
# -------------------------
train_inputs, train_targets = preprocess_for_model(train_input_texts, train_target_summaries, tokenizer)
val_inputs, val_targets = preprocess_for_model(val_input_texts, val_target_summaries, tokenizer)

# -------------------------
# 6. Dataset 클래스 정의
# -------------------------
class SummaryDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs["input_ids"])

    def __getitem__(self, idx):
        return {
            "input_ids": self.inputs["input_ids"][idx],
            "attention_mask": self.inputs["attention_mask"][idx],
            "labels": self.targets["input_ids"][idx],
        }

# -------------------------
# 7. Dataset 및 DataLoader
# -------------------------
train_dataset = SummaryDataset(train_inputs, train_targets)
val_dataset = SummaryDataset(val_inputs, val_targets)

train_dataloader = DataLoader(
    train_dataset, 
    batch_size=batch_size, 
    shuffle=True,
    num_workers=0
)
val_dataloader = DataLoader(
    val_dataset, 
    batch_size=batch_size, 
    shuffle=False,
    num_workers=0
)

# -------------------------
# 8. 옵티마이저 및 스케줄러
# -------------------------
optimizer = AdamW(model.parameters(), lr=5e-5)

num_training_steps = num_epochs * len(train_dataloader)
warmup_steps = int(0.1 * num_training_steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=num_training_steps
)

# -------------------------
# 9. 평가용 메트릭 (ROUGE)
# -------------------------
rouge = evaluate.load("rouge")

# -------------------------
# 10. 검증(Validation) 함수
# -------------------------
def validate(model, val_dataloader, tokenizer, device):
    model.eval()
    predictions = []
    references = []
    
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            
            generated_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=128,
                num_beams=4,
                early_stopping=True
            )
            
            decoded_preds = [
                tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True)
                for g in generated_ids
            ]
            decoded_labels = [
                tokenizer.decode(l, skip_special_tokens=True, clean_up_tokenization_spaces=True)
                for l in labels
            ]
            
            predictions.extend(decoded_preds)
            references.extend(decoded_labels)
    
    results = rouge.compute(predictions=predictions, references=references)
    model.train()
    return results, predictions

# -------------------------
# 11. 학습 루프
# -------------------------
best_rouge_score = 0.0
best_epoch = 0

scaler = GradScaler()
model.train()

for epoch in range(num_epochs):
    total_loss = 0.0
    
    for batch_idx, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")):
        optimizer.zero_grad()
        
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        with autocast():
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            loss = outputs.loss.mean()  # 단일 GPU라면 보통 scalar지만, .mean() 유지 가능
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_dataloader)
    print(f"\n=== Epoch {epoch+1} Done ===")
    print(f"Average Train Loss: {avg_train_loss:.4f}")

    # 검증
    val_results, val_predictions = validate(model, val_dataloader, tokenizer, device)
    print(f"Validation ROUGE: {val_results}")

    rouge_l_score = val_results["rougeL"]
    if rouge_l_score > best_rouge_score:
        best_rouge_score = rouge_l_score
        best_epoch = epoch + 1

        # 모델 체크포인트 저장
        model.save_pretrained("./summary_model_checkpoint")
        tokenizer.save_pretrained("./summary_model_checkpoint")
        print(f"** Best model saved at epoch {best_epoch} with ROUGE-L: {best_rouge_score:.4f}")

# -------------------------
# 12. 최종 모델 저장
# -------------------------
model.save_pretrained("./summary_model_final")
tokenizer.save_pretrained("./summary_model_final")

print("학습 및 검증 완료!")
print(f"Best epoch: {best_epoch}, Best ROUGE-L: {best_rouge_score:.4f}")

# GPU 캐시 정리
torch.cuda.empty_cache()


Train 데이터 개수: 84364
Validation 데이터 개수: 10000


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


Epoch 1/3:   0%|          | 0/21091 [00:00<?, ?it/s]



KeyboardInterrupt: 

In [2]:
import os
import json
import torch
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler
from transformers import (
    AutoConfig,
    BartForConditionalGeneration, 
    PreTrainedTokenizerFast,
    get_linear_schedule_with_warmup
)
import evaluate
from torch.optim import AdamW
from tqdm.auto import tqdm

batch_size = 4

# -------------------------
# 1. 데이터 로드 함수
# -------------------------
def load_preprocessed_data(base_dir):
    input_texts = []
    target_summaries = []
    all_files = []
    
    # 먼저 모든 파일 경로를 수집
    for root, _, files in os.walk(base_dir):
        for file in files:
            if file.endswith(".json"):
                all_files.append(os.path.join(root, file))
    
    # tqdm으로 진행률 표시
    for file_path in tqdm(all_files, desc=f"Loading JSON from {base_dir}"):
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
            passage = " ".join([f"{d['character']}: {d['dialogue']}" for d in data["passage"]])
            summary = data["summaries"].get("Summary1", "")
            
            if passage and summary:
                input_texts.append(passage)
                target_summaries.append(summary)
    
    return input_texts, target_summaries

# -------------------------
# 2. 데이터 로드
# -------------------------
train_base_dir = "pre_data/Training/TL1"
val_base_dir = "pre_data/Validation/VL1"

train_input_texts, train_target_summaries = load_preprocessed_data(train_base_dir)
val_input_texts, val_target_summaries = load_preprocessed_data(val_base_dir)

print(f"Train 데이터 개수: {len(train_input_texts)}")
print(f"Validation 데이터 개수: {len(val_input_texts)}")

# -------------------------
# 3. 모델 및 토크나이저 로드 (KoBART)
# -------------------------
model_name = "gogamza/kobart-base-v1"
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_name)

model_config = AutoConfig.from_pretrained(model_name, num_labels=2)
model = BartForConditionalGeneration.from_pretrained(model_name, config=model_config)

# -------------------------
# 4. GPU 사용 설정 (단일 GPU로만 사용)
# -------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# -------------------------
# 5. 토큰화 함수 (chunk 단위로 처리 + 진행 표시)
# -------------------------
def preprocess_for_model(
    input_texts, 
    target_summaries, 
    tokenizer, 
    max_input_length=1024, 
    max_target_length=128
):
    inputs = tokenizer(
        input_texts,
        max_length=max_input_length,
        truncation=True,
        padding="longest",
        return_tensors="pt"
    )
    targets = tokenizer(
        target_summaries,
        max_length=max_target_length,
        truncation=True,
        padding="longest",
        return_tensors="pt"
    )
    return inputs, targets

# -------------------------
# 6. 실제 토큰화 실행
# -------------------------
train_inputs, train_targets = preprocess_for_model(train_input_texts, train_target_summaries, tokenizer)
val_inputs, val_targets = preprocess_for_model(val_input_texts, val_target_summaries, tokenizer)

# -------------------------
# 7. Dataset 클래스 정의
# -------------------------
class SummaryDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs["input_ids"])

    def __getitem__(self, idx):
        return {
            "input_ids": self.inputs["input_ids"][idx],
            "attention_mask": self.inputs["attention_mask"][idx],
            "labels": self.targets["input_ids"][idx],
        }

# -------------------------
# 8. Dataset 생성
# -------------------------
train_dataset = SummaryDataset(train_inputs, train_targets)
val_dataset = SummaryDataset(val_inputs, val_targets)

# -------------------------
# 9. DataLoader 생성 (num_workers 활용)
# -------------------------
train_dataloader = DataLoader(
    train_dataset, 
    batch_size=batch_size, 
    shuffle=True,
    num_workers=0  # 멀티프로세스 로딩
)
val_dataloader = DataLoader(
    val_dataset, 
    batch_size=batch_size, 
    shuffle=False,
    num_workers=0
)

# -------------------------
# 10. 옵티마이저 및 스케줄러
# -------------------------
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
warmup_steps = int(0.1 * num_training_steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=num_training_steps
)

# -------------------------
# 11. 평가용 메트릭 (ROUGE)
# -------------------------
rouge = evaluate.load("rouge")

# -------------------------
# 12. 검증(Validation) 함수
# -------------------------
def validate(model, val_dataloader, tokenizer, device):
    model.eval()
    predictions = []
    references = []
    
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            
            # 단일 GPU -> model.generate 사용
            generated_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=128,
                num_beams=4,
                early_stopping=True
            )
            
            decoded_preds = [
                tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) 
                for g in generated_ids
            ]
            decoded_labels = [
                tokenizer.decode(l, skip_special_tokens=True, clean_up_tokenization_spaces=True) 
                for l in labels
            ]
            
            predictions.extend(decoded_preds)
            references.extend(decoded_labels)
    
    results = rouge.compute(predictions=predictions, references=references)
    model.train()
    return results, predictions

# -------------------------
# 13. 학습 루프
# -------------------------
best_rouge_score = 0.0
best_epoch = 0

scaler = GradScaler()
model.train()

for epoch in range(num_epochs):
    total_loss = 0.0
    
    for batch_idx, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")):
        optimizer.zero_grad()
        
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        with autocast():
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            # 만약 여러 GPU를 사용했을 때 shape가 달라지는 걸 대비해 .mean() 유지 가능
            # 하지만 단일 GPU라면 보통 scalar로 나옵니다.
            loss = outputs.loss.mean()
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_dataloader)
    print(f"\n=== Epoch {epoch+1} Done ===")
    print(f"Average Train Loss: {avg_train_loss:.4f}")

    # --- 검증 ---
    val_results, val_predictions = validate(model, val_dataloader, tokenizer, device)
    print(f"Validation ROUGE: {val_results}")

    rouge_l_score = val_results["rougeL"]
    if rouge_l_score > best_rouge_score:
        best_rouge_score = rouge_l_score
        best_epoch = epoch + 1

        # 단일 GPU, DataParallel 제거 -> 그냥 model 저장
        model.save_pretrained("./summary_model_checkpoint")
        tokenizer.save_pretrained("./summary_model_checkpoint")
        print(f"** Best model saved at epoch {best_epoch} with ROUGE-L: {best_rouge_score:.4f}")

# -------------------------
# 14. 최종 모델 저장
# -------------------------
model.save_pretrained("./summary_model_final")
tokenizer.save_pretrained("./summary_model_final")

print("학습 및 검증 완료!")
print(f"Best epoch: {best_epoch}, Best ROUGE-L: {best_rouge_score:.4f}")

torch.cuda.empty_cache()


Loading JSON from pre_data/Training/TL1:   0%|          | 0/84364 [00:00<?, ?it/s]

Loading JSON from pre_data/Validation/VL1:   0%|          | 0/10000 [00:00<?, ?it/s]

Train 데이터 개수: 84364
Validation 데이터 개수: 10000


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


Epoch 1/3:   0%|          | 0/21091 [00:00<?, ?it/s]




=== Epoch 1 Done ===
Average Train Loss: 1.9132
Validation ROUGE: {'rouge1': 0.06877769452769443, 'rouge2': 0.015111782106782112, 'rougeL': 0.06825757381507366, 'rougeLsum': 0.06832091186591174}
** Best model saved at epoch 1 with ROUGE-L: 0.0683


Epoch 2/3:   0%|          | 0/21091 [00:00<?, ?it/s]


=== Epoch 2 Done ===
Average Train Loss: 1.3922
Validation ROUGE: {'rouge1': 0.07248212454212435, 'rouge2': 0.015642640692640694, 'rougeL': 0.07195047924297901, 'rougeLsum': 0.071904023199023}
** Best model saved at epoch 2 with ROUGE-L: 0.0720


Epoch 3/3:   0%|          | 0/21091 [00:00<?, ?it/s]


=== Epoch 3 Done ===
Average Train Loss: 1.0955
Validation ROUGE: {'rouge1': 0.07328302836052812, 'rouge2': 0.016068253968253968, 'rougeL': 0.07264224636474612, 'rougeLsum': 0.07289302253302227}
** Best model saved at epoch 3 with ROUGE-L: 0.0726
학습 및 검증 완료!
Best epoch: 3, Best ROUGE-L: 0.0726


In [3]:
import os
import json
import torch
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler
# KoBART 관련
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast, AdamW, get_linear_schedule_with_warmup
import evaluate

# 추가
from tqdm.auto import tqdm

batch_size = 6

# 1. 데이터 로드 함수
def load_preprocessed_data(base_dir):
    input_texts = []
    target_summaries = []
    
    for root, _, files in os.walk(base_dir):
        for file in files:
            if file.endswith(".json"):
                file_path = os.path.join(root, file)
                with open(file_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
                    passage = " ".join([f"{d['character']}: {d['dialogue']}" for d in data["passage"]])
                    summary = data["summaries"].get("Summary1", "")
                    
                    if passage and summary:
                        input_texts.append(passage)
                        target_summaries.append(summary)
    return input_texts, target_summaries

# 2. 데이터 로드
train_base_dir = "./pre_data/Training/TL1"
val_base_dir = "./pre_data/Validation/VL1"

train_input_texts, train_target_summaries = load_preprocessed_data(train_base_dir)
val_input_texts, val_target_summaries = load_preprocessed_data(val_base_dir)

# Train 데이터와 Validation 데이터의 개수 출력
print(f"Train 데이터 개수: {len(train_input_texts)}")
print(f"Validation 데이터 개수: {len(val_input_texts)}")

# 4. 모델 및 토크나이저 로드 (KoBART)
model_name = "gogamza/kobart-base-v1"
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# 5. GPU 사용 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 6. 데이터 전처리 함수
def preprocess_for_model(input_texts, target_summaries, tokenizer, max_input_length=1024, max_target_length=128):
    inputs = tokenizer(
        input_texts,
        max_length=max_input_length,
        truncation=True,
        padding="longest",
        return_tensors="pt"
    )
    targets = tokenizer(
        target_summaries,
        max_length=max_target_length,
        truncation=True,
        padding="longest",
        return_tensors="pt"
    )
    return inputs, targets

# 7. 토큰화 (Train, Validation 각각)
train_inputs, train_targets = preprocess_for_model(train_input_texts, train_target_summaries, tokenizer)
val_inputs, val_targets = preprocess_for_model(val_input_texts, val_target_summaries, tokenizer)

# 8. Dataset 클래스 정의
class SummaryDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs["input_ids"])

    def __getitem__(self, idx):
        return {
            "input_ids": self.inputs["input_ids"][idx],
            "attention_mask": self.inputs["attention_mask"][idx],
            "labels": self.targets["input_ids"][idx],
        }

# 9. Dataset 생성
train_dataset = SummaryDataset(train_inputs, train_targets)
val_dataset = SummaryDataset(val_inputs, val_targets)

# 10. DataLoader 생성
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# 11. 옵티마이저 및 스케줄러 설정
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
warmup_steps = int(0.1 * num_training_steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=num_training_steps
)

# 12. 평가용 메트릭 (ROUGE)
rouge = evaluate.load("rouge")

# 13. 검증(Validation) 함수
def validate(model, val_dataloader, tokenizer, device):
    model.eval()
    predictions = []
    references = []
    
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            
            generated_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=128,
                num_beams=4,
                early_stopping=True
            )
            
            decoded_preds = [tokenizer.decode(g, skip_special_tokens=True) for g in generated_ids]
            decoded_labels = [tokenizer.decode(l, skip_special_tokens=True) for l in labels]
            
            predictions.extend(decoded_preds)
            references.extend(decoded_labels)
    
    results = rouge.compute(predictions=predictions, references=references)
    model.train()
    return results, predictions

# ----------------------------
# 14. 학습 루프 (Train + Validation)
# ----------------------------
best_rouge_score = 0.0
best_epoch = 0

scaler = GradScaler()
model.train()

for epoch in range(num_epochs):
    total_loss = 0.0
    
    # tqdm을 이용한 진행률 표시
    # desc: 진행 표시줄 왼쪽에 표시될 문구
    # total: (선택) 총 step 개수를 명시적으로 표시하고 싶다면 len(train_dataloader)를 전달 가능
    for batch_idx, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")):
        optimizer.zero_grad()
        
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        with autocast():
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            loss = outputs.loss
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_dataloader)
    print(f"\n=== Epoch {epoch+1} Done ===")
    print(f"Average Train Loss: {avg_train_loss:.4f}")

    # --- 검증(Validation) ---
    val_results, val_predictions = validate(model, val_dataloader, tokenizer, device)
    print(f"Validation ROUGE: {val_results}")

    rouge_l_score = val_results["rougeL"]
    if rouge_l_score > best_rouge_score:
        best_rouge_score = rouge_l_score
        best_epoch = epoch + 1
        model.save_pretrained(f"./summary_model_checkpoint")
        tokenizer.save_pretrained(f"./summary_model_checkpoint")
        print(f"** Best model saved at epoch {best_epoch} with ROUGE-L: {best_rouge_score:.4f}")

# 15. 최종 모델 저장
model.save_pretrained("./summary_model_final")
tokenizer.save_pretrained("./summary_model_final")

print("학습 및 검증 완료!")
print(f"Best epoch: {best_epoch}, Best ROUGE-L: {best_rouge_score:.4f}")

torch.cuda.empty_cache()


Train 데이터 개수: 84364
Validation 데이터 개수: 10000


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


Epoch 1/3:   0%|          | 0/14061 [00:00<?, ?it/s]

KeyboardInterrupt: 