In [1]:
pip install jsonlines

Collecting jsonlines
  Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)
Downloading jsonlines-4.0.0-py3-none-any.whl (8.7 kB)
Installing collected packages: jsonlines
Successfully installed jsonlines-4.0.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
    import json
    import os
    from typing import List, Dict, Tuple
    import random
    from transformers import AutoTokenizer, AutoModelForCausalLM
    import torch
    from tqdm import tqdm
    import re
    from collections import defaultdict, Counter
    import time
    import logging
    import jsonlines
        
    # 检查环境和 GPU 信息
    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA version: {torch.version.cuda}")
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
    
    # 设置设备
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # 1. 加载 GLM-4-9B 模型和分词器
    model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"  # 确认 GLM-4-9B 模型名称
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.padding_side = 'left'  # 确保与 Qwen2 一致
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # 使用 float16 减少显存
        device_map="cuda:0",  # 强制加载到 P100
        trust_remote_code=True
    )
    model.eval()
    print("Model loaded successfully")
    print(f"Model device: {next(model.parameters()).device}")

PyTorch version: 2.5.1+cu124
CUDA available: True
CUDA version: 12.4
GPU: Tesla P100-PCIE-16GB
Total GPU memory: 15.89 GB
Using device: cuda:0


tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

2025-05-12 11:15:37.428985: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747048537.615771      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747048537.665063      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-000002.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00002-of-000002.safetensors:   0%|          | 0.00/7.39G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Model loaded successfully
Model device: cuda:0


In [4]:
# 打印初始显存
if torch.cuda.is_available():
    print(f"GPU memory allocated: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GB")
    print(f"GPU memory reserved: {torch.cuda.memory_reserved(0) / 1024**3:.2f} GB")

# 2. 构造提示函数
def create_zero_shot_prompt(passage: str, number: str) -> str:
    return f"""Answer with only 'Yes' or 'No'. Do not provide explanations. Is "{number}" in the following passage an error? "{passage}"
Answer:"""

def create_few_shot_prompt(passage: str, number: str) -> str:
    examples = [
        {"passage": "Spiders have 9 limbs.", "number": "9", "answer": "Yes"},
        {"passage": "Spiders have 8 limbs.", "number": "8", "answer": "No"},
        {"passage": "Mike's height is -3.6 meters.", "number": "-3.6", "answer": "Yes"},
        {"passage": "Mike's height is 1.8 meters.", "number": "1.8", "answer": "No"}
    ]
    prompt = "Answer with only 'Yes' or 'No'. Do not provide explanations.\n"
    for ex in examples:
        prompt += f"""Question: Is "{ex['number']}" in the following passage an error? "{ex['passage']}"
Answer: {ex['answer']}\n"""
    prompt += f"""Question: Is "{number}" in the following passage an error? "{passage}"
Answer:"""
    return prompt

# 3. 加载 BeNEDect 数据集
def load_benedect_dataset(file_path: str) -> List[Dict]:
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"数据集文件 {file_path} 不存在，请确认路径！")
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            dataset_dict = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"JSON 文件解析错误：{e}")
    
    dataset = list(dataset_dict.values())
    save_list = []
    
    for i, data in enumerate(tqdm(dataset, desc="Processing dataset")):
        required_fields = ['correct_number', 'correct_passage', 'error_number', 'error_passage', 'dataset', 'operation']
        for field in required_fields:
            if field not in data:
                print(f"样本 {data.get('id', '未知')} 缺少字段 {field}")
                continue
        
        prompt_fn = create_few_shot_prompt if i % 48 == 0 else create_zero_shot_prompt
        correct_item = {
            "prompt": prompt_fn(data['correct_passage'], data['correct_number']),
            "expected_answer": "No",
            "dataset": data['dataset'],
            "operation": data['operation'],
            "error_annotation": data.get('error_annotation', {}),
            "passage": data['correct_passage'],
            "number": data['correct_number'],
            "prompt_type": "few_shot" if i % 48 == 0 else "zero_shot"
        }
        error_item = {
            "prompt": prompt_fn(data['error_passage'], data['error_number']),
            "expected_answer": "Yes",
            "dataset": data['dataset'],
            "operation": data['operation'],
            "error_annotation": data.get('error_annotation', {}),
            "passage": data['error_passage'],
            "number": data['error_number'],
            "prompt_type": "few_shot" if i % 48 == 0 else "zero_shot"
        }
        save_list.append(correct_item)
        save_list.append(error_item)
    
    return save_list

# 4. 单条推理
def predict_single(prompt: str, max_retries: int = 3) -> str:
    print(f"Single prediction prompt: {prompt[:100]}...")
    attempt = 0
    success = False
    prediction = None
    
    while attempt < max_retries and not success:
        try:
            inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = {k: v.to(device) for k, v in inputs.items()}
            print(f"Input device: {inputs['input_ids'].device}")
            print(f"Input shape: {inputs['input_ids'].shape}")
            
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=5,
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                    do_sample=False,
                    top_k=1,
                    top_p=0.0
                )
            
            prediction = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
            success = True
            print(f"Single prediction success: Raw Prediction: {prediction}")
            
        except RuntimeError as e:
            print(f"单条推理失败（尝试 {attempt + 1}/{max_retries}）：{e}")
            attempt += 1
            torch.cuda.empty_cache()
            time.sleep(1)
            if attempt == max_retries:
                print("单条推理失败，跳过")
                prediction = "generation_error"
        
        finally:
            if 'inputs' in locals():
                for v in inputs.values():
                    del v
            torch.cuda.empty_cache()
            print(f"GPU memory after single prediction: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GB")
    
    return prediction

# 5. 批次推理
def predict_batch(prompts: List[str], batch_size: int = 8, max_retries: int = 3) -> List[str]:
    predictions = []
    
    for i in tqdm(range(0, len(prompts), batch_size), desc="Predicting"):
        batch_prompts = prompts[i:i + batch_size]
        attempt = 0
        success = False
        batch_preds = None
        
        while attempt < max_retries and not success:
            try:
                # 强制统一序列长度，检查张量形状
                inputs = tokenizer(
                    batch_prompts,
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=512,
                    return_attention_mask=True
                )
                inputs = {k: v.to(device) for k, v in inputs.items()}
                print(f"Batch {i//batch_size} input shapes: input_ids={inputs['input_ids'].shape}, attention_mask={inputs['attention_mask'].shape}")
                
                with torch.no_grad():
                    outputs = model.generate(
                        **inputs,
                        max_new_tokens=5,
                        pad_token_id=tokenizer.pad_token_id,
                        eos_token_id=tokenizer.eos_token_id,
                        do_sample=False,
                        top_k=1,
                        top_p=0.0
                    )
                
                batch_preds = [
                    tokenizer.decode(output[inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
                    for output in outputs
                ]
                success = True
                if i % (10 * batch_size) == 0:
                    print(f"批次 {i//batch_size}: 原始预测: {batch_preds}")
                
            except RuntimeError as e:
                print(f"批次推理失败（尝试 {attempt + 1}/{max_retries}）：{e}")
                attempt += 1
                torch.cuda.empty_cache()
                time.sleep(1)
                if attempt == max_retries:
                    print(f"批次 {i//batch_size} 推理失败，跳过")
                    batch_preds = ["generation_error"] * len(batch_prompts)
            
            finally:
                if 'inputs' in locals():
                    for v in inputs.values():
                        del v
                torch.cuda.empty_cache()
                print(f"GPU memory after batch {i//batch_size}: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GB")
        
        predictions.extend(batch_preds)
    
    return predictions

# 6. 保存预测结果到 JSONL
def save_predictions_to_jsonl(data: List[Dict], predictions: List[str], output_file: str):
    with jsonlines.open(output_file, mode='w') as writer:
        for item, pred in zip(data, predictions):
            result = {
                "prompt": item['prompt'],
                "passage": item['passage'],
                "number": item['number'],
                "expected_answer": item['expected_answer'],
                "raw_prediction": pred,
                "dataset": item['dataset'],
                "operation": item['operation'],
                "error_annotation": item['error_annotation'],
                "prompt_type": item['prompt_type']
            }
            writer.write(result)

GPU memory allocated: 14.96 GB
GPU memory reserved: 15.08 GB


In [6]:
file_path = "/kaggle/input/benedect/BeNEDect_all.json"  # 确认路径
dataset = load_benedect_dataset(file_path)

random_sample = random.choice(dataset)
print("=== 随机样本推理 ===")
print(f"Passage: {random_sample['passage']}")
print(f"Number: {random_sample['number']}")
print(f"Expected Answer: {random_sample['expected_answer']}")
print(f"Prompt Type: {random_sample['prompt_type']}")
print(f"Prompt:\n{random_sample['prompt']}")

random_pred = predict_single(random_sample['prompt'])
print(f"Raw Prediction: {random_pred}")

prompts = [item['prompt'] for item in dataset]
predictions = predict_batch(prompts, batch_size=4, max_retries=3)  # 减小批次大小
output_file = "/kaggle/working/predictions.jsonl"
save_predictions_to_jsonl(dataset, predictions, output_file)
print(f"预测结果已保存到 {output_file}")

Processing dataset: 100%|██████████| 4800/4800 [00:00<00:00, 223857.89it/s]


=== 随机样本推理 ===
Passage: Renaissance sculpture proper is often taken to begin with the famous competition for the doors of the Florence Baptistry in 1403, from which the trial models submitted by the winner, Lorenzo Ghiberti, and Filippo Brunelleschi survive. Ghiberti's doors are still in place, but were undoubtedly eclipsed by his second pair for the other entrance, the so-called "Gates of Paradise", which took him from 1425 to 1452, and are dazzlingly confident classicizing compositions with varied depths of relief allowing extensive backgrounds. The intervening years had seen Ghiberti's early assistant Donatello develop with seminal statues including his "Davids" in marble (1408–09) and bronze (129.60s), and his Equestrian statue of Gattamelata, as well as reliefs. A leading figure in the later period was Andrea del Verrocchio, best known for his equestrian statue of Bartolomeo Colleoni in Venice; his pupil Leonardo da Vinci designed an equine sculpture in 1482 "The Horse" for Milan-

Predicting:   0%|          | 0/2400 [00:00<?, ?it/s]

Batch 0 input shapes: input_ids=torch.Size([4, 164]), attention_mask=torch.Size([4, 164])


Predicting:   0%|          | 1/2400 [00:02<1:25:42,  2.14s/it]

批次 0: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 0: 14.97 GB
Batch 1 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   0%|          | 2/2400 [00:03<1:00:56,  1.52s/it]

GPU memory after batch 1: 14.97 GB
Batch 2 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   0%|          | 3/2400 [00:04<53:04,  1.33s/it]  

GPU memory after batch 2: 14.97 GB
Batch 3 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   0%|          | 4/2400 [00:05<49:21,  1.24s/it]

GPU memory after batch 3: 14.97 GB
Batch 4 input shapes: input_ids=torch.Size([4, 40]), attention_mask=torch.Size([4, 40])


Predicting:   0%|          | 5/2400 [00:06<45:19,  1.14s/it]

GPU memory after batch 4: 14.97 GB
Batch 5 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   0%|          | 6/2400 [00:07<43:49,  1.10s/it]

GPU memory after batch 5: 14.97 GB
Batch 6 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:   0%|          | 7/2400 [00:08<44:47,  1.12s/it]

GPU memory after batch 6: 14.97 GB
Batch 7 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:   0%|          | 8/2400 [00:09<45:24,  1.14s/it]

GPU memory after batch 7: 14.97 GB
Batch 8 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:   0%|          | 9/2400 [00:10<45:50,  1.15s/it]

GPU memory after batch 8: 14.97 GB
Batch 9 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   0%|          | 10/2400 [00:11<44:17,  1.11s/it]

GPU memory after batch 9: 14.97 GB
Batch 10 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   0%|          | 11/2400 [00:13<44:02,  1.11s/it]

批次 10: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nYes, because', 'Yes\nYes, because']
GPU memory after batch 10: 14.97 GB
Batch 11 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:   0%|          | 12/2400 [00:14<43:01,  1.08s/it]

GPU memory after batch 11: 14.97 GB
Batch 12 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:   1%|          | 13/2400 [00:15<44:07,  1.11s/it]

GPU memory after batch 12: 14.97 GB
Batch 13 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   1%|          | 14/2400 [00:16<43:55,  1.10s/it]

GPU memory after batch 13: 14.97 GB
Batch 14 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   1%|          | 15/2400 [00:17<43:46,  1.10s/it]

GPU memory after batch 14: 14.97 GB
Batch 15 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:   1%|          | 16/2400 [00:18<44:39,  1.12s/it]

GPU memory after batch 15: 14.97 GB
Batch 16 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   1%|          | 17/2400 [00:19<44:16,  1.11s/it]

GPU memory after batch 16: 14.97 GB
Batch 17 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   1%|          | 18/2400 [00:20<43:12,  1.09s/it]

GPU memory after batch 17: 14.97 GB
Batch 18 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   1%|          | 19/2400 [00:21<43:16,  1.09s/it]

GPU memory after batch 18: 14.97 GB
Batch 19 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   1%|          | 20/2400 [00:22<42:32,  1.07s/it]

GPU memory after batch 19: 14.97 GB
Batch 20 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:   1%|          | 21/2400 [00:24<43:43,  1.10s/it]

批次 20: 原始预测: ['No\nYes\nYes', 'No\nYes\nYes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 20: 14.97 GB
Batch 21 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:   1%|          | 22/2400 [00:25<45:16,  1.14s/it]

GPU memory after batch 21: 14.97 GB
Batch 22 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   1%|          | 23/2400 [00:26<43:53,  1.11s/it]

GPU memory after batch 22: 14.97 GB
Batch 23 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   1%|          | 24/2400 [00:27<43:41,  1.10s/it]

GPU memory after batch 23: 14.97 GB
Batch 24 input shapes: input_ids=torch.Size([4, 156]), attention_mask=torch.Size([4, 156])


Predicting:   1%|          | 25/2400 [00:29<53:31,  1.35s/it]

GPU memory after batch 24: 14.97 GB
Batch 25 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:   1%|          | 26/2400 [00:30<51:22,  1.30s/it]

GPU memory after batch 25: 14.97 GB
Batch 26 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:   1%|          | 27/2400 [00:31<49:53,  1.26s/it]

GPU memory after batch 26: 14.97 GB
Batch 27 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   1%|          | 28/2400 [00:32<47:53,  1.21s/it]

GPU memory after batch 27: 14.97 GB
Batch 28 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   1%|          | 29/2400 [00:33<46:26,  1.18s/it]

GPU memory after batch 28: 14.97 GB
Batch 29 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:   1%|▏         | 30/2400 [00:35<47:06,  1.19s/it]

GPU memory after batch 29: 14.97 GB
Batch 30 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:   1%|▏         | 31/2400 [00:36<45:06,  1.14s/it]

批次 30: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 30: 14.97 GB
Batch 31 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   1%|▏         | 32/2400 [00:37<44:30,  1.13s/it]

GPU memory after batch 31: 14.97 GB
Batch 32 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   1%|▏         | 33/2400 [00:38<44:04,  1.12s/it]

GPU memory after batch 32: 14.97 GB
Batch 33 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   1%|▏         | 34/2400 [00:39<43:47,  1.11s/it]

GPU memory after batch 33: 14.97 GB
Batch 34 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   1%|▏         | 35/2400 [00:40<43:34,  1.11s/it]

GPU memory after batch 34: 14.97 GB
Batch 35 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:   2%|▏         | 36/2400 [00:41<42:36,  1.08s/it]

GPU memory after batch 35: 14.97 GB
Batch 36 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:   2%|▏         | 37/2400 [00:42<41:57,  1.07s/it]

GPU memory after batch 36: 14.97 GB
Batch 37 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   2%|▏         | 38/2400 [00:43<42:17,  1.07s/it]

GPU memory after batch 37: 14.97 GB
Batch 38 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   2%|▏         | 39/2400 [00:44<42:32,  1.08s/it]

GPU memory after batch 38: 14.97 GB
Batch 39 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   2%|▏         | 40/2400 [00:45<42:39,  1.08s/it]

GPU memory after batch 39: 14.97 GB
Batch 40 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:   2%|▏         | 41/2400 [00:47<43:44,  1.11s/it]

批次 40: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nAnswer\nYes', 'No\nAnswer\nYes']
GPU memory after batch 40: 14.97 GB
Batch 41 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   2%|▏         | 42/2400 [00:48<42:42,  1.09s/it]

GPU memory after batch 41: 14.97 GB
Batch 42 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   2%|▏         | 43/2400 [00:49<42:45,  1.09s/it]

GPU memory after batch 42: 14.97 GB
Batch 43 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:   2%|▏         | 44/2400 [00:50<43:45,  1.11s/it]

GPU memory after batch 43: 14.97 GB
Batch 44 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:   2%|▏         | 45/2400 [00:51<44:26,  1.13s/it]

GPU memory after batch 44: 14.97 GB
Batch 45 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   2%|▏         | 46/2400 [00:52<43:58,  1.12s/it]

GPU memory after batch 45: 14.97 GB
Batch 46 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   2%|▏         | 47/2400 [00:53<42:50,  1.09s/it]

GPU memory after batch 46: 14.97 GB
Batch 47 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   2%|▏         | 48/2400 [00:54<42:50,  1.09s/it]

GPU memory after batch 47: 14.97 GB
Batch 48 input shapes: input_ids=torch.Size([4, 166]), attention_mask=torch.Size([4, 166])


Predicting:   2%|▏         | 49/2400 [00:56<54:29,  1.39s/it]

GPU memory after batch 48: 14.97 GB
Batch 49 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   2%|▏         | 50/2400 [00:57<50:57,  1.30s/it]

GPU memory after batch 49: 14.97 GB
Batch 50 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   2%|▏         | 51/2400 [00:58<48:30,  1.24s/it]

批次 50: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 50: 14.97 GB
Batch 51 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:   2%|▏         | 52/2400 [00:59<45:58,  1.17s/it]

GPU memory after batch 51: 14.97 GB
Batch 52 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   2%|▏         | 53/2400 [01:01<44:59,  1.15s/it]

GPU memory after batch 52: 14.97 GB
Batch 53 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:   2%|▏         | 54/2400 [01:02<43:29,  1.11s/it]

GPU memory after batch 53: 14.97 GB
Batch 54 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:   2%|▏         | 55/2400 [01:03<44:13,  1.13s/it]

GPU memory after batch 54: 14.97 GB
Batch 55 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   2%|▏         | 56/2400 [01:04<43:46,  1.12s/it]

GPU memory after batch 55: 14.97 GB
Batch 56 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:   2%|▏         | 57/2400 [01:05<42:38,  1.09s/it]

GPU memory after batch 56: 14.97 GB
Batch 57 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   2%|▏         | 58/2400 [01:06<42:40,  1.09s/it]

GPU memory after batch 57: 14.97 GB
Batch 58 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   2%|▏         | 59/2400 [01:07<42:37,  1.09s/it]

GPU memory after batch 58: 14.97 GB
Batch 59 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:   2%|▎         | 60/2400 [01:08<41:51,  1.07s/it]

GPU memory after batch 59: 14.97 GB
Batch 60 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   3%|▎         | 61/2400 [01:09<42:03,  1.08s/it]

批次 60: 原始预测: ['No\nAnswer: Yes', 'No\nAnswer: No', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 60: 14.97 GB
Batch 61 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   3%|▎         | 62/2400 [01:10<42:13,  1.08s/it]

GPU memory after batch 61: 14.97 GB
Batch 62 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   3%|▎         | 63/2400 [01:11<42:18,  1.09s/it]

GPU memory after batch 62: 14.97 GB
Batch 63 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   3%|▎         | 64/2400 [01:12<42:22,  1.09s/it]

GPU memory after batch 63: 14.97 GB
Batch 64 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   3%|▎         | 65/2400 [01:14<42:24,  1.09s/it]

GPU memory after batch 64: 14.97 GB
Batch 65 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   3%|▎         | 66/2400 [01:15<42:26,  1.09s/it]

GPU memory after batch 65: 14.97 GB
Batch 66 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   3%|▎         | 67/2400 [01:16<42:27,  1.09s/it]

GPU memory after batch 66: 14.97 GB
Batch 67 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:   3%|▎         | 68/2400 [01:17<41:41,  1.07s/it]

GPU memory after batch 67: 14.97 GB
Batch 68 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:   3%|▎         | 69/2400 [01:18<42:51,  1.10s/it]

GPU memory after batch 68: 14.97 GB
Batch 69 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   3%|▎         | 70/2400 [01:19<42:44,  1.10s/it]

GPU memory after batch 69: 14.97 GB
Batch 70 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   3%|▎         | 71/2400 [01:20<41:51,  1.08s/it]

批次 70: 原始预测: ['Yes\nExplanation: The', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 70: 14.97 GB
Batch 71 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   3%|▎         | 72/2400 [01:21<42:01,  1.08s/it]

GPU memory after batch 71: 14.97 GB
Batch 72 input shapes: input_ids=torch.Size([4, 164]), attention_mask=torch.Size([4, 164])


Predicting:   3%|▎         | 73/2400 [01:23<53:36,  1.38s/it]

GPU memory after batch 72: 14.97 GB
Batch 73 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   3%|▎         | 74/2400 [01:24<50:15,  1.30s/it]

GPU memory after batch 73: 14.97 GB
Batch 74 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:   3%|▎         | 75/2400 [01:26<49:29,  1.28s/it]

GPU memory after batch 74: 14.97 GB
Batch 75 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   3%|▎         | 76/2400 [01:27<46:34,  1.20s/it]

GPU memory after batch 75: 14.97 GB
Batch 76 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   3%|▎         | 77/2400 [01:28<44:30,  1.15s/it]

GPU memory after batch 76: 14.97 GB
Batch 77 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   3%|▎         | 78/2400 [01:29<43:51,  1.13s/it]

GPU memory after batch 77: 14.97 GB
Batch 78 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   3%|▎         | 79/2400 [01:30<43:22,  1.12s/it]

GPU memory after batch 78: 14.97 GB
Batch 79 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   3%|▎         | 80/2400 [01:31<43:02,  1.11s/it]

GPU memory after batch 79: 14.97 GB
Batch 80 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   3%|▎         | 81/2400 [01:32<42:45,  1.11s/it]

批次 80: 原始预测: ['No\nExplanation: The', 'No\nYes\nYes', 'No\nYes\nYes', 'Yes\nYes, because']
GPU memory after batch 80: 14.97 GB
Batch 81 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   3%|▎         | 82/2400 [01:33<42:35,  1.10s/it]

GPU memory after batch 81: 14.97 GB
Batch 82 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   3%|▎         | 83/2400 [01:34<42:27,  1.10s/it]

GPU memory after batch 82: 14.97 GB
Batch 83 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   4%|▎         | 84/2400 [01:35<42:22,  1.10s/it]

GPU memory after batch 83: 14.97 GB
Batch 84 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   4%|▎         | 85/2400 [01:36<42:19,  1.10s/it]

GPU memory after batch 84: 14.97 GB
Batch 85 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:   4%|▎         | 86/2400 [01:38<43:11,  1.12s/it]

GPU memory after batch 85: 14.97 GB
Batch 86 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   4%|▎         | 87/2400 [01:39<42:52,  1.11s/it]

GPU memory after batch 86: 14.97 GB
Batch 87 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   4%|▎         | 88/2400 [01:40<42:38,  1.11s/it]

GPU memory after batch 87: 14.97 GB
Batch 88 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   4%|▎         | 89/2400 [01:41<42:28,  1.10s/it]

GPU memory after batch 88: 14.97 GB
Batch 89 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   4%|▍         | 90/2400 [01:42<41:35,  1.08s/it]

GPU memory after batch 89: 14.97 GB
Batch 90 input shapes: input_ids=torch.Size([4, 38]), attention_mask=torch.Size([4, 38])


Predicting:   4%|▍         | 91/2400 [01:43<40:10,  1.04s/it]

批次 90: 原始预测: ['No\nYes, because', 'No\n\nYes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 90: 14.97 GB
Batch 91 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   4%|▍         | 92/2400 [01:44<40:46,  1.06s/it]

GPU memory after batch 91: 14.97 GB
Batch 92 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:   4%|▍         | 93/2400 [01:45<42:09,  1.10s/it]

GPU memory after batch 92: 14.97 GB
Batch 93 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   4%|▍         | 94/2400 [01:46<42:07,  1.10s/it]

GPU memory after batch 93: 14.97 GB
Batch 94 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   4%|▍         | 95/2400 [01:47<41:20,  1.08s/it]

GPU memory after batch 94: 14.97 GB
Batch 95 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   4%|▍         | 96/2400 [01:48<40:47,  1.06s/it]

GPU memory after batch 95: 14.97 GB
Batch 96 input shapes: input_ids=torch.Size([4, 157]), attention_mask=torch.Size([4, 157])


Predicting:   4%|▍         | 97/2400 [01:50<50:50,  1.32s/it]

GPU memory after batch 96: 14.97 GB
Batch 97 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:   4%|▍         | 98/2400 [01:51<47:23,  1.24s/it]

GPU memory after batch 97: 14.97 GB
Batch 98 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:   4%|▍         | 99/2400 [01:52<47:20,  1.23s/it]

GPU memory after batch 98: 14.97 GB
Batch 99 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:   4%|▍         | 100/2400 [01:53<44:56,  1.17s/it]

GPU memory after batch 99: 14.97 GB
Batch 100 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:   4%|▍         | 101/2400 [01:55<44:57,  1.17s/it]

批次 100: 原始预测: ['No\nAnswer\nYes', 'No\nAnswer\nYes', 'No\nYes', 'No\nYes']
GPU memory after batch 100: 14.97 GB
Batch 101 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   4%|▍         | 102/2400 [01:56<44:02,  1.15s/it]

GPU memory after batch 101: 14.97 GB
Batch 102 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   4%|▍         | 103/2400 [01:57<43:23,  1.13s/it]

GPU memory after batch 102: 14.97 GB
Batch 103 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   4%|▍         | 104/2400 [01:58<42:55,  1.12s/it]

GPU memory after batch 103: 14.97 GB
Batch 104 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   4%|▍         | 105/2400 [01:59<42:35,  1.11s/it]

GPU memory after batch 104: 14.97 GB
Batch 105 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   4%|▍         | 106/2400 [02:00<42:21,  1.11s/it]

GPU memory after batch 105: 14.97 GB
Batch 106 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:   4%|▍         | 107/2400 [02:01<43:06,  1.13s/it]

GPU memory after batch 106: 14.97 GB
Batch 107 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   4%|▍         | 108/2400 [02:02<42:42,  1.12s/it]

GPU memory after batch 107: 14.97 GB
Batch 108 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   5%|▍         | 109/2400 [02:04<42:26,  1.11s/it]

GPU memory after batch 108: 14.97 GB
Batch 109 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   5%|▍         | 110/2400 [02:05<42:13,  1.11s/it]

GPU memory after batch 109: 14.97 GB
Batch 110 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   5%|▍         | 111/2400 [02:06<42:04,  1.10s/it]

批次 110: 原始预测: ['No\nYes\nYes', 'No\nYes, because', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 110: 14.97 GB
Batch 111 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   5%|▍         | 112/2400 [02:07<41:58,  1.10s/it]

GPU memory after batch 111: 14.97 GB
Batch 112 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   5%|▍         | 113/2400 [02:08<41:53,  1.10s/it]

GPU memory after batch 112: 14.97 GB
Batch 113 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   5%|▍         | 114/2400 [02:09<41:49,  1.10s/it]

GPU memory after batch 113: 14.97 GB
Batch 114 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   5%|▍         | 115/2400 [02:10<41:01,  1.08s/it]

GPU memory after batch 114: 14.97 GB
Batch 115 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:   5%|▍         | 116/2400 [02:11<40:26,  1.06s/it]

GPU memory after batch 115: 14.97 GB
Batch 116 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   5%|▍         | 117/2400 [02:12<40:46,  1.07s/it]

GPU memory after batch 116: 14.97 GB
Batch 117 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   5%|▍         | 118/2400 [02:13<41:02,  1.08s/it]

GPU memory after batch 117: 14.97 GB
Batch 118 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   5%|▍         | 119/2400 [02:14<41:11,  1.08s/it]

GPU memory after batch 118: 14.97 GB
Batch 119 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:   5%|▌         | 120/2400 [02:16<42:52,  1.13s/it]

GPU memory after batch 119: 14.97 GB
Batch 120 input shapes: input_ids=torch.Size([4, 161]), attention_mask=torch.Size([4, 161])


Predicting:   5%|▌         | 121/2400 [02:18<53:42,  1.41s/it]

批次 120: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 120: 14.97 GB
Batch 121 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   5%|▌         | 122/2400 [02:19<49:16,  1.30s/it]

GPU memory after batch 121: 14.97 GB
Batch 122 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   5%|▌         | 123/2400 [02:20<46:58,  1.24s/it]

GPU memory after batch 122: 14.97 GB
Batch 123 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:   5%|▌         | 124/2400 [02:21<44:30,  1.17s/it]

GPU memory after batch 123: 14.97 GB
Batch 124 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   5%|▌         | 125/2400 [02:22<43:35,  1.15s/it]

GPU memory after batch 124: 14.97 GB
Batch 125 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:   5%|▌         | 126/2400 [02:23<42:09,  1.11s/it]

GPU memory after batch 125: 14.97 GB
Batch 126 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:   5%|▌         | 127/2400 [02:24<41:09,  1.09s/it]

GPU memory after batch 126: 14.97 GB
Batch 127 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:   5%|▌         | 128/2400 [02:25<42:07,  1.11s/it]

GPU memory after batch 127: 14.97 GB
Batch 128 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   5%|▌         | 129/2400 [02:26<41:52,  1.11s/it]

GPU memory after batch 128: 14.97 GB
Batch 129 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   5%|▌         | 130/2400 [02:27<41:42,  1.10s/it]

GPU memory after batch 129: 14.97 GB
Batch 130 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:   5%|▌         | 131/2400 [02:28<40:49,  1.08s/it]

批次 130: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 130: 14.97 GB
Batch 131 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   6%|▌         | 132/2400 [02:29<40:59,  1.08s/it]

GPU memory after batch 131: 14.97 GB
Batch 132 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   6%|▌         | 133/2400 [02:30<40:21,  1.07s/it]

GPU memory after batch 132: 14.97 GB
Batch 133 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   6%|▌         | 134/2400 [02:32<40:38,  1.08s/it]

GPU memory after batch 133: 14.97 GB
Batch 134 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:   6%|▌         | 135/2400 [02:33<40:04,  1.06s/it]

GPU memory after batch 134: 14.97 GB
Batch 135 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   6%|▌         | 136/2400 [02:34<40:23,  1.07s/it]

GPU memory after batch 135: 14.97 GB
Batch 136 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   6%|▌         | 137/2400 [02:35<40:38,  1.08s/it]

GPU memory after batch 136: 14.97 GB
Batch 137 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   6%|▌         | 138/2400 [02:36<40:47,  1.08s/it]

GPU memory after batch 137: 14.97 GB
Batch 138 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:   6%|▌         | 139/2400 [02:37<41:49,  1.11s/it]

GPU memory after batch 138: 14.97 GB
Batch 139 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   6%|▌         | 140/2400 [02:38<41:37,  1.11s/it]

GPU memory after batch 139: 14.97 GB
Batch 140 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:   6%|▌         | 141/2400 [02:39<40:43,  1.08s/it]

批次 140: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 140: 14.97 GB
Batch 141 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   6%|▌         | 142/2400 [02:40<40:07,  1.07s/it]

GPU memory after batch 141: 14.97 GB
Batch 142 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   6%|▌         | 143/2400 [02:41<40:24,  1.07s/it]

GPU memory after batch 142: 14.97 GB
Batch 143 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   6%|▌         | 144/2400 [02:42<39:52,  1.06s/it]

GPU memory after batch 143: 14.97 GB
Batch 144 input shapes: input_ids=torch.Size([4, 156]), attention_mask=torch.Size([4, 156])


Predicting:   6%|▌         | 145/2400 [02:44<49:43,  1.32s/it]

GPU memory after batch 144: 14.97 GB
Batch 145 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:   6%|▌         | 146/2400 [02:45<46:21,  1.23s/it]

GPU memory after batch 145: 14.97 GB
Batch 146 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   6%|▌         | 147/2400 [02:46<44:45,  1.19s/it]

GPU memory after batch 146: 14.97 GB
Batch 147 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:   6%|▌         | 148/2400 [02:47<42:54,  1.14s/it]

GPU memory after batch 147: 14.97 GB
Batch 148 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   6%|▌         | 149/2400 [02:48<42:18,  1.13s/it]

GPU memory after batch 148: 14.97 GB
Batch 149 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   6%|▋         | 150/2400 [02:50<41:54,  1.12s/it]

GPU memory after batch 149: 14.97 GB
Batch 150 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:   6%|▋         | 151/2400 [02:51<40:51,  1.09s/it]

批次 150: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 150: 14.97 GB
Batch 151 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:   6%|▋         | 152/2400 [02:52<41:48,  1.12s/it]

GPU memory after batch 151: 14.97 GB
Batch 152 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   6%|▋         | 153/2400 [02:53<41:33,  1.11s/it]

GPU memory after batch 152: 14.97 GB
Batch 153 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:   6%|▋         | 154/2400 [02:54<40:34,  1.08s/it]

GPU memory after batch 153: 14.97 GB
Batch 154 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   6%|▋         | 155/2400 [02:55<40:40,  1.09s/it]

GPU memory after batch 154: 14.97 GB
Batch 155 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   6%|▋         | 156/2400 [02:56<40:43,  1.09s/it]

GPU memory after batch 155: 14.97 GB
Batch 156 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   7%|▋         | 157/2400 [02:57<40:46,  1.09s/it]

GPU memory after batch 156: 14.97 GB
Batch 157 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:   7%|▋         | 158/2400 [02:58<41:42,  1.12s/it]

GPU memory after batch 157: 14.97 GB
Batch 158 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:   7%|▋         | 159/2400 [02:59<40:40,  1.09s/it]

GPU memory after batch 158: 14.97 GB
Batch 159 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   7%|▋         | 160/2400 [03:00<40:44,  1.09s/it]

GPU memory after batch 159: 14.97 GB
Batch 160 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   7%|▋         | 161/2400 [03:02<40:44,  1.09s/it]

批次 160: 原始预测: ['No\nAnswer: Yes', 'No\nAnswer: Yes', 'No\nYes\nYes', 'Yes\nYes, because']
GPU memory after batch 160: 14.97 GB
Batch 161 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   7%|▋         | 162/2400 [03:03<40:43,  1.09s/it]

GPU memory after batch 161: 14.97 GB
Batch 162 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   7%|▋         | 163/2400 [03:04<40:44,  1.09s/it]

GPU memory after batch 162: 14.97 GB
Batch 163 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   7%|▋         | 164/2400 [03:05<40:44,  1.09s/it]

GPU memory after batch 163: 14.97 GB
Batch 164 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   7%|▋         | 165/2400 [03:06<40:44,  1.09s/it]

GPU memory after batch 164: 14.97 GB
Batch 165 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:   7%|▋         | 166/2400 [03:07<41:38,  1.12s/it]

GPU memory after batch 165: 14.97 GB
Batch 166 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   7%|▋         | 167/2400 [03:08<41:20,  1.11s/it]

GPU memory after batch 166: 14.97 GB
Batch 167 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   7%|▋         | 168/2400 [03:09<41:08,  1.11s/it]

GPU memory after batch 167: 14.97 GB
Batch 168 input shapes: input_ids=torch.Size([4, 160]), attention_mask=torch.Size([4, 160])


Predicting:   7%|▋         | 169/2400 [03:11<50:23,  1.36s/it]

GPU memory after batch 168: 14.97 GB
Batch 169 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   7%|▋         | 170/2400 [03:12<47:27,  1.28s/it]

GPU memory after batch 169: 14.97 GB
Batch 170 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   7%|▋         | 171/2400 [03:13<45:24,  1.22s/it]

批次 170: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'No\nYes\nYes']
GPU memory after batch 170: 14.97 GB
Batch 171 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:   7%|▋         | 172/2400 [03:14<43:12,  1.16s/it]

GPU memory after batch 171: 14.97 GB
Batch 172 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:   7%|▋         | 173/2400 [03:16<43:18,  1.17s/it]

GPU memory after batch 172: 14.97 GB
Batch 173 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   7%|▋         | 174/2400 [03:17<42:28,  1.14s/it]

GPU memory after batch 173: 14.97 GB
Batch 174 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:   7%|▋         | 175/2400 [03:18<42:48,  1.15s/it]

GPU memory after batch 174: 14.97 GB
Batch 175 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   7%|▋         | 176/2400 [03:19<42:06,  1.14s/it]

GPU memory after batch 175: 14.97 GB
Batch 176 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   7%|▋         | 177/2400 [03:20<41:36,  1.12s/it]

GPU memory after batch 176: 14.97 GB
Batch 177 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   7%|▋         | 178/2400 [03:21<41:16,  1.11s/it]

GPU memory after batch 177: 14.97 GB
Batch 178 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:   7%|▋         | 179/2400 [03:22<41:55,  1.13s/it]

GPU memory after batch 178: 14.97 GB
Batch 179 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   8%|▊         | 180/2400 [03:23<41:33,  1.12s/it]

GPU memory after batch 179: 14.97 GB
Batch 180 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   8%|▊         | 181/2400 [03:25<41:14,  1.12s/it]

批次 180: 原始预测: ['No\nYes, because', 'No\nYes, because', 'Yes\nYes, because', 'No\nAnswer: No']
GPU memory after batch 180: 14.97 GB
Batch 181 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:   8%|▊         | 182/2400 [03:26<40:12,  1.09s/it]

GPU memory after batch 181: 14.97 GB
Batch 182 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   8%|▊         | 183/2400 [03:27<40:16,  1.09s/it]

GPU memory after batch 182: 14.97 GB
Batch 183 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   8%|▊         | 184/2400 [03:28<40:17,  1.09s/it]

GPU memory after batch 183: 14.97 GB
Batch 184 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:   8%|▊         | 185/2400 [03:29<41:13,  1.12s/it]

GPU memory after batch 184: 14.97 GB
Batch 185 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   8%|▊         | 186/2400 [03:30<40:56,  1.11s/it]

GPU memory after batch 185: 14.97 GB
Batch 186 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:   8%|▊         | 187/2400 [03:31<39:59,  1.08s/it]

GPU memory after batch 186: 14.97 GB
Batch 187 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:   8%|▊         | 188/2400 [03:32<40:57,  1.11s/it]

GPU memory after batch 187: 14.97 GB
Batch 188 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   8%|▊         | 189/2400 [03:33<40:00,  1.09s/it]

GPU memory after batch 188: 14.97 GB
Batch 189 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   8%|▊         | 190/2400 [03:34<40:03,  1.09s/it]

GPU memory after batch 189: 14.97 GB
Batch 190 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   8%|▊         | 191/2400 [03:35<40:06,  1.09s/it]

批次 190: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 190: 14.97 GB
Batch 191 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   8%|▊         | 192/2400 [03:37<40:06,  1.09s/it]

GPU memory after batch 191: 14.97 GB
Batch 192 input shapes: input_ids=torch.Size([4, 172]), attention_mask=torch.Size([4, 172])


Predicting:   8%|▊         | 193/2400 [03:39<51:37,  1.40s/it]

GPU memory after batch 192: 14.97 GB
Batch 193 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   8%|▊         | 194/2400 [03:40<48:09,  1.31s/it]

GPU memory after batch 193: 14.97 GB
Batch 194 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:   8%|▊         | 195/2400 [03:41<45:00,  1.22s/it]

GPU memory after batch 194: 14.97 GB
Batch 195 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:   8%|▊         | 196/2400 [03:42<42:48,  1.17s/it]

GPU memory after batch 195: 14.97 GB
Batch 196 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   8%|▊         | 197/2400 [03:43<41:59,  1.14s/it]

GPU memory after batch 196: 14.97 GB
Batch 197 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   8%|▊         | 198/2400 [03:44<40:41,  1.11s/it]

GPU memory after batch 197: 14.97 GB
Batch 198 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   8%|▊         | 199/2400 [03:45<40:29,  1.10s/it]

GPU memory after batch 198: 14.97 GB
Batch 199 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   8%|▊         | 200/2400 [03:46<40:20,  1.10s/it]

GPU memory after batch 199: 14.97 GB
Batch 200 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:   8%|▊         | 201/2400 [03:47<40:16,  1.10s/it]

批次 200: 原始预测: ['No\nAnswer: Yes', 'Yes\nYes, because', 'Yes\nExplanation: The', 'Yes\nExplanation: The']
GPU memory after batch 200: 14.97 GB
Batch 201 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:   8%|▊         | 202/2400 [03:48<39:27,  1.08s/it]

GPU memory after batch 201: 14.97 GB
Batch 202 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   8%|▊         | 203/2400 [03:49<39:38,  1.08s/it]

GPU memory after batch 202: 14.97 GB
Batch 203 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   8%|▊         | 204/2400 [03:50<39:45,  1.09s/it]

GPU memory after batch 203: 14.97 GB
Batch 204 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   9%|▊         | 205/2400 [03:52<39:47,  1.09s/it]

GPU memory after batch 204: 14.97 GB
Batch 205 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:   9%|▊         | 206/2400 [03:53<39:50,  1.09s/it]

GPU memory after batch 205: 14.97 GB
Batch 206 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   9%|▊         | 207/2400 [03:54<39:10,  1.07s/it]

GPU memory after batch 206: 14.97 GB
Batch 207 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   9%|▊         | 208/2400 [03:55<39:26,  1.08s/it]

GPU memory after batch 207: 14.97 GB
Batch 208 input shapes: input_ids=torch.Size([4, 42]), attention_mask=torch.Size([4, 42])


Predicting:   9%|▊         | 209/2400 [03:56<38:49,  1.06s/it]

GPU memory after batch 208: 14.97 GB
Batch 209 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:   9%|▉         | 210/2400 [03:57<39:09,  1.07s/it]

GPU memory after batch 209: 14.97 GB
Batch 210 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   9%|▉         | 211/2400 [03:58<39:22,  1.08s/it]

批次 210: 原始预测: ['No\nYes', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 210: 14.97 GB
Batch 211 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:   9%|▉         | 212/2400 [03:59<38:46,  1.06s/it]

GPU memory after batch 211: 14.97 GB
Batch 212 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:   9%|▉         | 213/2400 [04:00<38:20,  1.05s/it]

GPU memory after batch 212: 14.97 GB
Batch 213 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   9%|▉         | 214/2400 [04:01<38:47,  1.06s/it]

GPU memory after batch 213: 14.97 GB
Batch 214 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:   9%|▉         | 215/2400 [04:02<38:21,  1.05s/it]

GPU memory after batch 214: 14.97 GB
Batch 215 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:   9%|▉         | 216/2400 [04:03<38:47,  1.07s/it]

GPU memory after batch 215: 14.97 GB
Batch 216 input shapes: input_ids=torch.Size([4, 169]), attention_mask=torch.Size([4, 169])


Predicting:   9%|▉         | 217/2400 [04:05<50:26,  1.39s/it]

GPU memory after batch 216: 14.97 GB
Batch 217 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:   9%|▉         | 218/2400 [04:06<46:31,  1.28s/it]

GPU memory after batch 217: 14.97 GB
Batch 218 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:   9%|▉         | 219/2400 [04:07<44:30,  1.22s/it]

GPU memory after batch 218: 14.97 GB
Batch 219 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:   9%|▉         | 220/2400 [04:09<43:56,  1.21s/it]

GPU memory after batch 219: 14.97 GB
Batch 220 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   9%|▉         | 221/2400 [04:10<42:40,  1.18s/it]

批次 220: 原始预测: ['No\nExplanation: The', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 220: 14.97 GB
Batch 221 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:   9%|▉         | 222/2400 [04:11<42:37,  1.17s/it]

GPU memory after batch 221: 14.97 GB
Batch 222 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:   9%|▉         | 223/2400 [04:12<41:44,  1.15s/it]

GPU memory after batch 222: 14.97 GB
Batch 223 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:   9%|▉         | 224/2400 [04:13<41:58,  1.16s/it]

GPU memory after batch 223: 14.97 GB
Batch 224 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:   9%|▉         | 225/2400 [04:14<40:32,  1.12s/it]

GPU memory after batch 224: 14.97 GB
Batch 225 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:   9%|▉         | 226/2400 [04:15<40:14,  1.11s/it]

GPU memory after batch 225: 14.97 GB
Batch 226 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:   9%|▉         | 227/2400 [04:16<40:02,  1.11s/it]

GPU memory after batch 226: 14.97 GB
Batch 227 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:  10%|▉         | 228/2400 [04:18<40:44,  1.13s/it]

GPU memory after batch 227: 14.97 GB
Batch 228 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  10%|▉         | 229/2400 [04:19<39:40,  1.10s/it]

GPU memory after batch 228: 14.97 GB
Batch 229 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  10%|▉         | 230/2400 [04:20<38:54,  1.08s/it]

GPU memory after batch 229: 14.97 GB
Batch 230 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  10%|▉         | 231/2400 [04:21<39:04,  1.08s/it]

批次 230: 原始预测: ['No\nYes\nYes', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 230: 14.97 GB
Batch 231 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  10%|▉         | 232/2400 [04:22<39:10,  1.08s/it]

GPU memory after batch 231: 14.97 GB
Batch 232 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  10%|▉         | 233/2400 [04:23<39:15,  1.09s/it]

GPU memory after batch 232: 14.97 GB
Batch 233 input shapes: input_ids=torch.Size([4, 41]), attention_mask=torch.Size([4, 41])


Predicting:  10%|▉         | 234/2400 [04:24<38:34,  1.07s/it]

GPU memory after batch 233: 14.97 GB
Batch 234 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  10%|▉         | 235/2400 [04:25<38:50,  1.08s/it]

GPU memory after batch 234: 14.97 GB
Batch 235 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  10%|▉         | 236/2400 [04:26<39:00,  1.08s/it]

GPU memory after batch 235: 14.97 GB
Batch 236 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  10%|▉         | 237/2400 [04:27<38:25,  1.07s/it]

GPU memory after batch 236: 14.97 GB
Batch 237 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:  10%|▉         | 238/2400 [04:28<37:57,  1.05s/it]

GPU memory after batch 237: 14.97 GB
Batch 238 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  10%|▉         | 239/2400 [04:29<37:38,  1.05s/it]

GPU memory after batch 238: 14.97 GB
Batch 239 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  10%|█         | 240/2400 [04:30<38:09,  1.06s/it]

GPU memory after batch 239: 14.97 GB
Batch 240 input shapes: input_ids=torch.Size([4, 148]), attention_mask=torch.Size([4, 148])


Predicting:  10%|█         | 241/2400 [04:32<47:13,  1.31s/it]

批次 240: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 240: 14.97 GB
Batch 241 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  10%|█         | 242/2400 [04:33<44:51,  1.25s/it]

GPU memory after batch 241: 14.97 GB
Batch 242 input shapes: input_ids=torch.Size([4, 40]), attention_mask=torch.Size([4, 40])


Predicting:  10%|█         | 243/2400 [04:34<41:39,  1.16s/it]

GPU memory after batch 242: 14.97 GB
Batch 243 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  10%|█         | 244/2400 [04:35<40:55,  1.14s/it]

GPU memory after batch 243: 14.97 GB
Batch 244 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  10%|█         | 245/2400 [04:36<39:43,  1.11s/it]

GPU memory after batch 244: 14.97 GB
Batch 245 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  10%|█         | 246/2400 [04:37<39:32,  1.10s/it]

GPU memory after batch 245: 14.97 GB
Batch 246 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:  10%|█         | 247/2400 [04:38<38:43,  1.08s/it]

GPU memory after batch 246: 14.97 GB
Batch 247 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  10%|█         | 248/2400 [04:40<38:08,  1.06s/it]

GPU memory after batch 247: 14.97 GB
Batch 248 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:  10%|█         | 249/2400 [04:41<39:19,  1.10s/it]

GPU memory after batch 248: 14.97 GB
Batch 249 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  10%|█         | 250/2400 [04:42<39:16,  1.10s/it]

GPU memory after batch 249: 14.97 GB
Batch 250 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  10%|█         | 251/2400 [04:43<39:14,  1.10s/it]

批次 250: 原始预测: ['No\nYes, because', 'No\nYes\nYes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 250: 14.97 GB
Batch 251 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  10%|█         | 252/2400 [04:44<39:10,  1.09s/it]

GPU memory after batch 251: 14.97 GB
Batch 252 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  11%|█         | 253/2400 [04:45<39:08,  1.09s/it]

GPU memory after batch 252: 14.97 GB
Batch 253 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  11%|█         | 254/2400 [04:46<38:24,  1.07s/it]

GPU memory after batch 253: 14.97 GB
Batch 254 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  11%|█         | 255/2400 [04:47<38:35,  1.08s/it]

GPU memory after batch 254: 14.97 GB
Batch 255 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  11%|█         | 256/2400 [04:48<38:00,  1.06s/it]

GPU memory after batch 255: 14.97 GB
Batch 256 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  11%|█         | 257/2400 [04:49<37:35,  1.05s/it]

GPU memory after batch 256: 14.97 GB
Batch 257 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  11%|█         | 258/2400 [04:50<37:59,  1.06s/it]

GPU memory after batch 257: 14.97 GB
Batch 258 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  11%|█         | 259/2400 [04:51<38:17,  1.07s/it]

GPU memory after batch 258: 14.97 GB
Batch 259 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  11%|█         | 260/2400 [04:53<38:28,  1.08s/it]

GPU memory after batch 259: 14.97 GB
Batch 260 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:  11%|█         | 261/2400 [04:54<37:53,  1.06s/it]

批次 260: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 260: 14.97 GB
Batch 261 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  11%|█         | 262/2400 [04:55<37:27,  1.05s/it]

GPU memory after batch 261: 14.97 GB
Batch 262 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  11%|█         | 263/2400 [04:56<37:08,  1.04s/it]

GPU memory after batch 262: 14.97 GB
Batch 263 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  11%|█         | 264/2400 [04:57<36:59,  1.04s/it]

GPU memory after batch 263: 14.97 GB
Batch 264 input shapes: input_ids=torch.Size([4, 168]), attention_mask=torch.Size([4, 168])


Predicting:  11%|█         | 265/2400 [04:59<48:06,  1.35s/it]

GPU memory after batch 264: 14.97 GB
Batch 265 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:  11%|█         | 266/2400 [05:00<46:10,  1.30s/it]

GPU memory after batch 265: 14.97 GB
Batch 266 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  11%|█         | 267/2400 [05:01<43:16,  1.22s/it]

GPU memory after batch 266: 14.97 GB
Batch 267 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:  11%|█         | 268/2400 [05:02<41:12,  1.16s/it]

GPU memory after batch 267: 14.97 GB
Batch 268 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:  11%|█         | 269/2400 [05:03<39:44,  1.12s/it]

GPU memory after batch 268: 14.97 GB
Batch 269 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  11%|█▏        | 270/2400 [05:04<39:26,  1.11s/it]

GPU memory after batch 269: 14.97 GB
Batch 270 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  11%|█▏        | 271/2400 [05:05<39:14,  1.11s/it]

批次 270: 原始预测: ['Yes\nYes, because', 'No\nYes\nYes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 270: 14.97 GB
Batch 271 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  11%|█▏        | 272/2400 [05:06<38:23,  1.08s/it]

GPU memory after batch 271: 14.97 GB
Batch 272 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  11%|█▏        | 273/2400 [05:07<37:46,  1.07s/it]

GPU memory after batch 272: 14.97 GB
Batch 273 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  11%|█▏        | 274/2400 [05:08<38:02,  1.07s/it]

GPU memory after batch 273: 14.97 GB
Batch 274 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  11%|█▏        | 275/2400 [05:09<38:13,  1.08s/it]

GPU memory after batch 274: 14.97 GB
Batch 275 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  12%|█▏        | 276/2400 [05:10<38:20,  1.08s/it]

GPU memory after batch 275: 14.97 GB
Batch 276 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  12%|█▏        | 277/2400 [05:11<37:43,  1.07s/it]

GPU memory after batch 276: 14.97 GB
Batch 277 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  12%|█▏        | 278/2400 [05:13<37:59,  1.07s/it]

GPU memory after batch 277: 14.97 GB
Batch 278 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  12%|█▏        | 279/2400 [05:14<37:28,  1.06s/it]

GPU memory after batch 278: 14.97 GB
Batch 279 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  12%|█▏        | 280/2400 [05:15<37:48,  1.07s/it]

GPU memory after batch 279: 14.97 GB
Batch 280 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  12%|█▏        | 281/2400 [05:16<38:01,  1.08s/it]

批次 280: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 280: 14.97 GB
Batch 281 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:  12%|█▏        | 282/2400 [05:17<37:27,  1.06s/it]

GPU memory after batch 281: 14.97 GB
Batch 282 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  12%|█▏        | 283/2400 [05:18<37:47,  1.07s/it]

GPU memory after batch 282: 14.97 GB
Batch 283 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  12%|█▏        | 284/2400 [05:19<37:59,  1.08s/it]

GPU memory after batch 283: 14.97 GB
Batch 284 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  12%|█▏        | 285/2400 [05:20<38:07,  1.08s/it]

GPU memory after batch 284: 14.97 GB
Batch 285 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  12%|█▏        | 286/2400 [05:21<38:14,  1.09s/it]

GPU memory after batch 285: 14.97 GB
Batch 286 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  12%|█▏        | 287/2400 [05:22<37:36,  1.07s/it]

GPU memory after batch 286: 14.97 GB
Batch 287 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  12%|█▏        | 288/2400 [05:23<37:50,  1.08s/it]

GPU memory after batch 287: 14.97 GB
Batch 288 input shapes: input_ids=torch.Size([4, 155]), attention_mask=torch.Size([4, 155])


Predicting:  12%|█▏        | 289/2400 [05:25<46:53,  1.33s/it]

GPU memory after batch 288: 14.97 GB
Batch 289 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  12%|█▏        | 290/2400 [05:26<44:20,  1.26s/it]

GPU memory after batch 289: 14.97 GB
Batch 290 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  12%|█▏        | 291/2400 [05:27<42:33,  1.21s/it]

批次 290: 原始预测: ['No\nAnswer: Yes', 'No\nAnswer\nYes', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 290: 14.97 GB
Batch 291 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  12%|█▏        | 292/2400 [05:29<41:19,  1.18s/it]

GPU memory after batch 291: 14.97 GB
Batch 292 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  12%|█▏        | 293/2400 [05:30<39:45,  1.13s/it]

GPU memory after batch 292: 14.97 GB
Batch 293 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  12%|█▏        | 294/2400 [05:31<38:38,  1.10s/it]

GPU memory after batch 293: 14.97 GB
Batch 294 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  12%|█▏        | 295/2400 [05:32<38:31,  1.10s/it]

GPU memory after batch 294: 14.97 GB
Batch 295 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  12%|█▏        | 296/2400 [05:33<38:27,  1.10s/it]

GPU memory after batch 295: 14.97 GB
Batch 296 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  12%|█▏        | 297/2400 [05:34<38:22,  1.09s/it]

GPU memory after batch 296: 14.97 GB
Batch 297 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  12%|█▏        | 298/2400 [05:35<37:38,  1.07s/it]

GPU memory after batch 297: 14.97 GB
Batch 298 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  12%|█▏        | 299/2400 [05:36<37:49,  1.08s/it]

GPU memory after batch 298: 14.97 GB
Batch 299 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  12%|█▎        | 300/2400 [05:37<37:13,  1.06s/it]

GPU memory after batch 299: 14.97 GB
Batch 300 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  13%|█▎        | 301/2400 [05:38<37:31,  1.07s/it]

批次 300: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 300: 14.97 GB
Batch 301 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  13%|█▎        | 302/2400 [05:39<37:43,  1.08s/it]

GPU memory after batch 301: 14.97 GB
Batch 302 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  13%|█▎        | 303/2400 [05:40<37:51,  1.08s/it]

GPU memory after batch 302: 14.97 GB
Batch 303 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  13%|█▎        | 304/2400 [05:41<38:47,  1.11s/it]

GPU memory after batch 303: 14.97 GB
Batch 304 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  13%|█▎        | 305/2400 [05:43<38:35,  1.11s/it]

GPU memory after batch 304: 14.97 GB
Batch 305 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  13%|█▎        | 306/2400 [05:44<38:25,  1.10s/it]

GPU memory after batch 305: 14.97 GB
Batch 306 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  13%|█▎        | 307/2400 [05:45<38:18,  1.10s/it]

GPU memory after batch 306: 14.97 GB
Batch 307 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:  13%|█▎        | 308/2400 [05:46<37:32,  1.08s/it]

GPU memory after batch 307: 14.97 GB
Batch 308 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:  13%|█▎        | 309/2400 [05:47<36:59,  1.06s/it]

GPU memory after batch 308: 14.97 GB
Batch 309 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  13%|█▎        | 310/2400 [05:48<37:18,  1.07s/it]

GPU memory after batch 309: 14.97 GB
Batch 310 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  13%|█▎        | 311/2400 [05:49<36:49,  1.06s/it]

批次 310: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 310: 14.97 GB
Batch 311 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  13%|█▎        | 312/2400 [05:50<37:09,  1.07s/it]

GPU memory after batch 311: 14.97 GB
Batch 312 input shapes: input_ids=torch.Size([4, 159]), attention_mask=torch.Size([4, 159])


Predicting:  13%|█▎        | 313/2400 [05:52<46:11,  1.33s/it]

GPU memory after batch 312: 14.97 GB
Batch 313 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  13%|█▎        | 314/2400 [05:53<43:41,  1.26s/it]

GPU memory after batch 313: 14.97 GB
Batch 314 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  13%|█▎        | 315/2400 [05:54<41:57,  1.21s/it]

GPU memory after batch 314: 14.97 GB
Batch 315 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  13%|█▎        | 316/2400 [05:55<40:42,  1.17s/it]

GPU memory after batch 315: 14.97 GB
Batch 316 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  13%|█▎        | 317/2400 [05:56<39:53,  1.15s/it]

GPU memory after batch 316: 14.97 GB
Batch 317 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  13%|█▎        | 318/2400 [05:57<38:35,  1.11s/it]

GPU memory after batch 317: 14.97 GB
Batch 318 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  13%|█▎        | 319/2400 [05:58<38:22,  1.11s/it]

GPU memory after batch 318: 14.97 GB
Batch 319 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  13%|█▎        | 320/2400 [05:59<37:31,  1.08s/it]

GPU memory after batch 319: 14.97 GB
Batch 320 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  13%|█▎        | 321/2400 [06:01<37:37,  1.09s/it]

批次 320: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'No\nYes, because']
GPU memory after batch 320: 14.97 GB
Batch 321 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  13%|█▎        | 322/2400 [06:02<36:59,  1.07s/it]

GPU memory after batch 321: 14.97 GB
Batch 322 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  13%|█▎        | 323/2400 [06:03<37:13,  1.08s/it]

GPU memory after batch 322: 14.97 GB
Batch 323 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:  14%|█▎        | 324/2400 [06:04<36:39,  1.06s/it]

GPU memory after batch 323: 14.97 GB
Batch 324 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  14%|█▎        | 325/2400 [06:05<37:00,  1.07s/it]

GPU memory after batch 324: 14.97 GB
Batch 325 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  14%|█▎        | 326/2400 [06:06<37:13,  1.08s/it]

GPU memory after batch 325: 14.97 GB
Batch 326 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  14%|█▎        | 327/2400 [06:07<37:22,  1.08s/it]

GPU memory after batch 326: 14.97 GB
Batch 327 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  14%|█▎        | 328/2400 [06:08<37:28,  1.09s/it]

GPU memory after batch 327: 14.97 GB
Batch 328 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:  14%|█▎        | 329/2400 [06:09<36:50,  1.07s/it]

GPU memory after batch 328: 14.97 GB
Batch 329 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  14%|█▍        | 330/2400 [06:10<37:06,  1.08s/it]

GPU memory after batch 329: 14.97 GB
Batch 330 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  14%|█▍        | 331/2400 [06:11<37:15,  1.08s/it]

批次 330: 原始预测: ['No\nExplanation: The', 'No\nExplanation: The', 'Yes\nYes, because', 'No\nYes\nYes']
GPU memory after batch 330: 14.97 GB
Batch 331 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  14%|█▍        | 332/2400 [06:12<36:42,  1.06s/it]

GPU memory after batch 331: 14.97 GB
Batch 332 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  14%|█▍        | 333/2400 [06:13<36:17,  1.05s/it]

GPU memory after batch 332: 14.97 GB
Batch 333 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  14%|█▍        | 334/2400 [06:14<36:39,  1.06s/it]

GPU memory after batch 333: 14.97 GB
Batch 334 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  14%|█▍        | 335/2400 [06:16<36:57,  1.07s/it]

GPU memory after batch 334: 14.97 GB
Batch 335 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  14%|█▍        | 336/2400 [06:17<37:08,  1.08s/it]

GPU memory after batch 335: 14.97 GB
Batch 336 input shapes: input_ids=torch.Size([4, 159]), attention_mask=torch.Size([4, 159])


Predicting:  14%|█▍        | 337/2400 [06:19<45:56,  1.34s/it]

GPU memory after batch 336: 14.97 GB
Batch 337 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  14%|█▍        | 338/2400 [06:20<42:43,  1.24s/it]

GPU memory after batch 337: 14.97 GB
Batch 338 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  14%|█▍        | 339/2400 [06:21<41:08,  1.20s/it]

GPU memory after batch 338: 14.97 GB
Batch 339 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  14%|█▍        | 340/2400 [06:22<40:03,  1.17s/it]

GPU memory after batch 339: 14.97 GB
Batch 340 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  14%|█▍        | 341/2400 [06:23<39:16,  1.14s/it]

批次 340: 原始预测: ['Yes\nExplanation: The', 'Yes\nExplanation: The', 'Yes\nExplanation: The', 'Yes\nYes, because']
GPU memory after batch 340: 14.97 GB
Batch 341 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  14%|█▍        | 342/2400 [06:24<38:02,  1.11s/it]

GPU memory after batch 341: 14.97 GB
Batch 342 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  14%|█▍        | 343/2400 [06:25<37:10,  1.08s/it]

GPU memory after batch 342: 14.97 GB
Batch 343 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  14%|█▍        | 344/2400 [06:26<37:14,  1.09s/it]

GPU memory after batch 343: 14.97 GB
Batch 344 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  14%|█▍        | 345/2400 [06:27<36:35,  1.07s/it]

GPU memory after batch 344: 14.97 GB
Batch 345 input shapes: input_ids=torch.Size([4, 41]), attention_mask=torch.Size([4, 41])


Predicting:  14%|█▍        | 346/2400 [06:28<36:05,  1.05s/it]

GPU memory after batch 345: 14.97 GB
Batch 346 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  14%|█▍        | 347/2400 [06:29<36:27,  1.07s/it]

GPU memory after batch 346: 14.97 GB
Batch 347 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  14%|█▍        | 348/2400 [06:30<36:42,  1.07s/it]

GPU memory after batch 347: 14.97 GB
Batch 348 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  15%|█▍        | 349/2400 [06:31<36:14,  1.06s/it]

GPU memory after batch 348: 14.97 GB
Batch 349 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  15%|█▍        | 350/2400 [06:32<36:34,  1.07s/it]

GPU memory after batch 349: 14.97 GB
Batch 350 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  15%|█▍        | 351/2400 [06:33<36:46,  1.08s/it]

批次 350: 原始预测: ['No\nAnswer: No', 'No\nAnswer: No', 'No\nAnswer: No', 'No\nYes\nYes']
GPU memory after batch 350: 14.97 GB
Batch 351 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:  15%|█▍        | 352/2400 [06:35<37:43,  1.11s/it]

GPU memory after batch 351: 14.97 GB
Batch 352 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  15%|█▍        | 353/2400 [06:36<37:34,  1.10s/it]

GPU memory after batch 352: 14.97 GB
Batch 353 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  15%|█▍        | 354/2400 [06:37<37:29,  1.10s/it]

GPU memory after batch 353: 14.97 GB
Batch 354 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  15%|█▍        | 355/2400 [06:38<38:14,  1.12s/it]

GPU memory after batch 354: 14.97 GB
Batch 355 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  15%|█▍        | 356/2400 [06:39<37:55,  1.11s/it]

GPU memory after batch 355: 14.97 GB
Batch 356 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  15%|█▍        | 357/2400 [06:40<37:41,  1.11s/it]

GPU memory after batch 356: 14.97 GB
Batch 357 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  15%|█▍        | 358/2400 [06:41<37:31,  1.10s/it]

GPU memory after batch 357: 14.97 GB
Batch 358 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  15%|█▍        | 359/2400 [06:42<37:23,  1.10s/it]

GPU memory after batch 358: 14.97 GB
Batch 359 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  15%|█▌        | 360/2400 [06:43<36:38,  1.08s/it]

GPU memory after batch 359: 14.97 GB
Batch 360 input shapes: input_ids=torch.Size([4, 159]), attention_mask=torch.Size([4, 159])


Predicting:  15%|█▌        | 361/2400 [06:45<45:20,  1.33s/it]

批次 360: 原始预测: ['No\nQuestion: Is', 'Yes\nQuestion: Is', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 360: 14.97 GB
Batch 361 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  15%|█▌        | 362/2400 [06:46<43:40,  1.29s/it]

GPU memory after batch 361: 14.97 GB
Batch 362 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:  15%|█▌        | 363/2400 [06:47<41:01,  1.21s/it]

GPU memory after batch 362: 14.97 GB
Batch 363 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  15%|█▌        | 364/2400 [06:49<39:09,  1.15s/it]

GPU memory after batch 363: 14.97 GB
Batch 364 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  15%|█▌        | 365/2400 [06:50<37:51,  1.12s/it]

GPU memory after batch 364: 14.97 GB
Batch 365 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  15%|█▌        | 366/2400 [06:51<37:36,  1.11s/it]

GPU memory after batch 365: 14.97 GB
Batch 366 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:  15%|█▌        | 367/2400 [06:52<36:44,  1.08s/it]

GPU memory after batch 366: 14.97 GB
Batch 367 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  15%|█▌        | 368/2400 [06:53<36:49,  1.09s/it]

GPU memory after batch 367: 14.97 GB
Batch 368 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  15%|█▌        | 369/2400 [06:54<36:12,  1.07s/it]

GPU memory after batch 368: 14.97 GB
Batch 369 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  15%|█▌        | 370/2400 [06:55<35:45,  1.06s/it]

GPU memory after batch 369: 14.97 GB
Batch 370 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:  15%|█▌        | 371/2400 [06:56<36:56,  1.09s/it]

批次 370: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 370: 14.97 GB
Batch 371 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  16%|█▌        | 372/2400 [06:57<36:54,  1.09s/it]

GPU memory after batch 371: 14.97 GB
Batch 372 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  16%|█▌        | 373/2400 [06:58<36:12,  1.07s/it]

GPU memory after batch 372: 14.97 GB
Batch 373 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  16%|█▌        | 374/2400 [06:59<36:24,  1.08s/it]

GPU memory after batch 373: 14.97 GB
Batch 374 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  16%|█▌        | 375/2400 [07:00<36:32,  1.08s/it]

GPU memory after batch 374: 14.97 GB
Batch 375 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:  16%|█▌        | 376/2400 [07:01<37:26,  1.11s/it]

GPU memory after batch 375: 14.97 GB
Batch 376 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  16%|█▌        | 377/2400 [07:03<37:15,  1.10s/it]

GPU memory after batch 376: 14.97 GB
Batch 377 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  16%|█▌        | 378/2400 [07:04<37:56,  1.13s/it]

GPU memory after batch 377: 14.97 GB
Batch 378 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  16%|█▌        | 379/2400 [07:05<36:55,  1.10s/it]

GPU memory after batch 378: 14.97 GB
Batch 379 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  16%|█▌        | 380/2400 [07:06<36:12,  1.08s/it]

GPU memory after batch 379: 14.97 GB
Batch 380 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  16%|█▌        | 381/2400 [07:07<37:12,  1.11s/it]

批次 380: 原始预测: ['Yes\nYes, because', 'No\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 380: 14.97 GB
Batch 381 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  16%|█▌        | 382/2400 [07:08<37:05,  1.10s/it]

GPU memory after batch 381: 14.97 GB
Batch 382 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  16%|█▌        | 383/2400 [07:09<36:17,  1.08s/it]

GPU memory after batch 382: 14.97 GB
Batch 383 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  16%|█▌        | 384/2400 [07:10<36:25,  1.08s/it]

GPU memory after batch 383: 14.97 GB
Batch 384 input shapes: input_ids=torch.Size([4, 156]), attention_mask=torch.Size([4, 156])


Predicting:  16%|█▌        | 385/2400 [07:12<44:57,  1.34s/it]

GPU memory after batch 384: 14.97 GB
Batch 385 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  16%|█▌        | 386/2400 [07:13<42:29,  1.27s/it]

GPU memory after batch 385: 14.97 GB
Batch 386 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  16%|█▌        | 387/2400 [07:14<40:43,  1.21s/it]

GPU memory after batch 386: 14.97 GB
Batch 387 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  16%|█▌        | 388/2400 [07:15<39:29,  1.18s/it]

GPU memory after batch 387: 14.97 GB
Batch 388 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  16%|█▌        | 389/2400 [07:16<38:36,  1.15s/it]

GPU memory after batch 388: 14.97 GB
Batch 389 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  16%|█▋        | 390/2400 [07:18<37:20,  1.11s/it]

GPU memory after batch 389: 14.97 GB
Batch 390 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  16%|█▋        | 391/2400 [07:19<37:07,  1.11s/it]

批次 390: 原始预测: ['Yes\nYes, because', 'No\nAnswer: No', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 390: 14.97 GB
Batch 391 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  16%|█▋        | 392/2400 [07:20<36:57,  1.10s/it]

GPU memory after batch 391: 14.97 GB
Batch 392 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  16%|█▋        | 393/2400 [07:21<36:49,  1.10s/it]

GPU memory after batch 392: 14.97 GB
Batch 393 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  16%|█▋        | 394/2400 [07:22<36:43,  1.10s/it]

GPU memory after batch 393: 14.97 GB
Batch 394 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  16%|█▋        | 395/2400 [07:23<36:39,  1.10s/it]

GPU memory after batch 394: 14.97 GB
Batch 395 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  16%|█▋        | 396/2400 [07:24<36:36,  1.10s/it]

GPU memory after batch 395: 14.97 GB
Batch 396 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  17%|█▋        | 397/2400 [07:25<36:33,  1.09s/it]

GPU memory after batch 396: 14.97 GB
Batch 397 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:  17%|█▋        | 398/2400 [07:26<37:20,  1.12s/it]

GPU memory after batch 397: 14.97 GB
Batch 398 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  17%|█▋        | 399/2400 [07:27<37:02,  1.11s/it]

GPU memory after batch 398: 14.97 GB
Batch 399 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  17%|█▋        | 400/2400 [07:29<36:51,  1.11s/it]

GPU memory after batch 399: 14.97 GB
Batch 400 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  17%|█▋        | 401/2400 [07:30<36:42,  1.10s/it]

批次 400: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 400: 14.97 GB
Batch 401 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  17%|█▋        | 402/2400 [07:31<36:34,  1.10s/it]

GPU memory after batch 401: 14.97 GB
Batch 402 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:  17%|█▋        | 403/2400 [07:32<37:17,  1.12s/it]

GPU memory after batch 402: 14.97 GB
Batch 403 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  17%|█▋        | 404/2400 [07:33<36:58,  1.11s/it]

GPU memory after batch 403: 14.97 GB
Batch 404 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  17%|█▋        | 405/2400 [07:34<36:48,  1.11s/it]

GPU memory after batch 404: 14.97 GB
Batch 405 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  17%|█▋        | 406/2400 [07:35<36:38,  1.10s/it]

GPU memory after batch 405: 14.97 GB
Batch 406 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  17%|█▋        | 407/2400 [07:36<36:30,  1.10s/it]

GPU memory after batch 406: 14.97 GB
Batch 407 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  17%|█▋        | 408/2400 [07:37<36:25,  1.10s/it]

GPU memory after batch 407: 14.97 GB
Batch 408 input shapes: input_ids=torch.Size([4, 162]), attention_mask=torch.Size([4, 162])


Predicting:  17%|█▋        | 409/2400 [07:39<46:13,  1.39s/it]

GPU memory after batch 408: 14.97 GB
Batch 409 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  17%|█▋        | 410/2400 [07:41<43:12,  1.30s/it]

GPU memory after batch 409: 14.97 GB
Batch 410 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  17%|█▋        | 411/2400 [07:42<41:05,  1.24s/it]

批次 410: 原始预测: ['Yes\nYes, because', 'No\nAnswer: No', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 410: 14.97 GB
Batch 411 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  17%|█▋        | 412/2400 [07:43<39:36,  1.20s/it]

GPU memory after batch 411: 14.97 GB
Batch 412 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  17%|█▋        | 413/2400 [07:44<37:55,  1.15s/it]

GPU memory after batch 412: 14.97 GB
Batch 413 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  17%|█▋        | 414/2400 [07:45<37:22,  1.13s/it]

GPU memory after batch 413: 14.97 GB
Batch 414 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  17%|█▋        | 415/2400 [07:46<36:59,  1.12s/it]

GPU memory after batch 414: 14.97 GB
Batch 415 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  17%|█▋        | 416/2400 [07:47<36:44,  1.11s/it]

GPU memory after batch 415: 14.97 GB
Batch 416 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:  17%|█▋        | 417/2400 [07:48<35:53,  1.09s/it]

GPU memory after batch 416: 14.97 GB
Batch 417 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  17%|█▋        | 418/2400 [07:49<35:57,  1.09s/it]

GPU memory after batch 417: 14.97 GB
Batch 418 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  17%|█▋        | 419/2400 [07:50<36:00,  1.09s/it]

GPU memory after batch 418: 14.97 GB
Batch 419 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  18%|█▊        | 420/2400 [07:51<36:00,  1.09s/it]

GPU memory after batch 419: 14.97 GB
Batch 420 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:  18%|█▊        | 421/2400 [07:52<35:20,  1.07s/it]

批次 420: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 420: 14.97 GB
Batch 421 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  18%|█▊        | 422/2400 [07:53<34:52,  1.06s/it]

GPU memory after batch 421: 14.97 GB
Batch 422 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  18%|█▊        | 423/2400 [07:54<35:11,  1.07s/it]

GPU memory after batch 422: 14.97 GB
Batch 423 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  18%|█▊        | 424/2400 [07:56<35:24,  1.08s/it]

GPU memory after batch 423: 14.97 GB
Batch 424 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:  18%|█▊        | 425/2400 [07:57<36:22,  1.10s/it]

GPU memory after batch 424: 14.97 GB
Batch 425 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  18%|█▊        | 426/2400 [07:58<35:35,  1.08s/it]

GPU memory after batch 425: 14.97 GB
Batch 426 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  18%|█▊        | 427/2400 [07:59<35:40,  1.09s/it]

GPU memory after batch 426: 14.97 GB
Batch 427 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:  18%|█▊        | 428/2400 [08:00<35:04,  1.07s/it]

GPU memory after batch 427: 14.97 GB
Batch 428 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:  18%|█▊        | 429/2400 [08:01<34:39,  1.05s/it]

GPU memory after batch 428: 14.97 GB
Batch 429 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  18%|█▊        | 430/2400 [08:02<35:00,  1.07s/it]

GPU memory after batch 429: 14.97 GB
Batch 430 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  18%|█▊        | 431/2400 [08:03<34:36,  1.05s/it]

批次 430: 原始预测: ['No\n\nYes, because', 'No\n\nAnswer: No', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 430: 14.97 GB
Batch 431 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  18%|█▊        | 432/2400 [08:04<34:59,  1.07s/it]

GPU memory after batch 431: 14.97 GB
Batch 432 input shapes: input_ids=torch.Size([4, 161]), attention_mask=torch.Size([4, 161])


Predicting:  18%|█▊        | 433/2400 [08:06<44:56,  1.37s/it]

GPU memory after batch 432: 14.97 GB
Batch 433 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  18%|█▊        | 434/2400 [08:07<42:11,  1.29s/it]

GPU memory after batch 433: 14.97 GB
Batch 434 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  18%|█▊        | 435/2400 [08:08<39:36,  1.21s/it]

GPU memory after batch 434: 14.97 GB
Batch 435 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  18%|█▊        | 436/2400 [08:09<38:26,  1.17s/it]

GPU memory after batch 435: 14.97 GB
Batch 436 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  18%|█▊        | 437/2400 [08:11<37:38,  1.15s/it]

GPU memory after batch 436: 14.97 GB
Batch 437 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  18%|█▊        | 438/2400 [08:12<36:24,  1.11s/it]

GPU memory after batch 437: 14.97 GB
Batch 438 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  18%|█▊        | 439/2400 [08:13<36:10,  1.11s/it]

GPU memory after batch 438: 14.97 GB
Batch 439 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  18%|█▊        | 440/2400 [08:14<36:01,  1.10s/it]

GPU memory after batch 439: 14.97 GB
Batch 440 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  18%|█▊        | 441/2400 [08:15<35:54,  1.10s/it]

批次 440: 原始预测: ['No\nYes, because', 'No\nAnswer: No', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 440: 14.97 GB
Batch 441 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  18%|█▊        | 442/2400 [08:16<35:48,  1.10s/it]

GPU memory after batch 441: 14.97 GB
Batch 442 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:  18%|█▊        | 443/2400 [08:17<35:04,  1.08s/it]

GPU memory after batch 442: 14.97 GB
Batch 443 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  18%|█▊        | 444/2400 [08:18<35:13,  1.08s/it]

GPU memory after batch 443: 14.97 GB
Batch 444 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  19%|█▊        | 445/2400 [08:19<35:20,  1.08s/it]

GPU memory after batch 444: 14.97 GB
Batch 445 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  19%|█▊        | 446/2400 [08:20<35:24,  1.09s/it]

GPU memory after batch 445: 14.97 GB
Batch 446 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  19%|█▊        | 447/2400 [08:21<35:26,  1.09s/it]

GPU memory after batch 446: 14.97 GB
Batch 447 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  19%|█▊        | 448/2400 [08:22<35:28,  1.09s/it]

GPU memory after batch 447: 14.97 GB
Batch 448 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:  19%|█▊        | 449/2400 [08:24<36:15,  1.12s/it]

GPU memory after batch 448: 14.97 GB
Batch 449 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  19%|█▉        | 450/2400 [08:25<35:23,  1.09s/it]

GPU memory after batch 449: 14.97 GB
Batch 450 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  19%|█▉        | 451/2400 [08:26<35:25,  1.09s/it]

批次 450: 原始预测: ['No\nAnswer: No', 'No\nAnswer: No', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 450: 14.97 GB
Batch 451 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  19%|█▉        | 452/2400 [08:27<35:25,  1.09s/it]

GPU memory after batch 451: 14.97 GB
Batch 452 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  19%|█▉        | 453/2400 [08:28<35:26,  1.09s/it]

GPU memory after batch 452: 14.97 GB
Batch 453 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  19%|█▉        | 454/2400 [08:29<35:25,  1.09s/it]

GPU memory after batch 453: 14.97 GB
Batch 454 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:  19%|█▉        | 455/2400 [08:30<34:46,  1.07s/it]

GPU memory after batch 454: 14.97 GB
Batch 455 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  19%|█▉        | 456/2400 [08:31<34:18,  1.06s/it]

GPU memory after batch 455: 14.97 GB
Batch 456 input shapes: input_ids=torch.Size([4, 158]), attention_mask=torch.Size([4, 158])


Predicting:  19%|█▉        | 457/2400 [08:33<42:48,  1.32s/it]

GPU memory after batch 456: 14.97 GB
Batch 457 input shapes: input_ids=torch.Size([4, 47]), attention_mask=torch.Size([4, 47])


Predicting:  19%|█▉        | 458/2400 [08:34<39:54,  1.23s/it]

GPU memory after batch 457: 14.97 GB
Batch 458 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  19%|█▉        | 459/2400 [08:35<38:32,  1.19s/it]

GPU memory after batch 458: 14.97 GB
Batch 459 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  19%|█▉        | 460/2400 [08:36<37:35,  1.16s/it]

GPU memory after batch 459: 14.97 GB
Batch 460 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  19%|█▉        | 461/2400 [08:37<36:53,  1.14s/it]

批次 460: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nAnswer: No', 'Yes\nYes, because']
GPU memory after batch 460: 14.97 GB
Batch 461 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  19%|█▉        | 462/2400 [08:38<36:25,  1.13s/it]

GPU memory after batch 461: 14.97 GB
Batch 462 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  19%|█▉        | 463/2400 [08:39<36:04,  1.12s/it]

GPU memory after batch 462: 14.97 GB
Batch 463 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  19%|█▉        | 464/2400 [08:41<35:47,  1.11s/it]

GPU memory after batch 463: 14.97 GB
Batch 464 input shapes: input_ids=torch.Size([4, 44]), attention_mask=torch.Size([4, 44])


Predicting:  19%|█▉        | 465/2400 [08:42<34:58,  1.08s/it]

GPU memory after batch 464: 14.97 GB
Batch 465 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  19%|█▉        | 466/2400 [08:43<35:01,  1.09s/it]

GPU memory after batch 465: 14.97 GB
Batch 466 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:  19%|█▉        | 467/2400 [08:44<35:51,  1.11s/it]

GPU memory after batch 466: 14.97 GB
Batch 467 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:  20%|█▉        | 468/2400 [08:45<35:00,  1.09s/it]

GPU memory after batch 467: 14.97 GB
Batch 468 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:  20%|█▉        | 469/2400 [08:46<34:23,  1.07s/it]

GPU memory after batch 468: 14.97 GB
Batch 469 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:  20%|█▉        | 470/2400 [08:47<35:24,  1.10s/it]

GPU memory after batch 469: 14.97 GB
Batch 470 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  20%|█▉        | 471/2400 [08:48<35:17,  1.10s/it]

批次 470: 原始预测: ['No\nYes, because', 'No\nYes, because', 'No\nAnswer\nYes', 'Yes\nYes, because']
GPU memory after batch 470: 14.97 GB
Batch 471 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:  20%|█▉        | 472/2400 [08:49<35:59,  1.12s/it]

GPU memory after batch 471: 14.97 GB
Batch 472 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  20%|█▉        | 473/2400 [08:50<35:43,  1.11s/it]

GPU memory after batch 472: 14.97 GB
Batch 473 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  20%|█▉        | 474/2400 [08:52<35:31,  1.11s/it]

GPU memory after batch 473: 14.97 GB
Batch 474 input shapes: input_ids=torch.Size([4, 48]), attention_mask=torch.Size([4, 48])


Predicting:  20%|█▉        | 475/2400 [08:53<34:44,  1.08s/it]

GPU memory after batch 474: 14.97 GB
Batch 475 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  20%|█▉        | 476/2400 [08:54<35:35,  1.11s/it]

GPU memory after batch 475: 14.97 GB
Batch 476 input shapes: input_ids=torch.Size([4, 52]), attention_mask=torch.Size([4, 52])


Predicting:  20%|█▉        | 477/2400 [08:55<35:24,  1.10s/it]

GPU memory after batch 476: 14.97 GB
Batch 477 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  20%|█▉        | 478/2400 [08:56<35:17,  1.10s/it]

GPU memory after batch 477: 14.97 GB
Batch 478 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  20%|█▉        | 479/2400 [08:57<35:11,  1.10s/it]

GPU memory after batch 478: 14.97 GB
Batch 479 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  20%|██        | 480/2400 [08:58<35:07,  1.10s/it]

GPU memory after batch 479: 14.97 GB
Batch 480 input shapes: input_ids=torch.Size([4, 162]), attention_mask=torch.Size([4, 162])


Predicting:  20%|██        | 481/2400 [09:00<44:32,  1.39s/it]

批次 480: 原始预测: ['No\nQuestion: Is', 'Yes\nQuestion: Is', 'No\n\nYes, because', 'No\nAnswer: No']
GPU memory after batch 480: 14.97 GB
Batch 481 input shapes: input_ids=torch.Size([4, 46]), attention_mask=torch.Size([4, 46])


Predicting:  20%|██        | 482/2400 [09:01<41:00,  1.28s/it]

GPU memory after batch 481: 14.97 GB
Batch 482 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  20%|██        | 483/2400 [09:02<39:57,  1.25s/it]

GPU memory after batch 482: 14.97 GB
Batch 483 input shapes: input_ids=torch.Size([4, 45]), attention_mask=torch.Size([4, 45])


Predicting:  20%|██        | 484/2400 [09:03<37:48,  1.18s/it]

GPU memory after batch 483: 14.97 GB
Batch 484 input shapes: input_ids=torch.Size([4, 49]), attention_mask=torch.Size([4, 49])


Predicting:  20%|██        | 485/2400 [09:05<36:54,  1.16s/it]

GPU memory after batch 484: 14.97 GB
Batch 485 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:  20%|██        | 486/2400 [09:06<37:03,  1.16s/it]

GPU memory after batch 485: 14.97 GB
Batch 486 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  20%|██        | 487/2400 [09:07<36:23,  1.14s/it]

GPU memory after batch 486: 14.97 GB
Batch 487 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:  20%|██        | 488/2400 [09:08<36:41,  1.15s/it]

GPU memory after batch 487: 14.97 GB
Batch 488 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  20%|██        | 489/2400 [09:09<36:07,  1.13s/it]

GPU memory after batch 488: 14.97 GB
Batch 489 input shapes: input_ids=torch.Size([4, 43]), attention_mask=torch.Size([4, 43])


Predicting:  20%|██        | 490/2400 [09:10<35:04,  1.10s/it]

GPU memory after batch 489: 14.97 GB
Batch 490 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  20%|██        | 491/2400 [09:11<34:59,  1.10s/it]

批次 490: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 490: 14.97 GB
Batch 491 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  20%|██        | 492/2400 [09:12<34:53,  1.10s/it]

GPU memory after batch 491: 14.97 GB
Batch 492 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  21%|██        | 493/2400 [09:13<34:50,  1.10s/it]

GPU memory after batch 492: 14.97 GB
Batch 493 input shapes: input_ids=torch.Size([4, 51]), attention_mask=torch.Size([4, 51])


Predicting:  21%|██        | 494/2400 [09:14<34:47,  1.10s/it]

GPU memory after batch 493: 14.97 GB
Batch 494 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  21%|██        | 495/2400 [09:16<34:45,  1.09s/it]

GPU memory after batch 494: 14.97 GB
Batch 495 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  21%|██        | 496/2400 [09:17<35:30,  1.12s/it]

GPU memory after batch 495: 14.97 GB
Batch 496 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:  21%|██        | 497/2400 [09:18<36:00,  1.14s/it]

GPU memory after batch 496: 14.97 GB
Batch 497 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:  21%|██        | 498/2400 [09:19<36:20,  1.15s/it]

GPU memory after batch 497: 14.97 GB
Batch 498 input shapes: input_ids=torch.Size([4, 50]), attention_mask=torch.Size([4, 50])


Predicting:  21%|██        | 499/2400 [09:20<35:48,  1.13s/it]

GPU memory after batch 498: 14.97 GB
Batch 499 input shapes: input_ids=torch.Size([4, 53]), attention_mask=torch.Size([4, 53])


Predicting:  21%|██        | 500/2400 [09:21<35:26,  1.12s/it]

GPU memory after batch 499: 14.97 GB
Batch 500 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  21%|██        | 501/2400 [09:23<37:09,  1.17s/it]

批次 500: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 500: 14.97 GB
Batch 501 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  21%|██        | 502/2400 [09:24<37:43,  1.19s/it]

GPU memory after batch 501: 14.97 GB
Batch 502 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  21%|██        | 503/2400 [09:25<39:34,  1.25s/it]

GPU memory after batch 502: 14.97 GB
Batch 503 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  21%|██        | 504/2400 [09:26<40:02,  1.27s/it]

GPU memory after batch 503: 14.97 GB
Batch 504 input shapes: input_ids=torch.Size([4, 172]), attention_mask=torch.Size([4, 172])


Predicting:  21%|██        | 505/2400 [09:29<48:14,  1.53s/it]

GPU memory after batch 504: 14.97 GB
Batch 505 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  21%|██        | 506/2400 [09:30<46:52,  1.49s/it]

GPU memory after batch 505: 14.97 GB
Batch 506 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  21%|██        | 507/2400 [09:31<45:56,  1.46s/it]

GPU memory after batch 506: 14.97 GB
Batch 507 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  21%|██        | 508/2400 [09:33<45:51,  1.45s/it]

GPU memory after batch 507: 14.97 GB
Batch 508 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  21%|██        | 509/2400 [09:34<45:47,  1.45s/it]

GPU memory after batch 508: 14.97 GB
Batch 509 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  21%|██▏       | 510/2400 [09:36<45:44,  1.45s/it]

GPU memory after batch 509: 14.97 GB
Batch 510 input shapes: input_ids=torch.Size([4, 103]), attention_mask=torch.Size([4, 103])


Predicting:  21%|██▏       | 511/2400 [09:37<46:20,  1.47s/it]

批次 510: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nYes\nYes', 'Yes\nYes, because']
GPU memory after batch 510: 14.97 GB
Batch 511 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  21%|██▏       | 512/2400 [09:39<46:05,  1.46s/it]

GPU memory after batch 511: 14.97 GB
Batch 512 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  21%|██▏       | 513/2400 [09:40<43:54,  1.40s/it]

GPU memory after batch 512: 14.97 GB
Batch 513 input shapes: input_ids=torch.Size([4, 109]), attention_mask=torch.Size([4, 109])


Predicting:  21%|██▏       | 514/2400 [09:42<45:43,  1.45s/it]

GPU memory after batch 513: 14.97 GB
Batch 514 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  21%|██▏       | 515/2400 [09:43<44:15,  1.41s/it]

GPU memory after batch 514: 14.97 GB
Batch 515 input shapes: input_ids=torch.Size([4, 112]), attention_mask=torch.Size([4, 112])


Predicting:  22%|██▏       | 516/2400 [09:44<45:57,  1.46s/it]

GPU memory after batch 515: 14.97 GB
Batch 516 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  22%|██▏       | 517/2400 [09:46<44:25,  1.42s/it]

GPU memory after batch 516: 14.97 GB
Batch 517 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  22%|██▏       | 518/2400 [09:47<45:19,  1.44s/it]

GPU memory after batch 517: 14.97 GB
Batch 518 input shapes: input_ids=torch.Size([4, 128]), attention_mask=torch.Size([4, 128])


Predicting:  22%|██▏       | 519/2400 [09:49<47:29,  1.51s/it]

GPU memory after batch 518: 14.97 GB
Batch 519 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  22%|██▏       | 520/2400 [09:50<45:28,  1.45s/it]

GPU memory after batch 519: 14.97 GB
Batch 520 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  22%|██▏       | 521/2400 [09:52<44:51,  1.43s/it]

批次 520: 原始预测: ['Yes\nYes, because', 'No\nYes\nYes', 'Yes\nExplanation: The', 'Yes\nExplanation: The']
GPU memory after batch 520: 14.97 GB
Batch 521 input shapes: input_ids=torch.Size([4, 136]), attention_mask=torch.Size([4, 136])


Predicting:  22%|██▏       | 522/2400 [09:53<48:26,  1.55s/it]

GPU memory after batch 521: 14.97 GB
Batch 522 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  22%|██▏       | 523/2400 [09:55<46:52,  1.50s/it]

GPU memory after batch 522: 14.97 GB
Batch 523 input shapes: input_ids=torch.Size([4, 160]), attention_mask=torch.Size([4, 160])


Predicting:  22%|██▏       | 524/2400 [09:57<50:57,  1.63s/it]

GPU memory after batch 523: 14.97 GB
Batch 524 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  22%|██▏       | 525/2400 [09:58<49:14,  1.58s/it]

GPU memory after batch 524: 14.97 GB
Batch 525 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  22%|██▏       | 526/2400 [10:00<48:01,  1.54s/it]

GPU memory after batch 525: 14.97 GB
Batch 526 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  22%|██▏       | 527/2400 [10:01<47:11,  1.51s/it]

GPU memory after batch 526: 14.97 GB
Batch 527 input shapes: input_ids=torch.Size([4, 152]), attention_mask=torch.Size([4, 152])


Predicting:  22%|██▏       | 528/2400 [10:03<50:48,  1.63s/it]

GPU memory after batch 527: 14.97 GB
Batch 528 input shapes: input_ids=torch.Size([4, 254]), attention_mask=torch.Size([4, 254])


Predicting:  22%|██▏       | 529/2400 [10:06<1:01:20,  1.97s/it]

GPU memory after batch 528: 14.97 GB
Batch 529 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  22%|██▏       | 530/2400 [10:07<56:29,  1.81s/it]  

GPU memory after batch 529: 14.97 GB
Batch 530 input shapes: input_ids=torch.Size([4, 131]), attention_mask=torch.Size([4, 131])


Predicting:  22%|██▏       | 531/2400 [10:09<56:28,  1.81s/it]

批次 530: 原始预测: ['No\n</think>\n\nYes', 'No\n</think>\n\nYes', 'Yes\nYes, because', 'No\nAnswer: No']
GPU memory after batch 530: 14.97 GB
Batch 531 input shapes: input_ids=torch.Size([4, 95]), attention_mask=torch.Size([4, 95])


Predicting:  22%|██▏       | 532/2400 [10:10<53:04,  1.70s/it]

GPU memory after batch 531: 14.97 GB
Batch 532 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  22%|██▏       | 533/2400 [10:12<50:04,  1.61s/it]

GPU memory after batch 532: 14.97 GB
Batch 533 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  22%|██▏       | 534/2400 [10:13<48:01,  1.54s/it]

GPU memory after batch 533: 14.97 GB
Batch 534 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  22%|██▏       | 535/2400 [10:15<46:31,  1.50s/it]

GPU memory after batch 534: 14.97 GB
Batch 535 input shapes: input_ids=torch.Size([4, 115]), attention_mask=torch.Size([4, 115])


Predicting:  22%|██▏       | 536/2400 [10:16<47:44,  1.54s/it]

GPU memory after batch 535: 14.97 GB
Batch 536 input shapes: input_ids=torch.Size([4, 127]), attention_mask=torch.Size([4, 127])


Predicting:  22%|██▏       | 537/2400 [10:17<44:51,  1.44s/it]

GPU memory after batch 536: 14.97 GB
Batch 537 input shapes: input_ids=torch.Size([4, 123]), attention_mask=torch.Size([4, 123])


Predicting:  22%|██▏       | 538/2400 [10:19<46:55,  1.51s/it]

GPU memory after batch 537: 14.97 GB
Batch 538 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  22%|██▏       | 539/2400 [10:20<44:19,  1.43s/it]

GPU memory after batch 538: 14.97 GB
Batch 539 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  22%|██▎       | 540/2400 [10:22<41:56,  1.35s/it]

GPU memory after batch 539: 14.97 GB
Batch 540 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  23%|██▎       | 541/2400 [10:23<40:49,  1.32s/it]

批次 540: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 540: 14.97 GB
Batch 541 input shapes: input_ids=torch.Size([4, 165]), attention_mask=torch.Size([4, 165])


Predicting:  23%|██▎       | 542/2400 [10:24<43:41,  1.41s/it]

GPU memory after batch 541: 14.97 GB
Batch 542 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  23%|██▎       | 543/2400 [10:26<45:20,  1.46s/it]

GPU memory after batch 542: 14.97 GB
Batch 543 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  23%|██▎       | 544/2400 [10:27<45:10,  1.46s/it]

GPU memory after batch 543: 14.97 GB
Batch 544 input shapes: input_ids=torch.Size([4, 136]), attention_mask=torch.Size([4, 136])


Predicting:  23%|██▎       | 545/2400 [10:29<48:27,  1.57s/it]

GPU memory after batch 544: 14.97 GB
Batch 545 input shapes: input_ids=torch.Size([4, 138]), attention_mask=torch.Size([4, 138])


Predicting:  23%|██▎       | 546/2400 [10:31<51:10,  1.66s/it]

GPU memory after batch 545: 14.97 GB
Batch 546 input shapes: input_ids=torch.Size([4, 138]), attention_mask=torch.Size([4, 138])


Predicting:  23%|██▎       | 547/2400 [10:33<52:57,  1.71s/it]

GPU memory after batch 546: 14.97 GB
Batch 547 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  23%|██▎       | 548/2400 [10:34<50:27,  1.63s/it]

GPU memory after batch 547: 14.97 GB
Batch 548 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  23%|██▎       | 549/2400 [10:36<48:08,  1.56s/it]

GPU memory after batch 548: 14.97 GB
Batch 549 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  23%|██▎       | 550/2400 [10:37<45:05,  1.46s/it]

GPU memory after batch 549: 14.97 GB
Batch 550 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  23%|██▎       | 551/2400 [10:39<45:33,  1.48s/it]

批次 550: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 550: 14.97 GB
Batch 551 input shapes: input_ids=torch.Size([4, 143]), attention_mask=torch.Size([4, 143])


Predicting:  23%|██▎       | 552/2400 [10:40<49:05,  1.59s/it]

GPU memory after batch 551: 14.97 GB
Batch 552 input shapes: input_ids=torch.Size([4, 222]), attention_mask=torch.Size([4, 222])


Predicting:  23%|██▎       | 553/2400 [10:43<57:08,  1.86s/it]

GPU memory after batch 552: 14.97 GB
Batch 553 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  23%|██▎       | 554/2400 [10:44<52:46,  1.72s/it]

GPU memory after batch 553: 14.97 GB
Batch 554 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  23%|██▎       | 555/2400 [10:46<48:56,  1.59s/it]

GPU memory after batch 554: 14.97 GB
Batch 555 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  23%|██▎       | 556/2400 [10:47<48:54,  1.59s/it]

GPU memory after batch 555: 14.97 GB
Batch 556 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  23%|██▎       | 557/2400 [10:49<47:36,  1.55s/it]

GPU memory after batch 556: 14.97 GB
Batch 557 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  23%|██▎       | 558/2400 [10:50<45:16,  1.47s/it]

GPU memory after batch 557: 14.97 GB
Batch 558 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  23%|██▎       | 559/2400 [10:51<43:38,  1.42s/it]

GPU memory after batch 558: 14.97 GB
Batch 559 input shapes: input_ids=torch.Size([4, 150]), attention_mask=torch.Size([4, 150])


Predicting:  23%|██▎       | 560/2400 [10:53<48:01,  1.57s/it]

GPU memory after batch 559: 14.97 GB
Batch 560 input shapes: input_ids=torch.Size([4, 119]), attention_mask=torch.Size([4, 119])


Predicting:  23%|██▎       | 561/2400 [10:55<48:36,  1.59s/it]

批次 560: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 560: 14.97 GB
Batch 561 input shapes: input_ids=torch.Size([4, 123]), attention_mask=torch.Size([4, 123])


Predicting:  23%|██▎       | 562/2400 [10:56<49:21,  1.61s/it]

GPU memory after batch 561: 14.97 GB
Batch 562 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  23%|██▎       | 563/2400 [10:58<46:30,  1.52s/it]

GPU memory after batch 562: 14.97 GB
Batch 563 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  24%|██▎       | 564/2400 [10:59<45:17,  1.48s/it]

GPU memory after batch 563: 14.97 GB
Batch 564 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  24%|██▎       | 565/2400 [11:00<43:36,  1.43s/it]

GPU memory after batch 564: 14.97 GB
Batch 565 input shapes: input_ids=torch.Size([4, 95]), attention_mask=torch.Size([4, 95])


Predicting:  24%|██▎       | 566/2400 [11:02<43:50,  1.43s/it]

GPU memory after batch 565: 14.97 GB
Batch 566 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  24%|██▎       | 567/2400 [11:03<43:58,  1.44s/it]

GPU memory after batch 566: 14.97 GB
Batch 567 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  24%|██▎       | 568/2400 [11:05<43:28,  1.42s/it]

GPU memory after batch 567: 14.97 GB
Batch 568 input shapes: input_ids=torch.Size([4, 121]), attention_mask=torch.Size([4, 121])


Predicting:  24%|██▎       | 569/2400 [11:06<45:42,  1.50s/it]

GPU memory after batch 568: 14.97 GB
Batch 569 input shapes: input_ids=torch.Size([4, 140]), attention_mask=torch.Size([4, 140])


Predicting:  24%|██▍       | 570/2400 [11:08<49:01,  1.61s/it]

GPU memory after batch 569: 14.97 GB
Batch 570 input shapes: input_ids=torch.Size([4, 140]), attention_mask=torch.Size([4, 140])


Predicting:  24%|██▍       | 571/2400 [11:10<51:20,  1.68s/it]

批次 570: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes']
GPU memory after batch 570: 14.97 GB
Batch 571 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  24%|██▍       | 572/2400 [11:11<47:48,  1.57s/it]

GPU memory after batch 571: 14.97 GB
Batch 572 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:  24%|██▍       | 573/2400 [11:13<44:10,  1.45s/it]

GPU memory after batch 572: 14.97 GB
Batch 573 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  24%|██▍       | 574/2400 [11:14<44:43,  1.47s/it]

GPU memory after batch 573: 14.97 GB
Batch 574 input shapes: input_ids=torch.Size([4, 94]), attention_mask=torch.Size([4, 94])


Predicting:  24%|██▍       | 575/2400 [11:16<44:31,  1.46s/it]

GPU memory after batch 574: 14.97 GB
Batch 575 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  24%|██▍       | 576/2400 [11:17<42:24,  1.40s/it]

GPU memory after batch 575: 14.97 GB
Batch 576 input shapes: input_ids=torch.Size([4, 258]), attention_mask=torch.Size([4, 258])


Predicting:  24%|██▍       | 577/2400 [11:20<56:10,  1.85s/it]

GPU memory after batch 576: 14.97 GB
Batch 577 input shapes: input_ids=torch.Size([4, 146]), attention_mask=torch.Size([4, 146])


Predicting:  24%|██▍       | 578/2400 [11:22<56:36,  1.86s/it]

GPU memory after batch 577: 14.97 GB
Batch 578 input shapes: input_ids=torch.Size([4, 150]), attention_mask=torch.Size([4, 150])


Predicting:  24%|██▍       | 579/2400 [11:24<56:55,  1.88s/it]

GPU memory after batch 578: 14.97 GB
Batch 579 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  24%|██▍       | 580/2400 [11:25<53:02,  1.75s/it]

GPU memory after batch 579: 14.97 GB
Batch 580 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  24%|██▍       | 581/2400 [11:26<48:56,  1.61s/it]

批次 580: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 580: 14.97 GB
Batch 581 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  24%|██▍       | 582/2400 [11:28<46:03,  1.52s/it]

GPU memory after batch 581: 14.97 GB
Batch 582 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  24%|██▍       | 583/2400 [11:29<45:57,  1.52s/it]

GPU memory after batch 582: 14.97 GB
Batch 583 input shapes: input_ids=torch.Size([4, 109]), attention_mask=torch.Size([4, 109])


Predicting:  24%|██▍       | 584/2400 [11:31<46:36,  1.54s/it]

GPU memory after batch 583: 14.97 GB
Batch 584 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  24%|██▍       | 585/2400 [11:32<45:46,  1.51s/it]

GPU memory after batch 584: 14.97 GB
Batch 585 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  24%|██▍       | 586/2400 [11:34<46:27,  1.54s/it]

GPU memory after batch 585: 14.97 GB
Batch 586 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  24%|██▍       | 587/2400 [11:35<45:39,  1.51s/it]

GPU memory after batch 586: 14.97 GB
Batch 587 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  24%|██▍       | 588/2400 [11:37<44:29,  1.47s/it]

GPU memory after batch 587: 14.97 GB
Batch 588 input shapes: input_ids=torch.Size([4, 174]), attention_mask=torch.Size([4, 174])


Predicting:  25%|██▍       | 589/2400 [11:38<46:25,  1.54s/it]

GPU memory after batch 588: 14.97 GB
Batch 589 input shapes: input_ids=torch.Size([4, 123]), attention_mask=torch.Size([4, 123])


Predicting:  25%|██▍       | 590/2400 [11:40<47:34,  1.58s/it]

GPU memory after batch 589: 14.97 GB
Batch 590 input shapes: input_ids=torch.Size([4, 119]), attention_mask=torch.Size([4, 119])


Predicting:  25%|██▍       | 591/2400 [11:42<48:02,  1.59s/it]

批次 590: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 590: 14.97 GB
Batch 591 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  25%|██▍       | 592/2400 [11:43<45:22,  1.51s/it]

GPU memory after batch 591: 14.97 GB
Batch 592 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  25%|██▍       | 593/2400 [11:44<43:29,  1.44s/it]

GPU memory after batch 592: 14.97 GB
Batch 593 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  25%|██▍       | 594/2400 [11:46<42:57,  1.43s/it]

GPU memory after batch 593: 14.97 GB
Batch 594 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  25%|██▍       | 595/2400 [11:47<43:43,  1.45s/it]

GPU memory after batch 594: 14.97 GB
Batch 595 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  25%|██▍       | 596/2400 [11:48<43:03,  1.43s/it]

GPU memory after batch 595: 14.97 GB
Batch 596 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  25%|██▍       | 597/2400 [11:50<42:37,  1.42s/it]

GPU memory after batch 596: 14.97 GB
Batch 597 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  25%|██▍       | 598/2400 [11:51<40:55,  1.36s/it]

GPU memory after batch 597: 14.97 GB
Batch 598 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  25%|██▍       | 599/2400 [11:52<41:06,  1.37s/it]

GPU memory after batch 598: 14.97 GB
Batch 599 input shapes: input_ids=torch.Size([4, 114]), attention_mask=torch.Size([4, 114])


Predicting:  25%|██▌       | 600/2400 [11:54<43:25,  1.45s/it]

GPU memory after batch 599: 14.97 GB
Batch 600 input shapes: input_ids=torch.Size([4, 173]), attention_mask=torch.Size([4, 173])


Predicting:  25%|██▌       | 601/2400 [11:56<49:35,  1.65s/it]

批次 600: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes\nYes, because', 'No\nYes\nYes']
GPU memory after batch 600: 14.97 GB
Batch 601 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  25%|██▌       | 602/2400 [11:58<47:08,  1.57s/it]

GPU memory after batch 601: 14.97 GB
Batch 602 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  25%|██▌       | 603/2400 [11:59<43:33,  1.45s/it]

GPU memory after batch 602: 14.97 GB
Batch 603 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  25%|██▌       | 604/2400 [12:00<42:55,  1.43s/it]

GPU memory after batch 603: 14.97 GB
Batch 604 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  25%|██▌       | 605/2400 [12:02<42:29,  1.42s/it]

GPU memory after batch 604: 14.97 GB
Batch 605 input shapes: input_ids=torch.Size([4, 121]), attention_mask=torch.Size([4, 121])


Predicting:  25%|██▌       | 606/2400 [12:03<44:41,  1.49s/it]

GPU memory after batch 605: 14.97 GB
Batch 606 input shapes: input_ids=torch.Size([4, 119]), attention_mask=torch.Size([4, 119])


Predicting:  25%|██▌       | 607/2400 [12:05<45:53,  1.54s/it]

GPU memory after batch 606: 14.97 GB
Batch 607 input shapes: input_ids=torch.Size([4, 119]), attention_mask=torch.Size([4, 119])


Predicting:  25%|██▌       | 608/2400 [12:06<46:43,  1.56s/it]

GPU memory after batch 607: 14.97 GB
Batch 608 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  25%|██▌       | 609/2400 [12:08<43:44,  1.47s/it]

GPU memory after batch 608: 14.97 GB
Batch 609 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  25%|██▌       | 610/2400 [12:09<43:36,  1.46s/it]

GPU memory after batch 609: 14.97 GB
Batch 610 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  25%|██▌       | 611/2400 [12:10<41:34,  1.39s/it]

批次 610: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nYes\nYes', 'Yes\nYes, because']
GPU memory after batch 610: 14.97 GB
Batch 611 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  26%|██▌       | 612/2400 [12:12<40:42,  1.37s/it]

GPU memory after batch 611: 14.97 GB
Batch 612 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  26%|██▌       | 613/2400 [12:13<41:28,  1.39s/it]

GPU memory after batch 612: 14.97 GB
Batch 613 input shapes: input_ids=torch.Size([4, 102]), attention_mask=torch.Size([4, 102])


Predicting:  26%|██▌       | 614/2400 [12:15<42:32,  1.43s/it]

GPU memory after batch 613: 14.97 GB
Batch 614 input shapes: input_ids=torch.Size([4, 95]), attention_mask=torch.Size([4, 95])


Predicting:  26%|██▌       | 615/2400 [12:16<42:44,  1.44s/it]

GPU memory after batch 614: 14.97 GB
Batch 615 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  26%|██▌       | 616/2400 [12:17<41:30,  1.40s/it]

GPU memory after batch 615: 14.97 GB
Batch 616 input shapes: input_ids=torch.Size([4, 154]), attention_mask=torch.Size([4, 154])


Predicting:  26%|██▌       | 617/2400 [12:19<44:58,  1.51s/it]

GPU memory after batch 616: 14.97 GB
Batch 617 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  26%|██▌       | 618/2400 [12:21<43:48,  1.47s/it]

GPU memory after batch 617: 14.97 GB
Batch 618 input shapes: input_ids=torch.Size([4, 116]), attention_mask=torch.Size([4, 116])


Predicting:  26%|██▌       | 619/2400 [12:22<45:10,  1.52s/it]

GPU memory after batch 618: 14.97 GB
Batch 619 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  26%|██▌       | 620/2400 [12:24<43:57,  1.48s/it]

GPU memory after batch 619: 14.97 GB
Batch 620 input shapes: input_ids=torch.Size([4, 135]), attention_mask=torch.Size([4, 135])


Predicting:  26%|██▌       | 621/2400 [12:25<46:54,  1.58s/it]

批次 620: 原始预测: ['No\nAnswer: No', 'No\nAnswer: No', 'No\nAnswer: No', 'No\nYes, because']
GPU memory after batch 620: 14.97 GB
Batch 621 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  26%|██▌       | 622/2400 [12:27<43:47,  1.48s/it]

GPU memory after batch 621: 14.97 GB
Batch 622 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  26%|██▌       | 623/2400 [12:28<41:36,  1.40s/it]

GPU memory after batch 622: 14.97 GB
Batch 623 input shapes: input_ids=torch.Size([4, 125]), attention_mask=torch.Size([4, 125])


Predicting:  26%|██▌       | 624/2400 [12:30<43:55,  1.48s/it]

GPU memory after batch 623: 14.97 GB
Batch 624 input shapes: input_ids=torch.Size([4, 253]), attention_mask=torch.Size([4, 253])


Predicting:  26%|██▌       | 625/2400 [12:32<55:09,  1.86s/it]

GPU memory after batch 624: 14.97 GB
Batch 625 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  26%|██▌       | 626/2400 [12:34<51:28,  1.74s/it]

GPU memory after batch 625: 14.97 GB
Batch 626 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  26%|██▌       | 627/2400 [12:35<50:06,  1.70s/it]

GPU memory after batch 626: 14.97 GB
Batch 627 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  26%|██▌       | 628/2400 [12:37<46:36,  1.58s/it]

GPU memory after batch 627: 14.97 GB
Batch 628 input shapes: input_ids=torch.Size([4, 141]), attention_mask=torch.Size([4, 141])


Predicting:  26%|██▌       | 629/2400 [12:39<49:06,  1.66s/it]

GPU memory after batch 628: 14.97 GB
Batch 629 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  26%|██▋       | 630/2400 [12:40<47:12,  1.60s/it]

GPU memory after batch 629: 14.97 GB
Batch 630 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  26%|██▋       | 631/2400 [12:41<46:24,  1.57s/it]

批次 630: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 630: 14.97 GB
Batch 631 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  26%|██▋       | 632/2400 [12:43<43:22,  1.47s/it]

GPU memory after batch 631: 14.97 GB
Batch 632 input shapes: input_ids=torch.Size([4, 142]), attention_mask=torch.Size([4, 142])


Predicting:  26%|██▋       | 633/2400 [12:45<46:49,  1.59s/it]

GPU memory after batch 632: 14.97 GB
Batch 633 input shapes: input_ids=torch.Size([4, 144]), attention_mask=torch.Size([4, 144])


Predicting:  26%|██▋       | 634/2400 [12:46<49:13,  1.67s/it]

GPU memory after batch 633: 14.97 GB
Batch 634 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  26%|██▋       | 635/2400 [12:48<47:14,  1.61s/it]

GPU memory after batch 634: 14.97 GB
Batch 635 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  26%|██▋       | 636/2400 [12:49<45:51,  1.56s/it]

GPU memory after batch 635: 14.97 GB
Batch 636 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  27%|██▋       | 637/2400 [12:51<44:54,  1.53s/it]

GPU memory after batch 636: 14.97 GB
Batch 637 input shapes: input_ids=torch.Size([4, 150]), attention_mask=torch.Size([4, 150])


Predicting:  27%|██▋       | 638/2400 [12:53<48:09,  1.64s/it]

GPU memory after batch 637: 14.97 GB
Batch 638 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  27%|██▋       | 639/2400 [12:54<47:01,  1.60s/it]

GPU memory after batch 638: 14.97 GB
Batch 639 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  27%|██▋       | 640/2400 [12:55<41:51,  1.43s/it]

GPU memory after batch 639: 14.97 GB
Batch 640 input shapes: input_ids=torch.Size([4, 113]), attention_mask=torch.Size([4, 113])


Predicting:  27%|██▋       | 641/2400 [12:57<43:35,  1.49s/it]

批次 640: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes', 'Yes\nYes, because']
GPU memory after batch 640: 14.97 GB
Batch 641 input shapes: input_ids=torch.Size([4, 107]), attention_mask=torch.Size([4, 107])


Predicting:  27%|██▋       | 642/2400 [12:58<44:29,  1.52s/it]

GPU memory after batch 641: 14.97 GB
Batch 642 input shapes: input_ids=torch.Size([4, 99]), attention_mask=torch.Size([4, 99])


Predicting:  27%|██▋       | 643/2400 [13:00<44:26,  1.52s/it]

GPU memory after batch 642: 14.97 GB
Batch 643 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  27%|██▋       | 644/2400 [13:01<41:53,  1.43s/it]

GPU memory after batch 643: 14.97 GB
Batch 644 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  27%|██▋       | 645/2400 [13:02<40:06,  1.37s/it]

GPU memory after batch 644: 14.97 GB
Batch 645 input shapes: input_ids=torch.Size([4, 115]), attention_mask=torch.Size([4, 115])


Predicting:  27%|██▋       | 646/2400 [13:04<42:20,  1.45s/it]

GPU memory after batch 645: 14.97 GB
Batch 646 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  27%|██▋       | 647/2400 [13:05<41:45,  1.43s/it]

GPU memory after batch 646: 14.97 GB
Batch 647 input shapes: input_ids=torch.Size([4, 103]), attention_mask=torch.Size([4, 103])


Predicting:  27%|██▋       | 648/2400 [13:07<42:30,  1.46s/it]

GPU memory after batch 647: 14.97 GB
Batch 648 input shapes: input_ids=torch.Size([4, 214]), attention_mask=torch.Size([4, 214])


Predicting:  27%|██▋       | 649/2400 [13:09<51:00,  1.75s/it]

GPU memory after batch 648: 14.97 GB
Batch 649 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  27%|██▋       | 650/2400 [13:11<48:57,  1.68s/it]

GPU memory after batch 649: 14.97 GB
Batch 650 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  27%|██▋       | 651/2400 [13:13<48:10,  1.65s/it]

批次 650: 原始预测: ['No\n\nYes, because', 'No\nYes, because', 'No\nYes, because', 'No\nYes, because']
GPU memory after batch 650: 14.97 GB
Batch 651 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  27%|██▋       | 652/2400 [13:14<45:50,  1.57s/it]

GPU memory after batch 651: 14.97 GB
Batch 652 input shapes: input_ids=torch.Size([4, 136]), attention_mask=torch.Size([4, 136])


Predicting:  27%|██▋       | 653/2400 [13:16<47:56,  1.65s/it]

GPU memory after batch 652: 14.97 GB
Batch 653 input shapes: input_ids=torch.Size([4, 94]), attention_mask=torch.Size([4, 94])


Predicting:  27%|██▋       | 654/2400 [13:17<46:12,  1.59s/it]

GPU memory after batch 653: 14.97 GB
Batch 654 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  27%|██▋       | 655/2400 [13:18<43:42,  1.50s/it]

GPU memory after batch 654: 14.97 GB
Batch 655 input shapes: input_ids=torch.Size([4, 115]), attention_mask=torch.Size([4, 115])


Predicting:  27%|██▋       | 656/2400 [13:20<44:48,  1.54s/it]

GPU memory after batch 655: 14.97 GB
Batch 656 input shapes: input_ids=torch.Size([4, 103]), attention_mask=torch.Size([4, 103])


Predicting:  27%|██▋       | 657/2400 [13:22<44:34,  1.53s/it]

GPU memory after batch 656: 14.97 GB
Batch 657 input shapes: input_ids=torch.Size([4, 99]), attention_mask=torch.Size([4, 99])


Predicting:  27%|██▋       | 658/2400 [13:23<44:22,  1.53s/it]

GPU memory after batch 657: 14.97 GB
Batch 658 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  27%|██▋       | 659/2400 [13:24<39:53,  1.37s/it]

GPU memory after batch 658: 14.97 GB
Batch 659 input shapes: input_ids=torch.Size([4, 141]), attention_mask=torch.Size([4, 141])


Predicting:  28%|██▊       | 660/2400 [13:26<44:06,  1.52s/it]

GPU memory after batch 659: 14.97 GB
Batch 660 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  28%|██▊       | 661/2400 [13:27<42:56,  1.48s/it]

批次 660: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes']
GPU memory after batch 660: 14.97 GB
Batch 661 input shapes: input_ids=torch.Size([4, 113]), attention_mask=torch.Size([4, 113])


Predicting:  28%|██▊       | 662/2400 [13:29<44:11,  1.53s/it]

GPU memory after batch 661: 14.97 GB
Batch 662 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  28%|██▊       | 663/2400 [13:30<41:36,  1.44s/it]

GPU memory after batch 662: 14.97 GB
Batch 663 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  28%|██▊       | 664/2400 [13:32<39:49,  1.38s/it]

GPU memory after batch 663: 14.97 GB
Batch 664 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  28%|██▊       | 665/2400 [13:33<41:00,  1.42s/it]

GPU memory after batch 664: 14.97 GB
Batch 665 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  28%|██▊       | 666/2400 [13:35<42:29,  1.47s/it]

GPU memory after batch 665: 14.97 GB
Batch 666 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  28%|██▊       | 667/2400 [13:36<40:59,  1.42s/it]

GPU memory after batch 666: 14.97 GB
Batch 667 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  28%|██▊       | 668/2400 [13:37<41:14,  1.43s/it]

GPU memory after batch 667: 14.97 GB
Batch 668 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  28%|██▊       | 669/2400 [13:39<40:53,  1.42s/it]

GPU memory after batch 668: 14.97 GB
Batch 669 input shapes: input_ids=torch.Size([4, 111]), attention_mask=torch.Size([4, 111])


Predicting:  28%|██▊       | 670/2400 [13:40<42:23,  1.47s/it]

GPU memory after batch 669: 14.97 GB
Batch 670 input shapes: input_ids=torch.Size([4, 111]), attention_mask=torch.Size([4, 111])


Predicting:  28%|██▊       | 671/2400 [13:42<43:25,  1.51s/it]

批次 670: 原始预测: ['No\nAnswer: No', 'No\nAnswer: No', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 670: 14.97 GB
Batch 671 input shapes: input_ids=torch.Size([4, 104]), attention_mask=torch.Size([4, 104])


Predicting:  28%|██▊       | 672/2400 [13:43<43:29,  1.51s/it]

GPU memory after batch 671: 14.97 GB
Batch 672 input shapes: input_ids=torch.Size([4, 246]), attention_mask=torch.Size([4, 246])


Predicting:  28%|██▊       | 673/2400 [13:46<53:45,  1.87s/it]

GPU memory after batch 672: 14.97 GB
Batch 673 input shapes: input_ids=torch.Size([4, 115]), attention_mask=torch.Size([4, 115])


Predicting:  28%|██▊       | 674/2400 [13:48<51:40,  1.80s/it]

GPU memory after batch 673: 14.97 GB
Batch 674 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  28%|██▊       | 675/2400 [13:49<48:39,  1.69s/it]

GPU memory after batch 674: 14.97 GB
Batch 675 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  28%|██▊       | 676/2400 [13:51<45:14,  1.57s/it]

GPU memory after batch 675: 14.97 GB
Batch 676 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  28%|██▊       | 677/2400 [13:52<44:41,  1.56s/it]

GPU memory after batch 676: 14.97 GB
Batch 677 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  28%|██▊       | 678/2400 [13:53<43:11,  1.50s/it]

GPU memory after batch 677: 14.97 GB
Batch 678 input shapes: input_ids=torch.Size([4, 128]), attention_mask=torch.Size([4, 128])


Predicting:  28%|██▊       | 679/2400 [13:55<44:37,  1.56s/it]

GPU memory after batch 678: 14.97 GB
Batch 679 input shapes: input_ids=torch.Size([4, 136]), attention_mask=torch.Size([4, 136])


Predicting:  28%|██▊       | 680/2400 [13:57<46:50,  1.63s/it]

GPU memory after batch 679: 14.97 GB
Batch 680 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  28%|██▊       | 681/2400 [13:58<43:56,  1.53s/it]

批次 680: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 680: 14.97 GB
Batch 681 input shapes: input_ids=torch.Size([4, 106]), attention_mask=torch.Size([4, 106])


Predicting:  28%|██▊       | 682/2400 [14:00<44:24,  1.55s/it]

GPU memory after batch 681: 14.97 GB
Batch 682 input shapes: input_ids=torch.Size([4, 114]), attention_mask=torch.Size([4, 114])


Predicting:  28%|██▊       | 683/2400 [14:01<45:03,  1.57s/it]

GPU memory after batch 682: 14.97 GB
Batch 683 input shapes: input_ids=torch.Size([4, 147]), attention_mask=torch.Size([4, 147])


Predicting:  28%|██▊       | 684/2400 [14:03<47:45,  1.67s/it]

GPU memory after batch 683: 14.97 GB
Batch 684 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  29%|██▊       | 685/2400 [14:05<45:18,  1.58s/it]

GPU memory after batch 684: 14.97 GB
Batch 685 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  29%|██▊       | 686/2400 [14:06<42:50,  1.50s/it]

GPU memory after batch 685: 14.97 GB
Batch 686 input shapes: input_ids=torch.Size([4, 103]), attention_mask=torch.Size([4, 103])


Predicting:  29%|██▊       | 687/2400 [14:08<42:57,  1.50s/it]

GPU memory after batch 686: 14.97 GB
Batch 687 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  29%|██▊       | 688/2400 [14:09<41:55,  1.47s/it]

GPU memory after batch 687: 14.97 GB
Batch 688 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  29%|██▊       | 689/2400 [14:10<40:28,  1.42s/it]

GPU memory after batch 688: 14.97 GB
Batch 689 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  29%|██▉       | 690/2400 [14:12<39:25,  1.38s/it]

GPU memory after batch 689: 14.97 GB
Batch 690 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  29%|██▉       | 691/2400 [14:13<39:58,  1.40s/it]

批次 690: 原始预测: ['Yes\nYes, because', 'No\nYes\nYes', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 690: 14.97 GB
Batch 691 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  29%|██▉       | 692/2400 [14:14<39:05,  1.37s/it]

GPU memory after batch 691: 14.97 GB
Batch 692 input shapes: input_ids=torch.Size([4, 110]), attention_mask=torch.Size([4, 110])


Predicting:  29%|██▉       | 693/2400 [14:16<40:55,  1.44s/it]

GPU memory after batch 692: 14.97 GB
Batch 693 input shapes: input_ids=torch.Size([4, 125]), attention_mask=torch.Size([4, 125])


Predicting:  29%|██▉       | 694/2400 [14:18<42:50,  1.51s/it]

GPU memory after batch 693: 14.97 GB
Batch 694 input shapes: input_ids=torch.Size([4, 125]), attention_mask=torch.Size([4, 125])


Predicting:  29%|██▉       | 695/2400 [14:19<44:11,  1.56s/it]

GPU memory after batch 694: 14.97 GB
Batch 695 input shapes: input_ids=torch.Size([4, 126]), attention_mask=torch.Size([4, 126])


Predicting:  29%|██▉       | 696/2400 [14:21<45:10,  1.59s/it]

GPU memory after batch 695: 14.97 GB
Batch 696 input shapes: input_ids=torch.Size([4, 187]), attention_mask=torch.Size([4, 187])


Predicting:  29%|██▉       | 697/2400 [14:23<50:23,  1.78s/it]

GPU memory after batch 696: 14.97 GB
Batch 697 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  29%|██▉       | 698/2400 [14:24<45:45,  1.61s/it]

GPU memory after batch 697: 14.97 GB
Batch 698 input shapes: input_ids=torch.Size([4, 130]), attention_mask=torch.Size([4, 130])


Predicting:  29%|██▉       | 699/2400 [14:26<47:26,  1.67s/it]

GPU memory after batch 698: 14.97 GB
Batch 699 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  29%|██▉       | 700/2400 [14:28<45:33,  1.61s/it]

GPU memory after batch 699: 14.97 GB
Batch 700 input shapes: input_ids=torch.Size([4, 126]), attention_mask=torch.Size([4, 126])


Predicting:  29%|██▉       | 701/2400 [14:29<46:03,  1.63s/it]

批次 700: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 700: 14.97 GB
Batch 701 input shapes: input_ids=torch.Size([4, 107]), attention_mask=torch.Size([4, 107])


Predicting:  29%|██▉       | 702/2400 [14:31<45:43,  1.62s/it]

GPU memory after batch 701: 14.97 GB
Batch 702 input shapes: input_ids=torch.Size([4, 142]), attention_mask=torch.Size([4, 142])


Predicting:  29%|██▉       | 703/2400 [14:33<47:48,  1.69s/it]

GPU memory after batch 702: 14.97 GB
Batch 703 input shapes: input_ids=torch.Size([4, 136]), attention_mask=torch.Size([4, 136])


Predicting:  29%|██▉       | 704/2400 [14:35<48:51,  1.73s/it]

GPU memory after batch 703: 14.97 GB
Batch 704 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  29%|██▉       | 705/2400 [14:36<45:56,  1.63s/it]

GPU memory after batch 704: 14.97 GB
Batch 705 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  29%|██▉       | 706/2400 [14:37<43:52,  1.55s/it]

GPU memory after batch 705: 14.97 GB
Batch 706 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  29%|██▉       | 707/2400 [14:39<41:43,  1.48s/it]

GPU memory after batch 706: 14.97 GB
Batch 707 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  30%|██▉       | 708/2400 [14:40<40:11,  1.43s/it]

GPU memory after batch 707: 14.97 GB
Batch 708 input shapes: input_ids=torch.Size([4, 120]), attention_mask=torch.Size([4, 120])


Predicting:  30%|██▉       | 709/2400 [14:42<41:55,  1.49s/it]

GPU memory after batch 708: 14.97 GB
Batch 709 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  30%|██▉       | 710/2400 [14:43<40:20,  1.43s/it]

GPU memory after batch 709: 14.97 GB
Batch 710 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  30%|██▉       | 711/2400 [14:44<40:27,  1.44s/it]

批次 710: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 710: 14.97 GB
Batch 711 input shapes: input_ids=torch.Size([4, 142]), attention_mask=torch.Size([4, 142])


Predicting:  30%|██▉       | 712/2400 [14:46<44:02,  1.57s/it]

GPU memory after batch 711: 14.97 GB
Batch 712 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  30%|██▉       | 713/2400 [14:48<42:21,  1.51s/it]

GPU memory after batch 712: 14.97 GB
Batch 713 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  30%|██▉       | 714/2400 [14:49<41:18,  1.47s/it]

GPU memory after batch 713: 14.97 GB
Batch 714 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  30%|██▉       | 715/2400 [14:50<39:17,  1.40s/it]

GPU memory after batch 714: 14.97 GB
Batch 715 input shapes: input_ids=torch.Size([4, 127]), attention_mask=torch.Size([4, 127])


Predicting:  30%|██▉       | 716/2400 [14:52<41:33,  1.48s/it]

GPU memory after batch 715: 14.97 GB
Batch 716 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  30%|██▉       | 717/2400 [14:53<38:08,  1.36s/it]

GPU memory after batch 716: 14.97 GB
Batch 717 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  30%|██▉       | 718/2400 [14:54<38:53,  1.39s/it]

GPU memory after batch 717: 14.97 GB
Batch 718 input shapes: input_ids=torch.Size([4, 130]), attention_mask=torch.Size([4, 130])


Predicting:  30%|██▉       | 719/2400 [14:56<42:26,  1.52s/it]

GPU memory after batch 718: 14.97 GB
Batch 719 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  30%|███       | 720/2400 [14:58<41:52,  1.50s/it]

GPU memory after batch 719: 14.97 GB
Batch 720 input shapes: input_ids=torch.Size([4, 173]), attention_mask=torch.Size([4, 173])


Predicting:  30%|███       | 721/2400 [15:00<47:13,  1.69s/it]

批次 720: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes\nYes, because', 'No\nYes, because']
GPU memory after batch 720: 14.97 GB
Batch 721 input shapes: input_ids=torch.Size([4, 128]), attention_mask=torch.Size([4, 128])


Predicting:  30%|███       | 722/2400 [15:01<47:05,  1.68s/it]

GPU memory after batch 721: 14.97 GB
Batch 722 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  30%|███       | 723/2400 [15:03<45:07,  1.61s/it]

GPU memory after batch 722: 14.97 GB
Batch 723 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  30%|███       | 724/2400 [15:04<43:43,  1.57s/it]

GPU memory after batch 723: 14.97 GB
Batch 724 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  30%|███       | 725/2400 [15:06<42:13,  1.51s/it]

GPU memory after batch 724: 14.97 GB
Batch 725 input shapes: input_ids=torch.Size([4, 116]), attention_mask=torch.Size([4, 116])


Predicting:  30%|███       | 726/2400 [15:07<43:11,  1.55s/it]

GPU memory after batch 725: 14.97 GB
Batch 726 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  30%|███       | 727/2400 [15:09<42:21,  1.52s/it]

GPU memory after batch 726: 14.97 GB
Batch 727 input shapes: input_ids=torch.Size([4, 117]), attention_mask=torch.Size([4, 117])


Predicting:  30%|███       | 728/2400 [15:10<39:36,  1.42s/it]

GPU memory after batch 727: 14.97 GB
Batch 728 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  30%|███       | 729/2400 [15:11<38:34,  1.39s/it]

GPU memory after batch 728: 14.97 GB
Batch 729 input shapes: input_ids=torch.Size([4, 95]), attention_mask=torch.Size([4, 95])


Predicting:  30%|███       | 730/2400 [15:13<39:08,  1.41s/it]

GPU memory after batch 729: 14.97 GB
Batch 730 input shapes: input_ids=torch.Size([4, 145]), attention_mask=torch.Size([4, 145])


Predicting:  30%|███       | 731/2400 [15:15<43:14,  1.55s/it]

批次 730: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 730: 14.97 GB
Batch 731 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  30%|███       | 732/2400 [15:16<41:48,  1.50s/it]

GPU memory after batch 731: 14.97 GB
Batch 732 input shapes: input_ids=torch.Size([4, 116]), attention_mask=torch.Size([4, 116])


Predicting:  31%|███       | 733/2400 [15:18<42:48,  1.54s/it]

GPU memory after batch 732: 14.97 GB
Batch 733 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  31%|███       | 734/2400 [15:19<42:33,  1.53s/it]

GPU memory after batch 733: 14.97 GB
Batch 734 input shapes: input_ids=torch.Size([4, 100]), attention_mask=torch.Size([4, 100])


Predicting:  31%|███       | 735/2400 [15:21<42:22,  1.53s/it]

GPU memory after batch 734: 14.97 GB
Batch 735 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  31%|███       | 736/2400 [15:22<40:28,  1.46s/it]

GPU memory after batch 735: 14.97 GB
Batch 736 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  31%|███       | 737/2400 [15:23<39:50,  1.44s/it]

GPU memory after batch 736: 14.97 GB
Batch 737 input shapes: input_ids=torch.Size([4, 114]), attention_mask=torch.Size([4, 114])


Predicting:  31%|███       | 738/2400 [15:25<41:25,  1.50s/it]

GPU memory after batch 737: 14.97 GB
Batch 738 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:  31%|███       | 739/2400 [15:26<38:44,  1.40s/it]

GPU memory after batch 738: 14.97 GB
Batch 739 input shapes: input_ids=torch.Size([4, 111]), attention_mask=torch.Size([4, 111])


Predicting:  31%|███       | 740/2400 [15:28<40:20,  1.46s/it]

GPU memory after batch 739: 14.97 GB
Batch 740 input shapes: input_ids=torch.Size([4, 115]), attention_mask=torch.Size([4, 115])


Predicting:  31%|███       | 741/2400 [15:29<41:44,  1.51s/it]

批次 740: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 740: 14.97 GB
Batch 741 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  31%|███       | 742/2400 [15:31<39:25,  1.43s/it]

GPU memory after batch 741: 14.97 GB
Batch 742 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  31%|███       | 743/2400 [15:32<37:47,  1.37s/it]

GPU memory after batch 742: 14.97 GB
Batch 743 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  31%|███       | 744/2400 [15:33<37:54,  1.37s/it]

GPU memory after batch 743: 14.97 GB
Batch 744 input shapes: input_ids=torch.Size([4, 190]), attention_mask=torch.Size([4, 190])


Predicting:  31%|███       | 745/2400 [15:35<44:47,  1.62s/it]

GPU memory after batch 744: 14.97 GB
Batch 745 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  31%|███       | 746/2400 [15:37<42:48,  1.55s/it]

GPU memory after batch 745: 14.97 GB
Batch 746 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  31%|███       | 747/2400 [15:38<40:08,  1.46s/it]

GPU memory after batch 746: 14.97 GB
Batch 747 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  31%|███       | 748/2400 [15:39<39:32,  1.44s/it]

GPU memory after batch 747: 14.97 GB
Batch 748 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  31%|███       | 749/2400 [15:41<37:50,  1.38s/it]

GPU memory after batch 748: 14.97 GB
Batch 749 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  31%|███▏      | 750/2400 [15:42<37:13,  1.35s/it]

GPU memory after batch 749: 14.97 GB
Batch 750 input shapes: input_ids=torch.Size([4, 124]), attention_mask=torch.Size([4, 124])


Predicting:  31%|███▏      | 751/2400 [15:44<39:48,  1.45s/it]

批次 750: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 750: 14.97 GB
Batch 751 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  31%|███▏      | 752/2400 [15:45<40:19,  1.47s/it]

GPU memory after batch 751: 14.97 GB
Batch 752 input shapes: input_ids=torch.Size([4, 102]), attention_mask=torch.Size([4, 102])


Predicting:  31%|███▏      | 753/2400 [15:47<40:42,  1.48s/it]

GPU memory after batch 752: 14.97 GB
Batch 753 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  31%|███▏      | 754/2400 [15:48<40:25,  1.47s/it]

GPU memory after batch 753: 14.97 GB
Batch 754 input shapes: input_ids=torch.Size([4, 99]), attention_mask=torch.Size([4, 99])


Predicting:  31%|███▏      | 755/2400 [15:50<40:43,  1.49s/it]

GPU memory after batch 754: 14.97 GB
Batch 755 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  32%|███▏      | 756/2400 [15:51<39:10,  1.43s/it]

GPU memory after batch 755: 14.97 GB
Batch 756 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  32%|███▏      | 757/2400 [15:52<38:05,  1.39s/it]

GPU memory after batch 756: 14.97 GB
Batch 757 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  32%|███▏      | 758/2400 [15:54<38:02,  1.39s/it]

GPU memory after batch 757: 14.97 GB
Batch 758 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  32%|███▏      | 759/2400 [15:55<38:30,  1.41s/it]

GPU memory after batch 758: 14.97 GB
Batch 759 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  32%|███▏      | 760/2400 [15:56<37:38,  1.38s/it]

GPU memory after batch 759: 14.97 GB
Batch 760 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  32%|███▏      | 761/2400 [15:58<37:00,  1.36s/it]

批次 760: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 760: 14.97 GB
Batch 761 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  32%|███▏      | 762/2400 [15:59<36:33,  1.34s/it]

GPU memory after batch 761: 14.97 GB
Batch 762 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  32%|███▏      | 763/2400 [16:00<36:56,  1.35s/it]

GPU memory after batch 762: 14.97 GB
Batch 763 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  32%|███▏      | 764/2400 [16:02<37:12,  1.36s/it]

GPU memory after batch 763: 14.97 GB
Batch 764 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  32%|███▏      | 765/2400 [16:03<37:53,  1.39s/it]

GPU memory after batch 764: 14.97 GB
Batch 765 input shapes: input_ids=torch.Size([4, 110]), attention_mask=torch.Size([4, 110])


Predicting:  32%|███▏      | 766/2400 [16:05<39:30,  1.45s/it]

GPU memory after batch 765: 14.97 GB
Batch 766 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  32%|███▏      | 767/2400 [16:06<39:30,  1.45s/it]

GPU memory after batch 766: 14.97 GB
Batch 767 input shapes: input_ids=torch.Size([4, 117]), attention_mask=torch.Size([4, 117])


Predicting:  32%|███▏      | 768/2400 [16:08<40:56,  1.51s/it]

GPU memory after batch 767: 14.97 GB
Batch 768 input shapes: input_ids=torch.Size([4, 278]), attention_mask=torch.Size([4, 278])


Predicting:  32%|███▏      | 769/2400 [16:11<52:54,  1.95s/it]

GPU memory after batch 768: 14.97 GB
Batch 769 input shapes: input_ids=torch.Size([4, 182]), attention_mask=torch.Size([4, 182])


Predicting:  32%|███▏      | 770/2400 [16:13<54:41,  2.01s/it]

GPU memory after batch 769: 14.97 GB
Batch 770 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  32%|███▏      | 771/2400 [16:14<49:34,  1.83s/it]

批次 770: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 770: 14.97 GB
Batch 771 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  32%|███▏      | 772/2400 [16:16<44:43,  1.65s/it]

GPU memory after batch 771: 14.97 GB
Batch 772 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  32%|███▏      | 773/2400 [16:17<38:20,  1.41s/it]

GPU memory after batch 772: 14.97 GB
Batch 773 input shapes: input_ids=torch.Size([4, 118]), attention_mask=torch.Size([4, 118])


Predicting:  32%|███▏      | 774/2400 [16:18<36:31,  1.35s/it]

GPU memory after batch 773: 14.97 GB
Batch 774 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  32%|███▏      | 775/2400 [16:19<36:48,  1.36s/it]

GPU memory after batch 774: 14.97 GB
Batch 775 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  32%|███▏      | 776/2400 [16:20<35:47,  1.32s/it]

GPU memory after batch 775: 14.97 GB
Batch 776 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:  32%|███▏      | 777/2400 [16:22<34:34,  1.28s/it]

GPU memory after batch 776: 14.97 GB
Batch 777 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  32%|███▏      | 778/2400 [16:23<32:25,  1.20s/it]

GPU memory after batch 777: 14.97 GB
Batch 778 input shapes: input_ids=torch.Size([4, 106]), attention_mask=torch.Size([4, 106])


Predicting:  32%|███▏      | 779/2400 [16:24<35:35,  1.32s/it]

GPU memory after batch 778: 14.97 GB
Batch 779 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  32%|███▎      | 780/2400 [16:26<36:39,  1.36s/it]

GPU memory after batch 779: 14.97 GB
Batch 780 input shapes: input_ids=torch.Size([4, 116]), attention_mask=torch.Size([4, 116])


Predicting:  33%|███▎      | 781/2400 [16:27<38:50,  1.44s/it]

批次 780: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes', 'Yes\nYes, because']
GPU memory after batch 780: 14.97 GB
Batch 781 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:  33%|███▎      | 782/2400 [16:28<37:07,  1.38s/it]

GPU memory after batch 781: 14.97 GB
Batch 782 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  33%|███▎      | 783/2400 [16:30<37:41,  1.40s/it]

GPU memory after batch 782: 14.97 GB
Batch 783 input shapes: input_ids=torch.Size([4, 123]), attention_mask=torch.Size([4, 123])


Predicting:  33%|███▎      | 784/2400 [16:32<39:51,  1.48s/it]

GPU memory after batch 783: 14.97 GB
Batch 784 input shapes: input_ids=torch.Size([4, 95]), attention_mask=torch.Size([4, 95])


Predicting:  33%|███▎      | 785/2400 [16:33<39:35,  1.47s/it]

GPU memory after batch 784: 14.97 GB
Batch 785 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  33%|███▎      | 786/2400 [16:34<38:54,  1.45s/it]

GPU memory after batch 785: 14.97 GB
Batch 786 input shapes: input_ids=torch.Size([4, 123]), attention_mask=torch.Size([4, 123])


Predicting:  33%|███▎      | 787/2400 [16:36<40:41,  1.51s/it]

GPU memory after batch 786: 14.97 GB
Batch 787 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  33%|███▎      | 788/2400 [16:38<40:40,  1.51s/it]

GPU memory after batch 787: 14.97 GB
Batch 788 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  33%|███▎      | 789/2400 [16:39<39:37,  1.48s/it]

GPU memory after batch 788: 14.97 GB
Batch 789 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  33%|███▎      | 790/2400 [16:40<38:53,  1.45s/it]

GPU memory after batch 789: 14.97 GB
Batch 790 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  33%|███▎      | 791/2400 [16:42<37:39,  1.40s/it]

批次 790: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 790: 14.97 GB
Batch 791 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  33%|███▎      | 792/2400 [16:43<37:31,  1.40s/it]

GPU memory after batch 791: 14.97 GB
Batch 792 input shapes: input_ids=torch.Size([4, 249]), attention_mask=torch.Size([4, 249])


Predicting:  33%|███▎      | 793/2400 [16:46<48:20,  1.81s/it]

GPU memory after batch 792: 14.97 GB
Batch 793 input shapes: input_ids=torch.Size([4, 95]), attention_mask=torch.Size([4, 95])


Predicting:  33%|███▎      | 794/2400 [16:47<45:28,  1.70s/it]

GPU memory after batch 793: 14.97 GB
Batch 794 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  33%|███▎      | 795/2400 [16:49<43:28,  1.63s/it]

GPU memory after batch 794: 14.97 GB
Batch 795 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  33%|███▎      | 796/2400 [16:50<42:32,  1.59s/it]

GPU memory after batch 795: 14.97 GB
Batch 796 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  33%|███▎      | 797/2400 [16:52<41:21,  1.55s/it]

GPU memory after batch 796: 14.97 GB
Batch 797 input shapes: input_ids=torch.Size([4, 114]), attention_mask=torch.Size([4, 114])


Predicting:  33%|███▎      | 798/2400 [16:53<41:59,  1.57s/it]

GPU memory after batch 797: 14.97 GB
Batch 798 input shapes: input_ids=torch.Size([4, 114]), attention_mask=torch.Size([4, 114])


Predicting:  33%|███▎      | 799/2400 [16:55<42:24,  1.59s/it]

GPU memory after batch 798: 14.97 GB
Batch 799 input shapes: input_ids=torch.Size([4, 149]), attention_mask=torch.Size([4, 149])


Predicting:  33%|███▎      | 800/2400 [16:57<44:51,  1.68s/it]

GPU memory after batch 799: 14.97 GB
Batch 800 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  33%|███▎      | 801/2400 [16:58<42:30,  1.60s/it]

批次 800: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 800: 14.97 GB
Batch 801 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  33%|███▎      | 802/2400 [17:00<41:18,  1.55s/it]

GPU memory after batch 801: 14.97 GB
Batch 802 input shapes: input_ids=torch.Size([4, 131]), attention_mask=torch.Size([4, 131])


Predicting:  33%|███▎      | 803/2400 [17:01<39:51,  1.50s/it]

GPU memory after batch 802: 14.97 GB
Batch 803 input shapes: input_ids=torch.Size([4, 124]), attention_mask=torch.Size([4, 124])


Predicting:  34%|███▎      | 804/2400 [17:03<41:12,  1.55s/it]

GPU memory after batch 803: 14.97 GB
Batch 804 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  34%|███▎      | 805/2400 [17:04<38:12,  1.44s/it]

GPU memory after batch 804: 14.97 GB
Batch 805 input shapes: input_ids=torch.Size([4, 107]), attention_mask=torch.Size([4, 107])


Predicting:  34%|███▎      | 806/2400 [17:06<39:24,  1.48s/it]

GPU memory after batch 805: 14.97 GB
Batch 806 input shapes: input_ids=torch.Size([4, 119]), attention_mask=torch.Size([4, 119])


Predicting:  34%|███▎      | 807/2400 [17:07<40:34,  1.53s/it]

GPU memory after batch 806: 14.97 GB
Batch 807 input shapes: input_ids=torch.Size([4, 197]), attention_mask=torch.Size([4, 197])


Predicting:  34%|███▎      | 808/2400 [17:09<47:09,  1.78s/it]

GPU memory after batch 807: 14.97 GB
Batch 808 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  34%|███▎      | 809/2400 [17:11<44:01,  1.66s/it]

GPU memory after batch 808: 14.97 GB
Batch 809 input shapes: input_ids=torch.Size([4, 95]), attention_mask=torch.Size([4, 95])


Predicting:  34%|███▍      | 810/2400 [17:12<42:19,  1.60s/it]

GPU memory after batch 809: 14.97 GB
Batch 810 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  34%|███▍      | 811/2400 [17:14<39:57,  1.51s/it]

批次 810: 原始预测: ['No\nYes, because', 'No\nYes, because', 'No\nYes, because', 'No\n\nYes, because']
GPU memory after batch 810: 14.97 GB
Batch 811 input shapes: input_ids=torch.Size([4, 102]), attention_mask=torch.Size([4, 102])


Predicting:  34%|███▍      | 812/2400 [17:15<39:58,  1.51s/it]

GPU memory after batch 811: 14.97 GB
Batch 812 input shapes: input_ids=torch.Size([4, 113]), attention_mask=torch.Size([4, 113])


Predicting:  34%|███▍      | 813/2400 [17:17<40:52,  1.55s/it]

GPU memory after batch 812: 14.97 GB
Batch 813 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  34%|███▍      | 814/2400 [17:18<38:25,  1.45s/it]

GPU memory after batch 813: 14.97 GB
Batch 814 input shapes: input_ids=torch.Size([4, 108]), attention_mask=torch.Size([4, 108])


Predicting:  34%|███▍      | 815/2400 [17:20<39:30,  1.50s/it]

GPU memory after batch 814: 14.97 GB
Batch 815 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  34%|███▍      | 816/2400 [17:21<39:37,  1.50s/it]

GPU memory after batch 815: 14.97 GB
Batch 816 input shapes: input_ids=torch.Size([4, 188]), attention_mask=torch.Size([4, 188])


Predicting:  34%|███▍      | 817/2400 [17:23<45:11,  1.71s/it]

GPU memory after batch 816: 14.97 GB
Batch 817 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  34%|███▍      | 818/2400 [17:25<42:34,  1.61s/it]

GPU memory after batch 817: 14.97 GB
Batch 818 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  34%|███▍      | 819/2400 [17:26<40:05,  1.52s/it]

GPU memory after batch 818: 14.97 GB
Batch 819 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  34%|███▍      | 820/2400 [17:27<38:20,  1.46s/it]

GPU memory after batch 819: 14.97 GB
Batch 820 input shapes: input_ids=torch.Size([4, 133]), attention_mask=torch.Size([4, 133])


Predicting:  34%|███▍      | 821/2400 [17:29<41:08,  1.56s/it]

批次 820: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 820: 14.97 GB
Batch 821 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  34%|███▍      | 822/2400 [17:31<40:43,  1.55s/it]

GPU memory after batch 821: 14.97 GB
Batch 822 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  34%|███▍      | 823/2400 [17:32<40:25,  1.54s/it]

GPU memory after batch 822: 14.97 GB
Batch 823 input shapes: input_ids=torch.Size([4, 102]), attention_mask=torch.Size([4, 102])


Predicting:  34%|███▍      | 824/2400 [17:34<40:12,  1.53s/it]

GPU memory after batch 823: 14.97 GB
Batch 824 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  34%|███▍      | 825/2400 [17:35<39:03,  1.49s/it]

GPU memory after batch 824: 14.97 GB
Batch 825 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  34%|███▍      | 826/2400 [17:36<38:14,  1.46s/it]

GPU memory after batch 825: 14.97 GB
Batch 826 input shapes: input_ids=torch.Size([4, 94]), attention_mask=torch.Size([4, 94])


Predicting:  34%|███▍      | 827/2400 [17:38<38:10,  1.46s/it]

GPU memory after batch 826: 14.97 GB
Batch 827 input shapes: input_ids=torch.Size([4, 94]), attention_mask=torch.Size([4, 94])


Predicting:  34%|███▍      | 828/2400 [17:39<38:06,  1.45s/it]

GPU memory after batch 827: 14.97 GB
Batch 828 input shapes: input_ids=torch.Size([4, 139]), attention_mask=torch.Size([4, 139])


Predicting:  35%|███▍      | 829/2400 [17:41<41:18,  1.58s/it]

GPU memory after batch 828: 14.97 GB
Batch 829 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  35%|███▍      | 830/2400 [17:43<39:06,  1.49s/it]

GPU memory after batch 829: 14.97 GB
Batch 830 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  35%|███▍      | 831/2400 [17:44<37:01,  1.42s/it]

批次 830: 原始预测: ['No\n\nStep-by-step', 'No\n\nStep-by-step', 'No\n\nYes', 'No\n\nYes']
GPU memory after batch 830: 14.97 GB
Batch 831 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  35%|███▍      | 832/2400 [17:45<36:46,  1.41s/it]

GPU memory after batch 831: 14.97 GB
Batch 832 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  35%|███▍      | 833/2400 [17:46<35:55,  1.38s/it]

GPU memory after batch 832: 14.97 GB
Batch 833 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  35%|███▍      | 834/2400 [17:48<35:59,  1.38s/it]

GPU memory after batch 833: 14.97 GB
Batch 834 input shapes: input_ids=torch.Size([4, 102]), attention_mask=torch.Size([4, 102])


Predicting:  35%|███▍      | 835/2400 [17:49<37:02,  1.42s/it]

GPU memory after batch 834: 14.97 GB
Batch 835 input shapes: input_ids=torch.Size([4, 122]), attention_mask=torch.Size([4, 122])


Predicting:  35%|███▍      | 836/2400 [17:51<38:58,  1.50s/it]

GPU memory after batch 835: 14.97 GB
Batch 836 input shapes: input_ids=torch.Size([4, 129]), attention_mask=torch.Size([4, 129])


Predicting:  35%|███▍      | 837/2400 [17:53<41:26,  1.59s/it]

GPU memory after batch 836: 14.97 GB
Batch 837 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  35%|███▍      | 838/2400 [17:54<39:49,  1.53s/it]

GPU memory after batch 837: 14.97 GB
Batch 838 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  35%|███▍      | 839/2400 [17:56<39:11,  1.51s/it]

GPU memory after batch 838: 14.97 GB
Batch 839 input shapes: input_ids=torch.Size([4, 109]), attention_mask=torch.Size([4, 109])


Predicting:  35%|███▌      | 840/2400 [17:57<39:51,  1.53s/it]

GPU memory after batch 839: 14.97 GB
Batch 840 input shapes: input_ids=torch.Size([4, 221]), attention_mask=torch.Size([4, 221])


Predicting:  35%|███▌      | 841/2400 [18:00<47:07,  1.81s/it]

批次 840: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes', 'No']
GPU memory after batch 840: 14.97 GB
Batch 841 input shapes: input_ids=torch.Size([4, 109]), attention_mask=torch.Size([4, 109])


Predicting:  35%|███▌      | 842/2400 [18:01<41:57,  1.62s/it]

GPU memory after batch 841: 14.97 GB
Batch 842 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  35%|███▌      | 843/2400 [18:02<39:28,  1.52s/it]

GPU memory after batch 842: 14.97 GB
Batch 843 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  35%|███▌      | 844/2400 [18:04<39:22,  1.52s/it]

GPU memory after batch 843: 14.97 GB
Batch 844 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  35%|███▌      | 845/2400 [18:05<39:18,  1.52s/it]

GPU memory after batch 844: 14.97 GB
Batch 845 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  35%|███▌      | 846/2400 [18:07<39:16,  1.52s/it]

GPU memory after batch 845: 14.97 GB
Batch 846 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  35%|███▌      | 847/2400 [18:08<38:43,  1.50s/it]

GPU memory after batch 846: 14.97 GB
Batch 847 input shapes: input_ids=torch.Size([4, 94]), attention_mask=torch.Size([4, 94])


Predicting:  35%|███▌      | 848/2400 [18:10<38:21,  1.48s/it]

GPU memory after batch 847: 14.97 GB
Batch 848 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  35%|███▌      | 849/2400 [18:11<36:55,  1.43s/it]

GPU memory after batch 848: 14.97 GB
Batch 849 input shapes: input_ids=torch.Size([4, 95]), attention_mask=torch.Size([4, 95])


Predicting:  35%|███▌      | 850/2400 [18:12<37:05,  1.44s/it]

GPU memory after batch 849: 14.97 GB
Batch 850 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  35%|███▌      | 851/2400 [18:14<37:11,  1.44s/it]

批次 850: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 850: 14.97 GB
Batch 851 input shapes: input_ids=torch.Size([4, 110]), attention_mask=torch.Size([4, 110])


Predicting:  36%|███▌      | 852/2400 [18:15<38:19,  1.49s/it]

GPU memory after batch 851: 14.97 GB
Batch 852 input shapes: input_ids=torch.Size([4, 113]), attention_mask=torch.Size([4, 113])


Predicting:  36%|███▌      | 853/2400 [18:17<39:24,  1.53s/it]

GPU memory after batch 852: 14.97 GB
Batch 853 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  36%|███▌      | 854/2400 [18:18<37:37,  1.46s/it]

GPU memory after batch 853: 14.97 GB
Batch 854 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  36%|███▌      | 855/2400 [18:20<38:35,  1.50s/it]

GPU memory after batch 854: 14.97 GB
Batch 855 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  36%|███▌      | 856/2400 [18:22<39:16,  1.53s/it]

GPU memory after batch 855: 14.97 GB
Batch 856 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  36%|███▌      | 857/2400 [18:23<38:10,  1.48s/it]

GPU memory after batch 856: 14.97 GB
Batch 857 input shapes: input_ids=torch.Size([4, 114]), attention_mask=torch.Size([4, 114])


Predicting:  36%|███▌      | 858/2400 [18:25<39:15,  1.53s/it]

GPU memory after batch 857: 14.97 GB
Batch 858 input shapes: input_ids=torch.Size([4, 118]), attention_mask=torch.Size([4, 118])


Predicting:  36%|███▌      | 859/2400 [18:26<40:01,  1.56s/it]

GPU memory after batch 858: 14.97 GB
Batch 859 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  36%|███▌      | 860/2400 [18:27<37:30,  1.46s/it]

GPU memory after batch 859: 14.97 GB
Batch 860 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  36%|███▌      | 861/2400 [18:29<38:28,  1.50s/it]

批次 860: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes', 'Yes']
GPU memory after batch 860: 14.97 GB
Batch 861 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  36%|███▌      | 862/2400 [18:31<38:32,  1.50s/it]

GPU memory after batch 861: 14.97 GB
Batch 862 input shapes: input_ids=torch.Size([4, 95]), attention_mask=torch.Size([4, 95])


Predicting:  36%|███▌      | 863/2400 [18:32<38:07,  1.49s/it]

GPU memory after batch 862: 14.97 GB
Batch 863 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  36%|███▌      | 864/2400 [18:33<37:49,  1.48s/it]

GPU memory after batch 863: 14.97 GB
Batch 864 input shapes: input_ids=torch.Size([4, 185]), attention_mask=torch.Size([4, 185])


Predicting:  36%|███▌      | 865/2400 [18:36<43:22,  1.70s/it]

GPU memory after batch 864: 14.97 GB
Batch 865 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  36%|███▌      | 866/2400 [18:37<40:19,  1.58s/it]

GPU memory after batch 865: 14.97 GB
Batch 866 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  36%|███▌      | 867/2400 [18:38<39:18,  1.54s/it]

GPU memory after batch 866: 14.97 GB
Batch 867 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  36%|███▌      | 868/2400 [18:40<38:35,  1.51s/it]

GPU memory after batch 867: 14.97 GB
Batch 868 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  36%|███▌      | 869/2400 [18:41<38:05,  1.49s/it]

GPU memory after batch 868: 14.97 GB
Batch 869 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  36%|███▋      | 870/2400 [18:43<37:44,  1.48s/it]

GPU memory after batch 869: 14.97 GB
Batch 870 input shapes: input_ids=torch.Size([4, 138]), attention_mask=torch.Size([4, 138])


Predicting:  36%|███▋      | 871/2400 [18:45<40:38,  1.59s/it]

批次 870: 原始预测: ['No', 'No\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 870: 14.97 GB
Batch 871 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:  36%|███▋      | 872/2400 [18:46<37:21,  1.47s/it]

GPU memory after batch 871: 14.97 GB
Batch 872 input shapes: input_ids=torch.Size([4, 115]), attention_mask=torch.Size([4, 115])


Predicting:  36%|███▋      | 873/2400 [18:47<38:35,  1.52s/it]

GPU memory after batch 872: 14.97 GB
Batch 873 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  36%|███▋      | 874/2400 [18:49<38:31,  1.51s/it]

GPU memory after batch 873: 14.97 GB
Batch 874 input shapes: input_ids=torch.Size([4, 189]), attention_mask=torch.Size([4, 189])


Predicting:  36%|███▋      | 875/2400 [18:51<43:46,  1.72s/it]

GPU memory after batch 874: 14.97 GB
Batch 875 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  36%|███▋      | 876/2400 [18:53<42:10,  1.66s/it]

GPU memory after batch 875: 14.97 GB
Batch 876 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  37%|███▋      | 877/2400 [18:54<38:53,  1.53s/it]

GPU memory after batch 876: 14.97 GB
Batch 877 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  37%|███▋      | 878/2400 [18:55<38:13,  1.51s/it]

GPU memory after batch 877: 14.97 GB
Batch 878 input shapes: input_ids=torch.Size([4, 116]), attention_mask=torch.Size([4, 116])


Predicting:  37%|███▋      | 879/2400 [18:57<39:07,  1.54s/it]

GPU memory after batch 878: 14.97 GB
Batch 879 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  37%|███▋      | 880/2400 [18:58<36:18,  1.43s/it]

GPU memory after batch 879: 14.97 GB
Batch 880 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  37%|███▋      | 881/2400 [19:00<36:24,  1.44s/it]

批次 880: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes', 'Yes']
GPU memory after batch 880: 14.97 GB
Batch 881 input shapes: input_ids=torch.Size([4, 117]), attention_mask=torch.Size([4, 117])


Predicting:  37%|███▋      | 882/2400 [19:01<37:51,  1.50s/it]

GPU memory after batch 881: 14.97 GB
Batch 882 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  37%|███▋      | 883/2400 [19:02<34:45,  1.37s/it]

GPU memory after batch 882: 14.97 GB
Batch 883 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  37%|███▋      | 884/2400 [19:04<33:41,  1.33s/it]

GPU memory after batch 883: 14.97 GB
Batch 884 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  37%|███▋      | 885/2400 [19:05<35:01,  1.39s/it]

GPU memory after batch 884: 14.97 GB
Batch 885 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  37%|███▋      | 886/2400 [19:06<34:59,  1.39s/it]

GPU memory after batch 885: 14.97 GB
Batch 886 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  37%|███▋      | 887/2400 [19:08<34:58,  1.39s/it]

GPU memory after batch 886: 14.97 GB
Batch 887 input shapes: input_ids=torch.Size([4, 118]), attention_mask=torch.Size([4, 118])


Predicting:  37%|███▋      | 888/2400 [19:09<36:48,  1.46s/it]

GPU memory after batch 887: 14.97 GB
Batch 888 input shapes: input_ids=torch.Size([4, 185]), attention_mask=torch.Size([4, 185])


Predicting:  37%|███▋      | 889/2400 [19:12<42:24,  1.68s/it]

GPU memory after batch 888: 14.97 GB
Batch 889 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  37%|███▋      | 890/2400 [19:13<40:38,  1.61s/it]

GPU memory after batch 889: 14.97 GB
Batch 890 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  37%|███▋      | 891/2400 [19:15<39:23,  1.57s/it]

批次 890: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nAnswer\nYes', 'Yes\nYes, because']
GPU memory after batch 890: 14.97 GB
Batch 891 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  37%|███▋      | 892/2400 [19:16<37:22,  1.49s/it]

GPU memory after batch 891: 14.97 GB
Batch 892 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  37%|███▋      | 893/2400 [19:17<37:34,  1.50s/it]

GPU memory after batch 892: 14.97 GB
Batch 893 input shapes: input_ids=torch.Size([4, 117]), attention_mask=torch.Size([4, 117])


Predicting:  37%|███▋      | 894/2400 [19:19<38:34,  1.54s/it]

GPU memory after batch 893: 14.97 GB
Batch 894 input shapes: input_ids=torch.Size([4, 125]), attention_mask=torch.Size([4, 125])


Predicting:  37%|███▋      | 895/2400 [19:21<39:33,  1.58s/it]

GPU memory after batch 894: 14.97 GB
Batch 895 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  37%|███▋      | 896/2400 [19:22<37:28,  1.50s/it]

GPU memory after batch 895: 14.97 GB
Batch 896 input shapes: input_ids=torch.Size([4, 115]), attention_mask=torch.Size([4, 115])


Predicting:  37%|███▋      | 897/2400 [19:24<38:28,  1.54s/it]

GPU memory after batch 896: 14.97 GB
Batch 897 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  37%|███▋      | 898/2400 [19:25<36:11,  1.45s/it]

GPU memory after batch 897: 14.97 GB
Batch 898 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  37%|███▋      | 899/2400 [19:26<35:44,  1.43s/it]

GPU memory after batch 898: 14.97 GB
Batch 899 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  38%|███▊      | 900/2400 [19:28<36:19,  1.45s/it]

GPU memory after batch 899: 14.97 GB
Batch 900 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  38%|███▊      | 901/2400 [19:29<35:48,  1.43s/it]

批次 900: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 900: 14.97 GB
Batch 901 input shapes: input_ids=torch.Size([4, 112]), attention_mask=torch.Size([4, 112])


Predicting:  38%|███▊      | 902/2400 [19:31<36:58,  1.48s/it]

GPU memory after batch 901: 14.97 GB
Batch 902 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  38%|███▊      | 903/2400 [19:32<35:05,  1.41s/it]

GPU memory after batch 902: 14.97 GB
Batch 903 input shapes: input_ids=torch.Size([4, 114]), attention_mask=torch.Size([4, 114])


Predicting:  38%|███▊      | 904/2400 [19:34<36:44,  1.47s/it]

GPU memory after batch 903: 14.97 GB
Batch 904 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  38%|███▊      | 905/2400 [19:35<34:54,  1.40s/it]

GPU memory after batch 904: 14.97 GB
Batch 905 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  38%|███▊      | 906/2400 [19:36<33:40,  1.35s/it]

GPU memory after batch 905: 14.97 GB
Batch 906 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  38%|███▊      | 907/2400 [19:37<33:54,  1.36s/it]

GPU memory after batch 906: 14.97 GB
Batch 907 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  38%|███▊      | 908/2400 [19:39<33:26,  1.34s/it]

GPU memory after batch 907: 14.97 GB
Batch 908 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  38%|███▊      | 909/2400 [19:40<33:44,  1.36s/it]

GPU memory after batch 908: 14.97 GB
Batch 909 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  38%|███▊      | 910/2400 [19:41<32:48,  1.32s/it]

GPU memory after batch 909: 14.97 GB
Batch 910 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  38%|███▊      | 911/2400 [19:43<33:43,  1.36s/it]

批次 910: 原始预测: ['Yes', 'Yes', 'Yes\n\nYes, because', 'Yes\n\nYes, because']
GPU memory after batch 910: 14.97 GB
Batch 911 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  38%|███▊      | 912/2400 [19:44<34:22,  1.39s/it]

GPU memory after batch 911: 14.97 GB
Batch 912 input shapes: input_ids=torch.Size([4, 204]), attention_mask=torch.Size([4, 204])


Predicting:  38%|███▊      | 913/2400 [19:47<41:49,  1.69s/it]

GPU memory after batch 912: 14.97 GB
Batch 913 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  38%|███▊      | 914/2400 [19:48<40:02,  1.62s/it]

GPU memory after batch 913: 14.97 GB
Batch 914 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  38%|███▊      | 915/2400 [19:49<38:18,  1.55s/it]

GPU memory after batch 914: 14.97 GB
Batch 915 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  38%|███▊      | 916/2400 [19:51<37:05,  1.50s/it]

GPU memory after batch 915: 14.97 GB
Batch 916 input shapes: input_ids=torch.Size([4, 100]), attention_mask=torch.Size([4, 100])


Predicting:  38%|███▊      | 917/2400 [19:52<37:10,  1.50s/it]

GPU memory after batch 916: 14.97 GB
Batch 917 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  38%|███▊      | 918/2400 [19:54<35:40,  1.44s/it]

GPU memory after batch 917: 14.97 GB
Batch 918 input shapes: input_ids=torch.Size([4, 110]), attention_mask=torch.Size([4, 110])


Predicting:  38%|███▊      | 919/2400 [19:55<36:44,  1.49s/it]

GPU memory after batch 918: 14.97 GB
Batch 919 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  38%|███▊      | 920/2400 [19:57<35:59,  1.46s/it]

GPU memory after batch 919: 14.97 GB
Batch 920 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  38%|███▊      | 921/2400 [19:58<35:25,  1.44s/it]

批次 920: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 920: 14.97 GB
Batch 921 input shapes: input_ids=torch.Size([4, 129]), attention_mask=torch.Size([4, 129])


Predicting:  38%|███▊      | 922/2400 [20:00<38:10,  1.55s/it]

GPU memory after batch 921: 14.97 GB
Batch 922 input shapes: input_ids=torch.Size([4, 106]), attention_mask=torch.Size([4, 106])


Predicting:  38%|███▊      | 923/2400 [20:01<38:26,  1.56s/it]

GPU memory after batch 922: 14.97 GB
Batch 923 input shapes: input_ids=torch.Size([4, 145]), attention_mask=torch.Size([4, 145])


Predicting:  38%|███▊      | 924/2400 [20:03<40:54,  1.66s/it]

GPU memory after batch 923: 14.97 GB
Batch 924 input shapes: input_ids=torch.Size([4, 136]), attention_mask=torch.Size([4, 136])


Predicting:  39%|███▊      | 925/2400 [20:05<42:00,  1.71s/it]

GPU memory after batch 924: 14.97 GB
Batch 925 input shapes: input_ids=torch.Size([4, 116]), attention_mask=torch.Size([4, 116])


Predicting:  39%|███▊      | 926/2400 [20:07<41:24,  1.69s/it]

GPU memory after batch 925: 14.97 GB
Batch 926 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  39%|███▊      | 927/2400 [20:08<38:35,  1.57s/it]

GPU memory after batch 926: 14.97 GB
Batch 927 input shapes: input_ids=torch.Size([4, 132]), attention_mask=torch.Size([4, 132])


Predicting:  39%|███▊      | 928/2400 [20:10<40:21,  1.64s/it]

GPU memory after batch 927: 14.97 GB
Batch 928 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  39%|███▊      | 929/2400 [20:11<38:52,  1.59s/it]

GPU memory after batch 928: 14.97 GB
Batch 929 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  39%|███▉      | 930/2400 [20:13<37:50,  1.54s/it]

GPU memory after batch 929: 14.97 GB
Batch 930 input shapes: input_ids=torch.Size([4, 131]), attention_mask=torch.Size([4, 131])


Predicting:  39%|███▉      | 931/2400 [20:15<39:48,  1.63s/it]

批次 930: 原始预测: ['Yes\nYes, because', 'Yes\nExplanation: The', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 930: 14.97 GB
Batch 931 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  39%|███▉      | 932/2400 [20:16<38:01,  1.55s/it]

GPU memory after batch 931: 14.97 GB
Batch 932 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  39%|███▉      | 933/2400 [20:17<36:46,  1.50s/it]

GPU memory after batch 932: 14.97 GB
Batch 933 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  39%|███▉      | 934/2400 [20:19<35:15,  1.44s/it]

GPU memory after batch 933: 14.97 GB
Batch 934 input shapes: input_ids=torch.Size([4, 121]), attention_mask=torch.Size([4, 121])


Predicting:  39%|███▉      | 935/2400 [20:20<36:52,  1.51s/it]

GPU memory after batch 934: 14.97 GB
Batch 935 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  39%|███▉      | 936/2400 [20:22<35:58,  1.47s/it]

GPU memory after batch 935: 14.97 GB
Batch 936 input shapes: input_ids=torch.Size([4, 187]), attention_mask=torch.Size([4, 187])


Predicting:  39%|███▉      | 937/2400 [20:24<41:17,  1.69s/it]

GPU memory after batch 936: 14.97 GB
Batch 937 input shapes: input_ids=torch.Size([4, 127]), attention_mask=torch.Size([4, 127])


Predicting:  39%|███▉      | 938/2400 [20:26<41:07,  1.69s/it]

GPU memory after batch 937: 14.97 GB
Batch 938 input shapes: input_ids=torch.Size([4, 127]), attention_mask=torch.Size([4, 127])


Predicting:  39%|███▉      | 939/2400 [20:27<41:00,  1.68s/it]

GPU memory after batch 938: 14.97 GB
Batch 939 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  39%|███▉      | 940/2400 [20:29<38:11,  1.57s/it]

GPU memory after batch 939: 14.97 GB
Batch 940 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  39%|███▉      | 941/2400 [20:30<36:50,  1.51s/it]

批次 940: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 940: 14.97 GB
Batch 941 input shapes: input_ids=torch.Size([4, 131]), attention_mask=torch.Size([4, 131])


Predicting:  39%|███▉      | 942/2400 [20:32<39:00,  1.61s/it]

GPU memory after batch 941: 14.97 GB
Batch 942 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  39%|███▉      | 943/2400 [20:33<35:50,  1.48s/it]

GPU memory after batch 942: 14.97 GB
Batch 943 input shapes: input_ids=torch.Size([4, 119]), attention_mask=torch.Size([4, 119])


Predicting:  39%|███▉      | 944/2400 [20:35<36:56,  1.52s/it]

GPU memory after batch 943: 14.97 GB
Batch 944 input shapes: input_ids=torch.Size([4, 115]), attention_mask=torch.Size([4, 115])


Predicting:  39%|███▉      | 945/2400 [20:36<37:42,  1.56s/it]

GPU memory after batch 944: 14.97 GB
Batch 945 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  39%|███▉      | 946/2400 [20:38<37:25,  1.54s/it]

GPU memory after batch 945: 14.97 GB
Batch 946 input shapes: input_ids=torch.Size([4, 141]), attention_mask=torch.Size([4, 141])


Predicting:  39%|███▉      | 947/2400 [20:40<39:43,  1.64s/it]

GPU memory after batch 946: 14.97 GB
Batch 947 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  40%|███▉      | 948/2400 [20:41<38:47,  1.60s/it]

GPU memory after batch 947: 14.97 GB
Batch 948 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  40%|███▉      | 949/2400 [20:42<36:33,  1.51s/it]

GPU memory after batch 948: 14.97 GB
Batch 949 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  40%|███▉      | 950/2400 [20:44<35:37,  1.47s/it]

GPU memory after batch 949: 14.97 GB
Batch 950 input shapes: input_ids=torch.Size([4, 127]), attention_mask=torch.Size([4, 127])


Predicting:  40%|███▉      | 951/2400 [20:46<37:04,  1.53s/it]

批次 950: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nYes\nYes', 'No\nYes\nYes']
GPU memory after batch 950: 14.97 GB
Batch 951 input shapes: input_ids=torch.Size([4, 115]), attention_mask=torch.Size([4, 115])


Predicting:  40%|███▉      | 952/2400 [20:47<34:33,  1.43s/it]

GPU memory after batch 951: 14.97 GB
Batch 952 input shapes: input_ids=torch.Size([4, 118]), attention_mask=torch.Size([4, 118])


Predicting:  40%|███▉      | 953/2400 [20:48<32:48,  1.36s/it]

GPU memory after batch 952: 14.97 GB
Batch 953 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  40%|███▉      | 954/2400 [20:49<33:53,  1.41s/it]

GPU memory after batch 953: 14.97 GB
Batch 954 input shapes: input_ids=torch.Size([4, 117]), attention_mask=torch.Size([4, 117])


Predicting:  40%|███▉      | 955/2400 [20:51<32:19,  1.34s/it]

GPU memory after batch 954: 14.97 GB
Batch 955 input shapes: input_ids=torch.Size([4, 114]), attention_mask=torch.Size([4, 114])


Predicting:  40%|███▉      | 956/2400 [20:52<34:23,  1.43s/it]

GPU memory after batch 955: 14.97 GB
Batch 956 input shapes: input_ids=torch.Size([4, 139]), attention_mask=torch.Size([4, 139])


Predicting:  40%|███▉      | 957/2400 [20:54<37:29,  1.56s/it]

GPU memory after batch 956: 14.97 GB
Batch 957 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  40%|███▉      | 958/2400 [20:56<37:08,  1.55s/it]

GPU memory after batch 957: 14.97 GB
Batch 958 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  40%|███▉      | 959/2400 [20:57<35:20,  1.47s/it]

GPU memory after batch 958: 14.97 GB
Batch 959 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  40%|████      | 960/2400 [20:58<34:42,  1.45s/it]

GPU memory after batch 959: 14.97 GB
Batch 960 input shapes: input_ids=torch.Size([4, 191]), attention_mask=torch.Size([4, 191])


Predicting:  40%|████      | 961/2400 [21:01<40:10,  1.67s/it]

批次 960: 原始预测: ['No\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 960: 14.97 GB
Batch 961 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  40%|████      | 962/2400 [21:02<37:27,  1.56s/it]

GPU memory after batch 961: 14.97 GB
Batch 962 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  40%|████      | 963/2400 [21:03<37:04,  1.55s/it]

GPU memory after batch 962: 14.97 GB
Batch 963 input shapes: input_ids=torch.Size([4, 99]), attention_mask=torch.Size([4, 99])


Predicting:  40%|████      | 964/2400 [21:05<36:48,  1.54s/it]

GPU memory after batch 963: 14.97 GB
Batch 964 input shapes: input_ids=torch.Size([4, 163]), attention_mask=torch.Size([4, 163])


Predicting:  40%|████      | 965/2400 [21:07<40:41,  1.70s/it]

GPU memory after batch 964: 14.97 GB
Batch 965 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  40%|████      | 966/2400 [21:08<38:51,  1.63s/it]

GPU memory after batch 965: 14.97 GB
Batch 966 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  40%|████      | 967/2400 [21:10<37:08,  1.56s/it]

GPU memory after batch 966: 14.97 GB
Batch 967 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:  40%|████      | 968/2400 [21:11<34:49,  1.46s/it]

GPU memory after batch 967: 14.97 GB
Batch 968 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  40%|████      | 969/2400 [21:12<34:17,  1.44s/it]

GPU memory after batch 968: 14.97 GB
Batch 969 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  40%|████      | 970/2400 [21:14<33:17,  1.40s/it]

GPU memory after batch 969: 14.97 GB
Batch 970 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  40%|████      | 971/2400 [21:15<33:12,  1.39s/it]

批次 970: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 970: 14.97 GB
Batch 971 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  40%|████      | 972/2400 [21:16<33:08,  1.39s/it]

GPU memory after batch 971: 14.97 GB
Batch 972 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  41%|████      | 973/2400 [21:18<32:27,  1.37s/it]

GPU memory after batch 972: 14.97 GB
Batch 973 input shapes: input_ids=torch.Size([4, 94]), attention_mask=torch.Size([4, 94])


Predicting:  41%|████      | 974/2400 [21:19<33:03,  1.39s/it]

GPU memory after batch 973: 14.97 GB
Batch 974 input shapes: input_ids=torch.Size([4, 368]), attention_mask=torch.Size([4, 368])


Predicting:  41%|████      | 975/2400 [21:23<46:42,  1.97s/it]

GPU memory after batch 974: 14.97 GB
Batch 975 input shapes: input_ids=torch.Size([4, 461]), attention_mask=torch.Size([4, 461])


Predicting:  41%|████      | 976/2400 [21:27<1:01:50,  2.61s/it]

GPU memory after batch 975: 14.97 GB
Batch 976 input shapes: input_ids=torch.Size([4, 255]), attention_mask=torch.Size([4, 255])


Predicting:  41%|████      | 977/2400 [21:29<59:37,  2.51s/it]  

GPU memory after batch 976: 14.97 GB
Batch 977 input shapes: input_ids=torch.Size([4, 355]), attention_mask=torch.Size([4, 355])


Predicting:  41%|████      | 978/2400 [21:32<1:04:52,  2.74s/it]

GPU memory after batch 977: 14.97 GB
Batch 978 input shapes: input_ids=torch.Size([4, 328]), attention_mask=torch.Size([4, 328])


Predicting:  41%|████      | 979/2400 [21:35<1:06:34,  2.81s/it]

GPU memory after batch 978: 14.97 GB
Batch 979 input shapes: input_ids=torch.Size([4, 362]), attention_mask=torch.Size([4, 362])


Predicting:  41%|████      | 980/2400 [21:39<1:13:22,  3.10s/it]

GPU memory after batch 979: 14.97 GB
Batch 980 input shapes: input_ids=torch.Size([4, 460]), attention_mask=torch.Size([4, 460])


Predicting:  41%|████      | 981/2400 [21:43<1:16:57,  3.25s/it]

批次 980: 原始预测: ['No', 'No', 'Yes', 'Yes']
GPU memory after batch 980: 14.97 GB
Batch 981 input shapes: input_ids=torch.Size([4, 440]), attention_mask=torch.Size([4, 440])


Predicting:  41%|████      | 982/2400 [21:46<1:20:32,  3.41s/it]

GPU memory after batch 981: 14.97 GB
Batch 982 input shapes: input_ids=torch.Size([4, 430]), attention_mask=torch.Size([4, 430])


Predicting:  41%|████      | 983/2400 [21:50<1:22:59,  3.51s/it]

GPU memory after batch 982: 14.97 GB
Batch 983 input shapes: input_ids=torch.Size([4, 379]), attention_mask=torch.Size([4, 379])


Predicting:  41%|████      | 984/2400 [21:53<1:21:56,  3.47s/it]

GPU memory after batch 983: 14.97 GB
Batch 984 input shapes: input_ids=torch.Size([4, 358]), attention_mask=torch.Size([4, 358])


Predicting:  41%|████      | 985/2400 [21:57<1:23:40,  3.55s/it]

GPU memory after batch 984: 14.97 GB
Batch 985 input shapes: input_ids=torch.Size([4, 269]), attention_mask=torch.Size([4, 269])


Predicting:  41%|████      | 986/2400 [22:00<1:16:10,  3.23s/it]

GPU memory after batch 985: 14.97 GB
Batch 986 input shapes: input_ids=torch.Size([4, 432]), attention_mask=torch.Size([4, 432])


Predicting:  41%|████      | 987/2400 [22:03<1:19:51,  3.39s/it]

GPU memory after batch 986: 14.97 GB
Batch 987 input shapes: input_ids=torch.Size([4, 432]), attention_mask=torch.Size([4, 432])


Predicting:  41%|████      | 988/2400 [22:07<1:19:02,  3.36s/it]

GPU memory after batch 987: 14.97 GB
Batch 988 input shapes: input_ids=torch.Size([4, 437]), attention_mask=torch.Size([4, 437])


Predicting:  41%|████      | 989/2400 [22:11<1:21:50,  3.48s/it]

GPU memory after batch 988: 14.97 GB
Batch 989 input shapes: input_ids=torch.Size([4, 455]), attention_mask=torch.Size([4, 455])


Predicting:  41%|████▏     | 990/2400 [22:14<1:22:43,  3.52s/it]

GPU memory after batch 989: 14.97 GB
Batch 990 input shapes: input_ids=torch.Size([4, 346]), attention_mask=torch.Size([4, 346])


Predicting:  41%|████▏     | 991/2400 [22:18<1:22:57,  3.53s/it]

批次 990: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes', 'No']
GPU memory after batch 990: 14.97 GB
Batch 991 input shapes: input_ids=torch.Size([4, 335]), attention_mask=torch.Size([4, 335])


Predicting:  41%|████▏     | 992/2400 [22:21<1:19:13,  3.38s/it]

GPU memory after batch 991: 14.97 GB
Batch 992 input shapes: input_ids=torch.Size([4, 359]), attention_mask=torch.Size([4, 359])


Predicting:  41%|████▏     | 993/2400 [22:24<1:21:38,  3.48s/it]

GPU memory after batch 992: 14.97 GB
Batch 993 input shapes: input_ids=torch.Size([4, 304]), attention_mask=torch.Size([4, 304])


Predicting:  41%|████▏     | 994/2400 [22:27<1:16:36,  3.27s/it]

GPU memory after batch 993: 14.97 GB
Batch 994 input shapes: input_ids=torch.Size([4, 357]), attention_mask=torch.Size([4, 357])


Predicting:  41%|████▏     | 995/2400 [22:31<1:19:46,  3.41s/it]

GPU memory after batch 994: 14.97 GB
Batch 995 input shapes: input_ids=torch.Size([4, 438]), attention_mask=torch.Size([4, 438])


Predicting:  42%|████▏     | 996/2400 [22:34<1:18:52,  3.37s/it]

GPU memory after batch 995: 14.97 GB
Batch 996 input shapes: input_ids=torch.Size([4, 306]), attention_mask=torch.Size([4, 306])


Predicting:  42%|████▏     | 997/2400 [22:37<1:18:04,  3.34s/it]

GPU memory after batch 996: 14.97 GB
Batch 997 input shapes: input_ids=torch.Size([4, 372]), attention_mask=torch.Size([4, 372])


Predicting:  42%|████▏     | 998/2400 [22:41<1:17:59,  3.34s/it]

GPU memory after batch 997: 14.97 GB
Batch 998 input shapes: input_ids=torch.Size([4, 353]), attention_mask=torch.Size([4, 353])


Predicting:  42%|████▏     | 999/2400 [22:45<1:20:39,  3.45s/it]

GPU memory after batch 998: 14.97 GB
Batch 999 input shapes: input_ids=torch.Size([4, 403]), attention_mask=torch.Size([4, 403])


Predicting:  42%|████▏     | 1000/2400 [22:48<1:21:39,  3.50s/it]

GPU memory after batch 999: 14.97 GB
Batch 1000 input shapes: input_ids=torch.Size([4, 279]), attention_mask=torch.Size([4, 279])


Predicting:  42%|████▏     | 1001/2400 [22:51<1:14:44,  3.21s/it]

批次 1000: 原始预测: ['Yes', 'Yes', 'No', 'No']
GPU memory after batch 1000: 14.97 GB
Batch 1001 input shapes: input_ids=torch.Size([4, 439]), attention_mask=torch.Size([4, 439])


Predicting:  42%|████▏     | 1002/2400 [22:54<1:18:36,  3.37s/it]

GPU memory after batch 1001: 14.97 GB
Batch 1002 input shapes: input_ids=torch.Size([4, 347]), attention_mask=torch.Size([4, 347])


Predicting:  42%|████▏     | 1003/2400 [22:58<1:16:36,  3.29s/it]

GPU memory after batch 1002: 14.97 GB
Batch 1003 input shapes: input_ids=torch.Size([4, 375]), attention_mask=torch.Size([4, 375])


Predicting:  42%|████▏     | 1004/2400 [23:01<1:20:08,  3.44s/it]

GPU memory after batch 1003: 14.97 GB
Batch 1004 input shapes: input_ids=torch.Size([4, 463]), attention_mask=torch.Size([4, 463])


Predicting:  42%|████▏     | 1005/2400 [23:05<1:24:42,  3.64s/it]

GPU memory after batch 1004: 14.97 GB
Batch 1005 input shapes: input_ids=torch.Size([4, 341]), attention_mask=torch.Size([4, 341])


Predicting:  42%|████▏     | 1006/2400 [23:09<1:23:50,  3.61s/it]

GPU memory after batch 1005: 14.97 GB
Batch 1006 input shapes: input_ids=torch.Size([4, 476]), attention_mask=torch.Size([4, 476])


Predicting:  42%|████▏     | 1007/2400 [23:13<1:27:15,  3.76s/it]

GPU memory after batch 1006: 14.97 GB
Batch 1007 input shapes: input_ids=torch.Size([4, 271]), attention_mask=torch.Size([4, 271])


Predicting:  42%|████▏     | 1008/2400 [23:16<1:18:24,  3.38s/it]

GPU memory after batch 1007: 14.97 GB
Batch 1008 input shapes: input_ids=torch.Size([4, 436]), attention_mask=torch.Size([4, 436])


Predicting:  42%|████▏     | 1009/2400 [23:19<1:21:03,  3.50s/it]

GPU memory after batch 1008: 14.97 GB
Batch 1009 input shapes: input_ids=torch.Size([4, 303]), attention_mask=torch.Size([4, 303])


Predicting:  42%|████▏     | 1010/2400 [23:23<1:19:12,  3.42s/it]

GPU memory after batch 1009: 14.97 GB
Batch 1010 input shapes: input_ids=torch.Size([4, 336]), attention_mask=torch.Size([4, 336])


Predicting:  42%|████▏     | 1011/2400 [23:26<1:19:32,  3.44s/it]

批次 1010: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nIs "200', 'Yes\nIs "200']
GPU memory after batch 1010: 14.97 GB
Batch 1011 input shapes: input_ids=torch.Size([4, 353]), attention_mask=torch.Size([4, 353])


Predicting:  42%|████▏     | 1012/2400 [23:30<1:21:30,  3.52s/it]

GPU memory after batch 1011: 14.97 GB
Batch 1012 input shapes: input_ids=torch.Size([4, 381]), attention_mask=torch.Size([4, 381])


Predicting:  42%|████▏     | 1013/2400 [23:33<1:20:25,  3.48s/it]

GPU memory after batch 1012: 14.97 GB
Batch 1013 input shapes: input_ids=torch.Size([4, 216]), attention_mask=torch.Size([4, 216])


Predicting:  42%|████▏     | 1014/2400 [23:36<1:13:06,  3.17s/it]

GPU memory after batch 1013: 14.97 GB
Batch 1014 input shapes: input_ids=torch.Size([4, 285]), attention_mask=torch.Size([4, 285])


Predicting:  42%|████▏     | 1015/2400 [23:39<1:12:04,  3.12s/it]

GPU memory after batch 1014: 14.97 GB
Batch 1015 input shapes: input_ids=torch.Size([4, 325]), attention_mask=torch.Size([4, 325])


Predicting:  42%|████▏     | 1016/2400 [23:42<1:14:15,  3.22s/it]

GPU memory after batch 1015: 14.97 GB
Batch 1016 input shapes: input_ids=torch.Size([4, 376]), attention_mask=torch.Size([4, 376])


Predicting:  42%|████▏     | 1017/2400 [23:45<1:14:49,  3.25s/it]

GPU memory after batch 1016: 14.97 GB
Batch 1017 input shapes: input_ids=torch.Size([4, 376]), attention_mask=torch.Size([4, 376])


Predicting:  42%|████▏     | 1018/2400 [23:49<1:18:37,  3.41s/it]

GPU memory after batch 1017: 14.97 GB
Batch 1018 input shapes: input_ids=torch.Size([4, 489]), attention_mask=torch.Size([4, 489])


Predicting:  42%|████▏     | 1019/2400 [23:54<1:24:58,  3.69s/it]

GPU memory after batch 1018: 14.97 GB
Batch 1019 input shapes: input_ids=torch.Size([4, 286]), attention_mask=torch.Size([4, 286])


Predicting:  42%|████▎     | 1020/2400 [23:57<1:20:18,  3.49s/it]

GPU memory after batch 1019: 14.97 GB
Batch 1020 input shapes: input_ids=torch.Size([4, 288]), attention_mask=torch.Size([4, 288])


Predicting:  43%|████▎     | 1021/2400 [24:00<1:17:01,  3.35s/it]

批次 1020: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes\nYes, because']
GPU memory after batch 1020: 14.97 GB
Batch 1021 input shapes: input_ids=torch.Size([4, 244]), attention_mask=torch.Size([4, 244])


Predicting:  43%|████▎     | 1022/2400 [24:02<1:09:22,  3.02s/it]

GPU memory after batch 1021: 14.97 GB
Batch 1022 input shapes: input_ids=torch.Size([4, 270]), attention_mask=torch.Size([4, 270])


Predicting:  43%|████▎     | 1023/2400 [24:05<1:08:52,  3.00s/it]

GPU memory after batch 1022: 14.97 GB
Batch 1023 input shapes: input_ids=torch.Size([4, 380]), attention_mask=torch.Size([4, 380])


Predicting:  43%|████▎     | 1024/2400 [24:08<1:11:22,  3.11s/it]

GPU memory after batch 1023: 14.97 GB
Batch 1024 input shapes: input_ids=torch.Size([4, 344]), attention_mask=torch.Size([4, 344])


Predicting:  43%|████▎     | 1025/2400 [24:11<1:10:59,  3.10s/it]

GPU memory after batch 1024: 14.97 GB
Batch 1025 input shapes: input_ids=torch.Size([4, 318]), attention_mask=torch.Size([4, 318])


Predicting:  43%|████▎     | 1026/2400 [24:14<1:09:10,  3.02s/it]

GPU memory after batch 1025: 14.97 GB
Batch 1026 input shapes: input_ids=torch.Size([4, 276]), attention_mask=torch.Size([4, 276])


Predicting:  43%|████▎     | 1027/2400 [24:17<1:05:40,  2.87s/it]

GPU memory after batch 1026: 14.97 GB
Batch 1027 input shapes: input_ids=torch.Size([4, 332]), attention_mask=torch.Size([4, 332])


Predicting:  43%|████▎     | 1028/2400 [24:20<1:06:35,  2.91s/it]

GPU memory after batch 1027: 14.97 GB
Batch 1028 input shapes: input_ids=torch.Size([4, 227]), attention_mask=torch.Size([4, 227])


Predicting:  43%|████▎     | 1029/2400 [24:22<1:01:33,  2.69s/it]

GPU memory after batch 1028: 14.97 GB
Batch 1029 input shapes: input_ids=torch.Size([4, 297]), attention_mask=torch.Size([4, 297])


Predicting:  43%|████▎     | 1030/2400 [24:25<1:05:12,  2.86s/it]

GPU memory after batch 1029: 14.97 GB
Batch 1030 input shapes: input_ids=torch.Size([4, 294]), attention_mask=torch.Size([4, 294])


Predicting:  43%|████▎     | 1031/2400 [24:28<1:07:26,  2.96s/it]

批次 1030: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1030: 14.97 GB
Batch 1031 input shapes: input_ids=torch.Size([4, 504]), attention_mask=torch.Size([4, 504])


Predicting:  43%|████▎     | 1032/2400 [24:33<1:16:55,  3.37s/it]

GPU memory after batch 1031: 14.97 GB
Batch 1032 input shapes: input_ids=torch.Size([4, 420]), attention_mask=torch.Size([4, 420])


Predicting:  43%|████▎     | 1033/2400 [24:36<1:19:28,  3.49s/it]

GPU memory after batch 1032: 14.97 GB
Batch 1033 input shapes: input_ids=torch.Size([4, 317]), attention_mask=torch.Size([4, 317])


Predicting:  43%|████▎     | 1034/2400 [24:40<1:18:08,  3.43s/it]

GPU memory after batch 1033: 14.97 GB
Batch 1034 input shapes: input_ids=torch.Size([4, 270]), attention_mask=torch.Size([4, 270])


Predicting:  43%|████▎     | 1035/2400 [24:43<1:14:50,  3.29s/it]

GPU memory after batch 1034: 14.97 GB
Batch 1035 input shapes: input_ids=torch.Size([4, 260]), attention_mask=torch.Size([4, 260])


Predicting:  43%|████▎     | 1036/2400 [24:45<1:09:04,  3.04s/it]

GPU memory after batch 1035: 14.97 GB
Batch 1036 input shapes: input_ids=torch.Size([4, 243]), attention_mask=torch.Size([4, 243])


Predicting:  43%|████▎     | 1037/2400 [24:48<1:06:43,  2.94s/it]

GPU memory after batch 1036: 14.97 GB
Batch 1037 input shapes: input_ids=torch.Size([4, 233]), attention_mask=torch.Size([4, 233])


Predicting:  43%|████▎     | 1038/2400 [24:50<1:04:49,  2.86s/it]

GPU memory after batch 1037: 14.97 GB
Batch 1038 input shapes: input_ids=torch.Size([4, 302]), attention_mask=torch.Size([4, 302])


Predicting:  43%|████▎     | 1039/2400 [24:54<1:07:21,  2.97s/it]

GPU memory after batch 1038: 14.97 GB
Batch 1039 input shapes: input_ids=torch.Size([4, 320]), attention_mask=torch.Size([4, 320])


Predicting:  43%|████▎     | 1040/2400 [24:56<1:06:25,  2.93s/it]

GPU memory after batch 1039: 14.97 GB
Batch 1040 input shapes: input_ids=torch.Size([4, 376]), attention_mask=torch.Size([4, 376])


Predicting:  43%|████▎     | 1041/2400 [25:00<1:09:08,  3.05s/it]

批次 1040: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1040: 14.97 GB
Batch 1041 input shapes: input_ids=torch.Size([4, 255]), attention_mask=torch.Size([4, 255])


Predicting:  43%|████▎     | 1042/2400 [25:03<1:07:05,  2.96s/it]

GPU memory after batch 1041: 14.97 GB
Batch 1042 input shapes: input_ids=torch.Size([4, 270]), attention_mask=torch.Size([4, 270])


Predicting:  43%|████▎     | 1043/2400 [25:05<1:03:52,  2.82s/it]

GPU memory after batch 1042: 14.97 GB
Batch 1043 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  44%|████▎     | 1044/2400 [25:09<1:14:16,  3.29s/it]

GPU memory after batch 1043: 14.97 GB
Batch 1044 input shapes: input_ids=torch.Size([4, 217]), attention_mask=torch.Size([4, 217])


Predicting:  44%|████▎     | 1045/2400 [25:11<1:05:37,  2.91s/it]

GPU memory after batch 1044: 14.97 GB
Batch 1045 input shapes: input_ids=torch.Size([4, 308]), attention_mask=torch.Size([4, 308])


Predicting:  44%|████▎     | 1046/2400 [25:15<1:08:01,  3.01s/it]

GPU memory after batch 1045: 14.97 GB
Batch 1046 input shapes: input_ids=torch.Size([4, 162]), attention_mask=torch.Size([4, 162])


Predicting:  44%|████▎     | 1047/2400 [25:17<1:01:39,  2.73s/it]

GPU memory after batch 1046: 14.97 GB
Batch 1047 input shapes: input_ids=torch.Size([4, 305]), attention_mask=torch.Size([4, 305])


Predicting:  44%|████▎     | 1048/2400 [25:20<1:05:13,  2.89s/it]

GPU memory after batch 1047: 14.97 GB
Batch 1048 input shapes: input_ids=torch.Size([4, 380]), attention_mask=torch.Size([4, 380])


Predicting:  44%|████▎     | 1049/2400 [25:24<1:11:34,  3.18s/it]

GPU memory after batch 1048: 14.97 GB
Batch 1049 input shapes: input_ids=torch.Size([4, 251]), attention_mask=torch.Size([4, 251])


Predicting:  44%|████▍     | 1050/2400 [25:26<1:05:35,  2.91s/it]

GPU memory after batch 1049: 14.97 GB
Batch 1050 input shapes: input_ids=torch.Size([4, 203]), attention_mask=torch.Size([4, 203])


Predicting:  44%|████▍     | 1051/2400 [25:29<1:02:01,  2.76s/it]

批次 1050: 原始预测: ['Yes', 'Yes', 'Yes', 'No\nAnswer\nYes']
GPU memory after batch 1050: 14.97 GB
Batch 1051 input shapes: input_ids=torch.Size([4, 316]), attention_mask=torch.Size([4, 316])


Predicting:  44%|████▍     | 1052/2400 [25:32<1:05:37,  2.92s/it]

GPU memory after batch 1051: 14.97 GB
Batch 1052 input shapes: input_ids=torch.Size([4, 413]), attention_mask=torch.Size([4, 413])


Predicting:  44%|████▍     | 1053/2400 [25:35<1:07:01,  2.99s/it]

GPU memory after batch 1052: 14.97 GB
Batch 1053 input shapes: input_ids=torch.Size([4, 413]), attention_mask=torch.Size([4, 413])


Predicting:  44%|████▍     | 1054/2400 [25:39<1:11:09,  3.17s/it]

GPU memory after batch 1053: 14.97 GB
Batch 1054 input shapes: input_ids=torch.Size([4, 319]), attention_mask=torch.Size([4, 319])


Predicting:  44%|████▍     | 1055/2400 [25:41<1:08:52,  3.07s/it]

GPU memory after batch 1054: 14.97 GB
Batch 1055 input shapes: input_ids=torch.Size([4, 447]), attention_mask=torch.Size([4, 447])


Predicting:  44%|████▍     | 1056/2400 [25:45<1:13:31,  3.28s/it]

GPU memory after batch 1055: 14.97 GB
Batch 1056 input shapes: input_ids=torch.Size([4, 320]), attention_mask=torch.Size([4, 320])


Predicting:  44%|████▍     | 1057/2400 [25:49<1:13:36,  3.29s/it]

GPU memory after batch 1056: 14.97 GB
Batch 1057 input shapes: input_ids=torch.Size([4, 347]), attention_mask=torch.Size([4, 347])


Predicting:  44%|████▍     | 1058/2400 [25:52<1:12:15,  3.23s/it]

GPU memory after batch 1057: 14.97 GB
Batch 1058 input shapes: input_ids=torch.Size([4, 287]), attention_mask=torch.Size([4, 287])


Predicting:  44%|████▍     | 1059/2400 [25:54<1:07:44,  3.03s/it]

GPU memory after batch 1058: 14.97 GB
Batch 1059 input shapes: input_ids=torch.Size([4, 311]), attention_mask=torch.Size([4, 311])


Predicting:  44%|████▍     | 1060/2400 [25:57<1:06:11,  2.96s/it]

GPU memory after batch 1059: 14.97 GB
Batch 1060 input shapes: input_ids=torch.Size([4, 278]), attention_mask=torch.Size([4, 278])


Predicting:  44%|████▍     | 1061/2400 [26:00<1:03:09,  2.83s/it]

批次 1060: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1060: 14.97 GB
Batch 1061 input shapes: input_ids=torch.Size([4, 322]), attention_mask=torch.Size([4, 322])


Predicting:  44%|████▍     | 1062/2400 [26:02<1:04:07,  2.88s/it]

GPU memory after batch 1061: 14.97 GB
Batch 1062 input shapes: input_ids=torch.Size([4, 304]), attention_mask=torch.Size([4, 304])


Predicting:  44%|████▍     | 1063/2400 [26:05<1:03:23,  2.84s/it]

GPU memory after batch 1062: 14.97 GB
Batch 1063 input shapes: input_ids=torch.Size([4, 256]), attention_mask=torch.Size([4, 256])


Predicting:  44%|████▍     | 1064/2400 [26:08<1:02:47,  2.82s/it]

GPU memory after batch 1063: 14.97 GB
Batch 1064 input shapes: input_ids=torch.Size([4, 341]), attention_mask=torch.Size([4, 341])


Predicting:  44%|████▍     | 1065/2400 [26:11<1:04:21,  2.89s/it]

GPU memory after batch 1064: 14.97 GB
Batch 1065 input shapes: input_ids=torch.Size([4, 419]), attention_mask=torch.Size([4, 419])


Predicting:  44%|████▍     | 1066/2400 [26:14<1:06:52,  3.01s/it]

GPU memory after batch 1065: 14.97 GB
Batch 1066 input shapes: input_ids=torch.Size([4, 247]), attention_mask=torch.Size([4, 247])


Predicting:  44%|████▍     | 1067/2400 [26:17<1:04:48,  2.92s/it]

GPU memory after batch 1066: 14.97 GB
Batch 1067 input shapes: input_ids=torch.Size([4, 424]), attention_mask=torch.Size([4, 424])


Predicting:  44%|████▍     | 1068/2400 [26:20<1:07:09,  3.03s/it]

GPU memory after batch 1067: 14.97 GB
Batch 1068 input shapes: input_ids=torch.Size([4, 289]), attention_mask=torch.Size([4, 289])


Predicting:  45%|████▍     | 1069/2400 [26:24<1:08:11,  3.07s/it]

GPU memory after batch 1068: 14.97 GB
Batch 1069 input shapes: input_ids=torch.Size([4, 336]), attention_mask=torch.Size([4, 336])


Predicting:  45%|████▍     | 1070/2400 [26:27<1:10:52,  3.20s/it]

GPU memory after batch 1069: 14.97 GB
Batch 1070 input shapes: input_ids=torch.Size([4, 417]), attention_mask=torch.Size([4, 417])


Predicting:  45%|████▍     | 1071/2400 [26:30<1:11:20,  3.22s/it]

批次 1070: 原始预测: ['Yes', 'Yes', 'No', 'No']
GPU memory after batch 1070: 14.97 GB
Batch 1071 input shapes: input_ids=torch.Size([4, 373]), attention_mask=torch.Size([4, 373])


Predicting:  45%|████▍     | 1072/2400 [26:34<1:15:09,  3.40s/it]

GPU memory after batch 1071: 14.97 GB
Batch 1072 input shapes: input_ids=torch.Size([4, 388]), attention_mask=torch.Size([4, 388])


Predicting:  45%|████▍     | 1073/2400 [26:37<1:13:16,  3.31s/it]

GPU memory after batch 1072: 14.97 GB
Batch 1073 input shapes: input_ids=torch.Size([4, 396]), attention_mask=torch.Size([4, 396])


Predicting:  45%|████▍     | 1074/2400 [26:41<1:15:07,  3.40s/it]

GPU memory after batch 1073: 14.97 GB
Batch 1074 input shapes: input_ids=torch.Size([4, 352]), attention_mask=torch.Size([4, 352])


Predicting:  45%|████▍     | 1075/2400 [26:44<1:13:04,  3.31s/it]

GPU memory after batch 1074: 14.97 GB
Batch 1075 input shapes: input_ids=torch.Size([4, 201]), attention_mask=torch.Size([4, 201])


Predicting:  45%|████▍     | 1076/2400 [26:46<1:03:58,  2.90s/it]

GPU memory after batch 1075: 14.97 GB
Batch 1076 input shapes: input_ids=torch.Size([4, 359]), attention_mask=torch.Size([4, 359])


Predicting:  45%|████▍     | 1077/2400 [26:49<1:06:19,  3.01s/it]

GPU memory after batch 1076: 14.97 GB
Batch 1077 input shapes: input_ids=torch.Size([4, 287]), attention_mask=torch.Size([4, 287])


Predicting:  45%|████▍     | 1078/2400 [26:52<1:03:20,  2.87s/it]

GPU memory after batch 1077: 14.97 GB
Batch 1078 input shapes: input_ids=torch.Size([4, 393]), attention_mask=torch.Size([4, 393])


Predicting:  45%|████▍     | 1079/2400 [26:55<1:08:05,  3.09s/it]

GPU memory after batch 1078: 14.97 GB
Batch 1079 input shapes: input_ids=torch.Size([4, 406]), attention_mask=torch.Size([4, 406])


Predicting:  45%|████▌     | 1080/2400 [26:59<1:11:25,  3.25s/it]

GPU memory after batch 1079: 14.97 GB
Batch 1080 input shapes: input_ids=torch.Size([4, 329]), attention_mask=torch.Size([4, 329])


Predicting:  45%|████▌     | 1081/2400 [27:02<1:12:52,  3.31s/it]

批次 1080: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes', 'Yes']
GPU memory after batch 1080: 14.97 GB
Batch 1081 input shapes: input_ids=torch.Size([4, 409]), attention_mask=torch.Size([4, 409])


Predicting:  45%|████▌     | 1082/2400 [27:06<1:14:44,  3.40s/it]

GPU memory after batch 1081: 14.97 GB
Batch 1082 input shapes: input_ids=torch.Size([4, 352]), attention_mask=torch.Size([4, 352])


Predicting:  45%|████▌     | 1083/2400 [27:09<1:12:41,  3.31s/it]

GPU memory after batch 1082: 14.97 GB
Batch 1083 input shapes: input_ids=torch.Size([4, 359]), attention_mask=torch.Size([4, 359])


Predicting:  45%|████▌     | 1084/2400 [27:13<1:15:21,  3.44s/it]

GPU memory after batch 1083: 14.97 GB
Batch 1084 input shapes: input_ids=torch.Size([4, 365]), attention_mask=torch.Size([4, 365])


Predicting:  45%|████▌     | 1085/2400 [27:16<1:14:28,  3.40s/it]

GPU memory after batch 1084: 14.97 GB
Batch 1085 input shapes: input_ids=torch.Size([4, 455]), attention_mask=torch.Size([4, 455])


Predicting:  45%|████▌     | 1086/2400 [27:20<1:18:37,  3.59s/it]

GPU memory after batch 1085: 14.97 GB
Batch 1086 input shapes: input_ids=torch.Size([4, 470]), attention_mask=torch.Size([4, 470])


Predicting:  45%|████▌     | 1087/2400 [27:24<1:22:03,  3.75s/it]

GPU memory after batch 1086: 14.97 GB
Batch 1087 input shapes: input_ids=torch.Size([4, 472]), attention_mask=torch.Size([4, 472])


Predicting:  45%|████▌     | 1088/2400 [27:28<1:21:04,  3.71s/it]

GPU memory after batch 1087: 14.97 GB
Batch 1088 input shapes: input_ids=torch.Size([4, 281]), attention_mask=torch.Size([4, 281])


Predicting:  45%|████▌     | 1089/2400 [27:31<1:16:31,  3.50s/it]

GPU memory after batch 1088: 14.97 GB
Batch 1089 input shapes: input_ids=torch.Size([4, 366]), attention_mask=torch.Size([4, 366])


Predicting:  45%|████▌     | 1090/2400 [27:34<1:15:12,  3.44s/it]

GPU memory after batch 1089: 14.97 GB
Batch 1090 input shapes: input_ids=torch.Size([4, 450]), attention_mask=torch.Size([4, 450])


Predicting:  45%|████▌     | 1091/2400 [27:38<1:16:14,  3.49s/it]

批次 1090: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1090: 14.97 GB
Batch 1091 input shapes: input_ids=torch.Size([4, 339]), attention_mask=torch.Size([4, 339])


Predicting:  46%|████▌     | 1092/2400 [27:41<1:16:23,  3.50s/it]

GPU memory after batch 1091: 14.97 GB
Batch 1092 input shapes: input_ids=torch.Size([4, 460]), attention_mask=torch.Size([4, 460])


Predicting:  46%|████▌     | 1093/2400 [27:45<1:20:07,  3.68s/it]

GPU memory after batch 1092: 14.97 GB
Batch 1093 input shapes: input_ids=torch.Size([4, 387]), attention_mask=torch.Size([4, 387])


Predicting:  46%|████▌     | 1094/2400 [27:49<1:19:31,  3.65s/it]

GPU memory after batch 1093: 14.97 GB
Batch 1094 input shapes: input_ids=torch.Size([4, 377]), attention_mask=torch.Size([4, 377])


Predicting:  46%|████▌     | 1095/2400 [27:53<1:20:41,  3.71s/it]

GPU memory after batch 1094: 14.97 GB
Batch 1095 input shapes: input_ids=torch.Size([4, 355]), attention_mask=torch.Size([4, 355])


Predicting:  46%|████▌     | 1096/2400 [27:56<1:17:41,  3.58s/it]

GPU memory after batch 1095: 14.97 GB
Batch 1096 input shapes: input_ids=torch.Size([4, 292]), attention_mask=torch.Size([4, 292])


Predicting:  46%|████▌     | 1097/2400 [27:59<1:15:08,  3.46s/it]

GPU memory after batch 1096: 14.97 GB
Batch 1097 input shapes: input_ids=torch.Size([4, 268]), attention_mask=torch.Size([4, 268])


Predicting:  46%|████▌     | 1098/2400 [28:02<1:08:40,  3.16s/it]

GPU memory after batch 1097: 14.97 GB
Batch 1098 input shapes: input_ids=torch.Size([4, 356]), attention_mask=torch.Size([4, 356])


Predicting:  46%|████▌     | 1099/2400 [28:05<1:09:14,  3.19s/it]

GPU memory after batch 1098: 14.97 GB
Batch 1099 input shapes: input_ids=torch.Size([4, 385]), attention_mask=torch.Size([4, 385])


Predicting:  46%|████▌     | 1100/2400 [28:08<1:08:43,  3.17s/it]

GPU memory after batch 1099: 14.97 GB
Batch 1100 input shapes: input_ids=torch.Size([4, 276]), attention_mask=torch.Size([4, 276])


Predicting:  46%|████▌     | 1101/2400 [28:11<1:07:24,  3.11s/it]

批次 1100: 原始预测: ['Yes', 'Yes', 'No\nIs "191', 'No\nIs "191']
GPU memory after batch 1100: 14.97 GB
Batch 1101 input shapes: input_ids=torch.Size([4, 433]), attention_mask=torch.Size([4, 433])


Predicting:  46%|████▌     | 1102/2400 [28:15<1:11:35,  3.31s/it]

GPU memory after batch 1101: 14.97 GB
Batch 1102 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  46%|████▌     | 1103/2400 [28:19<1:18:23,  3.63s/it]

GPU memory after batch 1102: 14.97 GB
Batch 1103 input shapes: input_ids=torch.Size([4, 415]), attention_mask=torch.Size([4, 415])


Predicting:  46%|████▌     | 1104/2400 [28:23<1:18:13,  3.62s/it]

GPU memory after batch 1103: 14.97 GB
Batch 1104 input shapes: input_ids=torch.Size([4, 328]), attention_mask=torch.Size([4, 328])


Predicting:  46%|████▌     | 1105/2400 [28:26<1:17:02,  3.57s/it]

GPU memory after batch 1104: 14.97 GB
Batch 1105 input shapes: input_ids=torch.Size([4, 414]), attention_mask=torch.Size([4, 414])


Predicting:  46%|████▌     | 1106/2400 [28:30<1:17:15,  3.58s/it]

GPU memory after batch 1105: 14.97 GB
Batch 1106 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  46%|████▌     | 1107/2400 [28:34<1:22:15,  3.82s/it]

GPU memory after batch 1106: 14.97 GB
Batch 1107 input shapes: input_ids=torch.Size([4, 416]), attention_mask=torch.Size([4, 416])


Predicting:  46%|████▌     | 1108/2400 [28:37<1:17:47,  3.61s/it]

GPU memory after batch 1107: 14.97 GB
Batch 1108 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  46%|████▌     | 1109/2400 [28:42<1:22:35,  3.84s/it]

GPU memory after batch 1108: 14.97 GB
Batch 1109 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  46%|████▋     | 1110/2400 [28:46<1:25:55,  4.00s/it]

GPU memory after batch 1109: 14.97 GB
Batch 1110 input shapes: input_ids=torch.Size([4, 327]), attention_mask=torch.Size([4, 327])


Predicting:  46%|████▋     | 1111/2400 [28:49<1:19:20,  3.69s/it]

批次 1110: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1110: 14.97 GB
Batch 1111 input shapes: input_ids=torch.Size([4, 242]), attention_mask=torch.Size([4, 242])


Predicting:  46%|████▋     | 1112/2400 [28:52<1:12:53,  3.40s/it]

GPU memory after batch 1111: 14.97 GB
Batch 1112 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  46%|████▋     | 1113/2400 [28:56<1:18:45,  3.67s/it]

GPU memory after batch 1112: 14.97 GB
Batch 1113 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  46%|████▋     | 1114/2400 [29:01<1:22:52,  3.87s/it]

GPU memory after batch 1113: 14.97 GB
Batch 1114 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  46%|████▋     | 1115/2400 [29:05<1:26:04,  4.02s/it]

GPU memory after batch 1114: 14.97 GB
Batch 1115 input shapes: input_ids=torch.Size([4, 328]), attention_mask=torch.Size([4, 328])


Predicting:  46%|████▋     | 1116/2400 [29:08<1:19:21,  3.71s/it]

GPU memory after batch 1115: 14.97 GB
Batch 1116 input shapes: input_ids=torch.Size([4, 258]), attention_mask=torch.Size([4, 258])


Predicting:  47%|████▋     | 1117/2400 [29:10<1:11:13,  3.33s/it]

GPU memory after batch 1116: 14.97 GB
Batch 1117 input shapes: input_ids=torch.Size([4, 260]), attention_mask=torch.Size([4, 260])


Predicting:  47%|████▋     | 1118/2400 [29:13<1:05:32,  3.07s/it]

GPU memory after batch 1117: 14.97 GB
Batch 1118 input shapes: input_ids=torch.Size([4, 311]), attention_mask=torch.Size([4, 311])


Predicting:  47%|████▋     | 1119/2400 [29:16<1:03:50,  2.99s/it]

GPU memory after batch 1118: 14.97 GB
Batch 1119 input shapes: input_ids=torch.Size([4, 369]), attention_mask=torch.Size([4, 369])


Predicting:  47%|████▋     | 1120/2400 [29:19<1:05:58,  3.09s/it]

GPU memory after batch 1119: 14.97 GB
Batch 1120 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  47%|████▋     | 1121/2400 [29:23<1:13:45,  3.46s/it]

批次 1120: 原始预测: [', led to a', ', led to a', 'this time from a different', 'this time from a different']
GPU memory after batch 1120: 14.97 GB
Batch 1121 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  47%|████▋     | 1122/2400 [29:28<1:19:10,  3.72s/it]

GPU memory after batch 1121: 14.97 GB
Batch 1122 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  47%|████▋     | 1123/2400 [29:32<1:22:57,  3.90s/it]

GPU memory after batch 1122: 14.97 GB
Batch 1123 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  47%|████▋     | 1124/2400 [29:36<1:25:35,  4.02s/it]

GPU memory after batch 1123: 14.97 GB
Batch 1124 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  47%|████▋     | 1125/2400 [29:41<1:27:23,  4.11s/it]

GPU memory after batch 1124: 14.97 GB
Batch 1125 input shapes: input_ids=torch.Size([4, 293]), attention_mask=torch.Size([4, 293])


Predicting:  47%|████▋     | 1126/2400 [29:43<1:18:22,  3.69s/it]

GPU memory after batch 1125: 14.97 GB
Batch 1126 input shapes: input_ids=torch.Size([4, 358]), attention_mask=torch.Size([4, 358])


Predicting:  47%|████▋     | 1127/2400 [29:46<1:15:34,  3.56s/it]

GPU memory after batch 1126: 14.97 GB
Batch 1127 input shapes: input_ids=torch.Size([4, 494]), attention_mask=torch.Size([4, 494])


Predicting:  47%|████▋     | 1128/2400 [29:50<1:17:25,  3.65s/it]

GPU memory after batch 1127: 14.97 GB
Batch 1128 input shapes: input_ids=torch.Size([4, 378]), attention_mask=torch.Size([4, 378])


Predicting:  47%|████▋     | 1129/2400 [29:54<1:18:34,  3.71s/it]

GPU memory after batch 1128: 14.97 GB
Batch 1129 input shapes: input_ids=torch.Size([4, 294]), attention_mask=torch.Size([4, 294])


Predicting:  47%|████▋     | 1130/2400 [29:57<1:15:12,  3.55s/it]

GPU memory after batch 1129: 14.97 GB
Batch 1130 input shapes: input_ids=torch.Size([4, 370]), attention_mask=torch.Size([4, 370])


Predicting:  47%|████▋     | 1131/2400 [30:01<1:13:45,  3.49s/it]

批次 1130: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1130: 14.97 GB
Batch 1131 input shapes: input_ids=torch.Size([4, 263]), attention_mask=torch.Size([4, 263])


Predicting:  47%|████▋     | 1132/2400 [30:03<1:07:08,  3.18s/it]

GPU memory after batch 1131: 14.97 GB
Batch 1132 input shapes: input_ids=torch.Size([4, 287]), attention_mask=torch.Size([4, 287])


Predicting:  47%|████▋     | 1133/2400 [30:06<1:03:12,  2.99s/it]

GPU memory after batch 1132: 14.97 GB
Batch 1133 input shapes: input_ids=torch.Size([4, 294]), attention_mask=torch.Size([4, 294])


Predicting:  47%|████▋     | 1134/2400 [30:08<1:01:29,  2.91s/it]

GPU memory after batch 1133: 14.97 GB
Batch 1134 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  47%|████▋     | 1135/2400 [30:13<1:10:18,  3.34s/it]

GPU memory after batch 1134: 14.97 GB
Batch 1135 input shapes: input_ids=torch.Size([4, 495]), attention_mask=torch.Size([4, 495])


Predicting:  47%|████▋     | 1136/2400 [30:17<1:13:35,  3.49s/it]

GPU memory after batch 1135: 14.97 GB
Batch 1136 input shapes: input_ids=torch.Size([4, 454]), attention_mask=torch.Size([4, 454])


Predicting:  47%|████▋     | 1137/2400 [30:20<1:14:18,  3.53s/it]

GPU memory after batch 1136: 14.97 GB
Batch 1137 input shapes: input_ids=torch.Size([4, 450]), attention_mask=torch.Size([4, 450])


Predicting:  47%|████▋     | 1138/2400 [30:24<1:14:42,  3.55s/it]

GPU memory after batch 1137: 14.97 GB
Batch 1138 input shapes: input_ids=torch.Size([4, 297]), attention_mask=torch.Size([4, 297])


Predicting:  47%|████▋     | 1139/2400 [30:27<1:09:43,  3.32s/it]

GPU memory after batch 1138: 14.97 GB
Batch 1139 input shapes: input_ids=torch.Size([4, 319]), attention_mask=torch.Size([4, 319])


Predicting:  48%|████▊     | 1140/2400 [30:29<1:06:40,  3.17s/it]

GPU memory after batch 1139: 14.97 GB
Batch 1140 input shapes: input_ids=torch.Size([4, 318]), attention_mask=torch.Size([4, 318])


Predicting:  48%|████▊     | 1141/2400 [30:32<1:04:30,  3.07s/it]

批次 1140: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1140: 14.97 GB
Batch 1141 input shapes: input_ids=torch.Size([4, 338]), attention_mask=torch.Size([4, 338])


Predicting:  48%|████▊     | 1142/2400 [30:35<1:04:22,  3.07s/it]

GPU memory after batch 1141: 14.97 GB
Batch 1142 input shapes: input_ids=torch.Size([4, 427]), attention_mask=torch.Size([4, 427])


Predicting:  48%|████▊     | 1143/2400 [30:39<1:05:40,  3.13s/it]

GPU memory after batch 1142: 14.97 GB
Batch 1143 input shapes: input_ids=torch.Size([4, 431]), attention_mask=torch.Size([4, 431])


Predicting:  48%|████▊     | 1144/2400 [30:42<1:06:34,  3.18s/it]

GPU memory after batch 1143: 14.97 GB
Batch 1144 input shapes: input_ids=torch.Size([4, 322]), attention_mask=torch.Size([4, 322])


Predicting:  48%|████▊     | 1145/2400 [30:45<1:05:16,  3.12s/it]

GPU memory after batch 1144: 14.97 GB
Batch 1145 input shapes: input_ids=torch.Size([4, 334]), attention_mask=torch.Size([4, 334])


Predicting:  48%|████▊     | 1146/2400 [30:48<1:04:32,  3.09s/it]

GPU memory after batch 1145: 14.97 GB
Batch 1146 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1147/2400 [30:52<1:12:30,  3.47s/it]

GPU memory after batch 1146: 14.97 GB
Batch 1147 input shapes: input_ids=torch.Size([4, 265]), attention_mask=torch.Size([4, 265])


Predicting:  48%|████▊     | 1148/2400 [30:55<1:06:18,  3.18s/it]

GPU memory after batch 1147: 14.97 GB
Batch 1148 input shapes: input_ids=torch.Size([4, 347]), attention_mask=torch.Size([4, 347])


Predicting:  48%|████▊     | 1149/2400 [30:58<1:05:45,  3.15s/it]

GPU memory after batch 1148: 14.97 GB
Batch 1149 input shapes: input_ids=torch.Size([4, 343]), attention_mask=torch.Size([4, 343])


Predicting:  48%|████▊     | 1150/2400 [31:01<1:05:07,  3.13s/it]

GPU memory after batch 1149: 14.97 GB
Batch 1150 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1151/2400 [31:05<1:12:49,  3.50s/it]

批次 1150: 原始预测: ['No', 'No', ', Carson Palmer led his', ', Carson Palmer led his']
GPU memory after batch 1150: 14.97 GB
Batch 1151 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1152/2400 [31:10<1:17:53,  3.74s/it]

GPU memory after batch 1151: 14.97 GB
Batch 1152 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1153/2400 [31:14<1:21:24,  3.92s/it]

GPU memory after batch 1152: 14.97 GB
Batch 1153 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1154/2400 [31:18<1:24:09,  4.05s/it]

GPU memory after batch 1153: 14.97 GB
Batch 1154 input shapes: input_ids=torch.Size([4, 279]), attention_mask=torch.Size([4, 279])


Predicting:  48%|████▊     | 1155/2400 [31:21<1:14:32,  3.59s/it]

GPU memory after batch 1154: 14.97 GB
Batch 1155 input shapes: input_ids=torch.Size([4, 423]), attention_mask=torch.Size([4, 423])


Predicting:  48%|████▊     | 1156/2400 [31:24<1:12:32,  3.50s/it]

GPU memory after batch 1155: 14.97 GB
Batch 1156 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1157/2400 [31:28<1:17:33,  3.74s/it]

GPU memory after batch 1156: 14.97 GB
Batch 1157 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1158/2400 [31:33<1:21:03,  3.92s/it]

GPU memory after batch 1157: 14.97 GB
Batch 1158 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1159/2400 [31:37<1:23:29,  4.04s/it]

GPU memory after batch 1158: 14.97 GB
Batch 1159 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1160/2400 [31:41<1:25:09,  4.12s/it]

GPU memory after batch 1159: 14.97 GB
Batch 1160 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1161/2400 [31:46<1:26:19,  4.18s/it]

批次 1160: 原始预测: ['to 14-10', 'lead to 14-', 'Hail Mary attempt.', 'Hail Mary attempt.']
GPU memory after batch 1160: 14.97 GB
Batch 1161 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1162/2400 [31:50<1:27:24,  4.24s/it]

GPU memory after batch 1161: 14.97 GB
Batch 1162 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1163/2400 [31:54<1:28:07,  4.27s/it]

GPU memory after batch 1162: 14.97 GB
Batch 1163 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  48%|████▊     | 1164/2400 [31:59<1:28:38,  4.30s/it]

GPU memory after batch 1163: 14.97 GB
Batch 1164 input shapes: input_ids=torch.Size([4, 350]), attention_mask=torch.Size([4, 350])


Predicting:  49%|████▊     | 1165/2400 [32:02<1:21:07,  3.94s/it]

GPU memory after batch 1164: 14.97 GB
Batch 1165 input shapes: input_ids=torch.Size([4, 460]), attention_mask=torch.Size([4, 460])


Predicting:  49%|████▊     | 1166/2400 [32:06<1:19:08,  3.85s/it]

GPU memory after batch 1165: 14.97 GB
Batch 1166 input shapes: input_ids=torch.Size([4, 457]), attention_mask=torch.Size([4, 457])


Predicting:  49%|████▊     | 1167/2400 [32:09<1:17:45,  3.78s/it]

GPU memory after batch 1166: 14.97 GB
Batch 1167 input shapes: input_ids=torch.Size([4, 485]), attention_mask=torch.Size([4, 485])


Predicting:  49%|████▊     | 1168/2400 [32:13<1:18:09,  3.81s/it]

GPU memory after batch 1167: 14.97 GB
Batch 1168 input shapes: input_ids=torch.Size([4, 489]), attention_mask=torch.Size([4, 489])


Predicting:  49%|████▊     | 1169/2400 [32:17<1:18:25,  3.82s/it]

GPU memory after batch 1168: 14.97 GB
Batch 1169 input shapes: input_ids=torch.Size([4, 270]), attention_mask=torch.Size([4, 270])


Predicting:  49%|████▉     | 1170/2400 [32:19<1:10:12,  3.42s/it]

GPU memory after batch 1169: 14.97 GB
Batch 1170 input shapes: input_ids=torch.Size([4, 324]), attention_mask=torch.Size([4, 324])


Predicting:  49%|████▉     | 1171/2400 [32:22<1:07:26,  3.29s/it]

批次 1170: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1170: 14.97 GB
Batch 1171 input shapes: input_ids=torch.Size([4, 336]), attention_mask=torch.Size([4, 336])


Predicting:  49%|████▉     | 1172/2400 [32:25<1:05:39,  3.21s/it]

GPU memory after batch 1171: 14.97 GB
Batch 1172 input shapes: input_ids=torch.Size([4, 336]), attention_mask=torch.Size([4, 336])


Predicting:  49%|████▉     | 1173/2400 [32:28<1:04:24,  3.15s/it]

GPU memory after batch 1172: 14.97 GB
Batch 1173 input shapes: input_ids=torch.Size([4, 390]), attention_mask=torch.Size([4, 390])


Predicting:  49%|████▉     | 1174/2400 [32:32<1:07:05,  3.28s/it]

GPU memory after batch 1173: 14.97 GB
Batch 1174 input shapes: input_ids=torch.Size([4, 216]), attention_mask=torch.Size([4, 216])


Predicting:  49%|████▉     | 1175/2400 [32:34<59:04,  2.89s/it]  

GPU memory after batch 1174: 14.97 GB
Batch 1175 input shapes: input_ids=torch.Size([4, 366]), attention_mask=torch.Size([4, 366])


Predicting:  49%|████▉     | 1176/2400 [32:37<1:01:34,  3.02s/it]

GPU memory after batch 1175: 14.97 GB
Batch 1176 input shapes: input_ids=torch.Size([4, 374]), attention_mask=torch.Size([4, 374])


Predicting:  49%|████▉     | 1177/2400 [32:41<1:06:19,  3.25s/it]

GPU memory after batch 1176: 14.97 GB
Batch 1177 input shapes: input_ids=torch.Size([4, 383]), attention_mask=torch.Size([4, 383])


Predicting:  49%|████▉     | 1178/2400 [32:44<1:07:00,  3.29s/it]

GPU memory after batch 1177: 14.97 GB
Batch 1178 input shapes: input_ids=torch.Size([4, 292]), attention_mask=torch.Size([4, 292])


Predicting:  49%|████▉     | 1179/2400 [32:47<1:03:32,  3.12s/it]

GPU memory after batch 1178: 14.97 GB
Batch 1179 input shapes: input_ids=torch.Size([4, 274]), attention_mask=torch.Size([4, 274])


Predicting:  49%|████▉     | 1180/2400 [32:50<59:47,  2.94s/it]  

GPU memory after batch 1179: 14.97 GB
Batch 1180 input shapes: input_ids=torch.Size([4, 272]), attention_mask=torch.Size([4, 272])


Predicting:  49%|████▉     | 1181/2400 [32:52<56:55,  2.80s/it]

批次 1180: 原始预测: ['No', 'No', 'No', 'No']
GPU memory after batch 1180: 14.97 GB
Batch 1181 input shapes: input_ids=torch.Size([4, 182]), attention_mask=torch.Size([4, 182])


Predicting:  49%|████▉     | 1182/2400 [32:54<50:19,  2.48s/it]

GPU memory after batch 1181: 14.97 GB
Batch 1182 input shapes: input_ids=torch.Size([4, 495]), attention_mask=torch.Size([4, 495])


Predicting:  49%|████▉     | 1183/2400 [32:58<58:41,  2.89s/it]

GPU memory after batch 1182: 14.97 GB
Batch 1183 input shapes: input_ids=torch.Size([4, 279]), attention_mask=torch.Size([4, 279])


Predicting:  49%|████▉     | 1184/2400 [33:00<56:22,  2.78s/it]

GPU memory after batch 1183: 14.97 GB
Batch 1184 input shapes: input_ids=torch.Size([4, 235]), attention_mask=torch.Size([4, 235])


Predicting:  49%|████▉     | 1185/2400 [33:02<52:52,  2.61s/it]

GPU memory after batch 1184: 14.97 GB
Batch 1185 input shapes: input_ids=torch.Size([4, 406]), attention_mask=torch.Size([4, 406])


Predicting:  49%|████▉     | 1186/2400 [33:06<55:58,  2.77s/it]

GPU memory after batch 1185: 14.97 GB
Batch 1186 input shapes: input_ids=torch.Size([4, 322]), attention_mask=torch.Size([4, 322])


Predicting:  49%|████▉     | 1187/2400 [33:09<57:14,  2.83s/it]

GPU memory after batch 1186: 14.97 GB
Batch 1187 input shapes: input_ids=torch.Size([4, 365]), attention_mask=torch.Size([4, 365])


Predicting:  50%|████▉     | 1188/2400 [33:12<1:00:05,  2.97s/it]

GPU memory after batch 1187: 14.97 GB
Batch 1188 input shapes: input_ids=torch.Size([4, 329]), attention_mask=torch.Size([4, 329])


Predicting:  50%|████▉     | 1189/2400 [33:15<1:00:15,  2.99s/it]

GPU memory after batch 1188: 14.97 GB
Batch 1189 input shapes: input_ids=torch.Size([4, 388]), attention_mask=torch.Size([4, 388])


Predicting:  50%|████▉     | 1190/2400 [33:18<1:01:01,  3.03s/it]

GPU memory after batch 1189: 14.97 GB
Batch 1190 input shapes: input_ids=torch.Size([4, 344]), attention_mask=torch.Size([4, 344])


Predicting:  50%|████▉     | 1191/2400 [33:21<1:01:12,  3.04s/it]

批次 1190: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1190: 14.97 GB
Batch 1191 input shapes: input_ids=torch.Size([4, 344]), attention_mask=torch.Size([4, 344])


Predicting:  50%|████▉     | 1192/2400 [33:24<1:01:18,  3.04s/it]

GPU memory after batch 1191: 14.97 GB
Batch 1192 input shapes: input_ids=torch.Size([4, 311]), attention_mask=torch.Size([4, 311])


Predicting:  50%|████▉     | 1193/2400 [33:27<59:48,  2.97s/it]  

GPU memory after batch 1192: 14.97 GB
Batch 1193 input shapes: input_ids=torch.Size([4, 497]), attention_mask=torch.Size([4, 497])


Predicting:  50%|████▉     | 1194/2400 [33:31<1:05:08,  3.24s/it]

GPU memory after batch 1193: 14.97 GB
Batch 1194 input shapes: input_ids=torch.Size([4, 281]), attention_mask=torch.Size([4, 281])


Predicting:  50%|████▉     | 1195/2400 [33:33<1:01:00,  3.04s/it]

GPU memory after batch 1194: 14.97 GB
Batch 1195 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|████▉     | 1196/2400 [33:38<1:08:56,  3.44s/it]

GPU memory after batch 1195: 14.97 GB
Batch 1196 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|████▉     | 1197/2400 [33:42<1:14:10,  3.70s/it]

GPU memory after batch 1196: 14.97 GB
Batch 1197 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|████▉     | 1198/2400 [33:46<1:18:07,  3.90s/it]

GPU memory after batch 1197: 14.97 GB
Batch 1198 input shapes: input_ids=torch.Size([4, 487]), attention_mask=torch.Size([4, 487])


Predicting:  50%|████▉     | 1199/2400 [33:50<1:17:48,  3.89s/it]

GPU memory after batch 1198: 14.97 GB
Batch 1199 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|█████     | 1200/2400 [33:55<1:20:18,  4.02s/it]

GPU memory after batch 1199: 14.97 GB
Batch 1200 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|█████     | 1201/2400 [33:59<1:22:02,  4.11s/it]

批次 1200: 原始预测: ['61 yards in', 'Colts offense struggled mightily', 'second touchdown pass of the', 'second touchdown pass of the']
GPU memory after batch 1200: 14.97 GB
Batch 1201 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|█████     | 1202/2400 [34:03<1:23:13,  4.17s/it]

GPU memory after batch 1201: 14.97 GB
Batch 1202 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|█████     | 1203/2400 [34:08<1:24:02,  4.21s/it]

GPU memory after batch 1202: 14.97 GB
Batch 1203 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|█████     | 1204/2400 [34:12<1:24:35,  4.24s/it]

GPU memory after batch 1203: 14.97 GB
Batch 1204 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|█████     | 1205/2400 [34:16<1:25:14,  4.28s/it]

GPU memory after batch 1204: 14.97 GB
Batch 1205 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|█████     | 1206/2400 [34:21<1:25:40,  4.31s/it]

GPU memory after batch 1205: 14.97 GB
Batch 1206 input shapes: input_ids=torch.Size([4, 274]), attention_mask=torch.Size([4, 274])


Predicting:  50%|█████     | 1207/2400 [34:23<1:14:56,  3.77s/it]

GPU memory after batch 1206: 14.97 GB
Batch 1207 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|█████     | 1208/2400 [34:28<1:18:26,  3.95s/it]

GPU memory after batch 1207: 14.97 GB
Batch 1208 input shapes: input_ids=torch.Size([4, 482]), attention_mask=torch.Size([4, 482])


Predicting:  50%|█████     | 1209/2400 [34:31<1:17:49,  3.92s/it]

GPU memory after batch 1208: 14.97 GB
Batch 1209 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  50%|█████     | 1210/2400 [34:36<1:20:24,  4.05s/it]

GPU memory after batch 1209: 14.97 GB
Batch 1210 input shapes: input_ids=torch.Size([4, 495]), attention_mask=torch.Size([4, 495])


Predicting:  50%|█████     | 1211/2400 [34:40<1:19:11,  4.00s/it]

批次 1210: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1210: 14.97 GB
Batch 1211 input shapes: input_ids=torch.Size([4, 252]), attention_mask=torch.Size([4, 252])


Predicting:  50%|█████     | 1212/2400 [34:42<1:09:03,  3.49s/it]

GPU memory after batch 1211: 14.97 GB
Batch 1212 input shapes: input_ids=torch.Size([4, 292]), attention_mask=torch.Size([4, 292])


Predicting:  51%|█████     | 1213/2400 [34:45<1:06:19,  3.35s/it]

GPU memory after batch 1212: 14.97 GB
Batch 1213 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  51%|█████     | 1214/2400 [34:49<1:12:16,  3.66s/it]

GPU memory after batch 1213: 14.97 GB
Batch 1214 input shapes: input_ids=torch.Size([4, 199]), attention_mask=torch.Size([4, 199])


Predicting:  51%|█████     | 1215/2400 [34:51<1:01:52,  3.13s/it]

GPU memory after batch 1214: 14.97 GB
Batch 1215 input shapes: input_ids=torch.Size([4, 385]), attention_mask=torch.Size([4, 385])


Predicting:  51%|█████     | 1216/2400 [34:54<1:01:45,  3.13s/it]

GPU memory after batch 1215: 14.97 GB
Batch 1216 input shapes: input_ids=torch.Size([4, 274]), attention_mask=torch.Size([4, 274])


Predicting:  51%|█████     | 1217/2400 [34:57<58:04,  2.95s/it]  

GPU memory after batch 1216: 14.97 GB
Batch 1217 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  51%|█████     | 1218/2400 [35:01<1:06:25,  3.37s/it]

GPU memory after batch 1217: 14.97 GB
Batch 1218 input shapes: input_ids=torch.Size([4, 346]), attention_mask=torch.Size([4, 346])


Predicting:  51%|█████     | 1219/2400 [35:04<1:04:36,  3.28s/it]

GPU memory after batch 1218: 14.97 GB
Batch 1219 input shapes: input_ids=torch.Size([4, 289]), attention_mask=torch.Size([4, 289])


Predicting:  51%|█████     | 1220/2400 [35:07<1:01:16,  3.12s/it]

GPU memory after batch 1219: 14.97 GB
Batch 1220 input shapes: input_ids=torch.Size([4, 317]), attention_mask=torch.Size([4, 317])


Predicting:  51%|█████     | 1221/2400 [35:10<59:36,  3.03s/it]  

批次 1220: 原始预测: ['No', 'No', 'Yes', 'No']
GPU memory after batch 1220: 14.97 GB
Batch 1221 input shapes: input_ids=torch.Size([4, 382]), attention_mask=torch.Size([4, 382])


Predicting:  51%|█████     | 1222/2400 [35:13<1:01:33,  3.14s/it]

GPU memory after batch 1221: 14.97 GB
Batch 1222 input shapes: input_ids=torch.Size([4, 478]), attention_mask=torch.Size([4, 478])


Predicting:  51%|█████     | 1223/2400 [35:17<1:04:28,  3.29s/it]

GPU memory after batch 1222: 14.97 GB
Batch 1223 input shapes: input_ids=torch.Size([4, 418]), attention_mask=torch.Size([4, 418])


Predicting:  51%|█████     | 1224/2400 [35:20<1:04:21,  3.28s/it]

GPU memory after batch 1223: 14.97 GB
Batch 1224 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  51%|█████     | 1225/2400 [35:25<1:10:39,  3.61s/it]

GPU memory after batch 1224: 14.97 GB
Batch 1225 input shapes: input_ids=torch.Size([4, 340]), attention_mask=torch.Size([4, 340])


Predicting:  51%|█████     | 1226/2400 [35:28<1:07:23,  3.44s/it]

GPU memory after batch 1225: 14.97 GB
Batch 1226 input shapes: input_ids=torch.Size([4, 320]), attention_mask=torch.Size([4, 320])


Predicting:  51%|█████     | 1227/2400 [35:30<1:03:47,  3.26s/it]

GPU memory after batch 1226: 14.97 GB
Batch 1227 input shapes: input_ids=torch.Size([4, 256]), attention_mask=torch.Size([4, 256])


Predicting:  51%|█████     | 1228/2400 [35:33<58:07,  2.98s/it]  

GPU memory after batch 1227: 14.97 GB
Batch 1228 input shapes: input_ids=torch.Size([4, 404]), attention_mask=torch.Size([4, 404])


Predicting:  51%|█████     | 1229/2400 [35:36<58:58,  3.02s/it]

GPU memory after batch 1228: 14.97 GB
Batch 1229 input shapes: input_ids=torch.Size([4, 459]), attention_mask=torch.Size([4, 459])


Predicting:  51%|█████▏    | 1230/2400 [35:39<1:02:31,  3.21s/it]

GPU memory after batch 1229: 14.97 GB
Batch 1230 input shapes: input_ids=torch.Size([4, 200]), attention_mask=torch.Size([4, 200])


Predicting:  51%|█████▏    | 1231/2400 [35:41<54:54,  2.82s/it]  

批次 1230: 原始预测: ['No', 'No', 'No', 'No']
GPU memory after batch 1230: 14.97 GB
Batch 1231 input shapes: input_ids=torch.Size([4, 369]), attention_mask=torch.Size([4, 369])


Predicting:  51%|█████▏    | 1232/2400 [35:45<57:51,  2.97s/it]

GPU memory after batch 1231: 14.97 GB
Batch 1232 input shapes: input_ids=torch.Size([4, 197]), attention_mask=torch.Size([4, 197])


Predicting:  51%|█████▏    | 1233/2400 [35:47<51:37,  2.65s/it]

GPU memory after batch 1232: 14.97 GB
Batch 1233 input shapes: input_ids=torch.Size([4, 368]), attention_mask=torch.Size([4, 368])


Predicting:  51%|█████▏    | 1234/2400 [35:50<55:24,  2.85s/it]

GPU memory after batch 1233: 14.97 GB
Batch 1234 input shapes: input_ids=torch.Size([4, 368]), attention_mask=torch.Size([4, 368])


Predicting:  51%|█████▏    | 1235/2400 [35:53<57:53,  2.98s/it]

GPU memory after batch 1234: 14.97 GB
Batch 1235 input shapes: input_ids=torch.Size([4, 268]), attention_mask=torch.Size([4, 268])


Predicting:  52%|█████▏    | 1236/2400 [35:56<55:00,  2.84s/it]

GPU memory after batch 1235: 14.97 GB
Batch 1236 input shapes: input_ids=torch.Size([4, 378]), attention_mask=torch.Size([4, 378])


Predicting:  52%|█████▏    | 1237/2400 [35:59<58:04,  3.00s/it]

GPU memory after batch 1236: 14.97 GB
Batch 1237 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1238/2400 [36:03<1:05:58,  3.41s/it]

GPU memory after batch 1237: 14.97 GB
Batch 1238 input shapes: input_ids=torch.Size([4, 458]), attention_mask=torch.Size([4, 458])


Predicting:  52%|█████▏    | 1239/2400 [36:07<1:07:11,  3.47s/it]

GPU memory after batch 1238: 14.97 GB
Batch 1239 input shapes: input_ids=torch.Size([4, 276]), attention_mask=torch.Size([4, 276])


Predicting:  52%|█████▏    | 1240/2400 [36:10<1:01:35,  3.19s/it]

GPU memory after batch 1239: 14.97 GB
Batch 1240 input shapes: input_ids=torch.Size([4, 339]), attention_mask=torch.Size([4, 339])


Predicting:  52%|█████▏    | 1241/2400 [36:13<1:00:49,  3.15s/it]

批次 1240: 原始预测: ['Yes', 'Yes', 'No', 'No']
GPU memory after batch 1240: 14.97 GB
Batch 1241 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1242/2400 [36:17<1:07:48,  3.51s/it]

GPU memory after batch 1241: 14.97 GB
Batch 1242 input shapes: input_ids=torch.Size([4, 346]), attention_mask=torch.Size([4, 346])


Predicting:  52%|█████▏    | 1243/2400 [36:20<1:05:20,  3.39s/it]

GPU memory after batch 1242: 14.97 GB
Batch 1243 input shapes: input_ids=torch.Size([4, 293]), attention_mask=torch.Size([4, 293])


Predicting:  52%|█████▏    | 1244/2400 [36:23<1:01:29,  3.19s/it]

GPU memory after batch 1243: 14.97 GB
Batch 1244 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1245/2400 [36:27<1:08:12,  3.54s/it]

GPU memory after batch 1244: 14.97 GB
Batch 1245 input shapes: input_ids=torch.Size([4, 343]), attention_mask=torch.Size([4, 343])


Predicting:  52%|█████▏    | 1246/2400 [36:30<1:05:22,  3.40s/it]

GPU memory after batch 1245: 14.97 GB
Batch 1246 input shapes: input_ids=torch.Size([4, 352]), attention_mask=torch.Size([4, 352])


Predicting:  52%|█████▏    | 1247/2400 [36:33<1:03:27,  3.30s/it]

GPU memory after batch 1246: 14.97 GB
Batch 1247 input shapes: input_ids=torch.Size([4, 449]), attention_mask=torch.Size([4, 449])


Predicting:  52%|█████▏    | 1248/2400 [36:37<1:08:00,  3.54s/it]

GPU memory after batch 1247: 14.97 GB
Batch 1248 input shapes: input_ids=torch.Size([4, 427]), attention_mask=torch.Size([4, 427])


Predicting:  52%|█████▏    | 1249/2400 [36:41<1:09:11,  3.61s/it]

GPU memory after batch 1248: 14.97 GB
Batch 1249 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1250/2400 [36:46<1:13:12,  3.82s/it]

GPU memory after batch 1249: 14.97 GB
Batch 1250 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1251/2400 [36:50<1:15:59,  3.97s/it]

批次 1250: 原始预测: ['at the Bills 25', 'fumble at the Bills', 'The Patriots answered with a', 'The Patriots answered with a']
GPU memory after batch 1250: 14.97 GB
Batch 1251 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1252/2400 [36:54<1:17:55,  4.07s/it]

GPU memory after batch 1251: 14.97 GB
Batch 1252 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1253/2400 [36:58<1:19:14,  4.15s/it]

GPU memory after batch 1252: 14.97 GB
Batch 1253 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1254/2400 [37:03<1:20:08,  4.20s/it]

GPU memory after batch 1253: 14.97 GB
Batch 1254 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1255/2400 [37:07<1:21:02,  4.25s/it]

GPU memory after batch 1254: 14.97 GB
Batch 1255 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▏    | 1256/2400 [37:11<1:21:21,  4.27s/it]

GPU memory after batch 1255: 14.97 GB
Batch 1256 input shapes: input_ids=torch.Size([4, 183]), attention_mask=torch.Size([4, 183])


Predicting:  52%|█████▏    | 1257/2400 [37:14<1:09:19,  3.64s/it]

GPU memory after batch 1256: 14.97 GB
Batch 1257 input shapes: input_ids=torch.Size([4, 381]), attention_mask=torch.Size([4, 381])


Predicting:  52%|█████▏    | 1258/2400 [37:17<1:07:45,  3.56s/it]

GPU memory after batch 1257: 14.97 GB
Batch 1258 input shapes: input_ids=torch.Size([4, 374]), attention_mask=torch.Size([4, 374])


Predicting:  52%|█████▏    | 1259/2400 [37:20<1:06:24,  3.49s/it]

GPU memory after batch 1258: 14.97 GB
Batch 1259 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  52%|█████▎    | 1260/2400 [37:25<1:11:19,  3.75s/it]

GPU memory after batch 1259: 14.97 GB
Batch 1260 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1261/2400 [37:29<1:14:45,  3.94s/it]

批次 1260: 原始预测: ['the first time in', '1-1."\nOkay', 'Yes', 'Yes']
GPU memory after batch 1260: 14.97 GB
Batch 1261 input shapes: input_ids=torch.Size([4, 353]), attention_mask=torch.Size([4, 353])


Predicting:  53%|█████▎    | 1262/2400 [37:32<1:10:49,  3.73s/it]

GPU memory after batch 1261: 14.97 GB
Batch 1262 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1263/2400 [37:37<1:14:04,  3.91s/it]

GPU memory after batch 1262: 14.97 GB
Batch 1263 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1264/2400 [37:41<1:16:36,  4.05s/it]

GPU memory after batch 1263: 14.97 GB
Batch 1264 input shapes: input_ids=torch.Size([4, 510]), attention_mask=torch.Size([4, 510])


Predicting:  53%|█████▎    | 1265/2400 [37:45<1:15:34,  4.00s/it]

GPU memory after batch 1264: 14.97 GB
Batch 1265 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1266/2400 [37:49<1:17:19,  4.09s/it]

GPU memory after batch 1265: 14.97 GB
Batch 1266 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1267/2400 [37:54<1:18:48,  4.17s/it]

GPU memory after batch 1266: 14.97 GB
Batch 1267 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1268/2400 [37:58<1:19:32,  4.22s/it]

GPU memory after batch 1267: 14.97 GB
Batch 1268 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1269/2400 [38:02<1:20:02,  4.25s/it]

GPU memory after batch 1268: 14.97 GB
Batch 1269 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1270/2400 [38:07<1:20:21,  4.27s/it]

GPU memory after batch 1269: 14.97 GB
Batch 1270 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1271/2400 [38:11<1:20:33,  4.28s/it]

批次 1270: 原始预测: ['a 2-yard run', 'a 2-yard run', '2-yard run.', "Marshall's 2-yard"]
GPU memory after batch 1270: 14.97 GB
Batch 1271 input shapes: input_ids=torch.Size([4, 359]), attention_mask=torch.Size([4, 359])


Predicting:  53%|█████▎    | 1272/2400 [38:14<1:14:43,  3.97s/it]

GPU memory after batch 1271: 14.97 GB
Batch 1272 input shapes: input_ids=torch.Size([4, 400]), attention_mask=torch.Size([4, 400])


Predicting:  53%|█████▎    | 1273/2400 [38:18<1:12:33,  3.86s/it]

GPU memory after batch 1272: 14.97 GB
Batch 1273 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1274/2400 [38:22<1:15:18,  4.01s/it]

GPU memory after batch 1273: 14.97 GB
Batch 1274 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1275/2400 [38:26<1:16:57,  4.10s/it]

GPU memory after batch 1274: 14.97 GB
Batch 1275 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1276/2400 [38:31<1:18:21,  4.18s/it]

GPU memory after batch 1275: 14.97 GB
Batch 1276 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1277/2400 [38:35<1:19:18,  4.24s/it]

GPU memory after batch 1276: 14.97 GB
Batch 1277 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1278/2400 [38:39<1:19:57,  4.28s/it]

GPU memory after batch 1277: 14.97 GB
Batch 1278 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1279/2400 [38:44<1:20:23,  4.30s/it]

GPU memory after batch 1278: 14.97 GB
Batch 1279 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1280/2400 [38:48<1:20:23,  4.31s/it]

GPU memory after batch 1279: 14.97 GB
Batch 1280 input shapes: input_ids=torch.Size([4, 398]), attention_mask=torch.Size([4, 398])


Predicting:  53%|█████▎    | 1281/2400 [38:51<1:13:42,  3.95s/it]

批次 1280: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1280: 14.97 GB
Batch 1281 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1282/2400 [38:56<1:15:57,  4.08s/it]

GPU memory after batch 1281: 14.97 GB
Batch 1282 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  53%|█████▎    | 1283/2400 [39:00<1:17:13,  4.15s/it]

GPU memory after batch 1282: 14.97 GB
Batch 1283 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▎    | 1284/2400 [39:04<1:18:22,  4.21s/it]

GPU memory after batch 1283: 14.97 GB
Batch 1284 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▎    | 1285/2400 [39:09<1:18:52,  4.24s/it]

GPU memory after batch 1284: 14.97 GB
Batch 1285 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▎    | 1286/2400 [39:13<1:19:12,  4.27s/it]

GPU memory after batch 1285: 14.97 GB
Batch 1286 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▎    | 1287/2400 [39:17<1:19:24,  4.28s/it]

GPU memory after batch 1286: 14.97 GB
Batch 1287 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▎    | 1288/2400 [39:22<1:19:32,  4.29s/it]

GPU memory after batch 1287: 14.97 GB
Batch 1288 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▎    | 1289/2400 [39:26<1:19:36,  4.30s/it]

GPU memory after batch 1288: 14.97 GB
Batch 1289 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▍    | 1290/2400 [39:30<1:19:38,  4.30s/it]

GPU memory after batch 1289: 14.97 GB
Batch 1290 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▍    | 1291/2400 [39:35<1:19:36,  4.31s/it]

批次 1290: 原始预测: ['turnover battle 3-', '53. The Eagles had', 'turnover battle 3-', '53. The Eagles had']
GPU memory after batch 1290: 14.97 GB
Batch 1291 input shapes: input_ids=torch.Size([4, 468]), attention_mask=torch.Size([4, 468])


Predicting:  54%|█████▍    | 1292/2400 [39:38<1:15:45,  4.10s/it]

GPU memory after batch 1291: 14.97 GB
Batch 1292 input shapes: input_ids=torch.Size([4, 440]), attention_mask=torch.Size([4, 440])


Predicting:  54%|█████▍    | 1293/2400 [39:41<1:11:11,  3.86s/it]

GPU memory after batch 1292: 14.97 GB
Batch 1293 input shapes: input_ids=torch.Size([4, 456]), attention_mask=torch.Size([4, 456])


Predicting:  54%|█████▍    | 1294/2400 [39:45<1:09:44,  3.78s/it]

GPU memory after batch 1293: 14.97 GB
Batch 1294 input shapes: input_ids=torch.Size([4, 458]), attention_mask=torch.Size([4, 458])


Predicting:  54%|█████▍    | 1295/2400 [39:49<1:08:42,  3.73s/it]

GPU memory after batch 1294: 14.97 GB
Batch 1295 input shapes: input_ids=torch.Size([4, 413]), attention_mask=torch.Size([4, 413])


Predicting:  54%|█████▍    | 1296/2400 [39:52<1:05:21,  3.55s/it]

GPU memory after batch 1295: 14.97 GB
Batch 1296 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▍    | 1297/2400 [39:56<1:09:47,  3.80s/it]

GPU memory after batch 1296: 14.97 GB
Batch 1297 input shapes: input_ids=torch.Size([4, 500]), attention_mask=torch.Size([4, 500])


Predicting:  54%|█████▍    | 1298/2400 [40:00<1:11:53,  3.91s/it]

GPU memory after batch 1297: 14.97 GB
Batch 1298 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▍    | 1299/2400 [40:05<1:14:04,  4.04s/it]

GPU memory after batch 1298: 14.97 GB
Batch 1299 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▍    | 1300/2400 [40:09<1:15:32,  4.12s/it]

GPU memory after batch 1299: 14.97 GB
Batch 1300 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▍    | 1301/2400 [40:13<1:16:49,  4.19s/it]

批次 1300: 原始预测: ['Yes', 'Yes', 'kicked a 41-yard', 'kicked a 41-yard']
GPU memory after batch 1300: 14.97 GB
Batch 1301 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▍    | 1302/2400 [40:18<1:17:24,  4.23s/it]

GPU memory after batch 1301: 14.97 GB
Batch 1302 input shapes: input_ids=torch.Size([4, 507]), attention_mask=torch.Size([4, 507])


Predicting:  54%|█████▍    | 1303/2400 [40:22<1:15:23,  4.12s/it]

GPU memory after batch 1302: 14.97 GB
Batch 1303 input shapes: input_ids=torch.Size([4, 399]), attention_mask=torch.Size([4, 399])


Predicting:  54%|█████▍    | 1304/2400 [40:25<1:09:51,  3.82s/it]

GPU memory after batch 1303: 14.97 GB
Batch 1304 input shapes: input_ids=torch.Size([4, 399]), attention_mask=torch.Size([4, 399])


Predicting:  54%|█████▍    | 1305/2400 [40:28<1:05:58,  3.61s/it]

GPU memory after batch 1304: 14.97 GB
Batch 1305 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▍    | 1306/2400 [40:32<1:10:01,  3.84s/it]

GPU memory after batch 1305: 14.97 GB
Batch 1306 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  54%|█████▍    | 1307/2400 [40:37<1:12:33,  3.98s/it]

GPU memory after batch 1306: 14.97 GB
Batch 1307 input shapes: input_ids=torch.Size([4, 494]), attention_mask=torch.Size([4, 494])


Predicting:  55%|█████▍    | 1308/2400 [40:40<1:11:49,  3.95s/it]

GPU memory after batch 1307: 14.97 GB
Batch 1308 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▍    | 1309/2400 [40:45<1:14:03,  4.07s/it]

GPU memory after batch 1308: 14.97 GB
Batch 1309 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▍    | 1310/2400 [40:49<1:15:19,  4.15s/it]

GPU memory after batch 1309: 14.97 GB
Batch 1310 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▍    | 1311/2400 [40:53<1:16:10,  4.20s/it]

批次 1310: 原始预测: ['career three touchdown game).', 'tying his career high', 'had 7-3', 'but the Broncos came out']
GPU memory after batch 1310: 14.97 GB
Batch 1311 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▍    | 1312/2400 [40:58<1:16:45,  4.23s/it]

GPU memory after batch 1311: 14.97 GB
Batch 1312 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▍    | 1313/2400 [41:02<1:17:07,  4.26s/it]

GPU memory after batch 1312: 14.97 GB
Batch 1313 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▍    | 1314/2400 [41:06<1:17:38,  4.29s/it]

GPU memory after batch 1313: 14.97 GB
Batch 1314 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▍    | 1315/2400 [41:11<1:17:43,  4.30s/it]

GPU memory after batch 1314: 14.97 GB
Batch 1315 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▍    | 1316/2400 [41:15<1:17:44,  4.30s/it]

GPU memory after batch 1315: 14.97 GB
Batch 1316 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▍    | 1317/2400 [41:19<1:18:00,  4.32s/it]

GPU memory after batch 1316: 14.97 GB
Batch 1317 input shapes: input_ids=torch.Size([4, 343]), attention_mask=torch.Size([4, 343])


Predicting:  55%|█████▍    | 1318/2400 [41:22<1:11:07,  3.94s/it]

GPU memory after batch 1317: 14.97 GB
Batch 1318 input shapes: input_ids=torch.Size([4, 445]), attention_mask=torch.Size([4, 445])


Predicting:  55%|█████▍    | 1319/2400 [41:26<1:07:31,  3.75s/it]

GPU memory after batch 1318: 14.97 GB
Batch 1319 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1320/2400 [41:30<1:10:48,  3.93s/it]

GPU memory after batch 1319: 14.97 GB
Batch 1320 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1321/2400 [41:34<1:12:48,  4.05s/it]

批次 1320: 原始预测: ['2012. The', '2012. The', ', once on a', 'the game, once on']
GPU memory after batch 1320: 14.97 GB
Batch 1321 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1322/2400 [41:39<1:14:10,  4.13s/it]

GPU memory after batch 1321: 14.97 GB
Batch 1322 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1323/2400 [41:43<1:15:22,  4.20s/it]

GPU memory after batch 1322: 14.97 GB
Batch 1323 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1324/2400 [41:47<1:16:12,  4.25s/it]

GPU memory after batch 1323: 14.97 GB
Batch 1324 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1325/2400 [41:52<1:16:29,  4.27s/it]

GPU memory after batch 1324: 14.97 GB
Batch 1325 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1326/2400 [41:56<1:16:57,  4.30s/it]

GPU memory after batch 1325: 14.97 GB
Batch 1326 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1327/2400 [42:01<1:17:15,  4.32s/it]

GPU memory after batch 1326: 14.97 GB
Batch 1327 input shapes: input_ids=torch.Size([4, 510]), attention_mask=torch.Size([4, 510])


Predicting:  55%|█████▌    | 1328/2400 [42:04<1:14:48,  4.19s/it]

GPU memory after batch 1327: 14.97 GB
Batch 1328 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1329/2400 [42:09<1:15:41,  4.24s/it]

GPU memory after batch 1328: 14.97 GB
Batch 1329 input shapes: input_ids=torch.Size([4, 447]), attention_mask=torch.Size([4, 447])


Predicting:  55%|█████▌    | 1330/2400 [42:12<1:10:33,  3.96s/it]

GPU memory after batch 1329: 14.97 GB
Batch 1330 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  55%|█████▌    | 1331/2400 [42:16<1:12:24,  4.06s/it]

批次 1330: 原始预测: ['On the next play,', '8-yard loss by', 'On the next play,', '8-yard loss by']
GPU memory after batch 1330: 14.97 GB
Batch 1331 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1332/2400 [42:21<1:13:57,  4.15s/it]

GPU memory after batch 1331: 14.97 GB
Batch 1332 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1333/2400 [42:25<1:15:00,  4.22s/it]

GPU memory after batch 1332: 14.97 GB
Batch 1333 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1334/2400 [42:29<1:15:43,  4.26s/it]

GPU memory after batch 1333: 14.97 GB
Batch 1334 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1335/2400 [42:34<1:15:56,  4.28s/it]

GPU memory after batch 1334: 14.97 GB
Batch 1335 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1336/2400 [42:38<1:16:04,  4.29s/it]

GPU memory after batch 1335: 14.97 GB
Batch 1336 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1337/2400 [42:42<1:16:08,  4.30s/it]

GPU memory after batch 1336: 14.97 GB
Batch 1337 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1338/2400 [42:47<1:16:26,  4.32s/it]

GPU memory after batch 1337: 14.97 GB
Batch 1338 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1339/2400 [42:51<1:16:37,  4.33s/it]

GPU memory after batch 1338: 14.97 GB
Batch 1339 input shapes: input_ids=torch.Size([4, 296]), attention_mask=torch.Size([4, 296])


Predicting:  56%|█████▌    | 1340/2400 [42:54<1:08:04,  3.85s/it]

GPU memory after batch 1339: 14.97 GB
Batch 1340 input shapes: input_ids=torch.Size([4, 411]), attention_mask=torch.Size([4, 411])


Predicting:  56%|█████▌    | 1341/2400 [42:57<1:04:11,  3.64s/it]

批次 1340: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1340: 14.97 GB
Batch 1341 input shapes: input_ids=torch.Size([4, 411]), attention_mask=torch.Size([4, 411])


Predicting:  56%|█████▌    | 1342/2400 [43:00<1:01:27,  3.49s/it]

GPU memory after batch 1341: 14.97 GB
Batch 1342 input shapes: input_ids=torch.Size([4, 212]), attention_mask=torch.Size([4, 212])


Predicting:  56%|█████▌    | 1343/2400 [43:02<53:27,  3.03s/it]  

GPU memory after batch 1342: 14.97 GB
Batch 1343 input shapes: input_ids=torch.Size([4, 208]), attention_mask=torch.Size([4, 208])


Predicting:  56%|█████▌    | 1344/2400 [43:04<47:37,  2.71s/it]

GPU memory after batch 1343: 14.97 GB
Batch 1344 input shapes: input_ids=torch.Size([4, 352]), attention_mask=torch.Size([4, 352])


Predicting:  56%|█████▌    | 1345/2400 [43:08<52:05,  2.96s/it]

GPU memory after batch 1344: 14.97 GB
Batch 1345 input shapes: input_ids=torch.Size([4, 157]), attention_mask=torch.Size([4, 157])


Predicting:  56%|█████▌    | 1346/2400 [43:09<44:17,  2.52s/it]

GPU memory after batch 1345: 14.97 GB
Batch 1346 input shapes: input_ids=torch.Size([4, 155]), attention_mask=torch.Size([4, 155])


Predicting:  56%|█████▌    | 1347/2400 [43:11<38:49,  2.21s/it]

GPU memory after batch 1346: 14.97 GB
Batch 1347 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1348/2400 [43:15<49:50,  2.84s/it]

GPU memory after batch 1347: 14.97 GB
Batch 1348 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▌    | 1349/2400 [43:19<57:32,  3.29s/it]

GPU memory after batch 1348: 14.97 GB
Batch 1349 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▋    | 1350/2400 [43:24<1:02:54,  3.59s/it]

GPU memory after batch 1349: 14.97 GB
Batch 1350 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▋    | 1351/2400 [43:28<1:06:37,  3.81s/it]

批次 1350: 原始预测: ['-yard run. The Bears', '-yard run. The Bears', '16-yard touchdown pass to', "Vikings' kicker Walsh attempted"]
GPU memory after batch 1350: 14.97 GB
Batch 1351 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▋    | 1352/2400 [43:32<1:09:12,  3.96s/it]

GPU memory after batch 1351: 14.97 GB
Batch 1352 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▋    | 1353/2400 [43:36<1:10:59,  4.07s/it]

GPU memory after batch 1352: 14.97 GB
Batch 1353 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  56%|█████▋    | 1354/2400 [43:41<1:12:28,  4.16s/it]

GPU memory after batch 1353: 14.97 GB
Batch 1354 input shapes: input_ids=torch.Size([4, 441]), attention_mask=torch.Size([4, 441])


Predicting:  56%|█████▋    | 1355/2400 [43:44<1:07:52,  3.90s/it]

GPU memory after batch 1354: 14.97 GB
Batch 1355 input shapes: input_ids=torch.Size([4, 343]), attention_mask=torch.Size([4, 343])


Predicting:  56%|█████▋    | 1356/2400 [43:47<1:03:27,  3.65s/it]

GPU memory after batch 1355: 14.97 GB
Batch 1356 input shapes: input_ids=torch.Size([4, 383]), attention_mask=torch.Size([4, 383])


Predicting:  57%|█████▋    | 1357/2400 [43:51<1:01:58,  3.57s/it]

GPU memory after batch 1356: 14.97 GB
Batch 1357 input shapes: input_ids=torch.Size([4, 356]), attention_mask=torch.Size([4, 356])


Predicting:  57%|█████▋    | 1358/2400 [43:54<1:00:11,  3.47s/it]

GPU memory after batch 1357: 14.97 GB
Batch 1358 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  57%|█████▋    | 1359/2400 [43:58<1:04:49,  3.74s/it]

GPU memory after batch 1358: 14.97 GB
Batch 1359 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  57%|█████▋    | 1360/2400 [44:03<1:07:46,  3.91s/it]

GPU memory after batch 1359: 14.97 GB
Batch 1360 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  57%|█████▋    | 1361/2400 [44:07<1:09:49,  4.03s/it]

批次 1360: 原始预测: ['Tucker kicked a 45', 'Then, the Ravens defense', 'Tucker kicked a 45', 'Tucker kicked a 45']
GPU memory after batch 1360: 14.97 GB
Batch 1361 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  57%|█████▋    | 1362/2400 [44:11<1:11:13,  4.12s/it]

GPU memory after batch 1361: 14.97 GB
Batch 1362 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  57%|█████▋    | 1363/2400 [44:15<1:12:13,  4.18s/it]

GPU memory after batch 1362: 14.97 GB
Batch 1363 input shapes: input_ids=torch.Size([4, 450]), attention_mask=torch.Size([4, 450])


Predicting:  57%|█████▋    | 1364/2400 [44:19<1:09:10,  4.01s/it]

GPU memory after batch 1363: 14.97 GB
Batch 1364 input shapes: input_ids=torch.Size([4, 202]), attention_mask=torch.Size([4, 202])


Predicting:  57%|█████▋    | 1365/2400 [44:21<58:25,  3.39s/it]  

GPU memory after batch 1364: 14.97 GB
Batch 1365 input shapes: input_ids=torch.Size([4, 460]), attention_mask=torch.Size([4, 460])


Predicting:  57%|█████▋    | 1366/2400 [44:25<59:33,  3.46s/it]

GPU memory after batch 1365: 14.97 GB
Batch 1366 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  57%|█████▋    | 1367/2400 [44:29<1:04:11,  3.73s/it]

GPU memory after batch 1366: 14.97 GB
Batch 1367 input shapes: input_ids=torch.Size([4, 327]), attention_mask=torch.Size([4, 327])


Predicting:  57%|█████▋    | 1368/2400 [44:32<1:00:16,  3.50s/it]

GPU memory after batch 1367: 14.97 GB
Batch 1368 input shapes: input_ids=torch.Size([4, 407]), attention_mask=torch.Size([4, 407])


Predicting:  57%|█████▋    | 1369/2400 [44:36<1:00:44,  3.53s/it]

GPU memory after batch 1368: 14.97 GB
Batch 1369 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  57%|█████▋    | 1370/2400 [44:40<1:04:58,  3.78s/it]

GPU memory after batch 1369: 14.97 GB
Batch 1370 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  57%|█████▋    | 1371/2400 [44:44<1:07:38,  3.94s/it]

批次 1370: 原始预测: ['Eric Kendricks and J', 'Eric Kendricks and J', "'s pass in the end", "'s pass in the end"]
GPU memory after batch 1370: 14.97 GB
Batch 1371 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  57%|█████▋    | 1372/2400 [44:49<1:09:44,  4.07s/it]

GPU memory after batch 1371: 14.97 GB
Batch 1372 input shapes: input_ids=torch.Size([4, 378]), attention_mask=torch.Size([4, 378])


Predicting:  57%|█████▋    | 1373/2400 [44:52<1:06:05,  3.86s/it]

GPU memory after batch 1372: 14.97 GB
Batch 1373 input shapes: input_ids=torch.Size([4, 381]), attention_mask=torch.Size([4, 381])


Predicting:  57%|█████▋    | 1374/2400 [44:55<1:03:31,  3.72s/it]

GPU memory after batch 1373: 14.97 GB
Batch 1374 input shapes: input_ids=torch.Size([4, 434]), attention_mask=torch.Size([4, 434])


Predicting:  57%|█████▋    | 1375/2400 [44:59<1:01:16,  3.59s/it]

GPU memory after batch 1374: 14.97 GB
Batch 1375 input shapes: input_ids=torch.Size([4, 437]), attention_mask=torch.Size([4, 437])


Predicting:  57%|█████▋    | 1376/2400 [45:02<59:41,  3.50s/it]  

GPU memory after batch 1375: 14.97 GB
Batch 1376 input shapes: input_ids=torch.Size([4, 454]), attention_mask=torch.Size([4, 454])


Predicting:  57%|█████▋    | 1377/2400 [45:06<1:00:16,  3.53s/it]

GPU memory after batch 1376: 14.97 GB
Batch 1377 input shapes: input_ids=torch.Size([4, 295]), attention_mask=torch.Size([4, 295])


Predicting:  57%|█████▋    | 1378/2400 [45:08<56:06,  3.29s/it]  

GPU memory after batch 1377: 14.97 GB
Batch 1378 input shapes: input_ids=torch.Size([4, 375]), attention_mask=torch.Size([4, 375])


Predicting:  57%|█████▋    | 1379/2400 [45:12<56:15,  3.31s/it]

GPU memory after batch 1378: 14.97 GB
Batch 1379 input shapes: input_ids=torch.Size([4, 345]), attention_mask=torch.Size([4, 345])


Predicting:  57%|█████▊    | 1380/2400 [45:15<55:07,  3.24s/it]

GPU memory after batch 1379: 14.97 GB
Batch 1380 input shapes: input_ids=torch.Size([4, 264]), attention_mask=torch.Size([4, 264])


Predicting:  58%|█████▊    | 1381/2400 [45:17<52:36,  3.10s/it]

批次 1380: 原始预测: ['Yes', 'Yes', 'No', 'No\nYes']
GPU memory after batch 1380: 14.97 GB
Batch 1381 input shapes: input_ids=torch.Size([4, 419]), attention_mask=torch.Size([4, 419])


Predicting:  58%|█████▊    | 1382/2400 [45:21<55:53,  3.29s/it]

GPU memory after batch 1381: 14.97 GB
Batch 1382 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  58%|█████▊    | 1383/2400 [45:26<1:01:17,  3.62s/it]

GPU memory after batch 1382: 14.97 GB
Batch 1383 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  58%|█████▊    | 1384/2400 [45:30<1:04:46,  3.83s/it]

GPU memory after batch 1383: 14.97 GB
Batch 1384 input shapes: input_ids=torch.Size([4, 413]), attention_mask=torch.Size([4, 413])


Predicting:  58%|█████▊    | 1385/2400 [45:33<1:01:12,  3.62s/it]

GPU memory after batch 1384: 14.97 GB
Batch 1385 input shapes: input_ids=torch.Size([4, 369]), attention_mask=torch.Size([4, 369])


Predicting:  58%|█████▊    | 1386/2400 [45:36<59:41,  3.53s/it]  

GPU memory after batch 1385: 14.97 GB
Batch 1386 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  58%|█████▊    | 1387/2400 [45:41<1:03:51,  3.78s/it]

GPU memory after batch 1386: 14.97 GB
Batch 1387 input shapes: input_ids=torch.Size([4, 388]), attention_mask=torch.Size([4, 388])


Predicting:  58%|█████▊    | 1388/2400 [45:44<1:00:27,  3.58s/it]

GPU memory after batch 1387: 14.97 GB
Batch 1388 input shapes: input_ids=torch.Size([4, 451]), attention_mask=torch.Size([4, 451])


Predicting:  58%|█████▊    | 1389/2400 [45:47<1:00:26,  3.59s/it]

GPU memory after batch 1388: 14.97 GB
Batch 1389 input shapes: input_ids=torch.Size([4, 453]), attention_mask=torch.Size([4, 453])


Predicting:  58%|█████▊    | 1390/2400 [45:51<1:00:31,  3.60s/it]

GPU memory after batch 1389: 14.97 GB
Batch 1390 input shapes: input_ids=torch.Size([4, 461]), attention_mask=torch.Size([4, 461])


Predicting:  58%|█████▊    | 1391/2400 [45:55<1:00:41,  3.61s/it]

批次 1390: 原始预测: ['Yes', 'Yes', 'Yes', 'No']
GPU memory after batch 1390: 14.97 GB
Batch 1391 input shapes: input_ids=torch.Size([4, 260]), attention_mask=torch.Size([4, 260])


Predicting:  58%|█████▊    | 1392/2400 [45:57<54:47,  3.26s/it]  

GPU memory after batch 1391: 14.97 GB
Batch 1392 input shapes: input_ids=torch.Size([4, 314]), attention_mask=torch.Size([4, 314])


Predicting:  58%|█████▊    | 1393/2400 [46:00<54:56,  3.27s/it]

GPU memory after batch 1392: 14.97 GB
Batch 1393 input shapes: input_ids=torch.Size([4, 297]), attention_mask=torch.Size([4, 297])


Predicting:  58%|█████▊    | 1394/2400 [46:03<52:21,  3.12s/it]

GPU memory after batch 1393: 14.97 GB
Batch 1394 input shapes: input_ids=torch.Size([4, 463]), attention_mask=torch.Size([4, 463])


Predicting:  58%|█████▊    | 1395/2400 [46:07<54:50,  3.27s/it]

GPU memory after batch 1394: 14.97 GB
Batch 1395 input shapes: input_ids=torch.Size([4, 508]), attention_mask=torch.Size([4, 508])


Predicting:  58%|█████▊    | 1396/2400 [46:11<57:48,  3.45s/it]

GPU memory after batch 1395: 14.97 GB
Batch 1396 input shapes: input_ids=torch.Size([4, 471]), attention_mask=torch.Size([4, 471])


Predicting:  58%|█████▊    | 1397/2400 [46:14<58:34,  3.50s/it]

GPU memory after batch 1396: 14.97 GB
Batch 1397 input shapes: input_ids=torch.Size([4, 335]), attention_mask=torch.Size([4, 335])


Predicting:  58%|█████▊    | 1398/2400 [46:17<56:02,  3.36s/it]

GPU memory after batch 1397: 14.97 GB
Batch 1398 input shapes: input_ids=torch.Size([4, 511]), attention_mask=torch.Size([4, 511])


Predicting:  58%|█████▊    | 1399/2400 [46:21<58:36,  3.51s/it]

GPU memory after batch 1398: 14.97 GB
Batch 1399 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  58%|█████▊    | 1400/2400 [46:26<1:02:48,  3.77s/it]

GPU memory after batch 1399: 14.97 GB
Batch 1400 input shapes: input_ids=torch.Size([4, 311]), attention_mask=torch.Size([4, 311])


Predicting:  58%|█████▊    | 1401/2400 [46:28<57:56,  3.48s/it]  

批次 1400: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1400: 14.97 GB
Batch 1401 input shapes: input_ids=torch.Size([4, 408]), attention_mask=torch.Size([4, 408])


Predicting:  58%|█████▊    | 1402/2400 [46:32<56:08,  3.38s/it]

GPU memory after batch 1401: 14.97 GB
Batch 1402 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  58%|█████▊    | 1403/2400 [46:36<1:01:01,  3.67s/it]

GPU memory after batch 1402: 14.97 GB
Batch 1403 input shapes: input_ids=torch.Size([4, 391]), attention_mask=torch.Size([4, 391])


Predicting:  58%|█████▊    | 1404/2400 [46:39<58:14,  3.51s/it]  

GPU memory after batch 1403: 14.97 GB
Batch 1404 input shapes: input_ids=torch.Size([4, 269]), attention_mask=torch.Size([4, 269])


Predicting:  59%|█████▊    | 1405/2400 [46:42<53:08,  3.20s/it]

GPU memory after batch 1404: 14.97 GB
Batch 1405 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▊    | 1406/2400 [46:46<58:51,  3.55s/it]

GPU memory after batch 1405: 14.97 GB
Batch 1406 input shapes: input_ids=torch.Size([4, 301]), attention_mask=torch.Size([4, 301])


Predicting:  59%|█████▊    | 1407/2400 [46:49<57:13,  3.46s/it]

GPU memory after batch 1406: 14.97 GB
Batch 1407 input shapes: input_ids=torch.Size([4, 276]), attention_mask=torch.Size([4, 276])


Predicting:  59%|█████▊    | 1408/2400 [46:52<52:29,  3.18s/it]

GPU memory after batch 1407: 14.97 GB
Batch 1408 input shapes: input_ids=torch.Size([4, 354]), attention_mask=torch.Size([4, 354])


Predicting:  59%|█████▊    | 1409/2400 [46:55<52:51,  3.20s/it]

GPU memory after batch 1408: 14.97 GB
Batch 1409 input shapes: input_ids=torch.Size([4, 389]), attention_mask=torch.Size([4, 389])


Predicting:  59%|█████▉    | 1410/2400 [46:59<54:45,  3.32s/it]

GPU memory after batch 1409: 14.97 GB
Batch 1410 input shapes: input_ids=torch.Size([4, 495]), attention_mask=torch.Size([4, 495])


Predicting:  59%|█████▉    | 1411/2400 [47:02<57:23,  3.48s/it]

批次 1410: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1410: 14.97 GB
Batch 1411 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1412/2400 [47:07<1:01:27,  3.73s/it]

GPU memory after batch 1411: 14.97 GB
Batch 1412 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1413/2400 [47:11<1:04:16,  3.91s/it]

GPU memory after batch 1412: 14.97 GB
Batch 1413 input shapes: input_ids=torch.Size([4, 436]), attention_mask=torch.Size([4, 436])


Predicting:  59%|█████▉    | 1414/2400 [47:14<1:01:09,  3.72s/it]

GPU memory after batch 1413: 14.97 GB
Batch 1414 input shapes: input_ids=torch.Size([4, 301]), attention_mask=torch.Size([4, 301])


Predicting:  59%|█████▉    | 1415/2400 [47:17<56:24,  3.44s/it]  

GPU memory after batch 1414: 14.97 GB
Batch 1415 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1416/2400 [47:21<1:00:55,  3.72s/it]

GPU memory after batch 1415: 14.97 GB
Batch 1416 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1417/2400 [47:26<1:04:04,  3.91s/it]

GPU memory after batch 1416: 14.97 GB
Batch 1417 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1418/2400 [47:30<1:06:00,  4.03s/it]

GPU memory after batch 1417: 14.97 GB
Batch 1418 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1419/2400 [47:34<1:07:34,  4.13s/it]

GPU memory after batch 1418: 14.97 GB
Batch 1419 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1420/2400 [47:39<1:08:39,  4.20s/it]

GPU memory after batch 1419: 14.97 GB
Batch 1420 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1421/2400 [47:43<1:09:23,  4.25s/it]

批次 1420: 原始预测: ['Yes', 'Yes', '40 left in the half', '. The Cowboys took the']
GPU memory after batch 1420: 14.97 GB
Batch 1421 input shapes: input_ids=torch.Size([4, 270]), attention_mask=torch.Size([4, 270])


Predicting:  59%|█████▉    | 1422/2400 [47:46<1:02:58,  3.86s/it]

GPU memory after batch 1421: 14.97 GB
Batch 1422 input shapes: input_ids=torch.Size([4, 327]), attention_mask=torch.Size([4, 327])


Predicting:  59%|█████▉    | 1423/2400 [47:49<58:36,  3.60s/it]  

GPU memory after batch 1422: 14.97 GB
Batch 1423 input shapes: input_ids=torch.Size([4, 362]), attention_mask=torch.Size([4, 362])


Predicting:  59%|█████▉    | 1424/2400 [47:52<57:07,  3.51s/it]

GPU memory after batch 1423: 14.97 GB
Batch 1424 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1425/2400 [47:57<1:01:14,  3.77s/it]

GPU memory after batch 1424: 14.97 GB
Batch 1425 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  59%|█████▉    | 1426/2400 [48:01<1:04:04,  3.95s/it]

GPU memory after batch 1425: 14.97 GB
Batch 1426 input shapes: input_ids=torch.Size([4, 338]), attention_mask=torch.Size([4, 338])


Predicting:  59%|█████▉    | 1427/2400 [48:04<59:42,  3.68s/it]  

GPU memory after batch 1426: 14.97 GB
Batch 1427 input shapes: input_ids=torch.Size([4, 274]), attention_mask=torch.Size([4, 274])


Predicting:  60%|█████▉    | 1428/2400 [48:07<53:58,  3.33s/it]

GPU memory after batch 1427: 14.97 GB
Batch 1428 input shapes: input_ids=torch.Size([4, 381]), attention_mask=torch.Size([4, 381])


Predicting:  60%|█████▉    | 1429/2400 [48:10<54:07,  3.34s/it]

GPU memory after batch 1428: 14.97 GB
Batch 1429 input shapes: input_ids=torch.Size([4, 330]), attention_mask=torch.Size([4, 330])


Predicting:  60%|█████▉    | 1430/2400 [48:13<52:27,  3.24s/it]

GPU memory after batch 1429: 14.97 GB
Batch 1430 input shapes: input_ids=torch.Size([4, 332]), attention_mask=torch.Size([4, 332])


Predicting:  60%|█████▉    | 1431/2400 [48:16<51:16,  3.17s/it]

批次 1430: 原始预测: ['No', 'No', 'Yes', 'No']
GPU memory after batch 1430: 14.97 GB
Batch 1431 input shapes: input_ids=torch.Size([4, 200]), attention_mask=torch.Size([4, 200])


Predicting:  60%|█████▉    | 1432/2400 [48:18<45:03,  2.79s/it]

GPU memory after batch 1431: 14.97 GB
Batch 1432 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|█████▉    | 1433/2400 [48:22<52:37,  3.27s/it]

GPU memory after batch 1432: 14.97 GB
Batch 1433 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|█████▉    | 1434/2400 [48:27<57:52,  3.60s/it]

GPU memory after batch 1433: 14.97 GB
Batch 1434 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|█████▉    | 1435/2400 [48:31<1:01:17,  3.81s/it]

GPU memory after batch 1434: 14.97 GB
Batch 1435 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|█████▉    | 1436/2400 [48:35<1:03:39,  3.96s/it]

GPU memory after batch 1435: 14.97 GB
Batch 1436 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|█████▉    | 1437/2400 [48:40<1:05:18,  4.07s/it]

GPU memory after batch 1436: 14.97 GB
Batch 1437 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|█████▉    | 1438/2400 [48:44<1:06:25,  4.14s/it]

GPU memory after batch 1437: 14.97 GB
Batch 1438 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|█████▉    | 1439/2400 [48:48<1:07:10,  4.19s/it]

GPU memory after batch 1438: 14.97 GB
Batch 1439 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|██████    | 1440/2400 [48:53<1:07:42,  4.23s/it]

GPU memory after batch 1439: 14.97 GB
Batch 1440 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|██████    | 1441/2400 [48:57<1:08:02,  4.26s/it]

批次 1440: 原始预测: ['. After a 9', '-yard line to start the', 'less. In the fourth', 'less. In the fourth']
GPU memory after batch 1440: 14.97 GB
Batch 1441 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|██████    | 1442/2400 [49:01<1:08:29,  4.29s/it]

GPU memory after batch 1441: 14.97 GB
Batch 1442 input shapes: input_ids=torch.Size([4, 259]), attention_mask=torch.Size([4, 259])


Predicting:  60%|██████    | 1443/2400 [49:04<1:01:49,  3.88s/it]

GPU memory after batch 1442: 14.97 GB
Batch 1443 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|██████    | 1444/2400 [49:09<1:03:51,  4.01s/it]

GPU memory after batch 1443: 14.97 GB
Batch 1444 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|██████    | 1445/2400 [49:13<1:05:30,  4.12s/it]

GPU memory after batch 1444: 14.97 GB
Batch 1445 input shapes: input_ids=torch.Size([4, 481]), attention_mask=torch.Size([4, 481])


Predicting:  60%|██████    | 1446/2400 [49:17<1:04:11,  4.04s/it]

GPU memory after batch 1445: 14.97 GB
Batch 1446 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|██████    | 1447/2400 [49:21<1:05:27,  4.12s/it]

GPU memory after batch 1446: 14.97 GB
Batch 1447 input shapes: input_ids=torch.Size([4, 361]), attention_mask=torch.Size([4, 361])


Predicting:  60%|██████    | 1448/2400 [49:24<1:01:31,  3.88s/it]

GPU memory after batch 1447: 14.97 GB
Batch 1448 input shapes: input_ids=torch.Size([4, 424]), attention_mask=torch.Size([4, 424])


Predicting:  60%|██████    | 1449/2400 [49:28<58:36,  3.70s/it]  

GPU memory after batch 1448: 14.97 GB
Batch 1449 input shapes: input_ids=torch.Size([4, 296]), attention_mask=torch.Size([4, 296])


Predicting:  60%|██████    | 1450/2400 [49:30<53:57,  3.41s/it]

GPU memory after batch 1449: 14.97 GB
Batch 1450 input shapes: input_ids=torch.Size([4, 398]), attention_mask=torch.Size([4, 398])


Predicting:  60%|██████    | 1451/2400 [49:34<52:33,  3.32s/it]

批次 1450: 原始预测: ['Yes', 'No', 'Yes', 'Yes']
GPU memory after batch 1450: 14.97 GB
Batch 1451 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  60%|██████    | 1452/2400 [49:38<57:26,  3.64s/it]

GPU memory after batch 1451: 14.97 GB
Batch 1452 input shapes: input_ids=torch.Size([4, 496]), attention_mask=torch.Size([4, 496])


Predicting:  61%|██████    | 1453/2400 [49:42<58:27,  3.70s/it]

GPU memory after batch 1452: 14.97 GB
Batch 1453 input shapes: input_ids=torch.Size([4, 403]), attention_mask=torch.Size([4, 403])


Predicting:  61%|██████    | 1454/2400 [49:45<55:40,  3.53s/it]

GPU memory after batch 1453: 14.97 GB
Batch 1454 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  61%|██████    | 1455/2400 [49:49<59:19,  3.77s/it]

GPU memory after batch 1454: 14.97 GB
Batch 1455 input shapes: input_ids=torch.Size([4, 210]), attention_mask=torch.Size([4, 210])


Predicting:  61%|██████    | 1456/2400 [49:52<52:57,  3.37s/it]

GPU memory after batch 1455: 14.97 GB
Batch 1456 input shapes: input_ids=torch.Size([4, 495]), attention_mask=torch.Size([4, 495])


Predicting:  61%|██████    | 1457/2400 [49:56<57:30,  3.66s/it]

GPU memory after batch 1456: 14.97 GB
Batch 1457 input shapes: input_ids=torch.Size([4, 323]), attention_mask=torch.Size([4, 323])


Predicting:  61%|██████    | 1458/2400 [49:59<54:15,  3.46s/it]

GPU memory after batch 1457: 14.97 GB
Batch 1458 input shapes: input_ids=torch.Size([4, 335]), attention_mask=torch.Size([4, 335])


Predicting:  61%|██████    | 1459/2400 [50:03<54:18,  3.46s/it]

GPU memory after batch 1458: 14.97 GB
Batch 1459 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  61%|██████    | 1460/2400 [50:07<58:29,  3.73s/it]

GPU memory after batch 1459: 14.97 GB
Batch 1460 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  61%|██████    | 1461/2400 [50:11<1:01:24,  3.92s/it]

批次 1460: 原始预测: ['er against the Giants.', 'er against the Giants.', 'No', 'No']
GPU memory after batch 1460: 14.97 GB
Batch 1461 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  61%|██████    | 1462/2400 [50:16<1:03:25,  4.06s/it]

GPU memory after batch 1461: 14.97 GB
Batch 1462 input shapes: input_ids=torch.Size([4, 429]), attention_mask=torch.Size([4, 429])


Predicting:  61%|██████    | 1463/2400 [50:19<59:44,  3.83s/it]  

GPU memory after batch 1462: 14.97 GB
Batch 1463 input shapes: input_ids=torch.Size([4, 322]), attention_mask=torch.Size([4, 322])


Predicting:  61%|██████    | 1464/2400 [50:22<55:43,  3.57s/it]

GPU memory after batch 1463: 14.97 GB
Batch 1464 input shapes: input_ids=torch.Size([4, 403]), attention_mask=torch.Size([4, 403])


Predicting:  61%|██████    | 1465/2400 [50:25<55:48,  3.58s/it]

GPU memory after batch 1464: 14.97 GB
Batch 1465 input shapes: input_ids=torch.Size([4, 185]), attention_mask=torch.Size([4, 185])


Predicting:  61%|██████    | 1466/2400 [50:27<47:14,  3.03s/it]

GPU memory after batch 1465: 14.97 GB
Batch 1466 input shapes: input_ids=torch.Size([4, 355]), attention_mask=torch.Size([4, 355])


Predicting:  61%|██████    | 1467/2400 [50:30<48:14,  3.10s/it]

GPU memory after batch 1466: 14.97 GB
Batch 1467 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  61%|██████    | 1468/2400 [50:35<53:50,  3.47s/it]

GPU memory after batch 1467: 14.97 GB
Batch 1468 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  61%|██████    | 1469/2400 [50:39<57:43,  3.72s/it]

GPU memory after batch 1468: 14.97 GB
Batch 1469 input shapes: input_ids=torch.Size([4, 445]), attention_mask=torch.Size([4, 445])


Predicting:  61%|██████▏   | 1470/2400 [50:42<55:40,  3.59s/it]

GPU memory after batch 1469: 14.97 GB
Batch 1470 input shapes: input_ids=torch.Size([4, 296]), attention_mask=torch.Size([4, 296])


Predicting:  61%|██████▏   | 1471/2400 [50:45<51:36,  3.33s/it]

批次 1470: 原始预测: ['Yes', 'Yes', 'No', 'No']
GPU memory after batch 1470: 14.97 GB
Batch 1471 input shapes: input_ids=torch.Size([4, 280]), attention_mask=torch.Size([4, 280])


Predicting:  61%|██████▏   | 1472/2400 [50:48<49:54,  3.23s/it]

GPU memory after batch 1471: 14.97 GB
Batch 1472 input shapes: input_ids=torch.Size([4, 245]), attention_mask=torch.Size([4, 245])


Predicting:  61%|██████▏   | 1473/2400 [50:50<45:19,  2.93s/it]

GPU memory after batch 1472: 14.97 GB
Batch 1473 input shapes: input_ids=torch.Size([4, 266]), attention_mask=torch.Size([4, 266])


Predicting:  61%|██████▏   | 1474/2400 [50:53<45:22,  2.94s/it]

GPU memory after batch 1473: 14.97 GB
Batch 1474 input shapes: input_ids=torch.Size([4, 276]), attention_mask=torch.Size([4, 276])


Predicting:  61%|██████▏   | 1475/2400 [50:56<43:22,  2.81s/it]

GPU memory after batch 1474: 14.97 GB
Batch 1475 input shapes: input_ids=torch.Size([4, 267]), attention_mask=torch.Size([4, 267])


Predicting:  62%|██████▏   | 1476/2400 [50:59<43:58,  2.86s/it]

GPU memory after batch 1475: 14.97 GB
Batch 1476 input shapes: input_ids=torch.Size([4, 287]), attention_mask=torch.Size([4, 287])


Predicting:  62%|██████▏   | 1477/2400 [51:02<44:42,  2.91s/it]

GPU memory after batch 1476: 14.97 GB
Batch 1477 input shapes: input_ids=torch.Size([4, 251]), attention_mask=torch.Size([4, 251])


Predicting:  62%|██████▏   | 1478/2400 [51:05<43:56,  2.86s/it]

GPU memory after batch 1477: 14.97 GB
Batch 1478 input shapes: input_ids=torch.Size([4, 288]), attention_mask=torch.Size([4, 288])


Predicting:  62%|██████▏   | 1479/2400 [51:07<42:32,  2.77s/it]

GPU memory after batch 1478: 14.97 GB
Batch 1479 input shapes: input_ids=torch.Size([4, 267]), attention_mask=torch.Size([4, 267])


Predicting:  62%|██████▏   | 1480/2400 [51:10<41:13,  2.69s/it]

GPU memory after batch 1479: 14.97 GB
Batch 1480 input shapes: input_ids=torch.Size([4, 343]), attention_mask=torch.Size([4, 343])


Predicting:  62%|██████▏   | 1481/2400 [51:13<45:02,  2.94s/it]

批次 1480: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes', 'Yes']
GPU memory after batch 1480: 14.97 GB
Batch 1481 input shapes: input_ids=torch.Size([4, 385]), attention_mask=torch.Size([4, 385])


Predicting:  62%|██████▏   | 1482/2400 [51:16<45:49,  2.99s/it]

GPU memory after batch 1481: 14.97 GB
Batch 1482 input shapes: input_ids=torch.Size([4, 257]), attention_mask=torch.Size([4, 257])


Predicting:  62%|██████▏   | 1483/2400 [51:19<43:16,  2.83s/it]

GPU memory after batch 1482: 14.97 GB
Batch 1483 input shapes: input_ids=torch.Size([4, 294]), attention_mask=torch.Size([4, 294])


Predicting:  62%|██████▏   | 1484/2400 [51:21<42:45,  2.80s/it]

GPU memory after batch 1483: 14.97 GB
Batch 1484 input shapes: input_ids=torch.Size([4, 318]), attention_mask=torch.Size([4, 318])


Predicting:  62%|██████▏   | 1485/2400 [51:25<45:01,  2.95s/it]

GPU memory after batch 1484: 14.97 GB
Batch 1485 input shapes: input_ids=torch.Size([4, 390]), attention_mask=torch.Size([4, 390])


Predicting:  62%|██████▏   | 1486/2400 [51:28<47:54,  3.15s/it]

GPU memory after batch 1485: 14.97 GB
Batch 1486 input shapes: input_ids=torch.Size([4, 346]), attention_mask=torch.Size([4, 346])


Predicting:  62%|██████▏   | 1487/2400 [51:32<49:45,  3.27s/it]

GPU memory after batch 1486: 14.97 GB
Batch 1487 input shapes: input_ids=torch.Size([4, 318]), attention_mask=torch.Size([4, 318])


Predicting:  62%|██████▏   | 1488/2400 [51:35<47:44,  3.14s/it]

GPU memory after batch 1487: 14.97 GB
Batch 1488 input shapes: input_ids=torch.Size([4, 454]), attention_mask=torch.Size([4, 454])


Predicting:  62%|██████▏   | 1489/2400 [51:39<52:00,  3.43s/it]

GPU memory after batch 1488: 14.97 GB
Batch 1489 input shapes: input_ids=torch.Size([4, 276]), attention_mask=torch.Size([4, 276])


Predicting:  62%|██████▏   | 1490/2400 [51:42<49:55,  3.29s/it]

GPU memory after batch 1489: 14.97 GB
Batch 1490 input shapes: input_ids=torch.Size([4, 326]), attention_mask=torch.Size([4, 326])


Predicting:  62%|██████▏   | 1491/2400 [51:45<48:27,  3.20s/it]

批次 1490: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1490: 14.97 GB
Batch 1491 input shapes: input_ids=torch.Size([4, 274]), attention_mask=torch.Size([4, 274])


Predicting:  62%|██████▏   | 1492/2400 [51:47<45:13,  2.99s/it]

GPU memory after batch 1491: 14.97 GB
Batch 1492 input shapes: input_ids=torch.Size([4, 295]), attention_mask=torch.Size([4, 295])


Predicting:  62%|██████▏   | 1493/2400 [51:50<45:59,  3.04s/it]

GPU memory after batch 1492: 14.97 GB
Batch 1493 input shapes: input_ids=torch.Size([4, 239]), attention_mask=torch.Size([4, 239])


Predicting:  62%|██████▏   | 1494/2400 [51:53<44:14,  2.93s/it]

GPU memory after batch 1493: 14.97 GB
Batch 1494 input shapes: input_ids=torch.Size([4, 240]), attention_mask=torch.Size([4, 240])


Predicting:  62%|██████▏   | 1495/2400 [51:55<40:58,  2.72s/it]

GPU memory after batch 1494: 14.97 GB
Batch 1495 input shapes: input_ids=torch.Size([4, 295]), attention_mask=torch.Size([4, 295])


Predicting:  62%|██████▏   | 1496/2400 [51:58<41:40,  2.77s/it]

GPU memory after batch 1495: 14.97 GB
Batch 1496 input shapes: input_ids=torch.Size([4, 269]), attention_mask=torch.Size([4, 269])


Predicting:  62%|██████▏   | 1497/2400 [52:01<40:24,  2.69s/it]

GPU memory after batch 1496: 14.97 GB
Batch 1497 input shapes: input_ids=torch.Size([4, 259]), attention_mask=torch.Size([4, 259])


Predicting:  62%|██████▏   | 1498/2400 [52:03<40:00,  2.66s/it]

GPU memory after batch 1497: 14.97 GB
Batch 1498 input shapes: input_ids=torch.Size([4, 241]), attention_mask=torch.Size([4, 241])


Predicting:  62%|██████▏   | 1499/2400 [52:06<38:06,  2.54s/it]

GPU memory after batch 1498: 14.97 GB
Batch 1499 input shapes: input_ids=torch.Size([4, 282]), attention_mask=torch.Size([4, 282])


Predicting:  62%|██████▎   | 1500/2400 [52:08<38:10,  2.54s/it]

GPU memory after batch 1499: 14.97 GB
Batch 1500 input shapes: input_ids=torch.Size([4, 277]), attention_mask=torch.Size([4, 277])


Predicting:  63%|██████▎   | 1501/2400 [52:11<38:00,  2.54s/it]

批次 1500: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1500: 14.97 GB
Batch 1501 input shapes: input_ids=torch.Size([4, 275]), attention_mask=torch.Size([4, 275])


Predicting:  63%|██████▎   | 1502/2400 [52:14<39:57,  2.67s/it]

GPU memory after batch 1501: 14.97 GB
Batch 1502 input shapes: input_ids=torch.Size([4, 331]), attention_mask=torch.Size([4, 331])


Predicting:  63%|██████▎   | 1503/2400 [52:17<41:26,  2.77s/it]

GPU memory after batch 1502: 14.97 GB
Batch 1503 input shapes: input_ids=torch.Size([4, 300]), attention_mask=torch.Size([4, 300])


Predicting:  63%|██████▎   | 1504/2400 [52:19<41:23,  2.77s/it]

GPU memory after batch 1503: 14.97 GB
Batch 1504 input shapes: input_ids=torch.Size([4, 263]), attention_mask=torch.Size([4, 263])


Predicting:  63%|██████▎   | 1505/2400 [52:22<41:58,  2.81s/it]

GPU memory after batch 1504: 14.97 GB
Batch 1505 input shapes: input_ids=torch.Size([4, 249]), attention_mask=torch.Size([4, 249])


Predicting:  63%|██████▎   | 1506/2400 [52:25<41:39,  2.80s/it]

GPU memory after batch 1505: 14.97 GB
Batch 1506 input shapes: input_ids=torch.Size([4, 267]), attention_mask=torch.Size([4, 267])


Predicting:  63%|██████▎   | 1507/2400 [52:28<40:15,  2.70s/it]

GPU memory after batch 1506: 14.97 GB
Batch 1507 input shapes: input_ids=torch.Size([4, 247]), attention_mask=torch.Size([4, 247])


Predicting:  63%|██████▎   | 1508/2400 [52:30<40:11,  2.70s/it]

GPU memory after batch 1507: 14.97 GB
Batch 1508 input shapes: input_ids=torch.Size([4, 254]), attention_mask=torch.Size([4, 254])


Predicting:  63%|██████▎   | 1509/2400 [52:33<40:23,  2.72s/it]

GPU memory after batch 1508: 14.97 GB
Batch 1509 input shapes: input_ids=torch.Size([4, 257]), attention_mask=torch.Size([4, 257])


Predicting:  63%|██████▎   | 1510/2400 [52:36<41:11,  2.78s/it]

GPU memory after batch 1509: 14.97 GB
Batch 1510 input shapes: input_ids=torch.Size([4, 248]), attention_mask=torch.Size([4, 248])


Predicting:  63%|██████▎   | 1511/2400 [52:38<38:48,  2.62s/it]

批次 1510: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1510: 14.97 GB
Batch 1511 input shapes: input_ids=torch.Size([4, 309]), attention_mask=torch.Size([4, 309])


Predicting:  63%|██████▎   | 1512/2400 [52:41<41:38,  2.81s/it]

GPU memory after batch 1511: 14.97 GB
Batch 1512 input shapes: input_ids=torch.Size([4, 386]), attention_mask=torch.Size([4, 386])


Predicting:  63%|██████▎   | 1513/2400 [52:45<45:03,  3.05s/it]

GPU memory after batch 1512: 14.97 GB
Batch 1513 input shapes: input_ids=torch.Size([4, 323]), attention_mask=torch.Size([4, 323])


Predicting:  63%|██████▎   | 1514/2400 [52:49<46:46,  3.17s/it]

GPU memory after batch 1513: 14.97 GB
Batch 1514 input shapes: input_ids=torch.Size([4, 369]), attention_mask=torch.Size([4, 369])


Predicting:  63%|██████▎   | 1515/2400 [52:52<49:31,  3.36s/it]

GPU memory after batch 1514: 14.97 GB
Batch 1515 input shapes: input_ids=torch.Size([4, 339]), attention_mask=torch.Size([4, 339])


Predicting:  63%|██████▎   | 1516/2400 [52:56<50:12,  3.41s/it]

GPU memory after batch 1515: 14.97 GB
Batch 1516 input shapes: input_ids=torch.Size([4, 333]), attention_mask=torch.Size([4, 333])


Predicting:  63%|██████▎   | 1517/2400 [52:59<50:27,  3.43s/it]

GPU memory after batch 1516: 14.97 GB
Batch 1517 input shapes: input_ids=torch.Size([4, 263]), attention_mask=torch.Size([4, 263])


Predicting:  63%|██████▎   | 1518/2400 [53:02<48:07,  3.27s/it]

GPU memory after batch 1517: 14.97 GB
Batch 1518 input shapes: input_ids=torch.Size([4, 239]), attention_mask=torch.Size([4, 239])


Predicting:  63%|██████▎   | 1519/2400 [53:05<45:20,  3.09s/it]

GPU memory after batch 1518: 14.97 GB
Batch 1519 input shapes: input_ids=torch.Size([4, 252]), attention_mask=torch.Size([4, 252])


Predicting:  63%|██████▎   | 1520/2400 [53:08<43:50,  2.99s/it]

GPU memory after batch 1519: 14.97 GB
Batch 1520 input shapes: input_ids=torch.Size([4, 291]), attention_mask=torch.Size([4, 291])


Predicting:  63%|██████▎   | 1521/2400 [53:10<42:38,  2.91s/it]

批次 1520: 原始预测: ['No', 'Yes', 'No', 'Yes']
GPU memory after batch 1520: 14.97 GB
Batch 1521 input shapes: input_ids=torch.Size([4, 297]), attention_mask=torch.Size([4, 297])


Predicting:  63%|██████▎   | 1522/2400 [53:13<41:59,  2.87s/it]

GPU memory after batch 1521: 14.97 GB
Batch 1522 input shapes: input_ids=torch.Size([4, 309]), attention_mask=torch.Size([4, 309])


Predicting:  63%|██████▎   | 1523/2400 [53:16<41:39,  2.85s/it]

GPU memory after batch 1522: 14.97 GB
Batch 1523 input shapes: input_ids=torch.Size([4, 262]), attention_mask=torch.Size([4, 262])


Predicting:  64%|██████▎   | 1524/2400 [53:18<39:52,  2.73s/it]

GPU memory after batch 1523: 14.97 GB
Batch 1524 input shapes: input_ids=torch.Size([4, 358]), attention_mask=torch.Size([4, 358])


Predicting:  64%|██████▎   | 1525/2400 [53:22<44:11,  3.03s/it]

GPU memory after batch 1524: 14.97 GB
Batch 1525 input shapes: input_ids=torch.Size([4, 353]), attention_mask=torch.Size([4, 353])


Predicting:  64%|██████▎   | 1526/2400 [53:26<47:03,  3.23s/it]

GPU memory after batch 1525: 14.97 GB
Batch 1526 input shapes: input_ids=torch.Size([4, 251]), attention_mask=torch.Size([4, 251])


Predicting:  64%|██████▎   | 1527/2400 [53:29<44:55,  3.09s/it]

GPU memory after batch 1526: 14.97 GB
Batch 1527 input shapes: input_ids=torch.Size([4, 277]), attention_mask=torch.Size([4, 277])


Predicting:  64%|██████▎   | 1528/2400 [53:31<42:23,  2.92s/it]

GPU memory after batch 1527: 14.97 GB
Batch 1528 input shapes: input_ids=torch.Size([4, 366]), attention_mask=torch.Size([4, 366])


Predicting:  64%|██████▎   | 1529/2400 [53:34<44:03,  3.04s/it]

GPU memory after batch 1528: 14.97 GB
Batch 1529 input shapes: input_ids=torch.Size([4, 276]), attention_mask=torch.Size([4, 276])


Predicting:  64%|██████▍   | 1530/2400 [53:37<43:40,  3.01s/it]

GPU memory after batch 1529: 14.97 GB
Batch 1530 input shapes: input_ids=torch.Size([4, 250]), attention_mask=torch.Size([4, 250])


Predicting:  64%|██████▍   | 1531/2400 [53:40<42:29,  2.93s/it]

批次 1530: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1530: 14.97 GB
Batch 1531 input shapes: input_ids=torch.Size([4, 232]), attention_mask=torch.Size([4, 232])


Predicting:  64%|██████▍   | 1532/2400 [53:42<39:12,  2.71s/it]

GPU memory after batch 1531: 14.97 GB
Batch 1532 input shapes: input_ids=torch.Size([4, 410]), attention_mask=torch.Size([4, 410])


Predicting:  64%|██████▍   | 1533/2400 [53:46<43:03,  2.98s/it]

GPU memory after batch 1532: 14.97 GB
Batch 1533 input shapes: input_ids=torch.Size([4, 295]), attention_mask=torch.Size([4, 295])


Predicting:  64%|██████▍   | 1534/2400 [53:49<41:55,  2.91s/it]

GPU memory after batch 1533: 14.97 GB
Batch 1534 input shapes: input_ids=torch.Size([4, 295]), attention_mask=torch.Size([4, 295])


Predicting:  64%|██████▍   | 1535/2400 [53:51<41:07,  2.85s/it]

GPU memory after batch 1534: 14.97 GB
Batch 1535 input shapes: input_ids=torch.Size([4, 272]), attention_mask=torch.Size([4, 272])


Predicting:  64%|██████▍   | 1536/2400 [53:54<39:32,  2.75s/it]

GPU memory after batch 1535: 14.97 GB
Batch 1536 input shapes: input_ids=torch.Size([4, 485]), attention_mask=torch.Size([4, 485])


Predicting:  64%|██████▍   | 1537/2400 [53:58<46:22,  3.22s/it]

GPU memory after batch 1536: 14.97 GB
Batch 1537 input shapes: input_ids=torch.Size([4, 259]), attention_mask=torch.Size([4, 259])


Predicting:  64%|██████▍   | 1538/2400 [54:01<42:59,  2.99s/it]

GPU memory after batch 1537: 14.97 GB
Batch 1538 input shapes: input_ids=torch.Size([4, 313]), attention_mask=torch.Size([4, 313])


Predicting:  64%|██████▍   | 1539/2400 [54:04<44:16,  3.09s/it]

GPU memory after batch 1538: 14.97 GB
Batch 1539 input shapes: input_ids=torch.Size([4, 236]), attention_mask=torch.Size([4, 236])


Predicting:  64%|██████▍   | 1540/2400 [54:06<40:28,  2.82s/it]

GPU memory after batch 1539: 14.97 GB
Batch 1540 input shapes: input_ids=torch.Size([4, 301]), attention_mask=torch.Size([4, 301])


Predicting:  64%|██████▍   | 1541/2400 [54:09<42:11,  2.95s/it]

批次 1540: 原始预测: ['Yes', 'Yes', 'Yes\nIs "tw', 'Yes\nIs "112']
GPU memory after batch 1540: 14.97 GB
Batch 1541 input shapes: input_ids=torch.Size([4, 400]), attention_mask=torch.Size([4, 400])


Predicting:  64%|██████▍   | 1542/2400 [54:13<44:56,  3.14s/it]

GPU memory after batch 1541: 14.97 GB
Batch 1542 input shapes: input_ids=torch.Size([4, 225]), attention_mask=torch.Size([4, 225])


Predicting:  64%|██████▍   | 1543/2400 [54:15<40:47,  2.86s/it]

GPU memory after batch 1542: 14.97 GB
Batch 1543 input shapes: input_ids=torch.Size([4, 269]), attention_mask=torch.Size([4, 269])


Predicting:  64%|██████▍   | 1544/2400 [54:18<39:12,  2.75s/it]

GPU memory after batch 1543: 14.97 GB
Batch 1544 input shapes: input_ids=torch.Size([4, 286]), attention_mask=torch.Size([4, 286])


Predicting:  64%|██████▍   | 1545/2400 [54:20<39:01,  2.74s/it]

GPU memory after batch 1544: 14.97 GB
Batch 1545 input shapes: input_ids=torch.Size([4, 284]), attention_mask=torch.Size([4, 284])


Predicting:  64%|██████▍   | 1546/2400 [54:23<40:11,  2.82s/it]

GPU memory after batch 1545: 14.97 GB
Batch 1546 input shapes: input_ids=torch.Size([4, 237]), attention_mask=torch.Size([4, 237])


Predicting:  64%|██████▍   | 1547/2400 [54:26<39:28,  2.78s/it]

GPU memory after batch 1546: 14.97 GB
Batch 1547 input shapes: input_ids=torch.Size([4, 249]), attention_mask=torch.Size([4, 249])


Predicting:  64%|██████▍   | 1548/2400 [54:28<37:24,  2.63s/it]

GPU memory after batch 1547: 14.97 GB
Batch 1548 input shapes: input_ids=torch.Size([4, 368]), attention_mask=torch.Size([4, 368])


Predicting:  65%|██████▍   | 1549/2400 [54:32<40:14,  2.84s/it]

GPU memory after batch 1548: 14.97 GB
Batch 1549 input shapes: input_ids=torch.Size([4, 382]), attention_mask=torch.Size([4, 382])


Predicting:  65%|██████▍   | 1550/2400 [54:36<44:28,  3.14s/it]

GPU memory after batch 1549: 14.97 GB
Batch 1550 input shapes: input_ids=torch.Size([4, 238]), attention_mask=torch.Size([4, 238])


Predicting:  65%|██████▍   | 1551/2400 [54:38<41:46,  2.95s/it]

批次 1550: 原始预测: ['No\nYes', 'No\nYes', 'Yes', 'Yes']
GPU memory after batch 1550: 14.97 GB
Batch 1551 input shapes: input_ids=torch.Size([4, 444]), attention_mask=torch.Size([4, 444])


Predicting:  65%|██████▍   | 1552/2400 [54:42<45:11,  3.20s/it]

GPU memory after batch 1551: 14.97 GB
Batch 1552 input shapes: input_ids=torch.Size([4, 279]), attention_mask=torch.Size([4, 279])


Predicting:  65%|██████▍   | 1553/2400 [54:44<42:15,  2.99s/it]

GPU memory after batch 1552: 14.97 GB
Batch 1553 input shapes: input_ids=torch.Size([4, 320]), attention_mask=torch.Size([4, 320])


Predicting:  65%|██████▍   | 1554/2400 [54:47<41:33,  2.95s/it]

GPU memory after batch 1553: 14.97 GB
Batch 1554 input shapes: input_ids=torch.Size([4, 349]), attention_mask=torch.Size([4, 349])


Predicting:  65%|██████▍   | 1555/2400 [54:51<44:06,  3.13s/it]

GPU memory after batch 1554: 14.97 GB
Batch 1555 input shapes: input_ids=torch.Size([4, 220]), attention_mask=torch.Size([4, 220])


Predicting:  65%|██████▍   | 1556/2400 [54:53<41:16,  2.93s/it]

GPU memory after batch 1555: 14.97 GB
Batch 1556 input shapes: input_ids=torch.Size([4, 218]), attention_mask=torch.Size([4, 218])


Predicting:  65%|██████▍   | 1557/2400 [54:55<37:18,  2.66s/it]

GPU memory after batch 1556: 14.97 GB
Batch 1557 input shapes: input_ids=torch.Size([4, 168]), attention_mask=torch.Size([4, 168])


Predicting:  65%|██████▍   | 1558/2400 [54:57<32:59,  2.35s/it]

GPU memory after batch 1557: 14.97 GB
Batch 1558 input shapes: input_ids=torch.Size([4, 271]), attention_mask=torch.Size([4, 271])


Predicting:  65%|██████▍   | 1559/2400 [55:00<35:24,  2.53s/it]

GPU memory after batch 1558: 14.97 GB
Batch 1559 input shapes: input_ids=torch.Size([4, 223]), attention_mask=torch.Size([4, 223])


Predicting:  65%|██████▌   | 1560/2400 [55:02<33:14,  2.37s/it]

GPU memory after batch 1559: 14.97 GB
Batch 1560 input shapes: input_ids=torch.Size([4, 399]), attention_mask=torch.Size([4, 399])


Predicting:  65%|██████▌   | 1561/2400 [55:05<38:20,  2.74s/it]

批次 1560: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'No\nIs "30', 'No\nIs "201']
GPU memory after batch 1560: 14.97 GB
Batch 1561 input shapes: input_ids=torch.Size([4, 215]), attention_mask=torch.Size([4, 215])


Predicting:  65%|██████▌   | 1562/2400 [55:08<36:59,  2.65s/it]

GPU memory after batch 1561: 14.97 GB
Batch 1562 input shapes: input_ids=torch.Size([4, 377]), attention_mask=torch.Size([4, 377])


Predicting:  65%|██████▌   | 1563/2400 [55:12<41:56,  3.01s/it]

GPU memory after batch 1562: 14.97 GB
Batch 1563 input shapes: input_ids=torch.Size([4, 197]), attention_mask=torch.Size([4, 197])


Predicting:  65%|██████▌   | 1564/2400 [55:14<39:11,  2.81s/it]

GPU memory after batch 1563: 14.97 GB
Batch 1564 input shapes: input_ids=torch.Size([4, 231]), attention_mask=torch.Size([4, 231])


Predicting:  65%|██████▌   | 1565/2400 [55:16<36:31,  2.62s/it]

GPU memory after batch 1564: 14.97 GB
Batch 1565 input shapes: input_ids=torch.Size([4, 234]), attention_mask=torch.Size([4, 234])


Predicting:  65%|██████▌   | 1566/2400 [55:19<36:39,  2.64s/it]

GPU memory after batch 1565: 14.97 GB
Batch 1566 input shapes: input_ids=torch.Size([4, 205]), attention_mask=torch.Size([4, 205])


Predicting:  65%|██████▌   | 1567/2400 [55:21<33:43,  2.43s/it]

GPU memory after batch 1566: 14.97 GB
Batch 1567 input shapes: input_ids=torch.Size([4, 231]), attention_mask=torch.Size([4, 231])


Predicting:  65%|██████▌   | 1568/2400 [55:24<34:32,  2.49s/it]

GPU memory after batch 1567: 14.97 GB
Batch 1568 input shapes: input_ids=torch.Size([4, 207]), attention_mask=torch.Size([4, 207])


Predicting:  65%|██████▌   | 1569/2400 [55:25<32:14,  2.33s/it]

GPU memory after batch 1568: 14.97 GB
Batch 1569 input shapes: input_ids=torch.Size([4, 272]), attention_mask=torch.Size([4, 272])


Predicting:  65%|██████▌   | 1570/2400 [55:28<32:53,  2.38s/it]

GPU memory after batch 1569: 14.97 GB
Batch 1570 input shapes: input_ids=torch.Size([4, 233]), attention_mask=torch.Size([4, 233])


Predicting:  65%|██████▌   | 1571/2400 [55:31<33:59,  2.46s/it]

批次 1570: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes']
GPU memory after batch 1570: 14.97 GB
Batch 1571 input shapes: input_ids=torch.Size([4, 279]), attention_mask=torch.Size([4, 279])


Predicting:  66%|██████▌   | 1572/2400 [55:34<36:05,  2.62s/it]

GPU memory after batch 1571: 14.97 GB
Batch 1572 input shapes: input_ids=torch.Size([4, 284]), attention_mask=torch.Size([4, 284])


Predicting:  66%|██████▌   | 1573/2400 [55:37<37:44,  2.74s/it]

GPU memory after batch 1572: 14.97 GB
Batch 1573 input shapes: input_ids=torch.Size([4, 284]), attention_mask=torch.Size([4, 284])


Predicting:  66%|██████▌   | 1574/2400 [55:40<38:52,  2.82s/it]

GPU memory after batch 1573: 14.97 GB
Batch 1574 input shapes: input_ids=torch.Size([4, 179]), attention_mask=torch.Size([4, 179])


Predicting:  66%|██████▌   | 1575/2400 [55:41<34:17,  2.49s/it]

GPU memory after batch 1574: 14.97 GB
Batch 1575 input shapes: input_ids=torch.Size([4, 186]), attention_mask=torch.Size([4, 186])


Predicting:  66%|██████▌   | 1576/2400 [55:44<33:04,  2.41s/it]

GPU memory after batch 1575: 14.97 GB
Batch 1576 input shapes: input_ids=torch.Size([4, 313]), attention_mask=torch.Size([4, 313])


Predicting:  66%|██████▌   | 1577/2400 [55:46<34:47,  2.54s/it]

GPU memory after batch 1576: 14.97 GB
Batch 1577 input shapes: input_ids=torch.Size([4, 317]), attention_mask=torch.Size([4, 317])


Predicting:  66%|██████▌   | 1578/2400 [55:49<36:00,  2.63s/it]

GPU memory after batch 1577: 14.97 GB
Batch 1578 input shapes: input_ids=torch.Size([4, 317]), attention_mask=torch.Size([4, 317])


Predicting:  66%|██████▌   | 1579/2400 [55:52<36:50,  2.69s/it]

GPU memory after batch 1578: 14.97 GB
Batch 1579 input shapes: input_ids=torch.Size([4, 307]), attention_mask=torch.Size([4, 307])


Predicting:  66%|██████▌   | 1580/2400 [55:55<37:15,  2.73s/it]

GPU memory after batch 1579: 14.97 GB
Batch 1580 input shapes: input_ids=torch.Size([4, 273]), attention_mask=torch.Size([4, 273])


Predicting:  66%|██████▌   | 1581/2400 [55:58<38:13,  2.80s/it]

批次 1580: 原始预测: ['Yes\nIs "35', 'Yes\nIs "350', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1580: 14.97 GB
Batch 1581 input shapes: input_ids=torch.Size([4, 413]), attention_mask=torch.Size([4, 413])


Predicting:  66%|██████▌   | 1582/2400 [56:01<41:29,  3.04s/it]

GPU memory after batch 1581: 14.97 GB
Batch 1582 input shapes: input_ids=torch.Size([4, 409]), attention_mask=torch.Size([4, 409])


Predicting:  66%|██████▌   | 1583/2400 [56:05<43:34,  3.20s/it]

GPU memory after batch 1582: 14.97 GB
Batch 1583 input shapes: input_ids=torch.Size([4, 413]), attention_mask=torch.Size([4, 413])


Predicting:  66%|██████▌   | 1584/2400 [56:09<45:11,  3.32s/it]

GPU memory after batch 1583: 14.97 GB
Batch 1584 input shapes: input_ids=torch.Size([4, 433]), attention_mask=torch.Size([4, 433])


Predicting:  66%|██████▌   | 1585/2400 [56:12<46:55,  3.45s/it]

GPU memory after batch 1584: 14.97 GB
Batch 1585 input shapes: input_ids=torch.Size([4, 322]), attention_mask=torch.Size([4, 322])


Predicting:  66%|██████▌   | 1586/2400 [56:16<46:49,  3.45s/it]

GPU memory after batch 1585: 14.97 GB
Batch 1586 input shapes: input_ids=torch.Size([4, 388]), attention_mask=torch.Size([4, 388])


Predicting:  66%|██████▌   | 1587/2400 [56:19<47:21,  3.49s/it]

GPU memory after batch 1586: 14.97 GB
Batch 1587 input shapes: input_ids=torch.Size([4, 391]), attention_mask=torch.Size([4, 391])


Predicting:  66%|██████▌   | 1588/2400 [56:23<45:47,  3.38s/it]

GPU memory after batch 1587: 14.97 GB
Batch 1588 input shapes: input_ids=torch.Size([4, 228]), attention_mask=torch.Size([4, 228])


Predicting:  66%|██████▌   | 1589/2400 [56:25<42:42,  3.16s/it]

GPU memory after batch 1588: 14.97 GB
Batch 1589 input shapes: input_ids=torch.Size([4, 216]), attention_mask=torch.Size([4, 216])


Predicting:  66%|██████▋   | 1590/2400 [56:28<39:43,  2.94s/it]

GPU memory after batch 1589: 14.97 GB
Batch 1590 input shapes: input_ids=torch.Size([4, 220]), attention_mask=torch.Size([4, 220])


Predicting:  66%|██████▋   | 1591/2400 [56:30<35:56,  2.67s/it]

批次 1590: 原始预测: ['Yes', 'Yes', 'Yes', 'No']
GPU memory after batch 1590: 14.97 GB
Batch 1591 input shapes: input_ids=torch.Size([4, 263]), attention_mask=torch.Size([4, 263])


Predicting:  66%|██████▋   | 1592/2400 [56:32<35:01,  2.60s/it]

GPU memory after batch 1591: 14.97 GB
Batch 1592 input shapes: input_ids=torch.Size([4, 257]), attention_mask=torch.Size([4, 257])


Predicting:  66%|██████▋   | 1593/2400 [56:35<36:13,  2.69s/it]

GPU memory after batch 1592: 14.97 GB
Batch 1593 input shapes: input_ids=torch.Size([4, 228]), attention_mask=torch.Size([4, 228])


Predicting:  66%|██████▋   | 1594/2400 [56:38<35:57,  2.68s/it]

GPU memory after batch 1593: 14.97 GB
Batch 1594 input shapes: input_ids=torch.Size([4, 322]), attention_mask=torch.Size([4, 322])


Predicting:  66%|██████▋   | 1595/2400 [56:41<38:59,  2.91s/it]

GPU memory after batch 1594: 14.97 GB
Batch 1595 input shapes: input_ids=torch.Size([4, 234]), attention_mask=torch.Size([4, 234])


Predicting:  66%|██████▋   | 1596/2400 [56:44<37:58,  2.83s/it]

GPU memory after batch 1595: 14.97 GB
Batch 1596 input shapes: input_ids=torch.Size([4, 251]), attention_mask=torch.Size([4, 251])


Predicting:  67%|██████▋   | 1597/2400 [56:47<37:36,  2.81s/it]

GPU memory after batch 1596: 14.97 GB
Batch 1597 input shapes: input_ids=torch.Size([4, 397]), attention_mask=torch.Size([4, 397])


Predicting:  67%|██████▋   | 1598/2400 [56:50<40:43,  3.05s/it]

GPU memory after batch 1597: 14.97 GB
Batch 1598 input shapes: input_ids=torch.Size([4, 400]), attention_mask=torch.Size([4, 400])


Predicting:  67%|██████▋   | 1599/2400 [56:53<41:00,  3.07s/it]

GPU memory after batch 1598: 14.97 GB
Batch 1599 input shapes: input_ids=torch.Size([4, 398]), attention_mask=torch.Size([4, 398])


Predicting:  67%|██████▋   | 1600/2400 [56:57<43:03,  3.23s/it]

GPU memory after batch 1599: 14.97 GB
Batch 1600 input shapes: input_ids=torch.Size([4, 344]), attention_mask=torch.Size([4, 344])


Predicting:  67%|██████▋   | 1601/2400 [57:00<44:12,  3.32s/it]

批次 1600: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1600: 14.97 GB
Batch 1601 input shapes: input_ids=torch.Size([4, 267]), attention_mask=torch.Size([4, 267])


Predicting:  67%|██████▋   | 1602/2400 [57:03<42:41,  3.21s/it]

GPU memory after batch 1601: 14.97 GB
Batch 1602 input shapes: input_ids=torch.Size([4, 206]), attention_mask=torch.Size([4, 206])


Predicting:  67%|██████▋   | 1603/2400 [57:05<37:35,  2.83s/it]

GPU memory after batch 1602: 14.97 GB
Batch 1603 input shapes: input_ids=torch.Size([4, 206]), attention_mask=torch.Size([4, 206])


Predicting:  67%|██████▋   | 1604/2400 [57:08<35:48,  2.70s/it]

GPU memory after batch 1603: 14.97 GB
Batch 1604 input shapes: input_ids=torch.Size([4, 292]), attention_mask=torch.Size([4, 292])


Predicting:  67%|██████▋   | 1605/2400 [57:11<37:42,  2.85s/it]

GPU memory after batch 1604: 14.97 GB
Batch 1605 input shapes: input_ids=torch.Size([4, 295]), attention_mask=torch.Size([4, 295])


Predicting:  67%|██████▋   | 1606/2400 [57:14<39:02,  2.95s/it]

GPU memory after batch 1605: 14.97 GB
Batch 1606 input shapes: input_ids=torch.Size([4, 295]), attention_mask=torch.Size([4, 295])


Predicting:  67%|██████▋   | 1607/2400 [57:17<39:57,  3.02s/it]

GPU memory after batch 1606: 14.97 GB
Batch 1607 input shapes: input_ids=torch.Size([4, 208]), attention_mask=torch.Size([4, 208])


Predicting:  67%|██████▋   | 1608/2400 [57:20<37:25,  2.83s/it]

GPU memory after batch 1607: 14.97 GB
Batch 1608 input shapes: input_ids=torch.Size([4, 319]), attention_mask=torch.Size([4, 319])


Predicting:  67%|██████▋   | 1609/2400 [57:23<39:14,  2.98s/it]

GPU memory after batch 1608: 14.97 GB
Batch 1609 input shapes: input_ids=torch.Size([4, 255]), attention_mask=torch.Size([4, 255])


Predicting:  67%|██████▋   | 1610/2400 [57:26<38:19,  2.91s/it]

GPU memory after batch 1609: 14.97 GB
Batch 1610 input shapes: input_ids=torch.Size([4, 255]), attention_mask=torch.Size([4, 255])


Predicting:  67%|██████▋   | 1611/2400 [57:28<37:40,  2.87s/it]

批次 1610: 原始预测: ['Yes\nIs "178', 'Yes\nIs "356', 'Yes\nIs "14', 'Yes\nIs "-14']
GPU memory after batch 1610: 14.97 GB
Batch 1611 input shapes: input_ids=torch.Size([4, 314]), attention_mask=torch.Size([4, 314])


Predicting:  67%|██████▋   | 1612/2400 [57:31<37:31,  2.86s/it]

GPU memory after batch 1611: 14.97 GB
Batch 1612 input shapes: input_ids=torch.Size([4, 236]), attention_mask=torch.Size([4, 236])


Predicting:  67%|██████▋   | 1613/2400 [57:34<34:57,  2.66s/it]

GPU memory after batch 1612: 14.97 GB
Batch 1613 input shapes: input_ids=torch.Size([4, 244]), attention_mask=torch.Size([4, 244])


Predicting:  67%|██████▋   | 1614/2400 [57:36<33:16,  2.54s/it]

GPU memory after batch 1613: 14.97 GB
Batch 1614 input shapes: input_ids=torch.Size([4, 242]), attention_mask=torch.Size([4, 242])


Predicting:  67%|██████▋   | 1615/2400 [57:38<33:48,  2.58s/it]

GPU memory after batch 1614: 14.97 GB
Batch 1615 input shapes: input_ids=torch.Size([4, 220]), attention_mask=torch.Size([4, 220])


Predicting:  67%|██████▋   | 1616/2400 [57:40<31:31,  2.41s/it]

GPU memory after batch 1615: 14.97 GB
Batch 1616 input shapes: input_ids=torch.Size([4, 235]), attention_mask=torch.Size([4, 235])


Predicting:  67%|██████▋   | 1617/2400 [57:43<32:28,  2.49s/it]

GPU memory after batch 1616: 14.97 GB
Batch 1617 input shapes: input_ids=torch.Size([4, 215]), attention_mask=torch.Size([4, 215])


Predicting:  67%|██████▋   | 1618/2400 [57:45<30:27,  2.34s/it]

GPU memory after batch 1617: 14.97 GB
Batch 1618 input shapes: input_ids=torch.Size([4, 215]), attention_mask=torch.Size([4, 215])


Predicting:  67%|██████▋   | 1619/2400 [57:47<29:02,  2.23s/it]

GPU memory after batch 1618: 14.97 GB
Batch 1619 input shapes: input_ids=torch.Size([4, 230]), attention_mask=torch.Size([4, 230])


Predicting:  68%|██████▊   | 1620/2400 [57:50<30:32,  2.35s/it]

GPU memory after batch 1619: 14.97 GB
Batch 1620 input shapes: input_ids=torch.Size([4, 232]), attention_mask=torch.Size([4, 232])


Predicting:  68%|██████▊   | 1621/2400 [57:52<31:37,  2.44s/it]

批次 1620: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes\nYes, because']
GPU memory after batch 1620: 14.97 GB
Batch 1621 input shapes: input_ids=torch.Size([4, 303]), attention_mask=torch.Size([4, 303])


Predicting:  68%|██████▊   | 1622/2400 [57:56<34:41,  2.68s/it]

GPU memory after batch 1621: 14.97 GB
Batch 1622 input shapes: input_ids=torch.Size([4, 178]), attention_mask=torch.Size([4, 178])


Predicting:  68%|██████▊   | 1623/2400 [57:57<30:57,  2.39s/it]

GPU memory after batch 1622: 14.97 GB
Batch 1623 input shapes: input_ids=torch.Size([4, 228]), attention_mask=torch.Size([4, 228])


Predicting:  68%|██████▊   | 1624/2400 [58:00<31:51,  2.46s/it]

GPU memory after batch 1623: 14.97 GB
Batch 1624 input shapes: input_ids=torch.Size([4, 231]), attention_mask=torch.Size([4, 231])


Predicting:  68%|██████▊   | 1625/2400 [58:02<30:44,  2.38s/it]

GPU memory after batch 1624: 14.97 GB
Batch 1625 input shapes: input_ids=torch.Size([4, 292]), attention_mask=torch.Size([4, 292])


Predicting:  68%|██████▊   | 1626/2400 [58:05<33:50,  2.62s/it]

GPU memory after batch 1625: 14.97 GB
Batch 1626 input shapes: input_ids=torch.Size([4, 194]), attention_mask=torch.Size([4, 194])


Predicting:  68%|██████▊   | 1627/2400 [58:07<31:02,  2.41s/it]

GPU memory after batch 1626: 14.97 GB
Batch 1627 input shapes: input_ids=torch.Size([4, 177]), attention_mask=torch.Size([4, 177])


Predicting:  68%|██████▊   | 1628/2400 [58:09<30:04,  2.34s/it]

GPU memory after batch 1627: 14.97 GB
Batch 1628 input shapes: input_ids=torch.Size([4, 187]), attention_mask=torch.Size([4, 187])


Predicting:  68%|██████▊   | 1629/2400 [58:11<27:48,  2.16s/it]

GPU memory after batch 1628: 14.97 GB
Batch 1629 input shapes: input_ids=torch.Size([4, 325]), attention_mask=torch.Size([4, 325])


Predicting:  68%|██████▊   | 1630/2400 [58:15<32:42,  2.55s/it]

GPU memory after batch 1629: 14.97 GB
Batch 1630 input shapes: input_ids=torch.Size([4, 300]), attention_mask=torch.Size([4, 300])


Predicting:  68%|██████▊   | 1631/2400 [58:18<35:18,  2.75s/it]

批次 1630: 原始预测: ['Yes\nIs "145', 'Yes\nIs "0', 'Yes\nIs "1', 'Yes\nIs "33']
GPU memory after batch 1630: 14.97 GB
Batch 1631 input shapes: input_ids=torch.Size([4, 197]), attention_mask=torch.Size([4, 197])


Predicting:  68%|██████▊   | 1632/2400 [58:20<33:41,  2.63s/it]

GPU memory after batch 1631: 14.97 GB
Batch 1632 input shapes: input_ids=torch.Size([4, 313]), attention_mask=torch.Size([4, 313])


Predicting:  68%|██████▊   | 1633/2400 [58:24<36:13,  2.83s/it]

GPU memory after batch 1632: 14.97 GB
Batch 1633 input shapes: input_ids=torch.Size([4, 278]), attention_mask=torch.Size([4, 278])


Predicting:  68%|██████▊   | 1634/2400 [58:27<36:43,  2.88s/it]

GPU memory after batch 1633: 14.97 GB
Batch 1634 input shapes: input_ids=torch.Size([4, 205]), attention_mask=torch.Size([4, 205])


Predicting:  68%|██████▊   | 1635/2400 [58:29<34:47,  2.73s/it]

GPU memory after batch 1634: 14.97 GB
Batch 1635 input shapes: input_ids=torch.Size([4, 280]), attention_mask=torch.Size([4, 280])


Predicting:  68%|██████▊   | 1636/2400 [58:32<35:42,  2.80s/it]

GPU memory after batch 1635: 14.97 GB
Batch 1636 input shapes: input_ids=torch.Size([4, 317]), attention_mask=torch.Size([4, 317])


Predicting:  68%|██████▊   | 1637/2400 [58:35<35:47,  2.81s/it]

GPU memory after batch 1636: 14.97 GB
Batch 1637 input shapes: input_ids=torch.Size([4, 370]), attention_mask=torch.Size([4, 370])


Predicting:  68%|██████▊   | 1638/2400 [58:39<39:30,  3.11s/it]

GPU memory after batch 1637: 14.97 GB
Batch 1638 input shapes: input_ids=torch.Size([4, 278]), attention_mask=torch.Size([4, 278])


Predicting:  68%|██████▊   | 1639/2400 [58:41<37:12,  2.93s/it]

GPU memory after batch 1638: 14.97 GB
Batch 1639 input shapes: input_ids=torch.Size([4, 335]), attention_mask=torch.Size([4, 335])


Predicting:  68%|██████▊   | 1640/2400 [58:45<39:13,  3.10s/it]

GPU memory after batch 1639: 14.97 GB
Batch 1640 input shapes: input_ids=torch.Size([4, 324]), attention_mask=torch.Size([4, 324])


Predicting:  68%|██████▊   | 1641/2400 [58:48<40:29,  3.20s/it]

批次 1640: 原始预测: ['Yes', 'Yes', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1640: 14.97 GB
Batch 1641 input shapes: input_ids=torch.Size([4, 275]), attention_mask=torch.Size([4, 275])


Predicting:  68%|██████▊   | 1642/2400 [58:51<39:35,  3.13s/it]

GPU memory after batch 1641: 14.97 GB
Batch 1642 input shapes: input_ids=torch.Size([4, 275]), attention_mask=torch.Size([4, 275])


Predicting:  68%|██████▊   | 1643/2400 [58:54<38:56,  3.09s/it]

GPU memory after batch 1642: 14.97 GB
Batch 1643 input shapes: input_ids=torch.Size([4, 275]), attention_mask=torch.Size([4, 275])


Predicting:  68%|██████▊   | 1644/2400 [58:57<38:28,  3.05s/it]

GPU memory after batch 1643: 14.97 GB
Batch 1644 input shapes: input_ids=torch.Size([4, 290]), attention_mask=torch.Size([4, 290])


Predicting:  69%|██████▊   | 1645/2400 [59:00<38:56,  3.09s/it]

GPU memory after batch 1644: 14.97 GB
Batch 1645 input shapes: input_ids=torch.Size([4, 226]), attention_mask=torch.Size([4, 226])


Predicting:  69%|██████▊   | 1646/2400 [59:03<37:09,  2.96s/it]

GPU memory after batch 1645: 14.97 GB
Batch 1646 input shapes: input_ids=torch.Size([4, 418]), attention_mask=torch.Size([4, 418])


Predicting:  69%|██████▊   | 1647/2400 [59:06<38:19,  3.05s/it]

GPU memory after batch 1646: 14.97 GB
Batch 1647 input shapes: input_ids=torch.Size([4, 418]), attention_mask=torch.Size([4, 418])


Predicting:  69%|██████▊   | 1648/2400 [59:10<40:53,  3.26s/it]

GPU memory after batch 1647: 14.97 GB
Batch 1648 input shapes: input_ids=torch.Size([4, 221]), attention_mask=torch.Size([4, 221])


Predicting:  69%|██████▊   | 1649/2400 [59:12<37:51,  3.02s/it]

GPU memory after batch 1648: 14.97 GB
Batch 1649 input shapes: input_ids=torch.Size([4, 236]), attention_mask=torch.Size([4, 236])


Predicting:  69%|██████▉   | 1650/2400 [59:15<36:24,  2.91s/it]

GPU memory after batch 1649: 14.97 GB
Batch 1650 input shapes: input_ids=torch.Size([4, 236]), attention_mask=torch.Size([4, 236])


Predicting:  69%|██████▉   | 1651/2400 [59:18<35:26,  2.84s/it]

批次 1650: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes', 'Yes']
GPU memory after batch 1650: 14.97 GB
Batch 1651 input shapes: input_ids=torch.Size([4, 287]), attention_mask=torch.Size([4, 287])


Predicting:  69%|██████▉   | 1652/2400 [59:21<36:05,  2.89s/it]

GPU memory after batch 1651: 14.97 GB
Batch 1652 input shapes: input_ids=torch.Size([4, 234]), attention_mask=torch.Size([4, 234])


Predicting:  69%|██████▉   | 1653/2400 [59:23<33:29,  2.69s/it]

GPU memory after batch 1652: 14.97 GB
Batch 1653 input shapes: input_ids=torch.Size([4, 235]), attention_mask=torch.Size([4, 235])


Predicting:  69%|██████▉   | 1654/2400 [59:25<31:40,  2.55s/it]

GPU memory after batch 1653: 14.97 GB
Batch 1654 input shapes: input_ids=torch.Size([4, 322]), attention_mask=torch.Size([4, 322])


Predicting:  69%|██████▉   | 1655/2400 [59:28<34:58,  2.82s/it]

GPU memory after batch 1654: 14.97 GB
Batch 1655 input shapes: input_ids=torch.Size([4, 217]), attention_mask=torch.Size([4, 217])


Predicting:  69%|██████▉   | 1656/2400 [59:31<33:37,  2.71s/it]

GPU memory after batch 1655: 14.97 GB
Batch 1656 input shapes: input_ids=torch.Size([4, 288]), attention_mask=torch.Size([4, 288])


Predicting:  69%|██████▉   | 1657/2400 [59:34<34:44,  2.81s/it]

GPU memory after batch 1656: 14.97 GB
Batch 1657 input shapes: input_ids=torch.Size([4, 232]), attention_mask=torch.Size([4, 232])


Predicting:  69%|██████▉   | 1658/2400 [59:36<32:24,  2.62s/it]

GPU memory after batch 1657: 14.97 GB
Batch 1658 input shapes: input_ids=torch.Size([4, 177]), attention_mask=torch.Size([4, 177])


Predicting:  69%|██████▉   | 1659/2400 [59:38<29:02,  2.35s/it]

GPU memory after batch 1658: 14.97 GB
Batch 1659 input shapes: input_ids=torch.Size([4, 225]), attention_mask=torch.Size([4, 225])


Predicting:  69%|██████▉   | 1660/2400 [59:40<28:23,  2.30s/it]

GPU memory after batch 1659: 14.97 GB
Batch 1660 input shapes: input_ids=torch.Size([4, 374]), attention_mask=torch.Size([4, 374])


Predicting:  69%|██████▉   | 1661/2400 [59:44<33:53,  2.75s/it]

批次 1660: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes\nYes, because']
GPU memory after batch 1660: 14.97 GB
Batch 1661 input shapes: input_ids=torch.Size([4, 370]), attention_mask=torch.Size([4, 370])


Predicting:  69%|██████▉   | 1662/2400 [59:48<37:43,  3.07s/it]

GPU memory after batch 1661: 14.97 GB
Batch 1662 input shapes: input_ids=torch.Size([4, 302]), attention_mask=torch.Size([4, 302])


Predicting:  69%|██████▉   | 1663/2400 [59:51<38:17,  3.12s/it]

GPU memory after batch 1662: 14.97 GB
Batch 1663 input shapes: input_ids=torch.Size([4, 210]), attention_mask=torch.Size([4, 210])


Predicting:  69%|██████▉   | 1664/2400 [59:53<34:01,  2.77s/it]

GPU memory after batch 1663: 14.97 GB
Batch 1664 input shapes: input_ids=torch.Size([4, 319]), attention_mask=torch.Size([4, 319])


Predicting:  69%|██████▉   | 1665/2400 [59:56<35:56,  2.93s/it]

GPU memory after batch 1664: 14.97 GB
Batch 1665 input shapes: input_ids=torch.Size([4, 247]), attention_mask=torch.Size([4, 247])


Predicting:  69%|██████▉   | 1666/2400 [59:58<33:23,  2.73s/it]

GPU memory after batch 1665: 14.97 GB
Batch 1666 input shapes: input_ids=torch.Size([4, 202]), attention_mask=torch.Size([4, 202])


Predicting:  69%|██████▉   | 1667/2400 [1:00:00<30:28,  2.49s/it]

GPU memory after batch 1666: 14.97 GB
Batch 1667 input shapes: input_ids=torch.Size([4, 251]), attention_mask=torch.Size([4, 251])


Predicting:  70%|██████▉   | 1668/2400 [1:00:03<29:42,  2.44s/it]

GPU memory after batch 1667: 14.97 GB
Batch 1668 input shapes: input_ids=torch.Size([4, 253]), attention_mask=torch.Size([4, 253])


Predicting:  70%|██████▉   | 1669/2400 [1:00:05<29:10,  2.40s/it]

GPU memory after batch 1668: 14.97 GB
Batch 1669 input shapes: input_ids=torch.Size([4, 215]), attention_mask=torch.Size([4, 215])


Predicting:  70%|██████▉   | 1670/2400 [1:00:07<29:16,  2.41s/it]

GPU memory after batch 1669: 14.97 GB
Batch 1670 input shapes: input_ids=torch.Size([4, 216]), attention_mask=torch.Size([4, 216])


Predicting:  70%|██████▉   | 1671/2400 [1:00:10<29:20,  2.41s/it]

批次 1670: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes', 'Yes']
GPU memory after batch 1670: 14.97 GB
Batch 1671 input shapes: input_ids=torch.Size([4, 215]), attention_mask=torch.Size([4, 215])


Predicting:  70%|██████▉   | 1672/2400 [1:00:12<29:21,  2.42s/it]

GPU memory after batch 1671: 14.97 GB
Batch 1672 input shapes: input_ids=torch.Size([4, 379]), attention_mask=torch.Size([4, 379])


Predicting:  70%|██████▉   | 1673/2400 [1:00:16<34:22,  2.84s/it]

GPU memory after batch 1672: 14.97 GB
Batch 1673 input shapes: input_ids=torch.Size([4, 406]), attention_mask=torch.Size([4, 406])


Predicting:  70%|██████▉   | 1674/2400 [1:00:20<36:58,  3.06s/it]

GPU memory after batch 1673: 14.97 GB
Batch 1674 input shapes: input_ids=torch.Size([4, 409]), attention_mask=torch.Size([4, 409])


Predicting:  70%|██████▉   | 1675/2400 [1:00:23<38:55,  3.22s/it]

GPU memory after batch 1674: 14.97 GB
Batch 1675 input shapes: input_ids=torch.Size([4, 341]), attention_mask=torch.Size([4, 341])


Predicting:  70%|██████▉   | 1676/2400 [1:00:26<38:17,  3.17s/it]

GPU memory after batch 1675: 14.97 GB
Batch 1676 input shapes: input_ids=torch.Size([4, 240]), attention_mask=torch.Size([4, 240])


Predicting:  70%|██████▉   | 1677/2400 [1:00:28<34:47,  2.89s/it]

GPU memory after batch 1676: 14.97 GB
Batch 1677 input shapes: input_ids=torch.Size([4, 410]), attention_mask=torch.Size([4, 410])


Predicting:  70%|██████▉   | 1678/2400 [1:00:32<37:20,  3.10s/it]

GPU memory after batch 1677: 14.97 GB
Batch 1678 input shapes: input_ids=torch.Size([4, 337]), attention_mask=torch.Size([4, 337])


Predicting:  70%|██████▉   | 1679/2400 [1:00:36<38:49,  3.23s/it]

GPU memory after batch 1678: 14.97 GB
Batch 1679 input shapes: input_ids=torch.Size([4, 314]), attention_mask=torch.Size([4, 314])


Predicting:  70%|███████   | 1680/2400 [1:00:39<38:56,  3.25s/it]

GPU memory after batch 1679: 14.97 GB
Batch 1680 input shapes: input_ids=torch.Size([4, 491]), attention_mask=torch.Size([4, 491])


Predicting:  70%|███████   | 1681/2400 [1:00:43<42:49,  3.57s/it]

批次 1680: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes', 'Yes']
GPU memory after batch 1680: 14.97 GB
Batch 1681 input shapes: input_ids=torch.Size([4, 190]), attention_mask=torch.Size([4, 190])


Predicting:  70%|███████   | 1682/2400 [1:00:45<36:15,  3.03s/it]

GPU memory after batch 1681: 14.97 GB
Batch 1682 input shapes: input_ids=torch.Size([4, 233]), attention_mask=torch.Size([4, 233])


Predicting:  70%|███████   | 1683/2400 [1:00:48<34:53,  2.92s/it]

GPU memory after batch 1682: 14.97 GB
Batch 1683 input shapes: input_ids=torch.Size([4, 223]), attention_mask=torch.Size([4, 223])


Predicting:  70%|███████   | 1684/2400 [1:00:50<33:14,  2.79s/it]

GPU memory after batch 1683: 14.97 GB
Batch 1684 input shapes: input_ids=torch.Size([4, 235]), attention_mask=torch.Size([4, 235])


Predicting:  70%|███████   | 1685/2400 [1:00:52<31:09,  2.61s/it]

GPU memory after batch 1684: 14.97 GB
Batch 1685 input shapes: input_ids=torch.Size([4, 253]), attention_mask=torch.Size([4, 253])


Predicting:  70%|███████   | 1686/2400 [1:00:55<29:59,  2.52s/it]

GPU memory after batch 1685: 14.97 GB
Batch 1686 input shapes: input_ids=torch.Size([4, 246]), attention_mask=torch.Size([4, 246])


Predicting:  70%|███████   | 1687/2400 [1:00:57<28:59,  2.44s/it]

GPU memory after batch 1686: 14.97 GB
Batch 1687 input shapes: input_ids=torch.Size([4, 199]), attention_mask=torch.Size([4, 199])


Predicting:  70%|███████   | 1688/2400 [1:00:59<27:04,  2.28s/it]

GPU memory after batch 1687: 14.97 GB
Batch 1688 input shapes: input_ids=torch.Size([4, 179]), attention_mask=torch.Size([4, 179])


Predicting:  70%|███████   | 1689/2400 [1:01:01<26:38,  2.25s/it]

GPU memory after batch 1688: 14.97 GB
Batch 1689 input shapes: input_ids=torch.Size([4, 282]), attention_mask=torch.Size([4, 282])


Predicting:  70%|███████   | 1690/2400 [1:01:04<27:43,  2.34s/it]

GPU memory after batch 1689: 14.97 GB
Batch 1690 input shapes: input_ids=torch.Size([4, 375]), attention_mask=torch.Size([4, 375])


Predicting:  70%|███████   | 1691/2400 [1:01:07<31:12,  2.64s/it]

批次 1690: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1690: 14.97 GB
Batch 1691 input shapes: input_ids=torch.Size([4, 394]), attention_mask=torch.Size([4, 394])


Predicting:  70%|███████   | 1692/2400 [1:01:11<34:33,  2.93s/it]

GPU memory after batch 1691: 14.97 GB
Batch 1692 input shapes: input_ids=torch.Size([4, 325]), attention_mask=torch.Size([4, 325])


Predicting:  71%|███████   | 1693/2400 [1:01:14<36:20,  3.08s/it]

GPU memory after batch 1692: 14.97 GB
Batch 1693 input shapes: input_ids=torch.Size([4, 323]), attention_mask=torch.Size([4, 323])


Predicting:  71%|███████   | 1694/2400 [1:01:17<37:34,  3.19s/it]

GPU memory after batch 1693: 14.97 GB
Batch 1694 input shapes: input_ids=torch.Size([4, 267]), attention_mask=torch.Size([4, 267])


Predicting:  71%|███████   | 1695/2400 [1:01:20<35:03,  2.98s/it]

GPU memory after batch 1694: 14.97 GB
Batch 1695 input shapes: input_ids=torch.Size([4, 231]), attention_mask=torch.Size([4, 231])


Predicting:  71%|███████   | 1696/2400 [1:01:23<33:47,  2.88s/it]

GPU memory after batch 1695: 14.97 GB
Batch 1696 input shapes: input_ids=torch.Size([4, 211]), attention_mask=torch.Size([4, 211])


Predicting:  71%|███████   | 1697/2400 [1:01:25<30:35,  2.61s/it]

GPU memory after batch 1696: 14.97 GB
Batch 1697 input shapes: input_ids=torch.Size([4, 380]), attention_mask=torch.Size([4, 380])


Predicting:  71%|███████   | 1698/2400 [1:01:28<34:46,  2.97s/it]

GPU memory after batch 1697: 14.97 GB
Batch 1698 input shapes: input_ids=torch.Size([4, 191]), attention_mask=torch.Size([4, 191])


Predicting:  71%|███████   | 1699/2400 [1:01:30<31:31,  2.70s/it]

GPU memory after batch 1698: 14.97 GB
Batch 1699 input shapes: input_ids=torch.Size([4, 210]), attention_mask=torch.Size([4, 210])


Predicting:  71%|███████   | 1700/2400 [1:01:33<30:33,  2.62s/it]

GPU memory after batch 1699: 14.97 GB
Batch 1700 input shapes: input_ids=torch.Size([4, 247]), attention_mask=torch.Size([4, 247])


Predicting:  71%|███████   | 1701/2400 [1:01:36<30:48,  2.64s/it]

批次 1700: 原始预测: ['Yes', 'Yes', 'Yes\nIs "90', 'Yes\nIs "-910']
GPU memory after batch 1700: 14.97 GB
Batch 1701 input shapes: input_ids=torch.Size([4, 219]), attention_mask=torch.Size([4, 219])


Predicting:  71%|███████   | 1702/2400 [1:01:38<28:32,  2.45s/it]

GPU memory after batch 1701: 14.97 GB
Batch 1702 input shapes: input_ids=torch.Size([4, 235]), attention_mask=torch.Size([4, 235])


Predicting:  71%|███████   | 1703/2400 [1:01:40<29:14,  2.52s/it]

GPU memory after batch 1702: 14.97 GB
Batch 1703 input shapes: input_ids=torch.Size([4, 231]), attention_mask=torch.Size([4, 231])


Predicting:  71%|███████   | 1704/2400 [1:01:42<28:03,  2.42s/it]

GPU memory after batch 1703: 14.97 GB
Batch 1704 input shapes: input_ids=torch.Size([4, 313]), attention_mask=torch.Size([4, 313])


Predicting:  71%|███████   | 1705/2400 [1:01:46<31:04,  2.68s/it]

GPU memory after batch 1704: 14.97 GB
Batch 1705 input shapes: input_ids=torch.Size([4, 303]), attention_mask=torch.Size([4, 303])


Predicting:  71%|███████   | 1706/2400 [1:01:48<31:21,  2.71s/it]

GPU memory after batch 1705: 14.97 GB
Batch 1706 input shapes: input_ids=torch.Size([4, 193]), attention_mask=torch.Size([4, 193])


Predicting:  71%|███████   | 1707/2400 [1:01:50<28:31,  2.47s/it]

GPU memory after batch 1706: 14.97 GB
Batch 1707 input shapes: input_ids=torch.Size([4, 358]), attention_mask=torch.Size([4, 358])


Predicting:  71%|███████   | 1708/2400 [1:01:54<32:50,  2.85s/it]

GPU memory after batch 1707: 14.97 GB
Batch 1708 input shapes: input_ids=torch.Size([4, 249]), attention_mask=torch.Size([4, 249])


Predicting:  71%|███████   | 1709/2400 [1:01:57<31:57,  2.77s/it]

GPU memory after batch 1708: 14.97 GB
Batch 1709 input shapes: input_ids=torch.Size([4, 251]), attention_mask=torch.Size([4, 251])


Predicting:  71%|███████▏  | 1710/2400 [1:01:59<31:50,  2.77s/it]

GPU memory after batch 1709: 14.97 GB
Batch 1710 input shapes: input_ids=torch.Size([4, 227]), attention_mask=torch.Size([4, 227])


Predicting:  71%|███████▏  | 1711/2400 [1:02:02<31:20,  2.73s/it]

批次 1710: 原始预测: ['Yes\nIs "8', 'Yes\nYes, because', 'Yes', 'No']
GPU memory after batch 1710: 14.97 GB
Batch 1711 input shapes: input_ids=torch.Size([4, 326]), attention_mask=torch.Size([4, 326])


Predicting:  71%|███████▏  | 1712/2400 [1:02:06<33:46,  2.95s/it]

GPU memory after batch 1711: 14.97 GB
Batch 1712 input shapes: input_ids=torch.Size([4, 321]), attention_mask=torch.Size([4, 321])


Predicting:  71%|███████▏  | 1713/2400 [1:02:09<35:27,  3.10s/it]

GPU memory after batch 1712: 14.97 GB
Batch 1713 input shapes: input_ids=torch.Size([4, 318]), attention_mask=torch.Size([4, 318])


Predicting:  71%|███████▏  | 1714/2400 [1:02:12<36:06,  3.16s/it]

GPU memory after batch 1713: 14.97 GB
Batch 1714 input shapes: input_ids=torch.Size([4, 225]), attention_mask=torch.Size([4, 225])


Predicting:  71%|███████▏  | 1715/2400 [1:02:14<32:43,  2.87s/it]

GPU memory after batch 1714: 14.97 GB
Batch 1715 input shapes: input_ids=torch.Size([4, 231]), attention_mask=torch.Size([4, 231])


Predicting:  72%|███████▏  | 1716/2400 [1:02:17<31:53,  2.80s/it]

GPU memory after batch 1715: 14.97 GB
Batch 1716 input shapes: input_ids=torch.Size([4, 278]), attention_mask=torch.Size([4, 278])


Predicting:  72%|███████▏  | 1717/2400 [1:02:20<32:28,  2.85s/it]

GPU memory after batch 1716: 14.97 GB
Batch 1717 input shapes: input_ids=torch.Size([4, 205]), attention_mask=torch.Size([4, 205])


Predicting:  72%|███████▏  | 1718/2400 [1:02:22<29:17,  2.58s/it]

GPU memory after batch 1717: 14.97 GB
Batch 1718 input shapes: input_ids=torch.Size([4, 262]), attention_mask=torch.Size([4, 262])


Predicting:  72%|███████▏  | 1719/2400 [1:02:24<28:49,  2.54s/it]

GPU memory after batch 1718: 14.97 GB
Batch 1719 input shapes: input_ids=torch.Size([4, 207]), attention_mask=torch.Size([4, 207])


Predicting:  72%|███████▏  | 1720/2400 [1:02:26<26:46,  2.36s/it]

GPU memory after batch 1719: 14.97 GB
Batch 1720 input shapes: input_ids=torch.Size([4, 274]), attention_mask=torch.Size([4, 274])


Predicting:  72%|███████▏  | 1721/2400 [1:02:29<28:48,  2.55s/it]

批次 1720: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes', 'Yes\nIs "50']
GPU memory after batch 1720: 14.97 GB
Batch 1721 input shapes: input_ids=torch.Size([4, 218]), attention_mask=torch.Size([4, 218])


Predicting:  72%|███████▏  | 1722/2400 [1:02:31<26:56,  2.38s/it]

GPU memory after batch 1721: 14.97 GB
Batch 1722 input shapes: input_ids=torch.Size([4, 293]), attention_mask=torch.Size([4, 293])


Predicting:  72%|███████▏  | 1723/2400 [1:02:35<29:38,  2.63s/it]

GPU memory after batch 1722: 14.97 GB
Batch 1723 input shapes: input_ids=torch.Size([4, 348]), attention_mask=torch.Size([4, 348])


Predicting:  72%|███████▏  | 1724/2400 [1:02:38<32:45,  2.91s/it]

GPU memory after batch 1723: 14.97 GB
Batch 1724 input shapes: input_ids=torch.Size([4, 332]), attention_mask=torch.Size([4, 332])


Predicting:  72%|███████▏  | 1725/2400 [1:02:42<34:37,  3.08s/it]

GPU memory after batch 1724: 14.97 GB
Batch 1725 input shapes: input_ids=torch.Size([4, 256]), attention_mask=torch.Size([4, 256])


Predicting:  72%|███████▏  | 1726/2400 [1:02:44<33:30,  2.98s/it]

GPU memory after batch 1725: 14.97 GB
Batch 1726 input shapes: input_ids=torch.Size([4, 183]), attention_mask=torch.Size([4, 183])


Predicting:  72%|███████▏  | 1727/2400 [1:02:46<29:13,  2.61s/it]

GPU memory after batch 1726: 14.97 GB
Batch 1727 input shapes: input_ids=torch.Size([4, 185]), attention_mask=torch.Size([4, 185])


Predicting:  72%|███████▏  | 1728/2400 [1:02:48<26:20,  2.35s/it]

GPU memory after batch 1727: 14.97 GB
Batch 1728 input shapes: input_ids=torch.Size([4, 383]), attention_mask=torch.Size([4, 383])


Predicting:  72%|███████▏  | 1729/2400 [1:02:52<31:19,  2.80s/it]

GPU memory after batch 1728: 14.97 GB
Batch 1729 input shapes: input_ids=torch.Size([4, 273]), attention_mask=torch.Size([4, 273])


Predicting:  72%|███████▏  | 1730/2400 [1:02:55<31:51,  2.85s/it]

GPU memory after batch 1729: 14.97 GB
Batch 1730 input shapes: input_ids=torch.Size([4, 348]), attention_mask=torch.Size([4, 348])


Predicting:  72%|███████▏  | 1731/2400 [1:02:58<34:11,  3.07s/it]

批次 1730: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1730: 14.97 GB
Batch 1731 input shapes: input_ids=torch.Size([4, 206]), attention_mask=torch.Size([4, 206])


Predicting:  72%|███████▏  | 1732/2400 [1:03:00<30:23,  2.73s/it]

GPU memory after batch 1731: 14.97 GB
Batch 1732 input shapes: input_ids=torch.Size([4, 294]), attention_mask=torch.Size([4, 294])


Predicting:  72%|███████▏  | 1733/2400 [1:03:03<31:52,  2.87s/it]

GPU memory after batch 1732: 14.97 GB
Batch 1733 input shapes: input_ids=torch.Size([4, 230]), attention_mask=torch.Size([4, 230])


Predicting:  72%|███████▏  | 1734/2400 [1:03:06<31:04,  2.80s/it]

GPU memory after batch 1733: 14.97 GB
Batch 1734 input shapes: input_ids=torch.Size([4, 439]), attention_mask=torch.Size([4, 439])


Predicting:  72%|███████▏  | 1735/2400 [1:03:09<32:39,  2.95s/it]

GPU memory after batch 1734: 14.97 GB
Batch 1735 input shapes: input_ids=torch.Size([4, 442]), attention_mask=torch.Size([4, 442])


Predicting:  72%|███████▏  | 1736/2400 [1:03:13<33:45,  3.05s/it]

GPU memory after batch 1735: 14.97 GB
Batch 1736 input shapes: input_ids=torch.Size([4, 440]), attention_mask=torch.Size([4, 440])


Predicting:  72%|███████▏  | 1737/2400 [1:03:16<34:29,  3.12s/it]

GPU memory after batch 1736: 14.97 GB
Batch 1737 input shapes: input_ids=torch.Size([4, 300]), attention_mask=torch.Size([4, 300])


Predicting:  72%|███████▏  | 1738/2400 [1:03:19<34:48,  3.16s/it]

GPU memory after batch 1737: 14.97 GB
Batch 1738 input shapes: input_ids=torch.Size([4, 252]), attention_mask=torch.Size([4, 252])


Predicting:  72%|███████▏  | 1739/2400 [1:03:22<33:25,  3.03s/it]

GPU memory after batch 1738: 14.97 GB
Batch 1739 input shapes: input_ids=torch.Size([4, 327]), attention_mask=torch.Size([4, 327])


Predicting:  72%|███████▎  | 1740/2400 [1:03:25<34:44,  3.16s/it]

GPU memory after batch 1739: 14.97 GB
Batch 1740 input shapes: input_ids=torch.Size([4, 206]), attention_mask=torch.Size([4, 206])


Predicting:  73%|███████▎  | 1741/2400 [1:03:27<30:41,  2.79s/it]

批次 1740: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1740: 14.97 GB
Batch 1741 input shapes: input_ids=torch.Size([4, 209]), attention_mask=torch.Size([4, 209])


Predicting:  73%|███████▎  | 1742/2400 [1:03:30<29:24,  2.68s/it]

GPU memory after batch 1741: 14.97 GB
Batch 1742 input shapes: input_ids=torch.Size([4, 249]), attention_mask=torch.Size([4, 249])


Predicting:  73%|███████▎  | 1743/2400 [1:03:32<29:36,  2.70s/it]

GPU memory after batch 1742: 14.97 GB
Batch 1743 input shapes: input_ids=torch.Size([4, 247]), attention_mask=torch.Size([4, 247])


Predicting:  73%|███████▎  | 1744/2400 [1:03:35<29:33,  2.70s/it]

GPU memory after batch 1743: 14.97 GB
Batch 1744 input shapes: input_ids=torch.Size([4, 241]), attention_mask=torch.Size([4, 241])


Predicting:  73%|███████▎  | 1745/2400 [1:03:38<29:30,  2.70s/it]

GPU memory after batch 1744: 14.97 GB
Batch 1745 input shapes: input_ids=torch.Size([4, 285]), attention_mask=torch.Size([4, 285])


Predicting:  73%|███████▎  | 1746/2400 [1:03:41<30:30,  2.80s/it]

GPU memory after batch 1745: 14.97 GB
Batch 1746 input shapes: input_ids=torch.Size([4, 281]), attention_mask=torch.Size([4, 281])


Predicting:  73%|███████▎  | 1747/2400 [1:03:44<31:11,  2.87s/it]

GPU memory after batch 1746: 14.97 GB
Batch 1747 input shapes: input_ids=torch.Size([4, 186]), attention_mask=torch.Size([4, 186])


Predicting:  73%|███████▎  | 1748/2400 [1:03:46<28:59,  2.67s/it]

GPU memory after batch 1747: 14.97 GB
Batch 1748 input shapes: input_ids=torch.Size([4, 214]), attention_mask=torch.Size([4, 214])


Predicting:  73%|███████▎  | 1749/2400 [1:03:48<26:43,  2.46s/it]

GPU memory after batch 1748: 14.97 GB
Batch 1749 input shapes: input_ids=torch.Size([4, 270]), attention_mask=torch.Size([4, 270])


Predicting:  73%|███████▎  | 1750/2400 [1:03:51<28:17,  2.61s/it]

GPU memory after batch 1749: 14.97 GB
Batch 1750 input shapes: input_ids=torch.Size([4, 218]), attention_mask=torch.Size([4, 218])


Predicting:  73%|███████▎  | 1751/2400 [1:03:53<26:19,  2.43s/it]

批次 1750: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1750: 14.97 GB
Batch 1751 input shapes: input_ids=torch.Size([4, 220]), attention_mask=torch.Size([4, 220])


Predicting:  73%|███████▎  | 1752/2400 [1:03:55<24:56,  2.31s/it]

GPU memory after batch 1751: 14.97 GB
Batch 1752 input shapes: input_ids=torch.Size([4, 275]), attention_mask=torch.Size([4, 275])


Predicting:  73%|███████▎  | 1753/2400 [1:03:58<27:03,  2.51s/it]

GPU memory after batch 1752: 14.97 GB
Batch 1753 input shapes: input_ids=torch.Size([4, 198]), attention_mask=torch.Size([4, 198])


Predicting:  73%|███████▎  | 1754/2400 [1:04:00<26:32,  2.46s/it]

GPU memory after batch 1753: 14.97 GB
Batch 1754 input shapes: input_ids=torch.Size([4, 342]), attention_mask=torch.Size([4, 342])


Predicting:  73%|███████▎  | 1755/2400 [1:04:04<29:55,  2.78s/it]

GPU memory after batch 1754: 14.97 GB
Batch 1755 input shapes: input_ids=torch.Size([4, 221]), attention_mask=torch.Size([4, 221])


Predicting:  73%|███████▎  | 1756/2400 [1:04:06<28:52,  2.69s/it]

GPU memory after batch 1755: 14.97 GB
Batch 1756 input shapes: input_ids=torch.Size([4, 241]), attention_mask=torch.Size([4, 241])


Predicting:  73%|███████▎  | 1757/2400 [1:04:09<28:51,  2.69s/it]

GPU memory after batch 1756: 14.97 GB
Batch 1757 input shapes: input_ids=torch.Size([4, 396]), attention_mask=torch.Size([4, 396])


Predicting:  73%|███████▎  | 1758/2400 [1:04:13<31:43,  2.97s/it]

GPU memory after batch 1757: 14.97 GB
Batch 1758 input shapes: input_ids=torch.Size([4, 387]), attention_mask=torch.Size([4, 387])


Predicting:  73%|███████▎  | 1759/2400 [1:04:16<33:34,  3.14s/it]

GPU memory after batch 1758: 14.97 GB
Batch 1759 input shapes: input_ids=torch.Size([4, 341]), attention_mask=torch.Size([4, 341])


Predicting:  73%|███████▎  | 1760/2400 [1:04:20<34:45,  3.26s/it]

GPU memory after batch 1759: 14.97 GB
Batch 1760 input shapes: input_ids=torch.Size([4, 341]), attention_mask=torch.Size([4, 341])


Predicting:  73%|███████▎  | 1761/2400 [1:04:23<35:33,  3.34s/it]

批次 1760: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1760: 14.97 GB
Batch 1761 input shapes: input_ids=torch.Size([4, 331]), attention_mask=torch.Size([4, 331])


Predicting:  73%|███████▎  | 1762/2400 [1:04:27<35:56,  3.38s/it]

GPU memory after batch 1761: 14.97 GB
Batch 1762 input shapes: input_ids=torch.Size([4, 485]), attention_mask=torch.Size([4, 485])


Predicting:  73%|███████▎  | 1763/2400 [1:04:31<38:56,  3.67s/it]

GPU memory after batch 1762: 14.97 GB
Batch 1763 input shapes: input_ids=torch.Size([4, 396]), attention_mask=torch.Size([4, 396])


Predicting:  74%|███████▎  | 1764/2400 [1:04:35<38:39,  3.65s/it]

GPU memory after batch 1763: 14.97 GB
Batch 1764 input shapes: input_ids=torch.Size([4, 395]), attention_mask=torch.Size([4, 395])


Predicting:  74%|███████▎  | 1765/2400 [1:04:38<38:26,  3.63s/it]

GPU memory after batch 1764: 14.97 GB
Batch 1765 input shapes: input_ids=torch.Size([4, 369]), attention_mask=torch.Size([4, 369])


Predicting:  74%|███████▎  | 1766/2400 [1:04:42<38:55,  3.68s/it]

GPU memory after batch 1765: 14.97 GB
Batch 1766 input shapes: input_ids=torch.Size([4, 369]), attention_mask=torch.Size([4, 369])


Predicting:  74%|███████▎  | 1767/2400 [1:04:46<39:14,  3.72s/it]

GPU memory after batch 1766: 14.97 GB
Batch 1767 input shapes: input_ids=torch.Size([4, 406]), attention_mask=torch.Size([4, 406])


Predicting:  74%|███████▎  | 1768/2400 [1:04:50<38:49,  3.69s/it]

GPU memory after batch 1767: 14.97 GB
Batch 1768 input shapes: input_ids=torch.Size([4, 401]), attention_mask=torch.Size([4, 401])


Predicting:  74%|███████▎  | 1769/2400 [1:04:53<38:29,  3.66s/it]

GPU memory after batch 1768: 14.97 GB
Batch 1769 input shapes: input_ids=torch.Size([4, 438]), attention_mask=torch.Size([4, 438])


Predicting:  74%|███████▍  | 1770/2400 [1:04:56<37:16,  3.55s/it]

GPU memory after batch 1769: 14.97 GB
Batch 1770 input shapes: input_ids=torch.Size([4, 380]), attention_mask=torch.Size([4, 380])


Predicting:  74%|███████▍  | 1771/2400 [1:05:00<38:07,  3.64s/it]

批次 1770: 原始预测: ['Yes\nIs "8', 'Yes\nIs "12', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1770: 14.97 GB
Batch 1771 input shapes: input_ids=torch.Size([4, 380]), attention_mask=torch.Size([4, 380])


Predicting:  74%|███████▍  | 1772/2400 [1:05:04<38:42,  3.70s/it]

GPU memory after batch 1771: 14.97 GB
Batch 1772 input shapes: input_ids=torch.Size([4, 320]), attention_mask=torch.Size([4, 320])


Predicting:  74%|███████▍  | 1773/2400 [1:05:07<37:25,  3.58s/it]

GPU memory after batch 1772: 14.97 GB
Batch 1773 input shapes: input_ids=torch.Size([4, 381]), attention_mask=torch.Size([4, 381])


Predicting:  74%|███████▍  | 1774/2400 [1:05:11<38:11,  3.66s/it]

GPU memory after batch 1773: 14.97 GB
Batch 1774 input shapes: input_ids=torch.Size([4, 368]), attention_mask=torch.Size([4, 368])


Predicting:  74%|███████▍  | 1775/2400 [1:05:15<38:29,  3.70s/it]

GPU memory after batch 1774: 14.97 GB
Batch 1775 input shapes: input_ids=torch.Size([4, 396]), attention_mask=torch.Size([4, 396])


Predicting:  74%|███████▍  | 1776/2400 [1:05:19<38:07,  3.67s/it]

GPU memory after batch 1775: 14.97 GB
Batch 1776 input shapes: input_ids=torch.Size([4, 508]), attention_mask=torch.Size([4, 508])


Predicting:  74%|███████▍  | 1777/2400 [1:05:23<40:13,  3.87s/it]

GPU memory after batch 1776: 14.97 GB
Batch 1777 input shapes: input_ids=torch.Size([4, 377]), attention_mask=torch.Size([4, 377])


Predicting:  74%|███████▍  | 1778/2400 [1:05:27<40:03,  3.86s/it]

GPU memory after batch 1777: 14.97 GB
Batch 1778 input shapes: input_ids=torch.Size([4, 368]), attention_mask=torch.Size([4, 368])


Predicting:  74%|███████▍  | 1779/2400 [1:05:31<39:43,  3.84s/it]

GPU memory after batch 1778: 14.97 GB
Batch 1779 input shapes: input_ids=torch.Size([4, 368]), attention_mask=torch.Size([4, 368])


Predicting:  74%|███████▍  | 1780/2400 [1:05:34<39:28,  3.82s/it]

GPU memory after batch 1779: 14.97 GB
Batch 1780 input shapes: input_ids=torch.Size([4, 409]), attention_mask=torch.Size([4, 409])


Predicting:  74%|███████▍  | 1781/2400 [1:05:38<38:45,  3.76s/it]

批次 1780: 原始预测: ['Yes\nOkay, so', 'Yes\nYes, because', 'Yes\n"16"', 'Yes\nOkay, so']
GPU memory after batch 1780: 14.97 GB
Batch 1781 input shapes: input_ids=torch.Size([4, 407]), attention_mask=torch.Size([4, 407])


Predicting:  74%|███████▍  | 1782/2400 [1:05:42<38:13,  3.71s/it]

GPU memory after batch 1781: 14.97 GB
Batch 1782 input shapes: input_ids=torch.Size([4, 386]), attention_mask=torch.Size([4, 386])


Predicting:  74%|███████▍  | 1783/2400 [1:05:45<37:48,  3.68s/it]

GPU memory after batch 1782: 14.97 GB
Batch 1783 input shapes: input_ids=torch.Size([4, 363]), attention_mask=torch.Size([4, 363])


Predicting:  74%|███████▍  | 1784/2400 [1:05:49<38:02,  3.71s/it]

GPU memory after batch 1783: 14.97 GB
Batch 1784 input shapes: input_ids=torch.Size([4, 373]), attention_mask=torch.Size([4, 373])


Predicting:  74%|███████▍  | 1785/2400 [1:05:53<38:16,  3.73s/it]

GPU memory after batch 1784: 14.97 GB
Batch 1785 input shapes: input_ids=torch.Size([4, 376]), attention_mask=torch.Size([4, 376])


Predicting:  74%|███████▍  | 1786/2400 [1:05:57<38:25,  3.76s/it]

GPU memory after batch 1785: 14.97 GB
Batch 1786 input shapes: input_ids=torch.Size([4, 389]), attention_mask=torch.Size([4, 389])


Predicting:  74%|███████▍  | 1787/2400 [1:06:00<37:52,  3.71s/it]

GPU memory after batch 1786: 14.97 GB
Batch 1787 input shapes: input_ids=torch.Size([4, 393]), attention_mask=torch.Size([4, 393])


Predicting:  74%|███████▍  | 1788/2400 [1:06:04<37:29,  3.68s/it]

GPU memory after batch 1787: 14.97 GB
Batch 1788 input shapes: input_ids=torch.Size([4, 364]), attention_mask=torch.Size([4, 364])


Predicting:  75%|███████▍  | 1789/2400 [1:06:08<37:44,  3.71s/it]

GPU memory after batch 1788: 14.97 GB
Batch 1789 input shapes: input_ids=torch.Size([4, 368]), attention_mask=torch.Size([4, 368])


Predicting:  75%|███████▍  | 1790/2400 [1:06:11<37:53,  3.73s/it]

GPU memory after batch 1789: 14.97 GB
Batch 1790 input shapes: input_ids=torch.Size([4, 385]), attention_mask=torch.Size([4, 385])


Predicting:  75%|███████▍  | 1791/2400 [1:06:15<37:25,  3.69s/it]

批次 1790: 原始预测: ['Yes\nIs "224', 'Yes\nYes, because', 'Yes\nIs "36', 'Yes\nYes, because']
GPU memory after batch 1790: 14.97 GB
Batch 1791 input shapes: input_ids=torch.Size([4, 385]), attention_mask=torch.Size([4, 385])


Predicting:  75%|███████▍  | 1792/2400 [1:06:19<37:04,  3.66s/it]

GPU memory after batch 1791: 14.97 GB
Batch 1792 input shapes: input_ids=torch.Size([4, 359]), attention_mask=torch.Size([4, 359])


Predicting:  75%|███████▍  | 1793/2400 [1:06:22<37:13,  3.68s/it]

GPU memory after batch 1792: 14.97 GB
Batch 1793 input shapes: input_ids=torch.Size([4, 395]), attention_mask=torch.Size([4, 395])


Predicting:  75%|███████▍  | 1794/2400 [1:06:26<36:55,  3.66s/it]

GPU memory after batch 1793: 14.97 GB
Batch 1794 input shapes: input_ids=torch.Size([4, 309]), attention_mask=torch.Size([4, 309])


Predicting:  75%|███████▍  | 1795/2400 [1:06:29<35:41,  3.54s/it]

GPU memory after batch 1794: 14.97 GB
Batch 1795 input shapes: input_ids=torch.Size([4, 351]), attention_mask=torch.Size([4, 351])


Predicting:  75%|███████▍  | 1796/2400 [1:06:33<35:42,  3.55s/it]

GPU memory after batch 1795: 14.97 GB
Batch 1796 input shapes: input_ids=torch.Size([4, 372]), attention_mask=torch.Size([4, 372])


Predicting:  75%|███████▍  | 1797/2400 [1:06:37<36:25,  3.62s/it]

GPU memory after batch 1796: 14.97 GB
Batch 1797 input shapes: input_ids=torch.Size([4, 369]), attention_mask=torch.Size([4, 369])


Predicting:  75%|███████▍  | 1798/2400 [1:06:40<36:53,  3.68s/it]

GPU memory after batch 1797: 14.97 GB
Batch 1798 input shapes: input_ids=torch.Size([4, 387]), attention_mask=torch.Size([4, 387])


Predicting:  75%|███████▍  | 1799/2400 [1:06:44<36:35,  3.65s/it]

GPU memory after batch 1798: 14.97 GB
Batch 1799 input shapes: input_ids=torch.Size([4, 385]), attention_mask=torch.Size([4, 385])


Predicting:  75%|███████▌  | 1800/2400 [1:06:48<36:21,  3.64s/it]

GPU memory after batch 1799: 14.97 GB
Batch 1800 input shapes: input_ids=torch.Size([4, 512]), attention_mask=torch.Size([4, 512])


Predicting:  75%|███████▌  | 1801/2400 [1:06:52<38:28,  3.85s/it]

批次 1800: 原始预测: ['est.\nThe Taipei', 'est.\nThe Taipei', 'Yes\nOkay, so', 'Yes\nOkay, so']
GPU memory after batch 1800: 14.97 GB
Batch 1801 input shapes: input_ids=torch.Size([4, 404]), attention_mask=torch.Size([4, 404])


Predicting:  75%|███████▌  | 1802/2400 [1:06:55<37:32,  3.77s/it]

GPU memory after batch 1801: 14.97 GB
Batch 1802 input shapes: input_ids=torch.Size([4, 363]), attention_mask=torch.Size([4, 363])


Predicting:  75%|███████▌  | 1803/2400 [1:06:59<37:30,  3.77s/it]

GPU memory after batch 1802: 14.97 GB
Batch 1803 input shapes: input_ids=torch.Size([4, 360]), attention_mask=torch.Size([4, 360])


Predicting:  75%|███████▌  | 1804/2400 [1:07:03<37:18,  3.76s/it]

GPU memory after batch 1803: 14.97 GB
Batch 1804 input shapes: input_ids=torch.Size([4, 444]), attention_mask=torch.Size([4, 444])


Predicting:  75%|███████▌  | 1805/2400 [1:07:07<37:17,  3.76s/it]

GPU memory after batch 1804: 14.97 GB
Batch 1805 input shapes: input_ids=torch.Size([4, 442]), attention_mask=torch.Size([4, 442])


Predicting:  75%|███████▌  | 1806/2400 [1:07:10<37:07,  3.75s/it]

GPU memory after batch 1805: 14.97 GB
Batch 1806 input shapes: input_ids=torch.Size([4, 446]), attention_mask=torch.Size([4, 446])


Predicting:  75%|███████▌  | 1807/2400 [1:07:14<37:07,  3.76s/it]

GPU memory after batch 1806: 14.97 GB
Batch 1807 input shapes: input_ids=torch.Size([4, 394]), attention_mask=torch.Size([4, 394])


Predicting:  75%|███████▌  | 1808/2400 [1:07:18<36:35,  3.71s/it]

GPU memory after batch 1807: 14.97 GB
Batch 1808 input shapes: input_ids=torch.Size([4, 405]), attention_mask=torch.Size([4, 405])


Predicting:  75%|███████▌  | 1809/2400 [1:07:21<36:13,  3.68s/it]

GPU memory after batch 1808: 14.97 GB
Batch 1809 input shapes: input_ids=torch.Size([4, 364]), attention_mask=torch.Size([4, 364])


Predicting:  75%|███████▌  | 1810/2400 [1:07:25<36:26,  3.71s/it]

GPU memory after batch 1809: 14.97 GB
Batch 1810 input shapes: input_ids=torch.Size([4, 364]), attention_mask=torch.Size([4, 364])


Predicting:  75%|███████▌  | 1811/2400 [1:07:29<36:35,  3.73s/it]

批次 1810: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1810: 14.97 GB
Batch 1811 input shapes: input_ids=torch.Size([4, 382]), attention_mask=torch.Size([4, 382])


Predicting:  76%|███████▌  | 1812/2400 [1:07:33<36:52,  3.76s/it]

GPU memory after batch 1811: 14.97 GB
Batch 1812 input shapes: input_ids=torch.Size([4, 409]), attention_mask=torch.Size([4, 409])


Predicting:  76%|███████▌  | 1813/2400 [1:07:36<36:21,  3.72s/it]

GPU memory after batch 1812: 14.97 GB
Batch 1813 input shapes: input_ids=torch.Size([4, 391]), attention_mask=torch.Size([4, 391])


Predicting:  76%|███████▌  | 1814/2400 [1:07:40<35:57,  3.68s/it]

GPU memory after batch 1813: 14.97 GB
Batch 1814 input shapes: input_ids=torch.Size([4, 383]), attention_mask=torch.Size([4, 383])


Predicting:  76%|███████▌  | 1815/2400 [1:07:44<36:22,  3.73s/it]

GPU memory after batch 1814: 14.97 GB
Batch 1815 input shapes: input_ids=torch.Size([4, 382]), attention_mask=torch.Size([4, 382])


Predicting:  76%|███████▌  | 1816/2400 [1:07:48<36:39,  3.77s/it]

GPU memory after batch 1815: 14.97 GB
Batch 1816 input shapes: input_ids=torch.Size([4, 408]), attention_mask=torch.Size([4, 408])


Predicting:  76%|███████▌  | 1817/2400 [1:07:51<36:07,  3.72s/it]

GPU memory after batch 1816: 14.97 GB
Batch 1817 input shapes: input_ids=torch.Size([4, 360]), attention_mask=torch.Size([4, 360])


Predicting:  76%|███████▌  | 1818/2400 [1:07:55<36:05,  3.72s/it]

GPU memory after batch 1817: 14.97 GB
Batch 1818 input shapes: input_ids=torch.Size([4, 383]), attention_mask=torch.Size([4, 383])


Predicting:  76%|███████▌  | 1819/2400 [1:07:59<36:23,  3.76s/it]

GPU memory after batch 1818: 14.97 GB
Batch 1819 input shapes: input_ids=torch.Size([4, 392]), attention_mask=torch.Size([4, 392])


Predicting:  76%|███████▌  | 1820/2400 [1:08:03<35:51,  3.71s/it]

GPU memory after batch 1819: 14.97 GB
Batch 1820 input shapes: input_ids=torch.Size([4, 393]), attention_mask=torch.Size([4, 393])


Predicting:  76%|███████▌  | 1821/2400 [1:08:06<35:29,  3.68s/it]

批次 1820: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1820: 14.97 GB
Batch 1821 input shapes: input_ids=torch.Size([4, 357]), attention_mask=torch.Size([4, 357])


Predicting:  76%|███████▌  | 1822/2400 [1:08:10<35:34,  3.69s/it]

GPU memory after batch 1821: 14.97 GB
Batch 1822 input shapes: input_ids=torch.Size([4, 376]), attention_mask=torch.Size([4, 376])


Predicting:  76%|███████▌  | 1823/2400 [1:08:14<35:49,  3.73s/it]

GPU memory after batch 1822: 14.97 GB
Batch 1823 input shapes: input_ids=torch.Size([4, 366]), attention_mask=torch.Size([4, 366])


Predicting:  76%|███████▌  | 1824/2400 [1:08:17<35:50,  3.73s/it]

GPU memory after batch 1823: 14.97 GB
Batch 1824 input shapes: input_ids=torch.Size([4, 458]), attention_mask=torch.Size([4, 458])


Predicting:  76%|███████▌  | 1825/2400 [1:08:22<36:53,  3.85s/it]

GPU memory after batch 1824: 14.97 GB
Batch 1825 input shapes: input_ids=torch.Size([4, 353]), attention_mask=torch.Size([4, 353])


Predicting:  76%|███████▌  | 1826/2400 [1:08:25<36:23,  3.80s/it]

GPU memory after batch 1825: 14.97 GB
Batch 1826 input shapes: input_ids=torch.Size([4, 381]), attention_mask=torch.Size([4, 381])


Predicting:  76%|███████▌  | 1827/2400 [1:08:29<36:26,  3.82s/it]

GPU memory after batch 1826: 14.97 GB
Batch 1827 input shapes: input_ids=torch.Size([4, 301]), attention_mask=torch.Size([4, 301])


Predicting:  76%|███████▌  | 1828/2400 [1:08:32<34:43,  3.64s/it]

GPU memory after batch 1827: 14.97 GB
Batch 1828 input shapes: input_ids=torch.Size([4, 402]), attention_mask=torch.Size([4, 402])


Predicting:  76%|███████▌  | 1829/2400 [1:08:36<34:32,  3.63s/it]

GPU memory after batch 1828: 14.97 GB
Batch 1829 input shapes: input_ids=torch.Size([4, 406]), attention_mask=torch.Size([4, 406])


Predicting:  76%|███████▋  | 1830/2400 [1:08:39<34:24,  3.62s/it]

GPU memory after batch 1829: 14.97 GB
Batch 1830 input shapes: input_ids=torch.Size([4, 453]), attention_mask=torch.Size([4, 453])


Predicting:  76%|███████▋  | 1831/2400 [1:08:44<35:35,  3.75s/it]

批次 1830: 原始预测: ['Yes\nOkay, so', 'Yes\nIs "100', 'Yes\nOkay, so', 'Yes\nOkay, so']
GPU memory after batch 1830: 14.97 GB
Batch 1831 input shapes: input_ids=torch.Size([4, 452]), attention_mask=torch.Size([4, 452])


Predicting:  76%|███████▋  | 1832/2400 [1:08:48<36:20,  3.84s/it]

GPU memory after batch 1831: 14.97 GB
Batch 1832 input shapes: input_ids=torch.Size([4, 445]), attention_mask=torch.Size([4, 445])


Predicting:  76%|███████▋  | 1833/2400 [1:08:51<36:06,  3.82s/it]

GPU memory after batch 1832: 14.97 GB
Batch 1833 input shapes: input_ids=torch.Size([4, 444]), attention_mask=torch.Size([4, 444])


Predicting:  76%|███████▋  | 1834/2400 [1:08:55<35:54,  3.81s/it]

GPU memory after batch 1833: 14.97 GB
Batch 1834 input shapes: input_ids=torch.Size([4, 371]), attention_mask=torch.Size([4, 371])


Predicting:  76%|███████▋  | 1835/2400 [1:08:59<35:49,  3.81s/it]

GPU memory after batch 1834: 14.97 GB
Batch 1835 input shapes: input_ids=torch.Size([4, 371]), attention_mask=torch.Size([4, 371])


Predicting:  76%|███████▋  | 1836/2400 [1:09:03<35:45,  3.80s/it]

GPU memory after batch 1835: 14.97 GB
Batch 1836 input shapes: input_ids=torch.Size([4, 360]), attention_mask=torch.Size([4, 360])


Predicting:  77%|███████▋  | 1837/2400 [1:09:06<35:29,  3.78s/it]

GPU memory after batch 1836: 14.97 GB
Batch 1837 input shapes: input_ids=torch.Size([4, 316]), attention_mask=torch.Size([4, 316])


Predicting:  77%|███████▋  | 1838/2400 [1:09:10<34:04,  3.64s/it]

GPU memory after batch 1837: 14.97 GB
Batch 1838 input shapes: input_ids=torch.Size([4, 239]), attention_mask=torch.Size([4, 239])


Predicting:  77%|███████▋  | 1839/2400 [1:09:12<31:18,  3.35s/it]

GPU memory after batch 1838: 14.97 GB
Batch 1839 input shapes: input_ids=torch.Size([4, 410]), attention_mask=torch.Size([4, 410])


Predicting:  77%|███████▋  | 1840/2400 [1:09:16<31:58,  3.43s/it]

GPU memory after batch 1839: 14.97 GB
Batch 1840 input shapes: input_ids=torch.Size([4, 408]), attention_mask=torch.Size([4, 408])


Predicting:  77%|███████▋  | 1841/2400 [1:09:20<32:25,  3.48s/it]

批次 1840: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1840: 14.97 GB
Batch 1841 input shapes: input_ids=torch.Size([4, 346]), attention_mask=torch.Size([4, 346])


Predicting:  77%|███████▋  | 1842/2400 [1:09:23<32:35,  3.50s/it]

GPU memory after batch 1841: 14.97 GB
Batch 1842 input shapes: input_ids=torch.Size([4, 420]), attention_mask=torch.Size([4, 420])


Predicting:  77%|███████▋  | 1843/2400 [1:09:27<33:13,  3.58s/it]

GPU memory after batch 1842: 14.97 GB
Batch 1843 input shapes: input_ids=torch.Size([4, 415]), attention_mask=torch.Size([4, 415])


Predicting:  77%|███████▋  | 1844/2400 [1:09:31<33:15,  3.59s/it]

GPU memory after batch 1843: 14.97 GB
Batch 1844 input shapes: input_ids=torch.Size([4, 364]), attention_mask=torch.Size([4, 364])


Predicting:  77%|███████▋  | 1845/2400 [1:09:34<33:42,  3.64s/it]

GPU memory after batch 1844: 14.97 GB
Batch 1845 input shapes: input_ids=torch.Size([4, 357]), attention_mask=torch.Size([4, 357])


Predicting:  77%|███████▋  | 1846/2400 [1:09:38<33:48,  3.66s/it]

GPU memory after batch 1845: 14.97 GB
Batch 1846 input shapes: input_ids=torch.Size([4, 357]), attention_mask=torch.Size([4, 357])


Predicting:  77%|███████▋  | 1847/2400 [1:09:42<33:55,  3.68s/it]

GPU memory after batch 1846: 14.97 GB
Batch 1847 input shapes: input_ids=torch.Size([4, 376]), attention_mask=torch.Size([4, 376])


Predicting:  77%|███████▋  | 1848/2400 [1:09:46<34:12,  3.72s/it]

GPU memory after batch 1847: 14.97 GB
Batch 1848 input shapes: input_ids=torch.Size([4, 488]), attention_mask=torch.Size([4, 488])


Predicting:  77%|███████▋  | 1849/2400 [1:09:50<35:51,  3.91s/it]

GPU memory after batch 1848: 14.97 GB
Batch 1849 input shapes: input_ids=torch.Size([4, 397]), attention_mask=torch.Size([4, 397])


Predicting:  77%|███████▋  | 1850/2400 [1:09:54<34:57,  3.81s/it]

GPU memory after batch 1849: 14.97 GB
Batch 1850 input shapes: input_ids=torch.Size([4, 397]), attention_mask=torch.Size([4, 397])


Predicting:  77%|███████▋  | 1851/2400 [1:09:57<34:12,  3.74s/it]

批次 1850: 原始预测: ['Yes\nExplanation: The', 'Yes\nExplanation: The', 'Yes\nIs "2', 'Yes\nIs "2']
GPU memory after batch 1850: 14.97 GB
Batch 1851 input shapes: input_ids=torch.Size([4, 395]), attention_mask=torch.Size([4, 395])


Predicting:  77%|███████▋  | 1852/2400 [1:10:01<33:38,  3.68s/it]

GPU memory after batch 1851: 14.97 GB
Batch 1852 input shapes: input_ids=torch.Size([4, 416]), attention_mask=torch.Size([4, 416])


Predicting:  77%|███████▋  | 1853/2400 [1:10:04<33:23,  3.66s/it]

GPU memory after batch 1852: 14.97 GB
Batch 1853 input shapes: input_ids=torch.Size([4, 404]), attention_mask=torch.Size([4, 404])


Predicting:  77%|███████▋  | 1854/2400 [1:10:08<33:10,  3.64s/it]

GPU memory after batch 1853: 14.97 GB
Batch 1854 input shapes: input_ids=torch.Size([4, 379]), attention_mask=torch.Size([4, 379])


Predicting:  77%|███████▋  | 1855/2400 [1:10:12<33:38,  3.70s/it]

GPU memory after batch 1854: 14.97 GB
Batch 1855 input shapes: input_ids=torch.Size([4, 441]), attention_mask=torch.Size([4, 441])


Predicting:  77%|███████▋  | 1856/2400 [1:10:15<33:45,  3.72s/it]

GPU memory after batch 1855: 14.97 GB
Batch 1856 input shapes: input_ids=torch.Size([4, 440]), attention_mask=torch.Size([4, 440])


Predicting:  77%|███████▋  | 1857/2400 [1:10:19<33:49,  3.74s/it]

GPU memory after batch 1856: 14.97 GB
Batch 1857 input shapes: input_ids=torch.Size([4, 438]), attention_mask=torch.Size([4, 438])


Predicting:  77%|███████▋  | 1858/2400 [1:10:23<33:50,  3.75s/it]

GPU memory after batch 1857: 14.97 GB
Batch 1858 input shapes: input_ids=torch.Size([4, 413]), attention_mask=torch.Size([4, 413])


Predicting:  77%|███████▋  | 1859/2400 [1:10:27<33:24,  3.71s/it]

GPU memory after batch 1858: 14.97 GB
Batch 1859 input shapes: input_ids=torch.Size([4, 399]), attention_mask=torch.Size([4, 399])


Predicting:  78%|███████▊  | 1860/2400 [1:10:30<33:04,  3.67s/it]

GPU memory after batch 1859: 14.97 GB
Batch 1860 input shapes: input_ids=torch.Size([4, 387]), attention_mask=torch.Size([4, 387])


Predicting:  78%|███████▊  | 1861/2400 [1:10:34<32:47,  3.65s/it]

批次 1860: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1860: 14.97 GB
Batch 1861 input shapes: input_ids=torch.Size([4, 391]), attention_mask=torch.Size([4, 391])


Predicting:  78%|███████▊  | 1862/2400 [1:10:37<32:35,  3.63s/it]

GPU memory after batch 1861: 14.97 GB
Batch 1862 input shapes: input_ids=torch.Size([4, 378]), attention_mask=torch.Size([4, 378])


Predicting:  78%|███████▊  | 1863/2400 [1:10:41<33:05,  3.70s/it]

GPU memory after batch 1862: 14.97 GB
Batch 1863 input shapes: input_ids=torch.Size([4, 378]), attention_mask=torch.Size([4, 378])


Predicting:  78%|███████▊  | 1864/2400 [1:10:45<33:24,  3.74s/it]

GPU memory after batch 1863: 14.97 GB
Batch 1864 input shapes: input_ids=torch.Size([4, 363]), attention_mask=torch.Size([4, 363])


Predicting:  78%|███████▊  | 1865/2400 [1:10:49<33:27,  3.75s/it]

GPU memory after batch 1864: 14.97 GB
Batch 1865 input shapes: input_ids=torch.Size([4, 367]), attention_mask=torch.Size([4, 367])


Predicting:  78%|███████▊  | 1866/2400 [1:10:53<33:27,  3.76s/it]

GPU memory after batch 1865: 14.97 GB
Batch 1866 input shapes: input_ids=torch.Size([4, 365]), attention_mask=torch.Size([4, 365])


Predicting:  78%|███████▊  | 1867/2400 [1:10:56<33:27,  3.77s/it]

GPU memory after batch 1866: 14.97 GB
Batch 1867 input shapes: input_ids=torch.Size([4, 373]), attention_mask=torch.Size([4, 373])


Predicting:  78%|███████▊  | 1868/2400 [1:11:00<33:29,  3.78s/it]

GPU memory after batch 1867: 14.97 GB
Batch 1868 input shapes: input_ids=torch.Size([4, 373]), attention_mask=torch.Size([4, 373])


Predicting:  78%|███████▊  | 1869/2400 [1:11:04<33:29,  3.78s/it]

GPU memory after batch 1868: 14.97 GB
Batch 1869 input shapes: input_ids=torch.Size([4, 346]), attention_mask=torch.Size([4, 346])


Predicting:  78%|███████▊  | 1870/2400 [1:11:08<32:50,  3.72s/it]

GPU memory after batch 1869: 14.97 GB
Batch 1870 input shapes: input_ids=torch.Size([4, 323]), attention_mask=torch.Size([4, 323])


Predicting:  78%|███████▊  | 1871/2400 [1:11:11<30:49,  3.50s/it]

批次 1870: 原始预测: ['Yes', 'Yes', 'Yes', 'Yes']
GPU memory after batch 1870: 14.97 GB
Batch 1871 input shapes: input_ids=torch.Size([4, 384]), attention_mask=torch.Size([4, 384])


Predicting:  78%|███████▊  | 1872/2400 [1:11:14<31:41,  3.60s/it]

GPU memory after batch 1871: 14.97 GB
Batch 1872 input shapes: input_ids=torch.Size([4, 496]), attention_mask=torch.Size([4, 496])


Predicting:  78%|███████▊  | 1873/2400 [1:11:19<33:35,  3.82s/it]

GPU memory after batch 1872: 14.97 GB
Batch 1873 input shapes: input_ids=torch.Size([4, 382]), attention_mask=torch.Size([4, 382])


Predicting:  78%|███████▊  | 1874/2400 [1:11:23<33:34,  3.83s/it]

GPU memory after batch 1873: 14.97 GB
Batch 1874 input shapes: input_ids=torch.Size([4, 359]), attention_mask=torch.Size([4, 359])


Predicting:  78%|███████▊  | 1875/2400 [1:11:26<33:15,  3.80s/it]

GPU memory after batch 1874: 14.97 GB
Batch 1875 input shapes: input_ids=torch.Size([4, 338]), attention_mask=torch.Size([4, 338])


Predicting:  78%|███████▊  | 1876/2400 [1:11:30<32:28,  3.72s/it]

GPU memory after batch 1875: 14.97 GB
Batch 1876 input shapes: input_ids=torch.Size([4, 367]), attention_mask=torch.Size([4, 367])


Predicting:  78%|███████▊  | 1877/2400 [1:11:34<32:33,  3.74s/it]

GPU memory after batch 1876: 14.97 GB
Batch 1877 input shapes: input_ids=torch.Size([4, 388]), attention_mask=torch.Size([4, 388])


Predicting:  78%|███████▊  | 1878/2400 [1:11:37<32:08,  3.69s/it]

GPU memory after batch 1877: 14.97 GB
Batch 1878 input shapes: input_ids=torch.Size([4, 372]), attention_mask=torch.Size([4, 372])


Predicting:  78%|███████▊  | 1879/2400 [1:11:41<32:21,  3.73s/it]

GPU memory after batch 1878: 14.97 GB
Batch 1879 input shapes: input_ids=torch.Size([4, 372]), attention_mask=torch.Size([4, 372])


Predicting:  78%|███████▊  | 1880/2400 [1:11:45<32:29,  3.75s/it]

GPU memory after batch 1879: 14.97 GB
Batch 1880 input shapes: input_ids=torch.Size([4, 382]), attention_mask=torch.Size([4, 382])


Predicting:  78%|███████▊  | 1881/2400 [1:11:49<32:40,  3.78s/it]

批次 1880: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1880: 14.97 GB
Batch 1881 input shapes: input_ids=torch.Size([4, 369]), attention_mask=torch.Size([4, 369])


Predicting:  78%|███████▊  | 1882/2400 [1:11:53<32:40,  3.79s/it]

GPU memory after batch 1881: 14.97 GB
Batch 1882 input shapes: input_ids=torch.Size([4, 369]), attention_mask=torch.Size([4, 369])


Predicting:  78%|███████▊  | 1883/2400 [1:11:56<32:39,  3.79s/it]

GPU memory after batch 1882: 14.97 GB
Batch 1883 input shapes: input_ids=torch.Size([4, 289]), attention_mask=torch.Size([4, 289])


Predicting:  78%|███████▊  | 1884/2400 [1:11:59<30:59,  3.60s/it]

GPU memory after batch 1883: 14.97 GB
Batch 1884 input shapes: input_ids=torch.Size([4, 293]), attention_mask=torch.Size([4, 293])


Predicting:  79%|███████▊  | 1885/2400 [1:12:03<29:51,  3.48s/it]

GPU memory after batch 1884: 14.97 GB
Batch 1885 input shapes: input_ids=torch.Size([4, 327]), attention_mask=torch.Size([4, 327])


Predicting:  79%|███████▊  | 1886/2400 [1:12:06<29:43,  3.47s/it]

GPU memory after batch 1885: 14.97 GB
Batch 1886 input shapes: input_ids=torch.Size([4, 305]), attention_mask=torch.Size([4, 305])


Predicting:  79%|███████▊  | 1887/2400 [1:12:09<29:09,  3.41s/it]

GPU memory after batch 1886: 14.97 GB
Batch 1887 input shapes: input_ids=torch.Size([4, 374]), attention_mask=torch.Size([4, 374])


Predicting:  79%|███████▊  | 1888/2400 [1:12:13<30:06,  3.53s/it]

GPU memory after batch 1887: 14.97 GB
Batch 1888 input shapes: input_ids=torch.Size([4, 384]), attention_mask=torch.Size([4, 384])


Predicting:  79%|███████▊  | 1889/2400 [1:12:17<30:51,  3.62s/it]

GPU memory after batch 1888: 14.97 GB
Batch 1889 input shapes: input_ids=torch.Size([4, 386]), attention_mask=torch.Size([4, 386])


Predicting:  79%|███████▉  | 1890/2400 [1:12:21<30:43,  3.62s/it]

GPU memory after batch 1889: 14.97 GB
Batch 1890 input shapes: input_ids=torch.Size([4, 271]), attention_mask=torch.Size([4, 271])


Predicting:  79%|███████▉  | 1891/2400 [1:12:24<28:59,  3.42s/it]

批次 1890: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1890: 14.97 GB
Batch 1891 input shapes: input_ids=torch.Size([4, 426]), attention_mask=torch.Size([4, 426])


Predicting:  79%|███████▉  | 1892/2400 [1:12:27<29:48,  3.52s/it]

GPU memory after batch 1891: 14.97 GB
Batch 1892 input shapes: input_ids=torch.Size([4, 352]), attention_mask=torch.Size([4, 352])


Predicting:  79%|███████▉  | 1893/2400 [1:12:31<29:51,  3.53s/it]

GPU memory after batch 1892: 14.97 GB
Batch 1893 input shapes: input_ids=torch.Size([4, 353]), attention_mask=torch.Size([4, 353])


Predicting:  79%|███████▉  | 1894/2400 [1:12:35<30:17,  3.59s/it]

GPU memory after batch 1893: 14.97 GB
Batch 1894 input shapes: input_ids=torch.Size([4, 350]), attention_mask=torch.Size([4, 350])


Predicting:  79%|███████▉  | 1895/2400 [1:12:38<30:09,  3.58s/it]

GPU memory after batch 1894: 14.97 GB
Batch 1895 input shapes: input_ids=torch.Size([4, 382]), attention_mask=torch.Size([4, 382])


Predicting:  79%|███████▉  | 1896/2400 [1:12:42<30:45,  3.66s/it]

GPU memory after batch 1895: 14.97 GB
Batch 1896 input shapes: input_ids=torch.Size([4, 492]), attention_mask=torch.Size([4, 492])


Predicting:  79%|███████▉  | 1897/2400 [1:12:46<32:24,  3.87s/it]

GPU memory after batch 1896: 14.97 GB
Batch 1897 input shapes: input_ids=torch.Size([4, 317]), attention_mask=torch.Size([4, 317])


Predicting:  79%|███████▉  | 1898/2400 [1:12:50<30:56,  3.70s/it]

GPU memory after batch 1897: 14.97 GB
Batch 1898 input shapes: input_ids=torch.Size([4, 317]), attention_mask=torch.Size([4, 317])


Predicting:  79%|███████▉  | 1899/2400 [1:12:53<29:53,  3.58s/it]

GPU memory after batch 1898: 14.97 GB
Batch 1899 input shapes: input_ids=torch.Size([4, 315]), attention_mask=torch.Size([4, 315])


Predicting:  79%|███████▉  | 1900/2400 [1:12:56<29:08,  3.50s/it]

GPU memory after batch 1899: 14.97 GB
Batch 1900 input shapes: input_ids=torch.Size([4, 341]), attention_mask=torch.Size([4, 341])


Predicting:  79%|███████▉  | 1901/2400 [1:13:00<29:05,  3.50s/it]

批次 1900: 原始预测: ['Yes\nIs "201', 'Yes\nIs "202', 'Yes\nIs "201', 'Yes\nYes, because']
GPU memory after batch 1900: 14.97 GB
Batch 1901 input shapes: input_ids=torch.Size([4, 344]), attention_mask=torch.Size([4, 344])


Predicting:  79%|███████▉  | 1902/2400 [1:13:03<29:06,  3.51s/it]

GPU memory after batch 1901: 14.97 GB
Batch 1902 input shapes: input_ids=torch.Size([4, 317]), attention_mask=torch.Size([4, 317])


Predicting:  79%|███████▉  | 1903/2400 [1:13:07<28:32,  3.45s/it]

GPU memory after batch 1902: 14.97 GB
Batch 1903 input shapes: input_ids=torch.Size([4, 328]), attention_mask=torch.Size([4, 328])


Predicting:  79%|███████▉  | 1904/2400 [1:13:10<28:29,  3.45s/it]

GPU memory after batch 1903: 14.97 GB
Batch 1904 input shapes: input_ids=torch.Size([4, 331]), attention_mask=torch.Size([4, 331])


Predicting:  79%|███████▉  | 1905/2400 [1:13:14<28:30,  3.46s/it]

GPU memory after batch 1904: 14.97 GB
Batch 1905 input shapes: input_ids=torch.Size([4, 308]), attention_mask=torch.Size([4, 308])


Predicting:  79%|███████▉  | 1906/2400 [1:13:17<27:59,  3.40s/it]

GPU memory after batch 1905: 14.97 GB
Batch 1906 input shapes: input_ids=torch.Size([4, 271]), attention_mask=torch.Size([4, 271])


Predicting:  79%|███████▉  | 1907/2400 [1:13:20<26:50,  3.27s/it]

GPU memory after batch 1906: 14.97 GB
Batch 1907 input shapes: input_ids=torch.Size([4, 315]), attention_mask=torch.Size([4, 315])


Predicting:  80%|███████▉  | 1908/2400 [1:13:23<26:52,  3.28s/it]

GPU memory after batch 1907: 14.97 GB
Batch 1908 input shapes: input_ids=torch.Size([4, 142]), attention_mask=torch.Size([4, 142])


Predicting:  80%|███████▉  | 1909/2400 [1:13:25<23:20,  2.85s/it]

GPU memory after batch 1908: 14.97 GB
Batch 1909 input shapes: input_ids=torch.Size([4, 192]), attention_mask=torch.Size([4, 192])


Predicting:  80%|███████▉  | 1910/2400 [1:13:27<21:43,  2.66s/it]

GPU memory after batch 1909: 14.97 GB
Batch 1910 input shapes: input_ids=torch.Size([4, 162]), attention_mask=torch.Size([4, 162])


Predicting:  80%|███████▉  | 1911/2400 [1:13:29<20:16,  2.49s/it]

批次 1910: 原始预测: ['Yes', 'No\nIs "50', 'Yes\nIs "2', 'Yes\nIs "3']
GPU memory after batch 1910: 14.97 GB
Batch 1911 input shapes: input_ids=torch.Size([4, 186]), attention_mask=torch.Size([4, 186])


Predicting:  80%|███████▉  | 1912/2400 [1:13:31<19:32,  2.40s/it]

GPU memory after batch 1911: 14.97 GB
Batch 1912 input shapes: input_ids=torch.Size([4, 180]), attention_mask=torch.Size([4, 180])


Predicting:  80%|███████▉  | 1913/2400 [1:13:34<18:56,  2.33s/it]

GPU memory after batch 1912: 14.97 GB
Batch 1913 input shapes: input_ids=torch.Size([4, 139]), attention_mask=torch.Size([4, 139])


Predicting:  80%|███████▉  | 1914/2400 [1:13:35<17:45,  2.19s/it]

GPU memory after batch 1913: 14.97 GB
Batch 1914 input shapes: input_ids=torch.Size([4, 256]), attention_mask=torch.Size([4, 256])


Predicting:  80%|███████▉  | 1915/2400 [1:13:38<19:06,  2.36s/it]

GPU memory after batch 1914: 14.97 GB
Batch 1915 input shapes: input_ids=torch.Size([4, 150]), attention_mask=torch.Size([4, 150])


Predicting:  80%|███████▉  | 1916/2400 [1:13:40<17:56,  2.22s/it]

GPU memory after batch 1915: 14.97 GB
Batch 1916 input shapes: input_ids=torch.Size([4, 158]), attention_mask=torch.Size([4, 158])


Predicting:  80%|███████▉  | 1917/2400 [1:13:42<17:12,  2.14s/it]

GPU memory after batch 1916: 14.97 GB
Batch 1917 input shapes: input_ids=torch.Size([4, 196]), attention_mask=torch.Size([4, 196])


Predicting:  80%|███████▉  | 1918/2400 [1:13:44<17:42,  2.20s/it]

GPU memory after batch 1917: 14.97 GB
Batch 1918 input shapes: input_ids=torch.Size([4, 175]), attention_mask=torch.Size([4, 175])


Predicting:  80%|███████▉  | 1919/2400 [1:13:46<16:47,  2.10s/it]

GPU memory after batch 1918: 14.97 GB
Batch 1919 input shapes: input_ids=torch.Size([4, 142]), attention_mask=torch.Size([4, 142])


Predicting:  80%|████████  | 1920/2400 [1:13:48<15:08,  1.89s/it]

GPU memory after batch 1919: 14.97 GB
Batch 1920 input shapes: input_ids=torch.Size([4, 263]), attention_mask=torch.Size([4, 263])


Predicting:  80%|████████  | 1921/2400 [1:13:51<17:32,  2.20s/it]

批次 1920: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'No\nYes, because', 'Yes']
GPU memory after batch 1920: 14.97 GB
Batch 1921 input shapes: input_ids=torch.Size([4, 149]), attention_mask=torch.Size([4, 149])


Predicting:  80%|████████  | 1922/2400 [1:13:53<16:48,  2.11s/it]

GPU memory after batch 1921: 14.97 GB
Batch 1922 input shapes: input_ids=torch.Size([4, 164]), attention_mask=torch.Size([4, 164])


Predicting:  80%|████████  | 1923/2400 [1:13:54<15:38,  1.97s/it]

GPU memory after batch 1922: 14.97 GB
Batch 1923 input shapes: input_ids=torch.Size([4, 197]), attention_mask=torch.Size([4, 197])


Predicting:  80%|████████  | 1924/2400 [1:13:57<16:32,  2.09s/it]

GPU memory after batch 1923: 14.97 GB
Batch 1924 input shapes: input_ids=torch.Size([4, 164]), attention_mask=torch.Size([4, 164])


Predicting:  80%|████████  | 1925/2400 [1:13:59<16:30,  2.08s/it]

GPU memory after batch 1924: 14.97 GB
Batch 1925 input shapes: input_ids=torch.Size([4, 161]), attention_mask=torch.Size([4, 161])


Predicting:  80%|████████  | 1926/2400 [1:14:01<16:27,  2.08s/it]

GPU memory after batch 1925: 14.97 GB
Batch 1926 input shapes: input_ids=torch.Size([4, 181]), attention_mask=torch.Size([4, 181])


Predicting:  80%|████████  | 1927/2400 [1:14:02<15:35,  1.98s/it]

GPU memory after batch 1926: 14.97 GB
Batch 1927 input shapes: input_ids=torch.Size([4, 183]), attention_mask=torch.Size([4, 183])


Predicting:  80%|████████  | 1928/2400 [1:14:05<16:01,  2.04s/it]

GPU memory after batch 1927: 14.97 GB
Batch 1928 input shapes: input_ids=torch.Size([4, 144]), attention_mask=torch.Size([4, 144])


Predicting:  80%|████████  | 1929/2400 [1:14:06<15:35,  1.99s/it]

GPU memory after batch 1928: 14.97 GB
Batch 1929 input shapes: input_ids=torch.Size([4, 147]), attention_mask=torch.Size([4, 147])


Predicting:  80%|████████  | 1930/2400 [1:14:08<15:21,  1.96s/it]

GPU memory after batch 1929: 14.97 GB
Batch 1930 input shapes: input_ids=torch.Size([4, 151]), attention_mask=torch.Size([4, 151])


Predicting:  80%|████████  | 1931/2400 [1:14:10<15:11,  1.94s/it]

批次 1930: 原始预测: ['No\nYes, because', 'No\nYes, because', 'No\nYes, because', 'No\nYes, because']
GPU memory after batch 1930: 14.97 GB
Batch 1931 input shapes: input_ids=torch.Size([4, 142]), attention_mask=torch.Size([4, 142])


Predicting:  80%|████████  | 1932/2400 [1:14:12<14:58,  1.92s/it]

GPU memory after batch 1931: 14.97 GB
Batch 1932 input shapes: input_ids=torch.Size([4, 174]), attention_mask=torch.Size([4, 174])


Predicting:  81%|████████  | 1933/2400 [1:14:14<15:26,  1.98s/it]

GPU memory after batch 1932: 14.97 GB
Batch 1933 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  81%|████████  | 1934/2400 [1:14:16<14:01,  1.81s/it]

GPU memory after batch 1933: 14.97 GB
Batch 1934 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:  81%|████████  | 1935/2400 [1:14:17<12:31,  1.62s/it]

GPU memory after batch 1934: 14.97 GB
Batch 1935 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  81%|████████  | 1936/2400 [1:14:18<11:57,  1.55s/it]

GPU memory after batch 1935: 14.97 GB
Batch 1936 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  81%|████████  | 1937/2400 [1:14:20<11:21,  1.47s/it]

GPU memory after batch 1936: 14.97 GB
Batch 1937 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  81%|████████  | 1938/2400 [1:14:21<10:56,  1.42s/it]

GPU memory after batch 1937: 14.97 GB
Batch 1938 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  81%|████████  | 1939/2400 [1:14:22<10:29,  1.37s/it]

GPU memory after batch 1938: 14.97 GB
Batch 1939 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  81%|████████  | 1940/2400 [1:14:23<10:19,  1.35s/it]

GPU memory after batch 1939: 14.97 GB
Batch 1940 input shapes: input_ids=torch.Size([4, 98]), attention_mask=torch.Size([4, 98])


Predicting:  81%|████████  | 1941/2400 [1:14:25<10:41,  1.40s/it]

批次 1940: 原始预测: ['No\nOkay, so', 'No\nOkay, so', 'No\nAnswer\nYes', 'No\nAnswer\nYes']
GPU memory after batch 1940: 14.97 GB
Batch 1941 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  81%|████████  | 1942/2400 [1:14:26<10:17,  1.35s/it]

GPU memory after batch 1941: 14.97 GB
Batch 1942 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  81%|████████  | 1943/2400 [1:14:27<10:21,  1.36s/it]

GPU memory after batch 1942: 14.97 GB
Batch 1943 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  81%|████████  | 1944/2400 [1:14:29<10:12,  1.34s/it]

GPU memory after batch 1943: 14.97 GB
Batch 1944 input shapes: input_ids=torch.Size([4, 192]), attention_mask=torch.Size([4, 192])


Predicting:  81%|████████  | 1945/2400 [1:14:31<12:09,  1.60s/it]

GPU memory after batch 1944: 14.97 GB
Batch 1945 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  81%|████████  | 1946/2400 [1:14:32<11:27,  1.51s/it]

GPU memory after batch 1945: 14.97 GB
Batch 1946 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  81%|████████  | 1947/2400 [1:14:33<10:39,  1.41s/it]

GPU memory after batch 1946: 14.97 GB
Batch 1947 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  81%|████████  | 1948/2400 [1:14:35<10:23,  1.38s/it]

GPU memory after batch 1947: 14.97 GB
Batch 1948 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  81%|████████  | 1949/2400 [1:14:36<09:54,  1.32s/it]

GPU memory after batch 1948: 14.97 GB
Batch 1949 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  81%|████████▏ | 1950/2400 [1:14:37<09:51,  1.31s/it]

GPU memory after batch 1949: 14.97 GB
Batch 1950 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  81%|████████▏ | 1951/2400 [1:14:38<09:39,  1.29s/it]

批次 1950: 原始预测: ['No\nYes', 'Yes\nYes, because', 'No\nYes\nYes', 'No\nYes\nYes']
GPU memory after batch 1950: 14.97 GB
Batch 1951 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  81%|████████▏ | 1952/2400 [1:14:40<09:30,  1.27s/it]

GPU memory after batch 1951: 14.97 GB
Batch 1952 input shapes: input_ids=torch.Size([4, 110]), attention_mask=torch.Size([4, 110])


Predicting:  81%|████████▏ | 1953/2400 [1:14:41<10:12,  1.37s/it]

GPU memory after batch 1952: 14.97 GB
Batch 1953 input shapes: input_ids=torch.Size([4, 107]), attention_mask=torch.Size([4, 107])


Predicting:  81%|████████▏ | 1954/2400 [1:14:43<10:40,  1.44s/it]

GPU memory after batch 1953: 14.97 GB
Batch 1954 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  81%|████████▏ | 1955/2400 [1:14:44<10:21,  1.40s/it]

GPU memory after batch 1954: 14.97 GB
Batch 1955 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  82%|████████▏ | 1956/2400 [1:14:45<09:50,  1.33s/it]

GPU memory after batch 1955: 14.97 GB
Batch 1956 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  82%|████████▏ | 1957/2400 [1:14:47<09:36,  1.30s/it]

GPU memory after batch 1956: 14.97 GB
Batch 1957 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  82%|████████▏ | 1958/2400 [1:14:48<09:35,  1.30s/it]

GPU memory after batch 1957: 14.97 GB
Batch 1958 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  82%|████████▏ | 1959/2400 [1:14:49<09:25,  1.28s/it]

GPU memory after batch 1958: 14.97 GB
Batch 1959 input shapes: input_ids=torch.Size([4, 100]), attention_mask=torch.Size([4, 100])


Predicting:  82%|████████▏ | 1960/2400 [1:14:51<09:54,  1.35s/it]

GPU memory after batch 1959: 14.97 GB
Batch 1960 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  82%|████████▏ | 1961/2400 [1:14:52<09:37,  1.32s/it]

批次 1960: 原始预测: ['No\nAnswer: No', 'No\nAnswer: No', 'No\nYes', 'No\nYes']
GPU memory after batch 1960: 14.97 GB
Batch 1961 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  82%|████████▏ | 1962/2400 [1:14:53<09:34,  1.31s/it]

GPU memory after batch 1961: 14.97 GB
Batch 1962 input shapes: input_ids=torch.Size([4, 113]), attention_mask=torch.Size([4, 113])


Predicting:  82%|████████▏ | 1963/2400 [1:14:55<10:14,  1.41s/it]

GPU memory after batch 1962: 14.97 GB
Batch 1963 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  82%|████████▏ | 1964/2400 [1:14:56<10:19,  1.42s/it]

GPU memory after batch 1963: 14.97 GB
Batch 1964 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  82%|████████▏ | 1965/2400 [1:14:58<10:13,  1.41s/it]

GPU memory after batch 1964: 14.97 GB
Batch 1965 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  82%|████████▏ | 1966/2400 [1:14:59<09:57,  1.38s/it]

GPU memory after batch 1965: 14.97 GB
Batch 1966 input shapes: input_ids=torch.Size([4, 104]), attention_mask=torch.Size([4, 104])


Predicting:  82%|████████▏ | 1967/2400 [1:15:00<10:14,  1.42s/it]

GPU memory after batch 1966: 14.97 GB
Batch 1967 input shapes: input_ids=torch.Size([4, 106]), attention_mask=torch.Size([4, 106])


Predicting:  82%|████████▏ | 1968/2400 [1:15:02<10:35,  1.47s/it]

GPU memory after batch 1967: 14.97 GB
Batch 1968 input shapes: input_ids=torch.Size([4, 175]), attention_mask=torch.Size([4, 175])


Predicting:  82%|████████▏ | 1969/2400 [1:15:04<12:00,  1.67s/it]

GPU memory after batch 1968: 14.97 GB
Batch 1969 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  82%|████████▏ | 1970/2400 [1:15:05<11:02,  1.54s/it]

GPU memory after batch 1969: 14.97 GB
Batch 1970 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  82%|████████▏ | 1971/2400 [1:15:07<10:21,  1.45s/it]

批次 1970: 原始预测: ['No\nExplanation: The', 'No\nExplanation: The', 'No\nAnswer\nYes', 'No\nAnswer: No']
GPU memory after batch 1970: 14.97 GB
Batch 1971 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  82%|████████▏ | 1972/2400 [1:15:08<10:00,  1.40s/it]

GPU memory after batch 1971: 14.97 GB
Batch 1972 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  82%|████████▏ | 1973/2400 [1:15:09<09:30,  1.34s/it]

GPU memory after batch 1972: 14.97 GB
Batch 1973 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  82%|████████▏ | 1974/2400 [1:15:11<09:35,  1.35s/it]

GPU memory after batch 1973: 14.97 GB
Batch 1974 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  82%|████████▏ | 1975/2400 [1:15:12<09:27,  1.34s/it]

GPU memory after batch 1974: 14.97 GB
Batch 1975 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  82%|████████▏ | 1976/2400 [1:15:13<09:22,  1.33s/it]

GPU memory after batch 1975: 14.97 GB
Batch 1976 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  82%|████████▏ | 1977/2400 [1:15:14<09:09,  1.30s/it]

GPU memory after batch 1976: 14.97 GB
Batch 1977 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:  82%|████████▏ | 1978/2400 [1:15:16<08:59,  1.28s/it]

GPU memory after batch 1977: 14.97 GB
Batch 1978 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  82%|████████▏ | 1979/2400 [1:15:17<08:52,  1.27s/it]

GPU memory after batch 1978: 14.97 GB
Batch 1979 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  82%|████████▎ | 1980/2400 [1:15:18<08:47,  1.26s/it]

GPU memory after batch 1979: 14.97 GB
Batch 1980 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  83%|████████▎ | 1981/2400 [1:15:19<08:44,  1.25s/it]

批次 1980: 原始预测: ['No\nAnswer: No', 'No\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 1980: 14.97 GB
Batch 1981 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  83%|████████▎ | 1982/2400 [1:15:21<09:07,  1.31s/it]

GPU memory after batch 1981: 14.97 GB
Batch 1982 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:  83%|████████▎ | 1983/2400 [1:15:22<08:56,  1.29s/it]

GPU memory after batch 1982: 14.97 GB
Batch 1983 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  83%|████████▎ | 1984/2400 [1:15:23<08:49,  1.27s/it]

GPU memory after batch 1983: 14.97 GB
Batch 1984 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  83%|████████▎ | 1985/2400 [1:15:24<08:35,  1.24s/it]

GPU memory after batch 1984: 14.97 GB
Batch 1985 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  83%|████████▎ | 1986/2400 [1:15:26<08:33,  1.24s/it]

GPU memory after batch 1985: 14.97 GB
Batch 1986 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  83%|████████▎ | 1987/2400 [1:15:27<08:39,  1.26s/it]

GPU memory after batch 1986: 14.97 GB
Batch 1987 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  83%|████████▎ | 1988/2400 [1:15:28<08:28,  1.23s/it]

GPU memory after batch 1987: 14.97 GB
Batch 1988 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  83%|████████▎ | 1989/2400 [1:15:29<08:35,  1.25s/it]

GPU memory after batch 1988: 14.97 GB
Batch 1989 input shapes: input_ids=torch.Size([4, 99]), attention_mask=torch.Size([4, 99])


Predicting:  83%|████████▎ | 1990/2400 [1:15:31<09:06,  1.33s/it]

GPU memory after batch 1989: 14.97 GB
Batch 1990 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  83%|████████▎ | 1991/2400 [1:15:32<09:11,  1.35s/it]

批次 1990: 原始预测: ['No\nAnswer: No', 'No\nAnswer: No', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 1990: 14.97 GB
Batch 1991 input shapes: input_ids=torch.Size([4, 100]), attention_mask=torch.Size([4, 100])


Predicting:  83%|████████▎ | 1992/2400 [1:15:34<09:30,  1.40s/it]

GPU memory after batch 1991: 14.97 GB
Batch 1992 input shapes: input_ids=torch.Size([4, 176]), attention_mask=torch.Size([4, 176])


Predicting:  83%|████████▎ | 1993/2400 [1:15:36<10:59,  1.62s/it]

GPU memory after batch 1992: 14.97 GB
Batch 1993 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  83%|████████▎ | 1994/2400 [1:15:38<10:44,  1.59s/it]

GPU memory after batch 1993: 14.97 GB
Batch 1994 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  83%|████████▎ | 1995/2400 [1:15:39<09:53,  1.46s/it]

GPU memory after batch 1994: 14.97 GB
Batch 1995 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  83%|████████▎ | 1996/2400 [1:15:40<09:32,  1.42s/it]

GPU memory after batch 1995: 14.97 GB
Batch 1996 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  83%|████████▎ | 1997/2400 [1:15:41<09:16,  1.38s/it]

GPU memory after batch 1996: 14.97 GB
Batch 1997 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  83%|████████▎ | 1998/2400 [1:15:43<09:05,  1.36s/it]

GPU memory after batch 1997: 14.97 GB
Batch 1998 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  83%|████████▎ | 1999/2400 [1:15:44<09:08,  1.37s/it]

GPU memory after batch 1998: 14.97 GB
Batch 1999 input shapes: input_ids=torch.Size([4, 103]), attention_mask=torch.Size([4, 103])


Predicting:  83%|████████▎ | 2000/2400 [1:15:46<09:24,  1.41s/it]

GPU memory after batch 1999: 14.97 GB
Batch 2000 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  83%|████████▎ | 2001/2400 [1:15:47<09:02,  1.36s/it]

批次 2000: 原始预测: ['No\n\nAnswer: No', 'No\n\nYes, because', 'No\nYes, because', 'No\nYes, because']
GPU memory after batch 2000: 14.97 GB
Batch 2001 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  83%|████████▎ | 2002/2400 [1:15:48<08:46,  1.32s/it]

GPU memory after batch 2001: 14.97 GB
Batch 2002 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  83%|████████▎ | 2003/2400 [1:15:49<08:34,  1.30s/it]

GPU memory after batch 2002: 14.97 GB
Batch 2003 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  84%|████████▎ | 2004/2400 [1:15:51<08:33,  1.30s/it]

GPU memory after batch 2003: 14.97 GB
Batch 2004 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  84%|████████▎ | 2005/2400 [1:15:52<08:25,  1.28s/it]

GPU memory after batch 2004: 14.97 GB
Batch 2005 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  84%|████████▎ | 2006/2400 [1:15:53<08:26,  1.29s/it]

GPU memory after batch 2005: 14.97 GB
Batch 2006 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  84%|████████▎ | 2007/2400 [1:15:54<08:26,  1.29s/it]

GPU memory after batch 2006: 14.97 GB
Batch 2007 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  84%|████████▎ | 2008/2400 [1:15:56<08:44,  1.34s/it]

GPU memory after batch 2007: 14.97 GB
Batch 2008 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  84%|████████▎ | 2009/2400 [1:15:57<08:31,  1.31s/it]

GPU memory after batch 2008: 14.97 GB
Batch 2009 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  84%|████████▍ | 2010/2400 [1:15:58<08:46,  1.35s/it]

GPU memory after batch 2009: 14.97 GB
Batch 2010 input shapes: input_ids=torch.Size([4, 103]), attention_mask=torch.Size([4, 103])


Predicting:  84%|████████▍ | 2011/2400 [1:16:00<09:04,  1.40s/it]

批次 2010: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nYes, because', 'No\nAnswer: No']
GPU memory after batch 2010: 14.97 GB
Batch 2011 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  84%|████████▍ | 2012/2400 [1:16:01<09:02,  1.40s/it]

GPU memory after batch 2011: 14.97 GB
Batch 2012 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  84%|████████▍ | 2013/2400 [1:16:03<09:06,  1.41s/it]

GPU memory after batch 2012: 14.97 GB
Batch 2013 input shapes: input_ids=torch.Size([4, 92]), attention_mask=torch.Size([4, 92])


Predicting:  84%|████████▍ | 2014/2400 [1:16:04<09:10,  1.42s/it]

GPU memory after batch 2013: 14.97 GB
Batch 2014 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  84%|████████▍ | 2015/2400 [1:16:06<09:04,  1.41s/it]

GPU memory after batch 2014: 14.97 GB
Batch 2015 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:  84%|████████▍ | 2016/2400 [1:16:07<08:35,  1.34s/it]

GPU memory after batch 2015: 14.97 GB
Batch 2016 input shapes: input_ids=torch.Size([4, 193]), attention_mask=torch.Size([4, 193])


Predicting:  84%|████████▍ | 2017/2400 [1:16:09<10:30,  1.65s/it]

GPU memory after batch 2016: 14.97 GB
Batch 2017 input shapes: input_ids=torch.Size([4, 57]), attention_mask=torch.Size([4, 57])


Predicting:  84%|████████▍ | 2018/2400 [1:16:10<09:34,  1.50s/it]

GPU memory after batch 2017: 14.97 GB
Batch 2018 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  84%|████████▍ | 2019/2400 [1:16:12<09:27,  1.49s/it]

GPU memory after batch 2018: 14.97 GB
Batch 2019 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:  84%|████████▍ | 2020/2400 [1:16:13<08:56,  1.41s/it]

GPU memory after batch 2019: 14.97 GB
Batch 2020 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  84%|████████▍ | 2021/2400 [1:16:14<08:42,  1.38s/it]

批次 2020: 原始预测: ['No\nAnswer\nYes', 'No\nYes, because', 'No\nAnswer\nYes', 'No\nAnswer: No']
GPU memory after batch 2020: 14.97 GB
Batch 2021 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  84%|████████▍ | 2022/2400 [1:16:16<08:32,  1.36s/it]

GPU memory after batch 2021: 14.97 GB
Batch 2022 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  84%|████████▍ | 2023/2400 [1:16:17<08:24,  1.34s/it]

GPU memory after batch 2022: 14.97 GB
Batch 2023 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  84%|████████▍ | 2024/2400 [1:16:18<08:19,  1.33s/it]

GPU memory after batch 2023: 14.97 GB
Batch 2024 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:  84%|████████▍ | 2025/2400 [1:16:19<08:00,  1.28s/it]

GPU memory after batch 2024: 14.97 GB
Batch 2025 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  84%|████████▍ | 2026/2400 [1:16:21<07:54,  1.27s/it]

GPU memory after batch 2025: 14.97 GB
Batch 2026 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  84%|████████▍ | 2027/2400 [1:16:22<07:56,  1.28s/it]

GPU memory after batch 2026: 14.97 GB
Batch 2027 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  84%|████████▍ | 2028/2400 [1:16:23<07:50,  1.26s/it]

GPU memory after batch 2027: 14.97 GB
Batch 2028 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  85%|████████▍ | 2029/2400 [1:16:24<07:45,  1.26s/it]

GPU memory after batch 2028: 14.97 GB
Batch 2029 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:  85%|████████▍ | 2030/2400 [1:16:26<07:42,  1.25s/it]

GPU memory after batch 2029: 14.97 GB
Batch 2030 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  85%|████████▍ | 2031/2400 [1:16:27<07:39,  1.25s/it]

批次 2030: 原始预测: ['Yes\nYes, because', 'No\nYes', 'No\nYes', 'No\nAnswer: No']
GPU memory after batch 2030: 14.97 GB
Batch 2031 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  85%|████████▍ | 2032/2400 [1:16:28<07:37,  1.24s/it]

GPU memory after batch 2031: 14.97 GB
Batch 2032 input shapes: input_ids=torch.Size([4, 86]), attention_mask=torch.Size([4, 86])


Predicting:  85%|████████▍ | 2033/2400 [1:16:30<07:51,  1.29s/it]

GPU memory after batch 2032: 14.97 GB
Batch 2033 input shapes: input_ids=torch.Size([4, 108]), attention_mask=torch.Size([4, 108])


Predicting:  85%|████████▍ | 2034/2400 [1:16:31<08:24,  1.38s/it]

GPU memory after batch 2033: 14.97 GB
Batch 2034 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  85%|████████▍ | 2035/2400 [1:16:32<08:14,  1.35s/it]

GPU memory after batch 2034: 14.97 GB
Batch 2035 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  85%|████████▍ | 2036/2400 [1:16:34<08:16,  1.36s/it]

GPU memory after batch 2035: 14.97 GB
Batch 2036 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  85%|████████▍ | 2037/2400 [1:16:35<08:01,  1.33s/it]

GPU memory after batch 2036: 14.97 GB
Batch 2037 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  85%|████████▍ | 2038/2400 [1:16:36<07:43,  1.28s/it]

GPU memory after batch 2037: 14.97 GB
Batch 2038 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  85%|████████▍ | 2039/2400 [1:16:38<07:44,  1.29s/it]

GPU memory after batch 2038: 14.97 GB
Batch 2039 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  85%|████████▌ | 2040/2400 [1:16:39<07:31,  1.25s/it]

GPU memory after batch 2039: 14.97 GB
Batch 2040 input shapes: input_ids=torch.Size([4, 176]), attention_mask=torch.Size([4, 176])


Predicting:  85%|████████▌ | 2041/2400 [1:16:41<09:05,  1.52s/it]

批次 2040: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2040: 14.97 GB
Batch 2041 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:  85%|████████▌ | 2042/2400 [1:16:42<08:26,  1.42s/it]

GPU memory after batch 2041: 14.97 GB
Batch 2042 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  85%|████████▌ | 2043/2400 [1:16:43<08:13,  1.38s/it]

GPU memory after batch 2042: 14.97 GB
Batch 2043 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  85%|████████▌ | 2044/2400 [1:16:45<08:03,  1.36s/it]

GPU memory after batch 2043: 14.97 GB
Batch 2044 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  85%|████████▌ | 2045/2400 [1:16:46<07:42,  1.30s/it]

GPU memory after batch 2044: 14.97 GB
Batch 2045 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  85%|████████▌ | 2046/2400 [1:16:47<07:56,  1.35s/it]

GPU memory after batch 2045: 14.97 GB
Batch 2046 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  85%|████████▌ | 2047/2400 [1:16:48<07:37,  1.30s/it]

GPU memory after batch 2046: 14.97 GB
Batch 2047 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  85%|████████▌ | 2048/2400 [1:16:50<07:36,  1.30s/it]

GPU memory after batch 2047: 14.97 GB
Batch 2048 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  85%|████████▌ | 2049/2400 [1:16:51<07:35,  1.30s/it]

GPU memory after batch 2048: 14.97 GB
Batch 2049 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  85%|████████▌ | 2050/2400 [1:16:52<07:43,  1.33s/it]

GPU memory after batch 2049: 14.97 GB
Batch 2050 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:  85%|████████▌ | 2051/2400 [1:16:54<07:26,  1.28s/it]

批次 2050: 原始预测: ['No\nYes, because', 'No\nYes, because', 'No\nYes', 'No\nYes']
GPU memory after batch 2050: 14.97 GB
Batch 2051 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:  86%|████████▌ | 2052/2400 [1:16:55<07:13,  1.25s/it]

GPU memory after batch 2051: 14.97 GB
Batch 2052 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:  86%|████████▌ | 2053/2400 [1:16:56<07:05,  1.23s/it]

GPU memory after batch 2052: 14.97 GB
Batch 2053 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  86%|████████▌ | 2054/2400 [1:16:57<07:05,  1.23s/it]

GPU memory after batch 2053: 14.97 GB
Batch 2054 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  86%|████████▌ | 2055/2400 [1:16:58<07:11,  1.25s/it]

GPU memory after batch 2054: 14.97 GB
Batch 2055 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  86%|████████▌ | 2056/2400 [1:17:00<07:15,  1.27s/it]

GPU memory after batch 2055: 14.97 GB
Batch 2056 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  86%|████████▌ | 2057/2400 [1:17:01<07:11,  1.26s/it]

GPU memory after batch 2056: 14.97 GB
Batch 2057 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  86%|████████▌ | 2058/2400 [1:17:02<07:08,  1.25s/it]

GPU memory after batch 2057: 14.97 GB
Batch 2058 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  86%|████████▌ | 2059/2400 [1:17:04<07:12,  1.27s/it]

GPU memory after batch 2058: 14.97 GB
Batch 2059 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  86%|████████▌ | 2060/2400 [1:17:05<07:14,  1.28s/it]

GPU memory after batch 2059: 14.97 GB
Batch 2060 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  86%|████████▌ | 2061/2400 [1:17:06<07:15,  1.29s/it]

批次 2060: 原始预测: ['No\nAnswer\nYes', 'Yes\nIs "0', 'No\nAnswer\nYes', 'No\nAnswer\nYes']
GPU memory after batch 2060: 14.97 GB
Batch 2061 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  86%|████████▌ | 2062/2400 [1:17:07<07:09,  1.27s/it]

GPU memory after batch 2061: 14.97 GB
Batch 2062 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  86%|████████▌ | 2063/2400 [1:17:09<07:20,  1.31s/it]

GPU memory after batch 2062: 14.97 GB
Batch 2063 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  86%|████████▌ | 2064/2400 [1:17:10<06:57,  1.24s/it]

GPU memory after batch 2063: 14.97 GB
Batch 2064 input shapes: input_ids=torch.Size([4, 166]), attention_mask=torch.Size([4, 166])


Predicting:  86%|████████▌ | 2065/2400 [1:17:12<08:20,  1.50s/it]

GPU memory after batch 2064: 14.97 GB
Batch 2065 input shapes: input_ids=torch.Size([4, 104]), attention_mask=torch.Size([4, 104])


Predicting:  86%|████████▌ | 2066/2400 [1:17:13<08:21,  1.50s/it]

GPU memory after batch 2065: 14.97 GB
Batch 2066 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  86%|████████▌ | 2067/2400 [1:17:15<08:00,  1.44s/it]

GPU memory after batch 2066: 14.97 GB
Batch 2067 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  86%|████████▌ | 2068/2400 [1:17:16<07:44,  1.40s/it]

GPU memory after batch 2067: 14.97 GB
Batch 2068 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  86%|████████▌ | 2069/2400 [1:17:17<07:33,  1.37s/it]

GPU memory after batch 2068: 14.97 GB
Batch 2069 input shapes: input_ids=torch.Size([4, 103]), attention_mask=torch.Size([4, 103])


Predicting:  86%|████████▋ | 2070/2400 [1:17:19<07:46,  1.42s/it]

GPU memory after batch 2069: 14.97 GB
Batch 2070 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  86%|████████▋ | 2071/2400 [1:17:20<07:34,  1.38s/it]

批次 2070: 原始预测: ['No\nYes, the', 'No\nYes, because', 'No\nYes, the', 'No\nYes']
GPU memory after batch 2070: 14.97 GB
Batch 2071 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  86%|████████▋ | 2072/2400 [1:17:21<07:18,  1.34s/it]

GPU memory after batch 2071: 14.97 GB
Batch 2072 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  86%|████████▋ | 2073/2400 [1:17:23<07:13,  1.33s/it]

GPU memory after batch 2072: 14.97 GB
Batch 2073 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  86%|████████▋ | 2074/2400 [1:17:24<07:38,  1.41s/it]

GPU memory after batch 2073: 14.97 GB
Batch 2074 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  86%|████████▋ | 2075/2400 [1:17:26<07:26,  1.37s/it]

GPU memory after batch 2074: 14.97 GB
Batch 2075 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  86%|████████▋ | 2076/2400 [1:17:27<07:26,  1.38s/it]

GPU memory after batch 2075: 14.97 GB
Batch 2076 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  87%|████████▋ | 2077/2400 [1:17:28<07:05,  1.32s/it]

GPU memory after batch 2076: 14.97 GB
Batch 2077 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  87%|████████▋ | 2078/2400 [1:17:30<07:02,  1.31s/it]

GPU memory after batch 2077: 14.97 GB
Batch 2078 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  87%|████████▋ | 2079/2400 [1:17:31<06:54,  1.29s/it]

GPU memory after batch 2078: 14.97 GB
Batch 2079 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  87%|████████▋ | 2080/2400 [1:17:32<06:47,  1.27s/it]

GPU memory after batch 2079: 14.97 GB
Batch 2080 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  87%|████████▋ | 2081/2400 [1:17:33<06:42,  1.26s/it]

批次 2080: 原始预测: ['No\nAnswer: No', 'No\nAnswer: No', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 2080: 14.97 GB
Batch 2081 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  87%|████████▋ | 2082/2400 [1:17:35<06:45,  1.27s/it]

GPU memory after batch 2081: 14.97 GB
Batch 2082 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  87%|████████▋ | 2083/2400 [1:17:36<06:40,  1.26s/it]

GPU memory after batch 2082: 14.97 GB
Batch 2083 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  87%|████████▋ | 2084/2400 [1:17:37<06:36,  1.25s/it]

GPU memory after batch 2083: 14.97 GB
Batch 2084 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  87%|████████▋ | 2085/2400 [1:17:38<06:27,  1.23s/it]

GPU memory after batch 2084: 14.97 GB
Batch 2085 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  87%|████████▋ | 2086/2400 [1:17:39<06:33,  1.25s/it]

GPU memory after batch 2085: 14.97 GB
Batch 2086 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  87%|████████▋ | 2087/2400 [1:17:41<06:50,  1.31s/it]

GPU memory after batch 2086: 14.97 GB
Batch 2087 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  87%|████████▋ | 2088/2400 [1:17:42<06:56,  1.33s/it]

GPU memory after batch 2087: 14.97 GB
Batch 2088 input shapes: input_ids=torch.Size([4, 185]), attention_mask=torch.Size([4, 185])


Predicting:  87%|████████▋ | 2089/2400 [1:17:45<08:16,  1.60s/it]

GPU memory after batch 2088: 14.97 GB
Batch 2089 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  87%|████████▋ | 2090/2400 [1:17:46<07:41,  1.49s/it]

GPU memory after batch 2089: 14.97 GB
Batch 2090 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  87%|████████▋ | 2091/2400 [1:17:47<07:22,  1.43s/it]

批次 2090: 原始预测: ['Yes\nIs "222', 'Yes\nIs "111', 'No\nYes', 'No\nYes']
GPU memory after batch 2090: 14.97 GB
Batch 2091 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  87%|████████▋ | 2092/2400 [1:17:48<06:49,  1.33s/it]

GPU memory after batch 2091: 14.97 GB
Batch 2092 input shapes: input_ids=torch.Size([4, 100]), attention_mask=torch.Size([4, 100])


Predicting:  87%|████████▋ | 2093/2400 [1:17:50<07:05,  1.39s/it]

GPU memory after batch 2092: 14.97 GB
Batch 2093 input shapes: input_ids=torch.Size([4, 61]), attention_mask=torch.Size([4, 61])


Predicting:  87%|████████▋ | 2094/2400 [1:17:51<06:44,  1.32s/it]

GPU memory after batch 2093: 14.97 GB
Batch 2094 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  87%|████████▋ | 2095/2400 [1:17:52<06:35,  1.30s/it]

GPU memory after batch 2094: 14.97 GB
Batch 2095 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  87%|████████▋ | 2096/2400 [1:17:53<06:28,  1.28s/it]

GPU memory after batch 2095: 14.97 GB
Batch 2096 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  87%|████████▋ | 2097/2400 [1:17:55<06:29,  1.28s/it]

GPU memory after batch 2096: 14.97 GB
Batch 2097 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:  87%|████████▋ | 2098/2400 [1:17:56<06:18,  1.25s/it]

GPU memory after batch 2097: 14.97 GB
Batch 2098 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  87%|████████▋ | 2099/2400 [1:17:57<06:21,  1.27s/it]

GPU memory after batch 2098: 14.97 GB
Batch 2099 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  88%|████████▊ | 2100/2400 [1:17:58<06:17,  1.26s/it]

GPU memory after batch 2099: 14.97 GB
Batch 2100 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  88%|████████▊ | 2101/2400 [1:18:00<06:27,  1.30s/it]

批次 2100: 原始预测: ['No\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2100: 14.97 GB
Batch 2101 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  88%|████████▊ | 2102/2400 [1:18:01<06:20,  1.28s/it]

GPU memory after batch 2101: 14.97 GB
Batch 2102 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  88%|████████▊ | 2103/2400 [1:18:02<06:29,  1.31s/it]

GPU memory after batch 2102: 14.97 GB
Batch 2103 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  88%|████████▊ | 2104/2400 [1:18:04<06:40,  1.35s/it]

GPU memory after batch 2103: 14.97 GB
Batch 2104 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  88%|████████▊ | 2105/2400 [1:18:05<06:23,  1.30s/it]

GPU memory after batch 2104: 14.97 GB
Batch 2105 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  88%|████████▊ | 2106/2400 [1:18:06<06:22,  1.30s/it]

GPU memory after batch 2105: 14.97 GB
Batch 2106 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  88%|████████▊ | 2107/2400 [1:18:08<06:21,  1.30s/it]

GPU memory after batch 2106: 14.97 GB
Batch 2107 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  88%|████████▊ | 2108/2400 [1:18:09<06:20,  1.30s/it]

GPU memory after batch 2107: 14.97 GB
Batch 2108 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  88%|████████▊ | 2109/2400 [1:18:10<06:18,  1.30s/it]

GPU memory after batch 2108: 14.97 GB
Batch 2109 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  88%|████████▊ | 2110/2400 [1:18:11<06:11,  1.28s/it]

GPU memory after batch 2109: 14.97 GB
Batch 2110 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  88%|████████▊ | 2111/2400 [1:18:13<06:06,  1.27s/it]

批次 2110: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2110: 14.97 GB
Batch 2111 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  88%|████████▊ | 2112/2400 [1:18:14<06:08,  1.28s/it]

GPU memory after batch 2111: 14.97 GB
Batch 2112 input shapes: input_ids=torch.Size([4, 183]), attention_mask=torch.Size([4, 183])


Predicting:  88%|████████▊ | 2113/2400 [1:18:16<07:24,  1.55s/it]

GPU memory after batch 2112: 14.97 GB
Batch 2113 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  88%|████████▊ | 2114/2400 [1:18:17<06:55,  1.45s/it]

GPU memory after batch 2113: 14.97 GB
Batch 2114 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  88%|████████▊ | 2115/2400 [1:18:19<06:35,  1.39s/it]

GPU memory after batch 2114: 14.97 GB
Batch 2115 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  88%|████████▊ | 2116/2400 [1:18:20<06:21,  1.34s/it]

GPU memory after batch 2115: 14.97 GB
Batch 2116 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  88%|████████▊ | 2117/2400 [1:18:21<06:11,  1.31s/it]

GPU memory after batch 2116: 14.97 GB
Batch 2117 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  88%|████████▊ | 2118/2400 [1:18:22<06:16,  1.33s/it]

GPU memory after batch 2117: 14.97 GB
Batch 2118 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  88%|████████▊ | 2119/2400 [1:18:24<06:12,  1.32s/it]

GPU memory after batch 2118: 14.97 GB
Batch 2119 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  88%|████████▊ | 2120/2400 [1:18:25<06:03,  1.30s/it]

GPU memory after batch 2119: 14.97 GB
Batch 2120 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  88%|████████▊ | 2121/2400 [1:18:26<06:14,  1.34s/it]

批次 2120: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2120: 14.97 GB
Batch 2121 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  88%|████████▊ | 2122/2400 [1:18:28<06:09,  1.33s/it]

GPU memory after batch 2121: 14.97 GB
Batch 2122 input shapes: input_ids=torch.Size([4, 104]), attention_mask=torch.Size([4, 104])


Predicting:  88%|████████▊ | 2123/2400 [1:18:29<06:24,  1.39s/it]

GPU memory after batch 2122: 14.97 GB
Batch 2123 input shapes: input_ids=torch.Size([4, 101]), attention_mask=torch.Size([4, 101])


Predicting:  88%|████████▊ | 2124/2400 [1:18:31<06:33,  1.43s/it]

GPU memory after batch 2123: 14.97 GB
Batch 2124 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  89%|████████▊ | 2125/2400 [1:18:32<06:28,  1.41s/it]

GPU memory after batch 2124: 14.97 GB
Batch 2125 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  89%|████████▊ | 2126/2400 [1:18:33<06:12,  1.36s/it]

GPU memory after batch 2125: 14.97 GB
Batch 2126 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  89%|████████▊ | 2127/2400 [1:18:35<06:01,  1.32s/it]

GPU memory after batch 2126: 14.97 GB
Batch 2127 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  89%|████████▊ | 2128/2400 [1:18:36<06:05,  1.34s/it]

GPU memory after batch 2127: 14.97 GB
Batch 2128 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  89%|████████▊ | 2129/2400 [1:18:37<05:55,  1.31s/it]

GPU memory after batch 2128: 14.97 GB
Batch 2129 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  89%|████████▉ | 2130/2400 [1:18:38<05:47,  1.29s/it]

GPU memory after batch 2129: 14.97 GB
Batch 2130 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  89%|████████▉ | 2131/2400 [1:18:40<05:41,  1.27s/it]

批次 2130: 原始预测: ['Yes', 'No\nYes', 'Yes\nIs "12', 'Yes\nIs "120']
GPU memory after batch 2130: 14.97 GB
Batch 2131 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  89%|████████▉ | 2132/2400 [1:18:41<05:37,  1.26s/it]

GPU memory after batch 2131: 14.97 GB
Batch 2132 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  89%|████████▉ | 2133/2400 [1:18:42<05:34,  1.25s/it]

GPU memory after batch 2132: 14.97 GB
Batch 2133 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  89%|████████▉ | 2134/2400 [1:18:43<05:32,  1.25s/it]

GPU memory after batch 2133: 14.97 GB
Batch 2134 input shapes: input_ids=torch.Size([4, 100]), attention_mask=torch.Size([4, 100])


Predicting:  89%|████████▉ | 2135/2400 [1:18:45<05:52,  1.33s/it]

GPU memory after batch 2134: 14.97 GB
Batch 2135 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  89%|████████▉ | 2136/2400 [1:18:46<05:43,  1.30s/it]

GPU memory after batch 2135: 14.97 GB
Batch 2136 input shapes: input_ids=torch.Size([4, 186]), attention_mask=torch.Size([4, 186])


Predicting:  89%|████████▉ | 2137/2400 [1:18:48<06:53,  1.57s/it]

GPU memory after batch 2136: 14.97 GB
Batch 2137 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  89%|████████▉ | 2138/2400 [1:18:50<06:30,  1.49s/it]

GPU memory after batch 2137: 14.97 GB
Batch 2138 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  89%|████████▉ | 2139/2400 [1:18:51<06:21,  1.46s/it]

GPU memory after batch 2138: 14.97 GB
Batch 2139 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  89%|████████▉ | 2140/2400 [1:18:52<06:07,  1.41s/it]

GPU memory after batch 2139: 14.97 GB
Batch 2140 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:  89%|████████▉ | 2141/2400 [1:18:54<05:47,  1.34s/it]

批次 2140: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nYes, because', 'No\nYes\nYes']
GPU memory after batch 2140: 14.97 GB
Batch 2141 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  89%|████████▉ | 2142/2400 [1:18:55<05:37,  1.31s/it]

GPU memory after batch 2141: 14.97 GB
Batch 2142 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  89%|████████▉ | 2143/2400 [1:18:56<05:30,  1.29s/it]

GPU memory after batch 2142: 14.97 GB
Batch 2143 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  89%|████████▉ | 2144/2400 [1:18:57<05:30,  1.29s/it]

GPU memory after batch 2143: 14.97 GB
Batch 2144 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  89%|████████▉ | 2145/2400 [1:18:59<05:30,  1.29s/it]

GPU memory after batch 2144: 14.97 GB
Batch 2145 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  89%|████████▉ | 2146/2400 [1:19:00<05:29,  1.30s/it]

GPU memory after batch 2145: 14.97 GB
Batch 2146 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  89%|████████▉ | 2147/2400 [1:19:01<05:28,  1.30s/it]

GPU memory after batch 2146: 14.97 GB
Batch 2147 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:  90%|████████▉ | 2148/2400 [1:19:02<05:22,  1.28s/it]

GPU memory after batch 2147: 14.97 GB
Batch 2148 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  90%|████████▉ | 2149/2400 [1:19:04<05:28,  1.31s/it]

GPU memory after batch 2148: 14.97 GB
Batch 2149 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  90%|████████▉ | 2150/2400 [1:19:05<05:26,  1.31s/it]

GPU memory after batch 2149: 14.97 GB
Batch 2150 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  90%|████████▉ | 2151/2400 [1:19:06<05:25,  1.31s/it]

批次 2150: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'No\nYes, because']
GPU memory after batch 2150: 14.97 GB
Batch 2151 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  90%|████████▉ | 2152/2400 [1:19:08<05:23,  1.30s/it]

GPU memory after batch 2151: 14.97 GB
Batch 2152 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  90%|████████▉ | 2153/2400 [1:19:09<05:21,  1.30s/it]

GPU memory after batch 2152: 14.97 GB
Batch 2153 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  90%|████████▉ | 2154/2400 [1:19:10<05:20,  1.30s/it]

GPU memory after batch 2153: 14.97 GB
Batch 2154 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  90%|████████▉ | 2155/2400 [1:19:12<05:25,  1.33s/it]

GPU memory after batch 2154: 14.97 GB
Batch 2155 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  90%|████████▉ | 2156/2400 [1:19:13<05:28,  1.35s/it]

GPU memory after batch 2155: 14.97 GB
Batch 2156 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  90%|████████▉ | 2157/2400 [1:19:15<05:30,  1.36s/it]

GPU memory after batch 2156: 14.97 GB
Batch 2157 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  90%|████████▉ | 2158/2400 [1:19:16<05:20,  1.32s/it]

GPU memory after batch 2157: 14.97 GB
Batch 2158 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  90%|████████▉ | 2159/2400 [1:19:17<05:08,  1.28s/it]

GPU memory after batch 2158: 14.97 GB
Batch 2159 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  90%|█████████ | 2160/2400 [1:19:18<05:08,  1.29s/it]

GPU memory after batch 2159: 14.97 GB
Batch 2160 input shapes: input_ids=torch.Size([4, 200]), attention_mask=torch.Size([4, 200])


Predicting:  90%|█████████ | 2161/2400 [1:19:21<06:24,  1.61s/it]

批次 2160: 原始预测: ['Yes\nQuestion: Is', 'Yes\nQuestion: Is', 'No\nYes, because', 'No\nAnswer: No']
GPU memory after batch 2160: 14.97 GB
Batch 2161 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:  90%|█████████ | 2162/2400 [1:19:22<05:51,  1.48s/it]

GPU memory after batch 2161: 14.97 GB
Batch 2162 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  90%|█████████ | 2163/2400 [1:19:23<05:43,  1.45s/it]

GPU memory after batch 2162: 14.97 GB
Batch 2163 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  90%|█████████ | 2164/2400 [1:19:24<05:27,  1.39s/it]

GPU memory after batch 2163: 14.97 GB
Batch 2164 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  90%|█████████ | 2165/2400 [1:19:26<05:26,  1.39s/it]

GPU memory after batch 2164: 14.97 GB
Batch 2165 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  90%|█████████ | 2166/2400 [1:19:27<05:14,  1.34s/it]

GPU memory after batch 2165: 14.97 GB
Batch 2166 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  90%|█████████ | 2167/2400 [1:19:28<05:09,  1.33s/it]

GPU memory after batch 2166: 14.97 GB
Batch 2167 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  90%|█████████ | 2168/2400 [1:19:30<05:01,  1.30s/it]

GPU memory after batch 2167: 14.97 GB
Batch 2168 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  90%|█████████ | 2169/2400 [1:19:31<04:55,  1.28s/it]

GPU memory after batch 2168: 14.97 GB
Batch 2169 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  90%|█████████ | 2170/2400 [1:19:32<05:01,  1.31s/it]

GPU memory after batch 2169: 14.97 GB
Batch 2170 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  90%|█████████ | 2171/2400 [1:19:33<04:59,  1.31s/it]

批次 2170: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'No\nYes, because']
GPU memory after batch 2170: 14.97 GB
Batch 2171 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  90%|█████████ | 2172/2400 [1:19:35<05:03,  1.33s/it]

GPU memory after batch 2171: 14.97 GB
Batch 2172 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  91%|█████████ | 2173/2400 [1:19:36<04:55,  1.30s/it]

GPU memory after batch 2172: 14.97 GB
Batch 2173 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  91%|█████████ | 2174/2400 [1:19:37<05:00,  1.33s/it]

GPU memory after batch 2173: 14.97 GB
Batch 2174 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  91%|█████████ | 2175/2400 [1:19:39<04:57,  1.32s/it]

GPU memory after batch 2174: 14.97 GB
Batch 2175 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  91%|█████████ | 2176/2400 [1:19:40<04:54,  1.32s/it]

GPU memory after batch 2175: 14.97 GB
Batch 2176 input shapes: input_ids=torch.Size([4, 138]), attention_mask=torch.Size([4, 138])


Predicting:  91%|█████████ | 2177/2400 [1:19:42<05:30,  1.48s/it]

GPU memory after batch 2176: 14.97 GB
Batch 2177 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  91%|█████████ | 2178/2400 [1:19:43<05:16,  1.43s/it]

GPU memory after batch 2177: 14.97 GB
Batch 2178 input shapes: input_ids=torch.Size([4, 94]), attention_mask=torch.Size([4, 94])


Predicting:  91%|█████████ | 2179/2400 [1:19:45<05:16,  1.43s/it]

GPU memory after batch 2178: 14.97 GB
Batch 2179 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  91%|█████████ | 2180/2400 [1:19:46<05:02,  1.38s/it]

GPU memory after batch 2179: 14.97 GB
Batch 2180 input shapes: input_ids=torch.Size([4, 104]), attention_mask=torch.Size([4, 104])


Predicting:  91%|█████████ | 2181/2400 [1:19:47<05:10,  1.42s/it]

批次 2180: 原始预测: ['No\n**Step-by', 'No\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2180: 14.97 GB
Batch 2181 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  91%|█████████ | 2182/2400 [1:19:49<05:07,  1.41s/it]

GPU memory after batch 2181: 14.97 GB
Batch 2182 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  91%|█████████ | 2183/2400 [1:19:50<05:04,  1.40s/it]

GPU memory after batch 2182: 14.97 GB
Batch 2183 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  91%|█████████ | 2184/2400 [1:19:51<04:52,  1.35s/it]

GPU memory after batch 2183: 14.97 GB
Batch 2184 input shapes: input_ids=torch.Size([4, 168]), attention_mask=torch.Size([4, 168])


Predicting:  91%|█████████ | 2185/2400 [1:19:54<05:38,  1.57s/it]

GPU memory after batch 2184: 14.97 GB
Batch 2185 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  91%|█████████ | 2186/2400 [1:19:55<05:19,  1.49s/it]

GPU memory after batch 2185: 14.97 GB
Batch 2186 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  91%|█████████ | 2187/2400 [1:19:56<05:05,  1.43s/it]

GPU memory after batch 2186: 14.97 GB
Batch 2187 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  91%|█████████ | 2188/2400 [1:19:57<04:55,  1.39s/it]

GPU memory after batch 2187: 14.97 GB
Batch 2188 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:  91%|█████████ | 2189/2400 [1:19:59<04:40,  1.33s/it]

GPU memory after batch 2188: 14.97 GB
Batch 2189 input shapes: input_ids=torch.Size([4, 157]), attention_mask=torch.Size([4, 157])


Predicting:  91%|█████████▏| 2190/2400 [1:20:01<05:17,  1.51s/it]

GPU memory after batch 2189: 14.97 GB
Batch 2190 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  91%|█████████▏| 2191/2400 [1:20:02<04:58,  1.43s/it]

批次 2190: 原始预测: ['No\nYes\nYes', 'No\nYes\nYes', 'No\nYes, because', 'No\nYes, because']
GPU memory after batch 2190: 14.97 GB
Batch 2191 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  91%|█████████▏| 2192/2400 [1:20:03<04:45,  1.37s/it]

GPU memory after batch 2191: 14.97 GB
Batch 2192 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  91%|█████████▏| 2193/2400 [1:20:04<04:39,  1.35s/it]

GPU memory after batch 2192: 14.97 GB
Batch 2193 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  91%|█████████▏| 2194/2400 [1:20:06<04:31,  1.32s/it]

GPU memory after batch 2193: 14.97 GB
Batch 2194 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  91%|█████████▏| 2195/2400 [1:20:07<04:28,  1.31s/it]

GPU memory after batch 2194: 14.97 GB
Batch 2195 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  92%|█████████▏| 2196/2400 [1:20:08<04:23,  1.29s/it]

GPU memory after batch 2195: 14.97 GB
Batch 2196 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  92%|█████████▏| 2197/2400 [1:20:09<04:22,  1.29s/it]

GPU memory after batch 2196: 14.97 GB
Batch 2197 input shapes: input_ids=torch.Size([4, 102]), attention_mask=torch.Size([4, 102])


Predicting:  92%|█████████▏| 2198/2400 [1:20:11<04:34,  1.36s/it]

GPU memory after batch 2197: 14.97 GB
Batch 2198 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  92%|█████████▏| 2199/2400 [1:20:13<04:47,  1.43s/it]

GPU memory after batch 2198: 14.97 GB
Batch 2199 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  92%|█████████▏| 2200/2400 [1:20:14<04:34,  1.37s/it]

GPU memory after batch 2199: 14.97 GB
Batch 2200 input shapes: input_ids=torch.Size([4, 54]), attention_mask=torch.Size([4, 54])


Predicting:  92%|█████████▏| 2201/2400 [1:20:15<04:16,  1.29s/it]

批次 2200: 原始预测: ['No\nYes\nYes', 'No\nYes\nYes', 'No\nYes\nYes', 'No\nYes, because']
GPU memory after batch 2200: 14.97 GB
Batch 2201 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  92%|█████████▏| 2202/2400 [1:20:16<04:21,  1.32s/it]

GPU memory after batch 2201: 14.97 GB
Batch 2202 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  92%|█████████▏| 2203/2400 [1:20:18<04:18,  1.31s/it]

GPU memory after batch 2202: 14.97 GB
Batch 2203 input shapes: input_ids=torch.Size([4, 102]), attention_mask=torch.Size([4, 102])


Predicting:  92%|█████████▏| 2204/2400 [1:20:19<04:29,  1.38s/it]

GPU memory after batch 2203: 14.97 GB
Batch 2204 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  92%|█████████▏| 2205/2400 [1:20:20<04:28,  1.38s/it]

GPU memory after batch 2204: 14.97 GB
Batch 2205 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  92%|█████████▏| 2206/2400 [1:20:22<04:22,  1.36s/it]

GPU memory after batch 2205: 14.97 GB
Batch 2206 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  92%|█████████▏| 2207/2400 [1:20:23<04:18,  1.34s/it]

GPU memory after batch 2206: 14.97 GB
Batch 2207 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  92%|█████████▏| 2208/2400 [1:20:24<04:19,  1.35s/it]

GPU memory after batch 2207: 14.97 GB
Batch 2208 input shapes: input_ids=torch.Size([4, 175]), attention_mask=torch.Size([4, 175])


Predicting:  92%|█████████▏| 2209/2400 [1:20:27<05:03,  1.59s/it]

GPU memory after batch 2208: 14.97 GB
Batch 2209 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  92%|█████████▏| 2210/2400 [1:20:28<04:53,  1.55s/it]

GPU memory after batch 2209: 14.97 GB
Batch 2210 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  92%|█████████▏| 2211/2400 [1:20:29<04:38,  1.47s/it]

批次 2210: 原始预测: ['Yes\nYes, because', 'Yes', 'Yes', 'Yes']
GPU memory after batch 2210: 14.97 GB
Batch 2211 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  92%|█████████▏| 2212/2400 [1:20:31<04:35,  1.47s/it]

GPU memory after batch 2211: 14.97 GB
Batch 2212 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  92%|█████████▏| 2213/2400 [1:20:32<04:33,  1.46s/it]

GPU memory after batch 2212: 14.97 GB
Batch 2213 input shapes: input_ids=torch.Size([4, 91]), attention_mask=torch.Size([4, 91])


Predicting:  92%|█████████▏| 2214/2400 [1:20:34<04:31,  1.46s/it]

GPU memory after batch 2213: 14.97 GB
Batch 2214 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  92%|█████████▏| 2215/2400 [1:20:35<04:21,  1.41s/it]

GPU memory after batch 2214: 14.97 GB
Batch 2215 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  92%|█████████▏| 2216/2400 [1:20:36<04:10,  1.36s/it]

GPU memory after batch 2215: 14.97 GB
Batch 2216 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  92%|█████████▏| 2217/2400 [1:20:37<04:01,  1.32s/it]

GPU memory after batch 2216: 14.97 GB
Batch 2217 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  92%|█████████▏| 2218/2400 [1:20:39<04:04,  1.34s/it]

GPU memory after batch 2217: 14.97 GB
Batch 2218 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  92%|█████████▏| 2219/2400 [1:20:40<04:00,  1.33s/it]

GPU memory after batch 2218: 14.97 GB
Batch 2219 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:  92%|█████████▎| 2220/2400 [1:20:41<03:50,  1.28s/it]

GPU memory after batch 2219: 14.97 GB
Batch 2220 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  93%|█████████▎| 2221/2400 [1:20:43<03:50,  1.29s/it]

批次 2220: 原始预测: ['No\nYes', 'No\nAnswer: No', 'Yes\nYes, because', 'No\nAnswer: Yes']
GPU memory after batch 2220: 14.97 GB
Batch 2221 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  93%|█████████▎| 2222/2400 [1:20:44<03:46,  1.27s/it]

GPU memory after batch 2221: 14.97 GB
Batch 2222 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  93%|█████████▎| 2223/2400 [1:20:45<03:46,  1.28s/it]

GPU memory after batch 2222: 14.97 GB
Batch 2223 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  93%|█████████▎| 2224/2400 [1:20:47<03:51,  1.31s/it]

GPU memory after batch 2223: 14.97 GB
Batch 2224 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  93%|█████████▎| 2225/2400 [1:20:48<03:49,  1.31s/it]

GPU memory after batch 2224: 14.97 GB
Batch 2225 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  93%|█████████▎| 2226/2400 [1:20:49<03:47,  1.31s/it]

GPU memory after batch 2225: 14.97 GB
Batch 2226 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  93%|█████████▎| 2227/2400 [1:20:50<03:42,  1.29s/it]

GPU memory after batch 2226: 14.97 GB
Batch 2227 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  93%|█████████▎| 2228/2400 [1:20:52<03:35,  1.25s/it]

GPU memory after batch 2227: 14.97 GB
Batch 2228 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  93%|█████████▎| 2229/2400 [1:20:53<03:33,  1.25s/it]

GPU memory after batch 2228: 14.97 GB
Batch 2229 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  93%|█████████▎| 2230/2400 [1:20:54<03:34,  1.26s/it]

GPU memory after batch 2229: 14.97 GB
Batch 2230 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  93%|█████████▎| 2231/2400 [1:20:55<03:32,  1.26s/it]

批次 2230: 原始预测: ['Yes', 'Yes', 'No\nYes, because', 'No\nYes, because']
GPU memory after batch 2230: 14.97 GB
Batch 2231 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  93%|█████████▎| 2232/2400 [1:20:57<03:37,  1.29s/it]

GPU memory after batch 2231: 14.97 GB
Batch 2232 input shapes: input_ids=torch.Size([4, 171]), attention_mask=torch.Size([4, 171])


Predicting:  93%|█████████▎| 2233/2400 [1:20:59<04:18,  1.55s/it]

GPU memory after batch 2232: 14.97 GB
Batch 2233 input shapes: input_ids=torch.Size([4, 100]), attention_mask=torch.Size([4, 100])


Predicting:  93%|█████████▎| 2234/2400 [1:21:00<04:15,  1.54s/it]

GPU memory after batch 2233: 14.97 GB
Batch 2234 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  93%|█████████▎| 2235/2400 [1:21:02<04:01,  1.47s/it]

GPU memory after batch 2234: 14.97 GB
Batch 2235 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  93%|█████████▎| 2236/2400 [1:21:03<03:52,  1.42s/it]

GPU memory after batch 2235: 14.97 GB
Batch 2236 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  93%|█████████▎| 2237/2400 [1:21:04<03:45,  1.38s/it]

GPU memory after batch 2236: 14.97 GB
Batch 2237 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  93%|█████████▎| 2238/2400 [1:21:06<03:36,  1.34s/it]

GPU memory after batch 2237: 14.97 GB
Batch 2238 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  93%|█████████▎| 2239/2400 [1:21:07<03:33,  1.33s/it]

GPU memory after batch 2238: 14.97 GB
Batch 2239 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  93%|█████████▎| 2240/2400 [1:21:08<03:31,  1.32s/it]

GPU memory after batch 2239: 14.97 GB
Batch 2240 input shapes: input_ids=torch.Size([4, 65]), attention_mask=torch.Size([4, 65])


Predicting:  93%|█████████▎| 2241/2400 [1:21:09<03:25,  1.29s/it]

批次 2240: 原始预测: ['No\nYes', 'No\nAnswer: No', 'No\nYes, the', 'No\nYes, the']
GPU memory after batch 2240: 14.97 GB
Batch 2241 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  93%|█████████▎| 2242/2400 [1:21:11<03:21,  1.28s/it]

GPU memory after batch 2241: 14.97 GB
Batch 2242 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  93%|█████████▎| 2243/2400 [1:21:12<03:18,  1.26s/it]

GPU memory after batch 2242: 14.97 GB
Batch 2243 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  94%|█████████▎| 2244/2400 [1:21:13<03:22,  1.30s/it]

GPU memory after batch 2243: 14.97 GB
Batch 2244 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  94%|█████████▎| 2245/2400 [1:21:15<03:21,  1.30s/it]

GPU memory after batch 2244: 14.97 GB
Batch 2245 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  94%|█████████▎| 2246/2400 [1:21:16<03:17,  1.28s/it]

GPU memory after batch 2245: 14.97 GB
Batch 2246 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  94%|█████████▎| 2247/2400 [1:21:17<03:07,  1.22s/it]

GPU memory after batch 2246: 14.97 GB
Batch 2247 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  94%|█████████▎| 2248/2400 [1:21:18<03:06,  1.23s/it]

GPU memory after batch 2247: 14.97 GB
Batch 2248 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  94%|█████████▎| 2249/2400 [1:21:19<03:05,  1.23s/it]

GPU memory after batch 2248: 14.97 GB
Batch 2249 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  94%|█████████▍| 2250/2400 [1:21:21<03:07,  1.25s/it]

GPU memory after batch 2249: 14.97 GB
Batch 2250 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  94%|█████████▍| 2251/2400 [1:21:22<03:12,  1.29s/it]

批次 2250: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2250: 14.97 GB
Batch 2251 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  94%|█████████▍| 2252/2400 [1:21:23<03:11,  1.30s/it]

GPU memory after batch 2251: 14.97 GB
Batch 2252 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  94%|█████████▍| 2253/2400 [1:21:25<03:07,  1.28s/it]

GPU memory after batch 2252: 14.97 GB
Batch 2253 input shapes: input_ids=torch.Size([4, 104]), attention_mask=torch.Size([4, 104])


Predicting:  94%|█████████▍| 2254/2400 [1:21:26<03:16,  1.35s/it]

GPU memory after batch 2253: 14.97 GB
Batch 2254 input shapes: input_ids=torch.Size([4, 56]), attention_mask=torch.Size([4, 56])


Predicting:  94%|█████████▍| 2255/2400 [1:21:27<03:04,  1.27s/it]

GPU memory after batch 2254: 14.97 GB
Batch 2255 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  94%|█████████▍| 2256/2400 [1:21:28<02:59,  1.24s/it]

GPU memory after batch 2255: 14.97 GB
Batch 2256 input shapes: input_ids=torch.Size([4, 184]), attention_mask=torch.Size([4, 184])


Predicting:  94%|█████████▍| 2257/2400 [1:21:31<03:37,  1.52s/it]

GPU memory after batch 2256: 14.97 GB
Batch 2257 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  94%|█████████▍| 2258/2400 [1:21:32<03:33,  1.50s/it]

GPU memory after batch 2257: 14.97 GB
Batch 2258 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  94%|█████████▍| 2259/2400 [1:21:33<03:23,  1.44s/it]

GPU memory after batch 2258: 14.97 GB
Batch 2259 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  94%|█████████▍| 2260/2400 [1:21:34<03:13,  1.38s/it]

GPU memory after batch 2259: 14.97 GB
Batch 2260 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  94%|█████████▍| 2261/2400 [1:21:36<03:05,  1.34s/it]

批次 2260: 原始预测: ['No\nYes\nYes', 'No\nAnswer\nYes', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 2260: 14.97 GB
Batch 2261 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  94%|█████████▍| 2262/2400 [1:21:37<03:00,  1.31s/it]

GPU memory after batch 2261: 14.97 GB
Batch 2262 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  94%|█████████▍| 2263/2400 [1:21:38<03:02,  1.33s/it]

GPU memory after batch 2262: 14.97 GB
Batch 2263 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  94%|█████████▍| 2264/2400 [1:21:40<02:57,  1.30s/it]

GPU memory after batch 2263: 14.97 GB
Batch 2264 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  94%|█████████▍| 2265/2400 [1:21:41<02:55,  1.30s/it]

GPU memory after batch 2264: 14.97 GB
Batch 2265 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  94%|█████████▍| 2266/2400 [1:21:42<02:51,  1.28s/it]

GPU memory after batch 2265: 14.97 GB
Batch 2266 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  94%|█████████▍| 2267/2400 [1:21:43<02:51,  1.29s/it]

GPU memory after batch 2266: 14.97 GB
Batch 2267 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  94%|█████████▍| 2268/2400 [1:21:45<02:48,  1.27s/it]

GPU memory after batch 2267: 14.97 GB
Batch 2268 input shapes: input_ids=torch.Size([4, 58]), attention_mask=torch.Size([4, 58])


Predicting:  95%|█████████▍| 2269/2400 [1:21:46<02:42,  1.24s/it]

GPU memory after batch 2268: 14.97 GB
Batch 2269 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  95%|█████████▍| 2270/2400 [1:21:47<02:43,  1.26s/it]

GPU memory after batch 2269: 14.97 GB
Batch 2270 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:  95%|█████████▍| 2271/2400 [1:21:48<02:39,  1.24s/it]

批次 2270: 原始预测: ['Yes\nYes, because', 'No\nYes', 'No\n\nAnswer with only', 'No\n\nYes, because']
GPU memory after batch 2270: 14.97 GB
Batch 2271 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  95%|█████████▍| 2272/2400 [1:21:50<02:40,  1.26s/it]

GPU memory after batch 2271: 14.97 GB
Batch 2272 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  95%|█████████▍| 2273/2400 [1:21:51<02:44,  1.30s/it]

GPU memory after batch 2272: 14.97 GB
Batch 2273 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  95%|█████████▍| 2274/2400 [1:21:52<02:41,  1.28s/it]

GPU memory after batch 2273: 14.97 GB
Batch 2274 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  95%|█████████▍| 2275/2400 [1:21:53<02:38,  1.27s/it]

GPU memory after batch 2274: 14.97 GB
Batch 2275 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  95%|█████████▍| 2276/2400 [1:21:55<02:33,  1.24s/it]

GPU memory after batch 2275: 14.97 GB
Batch 2276 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  95%|█████████▍| 2277/2400 [1:21:56<02:37,  1.28s/it]

GPU memory after batch 2276: 14.97 GB
Batch 2277 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  95%|█████████▍| 2278/2400 [1:21:57<02:37,  1.29s/it]

GPU memory after batch 2277: 14.97 GB
Batch 2278 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  95%|█████████▍| 2279/2400 [1:21:59<02:34,  1.27s/it]

GPU memory after batch 2278: 14.97 GB
Batch 2279 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  95%|█████████▌| 2280/2400 [1:22:00<02:31,  1.26s/it]

GPU memory after batch 2279: 14.97 GB
Batch 2280 input shapes: input_ids=torch.Size([4, 180]), attention_mask=torch.Size([4, 180])


Predicting:  95%|█████████▌| 2281/2400 [1:22:02<03:02,  1.53s/it]

批次 2280: 原始预测: ['No\nQuestion: Is', 'Yes\nQuestion: Is', 'Yes\nIs "250', 'No\nExplanation: The']
GPU memory after batch 2280: 14.97 GB
Batch 2281 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  95%|█████████▌| 2282/2400 [1:22:04<03:00,  1.53s/it]

GPU memory after batch 2281: 14.97 GB
Batch 2282 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  95%|█████████▌| 2283/2400 [1:22:05<02:46,  1.42s/it]

GPU memory after batch 2282: 14.97 GB
Batch 2283 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  95%|█████████▌| 2284/2400 [1:22:06<02:38,  1.37s/it]

GPU memory after batch 2283: 14.97 GB
Batch 2284 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  95%|█████████▌| 2285/2400 [1:22:07<02:35,  1.35s/it]

GPU memory after batch 2284: 14.97 GB
Batch 2285 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  95%|█████████▌| 2286/2400 [1:22:09<02:32,  1.33s/it]

GPU memory after batch 2285: 14.97 GB
Batch 2286 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  95%|█████████▌| 2287/2400 [1:22:10<02:27,  1.31s/it]

GPU memory after batch 2286: 14.97 GB
Batch 2287 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  95%|█████████▌| 2288/2400 [1:22:11<02:26,  1.30s/it]

GPU memory after batch 2287: 14.97 GB
Batch 2288 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  95%|█████████▌| 2289/2400 [1:22:12<02:22,  1.28s/it]

GPU memory after batch 2288: 14.97 GB
Batch 2289 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  95%|█████████▌| 2290/2400 [1:22:14<02:21,  1.29s/it]

GPU memory after batch 2289: 14.97 GB
Batch 2290 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  95%|█████████▌| 2291/2400 [1:22:15<02:23,  1.32s/it]

批次 2290: 原始预测: ['No\nAnswer\nYes', 'No\nYes, because', 'No\nAnswer: No', 'No\nAnswer: No']
GPU memory after batch 2290: 14.97 GB
Batch 2291 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  96%|█████████▌| 2292/2400 [1:22:16<02:19,  1.29s/it]

GPU memory after batch 2291: 14.97 GB
Batch 2292 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  96%|█████████▌| 2293/2400 [1:22:18<02:18,  1.30s/it]

GPU memory after batch 2292: 14.97 GB
Batch 2293 input shapes: input_ids=torch.Size([4, 96]), attention_mask=torch.Size([4, 96])


Predicting:  96%|█████████▌| 2294/2400 [1:22:19<02:22,  1.34s/it]

GPU memory after batch 2293: 14.97 GB
Batch 2294 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  96%|█████████▌| 2295/2400 [1:22:20<02:17,  1.31s/it]

GPU memory after batch 2294: 14.97 GB
Batch 2295 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  96%|█████████▌| 2296/2400 [1:22:22<02:16,  1.31s/it]

GPU memory after batch 2295: 14.97 GB
Batch 2296 input shapes: input_ids=torch.Size([4, 55]), attention_mask=torch.Size([4, 55])


Predicting:  96%|█████████▌| 2297/2400 [1:22:23<02:08,  1.24s/it]

GPU memory after batch 2296: 14.97 GB
Batch 2297 input shapes: input_ids=torch.Size([4, 129]), attention_mask=torch.Size([4, 129])


Predicting:  96%|█████████▌| 2298/2400 [1:22:24<02:24,  1.42s/it]

GPU memory after batch 2297: 14.97 GB
Batch 2298 input shapes: input_ids=torch.Size([4, 62]), attention_mask=torch.Size([4, 62])


Predicting:  96%|█████████▌| 2299/2400 [1:22:26<02:15,  1.34s/it]

GPU memory after batch 2298: 14.97 GB
Batch 2299 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  96%|█████████▌| 2300/2400 [1:22:27<02:11,  1.31s/it]

GPU memory after batch 2299: 14.97 GB
Batch 2300 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  96%|█████████▌| 2301/2400 [1:22:28<02:07,  1.29s/it]

批次 2300: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'No\nYes, because', 'No\nYes, because']
GPU memory after batch 2300: 14.97 GB
Batch 2301 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting:  96%|█████████▌| 2302/2400 [1:22:29<02:04,  1.27s/it]

GPU memory after batch 2301: 14.97 GB
Batch 2302 input shapes: input_ids=torch.Size([4, 73]), attention_mask=torch.Size([4, 73])


Predicting:  96%|█████████▌| 2303/2400 [1:22:31<02:04,  1.28s/it]

GPU memory after batch 2302: 14.97 GB
Batch 2303 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  96%|█████████▌| 2304/2400 [1:22:32<01:59,  1.24s/it]

GPU memory after batch 2303: 14.97 GB
Batch 2304 input shapes: input_ids=torch.Size([4, 187]), attention_mask=torch.Size([4, 187])


Predicting:  96%|█████████▌| 2305/2400 [1:22:34<02:25,  1.53s/it]

GPU memory after batch 2304: 14.97 GB
Batch 2305 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  96%|█████████▌| 2306/2400 [1:22:35<02:17,  1.46s/it]

GPU memory after batch 2305: 14.97 GB
Batch 2306 input shapes: input_ids=torch.Size([4, 67]), attention_mask=torch.Size([4, 67])


Predicting:  96%|█████████▌| 2307/2400 [1:22:37<02:09,  1.40s/it]

GPU memory after batch 2306: 14.97 GB
Batch 2307 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  96%|█████████▌| 2308/2400 [1:22:38<02:08,  1.39s/it]

GPU memory after batch 2307: 14.97 GB
Batch 2308 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  96%|█████████▌| 2309/2400 [1:22:39<02:06,  1.39s/it]

GPU memory after batch 2308: 14.97 GB
Batch 2309 input shapes: input_ids=torch.Size([4, 97]), attention_mask=torch.Size([4, 97])


Predicting:  96%|█████████▋| 2310/2400 [1:22:41<02:08,  1.43s/it]

GPU memory after batch 2309: 14.97 GB
Batch 2310 input shapes: input_ids=torch.Size([4, 63]), attention_mask=torch.Size([4, 63])


Predicting:  96%|█████████▋| 2311/2400 [1:22:42<02:00,  1.35s/it]

批次 2310: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2310: 14.97 GB
Batch 2311 input shapes: input_ids=torch.Size([4, 100]), attention_mask=torch.Size([4, 100])


Predicting:  96%|█████████▋| 2312/2400 [1:22:43<02:03,  1.40s/it]

GPU memory after batch 2311: 14.97 GB
Batch 2312 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  96%|█████████▋| 2313/2400 [1:22:45<01:59,  1.37s/it]

GPU memory after batch 2312: 14.97 GB
Batch 2313 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  96%|█████████▋| 2314/2400 [1:22:46<01:56,  1.35s/it]

GPU memory after batch 2313: 14.97 GB
Batch 2314 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  96%|█████████▋| 2315/2400 [1:22:47<01:51,  1.32s/it]

GPU memory after batch 2314: 14.97 GB
Batch 2315 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  96%|█████████▋| 2316/2400 [1:22:49<01:50,  1.31s/it]

GPU memory after batch 2315: 14.97 GB
Batch 2316 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  97%|█████████▋| 2317/2400 [1:22:50<01:48,  1.31s/it]

GPU memory after batch 2316: 14.97 GB
Batch 2317 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  97%|█████████▋| 2318/2400 [1:22:51<01:50,  1.35s/it]

GPU memory after batch 2317: 14.97 GB
Batch 2318 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  97%|█████████▋| 2319/2400 [1:22:53<01:46,  1.32s/it]

GPU memory after batch 2318: 14.97 GB
Batch 2319 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting:  97%|█████████▋| 2320/2400 [1:22:54<01:45,  1.31s/it]

GPU memory after batch 2319: 14.97 GB
Batch 2320 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  97%|█████████▋| 2321/2400 [1:22:55<01:45,  1.34s/it]

批次 2320: 原始预测: ['No\nYes, because', 'No\nYes, because', 'No\nAnswer: No', 'No\n**Step-by']
GPU memory after batch 2320: 14.97 GB
Batch 2321 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  97%|█████████▋| 2322/2400 [1:22:57<01:45,  1.35s/it]

GPU memory after batch 2321: 14.97 GB
Batch 2322 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  97%|█████████▋| 2323/2400 [1:22:58<01:43,  1.34s/it]

GPU memory after batch 2322: 14.97 GB
Batch 2323 input shapes: input_ids=torch.Size([4, 89]), attention_mask=torch.Size([4, 89])


Predicting:  97%|█████████▋| 2324/2400 [1:22:59<01:44,  1.37s/it]

GPU memory after batch 2323: 14.97 GB
Batch 2324 input shapes: input_ids=torch.Size([4, 123]), attention_mask=torch.Size([4, 123])


Predicting:  97%|█████████▋| 2325/2400 [1:23:01<01:49,  1.46s/it]

GPU memory after batch 2324: 14.97 GB
Batch 2325 input shapes: input_ids=torch.Size([4, 125]), attention_mask=torch.Size([4, 125])


Predicting:  97%|█████████▋| 2326/2400 [1:23:03<01:52,  1.52s/it]

GPU memory after batch 2325: 14.97 GB
Batch 2326 input shapes: input_ids=torch.Size([4, 99]), attention_mask=torch.Size([4, 99])


Predicting:  97%|█████████▋| 2327/2400 [1:23:04<01:51,  1.52s/it]

GPU memory after batch 2326: 14.97 GB
Batch 2327 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  97%|█████████▋| 2328/2400 [1:23:06<01:46,  1.48s/it]

GPU memory after batch 2327: 14.97 GB
Batch 2328 input shapes: input_ids=torch.Size([4, 184]), attention_mask=torch.Size([4, 184])


Predicting:  97%|█████████▋| 2329/2400 [1:23:08<01:59,  1.69s/it]

GPU memory after batch 2328: 14.97 GB
Batch 2329 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  97%|█████████▋| 2330/2400 [1:23:09<01:48,  1.55s/it]

GPU memory after batch 2329: 14.97 GB
Batch 2330 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  97%|█████████▋| 2331/2400 [1:23:10<01:40,  1.46s/it]

批次 2330: 原始预测: ['No\nYes, because', 'No\nYes, because', 'No\nYes', 'No\nYes']
GPU memory after batch 2330: 14.97 GB
Batch 2331 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  97%|█████████▋| 2332/2400 [1:23:12<01:35,  1.41s/it]

GPU memory after batch 2331: 14.97 GB
Batch 2332 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting:  97%|█████████▋| 2333/2400 [1:23:13<01:29,  1.34s/it]

GPU memory after batch 2332: 14.97 GB
Batch 2333 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  97%|█████████▋| 2334/2400 [1:23:14<01:27,  1.33s/it]

GPU memory after batch 2333: 14.97 GB
Batch 2334 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  97%|█████████▋| 2335/2400 [1:23:16<01:27,  1.35s/it]

GPU memory after batch 2334: 14.97 GB
Batch 2335 input shapes: input_ids=torch.Size([4, 75]), attention_mask=torch.Size([4, 75])


Predicting:  97%|█████████▋| 2336/2400 [1:23:17<01:25,  1.33s/it]

GPU memory after batch 2335: 14.97 GB
Batch 2336 input shapes: input_ids=torch.Size([4, 77]), attention_mask=torch.Size([4, 77])


Predicting:  97%|█████████▋| 2337/2400 [1:23:18<01:23,  1.32s/it]

GPU memory after batch 2336: 14.97 GB
Batch 2337 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  97%|█████████▋| 2338/2400 [1:23:20<01:23,  1.34s/it]

GPU memory after batch 2337: 14.97 GB
Batch 2338 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  97%|█████████▋| 2339/2400 [1:23:21<01:21,  1.33s/it]

GPU memory after batch 2338: 14.97 GB
Batch 2339 input shapes: input_ids=torch.Size([4, 84]), attention_mask=torch.Size([4, 84])


Predicting:  98%|█████████▊| 2340/2400 [1:23:22<01:20,  1.35s/it]

GPU memory after batch 2339: 14.97 GB
Batch 2340 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  98%|█████████▊| 2341/2400 [1:23:24<01:18,  1.34s/it]

批次 2340: 原始预测: ['Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2340: 14.97 GB
Batch 2341 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  98%|█████████▊| 2342/2400 [1:23:25<01:16,  1.33s/it]

GPU memory after batch 2341: 14.97 GB
Batch 2342 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  98%|█████████▊| 2343/2400 [1:23:26<01:14,  1.30s/it]

GPU memory after batch 2342: 14.97 GB
Batch 2343 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  98%|█████████▊| 2344/2400 [1:23:27<01:10,  1.26s/it]

GPU memory after batch 2343: 14.97 GB
Batch 2344 input shapes: input_ids=torch.Size([4, 93]), attention_mask=torch.Size([4, 93])


Predicting:  98%|█████████▊| 2345/2400 [1:23:29<01:12,  1.32s/it]

GPU memory after batch 2344: 14.97 GB
Batch 2345 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  98%|█████████▊| 2346/2400 [1:23:30<01:05,  1.21s/it]

GPU memory after batch 2345: 14.97 GB
Batch 2346 input shapes: input_ids=torch.Size([4, 90]), attention_mask=torch.Size([4, 90])


Predicting:  98%|█████████▊| 2347/2400 [1:23:31<01:07,  1.28s/it]

GPU memory after batch 2346: 14.97 GB
Batch 2347 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  98%|█████████▊| 2348/2400 [1:23:32<01:06,  1.29s/it]

GPU memory after batch 2347: 14.97 GB
Batch 2348 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  98%|█████████▊| 2349/2400 [1:23:34<01:04,  1.27s/it]

GPU memory after batch 2348: 14.97 GB
Batch 2349 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  98%|█████████▊| 2350/2400 [1:23:35<01:08,  1.37s/it]

GPU memory after batch 2349: 14.97 GB
Batch 2350 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  98%|█████████▊| 2351/2400 [1:23:36<01:05,  1.33s/it]

批次 2350: 原始预测: ['No\nYes', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2350: 14.97 GB
Batch 2351 input shapes: input_ids=torch.Size([4, 105]), attention_mask=torch.Size([4, 105])


Predicting:  98%|█████████▊| 2352/2400 [1:23:38<01:07,  1.41s/it]

GPU memory after batch 2351: 14.97 GB
Batch 2352 input shapes: input_ids=torch.Size([4, 175]), attention_mask=torch.Size([4, 175])


Predicting:  98%|█████████▊| 2353/2400 [1:23:40<01:16,  1.63s/it]

GPU memory after batch 2352: 14.97 GB
Batch 2353 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  98%|█████████▊| 2354/2400 [1:23:41<01:08,  1.49s/it]

GPU memory after batch 2353: 14.97 GB
Batch 2354 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  98%|█████████▊| 2355/2400 [1:23:43<01:04,  1.44s/it]

GPU memory after batch 2354: 14.97 GB
Batch 2355 input shapes: input_ids=torch.Size([4, 81]), attention_mask=torch.Size([4, 81])


Predicting:  98%|█████████▊| 2356/2400 [1:23:44<01:02,  1.42s/it]

GPU memory after batch 2355: 14.97 GB
Batch 2356 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  98%|█████████▊| 2357/2400 [1:23:45<01:00,  1.41s/it]

GPU memory after batch 2356: 14.97 GB
Batch 2357 input shapes: input_ids=torch.Size([4, 60]), attention_mask=torch.Size([4, 60])


Predicting:  98%|█████████▊| 2358/2400 [1:23:47<00:56,  1.34s/it]

GPU memory after batch 2357: 14.97 GB
Batch 2358 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  98%|█████████▊| 2359/2400 [1:23:48<00:55,  1.35s/it]

GPU memory after batch 2358: 14.97 GB
Batch 2359 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  98%|█████████▊| 2360/2400 [1:23:49<00:54,  1.36s/it]

GPU memory after batch 2359: 14.97 GB
Batch 2360 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  98%|█████████▊| 2361/2400 [1:23:51<00:51,  1.33s/it]

批次 2360: 原始预测: ['No\nYes, because', 'No\nYes, because', 'No\nExplanation: The', 'No\nAnswer: Yes']
GPU memory after batch 2360: 14.97 GB
Batch 2361 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  98%|█████████▊| 2362/2400 [1:23:52<00:51,  1.34s/it]

GPU memory after batch 2361: 14.97 GB
Batch 2362 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  98%|█████████▊| 2363/2400 [1:23:53<00:49,  1.33s/it]

GPU memory after batch 2362: 14.97 GB
Batch 2363 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  98%|█████████▊| 2364/2400 [1:23:55<00:48,  1.35s/it]

GPU memory after batch 2363: 14.97 GB
Batch 2364 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  99%|█████████▊| 2365/2400 [1:23:56<00:47,  1.36s/it]

GPU memory after batch 2364: 14.97 GB
Batch 2365 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting:  99%|█████████▊| 2366/2400 [1:23:57<00:46,  1.37s/it]

GPU memory after batch 2365: 14.97 GB
Batch 2366 input shapes: input_ids=torch.Size([4, 69]), attention_mask=torch.Size([4, 69])


Predicting:  99%|█████████▊| 2367/2400 [1:23:59<00:43,  1.33s/it]

GPU memory after batch 2366: 14.97 GB
Batch 2367 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  99%|█████████▊| 2368/2400 [1:24:00<00:42,  1.32s/it]

GPU memory after batch 2367: 14.97 GB
Batch 2368 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  99%|█████████▊| 2369/2400 [1:24:01<00:40,  1.32s/it]

GPU memory after batch 2368: 14.97 GB
Batch 2369 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  99%|█████████▉| 2370/2400 [1:24:03<00:39,  1.31s/it]

GPU memory after batch 2369: 14.97 GB
Batch 2370 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  99%|█████████▉| 2371/2400 [1:24:04<00:38,  1.34s/it]

批次 2370: 原始预测: ['No\nYes, because', 'No\nYes, because', 'No\nExplanation: The', 'No\nYes\nYes']
GPU memory after batch 2370: 14.97 GB
Batch 2371 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting:  99%|█████████▉| 2372/2400 [1:24:05<00:37,  1.33s/it]

GPU memory after batch 2371: 14.97 GB
Batch 2372 input shapes: input_ids=torch.Size([4, 88]), attention_mask=torch.Size([4, 88])


Predicting:  99%|█████████▉| 2373/2400 [1:24:07<00:36,  1.35s/it]

GPU memory after batch 2372: 14.97 GB
Batch 2373 input shapes: input_ids=torch.Size([4, 74]), attention_mask=torch.Size([4, 74])


Predicting:  99%|█████████▉| 2374/2400 [1:24:08<00:34,  1.33s/it]

GPU memory after batch 2373: 14.97 GB
Batch 2374 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  99%|█████████▉| 2375/2400 [1:24:09<00:32,  1.30s/it]

GPU memory after batch 2374: 14.97 GB
Batch 2375 input shapes: input_ids=torch.Size([4, 76]), attention_mask=torch.Size([4, 76])


Predicting:  99%|█████████▉| 2376/2400 [1:24:11<00:31,  1.30s/it]

GPU memory after batch 2375: 14.97 GB
Batch 2376 input shapes: input_ids=torch.Size([4, 165]), attention_mask=torch.Size([4, 165])


Predicting:  99%|█████████▉| 2377/2400 [1:24:13<00:35,  1.54s/it]

GPU memory after batch 2376: 14.97 GB
Batch 2377 input shapes: input_ids=torch.Size([4, 80]), attention_mask=torch.Size([4, 80])


Predicting:  99%|█████████▉| 2378/2400 [1:24:14<00:32,  1.47s/it]

GPU memory after batch 2377: 14.97 GB
Batch 2378 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting:  99%|█████████▉| 2379/2400 [1:24:15<00:29,  1.40s/it]

GPU memory after batch 2378: 14.97 GB
Batch 2379 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting:  99%|█████████▉| 2380/2400 [1:24:16<00:26,  1.35s/it]

GPU memory after batch 2379: 14.97 GB
Batch 2380 input shapes: input_ids=torch.Size([4, 113]), attention_mask=torch.Size([4, 113])


Predicting:  99%|█████████▉| 2381/2400 [1:24:18<00:27,  1.43s/it]

批次 2380: 原始预测: ['Yes', 'Yes', 'No\nYes, because', 'No\nYes, because']
GPU memory after batch 2380: 14.97 GB
Batch 2381 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting:  99%|█████████▉| 2382/2400 [1:24:19<00:24,  1.37s/it]

GPU memory after batch 2381: 14.97 GB
Batch 2382 input shapes: input_ids=torch.Size([4, 87]), attention_mask=torch.Size([4, 87])


Predicting:  99%|█████████▉| 2383/2400 [1:24:21<00:23,  1.38s/it]

GPU memory after batch 2382: 14.97 GB
Batch 2383 input shapes: input_ids=torch.Size([4, 83]), attention_mask=torch.Size([4, 83])


Predicting:  99%|█████████▉| 2384/2400 [1:24:22<00:22,  1.38s/it]

GPU memory after batch 2383: 14.97 GB
Batch 2384 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting:  99%|█████████▉| 2385/2400 [1:24:23<00:19,  1.32s/it]

GPU memory after batch 2384: 14.97 GB
Batch 2385 input shapes: input_ids=torch.Size([4, 72]), attention_mask=torch.Size([4, 72])


Predicting:  99%|█████████▉| 2386/2400 [1:24:24<00:18,  1.29s/it]

GPU memory after batch 2385: 14.97 GB
Batch 2386 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting:  99%|█████████▉| 2387/2400 [1:24:26<00:17,  1.32s/it]

GPU memory after batch 2386: 14.97 GB
Batch 2387 input shapes: input_ids=torch.Size([4, 68]), attention_mask=torch.Size([4, 68])


Predicting: 100%|█████████▉| 2388/2400 [1:24:27<00:15,  1.30s/it]

GPU memory after batch 2387: 14.97 GB
Batch 2388 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting: 100%|█████████▉| 2389/2400 [1:24:28<00:14,  1.32s/it]

GPU memory after batch 2388: 14.97 GB
Batch 2389 input shapes: input_ids=torch.Size([4, 82]), attention_mask=torch.Size([4, 82])


Predicting: 100%|█████████▉| 2390/2400 [1:24:30<00:13,  1.34s/it]

GPU memory after batch 2389: 14.97 GB
Batch 2390 input shapes: input_ids=torch.Size([4, 79]), attention_mask=torch.Size([4, 79])


Predicting: 100%|█████████▉| 2391/2400 [1:24:31<00:11,  1.33s/it]

批次 2390: 原始预测: ['No\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because', 'Yes\nYes, because']
GPU memory after batch 2390: 14.97 GB
Batch 2391 input shapes: input_ids=torch.Size([4, 110]), attention_mask=torch.Size([4, 110])


Predicting: 100%|█████████▉| 2392/2400 [1:24:33<00:11,  1.41s/it]

GPU memory after batch 2391: 14.97 GB
Batch 2392 input shapes: input_ids=torch.Size([4, 78]), attention_mask=torch.Size([4, 78])


Predicting: 100%|█████████▉| 2393/2400 [1:24:34<00:09,  1.38s/it]

GPU memory after batch 2392: 14.97 GB
Batch 2393 input shapes: input_ids=torch.Size([4, 85]), attention_mask=torch.Size([4, 85])


Predicting: 100%|█████████▉| 2394/2400 [1:24:35<00:08,  1.38s/it]

GPU memory after batch 2393: 14.97 GB
Batch 2394 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting: 100%|█████████▉| 2395/2400 [1:24:37<00:06,  1.34s/it]

GPU memory after batch 2394: 14.97 GB
Batch 2395 input shapes: input_ids=torch.Size([4, 71]), attention_mask=torch.Size([4, 71])


Predicting: 100%|█████████▉| 2396/2400 [1:24:38<00:05,  1.31s/it]

GPU memory after batch 2395: 14.97 GB
Batch 2396 input shapes: input_ids=torch.Size([4, 59]), attention_mask=torch.Size([4, 59])


Predicting: 100%|█████████▉| 2397/2400 [1:24:39<00:03,  1.27s/it]

GPU memory after batch 2396: 14.97 GB
Batch 2397 input shapes: input_ids=torch.Size([4, 70]), attention_mask=torch.Size([4, 70])


Predicting: 100%|█████████▉| 2398/2400 [1:24:40<00:02,  1.26s/it]

GPU memory after batch 2397: 14.97 GB
Batch 2398 input shapes: input_ids=torch.Size([4, 64]), attention_mask=torch.Size([4, 64])


Predicting: 100%|█████████▉| 2399/2400 [1:24:42<00:01,  1.23s/it]

GPU memory after batch 2398: 14.97 GB
Batch 2399 input shapes: input_ids=torch.Size([4, 66]), attention_mask=torch.Size([4, 66])


Predicting: 100%|██████████| 2400/2400 [1:24:43<00:00,  2.12s/it]

GPU memory after batch 2399: 14.97 GB





预测结果已保存到 /kaggle/working/predictions.jsonl


In [1]:
import json
from collections import Counter, defaultdict
from typing import List, Dict, Tuple

In [2]:
def parse_prediction(raw_prediction: str) -> str:
    raw_prediction = raw_prediction.lower()
    if 'yes' in raw_prediction:
        return 'yes'
    elif 'no' in raw_prediction:
        return 'no'
    else:
        # print(f"无法解析响应: {raw_prediction}")
        return 'generation_error'

def evaluate_model(data_list: List[Dict], unparsed_output_file: str = 'unparsed_predictions.json') -> Tuple[Dict, Dict]:
    metrics = Counter()
    detailed_metrics = {
        'by_domain': defaultdict(Counter),
        'by_error_type': defaultdict(Counter),
        'by_operation': defaultdict(Counter),
        'by_prompt_type': defaultdict(Counter)
    }
    unparsed_data = {}  # 存储无法解析的样本，格式为 {id: {...}}
    
    for idx, item in enumerate(data_list):
        expected = item['expected_answer'].lower()  # Yes/No 转为小写
        pred = parse_prediction(item['raw_prediction'])
        item['parsel_prediction'] = pred  # 保存解析结果
        
        # 如果无法解析，添加到 unparsed_data
        if pred == 'generation_error':
            # 只保存 expected_answer == "Yes" 的样本（错误样本）
            if expected == 'yes':
                sample_id = f"unparsed_{idx}"
                unparsed_data[sample_id] = {
                    "error_number": item['number'],
                    "error_passage": item['passage'],
                    "dataset": item['dataset'],
                    "operation": item['operation'],
                    "error_annotation": item['error_annotation'],
                    # 以下字段需补充（若有正确数据）
                    "correct_number": "",  # 需手动补充或从原始数据推导
                    "correct_passage": ""  # 需手动补充或从原始数据推导
                }
        
        domain = item['dataset']
        operation = item['operation']
        prompt_type = item['prompt_type']
        error_types = [k for k, v in item['error_annotation'].items() if v > 0]
        
        # 计算总体指标
        if pred == expected:
            if expected == 'yes':
                metrics['TP'] += 1
                for et in error_types:
                    detailed_metrics['by_error_type'][et]['TP'] += 1
                detailed_metrics['by_domain'][domain]['TP'] += 1
                detailed_metrics['by_operation'][operation]['TP'] += 1
                detailed_metrics['by_prompt_type'][prompt_type]['TP'] += 1
            else:  # expected == 'no'
                metrics['TN'] += 1
                for et in error_types:
                    detailed_metrics['by_error_type'][et]['TN'] += 1
                detailed_metrics['by_domain'][domain]['TN'] += 1
                detailed_metrics['by_operation'][operation]['TN'] += 1
                detailed_metrics['by_prompt_type'][prompt_type]['TN'] += 1
        else:
            if expected == 'yes':
                metrics['FN'] += 1
                for et in error_types:
                    detailed_metrics['by_error_type'][et]['FN'] += 1
                detailed_metrics['by_domain'][domain]['FN'] += 1
                detailed_metrics['by_operation'][operation]['FN'] += 1
                detailed_metrics['by_prompt_type'][prompt_type]['FN'] += 1
            else:  # expected == 'no'
                metrics['FP'] += 1
                for et in error_types:
                    detailed_metrics['by_error_type'][et]['FP'] += 1
                detailed_metrics['by_domain'][domain]['FP'] += 1
                detailed_metrics['by_operation'][operation]['FP'] += 1
                detailed_metrics['by_prompt_type'][prompt_type]['FP'] += 1
        
        if pred == 'generation_error':
            metrics['Generation Error'] += 1
            for et in error_types:
                detailed_metrics['by_error_type'][et]['Generation Error'] += 1
            detailed_metrics['by_domain'][domain]['Generation Error'] += 1
            detailed_metrics['by_operation'][operation]['Generation Error'] += 1
            detailed_metrics['by_prompt_type'][prompt_type]['Generation Error'] += 1
    
    # 保存无法解析的数据到 JSON
    if unparsed_data:
        with open(unparsed_output_file, 'w', encoding='utf-8') as f:
            json.dump(unparsed_data, f, indent=2, ensure_ascii=False)
        # print(f"无法解析的 {len(unparsed_data)} 条数据已保存到 {unparsed_output_file}")
        print("注意：JSON 文件仅包含 expected_answer='Yes' 的样本，correct_number 和 correct_passage 需手动补充")
    else:
        print("没有无法解析的数据")
    
    total = len(data_list)
    metrics['Accuracy'] = (metrics['TP'] + metrics['TN']) / total if total > 0 else 0
    return metrics, detailed_metrics

In [3]:
# 读取 predictions.jsonl
data_list = []
input_file = 'deepseek_predictions.jsonl'  # 确认路径
with open(input_file, 'r', encoding='utf-8') as f:
    for line in f:
        data = json.loads(line)
        data_list.append(data)

# 评测模型并保存无法解析的数据
unparsed_output_file = 'deepseek_unparsed_predictions.json'
metrics, detailed_metrics = evaluate_model(data_list, unparsed_output_file)

# 打印总体指标
print("\nOverall Metrics:")
total = len(data_list)
for key, value in metrics.items():
    if key == 'Accuracy':
        print(f"{key}: {value:.3f}")
    else:
        print(f"{key}: {value} ({value / total:.3f})")

# 打印分维度指标
print("\nMetrics by Domain:")
for domain, counts in detailed_metrics['by_domain'].items():
    print(f"{domain}: {dict(counts)}")

print("\nMetrics by Error Type:")
for error_type, counts in detailed_metrics['by_error_type'].items():
    print(f"{error_type}: {dict(counts)}")

print("\nMetrics by Operation:")
for operation, counts in detailed_metrics['by_operation'].items():
    print(f"{operation}: {dict(counts)}")

print("\nMetrics by Prompt Type:")
for prompt_type, counts in detailed_metrics['by_prompt_type'].items():
    print(f"{prompt_type}: {dict(counts)}")

注意：JSON 文件仅包含 expected_answer='Yes' 的样本，correct_number 和 correct_passage 需手动补充

Overall Metrics:
FP: 4207 (0.438)
TP: 3849 (0.401)
TN: 593 (0.062)
FN: 951 (0.099)
Generation Error: 509 (0.053)
Accuracy: 0.463

Metrics by Domain:
Numeracy_600K_article_title: {'FP': 860, 'TP': 822, 'TN': 140, 'FN': 178}
aclsent: {'FP': 905, 'TP': 905, 'FN': 43, 'TN': 43}
DROP: {'TN': 180, 'FN': 474, 'FP': 814, 'TP': 520, 'Generation Error': 499}
qa-text-source-comparison: {'TN': 67, 'FN': 92, 'FP': 857, 'TP': 832, 'Generation Error': 8}
FinNum: {'FP': 771, 'TP': 770, 'TN': 163, 'FN': 164, 'Generation Error': 2}

Metrics by Error Type:
Error in Number Relationships: {'FP': 172, 'TP': 161, 'TN': 24, 'FN': 35, 'Generation Error': 4}
Undetectable Error: {'FP': 395, 'TP': 387, 'TN': 69, 'FN': 77, 'Generation Error': 14}
Type Error: {'FP': 436, 'TP': 415, 'TN': 82, 'FN': 103, 'Generation Error': 10}
Anomaly: {'FP': 203, 'TP': 186, 'TN': 27, 'FN': 44, 'Generation Error': 10}
Improper Data: {'FP': 26, 'TP': 27, 