In [None]:
# install libraries

# !pip install peft
# !pip install transformers

Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.82 nvidia-nvtx-cu12-12.1.105 peft-0.11.1


# Load Processor and Model

In [None]:
# Imports
import torch
from PIL import Image
from transformers import AutoProcessor, BitsAndBytesConfig, Idefics2ForConditionalGeneration
import json
import time
from distance import levenshtein
from peft import LoraConfig


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# device = torch.device('cpu')  # uncomment to force CPU execution
device

'cuda'

In [None]:
# from transformers import Idefics2ForConditionalGeneration, BitsAndBytesConfig

# # Configure bitsandbytes for 8-bit loading
# bnb_config = BitsAndBytesConfig(
#     load_in_8bit=True,  # Enable 8-bit quantization
#     bnb_8bit_use_double_quant=True,  # Optional: Enable double quantization for better precision
#     bnb_8bit_quant_type="nf4",  # Optional: Use 'nf4' (non-finite 4-bit) quantization type
# )

# # Load the model with 8-bit precision
# model = Idefics2ForConditionalGeneration.from_pretrained(
#     "Reverb/Idefics2-8b-docVQA-finetuned",
#     quantization_config=bnb_config,
#     torch_dtype=torch.float16,  # Use FP16 for non-quantized parts
#     device_map="auto"  # Automatically place the model on the appropriate device
# )

# # Move the model to the correct device (e.g., GPU)
# model.to(device)


In [None]:
# Load the Idefics2 processor with image splitting turned off.
processor = AutoProcessor.from_pretrained(
    "HuggingFaceM4/idefics2-8b",
    do_image_splitting=False,
)

# Show the names of the processor's sub-components.
processor.attributes

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


['image_processor', 'tokenizer']

In [None]:
import torch
from peft import LoraConfig
from transformers import AutoProcessor, BitsAndBytesConfig, Idefics2ForConditionalGeneration

# Run-mode switches. DEVICE is only used by the full fine-tuning path below.
DEVICE = "cuda:0"
USE_LORA = False
USE_QLORA = True


# Three training modes, from the lowest to the highest precision:
#   1. QLoRA            - 4-bit quantized base model plus LoRA adapters
#   2. Standard LoRA    - float16 base model plus adapters (DoRA enabled)
#   3. Full fine-tuning - every weight is trained
if not (USE_QLORA or USE_LORA):
    # Full fine-tuning: load in float16 and move the whole model to DEVICE.
    model = Idefics2ForConditionalGeneration.from_pretrained(
        "HuggingFaceM4/idefics2-8b",
        torch_dtype=torch.float16,
        _attn_implementation="flash_attention_2", # Only available on A100 or H100
    ).to(DEVICE)
else:
    # Adapter configuration shared by the LoRA and QLoRA modes.
    # The regex selects the projection layers inside the text model, the
    # modality projection and the perceiver resampler.
    lora_Config = LoraConfig(
        r=8,
        lora_alpha=8,
        lora_dropout=0.1,
        target_modules='.*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$',
        use_dora=not USE_QLORA,  # DoRA only in the non-quantized LoRA mode
        init_lora_weights="gaussian",
    )

    if USE_QLORA:
        # 4-bit NF4 quantization with float16 compute.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
        )

    model = Idefics2ForConditionalGeneration.from_pretrained(
        "HuggingFaceM4/idefics2-8b",
        torch_dtype=torch.float16,
        quantization_config=bnb_config if USE_QLORA else None,
    )

    # Attach the adapter to the loaded model and switch it on.
    model.add_adapter(lora_Config)
    model.enable_adapters()



`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 7/7 [00:23<00:00,  3.30s/it]


In [None]:
# # Load the fine-tuned model
# model = Idefics2ForConditionalGeneration.from_pretrained(
#     r"E:\Abdul_Muqtadir\Thesis\Idefics2\checkpoint-1600",  # Replace with the path to your fine-tuned model
#     torch_dtype=torch.float16,
# )

# # If you used LoRA or QLORA, you might need to enable adapters
# if USE_LORA or USE_QLORA:
#     model.enable_adapters()

# # Move the model to the desired device
# model.to(DEVICE)

In [None]:
# from transformers import BitsAndBytesConfig, AutoModelForVision2Seq

# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=torch.float16
# )
# model = AutoModelForVision2Seq.from_pretrained(
#     "SalmanFaroz/idefics2-8b-DocVQA-SP",
#     torch_dtype=torch.float16,
#     quantization_config=quantization_config,
# ).to(device)


In [None]:
# Print a banner followed by the model's parameter count, one item per line.
print(
    'MODEL',
    '-----------------------------------------------------------------------------------------------------------',
    model.num_parameters(),
    sep='\n',
)


MODEL
-----------------------------------------------------------------------------------------------------------
8426094832


## Model Study



In [None]:
# Print a structural overview of the model: the parameter count, then the
# vision model, the text model and the LM head, each under its own banner.
_rule = '-----------------------------------------------------------------------------------------------------------'
_vision = model.base_model.vision_model
_text = model.base_model.text_model

# Every entry is printed on its own line, in this exact order.
_overview = [
    'MODEL', _rule, model.num_parameters(),
    'VISION MODEL', _rule,
    'ENCODER EMBEDDING', _vision.embeddings, _rule,
    'ENCODER', _vision.encoder, _rule,
    'POST LAYER NORMALIZATION', _vision.post_layernorm, _rule,
    '',
    'TEXT MODEL', _rule,
    'DECODER EMBEDDING', _text.embed_tokens, _rule,
    'DECODER', _text.layers, _rule,
    'DECODER LAYER NORMALIZATION', _text.norm, _rule,
    '',
    'LM HEAD', _rule,
    'FINAL LINEAR LAYER', model.lm_head, _rule,
]
for _item in _overview:
    print(_item)

MODEL
-----------------------------------------------------------------------------------------------------------
8426094832
VISION MODEL
-----------------------------------------------------------------------------------------------------------
ENCODER EMBEDDING
Idefics2VisionEmbeddings(
  (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
  (position_embedding): Embedding(4900, 1152)
)
-----------------------------------------------------------------------------------------------------------
ENCODER
Idefics2Encoder(
  (layers): ModuleList(
    (0-26): 27 x Idefics2EncoderLayer(
      (self_attn): Idefics2VisionAttention(
        (k_proj): Linear(in_features=1152, out_features=1152, bias=True)
        (v_proj): Linear(in_features=1152, out_features=1152, bias=True)
        (q_proj): Linear(in_features=1152, out_features=1152, bias=True)
        (out_proj): Linear(in_features=1152, out_features=1152, bias=True)
      )
      (layer_norm1): LayerNor

# Load Dataset

In [None]:
from datasets import load_dataset, load_from_disk

# dataset = load_from_disk("Desktop/Abdul_Muqtadir/Thesis/Dataset/Dataset1")

# If the dataset is gated/private, make sure you have run huggingface-cli login
# dataset = load_dataset("AbdulMuqtadir/DocVQA_Processed_Dataset")

# Complete dataset, stored on the local disk.
# The path is a raw string so that the Windows backslashes are never read as
# escape sequences (non-raw "\A", "\T", "\D", "\P" are invalid escapes that
# newer Python versions warn about).
dataset = load_from_disk(r"E:\Abdul_Muqtadir\Thesis\Dataset\Processed_Dataset2")

# Subset of the data, stored on the local disk.
# dataset = load_from_disk(r"E:\Abdul_Muqtadir\Thesis\Dataset\subset")
dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answers', 'image_raw', 'ground_truth'],
        num_rows: 30000
    })
    distill: Dataset({
        features: ['question', 'answers', 'image_raw', 'ground_truth'],
        num_rows: 9463
    })
    test: Dataset({
        features: ['question', 'answers', 'image_raw', 'ground_truth'],
        num_rows: 2500
    })
    valid: Dataset({
        features: ['question', 'answers', 'image_raw', 'ground_truth'],
        num_rows: 2849
    })
})

In [None]:
# Use the 'train' split as the training set handed to the Trainer.
train_dataset = dataset['train']
#eval_dataset = dataset['valid']

In [None]:
# Load the saved evaluation subset from disk. The raw string keeps the Windows
# backslashes literal instead of relying on invalid escape sequences such as "\e".
eval_dataset = load_from_disk(r"E:\Abdul_Muqtadir\Thesis\Dataset\eval_subset")

In [None]:
# Show the number of examples in the training and evaluation sets.
len(train_dataset), len(eval_dataset)

(30000, 1500)

# Training

## Dataset Class

In [None]:
import random

class MyDataCollator:
    """Collate DocVQA examples into one padded training batch.

    Every example must provide:
      * ``ground_truth`` - a JSON string holding a ``gt_parses`` list of
        ``{"question": ..., "answer": ...}`` dicts (only the first entry is used);
      * ``image_raw`` - the document image.

    All tokenization and image processing goes through the processor given to
    the constructor, so the collator does not depend on a global ``processor``.
    """

    def __init__(self, processor):
        """Store the processor and look up the id of the ``<image>`` special token."""
        self.processor = processor
        self.image_token_id = processor.tokenizer.additional_special_tokens_ids[
            processor.tokenizer.additional_special_tokens.index("<image>")
        ]

    def __call__(self, examples):
        """Build a batch from ``examples``.

        Returns:
            The processor output for the whole batch with an added ``labels``
            entry: a copy of ``input_ids`` in which padding positions are
            replaced by the ``<image>`` token id.
        """
        texts = []
        images = []
        for example in examples:
            GT = json.loads(example['ground_truth'])
            # Train on the first question/answer pair of the example only.
            question = GT['gt_parses'][0]['question']
            answer = GT['gt_parses'][0]['answer']
            image = example['image_raw']

            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Answer briefly. short and key, value based"},
                        {"type": "image"},
                        {"type": "text", "text": question}
                    ]
                },
                {
                    "role": "assistant",
                    "content": [
                        {"type": "text", "text": answer}
                    ]
                }
            ]
            # The answer is already part of the conversation, so no generation prompt is added.
            text = self.processor.apply_chat_template(messages, add_generation_prompt=False)
            texts.append(text.strip())
            # One image per sample, wrapped in its own list.
            images.append([image])

        batch = self.processor(text=texts, images=images, return_tensors="pt", padding=True)

        labels = batch["input_ids"].clone()
        # Padding positions are relabelled with the image token id.
        # NOTE(review): presumably the model's loss ignores image-token labels -
        # confirm for the installed transformers version.
        labels[labels == self.processor.tokenizer.pad_token_id] = self.image_token_id
        batch["labels"] = labels

        return batch




In [None]:
# Instantiate the collator with the loaded processor; it is later passed to the Trainer.
data_collator = MyDataCollator(processor)

## Training Loop

In [None]:
# Create Trainer
#from pytorch_lightning.loggers import WandbLogger

#wandb_logger = WandbLogger(project="NRPU", name="DONUT_FT_EUGD_7000")
#wandb_logger = WandbLogger(project="DONUT", name="Distil_DONUT_Realdata")



import wandb
from pytorch_lightning.loggers import WandbLogger

# Close any run left open by a previous execution of this cell, then start a
# fresh Weights & Biases run for this experiment.
wandb.finish()
wandb.init(project="DocVQA_with_VLM", name="Idefics2_with_validation_final4")
# NOTE(review): wandb_logger is a PyTorch Lightning logger and is not referenced
# by any other visible cell (the Trainer reports via report_to="wandb") - confirm
# whether it is still needed.
wandb_logger = WandbLogger()



wandb: Currently logged in as: studydrive-ee (abdul1). Use `wandb login --relogin` to force relogin


In [None]:
# Check the size of the evaluation set.
len(eval_dataset)

1500

In [None]:
# import random

# # Shuffle the dataset and select the first 1200 examples
# eval_dataset = eval_dataset.shuffle(seed=random.randint(0, 1000)).select(range(1500))


In [None]:
# Re-check the size of the evaluation set (the optional shuffle/select cell before this is commented out).
len(eval_dataset)

1500

In [None]:
# from datasets import dataset_dict
# eval_dataset.save_to_disk("E:\Abdul_Muqtadir\Thesis\Dataset\eval_subset")

Saving the dataset (3/3 shards): 100%|█████████████████████████████████████| 1500/1500 [00:08<00:00, 186.08 examples/s]


In [None]:
from transformers import TrainingArguments, Trainer

# Hyper-parameters for the Hugging Face Trainer, grouped by purpose.
training_args = TrainingArguments(
    # --- output and checkpointing ---
    output_dir=r"E:\Abdul_Muqtadir\Thesis\Idefics2\model save directory",
    save_strategy="steps",
    save_steps=100,
    save_total_limit=15,
    #load_best_model_at_end=True,
    #push_to_hub_model_id="idefics2-8b-docvqa-finetuned-tutorial",

    # --- schedule and batch sizes ---
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,  # 16 x 4 = 64 samples per optimizer step per device

    # --- optimisation ---
    learning_rate=1e-4,
    warmup_steps=50,
    weight_decay=0.001,
    fp16=True,

    # --- logging and evaluation ---
    logging_steps=50,
    # Step-based evaluation needs an eval_dataset to be given to the Trainer.
    evaluation_strategy="steps",
    eval_steps=400,
    report_to="wandb",

    # Keep the raw dataset columns so the data collator can read them.
    remove_unused_columns=False,
)




In [None]:
# NOTE(review): leftover debugging statement; the recorded cell output ("True")
# does not match what this line prints, so the saved output is stale.
print('hell')



True

In [None]:
# Authenticate with Weights & Biases WITHOUT embedding the API key in the notebook.
# With no explicit key, wandb resolves credentials itself: the WANDB_API_KEY
# environment variable, previously stored credentials, or an interactive prompt.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked and regenerated in the W&B account settings.
wandb.login()

In [None]:
# from peft import get_peft_model
# model = get_peft_model(model)

In [None]:

# Build the Hugging Face Trainer around the model loaded above.
# eval_dataset is passed because training_args asks for step-based evaluation
# (evaluation_strategy="steps", eval_steps=400); without an evaluation set the
# Trainer raises a ValueError as soon as evaluation is requested.
# Note: evaluating the loss on the eval set uses some additional GPU memory.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


## Trainer

In [None]:
# Start fine-tuning with the settings held in training_args.
# (The recorded output shows this run was stopped manually with a KeyboardInterrupt.)
trainer.train()

KeyboardInterrupt: 

# Inference


In [None]:
# Pick the 'test' split for inference and evaluation, then display its summary.
Test_dataset = dataset['test']
Test_dataset

Dataset({
    features: ['question', 'answers', 'image_raw', 'ground_truth'],
    num_rows: 2500
})

In [None]:
# Inspect the first test sample (question, answers, image and ground-truth JSON).
Example = Test_dataset[0]
Example

{'question': 'What is the no of copies of the report distributed in the regional desk?',
 'answers': ['2 copies', '2'],
 'image_raw': <PIL.PngImagePlugin.PngImageFile image mode=L size=1684x2189>,
 'ground_truth': '{"gt_parses": [{"question" : "What is the no of copies of the report distributed in the regional desk?", "answer" : "2 copies"}, {"question" : "What is the no of copies of the report distributed in the regional desk?", "answer" : "2"}]}'}

In [None]:
# Display the document image of the selected example.
Example['image_raw']

In [None]:
def Inference(Model, Processor, Image, Query):
    """Answer a question about a document image with a vision-language model.

    The function works only with the objects passed in; it does not read the
    notebook-level ``model``, ``processor`` or ``device`` variables.

    Args:
        Model: generation model; must expose ``generate(...)`` and ``device``.
        Processor: matching processor; must expose ``apply_chat_template``,
            be callable on ``(text, images=..., return_tensors=...)`` and
            expose ``batch_decode``.
        Image: the document image handed to the processor.
        Query: the question text.

    Returns:
        The decoded text of the first generated sequence. In the recorded
        notebook output this text still contains the prompt
        ("User: ... Assistant: ..."), so callers extract the answer that
        follows "Assistant:".
    """
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Answer briefly. short and key, value based"},
            {"type": "image"},
            {"type": "text", "text": Query}
        ],
    }]

    # Append the generation prompt so the model continues with the assistant turn.
    text = Processor.apply_chat_template(messages, add_generation_prompt=True)

    inputs = Processor(text, images=Image, return_tensors="pt")
    # Place the inputs on the same device as the model that will consume them.
    inputs = inputs.to(Model.device)

    generated_ids = Model.generate(**inputs, max_new_tokens=1024)
    generated_text = Processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return generated_text




In [None]:
# Qualitative check: run the model on one test sample and compare by eye.
Example = Test_dataset[3]

# Decode the ground-truth JSON and take its first question/answer pair.
GT = json.loads(Example['ground_truth'])
first_parse = GT['gt_parses'][0]
question, target = first_parse['question'], first_parse['answer']
image = Example['image_raw']

print(f"question is : {question}")
print(f"Answer is: {target}")

# Generate the answer for this image/question and show the raw decoded text.
A = Inference(model, processor, image, question)
print(f"Prediction {A}")

question is : What is the cost of TIP STEAK?
Answer is: $1.69
Prediction User: Answer briefly. short and key, value based What is the cost of TIP STEAK? 
Assistant: $1 69.


In [None]:
# for k, v in Example.items():
#     print(k)

# Evaluation

In [None]:
# Load the weights stored in the checkpoint-300 directory, in float16.
# Note: this rebinds `model`, replacing the model object created earlier in the notebook.
model = Idefics2ForConditionalGeneration.from_pretrained(
    r"E:\Abdul_Muqtadir\Thesis\Idefics2\model save directory\checkpoint-300",
    torch_dtype=torch.float16,
)

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 7/7 [00:27<00:00,  3.97s/it]


In [None]:
# Print the parameter count of the reloaded model.
print(model.num_parameters())

8426094832


In [None]:
# Move the reloaded model to the selected device (the cell output is the module's repr).
model.to(device)

Idefics2ForConditionalGeneration(
  (model): Idefics2Model(
    (vision_model): Idefics2VisionTransformer(
      (embeddings): Idefics2VisionEmbeddings(
        (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
        (position_embedding): Embedding(4900, 1152)
      )
      (encoder): Idefics2Encoder(
        (layers): ModuleList(
          (0-26): 27 x Idefics2EncoderLayer(
            (self_attn): Idefics2VisionAttention(
              (k_proj): Linear(in_features=1152, out_features=1152, bias=True)
              (v_proj): Linear(in_features=1152, out_features=1152, bias=True)
              (q_proj): Linear(in_features=1152, out_features=1152, bias=True)
              (out_proj): Linear(in_features=1152, out_features=1152, bias=True)
            )
            (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
            (mlp): Idefics2VisionMLP(
              (activation_fn): PytorchGELUTanh()
              (fc1): Linear(in

In [None]:
#import donut

In [None]:
from donut import JSONParseEvaluator

def compare_json(target, prediction):
    """Score a prediction against its target with Donut's ``JSONParseEvaluator``.

    Args:
        target: the ground-truth value.
        prediction: the model output to be scored.

    Returns:
        The value returned by ``JSONParseEvaluator.cal_acc(prediction, target)``.
    """
    # A fresh evaluator is created on every call; the prediction goes first.
    return JSONParseEvaluator().cal_acc(prediction, target)

In [None]:
import re
import json
import time
from distance import levenshtein

# Evaluate the model on every sample of the test split.
# For each sample the first question/answer pair of the ground truth is used;
# prediction and target are normalised (upper-case, spaces removed, trailing
# dots stripped) before the TED-based score and the normalised Levenshtein
# similarity are computed.
Test_TED = 0
total_norm_leven_sim = 0
total_inference_time = 0
num_test_samples = len(Test_dataset)

for i in range(num_test_samples):
    # The timer covers the whole iteration: sample loading, generation and scoring.
    start_time = time.time()
    print("sample number:", i)
    Sample = Test_dataset[i]
    GT = json.loads(Sample['ground_truth'])
    question = GT['gt_parses'][0]['question']
    target = GT['gt_parses'][0]['answer']
    image = Sample['image_raw']

    Prediction = Inference(model, processor, image, question)
    # Keep only the text after "Assistant:". If the marker is missing, fall
    # back to the whole decoded text instead of crashing on a None match.
    match = re.search(r'Assistant:\s*(.*)', Prediction)
    Prediction = match.group(1) if match else Prediction.strip()

    Prediction = Prediction.upper().replace(" ", "").rstrip(".")
    target = target.upper().replace(" ", "").rstrip(".")

    Sample_TED_Score = compare_json(target, Prediction)

    print({
    "question": question,
    "Target": target,
    "Prediction": Prediction,
    "Sample TED": Sample_TED_Score
    })

    Test_TED += Sample_TED_Score

    # Calculate Levenshtein similarity (on the JSON-encoded strings, normalised
    # by the length of the encoded target).
    gt_text = json.dumps(target)
    pred_text = json.dumps(Prediction)
    len_gt = max(len(gt_text), 1)  # Ensure denominator is not zero
    norm_leven_sim = (len_gt - levenshtein(gt_text, pred_text)) / len_gt
    total_norm_leven_sim += norm_leven_sim

    end_time = time.time()
    inference_time = end_time - start_time
    total_inference_time += inference_time

# Averages are only defined when at least one sample was evaluated.
if num_test_samples > 0:
    TED_Score_TestDataset = Test_TED / num_test_samples
    average_norm_leven_sim = total_norm_leven_sim / num_test_samples

    print("Average TED on Test Dataset:", TED_Score_TestDataset)
    print("Average Normalized Levenshtein Similarity:", average_norm_leven_sim)
    print("Total Inference Time:", total_inference_time, "seconds")
else:
    print("Test dataset is empty; no metrics were computed.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
{'question': 'what is the amount mentioned in the given form ?', 'Target': '88.80', 'Prediction': '88.80', 'Sample TED': 1.0}
sample number: 2
{'question': 'What is the status of security?', 'Target': 'OFF', 'Prediction': 'OFF', 'Sample TED': 1.0}
sample number: 3
{'question': 'What is the cost of TIP STEAK?', 'Target': '$1.69', 'Prediction': '$1.69', 'Sample TED': 1.0}
sample number: 4
{'question': 'What is the document title?', 'Target': 'PROMOTIONPENETRATIONMODEL', 'Prediction': 'PROMOTIONPENETRATIONMODEL', 'Sample TED': 1.0}
sample number: 5
{'question': 'what is the standard form no.?', 'Target': '1034A', 'Prediction': '1034A', 'Sample TED': 1.0}
sample number: 6
{'question': 'What is the date mentioned in the top of the document ?', 'Target': 'MARCH61962', 'Prediction': 'MARCH6,1963', 'Sample TED': 0.8}
sample number: 7
{'question': 'what is the vendor name ?', 'Target': 'R.J.REYNOLDSTOBACCOCO', 'Prediction': 'R.J.R

# Evaluation on categories

In [None]:
from datasets import load_from_disk
# Load the per-category test splits from disk and display the resulting DatasetDict.
category_dataset = load_from_disk(r'E:\Abdul_Muqtadir\Thesis\Dataset\Test_categories')
category_dataset

DatasetDict({
    text: Dataset({
        features: ['question', 'answers', 'image_raw', 'ground_truth'],
        num_rows: 58
    })
    table: Dataset({
        features: ['question', 'answers', 'image_raw', 'ground_truth'],
        num_rows: 58
    })
    figure: Dataset({
        features: ['question', 'answers', 'image_raw', 'ground_truth'],
        num_rows: 58
    })
    checklists_ratings: Dataset({
        features: ['image', 'text'],
        num_rows: 58
    })
})

In [None]:
import re
import json
import time
from distance import levenshtein

# Evaluate the model separately on every category split, then aggregate.
# A split is only evaluated when it has the columns this loop reads
# ('ground_truth' and 'image_raw'). In the recorded run the
# 'checklists_ratings' split only has ['image', 'text'] and aborted the whole
# evaluation with KeyError: 'ground_truth'; such splits are now reported and skipped.
# TODO(review): if 'image'/'text' hold the same information under other names,
# rename those columns in the dataset so that split can be evaluated too.
Test_TED = 0
total_norm_leven_sim = 0
total_inference_time = 0
total_samples = 0  # To track total samples across splits

# Iterate over each split
for split in category_dataset:
    print(f"Processing split: {split}")
    print("------------------------------------------------------------")
    print("------------------------------------------------------------")
    print("------------------------------------------------------------")
    data = category_dataset[split]

    missing_columns = [
        name for name in ('ground_truth', 'image_raw')
        if name not in data.column_names
    ]
    if missing_columns:
        print(f"Skipping split {split}: missing column(s) {missing_columns}; "
              f"available columns: {data.column_names}")
        continue
    if len(data) == 0:
        # Nothing to average; avoids a division by zero below.
        print(f"Skipping split {split}: no samples")
        continue

    split_TED = 0  # For tracking TED within a split
    split_norm_leven_sim = 0  # For tracking normalized Levenshtein within a split
    split_inference_time = 0  # To track time for each split

    for i in range(len(data)):
        print(f"Sample {i+1}/{len(data)} in split {split}")

        # The timer covers sample loading, generation and scoring.
        start_time = time.time()

        Sample = data[i]
        GT = json.loads(Sample['ground_truth'])
        question = GT['gt_parses'][0]['question']
        target = GT['gt_parses'][0]['answer']
        image = Sample['image_raw']

        Prediction = Inference(model, processor, image, question)
        # Keep only the text after "Assistant:". If the marker is missing, fall
        # back to the whole decoded text instead of crashing on a None match.
        match = re.search(r'Assistant:\s*(.*)', Prediction)
        Prediction = match.group(1) if match else Prediction.strip()

        # Clean up Prediction and target
        Prediction = Prediction.upper().replace(" ", "").rstrip(".")
        target = target.upper().replace(" ", "").rstrip(".")

        # Calculate TED score
        Sample_TED_Score = compare_json(target, Prediction)
        split_TED += Sample_TED_Score

        print({
            "PREDICTION": Prediction,
            "TARGET": target,
            "SAMPLE TED": Sample_TED_Score
        })

        # Calculate Levenshtein similarity (on the JSON-encoded strings,
        # normalised by the length of the encoded target).
        gt_text = json.dumps(target)
        pred_text = json.dumps(Prediction)
        len_gt = max(len(gt_text), 1)  # Ensure denominator is not zero
        norm_leven_sim = (len_gt - levenshtein(gt_text, pred_text)) / len_gt
        split_norm_leven_sim += norm_leven_sim

        # End timing
        end_time = time.time()
        inference_time = end_time - start_time
        split_inference_time += inference_time

    # Average TED and Levenshtein similarity for the split
    TED_Score_TestDataset = split_TED / len(data)
    average_norm_leven_sim = split_norm_leven_sim / len(data)

    print(f"Average TED on Split {split}: {TED_Score_TestDataset}")
    print(f"Average Normalized Levenshtein Similarity for Split {split}: {average_norm_leven_sim}")
    print(f"Total Inference Time for Split {split}: {split_inference_time} seconds")

    # Aggregate results across all evaluated splits
    Test_TED += split_TED
    total_norm_leven_sim += split_norm_leven_sim
    total_inference_time += split_inference_time
    total_samples += len(data)

# Calculate overall metrics across all evaluated splits
if total_samples > 0:
    TED_Score_TestDataset = Test_TED / total_samples
    average_norm_leven_sim = total_norm_leven_sim / total_samples

    print("Overall Average TED on Test Dataset:", TED_Score_TestDataset)
    print("Overall Average Normalized Levenshtein Similarity:", average_norm_leven_sim)
    print("Overall Total Inference Time:", total_inference_time, "seconds")


Processing split: text
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
Sample 1/58 in split text
{'PREDICTION': '4910.97', 'TARGET': '49910.91', 'SAMPLE TED': 0.75}
Sample 2/58 in split text
{'PREDICTION': '98.1%', 'TARGET': '98.1%', 'SAMPLE TED': 1.0}
Sample 3/58 in split text
{'PREDICTION': '42', 'TARGET': '8', 'SAMPLE TED': 0}
Sample 4/58 in split text
{'PREDICTION': 'RACETRAC', 'TARGET': 'RACETRAC', 'SAMPLE TED': 1.0}
Sample 5/58 in split text
{'PREDICTION': 'LUNCH', 'TARGET': 'LUNCH', 'SAMPLE TED': 1.0}
Sample 6/58 in split text
{'PREDICTION': '1034A', 'TARGET': '1034A', 'SAMPLE TED': 1.0}
Sample 7/58 in split text
{'PREDICTION': '3,865', 'TARGET': '1,326', 'SAMPLE TED': 0.19999999999999996}
Sample 8/58 in split text
{'PREDICTION': 'ONEDAY', 'TARGET': 'ONEDAY', 'SAMPLE TED': 1.0}
Sample 9/58 in split text
{'PREDICTION': 'SEPTEMBER20,1985', 'TARGET'

KeyError: 'ground_truth'