In [None]:
!pip install datasets
!pip install orjson
!pip install git+https://github.com/openai/CLIP.git
!pip install faiss-gpu
!pip install accelerate
!pip install -U bitsandbytes
!pip install nltk
!pip install sentence-transformers
!pip install qwen_vl_utils

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [None]:
from datasets import load_dataset
from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt
import transformers
import orjson
import torch
from transformers import CLIPProcessor, CLIPModel
import clip
import faiss
import numpy as np
import json
import copy
from transformers import Blip2Processor, Blip2ForConditionalGeneration, BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from nltk.translate.bleu_score import sentence_bleu
from sentence_transformers import SentenceTransformer
from sklearn.metrics import f1_score
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.translate.meteor_score import meteor_score
from peft import LoraConfig, get_peft_model


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:
#Upload data/vqa_rad/VQA_RAD Dataset Public.json from the Github repository to your Google Drive and put the absolute path to it below
VQA_RAD_ANNOTS_FILE = r""

#Upload data/mimic_cxr/clip_text_features.json from the Github repository to your Google Drive and put the absolute path to it below
CLIP_TEXT_FEATURES_FILE = r""

#Upload data/mimic_cxr/clip-imp-pretrained_128_6_after_4.pt from the Github repository to your Google Drive and put the absolute path to it below
CLIP_MODEL_FILE = r""

#Upload data/vqa_rad/images from the Github repository to your Google Drive and put the absolute path to it below
IMAGE_PATH_PREFIX = r""

#Upload data/vqa_rad/vqa_annotations_promptmrg.json from the Github repository to your Google Drive and put the absolute path to it below
CHEX_ANNOTS_FILE = r""

TOP_K = 21

#Extract only the Chest X-ray [image, question, answer] triplets from the entire VQA-RAD dataset
def get_chex_image_data(vqa_rad_annotations_file):
    #Annotations originally stored in JSON format
    with open(vqa_rad_annotations_file, "rb") as f:
        annotations = orjson.loads(f.read())

    chex_data = [] #[[image_path, question, answer]]
    for entry in annotations:
      if entry["image_organ"] == "CHEST":
        chex_data.append([entry["image_name"], entry["question"], entry["answer"]])

    return chex_data


#Store all CLIP text features in Vector Database
def get_report_db_clip_features():
    #Load large JSON file with all clip features 270,790 x (1 x 512)
    with open(CLIP_TEXT_FEATURES_FILE, 'rb') as f:
        clip_text_features = orjson.loads(f.read())

    #Create FAISS Vector Database with all the CLIP text features stored in it
    embedding_dimension = len(clip_text_features[0])
    faiss_index = faiss.IndexFlatL2(embedding_dimension)
    faiss_index.add(np.array(clip_text_features).astype("float32"))
    faiss.write_index(faiss_index, "faiss_index.faiss")

    return clip_text_features, faiss_index


#Extract indices of Top-K most similar text features from Vector Database
def get_clip_indices(image_paths, clip_text_features, faiss_index):
    #Use ViT-B/32 because that is same backbone used in the CLIP model
    clip_model, clip_processor = clip.load("ViT-B/32", device="cuda", jit=False)
    clip_model.load_state_dict(torch.load(CLIP_MODEL_FILE, map_location="cuda"))
    clip_model.eval()

    all_top_matches = []
    for image_path in image_paths:
      image = Image.open(IMAGE_PATH_PREFIX + image_path)
      model_input = clip_processor(image).unsqueeze(0).to("cuda")

      #Pass image features through CLIP to get embeddings
      with torch.no_grad():
          image_features = clip_model.encode_image(model_input)

      #Normalize the CLIP output embeddings and move to CPU device for direct comparison with Vector Database
      image_features /= image_features.norm(dim=-1, keepdim=True) #Size is 1x512 and matches each embedding in clip_text_features length = 512
      image_features = image_features.cpu().numpy().astype('float32')

      #Do Similarity Search to find Top-K most similar vectors in Vector Database and return their indices
      distance_matrix, top_matches = faiss_index.search(image_features, k=TOP_K)
      all_top_matches.append(top_matches)

    return all_top_matches


#Create JSON file with annotations to be downloaded and sent to PromptMRG for inferencing to generate the reports for the images in the VQA RAD testing dataset
def create_json_file(all_top_matches, image_names):
  data = {"test": []}

  for image_name, clip_indices in zip(image_names, all_top_matches):
    clip_indices = clip_indices[0].tolist()

    #Create JSON entry that follows the format PromptMRG expects each image to be annotated in
    entry = {
        "id": image_name,
        "report": "NA",
        "image_path": [
            image_name
        ],
        "split": "test",
        "view": [
            "PA"
        ],
        "labels": 14 * [0],
        "clip_indices": clip_indices
    }
    data["test"].append(entry)

  with open(CHEX_ANNOTS_FILE, 'w') as chex_annots_json:
    json.dump(data, chex_annots_json, indent=4)

  return data



if __name__ == "__main__":
    #First process dataset to only include all 794 CHEX images
    image_data = get_chex_image_data(VQA_RAD_ANNOTS_FILE)
    image_names = list(set([image_data[i][0] for i in range(len(image_data))]))
    print(f"{len(image_names)} CHEX Images and {len(image_data)} total QA Pairs")

    #Then extract and store all clip_text_features in FAISS Vector Store
    clip_text_features, faiss_index = get_report_db_clip_features()

    #Then get clip_indices for all 794 CHEX images extracted earlier
    all_top_matches = get_clip_indices(image_names, clip_text_features, faiss_index)
    print(f"Acquired {len(all_top_matches)} sets for {len(image_names)} Images of Top {TOP_K} matches")

    #Now create new JSON vqa_rad_annotations_promptmrg.json file to be passed as input to PromptMRG to generate reports for these images
    json_file_data = create_json_file(all_top_matches, image_names)
    print(f"Created CHEX Annotations JSON File and Stored in Google Drive ready for local download")

107 CHEX Images and 794 total QA Pairs


100%|████████████████████████████████████████| 338M/338M [00:02<00:00, 146MiB/s]
  clip_model.load_state_dict(torch.load(CLIP_MODEL_FILE, map_location="cuda"))


Acquired 107 sets for 107 Images of Top 21 matches
Created CHEX Annotations JSON File and Stored in Google Drive ready for local download


In [None]:
# At this point, here is what has happened
# 1. We have taken the full VQA-RAD dataset and extracted only the [image, question, answer] triplets for Chest X-rays

# 2. For each of these images we have found the Top-K most similar feature embeddings from the Vector Database that stores feature embeddings from the MIMIC-CXR dataset the CLIP model was trained on

# 3. Given these [image, question, answer] and Top-K most similar feature embedding information we have created a new annotation JSON file that stores all of this information together

# 4. We are still missing one thing - and that is the medical reports for all these Chest-Xray images. We do not have these reports already because VQA-RAD is a VQA dataset not a medical report generation one
#    Therefore, we have to use PromptMRG to generate these reports for these images.

# 5. Now, the annotation JSON file simply needs to be inputted into PromptMRG so that it can generate reports for these images

# 6. This requires going back to your local Python environment and running inferencing for PromptMRG using "./test_vqa_rad.sh"

# 7. However, to save time, we have already done this step and have saved the medical reports that PromptMRG generated for these images

# 8. You can find them in the GitHub repository at "results/promptmrg/experiment_results/base_iu_model/test/base_iu_model_vqa_rad_mrg_test_log.json"

# 9. All you have to do is upload that file into your Google Drive and put the absolute path to it below in the next cell

# 10. Then simply continue running this notebook!

In [None]:
#Upload results/promptmrg/experiment_results/base_iu_model/test/base_iu_model_vqa_rad_mrg_test_log.json from the Github repository to your Google Drive and put the absolute path to it below
IMAGE_REPORTS_FILE = r""

#The goal here is to take a list storing [image, question, answer] triplets and convert it to -> [image, question, answer, report] by adding in the reports that PromptMRG just created
with open(IMAGE_REPORTS_FILE, "rb") as f:
    image_report_dict = orjson.loads(f.read())

image_data = get_chex_image_data(VQA_RAD_ANNOTS_FILE)

#Print out some sample [image, question, answer] entries
print(image_data[0])
print(image_data[56])


#Append report to the triplet
image_data_with_report = copy.deepcopy(image_data)

for i in range(len(image_data_with_report)):
  image_name = image_data_with_report[i][0]
  report = image_report_dict[image_name]
  image_data_with_report[i].append(report)

#Print the same sample values now with the report appended
print(image_data_with_report[0])
print(image_data_with_report[56])

['synpic29265.jpg', 'Are the lungs normal appearing?', 'No']
['synpic33422.jpg', 'Has the midline of the mediastinum shifted?', 'No']
['synpic29265.jpg', 'Are the lungs normal appearing?', 'No', 'the heart is normal in size. the mediastinum is unremarkable. the lungs are clear. there is no pleural effusion or pneumothorax. the xxxx are intact.']
['synpic33422.jpg', 'Has the midline of the mediastinum shifted?', 'No', 'the heart is normal in size. the mediastinum is unremarkable. the lungs are clear. there is no pleural effusion or pneumothorax. the skeletal structures are normal.']


In [None]:
#Now load the Moondream 2 VLM from Huggingface to perform the experiments
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/319 [00:00<?, ?B/s]

configuration_moondream.py:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

moondream.py:   0%|          | 0.00/7.20k [00:00<?, ?B/s]

modeling_phi.py:   0%|          | 0.00/63.1k [00:00<?, ?B/s]

region_model.py:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

fourier_features.py:   0%|          | 0.00/558 [00:00<?, ?B/s]

vision_encoder.py:   0%|          | 0.00/10.2k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.74G [00:00<?, ?B/s]

In [None]:
#Perform Experiment 1: VLM Not Fine Tuned and Knowledge-Enhanced MVQA With Reports

results_with_reports = {}
for i, image_q_pair in enumerate(image_data_with_report):
  #Iterate through first 160 images = images in testing dataset
  if i <= 160:
    image = Image.open(IMAGE_PATH_PREFIX + image_q_pair[0])
    enc_image = model.encode_image(image)
    question = image_q_pair[1]
    text_prompt = f"Question: {question}\nMedical Report as reference: {image_q_pair[3]}"
    vlm_output = model.answer_question(enc_image, text_prompt, tokenizer)
    results_with_reports[i] = [text_prompt, image_q_pair[2], vlm_output]

    print(f"Image: {image_q_pair[0]}")
    print(f"{text_prompt}")
    print(f"GT Answer: {image_q_pair[2]}")
    print(f"VLM Answer: {vlm_output}")


Image: synpic29265.jpg
Question: Are the lungs normal appearing?
Medical Report as reference: the heart is normal in size. the mediastinum is unremarkable. the lungs are clear. there is no pleural effusion or pneumothorax. the xxxx are intact.
GT Answer: No
VLM Answer: Yes
Image: synpic29265.jpg
Question: Is there evidence of a pneumothorax
Medical Report as reference: the heart is normal in size. the mediastinum is unremarkable. the lungs are clear. there is no pleural effusion or pneumothorax. the xxxx are intact.
GT Answer: No
VLM Answer: No
Image: synpic28602.jpg
Question: What type of imaging does this not represent?
Medical Report as reference: the heart is normal in size. the mediastinum is unremarkable. the lungs are clear. there is no pleural effusion or pneumothorax. the xxxx are intact.
GT Answer: ultrasound
VLM Answer: Radiographic
Image: synpic29265.jpg
Question: Is this a MRI of the chest?
Medical Report as reference: the heart is normal in size. the mediastinum is unrema

In [None]:
#Perform analysis of Experiment 1
#Calculate SBERT Similarity Score, METEOR Score, and F1 Score


sbert_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
total_sim_score = 0

binary_answer_ground_truth = []
binary_answer_vlm = []
binary_answers = ["yes", "no"]

total_meteor_score = 0

for entry in results_with_reports.keys():
  ground_truth = results_with_reports[entry][1]
  vlm_prediction = results_with_reports[entry][2]

  #Check if question has binary answer (ie a close ended question) and if so, keep track of ground truth and VLM answers
  if ground_truth.lower() in binary_answers and vlm_prediction.lower() in binary_answers:
    binary_answer_ground_truth.append(ground_truth.lower())
    binary_answer_vlm.append(vlm_prediction.lower())

  #If answer is incorrect, count similarity as zero manually becasue Sentence Bert does not always think yes is orthogonal to no
  if ground_truth.lower() == "yes" and vlm_prediction.lower() == "no" or ground_truth.lower() == "no" and vlm_prediction.lower() == "yes":
    sim_score = 0

  else:
    #SBERT Score
    ground_truth_embedding = sbert_model.encode(ground_truth)
    vlm_prediction_embedding = sbert_model.encode(vlm_prediction)
    sim_score = sbert_model.similarity(ground_truth_embedding, vlm_prediction_embedding).item() #returns Tensor so call .item() to get scalar value

    #METEOR Score
    meteor_score_with = meteor_score([nltk.word_tokenize(ground_truth.lower())], nltk.word_tokenize(vlm_prediction.lower()))
    total_meteor_score += meteor_score_with

  total_sim_score += sim_score

#Display Final Results for Experiment 1
avg_sim_score_with = round(total_sim_score / len(results_with_reports), 2)
print(f"Average Similarity Score With Reports: {avg_sim_score_with}")

avg_meteor_score_with = round(total_meteor_score / len(results_with_reports), 2)
print(f"Average METEOR Score With Reports: {avg_meteor_score_with}")

binary_f1_score = round(f1_score(binary_answer_ground_truth, binary_answer_vlm, average="binary", pos_label="yes"), 2)
print(f"Binary F1 Score: {binary_f1_score}")



Average Similarity Score With Reports: 0.52
Average METEOR Score With Reports: 0.22
Binary F1 Score: 0.64


In [None]:
#Perform Experiment 2: VLM Not Fine Tuned and Independent MVQA Without Reports

results_without_reports = {}
for i, image_q_pair in enumerate(image_data):
  #Iterate through first 160 images = images in testing dataset
  if i <= 160:
    image = Image.open(IMAGE_PATH_PREFIX + image_q_pair[0])
    enc_image = model.encode_image(image)
    question = image_q_pair[1]
    text_prompt = f"Question: {question}"
    vlm_output = model.answer_question(enc_image, text_prompt, tokenizer)
    results_without_reports[i] = [text_prompt, image_q_pair[2], vlm_output]

    print(f"Image: {image_q_pair[0]}")
    print(f"Question: {image_q_pair[1]}")
    print(f"GT Answer: {image_q_pair[2]}")
    print(f"VLM Answer: {vlm_output}")

Image: synpic29265.jpg
Question: Are the lungs normal appearing?
GT Answer: No
VLM Answer: No, the lungs appear to be damaged or affected, as they are depicted in a black and white radiographic image.
Image: synpic29265.jpg
Question: Is there evidence of a pneumothorax
GT Answer: No
VLM Answer: Yes, there is evidence of a pneumothorax in the image. The chest radiograph shows a large amount of air in the space between the lungs, which is a characteristic of a pneumothorax. This condition can cause shortness of breath, chest pain, and sometimes a feeling of suffocation. The radiograph helps the medical professional to identify and diagnose the presence of a pneumothorax, allowing for appropriate treatment and care for the patient.
Image: synpic28602.jpg
Question: What type of imaging does this not represent?
GT Answer: ultrasound
VLM Answer: This image represents an X-ray, which is a type of medical imaging that uses electromagnetic radiation to create an image of the body's internal str

In [None]:
#Perform analysis of Experiment 2
#Calculate SBERT Similarity Score, METEOR Score, and F1 Score

sbert_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
total_sim_score = 0

binary_answer_ground_truth = []
binary_answer_vlm = []
binary_answers = ["yes", "no"]

total_meteor_score = 0

for entry in results_without_reports.keys():
  ground_truth = results_without_reports[entry][1]
  vlm_prediction = results_without_reports[entry][2]

  #Check if question has binary answer (ie a close ended question) and if so, keep track of ground truth and VLM answers
  index = vlm_prediction.find(",")
  if index != -1 and ground_truth.lower() in binary_answers and vlm_prediction[:index].lower() in binary_answers:
    binary_answer_ground_truth.append(ground_truth.lower())
    binary_answer_vlm.append(vlm_prediction[:index].lower())

  #If answer is incorrect, count similarity as zero manually becasue Sentence Bert does not always think yes is orthogonal to no
  if ground_truth.lower() == "yes" and vlm_prediction.lower() == "no" or ground_truth.lower() == "no" and vlm_prediction.lower() == "yes":
    sim_score = 0

  else:
    #SBERT Score
    ground_truth_embedding = sbert_model.encode(ground_truth)
    vlm_prediction_embedding = sbert_model.encode(vlm_prediction)
    sim_score = sbert_model.similarity(ground_truth_embedding, vlm_prediction_embedding).item() #returns Tensor so call .item()

    #METEOR Score
    meteor_score_with = meteor_score([nltk.word_tokenize(ground_truth.lower())], nltk.word_tokenize(vlm_prediction.lower()))
    total_meteor_score += meteor_score_with

  total_sim_score += sim_score

#Display Final Results for Experiment 2
avg_sim_score_with = round(total_sim_score / len(results_with_reports), 2)
print(f"Average Similarity Score With Reports: {avg_sim_score_with}")

avg_meteor_score_with = round(total_meteor_score / len(results_with_reports), 2)
print(f"Average METEOR Score With Reports: {avg_meteor_score_with}")

binary_f1_score = round(f1_score(binary_answer_ground_truth, binary_answer_vlm, average="binary", pos_label="yes"), 2)
print(f"Binary F1 Score: {binary_f1_score}")



Average Similarity Score With Reports: 0.22
Average METEOR Score With Reports: 0.1
Binary F1 Score: 0.61


In [None]:
#Run this if you want to push the dataset to huggingface hub to visualize it on the Web UI
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
#Create formal train and test dataset objects to finetune the VLM

#ACKNOWLEDGEMENT OF CREDIT: https://medium.com/google-developer-experts/ml-story-fine-tune-vision-language-model-on-custom-dataset-8e5f5dace7b1
  #Utilized this tutorial for guidance on how to convert a dictionary to dataset object and push it to Huggingface Hub for visualization

from datasets import Dataset, Image

#Enter your HF ID if you want to push the datasets to your HF account for visualization
HF_ID = r""

#Reverse the image data due to the choice of images used for inferencing in Experiments 1 and 2
dataset_dict = {"ids": [], "image_names": [], "images": [], "questions": [], "answers": []}
image_data_copy = copy.deepcopy(image_data)
image_data_reversed = image_data_copy[::-1]

for i, data_triplet in enumerate(image_data_reversed):
  dataset_dict["ids"].append(str(i))
  dataset_dict["image_names"].append(str(data_triplet[0]))
  dataset_dict["images"].append(str(IMAGE_PATH_PREFIX + data_triplet[0]))
  dataset_dict["questions"].append(str(data_triplet[1]))
  dataset_dict["answers"].append(str(data_triplet[2]))

dataset = Dataset.from_dict(dataset_dict)
#Convert all the images to type Image()
dataset = dataset.cast_column("images", Image())

split_dataset = dataset.train_test_split(test_size=0.2, shuffle=False)

#If you want to push to HF Hub, then run the following lines (my default)
# split_dataset.push_to_hub(HF_ID)
# train_dataset = load_dataset(HF_ID, split="train")
# eval_dataset = load_dataset(HF_ID, split="test")

#If you dont want to push to HF Hub then run the following lines
train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]

[['synpic29265.jpg', 'Are the lungs normal appearing?', 'No'], ['synpic29265.jpg', 'Is there evidence of a pneumothorax', 'No'], ['synpic28602.jpg', 'What type of imaging does this not represent?', 'ultrasound'], ['synpic29265.jpg', 'Is this a MRI of the chest?', 'no'], ['synpic28602.jpg', 'What is not pictured in this image?', 'The extremities'], ['synpic28602.jpg', 'Is the trachea midline?', 'yes'], ['synpic28602.jpg', 'Is there evidence of an aortic aneurysm?', 'No'], ['synpic28602.jpg', 'Is there blunting of the costovertebral angles?', 'No'], ['synpic42202.jpg', 'Is there evidence of an aortic aneurysm?', 'yes'], ['synpic42202.jpg', 'Which organ system is abnormal in this image?', 'Cardiovascular'], ['synpic29265.jpg', 'Is there airspace consolidation on the left side?', 'Yes'], ['synpic42202.jpg', 'Is there a pneumothorax?', 'no'], ['synpic29265.jpg', 'Is there a fracture?', 'no'], ['synpic29265.jpg', 'How is the patient oriented?', 'Posterior-Anterior'], ['synpic28602.jpg', 'Is 

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/635 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/7 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

No files have been modified since last commit. Skipping to prevent empty commit.


In [None]:
#Use this to verify the datasets have been created correctly
print(train_dataset[0])
print(train_dataset[1])
print(train_dataset[2])

print(eval_dataset[-1])
print(eval_dataset[-2])
print(eval_dataset[-3])

{'ids': '0', 'image_names': 'synpic26764.jpg', 'images': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1192 at 0x7F53C4345FC0>, 'questions': 'Is this film taken in a PA modality?', 'answers': 'Yes'}
{'ids': '1', 'image_names': 'synpic26764.jpg', 'images': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1192 at 0x7F53C4346650>, 'questions': 'Is this a PA film?', 'answers': 'Yes'}
{'ids': '2', 'image_names': 'synpic38531.jpg', 'images': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x873 at 0x7F53C43470D0>, 'questions': 'Is the apical aeration normal or decreased?', 'answers': 'Normal'}
{'ids': '793', 'image_names': 'synpic29265.jpg', 'images': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=480x503 at 0x7F53C4347130>, 'questions': 'Are the lungs normal appearing?', 'answers': 'No'}
{'ids': '792', 'image_names': 'synpic29265.jpg', 'images': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=480x503 at 0x7F53C4347160>, 'questions': 'Is ther

In [None]:
#Configure the hyperparameters
EPOCHS = 2
BATCH_SIZE = 2
GRAD_ACCUM_STEPS = 1
LR = 1e-5
USE_WANDB = False

In [None]:
#Finetune the VLM for MVQA on the VQA-RAD training dataset

#ACKNOWLEDGEMENT OF CREDIT: https://github.com/vikhyat/moondream/blob/main/notebooks/Finetuning.ipynb
  #The following code was provided as the standard code for finetuning the Moondream 2 VLM. Given its unique architecture, the developers
  #released sample code as a tutorial for how to finetune their model. The only modifications I make to their code involve changing
  #some of the for loops and iteration structure to load my VQA-RAD dataset correctly. As the focus of this project is not exploring different
  #finetuning techniques, I relied on their default finetuning code. Future work for this project would definitely involve digging into this code
  #even more deeply and optimizing it further for the AIMRG and MVQA tasks.


from torch.utils.data import DataLoader
from bitsandbytes.optim import Adam8bit
import math
from einops import rearrange
from tqdm import tqdm

ANSWER_EOS = "<|endoftext|>"

# Number of tokens used to represent each image.
IMG_TOKENS = 729

DEVICE = "cuda"

model.to(DEVICE)

def collate_fn(batch):
    images = [sample['images'] for sample in batch]
    images = [model.vision_encoder.preprocess(image) for image in images]

    labels_acc = []
    tokens_acc = []

    for sample in batch:
        # print(sample)
        toks = [tokenizer.bos_token_id]
        labs = [-100] * (IMG_TOKENS + 1)

        # for qa in sample['qa']:
        q_t = tokenizer(
            f"\n\nQuestion: {sample['questions']}\n\nAnswer:",
            add_special_tokens=False
        ).input_ids
        toks.extend(q_t)
        labs.extend([-100] * len(q_t))

        a_t = tokenizer(
            f" {sample['answers']}{ANSWER_EOS}",
            add_special_tokens=False
        ).input_ids
        toks.extend(a_t)
        labs.extend(a_t)

        tokens_acc.append(toks)
        labels_acc.append(labs)

    max_len = -1
    for labels in labels_acc:
        max_len = max(max_len, len(labels))

    attn_mask_acc = []

    for i in range(len(batch)):
        len_i = len(labels_acc[i])
        pad_i = max_len - len_i

        labels_acc[i].extend([-100] * pad_i)
        tokens_acc[i].extend([tokenizer.eos_token_id] * pad_i)
        attn_mask_acc.append([1] * len_i + [0] * pad_i)

    return (
        images,
        torch.stack([torch.tensor(t, dtype=torch.long).to(DEVICE) for t in tokens_acc]),
        torch.stack([torch.tensor(l, dtype=torch.long).to(DEVICE) for l in labels_acc]),
        torch.stack([torch.tensor(a, dtype=torch.bool).to(DEVICE) for a in attn_mask_acc]),
    )

def compute_loss(batch):
    images, tokens, labels, attn_mask = batch

    images = [image.to(DEVICE) for image in images]
    tokens = tokens.to(DEVICE)
    labels = labels.to(DEVICE)
    attn_mask = attn_mask.to(DEVICE)

    with torch.no_grad():
        img_embs = model.vision_encoder(images)

    tok_embs = model.text_model.get_input_embeddings()(tokens)
    inputs_embeds = torch.cat((tok_embs[:, 0:1, :], img_embs, tok_embs[:, 1:, :]), dim=1)

    outputs = model.text_model(
        inputs_embeds=inputs_embeds,
        labels=labels,
        attention_mask=attn_mask,
    )

    return outputs.loss

def lr_schedule(step, max_steps):
    x = step / max_steps
    if x < 0.1:
        return 0.1 * LR + 0.9 * LR * x / 0.1
    else:
        return 0.1 * LR + 0.9 * LR * (1 + math.cos(math.pi * (x - 0.1))) / 2

dataloaders = {
    "train": DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        collate_fn=collate_fn,
    )
}

model.text_model.train()
model.text_model.transformer.gradient_checkpointing_enable()

total_steps = EPOCHS * len(dataloaders["train"]) // GRAD_ACCUM_STEPS
optimizer = Adam8bit(
    [
        {"params": model.text_model.parameters()},
    ],
    lr=LR * 0.1,
    betas=(0.9, 0.95),
    eps=1e-6
)

if USE_WANDB:
    import wandb
    wandb.init(
        project="moondream-ft",
        config={
            "EPOCHS": EPOCHS,
            "BATCH_SIZE": BATCH_SIZE,
            "GRAD_ACCUM_STEPS": GRAD_ACCUM_STEPS,
            "LR": LR,
        }
    )

i = 0
for epoch in range(EPOCHS):
    for batch in tqdm(dataloaders["train"], desc=f"Epoch {epoch + 1}/{EPOCHS}"):
        i += 1

        loss = compute_loss(batch)
        loss.backward()
        print(f"\nLoss: {round(loss.item(), 3)} | Epoch: {epoch}/{EPOCHS} | {i}/{len(dataloaders['train'])}")

        if i % GRAD_ACCUM_STEPS == 0:
            optimizer.step()
            optimizer.zero_grad()

            lr = lr_schedule(i / GRAD_ACCUM_STEPS, total_steps)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        if USE_WANDB:
            wandb.log({
                "loss/train": loss.item(),
                "lr": optimizer.param_groups[0]['lr']
            })

if USE_WANDB:
    wandb.finish()

Epoch 1/2:   0%|          | 1/318 [00:01<09:25,  1.78s/it]


Loss: 7.144 | Epoch: 0/2 | 1/318


Epoch 1/2:   1%|          | 2/318 [00:03<08:38,  1.64s/it]


Loss: 2.962 | Epoch: 0/2 | 2/318


Epoch 1/2:   1%|          | 3/318 [00:04<08:27,  1.61s/it]


Loss: 4.717 | Epoch: 0/2 | 3/318


Epoch 1/2:   1%|▏         | 4/318 [00:06<08:17,  1.58s/it]


Loss: 3.21 | Epoch: 0/2 | 4/318


Epoch 1/2:   2%|▏         | 5/318 [00:08<08:15,  1.58s/it]


Loss: 2.506 | Epoch: 0/2 | 5/318


Epoch 1/2:   2%|▏         | 6/318 [00:09<08:10,  1.57s/it]


Loss: 0.883 | Epoch: 0/2 | 6/318


Epoch 1/2:   2%|▏         | 7/318 [00:11<08:06,  1.56s/it]


Loss: 0.701 | Epoch: 0/2 | 7/318


Epoch 1/2:   3%|▎         | 8/318 [00:12<08:03,  1.56s/it]


Loss: 3.613 | Epoch: 0/2 | 8/318


Epoch 1/2:   3%|▎         | 9/318 [00:14<08:02,  1.56s/it]


Loss: 0.293 | Epoch: 0/2 | 9/318


Epoch 1/2:   3%|▎         | 10/318 [00:15<07:56,  1.55s/it]


Loss: 2.812 | Epoch: 0/2 | 10/318


Epoch 1/2:   3%|▎         | 11/318 [00:17<07:58,  1.56s/it]


Loss: 2.993 | Epoch: 0/2 | 11/318


Epoch 1/2:   4%|▍         | 12/318 [00:18<07:55,  1.56s/it]


Loss: 0.456 | Epoch: 0/2 | 12/318


Epoch 1/2:   4%|▍         | 13/318 [00:20<07:52,  1.55s/it]


Loss: 3.059 | Epoch: 0/2 | 13/318


Epoch 1/2:   4%|▍         | 14/318 [00:21<07:50,  1.55s/it]


Loss: 3.564 | Epoch: 0/2 | 14/318


Epoch 1/2:   5%|▍         | 15/318 [00:23<07:47,  1.54s/it]


Loss: 0.181 | Epoch: 0/2 | 15/318


Epoch 1/2:   5%|▌         | 16/318 [00:25<07:43,  1.54s/it]


Loss: 2.526 | Epoch: 0/2 | 16/318


Epoch 1/2:   5%|▌         | 17/318 [00:26<07:30,  1.50s/it]


Loss: 1.83 | Epoch: 0/2 | 17/318


Epoch 1/2:   6%|▌         | 18/318 [00:27<07:33,  1.51s/it]


Loss: 0.471 | Epoch: 0/2 | 18/318


Epoch 1/2:   6%|▌         | 19/318 [00:29<07:35,  1.52s/it]


Loss: 0.375 | Epoch: 0/2 | 19/318


Epoch 1/2:   6%|▋         | 20/318 [00:31<07:35,  1.53s/it]


Loss: 0.069 | Epoch: 0/2 | 20/318


Epoch 1/2:   7%|▋         | 21/318 [00:32<07:33,  1.53s/it]


Loss: 6.898 | Epoch: 0/2 | 21/318


Epoch 1/2:   7%|▋         | 22/318 [00:34<07:32,  1.53s/it]


Loss: 4.595 | Epoch: 0/2 | 22/318


Epoch 1/2:   7%|▋         | 23/318 [00:35<07:36,  1.55s/it]


Loss: 5.216 | Epoch: 0/2 | 23/318


Epoch 1/2:   8%|▊         | 24/318 [00:37<07:36,  1.55s/it]


Loss: 2.884 | Epoch: 0/2 | 24/318


Epoch 1/2:   8%|▊         | 25/318 [00:38<07:36,  1.56s/it]


Loss: 0.424 | Epoch: 0/2 | 25/318


Epoch 1/2:   8%|▊         | 26/318 [00:40<07:36,  1.56s/it]


Loss: 2.775 | Epoch: 0/2 | 26/318


Epoch 1/2:   8%|▊         | 27/318 [00:41<07:35,  1.57s/it]


Loss: 2.957 | Epoch: 0/2 | 27/318


Epoch 1/2:   9%|▉         | 28/318 [00:43<07:29,  1.55s/it]


Loss: 1.633 | Epoch: 0/2 | 28/318


Epoch 1/2:   9%|▉         | 29/318 [00:45<07:27,  1.55s/it]


Loss: 2.126 | Epoch: 0/2 | 29/318


Epoch 1/2:   9%|▉         | 30/318 [00:46<07:25,  1.55s/it]


Loss: 0.32 | Epoch: 0/2 | 30/318


Epoch 1/2:  10%|▉         | 31/318 [00:48<07:20,  1.53s/it]


Loss: 1.348 | Epoch: 0/2 | 31/318


Epoch 1/2:  10%|█         | 32/318 [00:49<07:20,  1.54s/it]


Loss: 0.516 | Epoch: 0/2 | 32/318


Epoch 1/2:  10%|█         | 33/318 [00:51<07:04,  1.49s/it]


Loss: 0.283 | Epoch: 0/2 | 33/318


Epoch 1/2:  11%|█         | 34/318 [00:52<06:53,  1.46s/it]


Loss: 0.422 | Epoch: 0/2 | 34/318


Epoch 1/2:  11%|█         | 35/318 [00:53<06:59,  1.48s/it]


Loss: 0.314 | Epoch: 0/2 | 35/318


Epoch 1/2:  11%|█▏        | 36/318 [00:55<07:06,  1.51s/it]


Loss: 4.293 | Epoch: 0/2 | 36/318


Epoch 1/2:  12%|█▏        | 37/318 [00:57<07:05,  1.51s/it]


Loss: 2.14 | Epoch: 0/2 | 37/318


Epoch 1/2:  12%|█▏        | 38/318 [00:58<07:06,  1.52s/it]


Loss: 0.565 | Epoch: 0/2 | 38/318


Epoch 1/2:  12%|█▏        | 39/318 [01:00<07:05,  1.53s/it]


Loss: 1.196 | Epoch: 0/2 | 39/318


Epoch 1/2:  13%|█▎        | 40/318 [01:01<07:08,  1.54s/it]


Loss: 3.195 | Epoch: 0/2 | 40/318


Epoch 1/2:  13%|█▎        | 41/318 [01:03<06:52,  1.49s/it]


Loss: 0.407 | Epoch: 0/2 | 41/318


Epoch 1/2:  13%|█▎        | 42/318 [01:04<06:58,  1.52s/it]


Loss: 1.343 | Epoch: 0/2 | 42/318


Epoch 1/2:  14%|█▎        | 43/318 [01:06<07:01,  1.53s/it]


Loss: 4.373 | Epoch: 0/2 | 43/318


Epoch 1/2:  14%|█▍        | 44/318 [01:07<06:47,  1.49s/it]


Loss: 0.441 | Epoch: 0/2 | 44/318


Epoch 1/2:  14%|█▍        | 45/318 [01:09<06:50,  1.50s/it]


Loss: 2.593 | Epoch: 0/2 | 45/318


Epoch 1/2:  14%|█▍        | 46/318 [01:10<06:51,  1.51s/it]


Loss: 1.622 | Epoch: 0/2 | 46/318


Epoch 1/2:  15%|█▍        | 47/318 [01:12<06:51,  1.52s/it]


Loss: 0.604 | Epoch: 0/2 | 47/318


Epoch 1/2:  15%|█▌        | 48/318 [01:13<06:36,  1.47s/it]


Loss: 1.406 | Epoch: 0/2 | 48/318


Epoch 1/2:  15%|█▌        | 49/318 [01:15<06:43,  1.50s/it]


Loss: 1.149 | Epoch: 0/2 | 49/318


Epoch 1/2:  16%|█▌        | 50/318 [01:16<06:41,  1.50s/it]


Loss: 0.921 | Epoch: 0/2 | 50/318


Epoch 1/2:  16%|█▌        | 51/318 [01:18<06:30,  1.46s/it]


Loss: 1.7 | Epoch: 0/2 | 51/318


Epoch 1/2:  16%|█▋        | 52/318 [01:19<06:35,  1.49s/it]


Loss: 2.831 | Epoch: 0/2 | 52/318


Epoch 1/2:  17%|█▋        | 53/318 [01:21<06:40,  1.51s/it]


Loss: 2.658 | Epoch: 0/2 | 53/318


Epoch 1/2:  17%|█▋        | 54/318 [01:22<06:35,  1.50s/it]


Loss: 0.816 | Epoch: 0/2 | 54/318


Epoch 1/2:  17%|█▋        | 55/318 [01:24<06:37,  1.51s/it]


Loss: 0.862 | Epoch: 0/2 | 55/318


Epoch 1/2:  18%|█▊        | 56/318 [01:25<06:40,  1.53s/it]


Loss: 2.241 | Epoch: 0/2 | 56/318


Epoch 1/2:  18%|█▊        | 57/318 [01:27<06:42,  1.54s/it]


Loss: 0.213 | Epoch: 0/2 | 57/318


Epoch 1/2:  18%|█▊        | 58/318 [01:28<06:42,  1.55s/it]


Loss: 0.967 | Epoch: 0/2 | 58/318


Epoch 1/2:  19%|█▊        | 59/318 [01:30<06:41,  1.55s/it]


Loss: 3.218 | Epoch: 0/2 | 59/318


Epoch 1/2:  19%|█▉        | 60/318 [01:31<06:41,  1.56s/it]


Loss: 2.06 | Epoch: 0/2 | 60/318


Epoch 1/2:  19%|█▉        | 61/318 [01:33<06:39,  1.55s/it]


Loss: 0.194 | Epoch: 0/2 | 61/318


Epoch 1/2:  19%|█▉        | 62/318 [01:34<06:26,  1.51s/it]


Loss: 1.268 | Epoch: 0/2 | 62/318


Epoch 1/2:  20%|█▉        | 63/318 [01:36<06:30,  1.53s/it]


Loss: 2.811 | Epoch: 0/2 | 63/318


Epoch 1/2:  20%|██        | 64/318 [01:38<06:29,  1.54s/it]


Loss: 1.233 | Epoch: 0/2 | 64/318


Epoch 1/2:  20%|██        | 65/318 [01:39<06:19,  1.50s/it]


Loss: 4.566 | Epoch: 0/2 | 65/318


Epoch 1/2:  21%|██        | 66/318 [01:41<06:23,  1.52s/it]


Loss: 2.689 | Epoch: 0/2 | 66/318


Epoch 1/2:  21%|██        | 67/318 [01:42<06:24,  1.53s/it]


Loss: 0.192 | Epoch: 0/2 | 67/318


Epoch 1/2:  21%|██▏       | 68/318 [01:44<06:24,  1.54s/it]


Loss: 0.156 | Epoch: 0/2 | 68/318


Epoch 1/2:  22%|██▏       | 69/318 [01:45<06:22,  1.54s/it]


Loss: 1.388 | Epoch: 0/2 | 69/318


Epoch 1/2:  22%|██▏       | 70/318 [01:47<06:24,  1.55s/it]


Loss: 0.675 | Epoch: 0/2 | 70/318


Epoch 1/2:  22%|██▏       | 71/318 [01:48<06:21,  1.54s/it]


Loss: 0.606 | Epoch: 0/2 | 71/318


Epoch 1/2:  23%|██▎       | 72/318 [01:50<06:19,  1.54s/it]


Loss: 0.445 | Epoch: 0/2 | 72/318


Epoch 1/2:  23%|██▎       | 73/318 [01:51<06:20,  1.55s/it]


Loss: 2.108 | Epoch: 0/2 | 73/318


Epoch 1/2:  23%|██▎       | 74/318 [01:53<06:20,  1.56s/it]


Loss: 1.455 | Epoch: 0/2 | 74/318


Epoch 1/2:  24%|██▎       | 75/318 [01:55<06:17,  1.56s/it]


Loss: 1.453 | Epoch: 0/2 | 75/318


Epoch 1/2:  24%|██▍       | 76/318 [01:56<06:03,  1.50s/it]


Loss: 0.545 | Epoch: 0/2 | 76/318


Epoch 1/2:  24%|██▍       | 77/318 [01:57<06:04,  1.51s/it]


Loss: 0.635 | Epoch: 0/2 | 77/318


Epoch 1/2:  25%|██▍       | 78/318 [01:59<06:05,  1.52s/it]


Loss: 2.282 | Epoch: 0/2 | 78/318


Epoch 1/2:  25%|██▍       | 79/318 [02:01<06:05,  1.53s/it]


Loss: 0.505 | Epoch: 0/2 | 79/318


Epoch 1/2:  25%|██▌       | 80/318 [02:02<06:06,  1.54s/it]


Loss: 0.444 | Epoch: 0/2 | 80/318


Epoch 1/2:  25%|██▌       | 81/318 [02:03<05:52,  1.49s/it]


Loss: 2.423 | Epoch: 0/2 | 81/318


Epoch 1/2:  26%|██▌       | 82/318 [02:05<05:58,  1.52s/it]


Loss: 1.496 | Epoch: 0/2 | 82/318


Epoch 1/2:  26%|██▌       | 83/318 [02:07<06:00,  1.53s/it]


Loss: 1.759 | Epoch: 0/2 | 83/318


Epoch 1/2:  26%|██▋       | 84/318 [02:08<05:59,  1.54s/it]


Loss: 1.822 | Epoch: 0/2 | 84/318


Epoch 1/2:  27%|██▋       | 85/318 [02:10<06:01,  1.55s/it]


Loss: 0.568 | Epoch: 0/2 | 85/318


Epoch 1/2:  27%|██▋       | 86/318 [02:11<05:47,  1.50s/it]


Loss: 0.88 | Epoch: 0/2 | 86/318


Epoch 1/2:  27%|██▋       | 87/318 [02:13<05:50,  1.52s/it]


Loss: 1.234 | Epoch: 0/2 | 87/318


Epoch 1/2:  28%|██▊       | 88/318 [02:14<05:46,  1.51s/it]


Loss: 1.808 | Epoch: 0/2 | 88/318


Epoch 1/2:  28%|██▊       | 89/318 [02:16<05:50,  1.53s/it]


Loss: 3.754 | Epoch: 0/2 | 89/318


Epoch 1/2:  28%|██▊       | 90/318 [02:17<05:51,  1.54s/it]


Loss: 3.748 | Epoch: 0/2 | 90/318


Epoch 1/2:  29%|██▊       | 91/318 [02:19<05:50,  1.54s/it]


Loss: 2.523 | Epoch: 0/2 | 91/318


Epoch 1/2:  29%|██▉       | 92/318 [02:20<05:45,  1.53s/it]


Loss: 4.641 | Epoch: 0/2 | 92/318


Epoch 1/2:  29%|██▉       | 93/318 [02:22<05:47,  1.54s/it]


Loss: 0.235 | Epoch: 0/2 | 93/318


Epoch 1/2:  30%|██▉       | 94/318 [02:23<05:45,  1.54s/it]


Loss: 0.677 | Epoch: 0/2 | 94/318


Epoch 1/2:  30%|██▉       | 95/318 [02:25<05:45,  1.55s/it]


Loss: 0.501 | Epoch: 0/2 | 95/318


Epoch 1/2:  30%|███       | 96/318 [02:27<05:46,  1.56s/it]


Loss: 0.486 | Epoch: 0/2 | 96/318


Epoch 1/2:  31%|███       | 97/318 [02:28<05:42,  1.55s/it]


Loss: 2.758 | Epoch: 0/2 | 97/318


Epoch 1/2:  31%|███       | 98/318 [02:30<05:42,  1.56s/it]


Loss: 1.209 | Epoch: 0/2 | 98/318


Epoch 1/2:  31%|███       | 99/318 [02:31<05:37,  1.54s/it]


Loss: 0.196 | Epoch: 0/2 | 99/318


Epoch 1/2:  31%|███▏      | 100/318 [02:33<05:37,  1.55s/it]


Loss: 1.941 | Epoch: 0/2 | 100/318


Epoch 1/2:  32%|███▏      | 101/318 [02:34<05:37,  1.56s/it]


Loss: 2.558 | Epoch: 0/2 | 101/318


Epoch 1/2:  32%|███▏      | 102/318 [02:36<05:35,  1.55s/it]


Loss: 2.043 | Epoch: 0/2 | 102/318


Epoch 1/2:  32%|███▏      | 103/318 [02:37<05:35,  1.56s/it]


Loss: 3.61 | Epoch: 0/2 | 103/318


Epoch 1/2:  33%|███▎      | 104/318 [02:39<05:23,  1.51s/it]


Loss: 2.571 | Epoch: 0/2 | 104/318


Epoch 1/2:  33%|███▎      | 105/318 [02:40<05:23,  1.52s/it]


Loss: 0.817 | Epoch: 0/2 | 105/318


Epoch 1/2:  33%|███▎      | 106/318 [02:42<05:23,  1.53s/it]


Loss: 0.145 | Epoch: 0/2 | 106/318


Epoch 1/2:  34%|███▎      | 107/318 [02:44<05:23,  1.53s/it]


Loss: 0.202 | Epoch: 0/2 | 107/318


Epoch 1/2:  34%|███▍      | 108/318 [02:45<05:24,  1.54s/it]


Loss: 0.14 | Epoch: 0/2 | 108/318


Epoch 1/2:  34%|███▍      | 109/318 [02:46<05:12,  1.50s/it]


Loss: 2.831 | Epoch: 0/2 | 109/318


Epoch 1/2:  35%|███▍      | 110/318 [02:48<05:13,  1.51s/it]


Loss: 0.197 | Epoch: 0/2 | 110/318


Epoch 1/2:  35%|███▍      | 111/318 [02:49<05:03,  1.47s/it]


Loss: 0.675 | Epoch: 0/2 | 111/318


Epoch 1/2:  35%|███▌      | 112/318 [02:51<05:04,  1.48s/it]


Loss: 1.556 | Epoch: 0/2 | 112/318


Epoch 1/2:  36%|███▌      | 113/318 [02:52<04:57,  1.45s/it]


Loss: 2.474 | Epoch: 0/2 | 113/318


Epoch 1/2:  36%|███▌      | 114/318 [02:54<05:03,  1.49s/it]


Loss: 2.11 | Epoch: 0/2 | 114/318


Epoch 1/2:  36%|███▌      | 115/318 [02:55<05:06,  1.51s/it]


Loss: 2.385 | Epoch: 0/2 | 115/318


Epoch 1/2:  36%|███▋      | 116/318 [02:57<05:06,  1.52s/it]


Loss: 0.995 | Epoch: 0/2 | 116/318


Epoch 1/2:  37%|███▋      | 117/318 [02:58<05:06,  1.52s/it]


Loss: 1.339 | Epoch: 0/2 | 117/318


Epoch 1/2:  37%|███▋      | 118/318 [03:00<05:07,  1.54s/it]


Loss: 3.312 | Epoch: 0/2 | 118/318


Epoch 1/2:  37%|███▋      | 119/318 [03:01<04:55,  1.49s/it]


Loss: 0.524 | Epoch: 0/2 | 119/318


Epoch 1/2:  38%|███▊      | 120/318 [03:03<04:47,  1.45s/it]


Loss: 0.579 | Epoch: 0/2 | 120/318


Epoch 1/2:  38%|███▊      | 121/318 [03:04<04:51,  1.48s/it]


Loss: 1.861 | Epoch: 0/2 | 121/318


Epoch 1/2:  38%|███▊      | 122/318 [03:06<04:55,  1.51s/it]


Loss: 3.167 | Epoch: 0/2 | 122/318


Epoch 1/2:  39%|███▊      | 123/318 [03:07<04:53,  1.51s/it]


Loss: 0.278 | Epoch: 0/2 | 123/318


Epoch 1/2:  39%|███▉      | 124/318 [03:09<04:45,  1.47s/it]


Loss: 2.071 | Epoch: 0/2 | 124/318


Epoch 1/2:  39%|███▉      | 125/318 [03:10<04:38,  1.44s/it]


Loss: 3.041 | Epoch: 0/2 | 125/318


Epoch 1/2:  40%|███▉      | 126/318 [03:12<04:42,  1.47s/it]


Loss: 1.152 | Epoch: 0/2 | 126/318


Epoch 1/2:  40%|███▉      | 127/318 [03:13<04:35,  1.44s/it]


Loss: 1.225 | Epoch: 0/2 | 127/318


Epoch 1/2:  40%|████      | 128/318 [03:14<04:31,  1.43s/it]


Loss: 0.551 | Epoch: 0/2 | 128/318


Epoch 1/2:  41%|████      | 129/318 [03:16<04:38,  1.47s/it]


Loss: 1.549 | Epoch: 0/2 | 129/318


Epoch 1/2:  41%|████      | 130/318 [03:18<04:39,  1.49s/it]


Loss: 2.324 | Epoch: 0/2 | 130/318


Epoch 1/2:  41%|████      | 131/318 [03:19<04:40,  1.50s/it]


Loss: 2.872 | Epoch: 0/2 | 131/318


Epoch 1/2:  42%|████▏     | 132/318 [03:21<04:43,  1.52s/it]


Loss: 1.801 | Epoch: 0/2 | 132/318


Epoch 1/2:  42%|████▏     | 133/318 [03:22<04:44,  1.54s/it]


Loss: 0.446 | Epoch: 0/2 | 133/318


Epoch 1/2:  42%|████▏     | 134/318 [03:24<04:40,  1.53s/it]


Loss: 0.583 | Epoch: 0/2 | 134/318


Epoch 1/2:  42%|████▏     | 135/318 [03:25<04:41,  1.54s/it]


Loss: 2.823 | Epoch: 0/2 | 135/318


Epoch 1/2:  43%|████▎     | 136/318 [03:27<04:39,  1.54s/it]


Loss: 0.498 | Epoch: 0/2 | 136/318


Epoch 1/2:  43%|████▎     | 137/318 [03:28<04:38,  1.54s/it]


Loss: 0.627 | Epoch: 0/2 | 137/318


Epoch 1/2:  43%|████▎     | 138/318 [03:30<04:37,  1.54s/it]


Loss: 0.637 | Epoch: 0/2 | 138/318


Epoch 1/2:  44%|████▎     | 139/318 [03:32<04:37,  1.55s/it]


Loss: 1.614 | Epoch: 0/2 | 139/318


Epoch 1/2:  44%|████▍     | 140/318 [03:33<04:34,  1.54s/it]


Loss: 1.826 | Epoch: 0/2 | 140/318


Epoch 1/2:  44%|████▍     | 141/318 [03:35<04:35,  1.56s/it]


Loss: 1.628 | Epoch: 0/2 | 141/318


Epoch 1/2:  45%|████▍     | 142/318 [03:36<04:32,  1.55s/it]


Loss: 0.213 | Epoch: 0/2 | 142/318


Epoch 1/2:  45%|████▍     | 143/318 [03:38<04:22,  1.50s/it]


Loss: 1.456 | Epoch: 0/2 | 143/318


Epoch 1/2:  45%|████▌     | 144/318 [03:39<04:23,  1.51s/it]


Loss: 0.129 | Epoch: 0/2 | 144/318


Epoch 1/2:  46%|████▌     | 145/318 [03:41<04:23,  1.52s/it]


Loss: 0.224 | Epoch: 0/2 | 145/318


Epoch 1/2:  46%|████▌     | 146/318 [03:42<04:22,  1.53s/it]


Loss: 0.503 | Epoch: 0/2 | 146/318


Epoch 1/2:  46%|████▌     | 147/318 [03:43<04:05,  1.44s/it]


Loss: 0.16 | Epoch: 0/2 | 147/318


Epoch 1/2:  47%|████▋     | 148/318 [03:45<04:11,  1.48s/it]


Loss: 0.781 | Epoch: 0/2 | 148/318


Epoch 1/2:  47%|████▋     | 149/318 [03:47<04:13,  1.50s/it]


Loss: 0.398 | Epoch: 0/2 | 149/318


Epoch 1/2:  47%|████▋     | 150/318 [03:48<04:13,  1.51s/it]


Loss: 1.208 | Epoch: 0/2 | 150/318


Epoch 1/2:  47%|████▋     | 151/318 [03:50<04:14,  1.52s/it]


Loss: 0.823 | Epoch: 0/2 | 151/318


Epoch 1/2:  48%|████▊     | 152/318 [03:51<04:13,  1.53s/it]


Loss: 1.924 | Epoch: 0/2 | 152/318


Epoch 1/2:  48%|████▊     | 153/318 [03:53<04:12,  1.53s/it]


Loss: 2.959 | Epoch: 0/2 | 153/318


Epoch 1/2:  48%|████▊     | 154/318 [03:54<04:11,  1.53s/it]


Loss: 0.596 | Epoch: 0/2 | 154/318


Epoch 1/2:  49%|████▊     | 155/318 [03:56<04:13,  1.56s/it]


Loss: 2.101 | Epoch: 0/2 | 155/318


Epoch 1/2:  49%|████▉     | 156/318 [03:57<04:08,  1.54s/it]


Loss: 0.691 | Epoch: 0/2 | 156/318


Epoch 1/2:  49%|████▉     | 157/318 [03:59<04:07,  1.54s/it]


Loss: 0.773 | Epoch: 0/2 | 157/318


Epoch 1/2:  50%|████▉     | 158/318 [04:00<04:00,  1.50s/it]


Loss: 0.149 | Epoch: 0/2 | 158/318


Epoch 1/2:  50%|█████     | 159/318 [04:02<04:02,  1.52s/it]


Loss: 0.414 | Epoch: 0/2 | 159/318


Epoch 1/2:  50%|█████     | 160/318 [04:03<03:53,  1.48s/it]


Loss: 0.427 | Epoch: 0/2 | 160/318


Epoch 1/2:  51%|█████     | 161/318 [04:05<03:56,  1.51s/it]


Loss: 1.325 | Epoch: 0/2 | 161/318


Epoch 1/2:  51%|█████     | 162/318 [04:06<03:56,  1.52s/it]


Loss: 0.645 | Epoch: 0/2 | 162/318


Epoch 1/2:  51%|█████▏    | 163/318 [04:08<03:50,  1.48s/it]


Loss: 2.263 | Epoch: 0/2 | 163/318


Epoch 1/2:  52%|█████▏    | 164/318 [04:09<03:43,  1.45s/it]


Loss: 2.539 | Epoch: 0/2 | 164/318


Epoch 1/2:  52%|█████▏    | 165/318 [04:11<03:47,  1.49s/it]


Loss: 2.802 | Epoch: 0/2 | 165/318


Epoch 1/2:  52%|█████▏    | 166/318 [04:12<03:48,  1.50s/it]


Loss: 0.347 | Epoch: 0/2 | 166/318


Epoch 1/2:  53%|█████▎    | 167/318 [04:14<03:50,  1.53s/it]


Loss: 2.037 | Epoch: 0/2 | 167/318


Epoch 1/2:  53%|█████▎    | 168/318 [04:15<03:51,  1.54s/it]


Loss: 2.699 | Epoch: 0/2 | 168/318


Epoch 1/2:  53%|█████▎    | 169/318 [04:17<03:49,  1.54s/it]


Loss: 0.902 | Epoch: 0/2 | 169/318


Epoch 1/2:  53%|█████▎    | 170/318 [04:18<03:47,  1.54s/it]


Loss: 4.054 | Epoch: 0/2 | 170/318


Epoch 1/2:  54%|█████▍    | 171/318 [04:20<03:46,  1.54s/it]


Loss: 0.321 | Epoch: 0/2 | 171/318


Epoch 1/2:  54%|█████▍    | 172/318 [04:22<03:43,  1.53s/it]


Loss: 1.057 | Epoch: 0/2 | 172/318


Epoch 1/2:  54%|█████▍    | 173/318 [04:23<03:36,  1.49s/it]


Loss: 0.352 | Epoch: 0/2 | 173/318


Epoch 1/2:  55%|█████▍    | 174/318 [04:24<03:36,  1.51s/it]


Loss: 0.95 | Epoch: 0/2 | 174/318


Epoch 1/2:  55%|█████▌    | 175/318 [04:26<03:29,  1.47s/it]


Loss: 0.849 | Epoch: 0/2 | 175/318


Epoch 1/2:  55%|█████▌    | 176/318 [04:27<03:30,  1.48s/it]


Loss: 1.11 | Epoch: 0/2 | 176/318


Epoch 1/2:  56%|█████▌    | 177/318 [04:29<03:31,  1.50s/it]


Loss: 2.658 | Epoch: 0/2 | 177/318


Epoch 1/2:  56%|█████▌    | 178/318 [04:30<03:30,  1.50s/it]


Loss: 0.336 | Epoch: 0/2 | 178/318


Epoch 1/2:  56%|█████▋    | 179/318 [04:32<03:30,  1.52s/it]


Loss: 1.019 | Epoch: 0/2 | 179/318


Epoch 1/2:  57%|█████▋    | 180/318 [04:34<03:31,  1.53s/it]


Loss: 1.987 | Epoch: 0/2 | 180/318


Epoch 1/2:  57%|█████▋    | 181/318 [04:35<03:23,  1.49s/it]


Loss: 2.055 | Epoch: 0/2 | 181/318


Epoch 1/2:  57%|█████▋    | 182/318 [04:36<03:24,  1.50s/it]


Loss: 1.39 | Epoch: 0/2 | 182/318


Epoch 1/2:  58%|█████▊    | 183/318 [04:38<03:26,  1.53s/it]


Loss: 1.691 | Epoch: 0/2 | 183/318


Epoch 1/2:  58%|█████▊    | 184/318 [04:40<03:25,  1.53s/it]


Loss: 0.426 | Epoch: 0/2 | 184/318


Epoch 1/2:  58%|█████▊    | 185/318 [04:41<03:21,  1.52s/it]


Loss: 0.18 | Epoch: 0/2 | 185/318


Epoch 1/2:  58%|█████▊    | 186/318 [04:42<03:14,  1.47s/it]


Loss: 0.815 | Epoch: 0/2 | 186/318


Epoch 1/2:  59%|█████▉    | 187/318 [04:44<03:16,  1.50s/it]


Loss: 1.128 | Epoch: 0/2 | 187/318


Epoch 1/2:  59%|█████▉    | 188/318 [04:46<03:18,  1.53s/it]


Loss: 1.642 | Epoch: 0/2 | 188/318


Epoch 1/2:  59%|█████▉    | 189/318 [04:47<03:12,  1.49s/it]


Loss: 1.733 | Epoch: 0/2 | 189/318


Epoch 1/2:  60%|█████▉    | 190/318 [04:49<03:14,  1.52s/it]


Loss: 1.485 | Epoch: 0/2 | 190/318


Epoch 1/2:  60%|██████    | 191/318 [04:50<03:13,  1.53s/it]


Loss: 0.303 | Epoch: 0/2 | 191/318


Epoch 1/2:  60%|██████    | 192/318 [04:52<03:14,  1.54s/it]


Loss: 0.847 | Epoch: 0/2 | 192/318


Epoch 1/2:  61%|██████    | 193/318 [04:53<03:13,  1.55s/it]


Loss: 2.997 | Epoch: 0/2 | 193/318


Epoch 1/2:  61%|██████    | 194/318 [04:55<03:11,  1.55s/it]


Loss: 0.298 | Epoch: 0/2 | 194/318


Epoch 1/2:  61%|██████▏   | 195/318 [04:56<03:10,  1.55s/it]


Loss: 1.017 | Epoch: 0/2 | 195/318


Epoch 1/2:  62%|██████▏   | 196/318 [04:58<03:09,  1.55s/it]


Loss: 1.822 | Epoch: 0/2 | 196/318


Epoch 1/2:  62%|██████▏   | 197/318 [04:59<03:02,  1.51s/it]


Loss: 3.643 | Epoch: 0/2 | 197/318


Epoch 1/2:  62%|██████▏   | 198/318 [05:01<02:57,  1.48s/it]


Loss: 1.712 | Epoch: 0/2 | 198/318


Epoch 1/2:  63%|██████▎   | 199/318 [05:02<02:57,  1.50s/it]


Loss: 0.677 | Epoch: 0/2 | 199/318


Epoch 1/2:  63%|██████▎   | 200/318 [05:04<02:58,  1.52s/it]


Loss: 0.217 | Epoch: 0/2 | 200/318


Epoch 1/2:  63%|██████▎   | 201/318 [05:05<02:57,  1.51s/it]


Loss: 1.818 | Epoch: 0/2 | 201/318


Epoch 1/2:  64%|██████▎   | 202/318 [05:07<02:51,  1.48s/it]


Loss: 3.124 | Epoch: 0/2 | 202/318


Epoch 1/2:  64%|██████▍   | 203/318 [05:08<02:51,  1.49s/it]


Loss: 0.599 | Epoch: 0/2 | 203/318


Epoch 1/2:  64%|██████▍   | 204/318 [05:10<02:51,  1.50s/it]


Loss: 0.83 | Epoch: 0/2 | 204/318


Epoch 1/2:  64%|██████▍   | 205/318 [05:11<02:49,  1.50s/it]


Loss: 0.184 | Epoch: 0/2 | 205/318


Epoch 1/2:  65%|██████▍   | 206/318 [05:13<02:48,  1.51s/it]


Loss: 0.728 | Epoch: 0/2 | 206/318


Epoch 1/2:  65%|██████▌   | 207/318 [05:14<02:42,  1.47s/it]


Loss: 0.28 | Epoch: 0/2 | 207/318


Epoch 1/2:  65%|██████▌   | 208/318 [05:16<02:39,  1.45s/it]


Loss: 1.288 | Epoch: 0/2 | 208/318


Epoch 1/2:  66%|██████▌   | 209/318 [05:17<02:41,  1.49s/it]


Loss: 0.548 | Epoch: 0/2 | 209/318


Epoch 1/2:  66%|██████▌   | 210/318 [05:19<02:36,  1.45s/it]


Loss: 0.191 | Epoch: 0/2 | 210/318


Epoch 1/2:  66%|██████▋   | 211/318 [05:20<02:32,  1.43s/it]


Loss: 0.34 | Epoch: 0/2 | 211/318


Epoch 1/2:  67%|██████▋   | 212/318 [05:21<02:34,  1.46s/it]


Loss: 1.182 | Epoch: 0/2 | 212/318


Epoch 1/2:  67%|██████▋   | 213/318 [05:23<02:35,  1.49s/it]


Loss: 0.534 | Epoch: 0/2 | 213/318


Epoch 1/2:  67%|██████▋   | 214/318 [05:24<02:30,  1.45s/it]


Loss: 0.272 | Epoch: 0/2 | 214/318


Epoch 1/2:  68%|██████▊   | 215/318 [05:26<02:32,  1.48s/it]


Loss: 0.527 | Epoch: 0/2 | 215/318


Epoch 1/2:  68%|██████▊   | 216/318 [05:27<02:27,  1.45s/it]


Loss: 1.645 | Epoch: 0/2 | 216/318


Epoch 1/2:  68%|██████▊   | 217/318 [05:29<02:30,  1.49s/it]


Loss: 2.443 | Epoch: 0/2 | 217/318


Epoch 1/2:  69%|██████▊   | 218/318 [05:30<02:31,  1.52s/it]


Loss: 2.586 | Epoch: 0/2 | 218/318


Epoch 1/2:  69%|██████▉   | 219/318 [05:32<02:30,  1.52s/it]


Loss: 0.198 | Epoch: 0/2 | 219/318


Epoch 1/2:  69%|██████▉   | 220/318 [05:34<02:29,  1.53s/it]


Loss: 0.24 | Epoch: 0/2 | 220/318


Epoch 1/2:  69%|██████▉   | 221/318 [05:35<02:28,  1.53s/it]


Loss: 0.157 | Epoch: 0/2 | 221/318


Epoch 1/2:  70%|██████▉   | 222/318 [05:37<02:27,  1.53s/it]


Loss: 0.475 | Epoch: 0/2 | 222/318


Epoch 1/2:  70%|███████   | 223/318 [05:38<02:26,  1.55s/it]


Loss: 3.236 | Epoch: 0/2 | 223/318


Epoch 1/2:  70%|███████   | 224/318 [05:40<02:25,  1.55s/it]


Loss: 0.467 | Epoch: 0/2 | 224/318


Epoch 1/2:  71%|███████   | 225/318 [05:41<02:23,  1.55s/it]


Loss: 2.317 | Epoch: 0/2 | 225/318


Epoch 1/2:  71%|███████   | 226/318 [05:43<02:23,  1.56s/it]


Loss: 3.514 | Epoch: 0/2 | 226/318


Epoch 1/2:  71%|███████▏  | 227/318 [05:44<02:22,  1.56s/it]


Loss: 0.179 | Epoch: 0/2 | 227/318


Epoch 1/2:  72%|███████▏  | 228/318 [05:46<02:19,  1.55s/it]


Loss: 1.596 | Epoch: 0/2 | 228/318


Epoch 1/2:  72%|███████▏  | 229/318 [05:48<02:18,  1.56s/it]


Loss: 0.311 | Epoch: 0/2 | 229/318


Epoch 1/2:  72%|███████▏  | 230/318 [05:49<02:12,  1.50s/it]


Loss: 0.593 | Epoch: 0/2 | 230/318


Epoch 1/2:  73%|███████▎  | 231/318 [05:50<02:10,  1.50s/it]


Loss: 0.634 | Epoch: 0/2 | 231/318


Epoch 1/2:  73%|███████▎  | 232/318 [05:52<02:09,  1.51s/it]


Loss: 1.829 | Epoch: 0/2 | 232/318


Epoch 1/2:  73%|███████▎  | 233/318 [05:54<02:10,  1.54s/it]


Loss: 0.936 | Epoch: 0/2 | 233/318


Epoch 1/2:  74%|███████▎  | 234/318 [05:55<02:09,  1.54s/it]


Loss: 0.948 | Epoch: 0/2 | 234/318


Epoch 1/2:  74%|███████▍  | 235/318 [05:57<02:07,  1.54s/it]


Loss: 3.12 | Epoch: 0/2 | 235/318


Epoch 1/2:  74%|███████▍  | 236/318 [05:58<02:06,  1.54s/it]


Loss: 1.604 | Epoch: 0/2 | 236/318


Epoch 1/2:  75%|███████▍  | 237/318 [06:00<02:04,  1.54s/it]


Loss: 0.678 | Epoch: 0/2 | 237/318


Epoch 1/2:  75%|███████▍  | 238/318 [06:01<02:03,  1.54s/it]


Loss: 0.543 | Epoch: 0/2 | 238/318


Epoch 1/2:  75%|███████▌  | 239/318 [06:03<01:58,  1.50s/it]


Loss: 2.245 | Epoch: 0/2 | 239/318


Epoch 1/2:  75%|███████▌  | 240/318 [06:04<01:56,  1.50s/it]


Loss: 2.121 | Epoch: 0/2 | 240/318


Epoch 1/2:  76%|███████▌  | 241/318 [06:06<01:52,  1.46s/it]


Loss: 0.473 | Epoch: 0/2 | 241/318


Epoch 1/2:  76%|███████▌  | 242/318 [06:07<01:53,  1.49s/it]


Loss: 3.323 | Epoch: 0/2 | 242/318


Epoch 1/2:  76%|███████▋  | 243/318 [06:09<01:53,  1.52s/it]


Loss: 1.426 | Epoch: 0/2 | 243/318


Epoch 1/2:  77%|███████▋  | 244/318 [06:10<01:53,  1.53s/it]


Loss: 2.738 | Epoch: 0/2 | 244/318


Epoch 1/2:  77%|███████▋  | 245/318 [06:12<01:52,  1.54s/it]


Loss: 0.891 | Epoch: 0/2 | 245/318


Epoch 1/2:  77%|███████▋  | 246/318 [06:13<01:52,  1.56s/it]


Loss: 1.427 | Epoch: 0/2 | 246/318


Epoch 1/2:  78%|███████▊  | 247/318 [06:15<01:46,  1.49s/it]


Loss: 1.477 | Epoch: 0/2 | 247/318


Epoch 1/2:  78%|███████▊  | 248/318 [06:16<01:44,  1.49s/it]


Loss: 3.327 | Epoch: 0/2 | 248/318


Epoch 1/2:  78%|███████▊  | 249/318 [06:18<01:43,  1.50s/it]


Loss: 0.655 | Epoch: 0/2 | 249/318


Epoch 1/2:  79%|███████▊  | 250/318 [06:19<01:43,  1.52s/it]


Loss: 1.828 | Epoch: 0/2 | 250/318


Epoch 1/2:  79%|███████▉  | 251/318 [06:21<01:42,  1.53s/it]


Loss: 1.074 | Epoch: 0/2 | 251/318


Epoch 1/2:  79%|███████▉  | 252/318 [06:22<01:41,  1.54s/it]


Loss: 4.042 | Epoch: 0/2 | 252/318


Epoch 1/2:  80%|███████▉  | 253/318 [06:24<01:40,  1.55s/it]


Loss: 2.094 | Epoch: 0/2 | 253/318


Epoch 1/2:  80%|███████▉  | 254/318 [06:26<01:38,  1.54s/it]


Loss: 1.799 | Epoch: 0/2 | 254/318


Epoch 1/2:  80%|████████  | 255/318 [06:27<01:37,  1.54s/it]


Loss: 1.263 | Epoch: 0/2 | 255/318


Epoch 1/2:  81%|████████  | 256/318 [06:29<01:35,  1.54s/it]


Loss: 0.54 | Epoch: 0/2 | 256/318


Epoch 1/2:  81%|████████  | 257/318 [06:30<01:31,  1.50s/it]


Loss: 1.654 | Epoch: 0/2 | 257/318


Epoch 1/2:  81%|████████  | 258/318 [06:32<01:30,  1.51s/it]


Loss: 1.646 | Epoch: 0/2 | 258/318


Epoch 1/2:  81%|████████▏ | 259/318 [06:33<01:30,  1.53s/it]


Loss: 0.76 | Epoch: 0/2 | 259/318


Epoch 1/2:  82%|████████▏ | 260/318 [06:35<01:28,  1.53s/it]


Loss: 0.538 | Epoch: 0/2 | 260/318


Epoch 1/2:  82%|████████▏ | 261/318 [06:36<01:27,  1.53s/it]


Loss: 0.799 | Epoch: 0/2 | 261/318


Epoch 1/2:  82%|████████▏ | 262/318 [06:38<01:25,  1.53s/it]


Loss: 0.552 | Epoch: 0/2 | 262/318


Epoch 1/2:  83%|████████▎ | 263/318 [06:39<01:24,  1.53s/it]


Loss: 0.988 | Epoch: 0/2 | 263/318


Epoch 1/2:  83%|████████▎ | 264/318 [06:41<01:23,  1.54s/it]


Loss: 0.698 | Epoch: 0/2 | 264/318


Epoch 1/2:  83%|████████▎ | 265/318 [06:42<01:22,  1.55s/it]


Loss: 1.348 | Epoch: 0/2 | 265/318


Epoch 1/2:  84%|████████▎ | 266/318 [06:44<01:20,  1.54s/it]


Loss: 0.672 | Epoch: 0/2 | 266/318


Epoch 1/2:  84%|████████▍ | 267/318 [06:45<01:18,  1.53s/it]


Loss: 0.448 | Epoch: 0/2 | 267/318


Epoch 1/2:  84%|████████▍ | 268/318 [06:47<01:16,  1.52s/it]


Loss: 0.303 | Epoch: 0/2 | 268/318


Epoch 1/2:  85%|████████▍ | 269/318 [06:48<01:14,  1.52s/it]


Loss: 1.899 | Epoch: 0/2 | 269/318


Epoch 1/2:  85%|████████▍ | 270/318 [06:50<01:13,  1.52s/it]


Loss: 0.411 | Epoch: 0/2 | 270/318


Epoch 1/2:  85%|████████▌ | 271/318 [06:52<01:11,  1.53s/it]


Loss: 2.404 | Epoch: 0/2 | 271/318


Epoch 1/2:  86%|████████▌ | 272/318 [06:53<01:10,  1.54s/it]


Loss: 1.282 | Epoch: 0/2 | 272/318


Epoch 1/2:  86%|████████▌ | 273/318 [06:55<01:09,  1.53s/it]


Loss: 0.757 | Epoch: 0/2 | 273/318


Epoch 1/2:  86%|████████▌ | 274/318 [06:56<01:08,  1.55s/it]


Loss: 0.73 | Epoch: 0/2 | 274/318


Epoch 1/2:  86%|████████▋ | 275/318 [06:58<01:06,  1.56s/it]


Loss: 0.879 | Epoch: 0/2 | 275/318


Epoch 1/2:  87%|████████▋ | 276/318 [06:59<01:05,  1.56s/it]


Loss: 3.408 | Epoch: 0/2 | 276/318


Epoch 1/2:  87%|████████▋ | 277/318 [07:01<01:00,  1.48s/it]


Loss: 1.694 | Epoch: 0/2 | 277/318


Epoch 1/2:  87%|████████▋ | 278/318 [07:02<01:00,  1.50s/it]


Loss: 0.094 | Epoch: 0/2 | 278/318


Epoch 1/2:  88%|████████▊ | 279/318 [07:04<00:59,  1.52s/it]


Loss: 2.238 | Epoch: 0/2 | 279/318


Epoch 1/2:  88%|████████▊ | 280/318 [07:05<00:56,  1.49s/it]


Loss: 1.425 | Epoch: 0/2 | 280/318


Epoch 1/2:  88%|████████▊ | 281/318 [07:07<00:55,  1.49s/it]


Loss: 0.52 | Epoch: 0/2 | 281/318


Epoch 1/2:  89%|████████▊ | 282/318 [07:08<00:52,  1.46s/it]


Loss: 1.611 | Epoch: 0/2 | 282/318


Epoch 1/2:  89%|████████▉ | 283/318 [07:10<00:51,  1.47s/it]


Loss: 0.608 | Epoch: 0/2 | 283/318


Epoch 1/2:  89%|████████▉ | 284/318 [07:11<00:47,  1.39s/it]


Loss: 2.532 | Epoch: 0/2 | 284/318


Epoch 1/2:  90%|████████▉ | 285/318 [07:12<00:47,  1.43s/it]


Loss: 0.255 | Epoch: 0/2 | 285/318


Epoch 1/2:  90%|████████▉ | 286/318 [07:14<00:46,  1.46s/it]


Loss: 0.477 | Epoch: 0/2 | 286/318


Epoch 1/2:  90%|█████████ | 287/318 [07:15<00:46,  1.50s/it]


Loss: 0.473 | Epoch: 0/2 | 287/318


Epoch 1/2:  91%|█████████ | 288/318 [07:17<00:45,  1.51s/it]


Loss: 1.353 | Epoch: 0/2 | 288/318


Epoch 1/2:  91%|█████████ | 289/318 [07:18<00:44,  1.53s/it]


Loss: 1.939 | Epoch: 0/2 | 289/318


Epoch 1/2:  91%|█████████ | 290/318 [07:20<00:42,  1.52s/it]


Loss: 0.597 | Epoch: 0/2 | 290/318


Epoch 1/2:  92%|█████████▏| 291/318 [07:22<00:41,  1.54s/it]


Loss: 2.952 | Epoch: 0/2 | 291/318


Epoch 1/2:  92%|█████████▏| 292/318 [07:23<00:38,  1.49s/it]


Loss: 0.507 | Epoch: 0/2 | 292/318


Epoch 1/2:  92%|█████████▏| 293/318 [07:24<00:37,  1.50s/it]


Loss: 0.84 | Epoch: 0/2 | 293/318


Epoch 1/2:  92%|█████████▏| 294/318 [07:26<00:36,  1.52s/it]


Loss: 4.821 | Epoch: 0/2 | 294/318


Epoch 1/2:  93%|█████████▎| 295/318 [07:28<00:35,  1.53s/it]


Loss: 1.571 | Epoch: 0/2 | 295/318


Epoch 1/2:  93%|█████████▎| 296/318 [07:29<00:33,  1.53s/it]


Loss: 0.369 | Epoch: 0/2 | 296/318


Epoch 1/2:  93%|█████████▎| 297/318 [07:31<00:32,  1.53s/it]


Loss: 0.919 | Epoch: 0/2 | 297/318


Epoch 1/2:  94%|█████████▎| 298/318 [07:32<00:30,  1.53s/it]


Loss: 1.033 | Epoch: 0/2 | 298/318


Epoch 1/2:  94%|█████████▍| 299/318 [07:34<00:29,  1.53s/it]


Loss: 1.076 | Epoch: 0/2 | 299/318


Epoch 1/2:  94%|█████████▍| 300/318 [07:35<00:27,  1.53s/it]


Loss: 2.147 | Epoch: 0/2 | 300/318


Epoch 1/2:  95%|█████████▍| 301/318 [07:37<00:26,  1.54s/it]


Loss: 0.979 | Epoch: 0/2 | 301/318


Epoch 1/2:  95%|█████████▍| 302/318 [07:38<00:24,  1.55s/it]


Loss: 3.93 | Epoch: 0/2 | 302/318


Epoch 1/2:  95%|█████████▌| 303/318 [07:40<00:23,  1.54s/it]


Loss: 0.957 | Epoch: 0/2 | 303/318


Epoch 1/2:  96%|█████████▌| 304/318 [07:41<00:21,  1.54s/it]


Loss: 1.029 | Epoch: 0/2 | 304/318


Epoch 1/2:  96%|█████████▌| 305/318 [07:43<00:20,  1.54s/it]


Loss: 0.9 | Epoch: 0/2 | 305/318


Epoch 1/2:  96%|█████████▌| 306/318 [07:45<00:18,  1.54s/it]


Loss: 1.037 | Epoch: 0/2 | 306/318


Epoch 1/2:  97%|█████████▋| 307/318 [07:46<00:16,  1.54s/it]


Loss: 0.829 | Epoch: 0/2 | 307/318


Epoch 1/2:  97%|█████████▋| 308/318 [07:48<00:15,  1.55s/it]


Loss: 0.95 | Epoch: 0/2 | 308/318


Epoch 1/2:  97%|█████████▋| 309/318 [07:49<00:13,  1.55s/it]


Loss: 2.001 | Epoch: 0/2 | 309/318


Epoch 1/2:  97%|█████████▋| 310/318 [07:51<00:11,  1.50s/it]


Loss: 0.349 | Epoch: 0/2 | 310/318


Epoch 1/2:  98%|█████████▊| 311/318 [07:52<00:10,  1.50s/it]


Loss: 1.213 | Epoch: 0/2 | 311/318


Epoch 1/2:  98%|█████████▊| 312/318 [07:54<00:09,  1.51s/it]


Loss: 0.069 | Epoch: 0/2 | 312/318


Epoch 1/2:  98%|█████████▊| 313/318 [07:55<00:07,  1.52s/it]


Loss: 2.644 | Epoch: 0/2 | 313/318


Epoch 1/2:  99%|█████████▊| 314/318 [07:57<00:06,  1.52s/it]


Loss: 0.585 | Epoch: 0/2 | 314/318


Epoch 1/2:  99%|█████████▉| 315/318 [07:58<00:04,  1.54s/it]


Loss: 0.328 | Epoch: 0/2 | 315/318


Epoch 1/2:  99%|█████████▉| 316/318 [08:00<00:03,  1.55s/it]


Loss: 1.787 | Epoch: 0/2 | 316/318


Epoch 1/2: 100%|█████████▉| 317/318 [08:01<00:01,  1.52s/it]


Loss: 0.377 | Epoch: 0/2 | 317/318


Epoch 1/2: 100%|██████████| 318/318 [08:02<00:00,  1.52s/it]



Loss: 0.971 | Epoch: 0/2 | 318/318


Epoch 2/2:   0%|          | 1/318 [00:01<08:06,  1.53s/it]


Loss: 0.475 | Epoch: 1/2 | 319/318


Epoch 2/2:   1%|          | 2/318 [00:02<07:36,  1.44s/it]


Loss: 0.517 | Epoch: 1/2 | 320/318


Epoch 2/2:   1%|          | 3/318 [00:04<07:52,  1.50s/it]


Loss: 0.334 | Epoch: 1/2 | 321/318


Epoch 2/2:   1%|▏         | 4/318 [00:06<07:56,  1.52s/it]


Loss: 0.269 | Epoch: 1/2 | 322/318


Epoch 2/2:   2%|▏         | 5/318 [00:07<07:53,  1.51s/it]


Loss: 0.632 | Epoch: 1/2 | 323/318


Epoch 2/2:   2%|▏         | 6/318 [00:08<07:34,  1.46s/it]


Loss: 0.633 | Epoch: 1/2 | 324/318


Epoch 2/2:   2%|▏         | 7/318 [00:10<07:45,  1.50s/it]


Loss: 1.512 | Epoch: 1/2 | 325/318


Epoch 2/2:   3%|▎         | 8/318 [00:11<07:35,  1.47s/it]


Loss: 0.725 | Epoch: 1/2 | 326/318


Epoch 2/2:   3%|▎         | 9/318 [00:13<07:24,  1.44s/it]


Loss: 0.806 | Epoch: 1/2 | 327/318


Epoch 2/2:   3%|▎         | 10/318 [00:14<07:35,  1.48s/it]


Loss: 0.562 | Epoch: 1/2 | 328/318


Epoch 2/2:   3%|▎         | 11/318 [00:16<07:37,  1.49s/it]


Loss: 0.61 | Epoch: 1/2 | 329/318


Epoch 2/2:   4%|▍         | 12/318 [00:17<07:40,  1.51s/it]


Loss: 1.401 | Epoch: 1/2 | 330/318


Epoch 2/2:   4%|▍         | 13/318 [00:19<07:35,  1.49s/it]


Loss: 0.177 | Epoch: 1/2 | 331/318


Epoch 2/2:   4%|▍         | 14/318 [00:20<07:38,  1.51s/it]


Loss: 0.626 | Epoch: 1/2 | 332/318


Epoch 2/2:   5%|▍         | 15/318 [00:22<07:08,  1.41s/it]


Loss: 0.165 | Epoch: 1/2 | 333/318


Epoch 2/2:   5%|▌         | 16/318 [00:23<07:20,  1.46s/it]


Loss: 0.203 | Epoch: 1/2 | 334/318


Epoch 2/2:   5%|▌         | 17/318 [00:25<07:28,  1.49s/it]


Loss: 0.662 | Epoch: 1/2 | 335/318


Epoch 2/2:   6%|▌         | 18/318 [00:26<07:30,  1.50s/it]


Loss: 0.322 | Epoch: 1/2 | 336/318


Epoch 2/2:   6%|▌         | 19/318 [00:28<07:36,  1.53s/it]


Loss: 0.162 | Epoch: 1/2 | 337/318


Epoch 2/2:   6%|▋         | 20/318 [00:29<07:35,  1.53s/it]


Loss: 0.331 | Epoch: 1/2 | 338/318


Epoch 2/2:   7%|▋         | 21/318 [00:31<07:35,  1.53s/it]


Loss: 0.657 | Epoch: 1/2 | 339/318


Epoch 2/2:   7%|▋         | 22/318 [00:32<07:20,  1.49s/it]


Loss: 0.779 | Epoch: 1/2 | 340/318


Epoch 2/2:   7%|▋         | 23/318 [00:34<07:23,  1.50s/it]


Loss: 0.748 | Epoch: 1/2 | 341/318


Epoch 2/2:   8%|▊         | 24/318 [00:35<07:10,  1.46s/it]


Loss: 0.369 | Epoch: 1/2 | 342/318


Epoch 2/2:   8%|▊         | 25/318 [00:37<07:15,  1.49s/it]


Loss: 0.37 | Epoch: 1/2 | 343/318


Epoch 2/2:   8%|▊         | 26/318 [00:38<07:21,  1.51s/it]


Loss: 1.463 | Epoch: 1/2 | 344/318


Epoch 2/2:   8%|▊         | 27/318 [00:40<07:23,  1.52s/it]


Loss: 0.614 | Epoch: 1/2 | 345/318


Epoch 2/2:   9%|▉         | 28/318 [00:41<07:17,  1.51s/it]


Loss: 0.073 | Epoch: 1/2 | 346/318


Epoch 2/2:   9%|▉         | 29/318 [00:43<07:19,  1.52s/it]


Loss: 0.394 | Epoch: 1/2 | 347/318


Epoch 2/2:   9%|▉         | 30/318 [00:44<07:19,  1.53s/it]


Loss: 0.726 | Epoch: 1/2 | 348/318


Epoch 2/2:  10%|▉         | 31/318 [00:46<07:19,  1.53s/it]


Loss: 0.163 | Epoch: 1/2 | 349/318


Epoch 2/2:  10%|█         | 32/318 [00:47<07:04,  1.48s/it]


Loss: 0.81 | Epoch: 1/2 | 350/318


Epoch 2/2:  10%|█         | 33/318 [00:49<07:07,  1.50s/it]


Loss: 0.296 | Epoch: 1/2 | 351/318


Epoch 2/2:  11%|█         | 34/318 [00:50<07:08,  1.51s/it]


Loss: 0.413 | Epoch: 1/2 | 352/318


Epoch 2/2:  11%|█         | 35/318 [00:52<06:55,  1.47s/it]


Loss: 0.602 | Epoch: 1/2 | 353/318


Epoch 2/2:  11%|█▏        | 36/318 [00:53<07:00,  1.49s/it]


Loss: 0.497 | Epoch: 1/2 | 354/318


Epoch 2/2:  12%|█▏        | 37/318 [00:55<07:03,  1.51s/it]


Loss: 0.896 | Epoch: 1/2 | 355/318


Epoch 2/2:  12%|█▏        | 38/318 [00:56<07:04,  1.51s/it]


Loss: 0.231 | Epoch: 1/2 | 356/318


Epoch 2/2:  12%|█▏        | 39/318 [00:58<07:04,  1.52s/it]


Loss: 0.122 | Epoch: 1/2 | 357/318


Epoch 2/2:  13%|█▎        | 40/318 [00:59<07:07,  1.54s/it]


Loss: 0.206 | Epoch: 1/2 | 358/318


Epoch 2/2:  13%|█▎        | 41/318 [01:01<06:52,  1.49s/it]


Loss: 0.232 | Epoch: 1/2 | 359/318


Epoch 2/2:  13%|█▎        | 42/318 [01:02<06:55,  1.51s/it]


Loss: 1.247 | Epoch: 1/2 | 360/318


Epoch 2/2:  14%|█▎        | 43/318 [01:04<06:59,  1.53s/it]


Loss: 1.086 | Epoch: 1/2 | 361/318


Epoch 2/2:  14%|█▍        | 44/318 [01:06<07:02,  1.54s/it]


Loss: 0.554 | Epoch: 1/2 | 362/318


Epoch 2/2:  14%|█▍        | 45/318 [01:07<07:04,  1.55s/it]


Loss: 0.115 | Epoch: 1/2 | 363/318


Epoch 2/2:  14%|█▍        | 46/318 [01:09<07:08,  1.58s/it]


Loss: 2.038 | Epoch: 1/2 | 364/318


Epoch 2/2:  15%|█▍        | 47/318 [01:10<07:04,  1.57s/it]


Loss: 0.237 | Epoch: 1/2 | 365/318


Epoch 2/2:  15%|█▌        | 48/318 [01:12<07:00,  1.56s/it]


Loss: 1.244 | Epoch: 1/2 | 366/318


Epoch 2/2:  15%|█▌        | 49/318 [01:13<06:44,  1.50s/it]


Loss: 0.438 | Epoch: 1/2 | 367/318


Epoch 2/2:  16%|█▌        | 50/318 [01:15<06:49,  1.53s/it]


Loss: 1.393 | Epoch: 1/2 | 368/318


Epoch 2/2:  16%|█▌        | 51/318 [01:16<06:48,  1.53s/it]


Loss: 0.027 | Epoch: 1/2 | 369/318


Epoch 2/2:  16%|█▋        | 52/318 [01:18<06:34,  1.48s/it]


Loss: 0.075 | Epoch: 1/2 | 370/318


Epoch 2/2:  17%|█▋        | 53/318 [01:19<06:40,  1.51s/it]


Loss: 0.158 | Epoch: 1/2 | 371/318


Epoch 2/2:  17%|█▋        | 54/318 [01:21<06:43,  1.53s/it]


Loss: 1.193 | Epoch: 1/2 | 372/318


Epoch 2/2:  17%|█▋        | 55/318 [01:22<06:32,  1.49s/it]


Loss: 0.174 | Epoch: 1/2 | 373/318


Epoch 2/2:  18%|█▊        | 56/318 [01:24<06:35,  1.51s/it]


Loss: 0.482 | Epoch: 1/2 | 374/318


Epoch 2/2:  18%|█▊        | 57/318 [01:25<06:35,  1.52s/it]


Loss: 0.043 | Epoch: 1/2 | 375/318


Epoch 2/2:  18%|█▊        | 58/318 [01:27<06:21,  1.47s/it]


Loss: 0.975 | Epoch: 1/2 | 376/318


Epoch 2/2:  19%|█▊        | 59/318 [01:28<06:14,  1.45s/it]


Loss: 0.431 | Epoch: 1/2 | 377/318


Epoch 2/2:  19%|█▉        | 60/318 [01:30<06:23,  1.49s/it]


Loss: 0.762 | Epoch: 1/2 | 378/318


Epoch 2/2:  19%|█▉        | 61/318 [01:31<06:13,  1.45s/it]


Loss: 0.242 | Epoch: 1/2 | 379/318


Epoch 2/2:  19%|█▉        | 62/318 [01:33<06:18,  1.48s/it]


Loss: 0.681 | Epoch: 1/2 | 380/318


Epoch 2/2:  20%|█▉        | 63/318 [01:34<06:08,  1.45s/it]


Loss: 0.982 | Epoch: 1/2 | 381/318


Epoch 2/2:  20%|██        | 64/318 [01:36<06:16,  1.48s/it]


Loss: 0.971 | Epoch: 1/2 | 382/318


Epoch 2/2:  20%|██        | 65/318 [01:37<06:21,  1.51s/it]


Loss: 0.375 | Epoch: 1/2 | 383/318


Epoch 2/2:  21%|██        | 66/318 [01:39<06:24,  1.53s/it]


Loss: 0.088 | Epoch: 1/2 | 384/318


Epoch 2/2:  21%|██        | 67/318 [01:40<06:25,  1.54s/it]


Loss: 0.127 | Epoch: 1/2 | 385/318


Epoch 2/2:  21%|██▏       | 68/318 [01:42<06:25,  1.54s/it]


Loss: 0.852 | Epoch: 1/2 | 386/318


Epoch 2/2:  22%|██▏       | 69/318 [01:43<06:23,  1.54s/it]


Loss: 0.081 | Epoch: 1/2 | 387/318


Epoch 2/2:  22%|██▏       | 70/318 [01:45<06:24,  1.55s/it]


Loss: 0.459 | Epoch: 1/2 | 388/318


Epoch 2/2:  22%|██▏       | 71/318 [01:46<06:19,  1.54s/it]


Loss: 1.082 | Epoch: 1/2 | 389/318


Epoch 2/2:  23%|██▎       | 72/318 [01:48<06:21,  1.55s/it]


Loss: 0.101 | Epoch: 1/2 | 390/318


Epoch 2/2:  23%|██▎       | 73/318 [01:49<06:09,  1.51s/it]


Loss: 0.361 | Epoch: 1/2 | 391/318


Epoch 2/2:  23%|██▎       | 74/318 [01:51<06:09,  1.52s/it]


Loss: 0.057 | Epoch: 1/2 | 392/318


Epoch 2/2:  24%|██▎       | 75/318 [01:53<06:12,  1.53s/it]


Loss: 1.62 | Epoch: 1/2 | 393/318


Epoch 2/2:  24%|██▍       | 76/318 [01:54<06:14,  1.55s/it]


Loss: 1.6 | Epoch: 1/2 | 394/318


Epoch 2/2:  24%|██▍       | 77/318 [01:56<06:12,  1.55s/it]


Loss: 0.17 | Epoch: 1/2 | 395/318


Epoch 2/2:  25%|██▍       | 78/318 [01:57<06:07,  1.53s/it]


Loss: 0.13 | Epoch: 1/2 | 396/318


Epoch 2/2:  25%|██▍       | 79/318 [01:59<06:05,  1.53s/it]


Loss: 0.218 | Epoch: 1/2 | 397/318


Epoch 2/2:  25%|██▌       | 80/318 [02:00<06:07,  1.54s/it]


Loss: 0.831 | Epoch: 1/2 | 398/318


Epoch 2/2:  25%|██▌       | 81/318 [02:02<06:08,  1.55s/it]


Loss: 2.0 | Epoch: 1/2 | 399/318


Epoch 2/2:  26%|██▌       | 82/318 [02:03<06:03,  1.54s/it]


Loss: 0.576 | Epoch: 1/2 | 400/318


Epoch 2/2:  26%|██▌       | 83/318 [02:05<05:50,  1.49s/it]


Loss: 0.184 | Epoch: 1/2 | 401/318


Epoch 2/2:  26%|██▋       | 84/318 [02:06<05:54,  1.51s/it]


Loss: 0.5 | Epoch: 1/2 | 402/318


Epoch 2/2:  27%|██▋       | 85/318 [02:08<05:55,  1.53s/it]


Loss: 0.962 | Epoch: 1/2 | 403/318


Epoch 2/2:  27%|██▋       | 86/318 [02:09<05:55,  1.53s/it]


Loss: 0.852 | Epoch: 1/2 | 404/318


Epoch 2/2:  27%|██▋       | 87/318 [02:11<05:54,  1.53s/it]


Loss: 0.482 | Epoch: 1/2 | 405/318


Epoch 2/2:  28%|██▊       | 88/318 [02:12<05:44,  1.50s/it]


Loss: 0.172 | Epoch: 1/2 | 406/318


Epoch 2/2:  28%|██▊       | 89/318 [02:14<05:49,  1.53s/it]


Loss: 0.17 | Epoch: 1/2 | 407/318


Epoch 2/2:  28%|██▊       | 90/318 [02:15<05:49,  1.53s/it]


Loss: 0.075 | Epoch: 1/2 | 408/318


Epoch 2/2:  29%|██▊       | 91/318 [02:17<05:37,  1.49s/it]


Loss: 0.138 | Epoch: 1/2 | 409/318


Epoch 2/2:  29%|██▉       | 92/318 [02:18<05:41,  1.51s/it]


Loss: 0.281 | Epoch: 1/2 | 410/318


Epoch 2/2:  29%|██▉       | 93/318 [02:20<05:41,  1.52s/it]


Loss: 0.04 | Epoch: 1/2 | 411/318


Epoch 2/2:  30%|██▉       | 94/318 [02:21<05:43,  1.53s/it]


Loss: 0.423 | Epoch: 1/2 | 412/318


Epoch 2/2:  30%|██▉       | 95/318 [02:23<05:42,  1.53s/it]


Loss: 1.192 | Epoch: 1/2 | 413/318


Epoch 2/2:  30%|███       | 96/318 [02:24<05:29,  1.49s/it]


Loss: 1.172 | Epoch: 1/2 | 414/318


Epoch 2/2:  31%|███       | 97/318 [02:26<05:32,  1.50s/it]


Loss: 0.046 | Epoch: 1/2 | 415/318


Epoch 2/2:  31%|███       | 98/318 [02:27<05:32,  1.51s/it]


Loss: 0.455 | Epoch: 1/2 | 416/318


Epoch 2/2:  31%|███       | 99/318 [02:29<05:35,  1.53s/it]


Loss: 0.153 | Epoch: 1/2 | 417/318


Epoch 2/2:  31%|███▏      | 100/318 [02:31<05:33,  1.53s/it]


Loss: 0.316 | Epoch: 1/2 | 418/318


Epoch 2/2:  32%|███▏      | 101/318 [02:32<05:32,  1.53s/it]


Loss: 0.304 | Epoch: 1/2 | 419/318


Epoch 2/2:  32%|███▏      | 102/318 [02:33<05:20,  1.48s/it]


Loss: 0.079 | Epoch: 1/2 | 420/318


Epoch 2/2:  32%|███▏      | 103/318 [02:35<05:00,  1.40s/it]


Loss: 0.258 | Epoch: 1/2 | 421/318


Epoch 2/2:  33%|███▎      | 104/318 [02:36<04:59,  1.40s/it]


Loss: 1.084 | Epoch: 1/2 | 422/318


Epoch 2/2:  33%|███▎      | 105/318 [02:38<05:10,  1.46s/it]


Loss: 0.656 | Epoch: 1/2 | 423/318


Epoch 2/2:  33%|███▎      | 106/318 [02:39<05:03,  1.43s/it]


Loss: 0.541 | Epoch: 1/2 | 424/318


Epoch 2/2:  34%|███▎      | 107/318 [02:41<05:04,  1.44s/it]


Loss: 0.135 | Epoch: 1/2 | 425/318


Epoch 2/2:  34%|███▍      | 108/318 [02:42<05:10,  1.48s/it]


Loss: 1.138 | Epoch: 1/2 | 426/318


Epoch 2/2:  34%|███▍      | 109/318 [02:44<05:12,  1.49s/it]


Loss: 0.073 | Epoch: 1/2 | 427/318


Epoch 2/2:  35%|███▍      | 110/318 [02:45<05:13,  1.51s/it]


Loss: 0.368 | Epoch: 1/2 | 428/318


Epoch 2/2:  35%|███▍      | 111/318 [02:47<05:16,  1.53s/it]


Loss: 0.319 | Epoch: 1/2 | 429/318


Epoch 2/2:  35%|███▌      | 112/318 [02:48<05:15,  1.53s/it]


Loss: 0.684 | Epoch: 1/2 | 430/318


Epoch 2/2:  36%|███▌      | 113/318 [02:50<05:16,  1.55s/it]


Loss: 0.022 | Epoch: 1/2 | 431/318


Epoch 2/2:  36%|███▌      | 114/318 [02:51<05:13,  1.53s/it]


Loss: 0.167 | Epoch: 1/2 | 432/318


Epoch 2/2:  36%|███▌      | 115/318 [02:53<05:10,  1.53s/it]


Loss: 0.06 | Epoch: 1/2 | 433/318


Epoch 2/2:  36%|███▋      | 116/318 [02:54<05:11,  1.54s/it]


Loss: 0.028 | Epoch: 1/2 | 434/318


Epoch 2/2:  37%|███▋      | 117/318 [02:56<05:12,  1.55s/it]


Loss: 2.798 | Epoch: 1/2 | 435/318


Epoch 2/2:  37%|███▋      | 118/318 [02:58<05:10,  1.55s/it]


Loss: 0.292 | Epoch: 1/2 | 436/318


Epoch 2/2:  37%|███▋      | 119/318 [02:59<05:09,  1.55s/it]


Loss: 0.163 | Epoch: 1/2 | 437/318


Epoch 2/2:  38%|███▊      | 120/318 [03:01<05:07,  1.55s/it]


Loss: 0.06 | Epoch: 1/2 | 438/318


Epoch 2/2:  38%|███▊      | 121/318 [03:02<05:04,  1.55s/it]


Loss: 0.343 | Epoch: 1/2 | 439/318


Epoch 2/2:  38%|███▊      | 122/318 [03:04<05:03,  1.55s/it]


Loss: 1.427 | Epoch: 1/2 | 440/318


Epoch 2/2:  39%|███▊      | 123/318 [03:05<05:03,  1.56s/it]


Loss: 0.019 | Epoch: 1/2 | 441/318


Epoch 2/2:  39%|███▉      | 124/318 [03:07<05:00,  1.55s/it]


Loss: 0.874 | Epoch: 1/2 | 442/318


Epoch 2/2:  39%|███▉      | 125/318 [03:08<04:58,  1.55s/it]


Loss: 1.385 | Epoch: 1/2 | 443/318


Epoch 2/2:  40%|███▉      | 126/318 [03:10<04:56,  1.54s/it]


Loss: 0.187 | Epoch: 1/2 | 444/318


Epoch 2/2:  40%|███▉      | 127/318 [03:12<04:59,  1.57s/it]


Loss: 0.023 | Epoch: 1/2 | 445/318


Epoch 2/2:  40%|████      | 128/318 [03:13<04:55,  1.56s/it]


Loss: 0.346 | Epoch: 1/2 | 446/318


Epoch 2/2:  41%|████      | 129/318 [03:15<04:53,  1.55s/it]


Loss: 0.487 | Epoch: 1/2 | 447/318


Epoch 2/2:  41%|████      | 130/318 [03:16<04:53,  1.56s/it]


Loss: 0.548 | Epoch: 1/2 | 448/318


Epoch 2/2:  41%|████      | 131/318 [03:18<04:52,  1.56s/it]


Loss: 0.023 | Epoch: 1/2 | 449/318


Epoch 2/2:  42%|████▏     | 132/318 [03:19<04:48,  1.55s/it]


Loss: 0.471 | Epoch: 1/2 | 450/318


Epoch 2/2:  42%|████▏     | 133/318 [03:21<04:46,  1.55s/it]


Loss: 0.064 | Epoch: 1/2 | 451/318


Epoch 2/2:  42%|████▏     | 134/318 [03:22<04:46,  1.56s/it]


Loss: 0.369 | Epoch: 1/2 | 452/318


Epoch 2/2:  42%|████▏     | 135/318 [03:24<04:36,  1.51s/it]


Loss: 0.502 | Epoch: 1/2 | 453/318


Epoch 2/2:  43%|████▎     | 136/318 [03:25<04:36,  1.52s/it]


Loss: 0.231 | Epoch: 1/2 | 454/318


Epoch 2/2:  43%|████▎     | 137/318 [03:27<04:34,  1.52s/it]


Loss: 0.349 | Epoch: 1/2 | 455/318


Epoch 2/2:  43%|████▎     | 138/318 [03:28<04:33,  1.52s/it]


Loss: 0.028 | Epoch: 1/2 | 456/318


Epoch 2/2:  44%|████▎     | 139/318 [03:30<04:33,  1.53s/it]


Loss: 0.718 | Epoch: 1/2 | 457/318


Epoch 2/2:  44%|████▍     | 140/318 [03:32<04:34,  1.54s/it]


Loss: 0.088 | Epoch: 1/2 | 458/318


Epoch 2/2:  44%|████▍     | 141/318 [03:33<04:32,  1.54s/it]


Loss: 0.828 | Epoch: 1/2 | 459/318


Epoch 2/2:  45%|████▍     | 142/318 [03:35<04:33,  1.56s/it]


Loss: 0.477 | Epoch: 1/2 | 460/318


Epoch 2/2:  45%|████▍     | 143/318 [03:36<04:31,  1.55s/it]


Loss: 0.952 | Epoch: 1/2 | 461/318


Epoch 2/2:  45%|████▌     | 144/318 [03:38<04:30,  1.56s/it]


Loss: 0.251 | Epoch: 1/2 | 462/318


Epoch 2/2:  46%|████▌     | 145/318 [03:39<04:30,  1.56s/it]


Loss: 1.076 | Epoch: 1/2 | 463/318


Epoch 2/2:  46%|████▌     | 146/318 [03:41<04:28,  1.56s/it]


Loss: 0.785 | Epoch: 1/2 | 464/318


Epoch 2/2:  46%|████▌     | 147/318 [03:42<04:27,  1.57s/it]


Loss: 0.747 | Epoch: 1/2 | 465/318


Epoch 2/2:  47%|████▋     | 148/318 [03:44<04:27,  1.57s/it]


Loss: 0.065 | Epoch: 1/2 | 466/318


Epoch 2/2:  47%|████▋     | 149/318 [03:46<04:24,  1.56s/it]


Loss: 0.483 | Epoch: 1/2 | 467/318


Epoch 2/2:  47%|████▋     | 150/318 [03:47<04:21,  1.55s/it]


Loss: 0.173 | Epoch: 1/2 | 468/318


Epoch 2/2:  47%|████▋     | 151/318 [03:49<04:20,  1.56s/it]


Loss: 0.839 | Epoch: 1/2 | 469/318


Epoch 2/2:  48%|████▊     | 152/318 [03:50<04:18,  1.56s/it]


Loss: 1.034 | Epoch: 1/2 | 470/318


Epoch 2/2:  48%|████▊     | 153/318 [03:52<04:16,  1.55s/it]


Loss: 0.099 | Epoch: 1/2 | 471/318


Epoch 2/2:  48%|████▊     | 154/318 [03:53<04:15,  1.55s/it]


Loss: 0.498 | Epoch: 1/2 | 472/318


Epoch 2/2:  49%|████▊     | 155/318 [03:55<04:14,  1.56s/it]


Loss: 0.432 | Epoch: 1/2 | 473/318


Epoch 2/2:  49%|████▉     | 156/318 [03:57<04:13,  1.56s/it]


Loss: 0.921 | Epoch: 1/2 | 474/318


Epoch 2/2:  49%|████▉     | 157/318 [03:58<04:10,  1.55s/it]


Loss: 0.079 | Epoch: 1/2 | 475/318


Epoch 2/2:  50%|████▉     | 158/318 [04:00<04:07,  1.55s/it]


Loss: 1.257 | Epoch: 1/2 | 476/318


Epoch 2/2:  50%|█████     | 159/318 [04:01<04:06,  1.55s/it]


Loss: 0.934 | Epoch: 1/2 | 477/318


Epoch 2/2:  50%|█████     | 160/318 [04:03<04:04,  1.54s/it]


Loss: 0.174 | Epoch: 1/2 | 478/318


Epoch 2/2:  51%|█████     | 161/318 [04:04<04:03,  1.55s/it]


Loss: 1.174 | Epoch: 1/2 | 479/318


Epoch 2/2:  51%|█████     | 162/318 [04:06<04:03,  1.56s/it]


Loss: 0.11 | Epoch: 1/2 | 480/318


Epoch 2/2:  51%|█████▏    | 163/318 [04:07<04:00,  1.55s/it]


Loss: 0.456 | Epoch: 1/2 | 481/318


Epoch 2/2:  52%|█████▏    | 164/318 [04:09<04:00,  1.56s/it]


Loss: 1.286 | Epoch: 1/2 | 482/318


Epoch 2/2:  52%|█████▏    | 165/318 [04:10<03:50,  1.50s/it]


Loss: 0.897 | Epoch: 1/2 | 483/318


Epoch 2/2:  52%|█████▏    | 166/318 [04:12<03:42,  1.47s/it]


Loss: 0.61 | Epoch: 1/2 | 484/318


Epoch 2/2:  53%|█████▎    | 167/318 [04:13<03:44,  1.49s/it]


Loss: 0.794 | Epoch: 1/2 | 485/318


Epoch 2/2:  53%|█████▎    | 168/318 [04:15<03:47,  1.52s/it]


Loss: 0.221 | Epoch: 1/2 | 486/318


Epoch 2/2:  53%|█████▎    | 169/318 [04:16<03:48,  1.53s/it]


Loss: 0.523 | Epoch: 1/2 | 487/318


Epoch 2/2:  53%|█████▎    | 170/318 [04:18<03:39,  1.49s/it]


Loss: 0.191 | Epoch: 1/2 | 488/318


Epoch 2/2:  54%|█████▍    | 171/318 [04:19<03:40,  1.50s/it]


Loss: 0.285 | Epoch: 1/2 | 489/318


Epoch 2/2:  54%|█████▍    | 172/318 [04:21<03:41,  1.51s/it]


Loss: 0.628 | Epoch: 1/2 | 490/318


Epoch 2/2:  54%|█████▍    | 173/318 [04:22<03:33,  1.47s/it]


Loss: 0.813 | Epoch: 1/2 | 491/318


Epoch 2/2:  55%|█████▍    | 174/318 [04:24<03:27,  1.44s/it]


Loss: 0.422 | Epoch: 1/2 | 492/318


Epoch 2/2:  55%|█████▌    | 175/318 [04:25<03:28,  1.46s/it]


Loss: 0.355 | Epoch: 1/2 | 493/318


Epoch 2/2:  55%|█████▌    | 176/318 [04:27<03:30,  1.48s/it]


Loss: 0.755 | Epoch: 1/2 | 494/318


Epoch 2/2:  56%|█████▌    | 177/318 [04:28<03:25,  1.45s/it]


Loss: 0.197 | Epoch: 1/2 | 495/318


Epoch 2/2:  56%|█████▌    | 178/318 [04:29<03:20,  1.43s/it]


Loss: 0.32 | Epoch: 1/2 | 496/318


Epoch 2/2:  56%|█████▋    | 179/318 [04:31<03:23,  1.47s/it]


Loss: 0.086 | Epoch: 1/2 | 497/318


Epoch 2/2:  57%|█████▋    | 180/318 [04:32<03:25,  1.49s/it]


Loss: 0.021 | Epoch: 1/2 | 498/318


Epoch 2/2:  57%|█████▋    | 181/318 [04:34<03:27,  1.51s/it]


Loss: 1.187 | Epoch: 1/2 | 499/318


Epoch 2/2:  57%|█████▋    | 182/318 [04:36<03:24,  1.50s/it]


Loss: 0.96 | Epoch: 1/2 | 500/318


Epoch 2/2:  58%|█████▊    | 183/318 [04:37<03:24,  1.51s/it]


Loss: 0.944 | Epoch: 1/2 | 501/318


Epoch 2/2:  58%|█████▊    | 184/318 [04:39<03:23,  1.52s/it]


Loss: 0.444 | Epoch: 1/2 | 502/318


Epoch 2/2:  58%|█████▊    | 185/318 [04:40<03:17,  1.48s/it]


Loss: 0.046 | Epoch: 1/2 | 503/318


Epoch 2/2:  58%|█████▊    | 186/318 [04:41<03:16,  1.49s/it]


Loss: 0.053 | Epoch: 1/2 | 504/318


Epoch 2/2:  59%|█████▉    | 187/318 [04:43<03:16,  1.50s/it]


Loss: 0.103 | Epoch: 1/2 | 505/318


Epoch 2/2:  59%|█████▉    | 188/318 [04:44<03:11,  1.47s/it]


Loss: 0.339 | Epoch: 1/2 | 506/318


Epoch 2/2:  59%|█████▉    | 189/318 [04:46<03:13,  1.50s/it]


Loss: 0.632 | Epoch: 1/2 | 507/318


Epoch 2/2:  60%|█████▉    | 190/318 [04:48<03:15,  1.53s/it]


Loss: 0.208 | Epoch: 1/2 | 508/318


Epoch 2/2:  60%|██████    | 191/318 [04:49<03:14,  1.53s/it]


Loss: 0.702 | Epoch: 1/2 | 509/318


Epoch 2/2:  60%|██████    | 192/318 [04:51<03:13,  1.53s/it]


Loss: 0.884 | Epoch: 1/2 | 510/318


Epoch 2/2:  61%|██████    | 193/318 [04:52<03:10,  1.53s/it]


Loss: 0.381 | Epoch: 1/2 | 511/318


Epoch 2/2:  61%|██████    | 194/318 [04:54<03:09,  1.53s/it]


Loss: 0.477 | Epoch: 1/2 | 512/318


Epoch 2/2:  61%|██████▏   | 195/318 [04:55<03:08,  1.53s/it]


Loss: 0.354 | Epoch: 1/2 | 513/318


Epoch 2/2:  62%|██████▏   | 196/318 [04:57<03:05,  1.52s/it]


Loss: 0.92 | Epoch: 1/2 | 514/318


Epoch 2/2:  62%|██████▏   | 197/318 [04:58<03:03,  1.52s/it]


Loss: 0.287 | Epoch: 1/2 | 515/318


Epoch 2/2:  62%|██████▏   | 198/318 [05:00<03:02,  1.52s/it]


Loss: 0.389 | Epoch: 1/2 | 516/318


Epoch 2/2:  63%|██████▎   | 199/318 [05:01<03:01,  1.53s/it]


Loss: 0.263 | Epoch: 1/2 | 517/318


Epoch 2/2:  63%|██████▎   | 200/318 [05:03<03:00,  1.53s/it]


Loss: 0.31 | Epoch: 1/2 | 518/318


Epoch 2/2:  63%|██████▎   | 201/318 [05:04<03:00,  1.54s/it]


Loss: 0.118 | Epoch: 1/2 | 519/318


Epoch 2/2:  64%|██████▎   | 202/318 [05:06<02:59,  1.55s/it]


Loss: 0.409 | Epoch: 1/2 | 520/318


Epoch 2/2:  64%|██████▍   | 203/318 [05:08<02:57,  1.54s/it]


Loss: 0.415 | Epoch: 1/2 | 521/318


Epoch 2/2:  64%|██████▍   | 204/318 [05:09<02:55,  1.54s/it]


Loss: 0.11 | Epoch: 1/2 | 522/318


Epoch 2/2:  64%|██████▍   | 205/318 [05:11<02:55,  1.55s/it]


Loss: 0.901 | Epoch: 1/2 | 523/318


Epoch 2/2:  65%|██████▍   | 206/318 [05:12<02:53,  1.55s/it]


Loss: 1.268 | Epoch: 1/2 | 524/318


Epoch 2/2:  65%|██████▌   | 207/318 [05:14<02:51,  1.54s/it]


Loss: 0.277 | Epoch: 1/2 | 525/318


Epoch 2/2:  65%|██████▌   | 208/318 [05:15<02:49,  1.54s/it]


Loss: 0.275 | Epoch: 1/2 | 526/318


Epoch 2/2:  66%|██████▌   | 209/318 [05:17<02:48,  1.54s/it]


Loss: 0.853 | Epoch: 1/2 | 527/318


Epoch 2/2:  66%|██████▌   | 210/318 [05:18<02:46,  1.54s/it]


Loss: 0.457 | Epoch: 1/2 | 528/318


Epoch 2/2:  66%|██████▋   | 211/318 [05:20<02:45,  1.55s/it]


Loss: 0.62 | Epoch: 1/2 | 529/318


Epoch 2/2:  67%|██████▋   | 212/318 [05:21<02:44,  1.55s/it]


Loss: 0.41 | Epoch: 1/2 | 530/318


Epoch 2/2:  67%|██████▋   | 213/318 [05:23<02:42,  1.55s/it]


Loss: 0.426 | Epoch: 1/2 | 531/318


Epoch 2/2:  67%|██████▋   | 214/318 [05:25<02:42,  1.56s/it]


Loss: 0.178 | Epoch: 1/2 | 532/318


Epoch 2/2:  68%|██████▊   | 215/318 [05:26<02:39,  1.55s/it]


Loss: 0.185 | Epoch: 1/2 | 533/318


Epoch 2/2:  68%|██████▊   | 216/318 [05:28<02:38,  1.56s/it]


Loss: 0.163 | Epoch: 1/2 | 534/318


Epoch 2/2:  68%|██████▊   | 217/318 [05:29<02:37,  1.56s/it]


Loss: 0.332 | Epoch: 1/2 | 535/318


Epoch 2/2:  69%|██████▊   | 218/318 [05:31<02:30,  1.51s/it]


Loss: 1.559 | Epoch: 1/2 | 536/318


Epoch 2/2:  69%|██████▉   | 219/318 [05:32<02:31,  1.53s/it]


Loss: 0.627 | Epoch: 1/2 | 537/318


Epoch 2/2:  69%|██████▉   | 220/318 [05:34<02:25,  1.48s/it]


Loss: 0.088 | Epoch: 1/2 | 538/318


Epoch 2/2:  69%|██████▉   | 221/318 [05:35<02:24,  1.49s/it]


Loss: 0.584 | Epoch: 1/2 | 539/318


Epoch 2/2:  70%|██████▉   | 222/318 [05:37<02:24,  1.51s/it]


Loss: 1.343 | Epoch: 1/2 | 540/318


Epoch 2/2:  70%|███████   | 223/318 [05:38<02:23,  1.51s/it]


Loss: 0.05 | Epoch: 1/2 | 541/318


Epoch 2/2:  70%|███████   | 224/318 [05:40<02:23,  1.53s/it]


Loss: 0.682 | Epoch: 1/2 | 542/318


Epoch 2/2:  71%|███████   | 225/318 [05:41<02:22,  1.53s/it]


Loss: 0.277 | Epoch: 1/2 | 543/318


Epoch 2/2:  71%|███████   | 226/318 [05:43<02:21,  1.53s/it]


Loss: 0.063 | Epoch: 1/2 | 544/318


Epoch 2/2:  71%|███████▏  | 227/318 [05:44<02:19,  1.54s/it]


Loss: 0.17 | Epoch: 1/2 | 545/318


Epoch 2/2:  72%|███████▏  | 228/318 [05:46<02:18,  1.53s/it]


Loss: 0.385 | Epoch: 1/2 | 546/318


Epoch 2/2:  72%|███████▏  | 229/318 [05:47<02:17,  1.54s/it]


Loss: 2.234 | Epoch: 1/2 | 547/318


Epoch 2/2:  72%|███████▏  | 230/318 [05:49<02:15,  1.54s/it]


Loss: 0.599 | Epoch: 1/2 | 548/318


Epoch 2/2:  73%|███████▎  | 231/318 [05:50<02:10,  1.49s/it]


Loss: 0.03 | Epoch: 1/2 | 549/318


Epoch 2/2:  73%|███████▎  | 232/318 [05:52<02:09,  1.51s/it]


Loss: 0.207 | Epoch: 1/2 | 550/318


Epoch 2/2:  73%|███████▎  | 233/318 [05:53<02:07,  1.50s/it]


Loss: 0.151 | Epoch: 1/2 | 551/318


Epoch 2/2:  74%|███████▎  | 234/318 [05:55<02:07,  1.51s/it]


Loss: 0.434 | Epoch: 1/2 | 552/318


Epoch 2/2:  74%|███████▍  | 235/318 [05:56<02:06,  1.52s/it]


Loss: 0.85 | Epoch: 1/2 | 553/318


Epoch 2/2:  74%|███████▍  | 236/318 [05:58<02:05,  1.54s/it]


Loss: 0.046 | Epoch: 1/2 | 554/318


Epoch 2/2:  75%|███████▍  | 237/318 [06:00<02:04,  1.54s/it]


Loss: 0.912 | Epoch: 1/2 | 555/318


Epoch 2/2:  75%|███████▍  | 238/318 [06:01<02:03,  1.54s/it]


Loss: 1.823 | Epoch: 1/2 | 556/318


Epoch 2/2:  75%|███████▌  | 239/318 [06:03<01:58,  1.49s/it]


Loss: 0.791 | Epoch: 1/2 | 557/318


Epoch 2/2:  75%|███████▌  | 240/318 [06:04<01:58,  1.52s/it]


Loss: 1.221 | Epoch: 1/2 | 558/318


Epoch 2/2:  76%|███████▌  | 241/318 [06:06<01:58,  1.54s/it]


Loss: 0.877 | Epoch: 1/2 | 559/318


Epoch 2/2:  76%|███████▌  | 242/318 [06:07<01:57,  1.55s/it]


Loss: 0.655 | Epoch: 1/2 | 560/318


Epoch 2/2:  76%|███████▋  | 243/318 [06:09<01:56,  1.56s/it]


Loss: 0.029 | Epoch: 1/2 | 561/318


Epoch 2/2:  77%|███████▋  | 244/318 [06:10<01:55,  1.56s/it]


Loss: 0.665 | Epoch: 1/2 | 562/318


Epoch 2/2:  77%|███████▋  | 245/318 [06:12<01:52,  1.55s/it]


Loss: 0.135 | Epoch: 1/2 | 563/318


Epoch 2/2:  77%|███████▋  | 246/318 [06:13<01:50,  1.54s/it]


Loss: 0.094 | Epoch: 1/2 | 564/318


Epoch 2/2:  78%|███████▊  | 247/318 [06:15<01:50,  1.56s/it]


Loss: 0.715 | Epoch: 1/2 | 565/318


Epoch 2/2:  78%|███████▊  | 248/318 [06:17<01:48,  1.55s/it]


Loss: 0.176 | Epoch: 1/2 | 566/318


Epoch 2/2:  78%|███████▊  | 249/318 [06:18<01:47,  1.56s/it]


Loss: 0.691 | Epoch: 1/2 | 567/318


Epoch 2/2:  79%|███████▊  | 250/318 [06:20<01:44,  1.54s/it]


Loss: 0.168 | Epoch: 1/2 | 568/318


Epoch 2/2:  79%|███████▉  | 251/318 [06:21<01:43,  1.54s/it]


Loss: 0.204 | Epoch: 1/2 | 569/318


Epoch 2/2:  79%|███████▉  | 252/318 [06:23<01:41,  1.54s/it]


Loss: 0.719 | Epoch: 1/2 | 570/318


Epoch 2/2:  80%|███████▉  | 253/318 [06:24<01:39,  1.54s/it]


Loss: 0.7 | Epoch: 1/2 | 571/318


Epoch 2/2:  80%|███████▉  | 254/318 [06:26<01:39,  1.56s/it]


Loss: 0.719 | Epoch: 1/2 | 572/318


Epoch 2/2:  80%|████████  | 255/318 [06:27<01:37,  1.55s/it]


Loss: 0.491 | Epoch: 1/2 | 573/318


Epoch 2/2:  81%|████████  | 256/318 [06:29<01:35,  1.55s/it]


Loss: 0.534 | Epoch: 1/2 | 574/318


Epoch 2/2:  81%|████████  | 257/318 [06:30<01:31,  1.51s/it]


Loss: 0.949 | Epoch: 1/2 | 575/318


Epoch 2/2:  81%|████████  | 258/318 [06:32<01:30,  1.51s/it]


Loss: 0.269 | Epoch: 1/2 | 576/318


Epoch 2/2:  81%|████████▏ | 259/318 [06:33<01:26,  1.47s/it]


Loss: 0.569 | Epoch: 1/2 | 577/318


Epoch 2/2:  82%|████████▏ | 260/318 [06:35<01:27,  1.50s/it]


Loss: 0.644 | Epoch: 1/2 | 578/318


Epoch 2/2:  82%|████████▏ | 261/318 [06:36<01:26,  1.51s/it]


Loss: 0.061 | Epoch: 1/2 | 579/318


Epoch 2/2:  82%|████████▏ | 262/318 [06:38<01:25,  1.52s/it]


Loss: 0.395 | Epoch: 1/2 | 580/318


Epoch 2/2:  83%|████████▎ | 263/318 [06:39<01:24,  1.53s/it]


Loss: 0.108 | Epoch: 1/2 | 581/318


Epoch 2/2:  83%|████████▎ | 264/318 [06:41<01:22,  1.53s/it]


Loss: 0.411 | Epoch: 1/2 | 582/318


Epoch 2/2:  83%|████████▎ | 265/318 [06:43<01:21,  1.55s/it]


Loss: 0.143 | Epoch: 1/2 | 583/318


Epoch 2/2:  84%|████████▎ | 266/318 [06:44<01:20,  1.54s/it]


Loss: 0.503 | Epoch: 1/2 | 584/318


Epoch 2/2:  84%|████████▍ | 267/318 [06:46<01:18,  1.54s/it]


Loss: 0.058 | Epoch: 1/2 | 585/318


Epoch 2/2:  84%|████████▍ | 268/318 [06:47<01:17,  1.55s/it]


Loss: 0.952 | Epoch: 1/2 | 586/318


Epoch 2/2:  85%|████████▍ | 269/318 [06:49<01:15,  1.54s/it]


Loss: 0.807 | Epoch: 1/2 | 587/318


Epoch 2/2:  85%|████████▍ | 270/318 [06:50<01:14,  1.55s/it]


Loss: 0.452 | Epoch: 1/2 | 588/318


Epoch 2/2:  85%|████████▌ | 271/318 [06:52<01:10,  1.51s/it]


Loss: 0.545 | Epoch: 1/2 | 589/318


Epoch 2/2:  86%|████████▌ | 272/318 [06:53<01:10,  1.53s/it]


Loss: 0.118 | Epoch: 1/2 | 590/318


Epoch 2/2:  86%|████████▌ | 273/318 [06:55<01:08,  1.53s/it]


Loss: 0.752 | Epoch: 1/2 | 591/318


Epoch 2/2:  86%|████████▌ | 274/318 [06:56<01:07,  1.54s/it]


Loss: 0.046 | Epoch: 1/2 | 592/318


Epoch 2/2:  86%|████████▋ | 275/318 [06:58<01:06,  1.55s/it]


Loss: 0.087 | Epoch: 1/2 | 593/318


Epoch 2/2:  87%|████████▋ | 276/318 [06:59<01:04,  1.53s/it]


Loss: 0.195 | Epoch: 1/2 | 594/318


Epoch 2/2:  87%|████████▋ | 277/318 [07:01<01:02,  1.52s/it]


Loss: 0.555 | Epoch: 1/2 | 595/318


Epoch 2/2:  87%|████████▋ | 278/318 [07:03<01:01,  1.53s/it]


Loss: 0.13 | Epoch: 1/2 | 596/318


Epoch 2/2:  88%|████████▊ | 279/318 [07:04<01:00,  1.54s/it]


Loss: 0.642 | Epoch: 1/2 | 597/318


Epoch 2/2:  88%|████████▊ | 280/318 [07:06<00:58,  1.55s/it]


Loss: 0.02 | Epoch: 1/2 | 598/318


Epoch 2/2:  88%|████████▊ | 281/318 [07:07<00:55,  1.50s/it]


Loss: 0.147 | Epoch: 1/2 | 599/318


Epoch 2/2:  89%|████████▊ | 282/318 [07:09<00:54,  1.51s/it]


Loss: 0.142 | Epoch: 1/2 | 600/318


Epoch 2/2:  89%|████████▉ | 283/318 [07:10<00:53,  1.52s/it]


Loss: 0.261 | Epoch: 1/2 | 601/318


Epoch 2/2:  89%|████████▉ | 284/318 [07:12<00:52,  1.53s/it]


Loss: 0.239 | Epoch: 1/2 | 602/318


Epoch 2/2:  90%|████████▉ | 285/318 [07:13<00:50,  1.54s/it]


Loss: 0.1 | Epoch: 1/2 | 603/318


Epoch 2/2:  90%|████████▉ | 286/318 [07:15<00:48,  1.50s/it]


Loss: 0.571 | Epoch: 1/2 | 604/318


Epoch 2/2:  90%|█████████ | 287/318 [07:16<00:46,  1.52s/it]


Loss: 0.175 | Epoch: 1/2 | 605/318


Epoch 2/2:  91%|█████████ | 288/318 [07:18<00:46,  1.54s/it]


Loss: 0.208 | Epoch: 1/2 | 606/318


Epoch 2/2:  91%|█████████ | 289/318 [07:19<00:43,  1.49s/it]


Loss: 0.473 | Epoch: 1/2 | 607/318


Epoch 2/2:  91%|█████████ | 290/318 [07:21<00:42,  1.51s/it]


Loss: 0.619 | Epoch: 1/2 | 608/318


Epoch 2/2:  92%|█████████▏| 291/318 [07:22<00:40,  1.51s/it]


Loss: 1.642 | Epoch: 1/2 | 609/318


Epoch 2/2:  92%|█████████▏| 292/318 [07:24<00:38,  1.48s/it]


Loss: 0.243 | Epoch: 1/2 | 610/318


Epoch 2/2:  92%|█████████▏| 293/318 [07:25<00:37,  1.51s/it]


Loss: 0.589 | Epoch: 1/2 | 611/318


Epoch 2/2:  92%|█████████▏| 294/318 [07:27<00:36,  1.52s/it]


Loss: 0.319 | Epoch: 1/2 | 612/318


Epoch 2/2:  93%|█████████▎| 295/318 [07:28<00:35,  1.53s/it]


Loss: 0.231 | Epoch: 1/2 | 613/318


Epoch 2/2:  93%|█████████▎| 296/318 [07:30<00:32,  1.49s/it]


Loss: 0.666 | Epoch: 1/2 | 614/318


Epoch 2/2:  93%|█████████▎| 297/318 [07:31<00:30,  1.47s/it]


Loss: 0.505 | Epoch: 1/2 | 615/318


Epoch 2/2:  94%|█████████▎| 298/318 [07:33<00:29,  1.49s/it]


Loss: 0.16 | Epoch: 1/2 | 616/318


Epoch 2/2:  94%|█████████▍| 299/318 [07:34<00:28,  1.50s/it]


Loss: 0.669 | Epoch: 1/2 | 617/318


Epoch 2/2:  94%|█████████▍| 300/318 [07:36<00:27,  1.52s/it]


Loss: 0.522 | Epoch: 1/2 | 618/318


Epoch 2/2:  95%|█████████▍| 301/318 [07:37<00:25,  1.47s/it]


Loss: 0.128 | Epoch: 1/2 | 619/318


Epoch 2/2:  95%|█████████▍| 302/318 [07:39<00:24,  1.50s/it]


Loss: 0.111 | Epoch: 1/2 | 620/318


Epoch 2/2:  95%|█████████▌| 303/318 [07:40<00:22,  1.47s/it]


Loss: 0.411 | Epoch: 1/2 | 621/318


Epoch 2/2:  96%|█████████▌| 304/318 [07:42<00:20,  1.50s/it]


Loss: 0.277 | Epoch: 1/2 | 622/318


Epoch 2/2:  96%|█████████▌| 305/318 [07:43<00:19,  1.52s/it]


Loss: 0.143 | Epoch: 1/2 | 623/318


Epoch 2/2:  96%|█████████▌| 306/318 [07:45<00:18,  1.51s/it]


Loss: 0.268 | Epoch: 1/2 | 624/318


Epoch 2/2:  97%|█████████▋| 307/318 [07:46<00:16,  1.52s/it]


Loss: 0.158 | Epoch: 1/2 | 625/318


Epoch 2/2:  97%|█████████▋| 308/318 [07:48<00:15,  1.53s/it]


Loss: 1.057 | Epoch: 1/2 | 626/318


Epoch 2/2:  97%|█████████▋| 309/318 [07:49<00:13,  1.54s/it]


Loss: 0.386 | Epoch: 1/2 | 627/318


Epoch 2/2:  97%|█████████▋| 310/318 [07:51<00:12,  1.55s/it]


Loss: 1.641 | Epoch: 1/2 | 628/318


Epoch 2/2:  98%|█████████▊| 311/318 [07:52<00:10,  1.55s/it]


Loss: 0.164 | Epoch: 1/2 | 629/318


Epoch 2/2:  98%|█████████▊| 312/318 [07:54<00:09,  1.55s/it]


Loss: 0.25 | Epoch: 1/2 | 630/318


Epoch 2/2:  98%|█████████▊| 313/318 [07:55<00:07,  1.50s/it]


Loss: 0.119 | Epoch: 1/2 | 631/318


Epoch 2/2:  99%|█████████▊| 314/318 [07:57<00:06,  1.51s/it]


Loss: 0.167 | Epoch: 1/2 | 632/318


Epoch 2/2:  99%|█████████▉| 315/318 [07:58<00:04,  1.48s/it]


Loss: 1.167 | Epoch: 1/2 | 633/318


Epoch 2/2:  99%|█████████▉| 316/318 [08:00<00:03,  1.50s/it]


Loss: 0.791 | Epoch: 1/2 | 634/318


Epoch 2/2: 100%|█████████▉| 317/318 [08:01<00:01,  1.52s/it]


Loss: 0.208 | Epoch: 1/2 | 635/318


Epoch 2/2: 100%|██████████| 318/318 [08:02<00:00,  1.52s/it]


Loss: 1.043 | Epoch: 1/2 | 636/318





In [None]:
#Save the safetensors file if you wish
SAVE_PATH = r""
model.save_pretrained(SAVE_PATH)

In [None]:
#Perform Experiment 3: VLM Fine Tuned and Independent MVQA Without Reports

model.eval()

ft_results_without_reports = {}

for i, entry in enumerate(eval_dataset):
  print(f"Question: {entry['questions']}")
  print(f"Ground Truth: {entry['answers']}")
  vlm_output = model.answer_question(model.encode_image(entry['images']), entry['questions'], tokenizer=tokenizer)
  print(f"Moondream: {vlm_output}")
  ft_results_without_reports[i] = [entry["questions"], entry["answers"], vlm_output]

Question: Where are the most infiltrates located?
Ground Truth: Left lung
Moondream: Right lung
Question: Which lung has more extensive infiltration?
Ground Truth: Left
Moondream: Right
Question: Are the borders of the heart clearly visualized?
Ground Truth: yes
Moondream: No
Question: Is this image a CT scan?
Ground Truth: no
Moondream: no
Question: Which cardiac border is more prominently visualized?
Ground Truth: Left
Moondream: Left
Question: Which side of the cardiac border is more prominent?
Ground Truth: Left
Moondream: Left
Question: Where is the cardiac border more obscured?
Ground Truth: Right
Moondream: Right side
Question: On which side is the cardiac border more obscured?
Ground Truth: Right
Moondream: Right
Question: What rib is the lesion located inferior to?
Ground Truth: 7th rib
Moondream: Bilateral
Question: At which rib is the lesion located?
Ground Truth: 7th rib
Moondream: 3rd rib
Question: What is the most important abnormality found in this image?
Ground Truth: W

In [None]:
#Perform analysis of Experiment 3
#Calculate SBERT Similarity Score, METEOR Score, and F1 Score

sbert_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
total_sim_score = 0

binary_answer_ground_truth = []
binary_answer_vlm = []
binary_answers = ["yes", "no"]

total_meteor_score = 0

for entry in ft_results_without_reports.keys():
  ground_truth = ft_results_without_reports[entry][1]
  vlm_prediction = ft_results_without_reports[entry][2]

  #Check if question has binary answer (ie a close ended question) and if so, keep track of ground truth and VLM answers
  if ground_truth.lower() in binary_answers and vlm_prediction.lower() in binary_answers:
    binary_answer_ground_truth.append(ground_truth.lower())
    binary_answer_vlm.append(vlm_prediction.lower())

  #If answer is incorrect, count similarity as zero manually becasue Sentence Bert does not always think yes is orthogonal to no
  if ground_truth.lower() == "yes" and vlm_prediction.lower() == "no" or ground_truth.lower() == "no" and vlm_prediction.lower() == "yes":
    sim_score = 0

  else:
    #SBERT Score
    ground_truth_embedding = sbert_model.encode(ground_truth)
    vlm_prediction_embedding = sbert_model.encode(vlm_prediction)
    sim_score = sbert_model.similarity(ground_truth_embedding, vlm_prediction_embedding).item() #returns Tensor so call .item()

    #METEOR Score
    meteor_score_with = meteor_score([nltk.word_tokenize(ground_truth.lower())], nltk.word_tokenize(vlm_prediction.lower()))
    total_meteor_score += meteor_score_with

  total_sim_score += sim_score

#Display Final Results for Experiment 3
avg_sim_score_with = round(total_sim_score / len(ft_results_without_reports), 2)
print(f"Average Similarity Score With FT without Reports: {avg_sim_score_with}")

avg_meteor_score_with = round(total_meteor_score / len(ft_results_without_reports), 2)
print(f"Average METEOR Score With FT without Reports: {avg_meteor_score_with}")

binary_f1_score = round(f1_score(binary_answer_ground_truth, binary_answer_vlm, average="binary", pos_label="yes"), 2)
print(f"Binary F1 Score: {binary_f1_score}")



Average Similarity Score With FT without Reports: 0.64
Average METEOR Score With FT without Reports: 0.29
Binary F1 Score: 0.64


In [None]:
#Perform Experiment 4: VLM Fine Tuned and Knowledge-Enhanced MVQA With Reports

model.eval()

ft_results_with_reports = {}

for i, entry in enumerate(eval_dataset):
  question = entry["questions"]

  #Get report for current image
  image_path = entry["image_names"]
  for image_data in image_data_with_report:
    if image_data[0] == image_path:
      report = image_data[3]
      break

  text_prompt = f"{question}\nMedical Report as reference: {report}"
  vlm_output = model.answer_question(model.encode_image(entry['images']), text_prompt, tokenizer=tokenizer)

  ft_results_with_reports[i] = [entry["questions"], entry["answers"], vlm_output]

  print(f"Question: {text_prompt}")
  print(f"Ground Truth: {entry['answers']}")
  print(f"Moondream: {vlm_output}\n")

Question: Where are the most infiltrates located?
Medical Report as reference: the heart is normal in size. the mediastinum is unremarkable. there is no pleural effusion or pneumothorax. there is mild degenerative changes of the spine.
Ground Truth: Left lung
Moondream: Right side

Question: Which lung has more extensive infiltration?
Medical Report as reference: the heart is normal in size. the mediastinum is unremarkable. there is no pleural effusion or pneumothorax. there is mild degenerative changes of the spine.
Ground Truth: Left
Moondream: Right lung

Question: Are the borders of the heart clearly visualized?
Medical Report as reference: heart size within normal limits stable mediastinal and hilar contours. no focal alveolar consolidation no definite pleural effusion seen. no typical findings of pulmonary edema. no typical findings of pulmonary edema.
Ground Truth: yes
Moondream: yes

Question: Is this image a CT scan?
Medical Report as reference: the lungs are clear. there is n

In [None]:
#Perform analysis of Experiment 4
#Calculate SBERT Similarity Score, METEOR Score, and F1 Score

sbert_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
total_sim_score = 0

binary_answer_ground_truth = []
binary_answer_vlm = []
binary_answers = ["yes", "no"]

total_meteor_score = 0

for entry in ft_results_with_reports.keys():
  ground_truth = ft_results_with_reports[entry][1]
  vlm_prediction = ft_results_with_reports[entry][2]

  #Check if question has binary answer (ie a close ended question) and if so, keep track of ground truth and VLM answers
  if ground_truth.lower() in binary_answers and vlm_prediction.lower() in binary_answers:
    binary_answer_ground_truth.append(ground_truth.lower())
    binary_answer_vlm.append(vlm_prediction.lower())

  #If answer is incorrect, count similarity as zero manually becasue Sentence Bert does not always think yes is orthogonal to no
  if ground_truth.lower() == "yes" and vlm_prediction.lower() == "no" or ground_truth.lower() == "no" and vlm_prediction.lower() == "yes":
    sim_score = 0

  else:
    #SBERT Score
    ground_truth_embedding = sbert_model.encode(ground_truth)
    vlm_prediction_embedding = sbert_model.encode(vlm_prediction)
    sim_score = sbert_model.similarity(ground_truth_embedding, vlm_prediction_embedding).item() #returns Tensor so call .item()

    #METEOR Score
    meteor_score_with = meteor_score([nltk.word_tokenize(ground_truth.lower())], nltk.word_tokenize(vlm_prediction.lower()))
    total_meteor_score += meteor_score_with

  total_sim_score += sim_score

#Display Final Results for Experiment 4
avg_sim_score_with = round(total_sim_score / len(ft_results_without_reports), 2)
print(f"Average Similarity Score With FT with Reports: {avg_sim_score_with}")

avg_meteor_score_with = round(total_meteor_score / len(ft_results_without_reports), 2)
print(f"Average METEOR Score With FT with Reports: {avg_meteor_score_with}")

binary_f1_score = round(f1_score(binary_answer_ground_truth, binary_answer_vlm, average="binary", pos_label="yes"), 2)
print(f"Binary F1 Score: {binary_f1_score}")



Average Similarity Score With FT with Reports: 0.61
Average METEOR Score With FT with Reports: 0.28
Binary F1 Score: 0.75


In [None]:
torch.cuda.empty_cache()

In [1]:
!pip freeze > Stage2B_MVQA_VLM.txt