In [1]:
import transformers 
import datasets
import torch
import logging
import json
import pandas as pd
from torch.utils.data import Subset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class MyDataset:
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data[index]
        return {
            'id': row['id'],
            'question': row['question'],
            'answer': row['answer'],
            'context': row['context'],
            'answer_start': row['answer_start'],
            'answer_end': row['answer_end']
        }

    def get(self, index):
        return self.__getitem__(index)

    def set(self, index, id=None, question=None, answer=None, context=None, answer_start=None, answer_end=None):
        if id is not None:
            self.data[index]['id'] = id
        if question is not None:
            self.data[index]['question'] = question
        if answer is not None:
            self.data[index]['answer'] = answer
        if context is not None:
            self.data[index]['context'] = context
        if answer_start is not None:
            self.data[index]['answer_start'] = answer_start
        if answer_end is not None:
            self.data[index]['answer_end'] = answer_end

In [3]:
dataset=torch.load("../datasets/ms-marco_train_qa.pt")

In [6]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'answer', 'context', 'answer_start', 'answer_end'],
        num_rows: 301763
    })
    test: Dataset({
        features: ['id', 'question', 'answer', 'context', 'answer_start', 'answer_end'],
        num_rows: 201176
    })
})

In [4]:
dataset=dataset.train_test_split(test_size=0.4)

In [5]:
dataset_train=dataset['train']

In [6]:
dataset_train[0]

{'id': 862709,
 'question': 'what is whatsapp executable',
 'answer': 'Known as BlackBerry Desktop Software and it is developed by Research In Motion.',
 'context': "Start FreeFixer and scan your computer again. If WhatsApp.exe still remains in the scan result, proceed with the next step. If WhatsApp.exe is gone from the scan result you're done. If WhatsApp.exe still remains in the scan result, check its checkbox again in the scan result and click Fix. Whatsapp.exe for pc social advice. Users interested in Whatsapp.exe for pc generally download: Free. Wassapp is a PC application developed to be a non-official client for WhatsApp Messenger. You just have to type your phone number, import your contacts and chat with them instantly using your keyboard. The application has a simple and easy to use interface. What is WhatsApp .exe ? WhatsApp .exe is known as BlackBerry Desktop Software and it is developed by Research In Motion , it is also developed by . We have seen about 2 different insta

In [7]:
dataset=dataset['test'].train_test_split(test_size=0.5)

In [8]:
dataset_valid=dataset['train']

In [9]:
dataset_valid[0]

{'id': 575498,
 'question': 'what are the uses of juniper berries',
 'answer': 'As a spice used in a wide variety of culinary dishes, as a primary flavoring in gin.Also used as the primary flavor in the liquor Jenever and sahti-style of beers.Juniper berry sauce is often a popular flavoring choice.',
 'context': "Comments. Submit. Juniper berries have been in use since the time of the Greeks. They have had many uses from medicinal to culinary for hundreds of years. If you have access to this interesting spice, you can use juniper berries in a number of different ways, from seasoning food to being used as a diuretic.However, fresh juniper berries can be dangerous to ingest for those with kidney problems or those who are pregnant.f you have access to this interesting spice, you can use juniper berries in a number of different ways, from seasoning food to being used as a diuretic. However, fresh juniper berries can be dangerous to ingest for those with kidney problems or those who are pre

In [10]:
dataset_test=dataset['test']

In [12]:
dataset_test[0]

{'id': 770053,
 'question': 'what is meant by the phrase the reflective practitioner',
 'answer': 'The capacity to reflect on action so as to engage in a process of continuous learning. According to one definition it involves paying critical attention to the practical values and theories which inform everyday actions, by examining practice reflectively and reflexively.',
 'context': "Give an overview of how reflection is used in nursing. Explain its relevance to nursing and how and when it is used. Explore the concepts of reflective practice and critical incident analysis. Introduce use of reflection for personal development. This page provides all possible meanings and translations of the word Reflective practice. Reflective practice. Reflective practice is the capacity to reflect on action so as to engage in a process of continuous learning. According to one definition it involves paying critical attention to the practical values and theories which inform everyday actions, by examini

In [4]:
squad=torch.load("../datasets/squad.pt")

In [14]:
squad['train']

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 87599
})

In [13]:
# calculate max context length for dataset
def calc_max_len(dataset):
  context_length_max=len(dataset[0]['context'])
  for i in range(len(dataset)):
    con_len=len(dataset[i]['context'])
    if(con_len<context_length_max):
      context_length_max=con_len
      print(context_length_max)
      print(dataset[i]['context'])
  return context_length_max


In [14]:
calc_max_len(dataset_train)

2769
Excess mucus in the stool is sometimes accompanied by other symptoms, which may be a sign of a bigger problem. These symptoms include: blood or pus in the stool. abdominal pain, cramping, or bloating. changes in bowel movements or habits. Diarrhea is not always a sign of illness. For some children, the occasional bout may be normal, even if it contains mucus. The key is to know when diarrhea is a symptom of a problem. Factors to evaluate include the frequency and consistency of the stool. A large increase in the amount of mucus in your child's feces may point to illness, however, especially if it is accompanied by loose stool or blood. The combination of watery stool and mucus may indicate a viral infection or medical problem with the intestines. Blood & Mucus: Work-up should be done to determine if you have inflammatory bowel disease(ulcerative colitis or crohn's ) depending upon the duration of the symptoms , there may of course be other explanation. Evaluation may include stool

301

In [16]:
# Preprocessing
# sep_token = '<sep>'
dataset_name = "ms_marco"
model_type="roberta"
model_name= "roberta-base"
models_dir = "saved_models/roberta-base_ms-marco_mod"
checkpoint = 'roberta-base'
max_input_length = 301


# ## Training
learning_rate = 3e-5
num_epochs = 2

In [17]:
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

In [27]:
def preprocess_function(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_input_length ,
        truncation="only_second",
        return_offsets_mapping=True,
        padding="max_length",
    )

    offset_mapping = inputs.pop("offset_mapping")
    # answers = examples["answer"]
    start_positions = []
    end_positions = []

    for i, offset in enumerate(offset_mapping):
        # answer = examples["answer_start"][i]
        
        start_char = examples["answer_start"][i]      
        end_char = examples["answer_end"][i]
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label it (0, 0)
        if offset[context_start][0] > end_char or offset[context_end][1] < start_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

In [None]:
# Transform your custom dataset to a PyTorch dataset
# dataset = datasets.Dataset.from_generator(
#     generator=lambda: iter(dataset)
# )

In [12]:
# dataset_val= datasets.Dataset.from_generator(
#     generator=lambda: iter(dataset_valid))

Downloading and preparing dataset generator/default to C:/Users/dama_/.cache/huggingface/datasets/generator/default-1f482045682b315d/0.0.0...


                                                                    

Dataset generator downloaded and prepared to C:/Users/dama_/.cache/huggingface/datasets/generator/default-1f482045682b315d/0.0.0. Subsequent calls will reuse this data.


In [16]:
# torch.save(dataset,"../datasets/ms-marco_train_qa.pt")

In [17]:
# torch.save(dataset_valid,"../datasets/ms-marco_valid_qa.pt")# 

In [29]:
dataset_train

Dataset({
    features: ['id', 'question', 'answer', 'context', 'answer_start', 'answer_end'],
    num_rows: 301763
})

In [38]:
dataset_train[0]

{'id': 584861,
 'question': 'what causes a rotator cuff tear',
 'answer': 'Bleeding and inflammation.',
 'context': "Print. The rotator cuff is a group of muscles and tendons that surround the shoulder joint, keeping the head of your upper arm bone firmly within the shallow socket of the shoulder. A rotator cuff injury can cause a dull ache in the shoulder, which often worsens when you try to sleep on the involved side.he rotator cuff is a group of muscles and tendons that surround the shoulder joint, keeping the head of your upper arm bone firmly within the shallow socket of the shoulder. The rotator cuff is a group of muscles and tendons that surround the shoulder joint, keeping the head of your upper arm bone firmly within the shallow socket of the shoulder.A rotator cuff injury can cause a dull ache in the shoulder, which often worsens when you try to sleep on the involved side.he rotator cuff is a group of muscles and tendons that surround the shoulder joint, keeping the head of y

In [15]:
dataset_train

Dataset({
    features: ['id', 'question', 'answer', 'context', 'answer_start', 'answer_end'],
    num_rows: 301763
})

In [29]:
tokenized_msmarco_train = dataset_train.map(preprocess_function, batched=True, remove_columns=dataset_train.column_names)

                                                                     

In [28]:
tokenized_msmarco_val=dataset_valid.map(preprocess_function, batched=True, remove_columns=dataset_valid.column_names)

                                                                     

In [30]:
data_collator = transformers.DefaultDataCollator()

In [31]:
model = transformers.AutoModelForQuestionAnswering.from_pretrained(model_name)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForQuestionAnswering: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use 

In [32]:
training_args = transformers.TrainingArguments(
    output_dir=models_dir,
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=num_epochs,
    weight_decay=0.01,
    push_to_hub=True,
)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_msmarco_train,
    eval_dataset=tokenized_msmarco_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

Cloning https://huggingface.co/damapika/roberta-base_ms-marco_mod into local empty directory.
Download file pytorch_model.bin:   0%|          | 316k/473M [00:01<26:14, 315kB/s]
[A

[A[A


[A[A[A



[A[A[A[A




[A[A[A[A[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A[A











[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A













[A[A[A[A[A[A[A[A[A[A[A[A[A[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A
















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















Download file pytorch_model.bin: 100%|██████████| 473M/473M [00:41<00:00, 2.70MB/s] 


















Download file pytorch_model.bin: 100%|██████████| 473M/473M [00:41<00:00, 12.1MB/s]
Downloa

In [33]:
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdamapika[0m. Use [1m`wandb login --relogin`[0m to force relogin


  1%|▏         | 500/37722 [16:12<28:34:14,  2.76s/it]

{'loss': 4.4283, 'learning_rate': 2.9602354063941467e-05, 'epoch': 0.03}


  3%|▎         | 1000/37722 [38:54<28:12:42,  2.77s/it]

{'loss': 4.1017, 'learning_rate': 2.9204708127882934e-05, 'epoch': 0.05}


  4%|▍         | 1500/37722 [1:01:06<23:13:34,  2.31s/it]

{'loss': 3.951, 'learning_rate': 2.88070621918244e-05, 'epoch': 0.08}


  5%|▌         | 2000/37722 [1:13:05<6:58:36,  1.42it/s]  

{'loss': 3.8818, 'learning_rate': 2.8409416255765864e-05, 'epoch': 0.11}


  7%|▋         | 2500/37722 [1:19:23<6:50:50,  1.43it/s] 

{'loss': 3.8779, 'learning_rate': 2.8011770319707334e-05, 'epoch': 0.13}


  8%|▊         | 3000/37722 [1:25:46<6:44:16,  1.43it/s] 

{'loss': 3.8209, 'learning_rate': 2.76141243836488e-05, 'epoch': 0.16}


  9%|▉         | 3500/37722 [1:32:03<6:43:28,  1.41it/s] 

{'loss': 3.8076, 'learning_rate': 2.7216478447590267e-05, 'epoch': 0.19}


 11%|█         | 4000/37722 [1:38:29<6:54:13,  1.36it/s] 

{'loss': 3.7648, 'learning_rate': 2.6818832511531734e-05, 'epoch': 0.21}


 12%|█▏        | 4500/37722 [1:45:07<6:25:22,  1.44it/s] 

{'loss': 3.7455, 'learning_rate': 2.64211865754732e-05, 'epoch': 0.24}


 13%|█▎        | 5000/37722 [1:51:38<6:58:24,  1.30it/s] 

{'loss': 3.7474, 'learning_rate': 2.6023540639414667e-05, 'epoch': 0.27}


 15%|█▍        | 5500/37722 [1:58:25<6:42:13,  1.34it/s] 

{'loss': 3.7625, 'learning_rate': 2.562589470335613e-05, 'epoch': 0.29}


 16%|█▌        | 6000/37722 [2:05:22<6:36:00,  1.34it/s] 

{'loss': 3.7728, 'learning_rate': 2.5228248767297597e-05, 'epoch': 0.32}


 17%|█▋        | 6500/37722 [2:11:52<6:14:01,  1.39it/s] 

{'loss': 3.7332, 'learning_rate': 2.4830602831239064e-05, 'epoch': 0.34}


 19%|█▊        | 7000/37722 [2:18:29<6:59:09,  1.22it/s] 

{'loss': 3.708, 'learning_rate': 2.443295689518053e-05, 'epoch': 0.37}


 20%|█▉        | 7500/37722 [2:24:59<6:01:17,  1.39it/s] 

{'loss': 3.6817, 'learning_rate': 2.4035310959122e-05, 'epoch': 0.4}


 21%|██        | 8000/37722 [2:31:27<5:56:03,  1.39it/s] 

{'loss': 3.7119, 'learning_rate': 2.3637665023063467e-05, 'epoch': 0.42}


 23%|██▎       | 8500/37722 [2:38:01<6:06:16,  1.33it/s] 

{'loss': 3.7066, 'learning_rate': 2.3240019087004934e-05, 'epoch': 0.45}


 24%|██▍       | 9000/37722 [2:44:45<5:45:43,  1.38it/s] 

{'loss': 3.6937, 'learning_rate': 2.2842373150946397e-05, 'epoch': 0.48}


 25%|██▌       | 9500/37722 [2:51:36<6:55:42,  1.13it/s] 

{'loss': 3.6698, 'learning_rate': 2.2444727214887864e-05, 'epoch': 0.5}


 27%|██▋       | 10000/37722 [2:58:25<5:30:35,  1.40it/s]

{'loss': 3.6692, 'learning_rate': 2.204708127882933e-05, 'epoch': 0.53}


 28%|██▊       | 10500/37722 [3:05:06<5:29:03,  1.38it/s] 

{'loss': 3.6411, 'learning_rate': 2.1649435342770797e-05, 'epoch': 0.56}


 29%|██▉       | 11000/37722 [3:11:42<6:00:28,  1.24it/s] 

{'loss': 3.6393, 'learning_rate': 2.1251789406712264e-05, 'epoch': 0.58}


 30%|███       | 11500/37722 [3:18:21<5:32:57,  1.31it/s] 

{'loss': 3.6269, 'learning_rate': 2.085414347065373e-05, 'epoch': 0.61}


 32%|███▏      | 12000/37722 [3:24:54<5:10:18,  1.38it/s] 

{'loss': 3.6599, 'learning_rate': 2.0456497534595197e-05, 'epoch': 0.64}


 33%|███▎      | 12500/37722 [3:31:23<5:04:14,  1.38it/s] 

{'loss': 3.6506, 'learning_rate': 2.0058851598536663e-05, 'epoch': 0.66}


 34%|███▍      | 13000/37722 [3:37:53<4:56:55,  1.39it/s] 

{'loss': 3.6232, 'learning_rate': 1.966120566247813e-05, 'epoch': 0.69}


 36%|███▌      | 13500/37722 [3:44:20<4:52:24,  1.38it/s] 

{'loss': 3.6405, 'learning_rate': 1.9263559726419597e-05, 'epoch': 0.72}


 37%|███▋      | 14000/37722 [3:50:42<4:26:10,  1.49it/s] 

{'loss': 3.6246, 'learning_rate': 1.8865913790361063e-05, 'epoch': 0.74}


 38%|███▊      | 14500/37722 [3:56:43<4:21:05,  1.48it/s] 

{'loss': 3.6585, 'learning_rate': 1.846826785430253e-05, 'epoch': 0.77}


 40%|███▉      | 15000/37722 [4:02:45<4:14:50,  1.49it/s] 

{'loss': 3.6049, 'learning_rate': 1.8070621918243997e-05, 'epoch': 0.8}


 41%|████      | 15500/37722 [4:08:47<4:09:24,  1.49it/s] 

{'loss': 3.6627, 'learning_rate': 1.7672975982185463e-05, 'epoch': 0.82}


 42%|████▏     | 16000/37722 [4:14:49<4:03:14,  1.49it/s] 

{'loss': 3.6189, 'learning_rate': 1.7275330046126927e-05, 'epoch': 0.85}


 44%|████▎     | 16500/37722 [4:20:50<3:57:27,  1.49it/s] 

{'loss': 3.6079, 'learning_rate': 1.6877684110068393e-05, 'epoch': 0.87}


 45%|████▌     | 17000/37722 [4:26:52<3:52:29,  1.49it/s] 

{'loss': 3.6234, 'learning_rate': 1.648003817400986e-05, 'epoch': 0.9}


 46%|████▋     | 17500/37722 [4:32:53<3:47:36,  1.48it/s] 

{'loss': 3.6029, 'learning_rate': 1.608239223795133e-05, 'epoch': 0.93}


 48%|████▊     | 18000/37722 [4:38:53<3:40:42,  1.49it/s] 

{'loss': 3.6369, 'learning_rate': 1.5684746301892797e-05, 'epoch': 0.95}


 49%|████▉     | 18500/37722 [4:44:55<3:35:38,  1.49it/s] 

{'loss': 3.5498, 'learning_rate': 1.5287100365834263e-05, 'epoch': 0.98}


                                                          
 50%|█████     | 18861/37722 [5:10:57<2:51:49,  1.83it/s]

{'eval_loss': 3.5602710247039795, 'eval_runtime': 1293.7999, 'eval_samples_per_second': 77.746, 'eval_steps_per_second': 4.859, 'epoch': 1.0}


 50%|█████     | 19000/37722 [5:12:30<3:30:00,  1.49it/s]    

{'loss': 3.5663, 'learning_rate': 1.4889454429775728e-05, 'epoch': 1.01}


 52%|█████▏    | 19500/37722 [5:18:32<3:23:59,  1.49it/s] 

{'loss': 3.4926, 'learning_rate': 1.4491808493717195e-05, 'epoch': 1.03}


 53%|█████▎    | 20000/37722 [5:24:34<3:18:12,  1.49it/s] 

{'loss': 3.4989, 'learning_rate': 1.4094162557658661e-05, 'epoch': 1.06}


 54%|█████▍    | 20500/37722 [5:30:35<3:12:51,  1.49it/s] 

{'loss': 3.4977, 'learning_rate': 1.3696516621600126e-05, 'epoch': 1.09}


 56%|█████▌    | 21000/37722 [5:36:37<3:08:23,  1.48it/s] 

{'loss': 3.499, 'learning_rate': 1.3298870685541595e-05, 'epoch': 1.11}


 57%|█████▋    | 21500/37722 [5:42:39<3:00:49,  1.50it/s] 

{'loss': 3.5178, 'learning_rate': 1.2901224749483061e-05, 'epoch': 1.14}


 58%|█████▊    | 22000/37722 [5:48:40<2:55:55,  1.49it/s] 

{'loss': 3.4858, 'learning_rate': 1.2503578813424528e-05, 'epoch': 1.17}


 60%|█████▉    | 22500/37722 [5:54:43<2:50:56,  1.48it/s] 

{'loss': 3.4604, 'learning_rate': 1.2105932877365993e-05, 'epoch': 1.19}


 61%|██████    | 23000/37722 [6:00:45<2:44:33,  1.49it/s] 

{'loss': 3.4937, 'learning_rate': 1.170828694130746e-05, 'epoch': 1.22}


 62%|██████▏   | 23500/37722 [6:06:47<2:39:20,  1.49it/s] 

{'loss': 3.5157, 'learning_rate': 1.1310641005248928e-05, 'epoch': 1.25}


 64%|██████▎   | 24000/37722 [6:12:49<2:33:56,  1.49it/s] 

{'loss': 3.4805, 'learning_rate': 1.0912995069190393e-05, 'epoch': 1.27}


 65%|██████▍   | 24500/37722 [6:18:50<2:28:13,  1.49it/s] 

{'loss': 3.4846, 'learning_rate': 1.051534913313186e-05, 'epoch': 1.3}


 66%|██████▋   | 25000/37722 [6:24:51<2:22:32,  1.49it/s] 

{'loss': 3.4874, 'learning_rate': 1.0117703197073326e-05, 'epoch': 1.33}


 68%|██████▊   | 25500/37722 [6:30:53<2:17:01,  1.49it/s] 

{'loss': 3.4966, 'learning_rate': 9.720057261014793e-06, 'epoch': 1.35}


 69%|██████▉   | 26000/37722 [6:36:55<2:11:23,  1.49it/s] 

{'loss': 3.5014, 'learning_rate': 9.32241132495626e-06, 'epoch': 1.38}


 70%|███████   | 26500/37722 [6:42:56<2:06:08,  1.48it/s] 

{'loss': 3.4864, 'learning_rate': 8.924765388897726e-06, 'epoch': 1.41}


 72%|███████▏  | 27000/37722 [6:48:57<2:00:50,  1.48it/s] 

{'loss': 3.4545, 'learning_rate': 8.527119452839193e-06, 'epoch': 1.43}


 73%|███████▎  | 27500/37722 [6:54:59<1:54:37,  1.49it/s] 

{'loss': 3.4538, 'learning_rate': 8.129473516780658e-06, 'epoch': 1.46}


 74%|███████▍  | 28000/37722 [7:01:01<1:49:10,  1.48it/s] 

{'loss': 3.4769, 'learning_rate': 7.731827580722124e-06, 'epoch': 1.48}


 76%|███████▌  | 28500/37722 [7:07:13<1:51:22,  1.38it/s] 

{'loss': 3.4826, 'learning_rate': 7.334181644663592e-06, 'epoch': 1.51}


 77%|███████▋  | 29000/37722 [7:13:41<1:45:29,  1.38it/s] 

{'loss': 3.5209, 'learning_rate': 6.9365357086050584e-06, 'epoch': 1.54}


 78%|███████▊  | 29500/37722 [7:20:10<1:39:53,  1.37it/s] 

{'loss': 3.4611, 'learning_rate': 6.538889772546524e-06, 'epoch': 1.56}


 80%|███████▉  | 30000/37722 [7:26:39<1:34:09,  1.37it/s] 

{'loss': 3.4329, 'learning_rate': 6.141243836487992e-06, 'epoch': 1.59}


 81%|████████  | 30500/37722 [7:33:07<1:27:47,  1.37it/s] 

{'loss': 3.4485, 'learning_rate': 5.7435979004294575e-06, 'epoch': 1.62}


 82%|████████▏ | 31000/37722 [7:39:19<1:15:15,  1.49it/s] 

{'loss': 3.4562, 'learning_rate': 5.345951964370924e-06, 'epoch': 1.64}


 84%|████████▎ | 31500/37722 [7:45:22<1:09:55,  1.48it/s] 

{'loss': 3.4574, 'learning_rate': 4.948306028312391e-06, 'epoch': 1.67}


 85%|████████▍ | 32000/37722 [7:51:24<1:04:17,  1.48it/s] 

{'loss': 3.4661, 'learning_rate': 4.5506600922538574e-06, 'epoch': 1.7}


Several commits (2) will be pushed upstream.
 86%|████████▌ | 32500/37722 [7:57:26<58:31,  1.49it/s]   

{'loss': 3.4561, 'learning_rate': 4.153014156195324e-06, 'epoch': 1.72}


 87%|████████▋ | 33000/37722 [8:03:29<53:10,  1.48it/s]   

{'loss': 3.4909, 'learning_rate': 3.7553682201367903e-06, 'epoch': 1.75}


Several commits (2) will be pushed upstream.
 89%|████████▉ | 33500/37722 [8:09:29<47:34,  1.48it/s]   

{'loss': 3.4548, 'learning_rate': 3.357722284078257e-06, 'epoch': 1.78}


 90%|█████████ | 34000/37722 [8:15:31<41:55,  1.48it/s]   

{'loss': 3.4396, 'learning_rate': 2.9600763480197236e-06, 'epoch': 1.8}


 91%|█████████▏| 34500/37722 [8:21:35<36:08,  1.49it/s]  

{'loss': 3.4879, 'learning_rate': 2.56243041196119e-06, 'epoch': 1.83}


 93%|█████████▎| 35000/37722 [8:27:36<30:31,  1.49it/s]  

{'loss': 3.4466, 'learning_rate': 2.164784475902656e-06, 'epoch': 1.86}


 94%|█████████▍| 35500/37722 [8:33:36<24:54,  1.49it/s]  

{'loss': 3.4689, 'learning_rate': 1.7671385398441229e-06, 'epoch': 1.88}


Several commits (2) will be pushed upstream.
 95%|█████████▌| 36000/37722 [8:39:39<19:18,  1.49it/s]  

{'loss': 3.4744, 'learning_rate': 1.3694926037855893e-06, 'epoch': 1.91}


 97%|█████████▋| 36500/37722 [8:45:43<13:41,  1.49it/s]  

{'loss': 3.4421, 'learning_rate': 9.718466677270557e-07, 'epoch': 1.94}


 98%|█████████▊| 37000/37722 [8:51:43<08:07,  1.48it/s]  

{'loss': 3.4717, 'learning_rate': 5.742007316685224e-07, 'epoch': 1.96}


 99%|█████████▉| 37500/37722 [8:57:44<02:28,  1.49it/s]  

{'loss': 3.4253, 'learning_rate': 1.7655479560998888e-07, 'epoch': 1.99}


                                                       
100%|██████████| 37722/37722 [9:22:17<00:00,  1.12it/s]

{'eval_loss': 3.535928964614868, 'eval_runtime': 1296.9223, 'eval_samples_per_second': 77.559, 'eval_steps_per_second': 4.848, 'epoch': 2.0}
{'train_runtime': 33743.3362, 'train_samples_per_second': 17.886, 'train_steps_per_second': 1.118, 'train_loss': 3.5997305618706013, 'epoch': 2.0}





TrainOutput(global_step=37722, training_loss=3.5997305618706013, metrics={'train_runtime': 33743.3362, 'train_samples_per_second': 17.886, 'train_steps_per_second': 1.118, 'train_loss': 3.5997305618706013, 'epoch': 2.0})

In [34]:
trainer.push_to_hub()


Upload file pytorch_model.bin: 488MB [02:38, 5.71MB/s]                            To https://huggingface.co/damapika/roberta-base_ms-marco_mod
   0adf892..e1722fe  main -> main

Upload file pytorch_model.bin: 100%|██████████| 473M/473M [02:39<00:00, 3.12MB/s]
Upload file runs/May13_10-26-56_Damapika/events.out.tfevents.1683966499.Damapika.32352.0: 100%|██████████| 16.5k/16.5k [02:39<00:00, 106B/s]  
To https://huggingface.co/damapika/roberta-base_ms-marco_mod
   e1722fe..76c582c  main -> main



'https://huggingface.co/damapika/roberta-base_ms-marco_mod/commit/e1722fefff5c4629cf75eea71e43b76643cc38d6'

In [2]:
question = "Who is Fyodor Dostoevsky?"
context = "In the world of literature, there have been many authors who have gained a reputation for their ability to create complex characters. One such author is Fyodor Dostoevsky, a Russian novelist who wrote several influential works in the 19th century."

In [3]:
question_answerer = transformers.pipeline("question-answering", model="damapika/roberta-base_ms-marco_mod")
question_answerer(question=question, context=context)

{'score': 0.026595955714583397, 'start': 246, 'end': 247, 'answer': '.'}