# What to do when you get an error

Install the Transformers and Datasets libraries to run this notebook.

In [None]:
# Install dependencies: the Datasets and Transformers libraries (Transformers
# with its "sentencepiece" extra) via pip, and git-lfs via apt.
!pip install datasets transformers[sentencepiece]
!apt install git-lfs

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.2.2-py3-none-any.whl (346 kB)
[K     |████████████████████████████████| 346 kB 5.0 MB/s 
[?25hCollecting transformers[sentencepiece]
  Downloading transformers-4.19.2-py3-none-any.whl (4.2 MB)
[K     |████████████████████████████████| 4.2 MB 48.0 MB/s 
Collecting dill<0.3.5
  Downloading dill-0.3.4-py2.py3-none-any.whl (86 kB)
[K     |████████████████████████████████| 86 kB 1.4 MB/s 
[?25hCollecting xxhash
  Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[K     |████████████████████████████████| 212 kB 32.9 MB/s 
[?25hCollecting huggingface-hub<1.0.0,>=0.1.0
  Downloading huggingface_hub-0.7.0-py3-none-any.whl (86 kB)
[K     |████████████████████████████████| 86 kB 1.3 MB/s 
[?25hCollecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting aiohttp
  Downloadin

You will need to set up Git: adapt your email and name in the following cell.

In [None]:
# Config: set the global Git identity (email and name).
# NOTE: the values below are specific to one user; replace them with your own
# email and name before running this cell.
!git config --global user.email "miesner.jacob@gmail.com"
!git config --global user.name "MiesnerJacob"

You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials.

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub from inside the notebook (asks for credentials)
notebook_login()

Login successful
Your token has been saved to /root/.huggingface/token
[1m[31mAuthenticated through git-credential store but this isn't the helper defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub. Run the following command in your terminal in case you want to set this credential helper as the default

git config --global credential.helper store[0m


In [None]:
# Copy and clone model repo for this lesson
from distutils.dir_util import copy_tree
from huggingface_hub import Repository, snapshot_download, create_repo, get_full_repo_name


def copy_repository_template():
    """Copy a pinned snapshot of the template model repo into a new Hub repo.

    Steps, in order:
      1. Download the template repo at a fixed commit.
      2. Create a Hub repo named after the template (no error if it already exists).
      3. Clone that repo into a local directory of the same name.
      4. Copy the template files into the clone and push them to the Hub.

    Takes no arguments and returns nothing; its effects are the local clone
    directory and the pushed commit.
    """
    source_repo_id = "lewtun/distilbert-base-uncased-finetuned-squad-d5716d28"
    pinned_commit = "be3eaffc28669d7932492681cd5f3e8905e358b4"

    # Fetch the template files; the return value is the local snapshot path
    source_dir = snapshot_download(source_repo_id, revision=pinned_commit)

    # The part after the "/" doubles as the new repo's name and the local clone directory
    repo_name = source_repo_id.split("/")[1]
    create_repo(repo_name, exist_ok=True)

    # Clone the (possibly empty) repo, addressed by its fully qualified name
    local_clone = Repository(
        local_dir=repo_name,
        clone_from=get_full_repo_name(repo_name),
    )

    # Overlay the template files onto the clone, then publish them
    copy_tree(source_dir, repo_name)
    local_clone.push_to_hub()

copy_repository_template()

Downloading:   0%|          | 0.00/345 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/265M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/258 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Cloning https://huggingface.co/miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28 into local empty directory.


Upload file pytorch_model.bin:   0%|          | 3.34k/253M [00:00<?, ?B/s]

Upload file training_args.bin: 100%|##########| 2.05k/2.05k [00:00<?, ?B/s]

remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28
   8ff5d6b..2c06081  main -> main



In [None]:
# Try to load the model from the Hub into a question-answering pipeline.
# NOTE(review): the repo name below is spelled "distillbert" (double "l"),
# unlike the "distilbert" repo created earlier. This looks like a deliberate
# typo to provoke the error being demonstrated — confirm before "fixing" it.
from transformers import pipeline

model_checkpoint = get_full_repo_name("distillbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)

OSError: ignored

In [None]:
# Try to load the model again, this time with the repo name spelled "distilbert".
# NOTE(review): the recorded output is still an OSError; presumably the repo
# lacks a config.json at this point (it is absent from the file listing that
# follows) — confirm.
model_checkpoint = get_full_repo_name("distilbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)

OSError: ignored

In [None]:
# List the files in the Hub repo to see what was actually uploaded.
# NOTE: the repo id is hard-coded to one account; adapt it to your own namespace.
from huggingface_hub import list_repo_files

list_repo_files(repo_id='miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28')

['.gitattributes',
 'README.md',
 'pytorch_model.bin',
 'special_tokens_map.json',
 'tokenizer_config.json',
 'training_args.bin',
 'vocab.txt']

In [None]:
# Load the configuration of the pretrained "distilbert-base-uncased" checkpoint.
# Only the config is fetched here (no model weights are loaded); it is the
# object that gets pushed to the Hub repo afterwards.
from transformers import AutoConfig

pretrained_checkpoint = "distilbert-base-uncased"
config = AutoConfig.from_pretrained(pretrained_checkpoint)

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [None]:
# Push the config to the Hub repo, with an explicit commit message.
# NOTE: the repo id is hard-coded to one account; adapt it to your own namespace.
config.push_to_hub('miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28', commit_message="Add config.json")

Cloning https://huggingface.co/miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28 into local empty directory.


Download file pytorch_model.bin:   0%|          | 16.0k/253M [00:00<?, ?B/s]

Download file training_args.bin: 100%|##########| 2.05k/2.05k [00:00<?, ?B/s]

Clean file training_args.bin:  49%|####8     | 1.00k/2.05k [00:00<?, ?B/s]

Clean file pytorch_model.bin:   0%|          | 1.00k/253M [00:00<?, ?B/s]

remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28
   2c06081..a1935f2  main -> main



'https://huggingface.co/miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28/commit/a1935f2b809bfdb3f20190d1436036b9d25ab7c4'

In [None]:
# Try running QA through the pipeline again, loading the "main" revision of the
# Hub repo (the repo id is hard-coded; adapt it to your own namespace).
reader = pipeline("question-answering", model='miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28', revision="main")

# Passage the answers are extracted from
context = r"""
Extractive Question Answering is the task of extracting an answer from a text
given a question. An example of a question answering dataset is the SQuAD
dataset, which is entirely based on that task. If you would like to fine-tune a
model on a SQuAD task, you may leverage the
examples/pytorch/question-answering/run_squad.py script.

🤗 Transformers is interoperable with the PyTorch, TensorFlow, and JAX
frameworks, so you can use your favourite tools for a wide variety of tasks!
"""

question = "What is extractive question answering?"
# Last expression of the cell, so the pipeline's result is displayed as output
reader(question=question, context=context)

{'answer': 'the task of extracting an answer from a text\ngiven a question',
 'end': 95,
 'score': 0.37610766291618347,
 'start': 34}

In [None]:
# Pull the tokenizer and the underlying model out of the pipeline object
tokenizer, model = reader.tokenizer, reader.model

In [None]:
# Define a new question; it is paired with the existing `context` in the next cell
question = "Which frameworks can I use?"

In [None]:
# Print out the answer by taking the argmax of the start/end logits and
# converting the selected token ids back into a string.
# Will raise an error unless the tokenizer returns PyTorch tensors (return_tensors='pt').
import torch

inputs = tokenizer(question, context, add_special_tokens=True, return_tensors='pt')
# Token ids of the first sequence in the batch
input_ids = inputs["input_ids"][0]
outputs = model(**inputs)
answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits
# Get the most likely beginning of answer with the argmax of the score
answer_start = torch.argmax(answer_start_scores)
# Get the most likely end of answer with the argmax of the score
# (+1 because the slice below excludes its end index)
answer_end = torch.argmax(answer_end_scores) + 1
answer = tokenizer.convert_tokens_to_string(
    tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
)
print(f"Question: {question}")
print(f"Answer: {answer}")

Question: Which frameworks can I use?
Answer: pytorch, tensorflow, and jax


In [None]:
# Check out the first few input_ids.
# The tensor has a leading batch dimension, so select the first sequence and
# then slice its tokens; a bare `[:5]` slices the batch axis and returns
# every token id instead of the first five.
inputs["input_ids"][0, :5]

tensor([[  101,  2029,  7705,  2015,  2064,  1045,  2224,  1029,   102, 14817,
          3512,  3160, 10739,  2003,  1996,  4708,  1997, 14817,  2075,  2019,
          3437,  2013,  1037,  3793,  2445,  1037,  3160,  1012,  2019,  2742,
          1997,  1037,  3160, 10739,  2951, 13462,  2003,  1996,  4686,  2951,
         13462,  1010,  2029,  2003,  4498,  2241,  2006,  2008,  4708,  1012,
          2065,  2017,  2052,  2066,  2000,  2986,  1011,  8694,  1037,  2944,
          2006,  1037,  4686,  4708,  1010,  2017,  2089, 21155,  1996,  4973,
          1013,  1052, 22123,  2953,  2818,  1013,  3160,  1011, 10739,  1013,
          2448,  1035,  4686,  1012,  1052,  2100,  5896,  1012,   100, 19081,
          2003,  6970, 25918,  3085,  2007,  1996,  1052, 22123,  2953,  2818,
          1010, 23435, 12314,  1010,  1998, 13118,  7705,  2015,  1010,  2061,
          2017,  2064,  2224,  2115,  8837,  5906,  2005,  1037,  2898,  3528,
          1997,  8518,   999,   102]])

In [None]:
# Show the type of the input ids, to confirm the tokenizer returned a PyTorch
# tensor (it was called with return_tensors='pt')
type(inputs["input_ids"])

torch.Tensor