<a href="https://colab.research.google.com/github/TheHackerLlama/charlas/blob/main/debugging_transformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
# Install the `transformers` package; the %%capture cell magic above hides pip's output.
!pip install transformers

In [None]:
# Install git-lfs through apt-get (presumably needed by the git-based clone/push done later — confirm).
!apt-get install git-lfs

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub from inside the notebook
# (presumably required for the repo creation and push further down — confirm).
notebook_login()

In [None]:
from distutils.dir_util import copy_tree
from huggingface_hub import Repository, snapshot_download, create_repo, get_full_repo_name

def copy_repository_template():
    """Copy a pinned template model repo into a Hub repo of the same name.

    Downloads a snapshot of the template repository at a fixed commit,
    creates (or reuses) a Hub repo named after the template's model name,
    clones that repo into a local directory with the same name, copies the
    template files into the clone and pushes the result to the Hub.

    Returns:
        None. The function is called for its side effects only (a local
        directory is created/updated and a push to the Hub is made).
    """
    # Function-scope import so this block does not depend on `distutils`,
    # which is deprecated (PEP 632) and removed from the standard library in
    # Python 3.12.
    import shutil

    # Download the template repo at a pinned commit and get its local path
    template_repo_id = "lewtun/distilbert-base-uncased-finetuned-squad-d5716d28"
    commit_hash = "be3eaffc28669d7932492681cd5f3e8905e358b4"
    template_repo_dir = snapshot_download(template_repo_id, revision=commit_hash)
    # Create the repo on the Hub; exist_ok=True tolerates an already existing repo
    model_name = template_repo_id.split("/")[1]
    create_repo(model_name, exist_ok=True)
    # Clone the (possibly empty) repo into a local directory named after the model
    new_repo_id = get_full_repo_name(model_name)
    new_repo_dir = model_name
    repo = Repository(local_dir=new_repo_dir, clone_from=new_repo_id)
    # Copy the template files into the clone. The clone directory already
    # exists at this point, hence dirs_exist_ok=True (Python 3.8+).
    shutil.copytree(template_repo_dir, new_repo_dir, dirs_exist_ok=True)
    # Push to Hub
    repo.push_to_hub()

In [None]:
# Run the helper: copy the template repo's files into a Hub repo of the same name and push it.
copy_repository_template()

## Debugging the pipeline

In [None]:
from transformers import pipeline

# Build a question-answering pipeline without naming a model, so the
# library's default checkpoint for the task is used.
pipe = pipeline("question-answering")

In [None]:
# Sanity check of the default pipeline on a short context; the call is the
# cell's last expression, so the notebook displays its result.
pipe(
    question="Where do I work?",
    context="My name is Omar and I work at Hugging Face in a mountain"
)

In [None]:
# NOTE(review): the repo name is spelled "distillbert" (double "l") here, unlike
# the template repo id used earlier; the next cell repeats this call with
# "distilbert". Presumably this cell is an intentional demo of a failed load — confirm.
model_checkpoint = get_full_repo_name("distillbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)

In [None]:
# Same call as the previous cell, with the repo name spelled "distilbert" to
# match the template repo copied earlier.
# NOTE(review): the following cells push a config.json to this repo, which
# suggests this load is still expected to fail (missing config) — confirm.
model_checkpoint = get_full_repo_name("distilbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)

In [None]:
from transformers import AutoConfig

# Load the configuration of the base `distilbert-base-uncased` checkpoint.
pretrained_checkpoint = "distilbert-base-uncased"
config = AutoConfig.from_pretrained(pretrained_checkpoint)

In [None]:
# Upload the loaded config to the `model_checkpoint` repo, using the given commit message.
config.push_to_hub(model_checkpoint, commit_message="Add config.json")

In [None]:
# Load the question-answering pipeline from the user's repo, explicitly
# requesting the "main" revision.
reader = pipeline("question-answering", model=model_checkpoint, revision="main")

# Passage the model will extract answers from (raw triple-quoted string).
context = r"""
Extractive Question Answering is the task of extracting an answer from a text
given a question. An example of a question answering dataset is the SQuAD
dataset, which is entirely based on that task. If you would like to fine-tune a
model on a SQuAD task, you may leverage the
examples/pytorch/question-answering/run_squad.py script.

🤗 Transformers is interoperable with the PyTorch, TensorFlow, and JAX
frameworks, so you can use your favourite tools for a wide variety of tasks!
"""

# Ask a question about the passage; the call is the cell's last expression,
# so the notebook displays its result.
question = "What is extractive question answering?"
reader(question=question, context=context)

# Debugging the forward pass

In [None]:
# Take the tokenizer and model out of the pipeline so the forward pass can be run by hand.
tokenizer = reader.tokenizer
model = reader.model

In [None]:
# New question; it is paired with the existing `context` in the cells below.
question = "Which frameworks can I use?"

In [None]:
import torch

In [None]:
# Tokenize the inputs
# NOTE(review): no return_tensors argument is passed here, whereas the later
# version of this cell adds return_tensors="pt" — presumably this cell is the
# intentionally failing example of the debugging walkthrough; confirm.
inputs = tokenizer(question, context, add_special_tokens=True)

# Run inference
outputs = model(**inputs)

answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits

# Get the most likely beginning of answer with the argmax of the score
answer_start = torch.argmax(answer_start_scores)

# Get the most likely end of answer with the argmax of the score
# (+1 so the index can be used as an exclusive slice bound below)
answer_end = torch.argmax(answer_end_scores) + 1
input_ids = inputs["input_ids"][0]
# Map the selected token ids back to tokens, then join them into a string
answer = tokenizer.convert_tokens_to_string(
    tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
)
print(f"Question: {question}")
print(f"Answer: {answer}")

In [None]:
# Look at the first five entries of the tokenizer's input_ids.
inputs["input_ids"][:5]

In [None]:
# Check which Python type the tokenizer returned for input_ids.
type(inputs["input_ids"])

In [None]:
# Encode the question/context pair, asking the tokenizer for PyTorch tensors
inputs = tokenizer(
    question,
    context,
    add_special_tokens=True,
    return_tensors="pt",
)

# Forward pass through the model
outputs = model(**inputs)
answer_start_scores, answer_end_scores = outputs.start_logits, outputs.end_logits

# Position with the highest start score
answer_start = answer_start_scores.argmax()

# Position with the highest end score, plus one so it can serve as an
# exclusive slice bound
answer_end = answer_end_scores.argmax() + 1

# Token ids of the first sequence in the batch
input_ids = inputs["input_ids"][0]

# Turn the selected span of ids back into tokens, then into a single string
answer = tokenizer.convert_tokens_to_string(
    tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
)

print(f"Question: {question}")
print(f"Answer: {answer}")