In [None]:
# Transformers installation
! pip install transformers datasets
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+https://github.com/huggingface/transformers.git

# Question answering

In [2]:
#@title
from IPython.display import HTML

HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/ajPx5LwJD-I?rel=0&amp;controls=0&amp;showinfo=0" frameborder="0" allowfullscreen></iframe>')



Question answering tasks return an answer given a question. If you've ever asked a virtual assistant like Alexa, Siri or Google what the weather is, then you've used a question answering model before. There are two common types of question answering tasks:

- Extractive: extract the answer from the given context.
- Abstractive: generate an answer from the context that correctly answers the question.

This guide will show you how to:

1. Finetune [DistilBERT](https://huggingface.co/distilbert-base-uncased) on the [SQuAD](https://huggingface.co/datasets/squad) dataset for extractive question answering.
2. Use your finetuned model for inference.

<Tip>
The task illustrated in this tutorial is supported by the following model architectures:

<!--This tip is automatically generated by `make fix-copies`, do not fill manually!-->

[ALBERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/albert), [BART](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/bart), [BERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/bert), [BigBird](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/big_bird), [BigBird-Pegasus](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/bigbird_pegasus), [BLOOM](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/bloom), [CamemBERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/camembert), [CANINE](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/canine), [ConvBERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/convbert), [Data2VecText](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/data2vec-text), [DeBERTa](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/deberta), [DeBERTa-v2](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/deberta-v2), [DistilBERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/distilbert), [ELECTRA](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/electra), [ERNIE](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/ernie), [ErnieM](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/ernie_m), [FlauBERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/flaubert), [FNet](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/fnet), [Funnel Transformer](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/funnel), [OpenAI GPT-2](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/gpt2), [GPT Neo](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/gpt_neo), [GPT NeoX](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/gpt_neox), [GPT-J](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/gptj), [I-BERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/ibert), [LayoutLMv2](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/layoutlmv2), [LayoutLMv3](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/layoutlmv3), [LED](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/led), [LiLT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/lilt), [Longformer](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/longformer), [LUKE](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/luke), [LXMERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/lxmert), [MarkupLM](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/markuplm), [mBART](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/mbart), [MEGA](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/mega), [Megatron-BERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/megatron-bert), [MobileBERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/mobilebert), [MPNet](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/mpnet), [MVP](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/mvp), [Nezha](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/nezha), [Nyströmformer](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/nystromformer), [OPT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/opt), [QDQBert](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/qdqbert), [Reformer](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/reformer), [RemBERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/rembert), [RoBERTa](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/roberta), [RoBERTa-PreLayerNorm](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/roberta-prelayernorm), [RoCBert](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/roc_bert), [RoFormer](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/roformer), [Splinter](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/splinter), [SqueezeBERT](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/squeezebert), [XLM](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/xlm), [XLM-RoBERTa](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/xlm-roberta), [XLM-RoBERTa-XL](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/xlm-roberta-xl), [XLNet](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/xlnet), [X-MOD](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/xmod), [YOSO](https://huggingface.co/docs/transformers/main/en/tasks/../model_doc/yoso)


<!--End of the generated tip-->

</Tip>

Before you begin, make sure you have all the necessary libraries installed:

```bash
pip install transformers datasets evaluate
```

We encourage you to login to your Hugging Face account so you can upload and share your model with the community. When prompted, enter your token to login:

In [3]:
# from huggingface_hub import notebook_login

# notebook_login()

## Load SQuAD dataset

Start by loading a smaller subset of the SQuAD dataset from the 🤗 Datasets library. This'll give you a chance to experiment and make sure everything works before spending more time training on the full dataset.

In [None]:
from datasets import load_dataset

squad = load_dataset("squad", split="train[:5000]")

Split the dataset's `train` split into a train and test set with the [train_test_split](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.train_test_split) method:

In [5]:
squad = squad.train_test_split(test_size=0.2)

In [6]:
type(squad["train"])

datasets.arrow_dataset.Dataset

Then take a look at an example:

In [7]:
squad["train"][0]

{'id': '56cda75062d2951400fa67c6',
 'title': 'The_Legend_of_Zelda:_Twilight_Princess',
 'context': 'Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created.',
 'question': 'Who counted the game among the best ever made?',
 'answers': {'text': ['GameTrailers'], 'answer_start': [423]}}

In [8]:
len(squad["train"]), len(squad["test"])

(4000, 1000)

There are several important fields here:

- `answers`: the starting location of the answer token and the answer text.
- `context`: background information from which the model needs to extract the answer.
- `question`: the question a model should answer.

## Risk Factors of Fine Tune Your Own Large Language Models

Fine-tuning large language models comes with several risk factors and challenges. Here are some key considerations and potential reasons for poor performance:

1. **Data Bias**: Large language models are trained on vast amounts of text data, which can introduce biases present in the training data. Fine-tuning on biased data or biased annotations can perpetuate or amplify those biases, leading to unfair or problematic outputs.

2. **Lack of Generalization**: Pretrained language models are often trained on diverse and extensive data, but fine-tuning on a specific task or domain may limit their generalization ability. If the fine-tuning dataset is significantly different from the original training data, the model might struggle to perform well on unseen examples.

3. **Overfitting**: Fine-tuning on a small dataset or for a limited number of epochs can result in overfitting. The model may memorize the training examples rather than learning general patterns, leading to poor performance on new inputs.

4. **Catastrophic Forgetting**: Fine-tuning can cause the model to forget or degrade its performance on the tasks it was pretrained on. The model may lose its original knowledge or exhibit interference effects if the fine-tuning task is significantly different from the pretraining objectives.

5. **Hyperparameter Selection**: Fine-tuning requires careful selection of hyperparameters such as learning rate, batch size, and regularization techniques. Poor choices in these hyperparameters can hinder the model's ability to converge or result in suboptimal performance.

6. **Limited Training Data**: Insufficient or low-quality training data can impede fine-tuning performance. If the fine-tuning dataset is small or lacks diversity, the model may struggle to capture the complexity of the task or generalize well.

7. **Domain Mismatch**: Fine-tuning a language model pretrained on general text data may not transfer well to specific domains or tasks. The model may lack domain-specific knowledge or struggle to adapt to the nuances of the target domain, leading to poor performance.

8. **Computational Resources**: Fine-tuning large language models is computationally intensive and requires substantial resources. Inadequate computational power or limited access to high-performance hardware may limit the ability to train the model effectively.

To mitigate these risks and improve fine-tuning performance, it is crucial to carefully curate high-quality training data, evaluate for biases, fine-tune on diverse and representative examples, regularize the model appropriately, experiment with hyperparameter tuning, and consider techniques like data augmentation, domain adaptation, or transfer learning to enhance model performance.

## Care for Training Data

To enhance the performance of fine-tuning a large language model, you can take several steps to take care of the training data and the texts within it. Here are some effective strategies:

1. **Data Cleaning and Preprocessing**: Clean and preprocess the training data to remove noise, irrelevant content, or inconsistent formatting. This may involve removing HTML tags, special characters, or excessive whitespace, as well as normalizing text by lowercasing, stemming, or lemmatizing words.

2. **Data Augmentation**: Increase the diversity and size of the training data through data augmentation techniques. This involves generating new training examples by applying operations such as synonym replacement, sentence shuffling, paraphrasing, or back-translation.

3. **Balancing and Sampling**: Ensure a balanced distribution of target classes in the training data, especially for tasks involving classification. If the data is imbalanced, consider oversampling minority classes, undersampling majority classes, or using techniques like stratified sampling to maintain class proportions during training.

4. **Handling Biases**: Identify and address biases present in the training data. Analyze the data for demographic, cultural, or ideological biases and take steps to mitigate them. Carefully curate diverse and representative data sources to reduce bias and ensure fairness in model predictions.

5. **Domain-Specific Training Data**: If your target task or application is focused on a specific domain, consider collecting or acquiring training data that is more representative of that domain. Domain-specific data can help the model learn domain-specific nuances and improve performance in the target context.

6. **Fine-Tuning Dataset Size**: Whenever possible, aim to have a reasonably large fine-tuning dataset. Larger datasets can help the model capture more diverse patterns, improve generalization, and reduce the risk of overfitting.

7. **Adversarial Training**: Incorporate adversarial examples or data that deliberately introduce challenging cases into the training data. Adversarial training can help the model become more robust and better handle edge cases or inputs with slight perturbations.

8. **Quality Assurance and Validation**: Conduct thorough quality assurance and validation of the training data. Perform manual inspection, annotation, or use crowd-sourced platforms to ensure the correctness and relevance of the data. Incorrect or noisy training examples can significantly impact model performance.

9. **Data Exploration and Analysis**: Explore and analyze the training data to gain insights into its distribution, identify patterns, and understand potential challenges. This analysis can help you design appropriate preprocessing steps, identify data biases, or select effective fine-tuning strategies.

Remember that the choice of training data and the care taken in preparing it have a significant impact on the fine-tuning process. By carefully curating, preprocessing, augmenting, and analyzing the training data, you can improve the performance and generalization ability of a large language model during fine-tuning.

### Discussion of Using Customized Data

We can use the following code to swap and update the training and testing data to customized data for training. This will require user to have a `train.csv` and `test.csv` whereas each file should have `context`, `question`, `answer` columns.

It is up to you to clean up the texts in the columns. If there are special characters and the texts don't make any sense, that'd be the issue with the training data.

As an example, your excel file should look like the following:

| context | question | answer |
| ---     | ---      | ---    |
| background | what is ...? | This is .... |

I would also recommend to have training data in the range of tens of thousands of rows at least.

In [9]:
import csv
from datasets import Dataset

def update_dataset_with_csv(dataset, csv_file):
    updated_examples = []

    with open(csv_file, 'r') as file:
        reader = csv.reader(file)
        next(reader)  # Skip header row

        for example, (context, question, answer) in zip(dataset, reader):
            updated_example = example.copy()
            updated_example['context'] = context
            updated_example['question'] = question
            updated_example['answers']['text'] = [answer]
            updated_examples.append(updated_example)

    updated_dataset = dataset.map(lambda example: {
        'id': example['id'],
        'title': example['title'],
        'context': updated_examples.pop(0)['context'] if updated_examples else 'abc',
        'question': updated_examples.pop(0)['question'] if updated_examples else 'abc',
        'answers': {
            'text': updated_examples.pop(0)['answers']['text'] if updated_examples else ['abc'],
            'answer_start': example['answers']['answer_start']
        }
    })

    return updated_dataset


In [10]:
# squad["train"] = update_dataset_with_csv(squad["train"], "/content/new.csv")

In [11]:
# squad["train"][0]

In [12]:
# squad["test"] = update_dataset_with_csv(squad["test"], "/content/newtest.csv")

## Preprocess

In [13]:
#@title
from IPython.display import HTML

HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/qgaM0weJHpA?rel=0&amp;controls=0&amp;showinfo=0" frameborder="0" allowfullscreen></iframe>')



The next step is to load a DistilBERT tokenizer to process the `question` and `context` fields:

In [14]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

There are a few preprocessing steps particular to question answering tasks you should be aware of:

1. Some examples in a dataset may have a very long `context` that exceeds the maximum input length of the model. To deal with longer sequences, truncate only the `context` by setting `truncation="only_second"`.
2. Next, map the start and end positions of the answer to the original `context` by setting
   `return_offset_mapping=True`.
3. With the mapping in hand, now you can find the start and end tokens of the answer. Use the [sequence_ids](https://huggingface.co/docs/tokenizers/main/en/api/encoding#tokenizers.Encoding.sequence_ids) method to
   find which part of the offset corresponds to the `question` and which corresponds to the `context`.

Here is how you can create a function to truncate and map the start and end tokens of the `answer` to the `context`:

In [15]:
def preprocess_function(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=384,
        truncation="only_second",
        return_offsets_mapping=True,
        padding="max_length",
    )

    offset_mapping = inputs.pop("offset_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    for i, offset in enumerate(offset_mapping):
        answer = answers[i]
        start_char = answer["answer_start"][0]
        end_char = answer["answer_start"][0] + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context
        idx = 0
        # print(i, sequence_ids)
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label it (0, 0)
        if offset[context_start][0] > end_char or offset[context_end][1] < start_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

To apply the preprocessing function over the entire dataset, use 🤗 Datasets [map](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.map) function. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once. Remove any columns you don't need:

In [16]:
tokenized_squad = squad.map(preprocess_function, batched=True, remove_columns=squad["train"].column_names)

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Now create a batch of examples using [DefaultDataCollator](https://huggingface.co/docs/transformers/main/en/main_classes/data_collator#transformers.DefaultDataCollator). Unlike other data collators in 🤗 Transformers, the [DefaultDataCollator](https://huggingface.co/docs/transformers/main/en/main_classes/data_collator#transformers.DefaultDataCollator) does not apply any additional preprocessing such as padding.

In [17]:
from transformers import DefaultDataCollator

data_collator = DefaultDataCollator(return_tensors="tf")

## Train

<Tip>

If you aren't familiar with finetuning a model with Keras, take a look at the basic tutorial [here](https://huggingface.co/docs/transformers/main/en/tasks/../training#train-a-tensorflow-model-with-keras)!

</Tip>
To finetune a model in TensorFlow, start by setting up an optimizer function, learning rate schedule, and some training hyperparameters:

In [18]:
from transformers import create_optimizer

batch_size = 16
num_epochs = 2
total_train_steps = (len(tokenized_squad["train"]) // batch_size) * num_epochs
optimizer, schedule = create_optimizer(
    init_lr=5e-5,
    num_warmup_steps=0,
    num_train_steps=total_train_steps,
)

Then you can load DistilBERT with [TFAutoModelForQuestionAnswering](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.TFAutoModelForQuestionAnswering):

For more options, go to [Hugging Face / Models](https://huggingface.co/models). You can also choose models from [here](https://huggingface.co/transformers/v3.0.2/model_doc/auto.html#transformers.AutoTokenizer.from_pretrained). You'll have to try it. Different model has different sizes. One interesting research is to compare and contrast different models here in the setting of `model_name` so that we can create a tabular form of `model <> carbon emissions <> loss`. Ideally, we can pick a model that has least amount of carbon emissions and least errors. That will be ideal.

In [19]:
from transformers import TFAutoModelForQuestionAnswering

# model_name = "distilbert-base-uncased"
model_name = 'bert-base-uncased'
model = TFAutoModelForQuestionAnswering.from_pretrained(model_name)

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertForQuestionAnswering.

Some weights or buffers of the TF 2.0 model TFBertForQuestionAnswering were not initialized from the PyTorch model and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# from transformers import TFAutoModelForQuestionAnswering

# model = TFAutoModelForQuestionAnswering("distilbert-base-uncased")

Convert your datasets to the `tf.data.Dataset` format with [prepare_tf_dataset()](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.prepare_tf_dataset):

In [21]:
tf_train_set = model.prepare_tf_dataset(
    tokenized_squad["train"],
    shuffle=True,
    batch_size=16,
    collate_fn=data_collator,
)

tf_validation_set = model.prepare_tf_dataset(
    tokenized_squad["test"],
    shuffle=False,
    batch_size=16,
    collate_fn=data_collator,
)

Configure the model for training with [`compile`](https://keras.io/api/models/model_training_apis/#compile-method):

In [22]:
import tensorflow as tf

model.compile(optimizer=optimizer)

The last thing to setup before you start training is to provide a way to push your model to the Hub. This can be done by specifying where to push your model and tokenizer in the [PushToHubCallback](https://huggingface.co/docs/transformers/main/en/main_classes/keras_callbacks#transformers.PushToHubCallback):

I intend this to be a private model, so I don't want to send it to `hub` and therefore I comment out this portion of the code.

In [23]:
# from transformers.keras_callbacks import PushToHubCallback

# callback = PushToHubCallback(
#     output_dir="my_awesome_qa_model",
#     tokenizer=tokenizer,
# )

Finally, you're ready to start training your model! Call [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) with your training and validation datasets, the number of epochs, and your callback to finetune the model:

I'd advise to train on GPU otherwise it'll take too long to wait for the results.

In [24]:
device_name = tf.test.gpu_device_name()
device_name

'/device:GPU:0'

In [25]:
with tf.device('/device:GPU:0'):
    model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=[callback])

# model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3)

Once training is completed, your model is automatically uploaded to the Hub so everyone can use it!

<Tip>

For a more in-depth example of how to finetune a model for question answering, take a look at the corresponding
[PyTorch notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)
or [TensorFlow notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering-tf.ipynb).

</Tip>

Let's try to measure the carbon emission. We can do this by using `codecarbon` package.

In [30]:
! pip install codecarbon

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting codecarbon
  Downloading codecarbon-2.2.3-py3-none-any.whl (174 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.1/174.1 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting arrow (from codecarbon)
  Downloading arrow-1.2.3-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
Collecting pynvml (from codecarbon)
  Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
Collecting fuzzywuzzy (from codecarbon)
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy, pynvml, arrow, codecarbon
Successfully installed arrow-1.2.3 codecarbon-2.2.3 fuzzywuzzy-0.18.0 pynvml-11.5.0


In [31]:
from codecarbon import EmissionsTracker

In [32]:
with EmissionsTracker(project_name="bert-base-uncased") as tracker:
    with tf.device('/device:GPU:0'):
        history = model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=2)

print(tracker.final_emissions)

[codecarbon INFO @ 03:18:16] [setup] RAM Tracking...
[codecarbon INFO @ 03:18:16] [setup] GPU Tracking...
[codecarbon INFO @ 03:18:16] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 03:18:16] [setup] CPU Tracking...
[codecarbon INFO @ 03:18:17] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 03:18:17] >>> Tracker's metadata:
[codecarbon INFO @ 03:18:17]   Platform system: Linux-5.15.107+-x86_64-with-glibc2.31
[codecarbon INFO @ 03:18:17]   Python version: 3.10.12
[codecarbon INFO @ 03:18:17]   CodeCarbon version: 2.2.3
[codecarbon INFO @ 03:18:17]   Available RAM : 50.997 GB
[codecarbon INFO @ 03:18:17]   CPU count: 8
[codecarbon INFO @ 03:18:17]   CPU model: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 03:18:17]   GPU count: 1
[codecarbon INFO @ 03:18:17]   GPU model: 1 x Tesla V100-SXM2-16GB


Epoch 1/2
 36/250 [===>..........................] - ETA: 1:28 - loss: 0.7559

[codecarbon INFO @ 03:18:32] Energy consumed for RAM : 0.000080 kWh. RAM Power : 19.123995780944824 W
[codecarbon INFO @ 03:18:32] Energy consumed for all GPUs : 0.000642 kWh. Total GPU Power : 153.76800000000003 W
[codecarbon INFO @ 03:18:33] Energy consumed for all CPUs : 0.000182 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:18:33] 0.000904 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:18:48] Energy consumed for RAM : 0.000158 kWh. RAM Power : 19.123995780944824 W




[codecarbon INFO @ 03:18:48] Energy consumed for all GPUs : 0.001286 kWh. Total GPU Power : 155.544 W




[codecarbon INFO @ 03:18:48] Energy consumed for all CPUs : 0.000362 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:18:48] 0.001806 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:19:03] Energy consumed for RAM : 0.000235 kWh. RAM Power : 19.123995780944824 W




[codecarbon INFO @ 03:19:03] Energy consumed for all GPUs : 0.001918 kWh. Total GPU Power : 156.528 W
[codecarbon INFO @ 03:19:03] Energy consumed for all CPUs : 0.000536 kWh. Total CPU Power : 42.5 W




[codecarbon INFO @ 03:19:03] 0.002690 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:19:18] Energy consumed for RAM : 0.000314 kWh. RAM Power : 19.123995780944824 W




[codecarbon INFO @ 03:19:18] Energy consumed for all GPUs : 0.002725 kWh. Total GPU Power : 194.38400000000004 W
[codecarbon INFO @ 03:19:18] Energy consumed for all CPUs : 0.000713 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:19:18] 0.003752 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:19:33] Energy consumed for RAM : 0.000391 kWh. RAM Power : 19.123995780944824 W




[codecarbon INFO @ 03:19:33] Energy consumed for all GPUs : 0.003396 kWh. Total GPU Power : 164.46000000000004 W




[codecarbon INFO @ 03:19:33] Energy consumed for all CPUs : 0.000891 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:19:33] 0.004679 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:19:48] Energy consumed for RAM : 0.000469 kWh. RAM Power : 19.123995780944824 W




[codecarbon INFO @ 03:19:48] Energy consumed for all GPUs : 0.004091 kWh. Total GPU Power : 172.155 W
[codecarbon INFO @ 03:19:48] Energy consumed for all CPUs : 0.001063 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:19:48] 0.005622 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:20:03] Energy consumed for RAM : 0.000548 kWh. RAM Power : 19.123995780944824 W
[codecarbon INFO @ 03:20:03] Energy consumed for all GPUs : 0.005195 kWh. Total GPU Power : 265.312 W
[codecarbon INFO @ 03:20:03] Energy consumed for all CPUs : 0.001240 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:20:03] 0.006984 kWh of electricity used since the beginning.


Epoch 2/2
 14/250 [>.............................] - ETA: 1:38 - loss: 0.8030

[codecarbon INFO @ 03:20:18] Energy consumed for RAM : 0.000629 kWh. RAM Power : 19.123995780944824 W


 15/250 [>.............................] - ETA: 1:38 - loss: 0.7723

[codecarbon INFO @ 03:20:18] Energy consumed for all GPUs : 0.005884 kWh. Total GPU Power : 162.733 W
[codecarbon INFO @ 03:20:18] Energy consumed for all CPUs : 0.001420 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:20:18] 0.007932 kWh of electricity used since the beginning.


 51/250 [=====>........................] - ETA: 1:24 - loss: 0.7176

[codecarbon INFO @ 03:20:33] Energy consumed for RAM : 0.000709 kWh. RAM Power : 19.123995780944824 W
[codecarbon INFO @ 03:20:33] Energy consumed for all GPUs : 0.006573 kWh. Total GPU Power : 162.733 W
[codecarbon INFO @ 03:20:34] Energy consumed for all CPUs : 0.001603 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:20:34] 0.008886 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:20:49] Energy consumed for RAM : 0.000787 kWh. RAM Power : 19.123995780944824 W




[codecarbon INFO @ 03:20:49] Energy consumed for all GPUs : 0.007353 kWh. Total GPU Power : 190.08700000000005 W
[codecarbon INFO @ 03:20:49] Energy consumed for all CPUs : 0.001777 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:20:49] 0.009918 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:21:04] Energy consumed for RAM : 0.000866 kWh. RAM Power : 19.123995780944824 W
[codecarbon INFO @ 03:21:04] Energy consumed for all GPUs : 0.008012 kWh. Total GPU Power : 159.03400000000002 W
[codecarbon INFO @ 03:21:04] Energy consumed for all CPUs : 0.001957 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:21:04] 0.010835 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:21:19] Energy consumed for RAM : 0.000943 kWh. RAM Power : 19.123995780944824 W




[codecarbon INFO @ 03:21:19] Energy consumed for all GPUs : 0.008627 kWh. Total GPU Power : 152.189 W
[codecarbon INFO @ 03:21:19] Energy consumed for all CPUs : 0.002132 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:21:19] 0.011702 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:21:34] Energy consumed for RAM : 0.001021 kWh. RAM Power : 19.123995780944824 W




[codecarbon INFO @ 03:21:34] Energy consumed for all GPUs : 0.009306 kWh. Total GPU Power : 167.616 W
[codecarbon INFO @ 03:21:34] Energy consumed for all CPUs : 0.002309 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:21:34] 0.012636 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:21:49] Energy consumed for RAM : 0.001100 kWh. RAM Power : 19.123995780944824 W
[codecarbon INFO @ 03:21:49] Energy consumed for all GPUs : 0.009945 kWh. Total GPU Power : 154.532 W
[codecarbon INFO @ 03:21:49] Energy consumed for all CPUs : 0.002485 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:21:50] 0.013530 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:22:04] Energy consumed for RAM : 0.001176 kWh. RAM Power : 19.123995780944824 W
[codecarbon INFO @ 03:22:04] Energy consumed for all GPUs : 0.011025 kWh. Total GPU Power : 268.61400000000003 W
[codecarbon INFO @ 03:22:04] Energy consumed for all CPUs : 0.002656 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:22:04] 0.014857 kWh of electricity used since the beginning.




[codecarbon INFO @ 03:22:06] Energy consumed for RAM : 0.001184 kWh. RAM Power : 19.123995780944824 W
[codecarbon INFO @ 03:22:06] Energy consumed for all GPUs : 0.011079 kWh. Total GPU Power : 129.782 W
[codecarbon INFO @ 03:22:06] Energy consumed for all CPUs : 0.002674 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 03:22:06] 0.014938 kWh of electricity used since the beginning.


0.004946775163973085


In [33]:
print(f"Total emission of kilograms of CO2 is: {tracker.final_emissions}")

Total emission of kilograms of CO2 is: 0.004946775163973085


In [39]:
history.history

{'loss': [0.7472164630889893, 0.7356048226356506],
 'val_loss': [1.370938777923584, 1.370938777923584]}

In [42]:
print(f"Final error of the model is: {history.history['val_loss'][-1]}")

Final error of the model is: 1.370938777923584


Save model!!

In [None]:
# save model: it must be .h5 format
# model.save('your_own_LLM_5_25.h5')

## Evaluate

Evaluation for question answering requires a significant amount of postprocessing. To avoid taking up too much of your time, this guide skips the evaluation step. The [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) still calculates the evaluation loss during training so you're not completely in the dark about your model's performance.

If have more time and you're interested in how to evaluate your model for question answering, take a look at the [Question answering](https://huggingface.co/course/chapter7/7?fw=pt#postprocessing) chapter from the 🤗 Hugging Face Course!

## Load Customized Model from Pickle



In [26]:
# def load_tf_model(filename):
#     with open(filename, 'rb') as file:
#         model = pickle.load(file)
#     return model

In [27]:
# Example usage
# new_model = load_tf_model('my_model.pkl')

## Quick Inference

In [28]:
def answer_question(prompt, question):
    inputs = tokenizer(prompt, question, add_special_tokens=True, return_tensors="tf")
    input_ids = inputs["input_ids"].numpy()[0]
    attention_mask = inputs["attention_mask"].numpy()[0]

    outputs = model(inputs)
    answer_start = tf.argmax(outputs.start_logits, axis=1).numpy()[0]
    answer_end = tf.argmax(outputs.end_logits, axis=1).numpy()[0]

    answer = tokenizer.decode(input_ids[answer_start:answer_end+1], skip_special_tokens=True)
    return answer


In [37]:
# Example usage
prompt = "The scholar's name is Yiqiao Yin. He is a computer scientist. He studies statistics at Columbia University as a graduate student and later he goes to University of Chicago."
question = "What does he study?"

answer = answer_question(prompt, question)
print("Answer:", answer)

Answer: computer scientist. he studies statistics


## Inference

Great, now that you've finetuned a model, you can use it for inference!

Come up with a question and some context you'd like the model to predict:

In [None]:
question = "How many programming languages does BLOOM support?"
context = "BLOOM has 176 billion parameters and can generate text in 46 languages natural languages and 13 programming languages."

The simplest way to try out your finetuned model for inference is to use it in a [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline). Instantiate a `pipeline` for question answering with your model, and pass your text to it:

In [None]:
from transformers import pipeline

question_answerer = pipeline("question-answering", model="my_awesome_qa_model")
question_answerer(question=question, context=context)

{'score': 0.2058267742395401,
 'start': 10,
 'end': 95,
 'answer': '176 billion parameters and can generate text in 46 languages natural languages and 13'}

You can also manually replicate the results of the `pipeline` if you'd like:

Tokenize the text and return TensorFlow tensors:

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("my_awesome_qa_model")
inputs = tokenizer(question, text, return_tensors="tf")

Pass your inputs to the model and return the `logits`:

In [None]:
from transformers import TFAutoModelForQuestionAnswering

model = TFAutoModelForQuestionAnswering.from_pretrained("my_awesome_qa_model")
outputs = model(**inputs)

Get the highest probability from the model output for the start and end positions:

In [None]:
answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])

Decode the predicted tokens to get the answer:

In [None]:
predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
tokenizer.decode(predict_answer_tokens)

'176 billion parameters and can generate text in 46 languages natural languages and 13'