In [1]:
! pip install -q transformers pandas datasets tabulate

[0m

In [2]:
# Print the apt metadata for the rocm-libs package to record which ROCm
# version this notebook was run against (-a: show all available versions).
!apt show rocm-libs -a

Package: rocm-libs
Version: 5.7.0.50700-63~22.04
Priority: optional
Section: devel
Maintainer: ROCm Libs Support <rocm-libs.support@amd.com>
Installed-Size: 13.3 kB
Depends: hipblas (= 1.1.0.50700-63~22.04), hipblaslt (= 0.3.0.50700-63~22.04), hipfft (= 1.0.12.50700-63~22.04), hipsolver (= 1.8.1.50700-63~22.04), hipsparse (= 2.3.8.50700-63~22.04), miopen-hip (= 2.20.0.50700-63~22.04), rccl (= 2.17.1.50700-63~22.04), rocalution (= 2.1.11.50700-63~22.04), rocblas (= 3.1.0.50700-63~22.04), rocfft (= 1.0.23.50700-63~22.04), rocrand (= 2.10.17.50700-63~22.04), rocsolver (= 3.23.0.50700-63~22.04), rocsparse (= 2.5.4.50700-63~22.04), rocm-core (= 5.7.0.50700-63~22.04), hipblas-dev (= 1.1.0.50700-63~22.04), hipblaslt-dev (= 0.3.0.50700-63~22.04), hipcub-dev (= 2.13.1.50700-63~22.04), hipfft-dev (= 1.0.12.50700-63~22.04), hipsolver-dev (= 1.8.1.50700-63~22.04), hipsparse-dev (= 2.3.8.50700-63~22.04), miopen-hip-dev (= 2.20.0.50700-63~22.04), rccl-dev (= 2.17.1.50700-63~22.04), rocalution-dev (=

In [3]:
# import os
# os.environ['HSA_OVERRIDE_GFX_VERSION']='10.3.0' # depending on your AMD GPU, you may need to run this line

In [4]:
from transformers import pipeline, AutoModelForTableQuestionAnswering
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from datasets import load_dataset
# Load only the 'train' split of the "ashraq/ott-qa-20k" dataset; the cells
# below read the 'title', 'header' and 'data' fields of its records.
data = load_dataset("ashraq/ott-qa-20k", split='train')

In [6]:
# Pick the first document titled 'World economy' and turn its rows/header
# into a DataFrame that the question-answering cells below operate on.
for doc in data:
    if doc['title'] == 'World economy':
        table = pd.DataFrame(doc["data"], columns=doc['header'])
        break
else:
    # Fail here, loudly, instead of leaving `table` unbound (or stale from a
    # previous run) and hitting a confusing NameError in a later cell.
    raise LookupError("No document titled 'World economy' found in the dataset")

In [7]:
# Show the selected table as Markdown text so the question-answering results
# further down can be checked against the raw values.
print(table.to_markdown())

|    |   Rank | Country              | Value ( USD $ )   |   Peak year |
|---:|-------:|:---------------------|:------------------|------------:|
|  0 |      1 | Qatar                | 146,982           |        2012 |
|  1 |      2 | Macau                | 133,021           |        2013 |
|  2 |      3 | Luxembourg           | 108,951           |        2019 |
|  3 |      4 | Singapore            | 103,181           |        2019 |
|  4 |      5 | United Arab Emirates | 92,037            |        2004 |
|  5 |      6 | Brunei               | 83,785            |        2012 |
|  6 |      7 | Ireland              | 83,399            |        2019 |
|  7 |      8 | Norway               | 76,684            |        2019 |
|  8 |      9 | San Marino           | 74,664            |        2008 |
|  9 |     10 | Kuwait               | 71,036            |        2013 |
| 10 |     11 | Switzerland          | 66,196            |        2019 |
| 11 |     12 | United States        | 65,112      

In [14]:
from transformers import TapasTokenizer, TapasForQuestionAnswering, TapasConfig

# Single source of truth for the checkpoint: model, tokenizer and config are
# all loaded from the same name so they cannot drift apart when it is changed.
model_name = "google/tapas-base-finetuned-wtq"
model = TapasForQuestionAnswering.from_pretrained(model_name)
tokenizer = TapasTokenizer.from_pretrained(model_name)
config = TapasConfig.from_pretrained(model_name)

# Dump the architecture for reference.
print(model)

# Mapping from aggregation index to operator name; qa() uses it to label predictions.
print("Aggregation: ", config.aggregation_labels)

TapasForQuestionAnswering(
  (tapas): TapasModel(
    (embeddings): TapasEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(1024, 768)
      (token_type_embeddings_0): Embedding(3, 768)
      (token_type_embeddings_1): Embedding(256, 768)
      (token_type_embeddings_2): Embedding(256, 768)
      (token_type_embeddings_3): Embedding(2, 768)
      (token_type_embeddings_4): Embedding(256, 768)
      (token_type_embeddings_5): Embedding(256, 768)
      (token_type_embeddings_6): Embedding(10, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): TapasEncoder(
      (layer): ModuleList(
        (0-11): 12 x TapasLayer(
          (attention): TapasAttention(
            (self): TapasSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias

In [10]:
# Build a table-question-answering pipeline around the model and tokenizer
# loaded above.
# NOTE(review): `pipe` is not referenced by any other cell shown here; qa()
# calls the tokenizer and model directly — confirm whether this is still needed.
pipe = pipeline("table-question-answering", model=model, tokenizer=tokenizer) 

In [11]:
def qa(queries, table):
    """Ask questions about a table with the TAPAS model and print the predictions.

    Uses the notebook-level ``tokenizer``, ``model`` and ``config`` objects.

    Args:
        queries: A single question string or an iterable of question strings.
        table: pandas DataFrame to query. Cells are converted to text before
            tokenization, so numeric columns are accepted.

    Returns:
        None. For every query, the query and a ``Predicted answer: ...`` line
        are printed. When the predicted aggregation operator is not ``NONE``
        it is shown as a prefix of the selected cells; the operator is only
        reported, it is not applied to the cell values.
    """
    # A bare string would otherwise be zipped character by character below.
    if isinstance(queries, str):
        queries = [queries]
    else:
        queries = list(queries)

    # TapasTokenizer requires every cell to be text; converting here also keeps
    # the string join / concatenation below from failing on numeric cells.
    text_table = table.astype(str)

    inputs = tokenizer(table=text_table, queries=queries, padding=True, truncation=True, return_tensors="pt")
    outputs = model(**inputs)
    predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
        inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
    )

    # Translate aggregation indices into their operator names.
    id2aggregation = config.aggregation_labels
    aggregation_predictions_string = [id2aggregation[x] for x in predicted_aggregation_indices]

    # Each prediction is a list of (row, column) coordinates; one cell yields
    # its value, several cells are comma-separated, none yields an empty string.
    answers = [
        ", ".join(str(text_table.iat[coordinate]) for coordinate in coordinates)
        for coordinates in predicted_answer_coordinates
    ]

    print("")
    for query, answer, predicted_agg in zip(queries, answers, aggregation_predictions_string):
        print(query)
        if predicted_agg == "NONE":
            print("Predicted answer: " + answer)
        else:
            print("Predicted answer: " + predicted_agg + " > " + answer)
        print()

In [15]:
# Sample questions covering a plain lookup plus sum / average / count style
# aggregations; the last one asks about two countries by name.
queries = [
    "What is the value of Norway?",
    "What is the total value of all countries in 2013?",
    "What is the average value of all countries in 2019?",
    "How many countries are in the data in 2012?",
    "What is the combined value of Sweden and Denmark?",
]
qa(queries, table)

  text = normalize_for_match(row[col_index].text)
  cell = row[col_index]



What is the value of Norway?
Predicted answer: AVERAGE > 76,684

What is the total value of all countries in 2013?
Predicted answer: SUM > 133,021, 71,036

What is the average value of all countries in 2019?
Predicted answer: AVERAGE > 108,951, 83,399, 76,684, 66,196, 65,112, 64,928, 58,341, 56,066, 55,078, 54,628, 53,882, 53,567

How many countries are in the data in 2012?
Predicted answer: COUNT > Qatar, Brunei

What is the combined value of Sweden and Denmark?
Predicted answer: SUM > 54,628, 53,882

