In [7]:
import pandas as pd
import torch
from transformers import TapasTokenizer, TapasForQuestionAnswering

# Single checkpoint identifier shared by the tokenizer and the model so the
# two are always loaded from the same pretrained weights/vocabulary.
model_name = "google/tapas-base-finetuned-wtq"

# Tokenizer turns (table, queries) into model inputs; the model produces
# cell-selection logits and aggregation logits (both are used further down).
tokenizer = TapasTokenizer.from_pretrained(model_name)
model = TapasForQuestionAnswering.from_pretrained(model_name)

# Column-oriented table contents: each key is a column header and each list
# holds that column's cells, aligned by position (entry i of every list
# describes the same team). This dict is turned into a DataFrame in a later
# cell and handed to the TAPAS tokenizer.
data = {
    "Team": ["Adana Demirspor", "Alanyaspor", "Ankaragücü", "Antalyaspor", "Başakşehir", "Beşiktaş", "Fatih Karagümrük", "Fenerbahçe", "Galatasaray", "Gaziantep", "Hatayspor", "İstanbulspor", "Kasımpaşa", "Kayserispor", "Konyaspor", "Pendikspor", "Rizespor", "Samsunspor", "Sivasspor", "Trabzonspor"],
    "Home city/borough": ["Adana", "Alanya", "Ankara", "Antalya", "Başakşehir", "Beşiktaş", "Fatih", "Kadıköy", "Sarıyer", "Gaziantep", "Antakya", "Büyükçekmece", "Kasımpaşa", "Kayseri", "Konya", "Pendik", "Rize", "Samsun", "Sivas", "Trabzon"],
    "Home province": ["Adana", "Antalya", "Ankara", "Antalya", "Istanbul", "Istanbul", "Istanbul", "Istanbul", "Istanbul", "Gaziantep", "Hatay", "Istanbul", "Istanbul", "Kayseri", "Konya", "Istanbul", "Rize", "Samsun", "Sivas", "Trabzon"],
    "Stadium": ["New Adana Stadium", "Alanya Oba Stadium", "Eryaman Stadium", "Corendon Airlines Park", "Başakşehir Fatih Terim Stadium", "Tüpraş Stadium", "Atatürk Olympic Stadium", "Şükrü Saracoğlu Stadium", "Rams Park", "Kalyon Stadium", "Mersin Stadium", "Esenyurt Necmi Kadıoğlu Stadium", "Recep Tayyip Erdoğan Stadium", "RHG Enertürk Enerji Stadium", "Konya Metropolitan Municipality Stadium", "Pendik Stadium", "Rize City Stadium", "Samsun 19 Mayıs Stadium", "New Sivas 4 Eylül Stadium", "Şenol Güneş Sports Complex"],
    # Capacities are deliberately kept as strings, not ints: the answer-assembly
    # cell below concatenates selected cells with ", ".join(...), which only
    # accepts strings.
    "Capacity": ["33543", "10130", "20560", "32537", "17156", "42590", "76761", "47834", "52280", "33502", "25497", "7500", "14234", "32864", "42000", "2500", "15332", "33919", "27532", "40782"]
}

# Natural-language questions asked against the table. All of them are
# tokenized together with the table in one batch, and each is echoed verbatim
# next to its predicted answer in the final cell.
# NOTE(review): "stadium's" in the last query looks like a typo for "stadiums".
# It is left untouched here because the query text is model input and is
# printed as-is in the recorded output; changing it would change both.
queries = [
    "Which team has the highest stadium capacity?",
    "How many teams are based in Istanbul?",
    "What is Ankaragücü's total stadium capacity?",
    "How many stadium's are in Istanbul"
]

In [8]:
# Build the DataFrame the tokenizer consumes from the column-oriented dict.
table = pd.DataFrame.from_dict(data)

# Encode the table once together with every query; each query becomes one
# row of the batch, padded to the model's maximum length.
inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")

# Pure inference: disable autograd so no computation graph is recorded for
# the forward pass. This saves memory and makes the logits plain tensors,
# so no per-tensor .detach() is needed before post-processing.
with torch.no_grad():
    outputs = model(**inputs)

# Turn the raw logits into, per query, a list of selected (row, column) cell
# coordinates and the index of the predicted aggregation operator.
predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
    inputs, outputs.logits, outputs.logits_aggregation
)

  text = normalize_for_match(row[col_index].text)
  cell = row[col_index]


In [9]:
# Aggregation operator labels, keyed by the integer index the model predicts
# (position in the tuple == index).
id2aggregation = dict(enumerate(("NONE", "SUM", "AVERAGE", "COUNT")))

# One operator label per query, in the same order as the queries.
aggregation_predictions_string = [
    id2aggregation[agg_index] for agg_index in predicted_aggregation_indices
]

In [10]:
# For every query, look up the table cells the model selected and collapse
# them into a single answer entry.
answers = []
for cell_coords in predicted_answer_coordinates:
    # Each coordinate is a (row, column) pair usable directly with .iat.
    picked = [table.iat[row_col] for row_col in cell_coords]
    # Exactly one cell: keep the raw cell value. Otherwise (zero or several
    # cells): join them into one comma-separated string.
    answers.append(picked[0] if len(picked) == 1 else ", ".join(picked))

In [11]:
# Show the table the queries were answered against, so the predictions printed
# in the next cell can be checked by eye.
display(table)

Unnamed: 0,Team,Home city/borough,Home province,Stadium,Capacity
0,Adana Demirspor,Adana,Adana,New Adana Stadium,33543
1,Alanyaspor,Alanya,Antalya,Alanya Oba Stadium,10130
2,Ankaragücü,Ankara,Ankara,Eryaman Stadium,20560
3,Antalyaspor,Antalya,Antalya,Corendon Airlines Park,32537
4,Başakşehir,Başakşehir,Istanbul,Başakşehir Fatih Terim Stadium,17156
5,Beşiktaş,Beşiktaş,Istanbul,Tüpraş Stadium,42590
6,Fatih Karagümrük,Fatih,Istanbul,Atatürk Olympic Stadium,76761
7,Fenerbahçe,Kadıköy,Istanbul,Şükrü Saracoğlu Stadium,47834
8,Galatasaray,Sarıyer,Istanbul,Rams Park,52280
9,Gaziantep,Gaziantep,Gaziantep,Kalyon Stadium,33502


In [12]:
# Report each query with its predicted answer. When the model predicted an
# aggregation operator, the operator name is shown in front of the selected
# cells; the aggregation itself is not evaluated here.
for question, cells_text, agg_op in zip(queries, answers, aggregation_predictions_string):
    print("Query: " + question)
    operator_prefix = "" if agg_op == "NONE" else agg_op + " > "
    print("Predicted answer: " + operator_prefix + cells_text + "\n")

Query: Which team has the highest stadium capacity?
Predicted answer: Fatih Karagümrük

Query: How many teams are based in Istanbul?
Predicted answer: COUNT > Başakşehir, Beşiktaş, Fatih Karagümrük, Fenerbahçe, Galatasaray, İstanbulspor, Kasımpaşa, Pendikspor

Query: What is Ankaragücü's total stadium capacity?
Predicted answer: AVERAGE > 20560

Query: How many stadium's are in Istanbul
Predicted answer: COUNT > Başakşehir Fatih Terim Stadium, Tüpraş Stadium, Atatürk Olympic Stadium, Şükrü Saracoğlu Stadium, Rams Park, Esenyurt Necmi Kadıoğlu Stadium, Recep Tayyip Erdoğan Stadium, Pendik Stadium

