In [22]:
import pandas as pd
import tester
from trainer import read_tables
import os
import argparse
import json
from IPython.display import display, clear_output
import time


In [23]:
def get_args(dataset):
    """Build the argument namespace used to run the tester on one dataset.

    Args:
        dataset: Name of the dataset; stored unchanged on the namespace.

    Returns:
        argparse.Namespace with three attributes:
        ``work_dir`` (absolute path of the parent of the current working
        directory), ``query_dir`` (always ``'demo_query'``) and ``dataset``.
    """
    options = {
        # Resolved at call time, so it follows the kernel's current directory.
        'work_dir': os.path.abspath('..'),
        'query_dir': 'demo_query',
        'dataset': dataset,
    }
    return argparse.Namespace(**options)

In [24]:
def create_query_file(data_dir, question):
    """Write a single-record JSON Lines query file for ``question``.

    The file ``fusion_query.jsonl`` inside ``data_dir`` is overwritten on
    every call and ends up holding exactly one JSON object followed by a
    newline.

    Args:
        data_dir: Existing directory in which the query file is written.
        question: Question text stored under the ``"question"`` key.

    Returns:
        None.
    """
    target_path = os.path.join(data_dir, 'fusion_query.jsonl')
    # Dummy passage; presumably the downstream reader requires a non-empty
    # "ctxs" list -- TODO confirm against the tester.
    placeholder_passage = {"title": "", "text": "This is a example passage."}
    record = {
        "id": 0,
        "question": question,
        # No gold tables/answers exist for an ad-hoc question, so placeholder
        # values are written for these fields.
        "table_id_lst": ['N/A'],
        "answers": ["N/A"],
        "ctxs": [placeholder_passage],
    }
    with open(target_path, 'w') as sink:
        sink.write('%s\n' % json.dumps(record))
    

In [25]:
def show_results(question, out_dir, table_dict, top_n=5):
    """Display the first ``top_n`` distinct tables predicted for a question.

    Reads the first record of ``pred_None.jsonl`` in ``out_dir``, walks its
    ``'tags'`` list in order and shows each table the first time its
    ``'table_id'`` is seen, stopping once ``top_n`` distinct tables have been
    shown.

    Args:
        question: Question text, passed through to ``show_table``.
        out_dir: Directory containing the prediction file ``pred_None.jsonl``.
        table_dict: Mapping from table id to the table object expected by
            ``show_table``.
        top_n: Maximum number of distinct tables to show (default 5).

    Returns:
        None.

    Raises:
        json.JSONDecodeError: If the prediction file does not start with a
            valid JSON value.
        KeyError: If the record has no ``'tags'`` key, a tag has no
            ``'table_id'``, or a table id is missing from ``table_dict``.
    """
    data_file = os.path.join(out_dir, 'pred_None.jsonl')
    with open(data_file) as f:
        # Decode only the first JSON value: json.load() would raise
        # "Extra data" if the .jsonl file ever held more than one record.
        # raw_decode does not skip leading whitespace, hence the lstrip().
        item, _ = json.JSONDecoder().raw_decode(f.read().lstrip())

    shown_table_ids = set()
    for tag in item['tags']:
        if len(shown_table_ids) >= top_n:
            return
        table_id = tag['table_id']
        if table_id in shown_table_ids:
            # Several tags can point at the same table; show it only once.
            continue
        shown_table_ids.add(table_id)
        # The running count of distinct tables doubles as the displayed rank.
        show_table(question, table_dict[table_id], len(shown_table_ids))
    
def show_table(question, table, seq_no):
    """Print the question and table title, then render the table.

    Args:
        question: Question text echoed above the table.
        table: Mapping with ``'documentTitle'``, ``'columns'`` (items carrying
            a ``'text'`` field) and ``'rows'`` (items carrying a ``'cells'``
            list whose entries carry a ``'text'`` field).
        seq_no: Rank of the table, printed in the "Top N table" line.

    Returns:
        None. The table is shown through ``display`` as a pandas DataFrame.
    """
    print("Question, %s" % question)
    title = table['documentTitle']
    print("Top %d table,         %s " % (seq_no, title))

    # Column headers and cell values are both taken from each item's 'text'.
    headers = [column['text'] for column in table['columns']]
    body = [
        [cell['text'] for cell in row['cells']]
        for row in table['rows']
    ]
    frame = pd.DataFrame(body, columns=headers)
    display(frame)
    

In [None]:
# Interactive demo: ask for a dataset once, load its tables, then repeatedly
# read a question, run the tester on it and display the top predicted tables
# until the user types 'q'.
# Strip the dataset name like the question below, so stray whitespace from
# the input box does not end up in the paths built from it.
dataset = input('Type the dataset, ').strip()
args = get_args(dataset)
print('Loading tables')
table_dict = read_tables(args.work_dir, dataset)

# Build the query directory from args.query_dir instead of repeating the
# 'demo_query' literal, so it cannot drift from what get_args() configures.
# NOTE(review): presumably tester.main reads the query file from this
# location -- confirm against tester.
data_dir = os.path.join(args.work_dir, 'data', dataset, args.query_dir, 'test')
# exist_ok avoids the separate isdir() check and the race between the two.
os.makedirs(data_dir, exist_ok=True)

while True:
    question = input('Type a question (q to quit),\n').strip()
    if question == 'q':
        break
    create_query_file(data_dir, question)
    out_dir = tester.main(args)
    # Drop the tester's output so only the result tables remain visible.
    clear_output(wait=False)
    show_results(question, out_dir, table_dict)
    # Short pause before the next prompt; presumably lets the rendered
    # tables appear before the input box -- TODO confirm.
    time.sleep(1)
    

Question, Chicago county
Top 1 table,         Chicago , Sports 


Unnamed: 0,Club,League,Sport,Venue,Attendance,Founded,Championships
0,Chicago Bears,NFL,Football,Soldier Field,61142,1919,9 Championships (1 Super Bowl)
1,Chicago Cubs,MLB,Baseball,Wrigley Field,41649,1870,3 World Series
2,Chicago Blackhawks,NHL,Ice hockey,United Center,21653,1926,6 Stanley Cups
3,Chicago Bulls,NBA,Basketball,United Center,20776,1966,6 NBA Championships
4,Chicago White Sox,MLB,Baseball,Guaranteed Rate Field,20626,1900,3 World Series
5,Chicago Fire,MLS,Soccer,SeatGeek Stadium,17383,1997,"1 MLS Cup, 1 Supporters Shield"
6,Chicago Sky,WNBA,Basketball,Wintrust Arena,10387,2006,0 WNBA Championships
7,Chicago Red Stars,NWSL,Soccer,SeatGeek Stadium,3198,2006,1 National Women's Cup


Question, Chicago county
Top 2 table,         Martin Creed , Discography 


Unnamed: 0,Title,Type,Date,Format,Label
0,What The Fuck Am I Doing?,Single,2017,Digital,Telephone Records
1,Blow And Suck,Single,2017,Digital,Telephone Records
2,It's You,Single,2016,Digital,Telephone Records
3,Princess Taxi Girl,Single,2016,Digital,Telephone Records
4,Thoughts Lined Up,Album,2016,"CD, Digital",Telephone Records
5,Understanding,Single,2016,Digital,Telephone Records
6,Let Them In / Border Control,Double A-Side Single,2015,Digital,Telephone Records
7,Mind Trap,Album,2014,"CD, 12"" Vinyl, Special Edition 12"" Vinyl, Digital",Telephone Records
8,Blow And Suck / I Want You,Single,2013,"Special Edition 12"" Vinyl",The Vinyl Factory
9,Chicago,EP,2012,"Special Edition 12"" Vinyl",Telephone Records / The Vinyl Factory / MCA Ch...


Question, Chicago county
Top 3 table,         Ann Reinking , Credits 


Unnamed: 0,Year,Title,Role,Notes
0,1969,Cabaret,Ensemble,-
1,1969,Coco,Ensemble,-
2,1971,Wild and Wonderful,Ensemble,-
3,1972,Pippin,"Ensemble, Catherine understudy",-
4,1974,Over Here!,Maggie,-
5,1975,Goodtime Charley,Joan of Arc,-
6,1976,A Chorus Line,Cassie Ferguson (replacement),-
7,1977,Chicago,Roxie Hart (replacement),-
8,1978,Dancin',Ensemble,-
9,1986,Sweet Charity,Charity Hope Valentine (replacement),-


Question, Chicago county
Top 4 table,         Gay village , By U.S. city 


Unnamed: 0,Rank,City,Percentage of City Population,LGBT Population,LGBT Population.1
0,Rank,City,Percentage of City Population,population,rank
1,1,New York City,4.5%,272493,1
2,2,Los Angeles,5.6%,154270,2
3,3,Chicago,5.7%,114449,3
4,4,San Francisco,15.4%,94234,4
5,5,Phoenix,6.4%,63222,5
6,6,Houston,4.4%,61976,6
7,7,San Diego,6.8%,61945,7
8,8,Dallas,7.0%,58473,8
9,9,Seattle,12.9%,57993,9


Question, Chicago county
Top 5 table,         Marian Sutton , Competition Record 


Unnamed: 0,Year,Competition,Venue,Position,Event,Notes
0,Representing United Kingdom,Representing United Kingdom,Representing United Kingdom,Representing United Kingdom,Representing United Kingdom,Representing United Kingdom
1,1992,IAAF World Half Marathon Championships,"Newcastle-upon-Tyne, United Kingdom",37th,Half marathon,1:13:09
2,1993,IAAF World Half Marathon Championships,"Brussels, Belgium",19th,Half marathon,1:12:37
3,1993,World Championships,"Stuttgart, Germany",14th,Marathon,2:39:45
4,1994,European Championships,"Helsinki, Finland",22nd,Marathon,2:40:34
5,1994,IAAF World Half Marathon Championships,"Oslo, Norway",72th,Half marathon,1:16:15
6,1995,Chicago Marathon,"Chicago, United States",5th,Marathon,2:32:36
7,1996,Chicago Marathon,"Chicago, United States",1st,Marathon,2:30:41
8,1997,Chicago Marathon,"Chicago, United States",1st,Marathon,2:29:03
9,1997,Fleet Half Marathon,"Fleet, England",1st,Half marathon,1:12:35
