In [15]:
import pandas as pd
import requests
from anyio import sleep
from torch.utils.data.datapipes.dataframe.dataframe_wrapper import concat

data = pd.read_csv('../gym/data.csv')
llm_res = pd.read_csv('../gym/llm_response.csv')
altair_res = pd.read_csv('../gym/altair_response.csv')

altair_url = 'https://altair.marconapoleone.me/tables/searchEmbedding'

queries = {
    1: '(Graph structured) AND (visual Question Answering)',
    2: '"Autonomous Surface Vehicles" AND "Federated Learning"',
    3: 'RAG AND (Long-context LLM) OR (long context language model)',
    4: 'Applications of visual question answering systems',
    5: 'Fairness AND socials',
    6: '(Regulatory Capture AND IA OR ML) OR (Regulatory Capture OR Machine Learning)',
    7: '(Segmentation OR SAM) AND failure',
    8: '(Graph Convolutional Network) OR (GCN) AND (node classification)',
    9: '((Question Answering) AND (visual)) OR (VQA)',
    10: 'FL AND decentralised',
}

def get_altair_response(query):
    try:
        response = requests.get(altair_url, params={'query': query, 'limit': 200})
        response.raise_for_status()  # Raise an HTTPError for bad responses
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"HTTP error occurred: {e}")
        return None
    

In [17]:
results = []

for i in range(1, 11):
    try:
        query = queries[i]
        response = get_altair_response(query)
        if response:
            data = {}
            tables = response['tables']
            for table in tables:
                print(table['id'], table['fileName'], query)
                results.append({'query_id': i, 'query': query, 'table': table['fileName'], 'id': table['id']})
    except Exception as e:
        print(f"Error occurred: {e}")


S3.T5.2 2406.15319 (Graph structured) AND (visual Question Answering)
S3.T2 2301.10799v2 (Graph structured) AND (visual Question Answering)
S2.SS1.89 2404.18961v1 (Graph structured) AND (visual Question Answering)
S5.T5 2408.01931v2 (Graph structured) AND (visual Question Answering)
S5.T1 1609.05600v2 (Graph structured) AND (visual Question Answering)
S1.T1 2409.08498v1 (Graph structured) AND (visual Question Answering)
S5.T1 2302.12156 (Graph structured) AND (visual Question Answering)
S6.T5 2211.15217 (Graph structured) AND (visual Question Answering)
S2.T2 2404.18961v1 (Graph structured) AND (visual Question Answering)
Sx5.T3 2405.13018 (Graph structured) AND (visual Question Answering)
A4.T3.2.2 2409.17874 (Graph structured) AND (visual Question Answering)
S3.T1 2303.12317 (Graph structured) AND (visual Question Answering)
S5.T6 2409.08933v1 (Graph structured) AND (visual Question Answering)
A5.T5.4 2208.07900 (Graph structured) AND (visual Question Answering)
S4.T4 2409.08498v1 (G

In [18]:
results_df = pd.DataFrame(results)
results_df.to_csv('../gym/altair_response.csv', index=False)

In [26]:
# F1 score using the relevance data

# llm_res contains the ground truth relevance data
# altair_res contains the relevance data from the Altair API

def calculate_f1_score(query_id):
    llm = llm_res[llm_res['query'] == query_id]
    altair = altair_res[altair_res['query_id'] == query_id]
    tp = len(pd.merge(llm, altair, on=['table', 'id']))
    fp = len(altair) - tp
    fn = len(llm) - tp
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * (precision * recall) / (precision + recall)
    return f1

f1_scores = []

for i in range(1, 11):
    f1 = calculate_f1_score(i)
    f1_scores.append({'query_id': i, 'f1': f1})
    
print('Mean F1 score:', sum([f['f1'] for f in f1_scores]) / len(f1_scores))
f1_scores


KeyError: 'table'