In [10]:
import ollama
import pandas as pd
from urllib3 import request

#ollama.pull('llama3.1:70b')

# Folder that receives the per-row, per-query model responses written further down.
# NOTE(review): the folder is named "gpt4o" while get_openrouter_response requests
# "meta-llama/llama-3.1-70b-instruct" — confirm which model these files belong to.
output_dir = '../../gym/raw/llm_json/gpt4o/'

# load_dotenv() presumably populates the process environment from a .env file;
# OPENROUTER_API_KEY is read from the environment later in the notebook.
from dotenv import load_dotenv
load_dotenv()


True

In [11]:

# Load the raw CSV; every row of this frame is later sent to the model once per query.
df = pd.read_csv('../../gym/raw/data.csv')


In [12]:
import os
import requests
import json

# For each row of the dataframe, we ask the model to generate a response.
# We want to know whether the table is relevant to each of our queries, plus a score of how relevant it is.
# Then we add the 10 queries to each row as columns, holding the model's score for each query.


# Search queries every table is scored against, keyed 1..10 in the order listed.
queries = dict(enumerate(
    (
        '(Graph structured) AND (visual Question Answering)',
        '"Autonomous Surface Vehicles" AND "Federated Learning"',
        'RAG AND (Long-context LLM) OR (long context language model)',
        'Applications of visual question answering systems',
        'Fairness AND socials',
        '(Regulatory Capture AND IA OR ML) OR (Regulatory Capture OR Machine Learning)',
        '(Segmentation OR SAM) AND failure',
        '(Graph Convolutional Network) OR (GCN) AND (node classification)',
        '((Question Answering) AND (visual)) OR (VQA)',
        'FL AND decentralised',
    ),
    start=1,
))

# Example reply embedded in the prompt so the model can mirror its shape.
# The fragments are concatenated with no separator; the "//" annotations are
# part of the text that is sent to the model.
response_format = "".join([
    '{',
    ' "relevant": true, //boolean \n',
    ' "score": 0.9, //float \n',
    ' "keywords": ["Graph structured", "visual Question Answering"], //list of strings \n',
    ' "explanation": "The quick brown fox jumps over the lazy dog" //string \n (only if needed and max 30 word)',
    '}',
])

def get_prompt(query, table):
    """Build the single-string instruction sent to the model.

    Args:
        query: Search query the table is judged against.
        table: Table content; it is interpolated into the text as-is.

    Returns:
        The prompt text: the task statement, the query and table wrapped in
        square brackets, the two required fields, and the module-level
        ``response_format`` example.
    """
    sentences = [
        "You are a server. Evaluate the relevance of the given table to the query and respond strictly in JSON format.",
        f"Query: [{query}]. Table: [{table}].",
        "Your response must include:",
        "1. A relevance score (between 0 and 1).",
        "2. A boolean indicating whether the table is relevant (true/false).",
        f"Example JSON response: {response_format}",
    ]
    # Consecutive parts of the prompt are separated by exactly one space.
    return " ".join(sentences)

def get_prompt_response(query, table, model='openai/gpt-4o-2024-11-20'):
    """Ask a model through the ``ollama`` client whether a table matches a query.

    Args:
        query: Search query the table is judged against.
        table: Table content forwarded to ``get_prompt``.
        model: Model identifier handed to ``ollama.chat``. The default is the
            value that used to be hard-coded here, so existing calls behave
            the same.
            NOTE(review): that default looks like an OpenRouter-style id rather
            than a local Ollama tag (the notebook's first cell mentions
            'llama3.1:70b') — confirm which model is intended.

    Returns:
        The ``content`` field of the reply message.
    """
    prompt = get_prompt(query, table)
    reply = ollama.chat(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    return reply['message']['content']


def get_openrouter_response(query, table, model="meta-llama/llama-3.1-70b-instruct", timeout=60):
    """Send the relevance prompt to the OpenRouter chat-completions endpoint.

    Args:
        query: Search query the table is judged against.
        table: Table content forwarded to ``get_prompt``.
        model: Model id placed in the request body. Defaults to the id that
            used to be hard-coded here.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot block the caller indefinitely.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        ValueError: If OPENROUTER_API_KEY is unset or empty.
        requests.HTTPError: If the endpoint answers with an error status.
    """
    # The key comes from the environment so it never appears in the notebook.
    api_key = os.getenv("OPENROUTER_API_KEY")

    # An empty key is as unusable as a missing one; reject both up front.
    if not api_key:
        raise ValueError("OPENROUTER_API_KEY environment variable is not set")

    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": get_prompt(query, table)}
        ],
        "top_p": 0.9,
        "temperature": 0.8,
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "repetition_penalty": 1,
        "top_k": 0,
    }

    res = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
        },
        data=json.dumps(payload),
        timeout=timeout,
    )
    # Fail here on 4xx/5xx instead of handing an error body back as if it were a completion.
    res.raise_for_status()
    return res.json()

In [13]:

# for each row of the dataframe, we will ask the model to generate a response
# then we export to json the results 
# Make sure the destination folder exists before the first write.
os.makedirs(output_dir, exist_ok=True)

for index, row in df.iterrows():
    # Serialise the row explicitly. Formatting the Series object itself would put
    # its display repr into the prompt, which is subject to pandas display
    # truncation of long values and carries a name/dtype trailer.
    table = row.to_json(force_ascii=False)
    for query_id, query_text in queries.items():
        out_path = os.path.join(output_dir, f'{index}_{query_id}.json')
        try:
            payload = get_openrouter_response(query_text, table)
            if 'choices' not in payload:
                # Show what the API actually returned rather than a bare KeyError('choices').
                raise RuntimeError(f"response has no 'choices': {payload}")
            content = payload['choices'][0]['message']['content']
            print(f"Exporting {index}_{query_id}.json")
            # Explicit encoding so model output with non-ASCII text is written the same on every platform.
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(content)
        except Exception as e:
            # Best-effort run: report which (row, query) pair failed and keep going.
            print(f"Failed {index}_{query_id}: {e!r}")



Exporting 0_1.json
Exporting 0_2.json
Exporting 0_3.json
Exporting 0_4.json
Exporting 0_5.json
Exporting 0_6.json
Exporting 0_7.json
Exporting 0_8.json
Exporting 0_9.json
Exporting 0_10.json
Exporting 1_1.json
Exporting 1_2.json
Exporting 1_3.json
Exporting 1_4.json
Exporting 1_5.json
Exporting 1_6.json
Exporting 1_7.json
Exporting 1_8.json
Exporting 1_9.json
Exporting 1_10.json
Exporting 2_1.json
Exporting 2_2.json
Exporting 2_3.json
Exporting 2_4.json
Exporting 2_5.json
Exporting 2_6.json
Exporting 2_7.json
Exporting 2_8.json
Exporting 2_9.json
Exporting 2_10.json
Exporting 3_1.json
Exporting 3_2.json
Exporting 3_3.json
'choices'
Exporting 3_5.json
Exporting 3_6.json
'choices'
Exporting 3_8.json
Exporting 3_9.json
Exporting 3_10.json
Exporting 4_1.json
Exporting 4_2.json
Exporting 4_3.json
Exporting 4_4.json
Exporting 4_5.json
Exporting 4_6.json
Exporting 4_7.json
Exporting 4_8.json
Exporting 4_9.json
Exporting 4_10.json
Exporting 5_1.json
Exporting 5_2.json
Exporting 5_3.json
Export