# LLM Prompt Generator
This notebook reads the tables in `input_file.json` and generates one prompt file per table (saved under `prompts/`), ready to send to GPT/Gemini/Llama/\<insert LLM\> ...

In [19]:
import os
import json
from bs4 import BeautifulSoup

# Name of the input JSON file (read by process_json_tables)
input_file = "input_file.json"
# Output directory for the generated .txt prompt files
output_dir = "prompts"

# Create the output directory if it does not already exist
os.makedirs(output_dir, exist_ok=True)

# Introductory text written at the top of every prompt, before the HTML table.
# It is sent verbatim to the LLM, so any edit here changes the prompt itself.
intro_text = """
Consider the claims being in the form |{Specification, Specification, …}, Measure, Outcome|

Claims must be extracted according to the following format:
|{Specification, Specification, …}, Measure, Outcome|
Specification: |name, value| pair describing the details of an experiment
E.g.: |dataset, Spider|, or |LLM, Llama27b|
Measure: metric or measure used to evaluate the experiment
E.g.: F1-measure
Outcome: outcome value related to metric 
E.g.: 0.89

The format have to follows this examples:
Claim 0: |{|Model Type, General LLM|, |Model Name, ChatGPT-3.5-turbo|, |Parameter Size, 175B|, |Dataset, Spider dev|, |Difficulty Level, 1|}, Execution Match , 0.760|
Claim 1:| .... |

Extract all the claims, even with empty value for the measure in that exact form from the following html table:
"""

# Sentence written before the table caption to introduce it as context
caption_context_text = "\n\nKnow that the context where the table was mentioned is the following:\n\n"

# Sentence to add before the references
# NOTE(review): no constant follows this comment in this cell, so references
# are currently written without an introductory sentence -- confirm whether
# one was intended.

In [21]:
def _collapse_whitespace(text):
    """Return ``text`` with every run of whitespace replaced by one space."""
    return ' '.join(text.split())


def _flatten_notes(raw_notes):
    """Merge a list of note strings (references or footnotes) into one line.

    ``None`` yields an empty string and a bare string is treated as a
    one-element list, so a slightly malformed entry cannot abort the run.
    Blank entries disappear because whitespace is collapsed after joining.
    """
    if raw_notes is None:
        return ""
    if isinstance(raw_notes, str):
        raw_notes = [raw_notes]
    return _collapse_whitespace(
        ' '.join(str(note) for note in raw_notes if note is not None)
    )


# Function that turns every table of the JSON file into a prompt file
def process_json_tables(input_path, output_path):
    """Write one ``<table_id>.txt`` prompt file per table found in a JSON file.

    The JSON document is expected to be an object mapping a table id to an
    entry with an HTML string under ``"table"`` and, optionally,
    ``"caption"`` (string), ``"references"`` and ``"footnotes"`` (lists of
    strings).  Each prompt consists of the module-level ``intro_text``, the
    whitespace-normalised table, then -- when non-empty -- the caption
    (prefixed by the module-level ``caption_context_text``), the references
    and the footnotes.

    Args:
        input_path: Path of the JSON file to read.
        output_path: Directory receiving the prompt files; it is created if
            it does not exist.

    Returns:
        None.  Progress, warnings and JSON errors are reported with ``print``.

    Raises:
        OSError: If the input file cannot be opened or an output file cannot
            be written (these errors are not swallowed).
    """
    try:
        with open(input_path, "r", encoding="utf-8") as file:
            data = json.load(file)
    except json.JSONDecodeError as e:
        print(f"Errore durante l'elaborazione del file JSON: {e}")
        return

    if not isinstance(data, dict):
        print(
            "Errore durante l'elaborazione del file JSON: "
            f"atteso un oggetto, trovato {type(data).__name__}"
        )
        return

    # Do not rely on another cell having created the directory already.
    os.makedirs(output_path, exist_ok=True)

    for table_id, table_content in data.items():
        raw_table = table_content.get("table") if isinstance(table_content, dict) else None
        if not isinstance(raw_table, str):
            # Missing, null or non-string table: skip this entry only,
            # the remaining tables are still processed.
            print(f"Avviso: Nessuna tabella valida trovata per ID {table_id}.")
            continue

        html_table = _collapse_whitespace(raw_table)

        # A missing or null caption is treated as "no caption" instead of
        # raising KeyError and aborting the whole run.
        raw_caption = table_content.get("caption")
        caption = _collapse_whitespace(raw_caption) if isinstance(raw_caption, str) else ""

        references_text = _flatten_notes(table_content.get("references"))
        footnotes_text = _flatten_notes(table_content.get("footnotes"))

        output_file = os.path.join(output_path, f"{table_id}.txt")

        # Write intro text, HTML table, caption, references and footnotes
        # into a separate file for this table.
        with open(output_file, "w", encoding="utf-8") as outfile:
            outfile.write(intro_text + "\n" + html_table + "\n")
            if caption:
                outfile.write(caption_context_text + caption)
            if references_text:
                outfile.write("\n\n" + references_text)
            if footnotes_text:
                outfile.write("\n\n" + footnotes_text)

        print(f"Tabella elaborata e salvata in: {output_file}")

# Generate the prompt files for the configured input file and output directory
process_json_tables(input_file, output_dir)


Tabella elaborata e salvata in: prompts/2304.04370_1.txt
Tabella elaborata e salvata in: prompts/2304.04370_2.txt
Tabella elaborata e salvata in: prompts/2304.04370_3.txt
Tabella elaborata e salvata in: prompts/2308.12519_1.txt
Tabella elaborata e salvata in: prompts/2308.12519_2.txt
Tabella elaborata e salvata in: prompts/2308.12519_3.txt
Tabella elaborata e salvata in: prompts/2310.014444_2.txt
Tabella elaborata e salvata in: prompts/2310.014444_4.txt
Tabella elaborata e salvata in: prompts/2310.03965_1.txt
Tabella elaborata e salvata in: prompts/2310.03965_2.txt
Tabella elaborata e salvata in: prompts/2402.10890_1.txt
Tabella elaborata e salvata in: prompts/2402.10890_2.txt
Tabella elaborata e salvata in: prompts/2402.10890_3.txt
Tabella elaborata e salvata in: prompts/2403.04783_1.txt
Tabella elaborata e salvata in: prompts/2403.04783_2.txt
Tabella elaborata e salvata in: prompts/2403.04783_4.txt
Tabella elaborata e salvata in: prompts/2405.17129_1.txt
Tabella elaborata e salvata i