# Load the 100-question dataset

In [None]:
import os
from huggingface_hub import login
# Authenticate against the Hugging Face Hub.
# Prefer a token supplied through the HF_TOKEN environment variable so the
# secret does not have to be pasted into (and saved with) the notebook; the
# literal placeholder is kept only as a backward-compatible fallback.
login(token=os.environ.get('HF_TOKEN', 'token'))

In [2]:
import json

# Load the question dataset into `original_dataset`.
# JSON text is UTF-8 by specification, so pin the encoding instead of relying
# on the platform's locale default.
with open('/kaggle/input/aurii-concepts-instances-dataset/concepts_instances_dataset.json', 'r', encoding='utf-8') as file:
    original_dataset = json.load(file)

# Pipeline

## Model loading and inference.

* Load model

In [4]:
# import torch

# from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel

# model = AutoModelForCausalLM.from_pretrained("alpindale/Mistral-7B-v0.2-hf", torch_dtype=torch.float16)
# from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
# tokenizer = AutoTokenizer.from_pretrained("alpindale/Mistral-7B-v0.2-hf")

In [5]:
!pip install -q accelerate bitsandbytes

In [6]:
!pip install -q peft

In [None]:
# Load model directly
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
import torch

# Causal-LM weights are loaded in half precision. The tokenizer is taken from
# a different repository ("alpindale/Mistral-7B-v0.2-hf") than the weights.
model = AutoModelForCausalLM.from_pretrained(
    "Stratos-Kakalis/norm_trunc_no_rdfs_8_epoch",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(
    "alpindale/Mistral-7B-v0.2-hf",
)

* Inference function

In [8]:
import torch

def run_chat_inference(model, tokenizer, system_role, user_message):
    """Generate the assistant reply for a system + user chat exchange.

    The two messages are rendered with the tokenizer's chat template, the
    model samples up to 1000 new tokens, and only the newly generated tokens
    are decoded.

    :param model: causal language model exposing ``to``, ``eval`` and ``generate``
    :param tokenizer: tokenizer exposing ``apply_chat_template`` and ``batch_decode``
    :param system_role: content of the system message
    :param user_message: content of the user message
    :return: the generated reply, stripped of surrounding whitespace
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Same evaluation-mode setup as the other inference helpers

    messages = [
        {"role": "system", "content": system_role},
        {"role": "user", "content": user_message}
    ]

    model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated_ids = model.generate(
            model_inputs,
            max_new_tokens=1000,
            do_sample=True,
        )

    # generate() returns prompt tokens followed by the new tokens. Dropping the
    # prompt by token count isolates the reply; splitting the decoded text on
    # the message strings leaves chat-template markers behind and fails when a
    # message does not decode back verbatim.
    reply_ids = generated_ids[:, model_inputs.shape[-1]:]
    generated_text = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0].strip()

    # Release cached GPU blocks. The model itself is owned by the caller, so it
    # is intentionally not deleted here (deleting the local name frees nothing).
    torch.cuda.empty_cache()

    return generated_text

In [9]:
def run_inference(model, tokenizer, prompt):
    """Complete ``prompt`` with the model and return only the continuation.

    :param model: causal language model exposing ``to``, ``eval`` and ``generate``
    :param tokenizer: callable tokenizer exposing ``decode``, ``eos_token_id``
        and ``pad_token_id``
    :param prompt: text to complete
    :return: decoded text of the newly generated tokens (at most 800 tokens)
    """
    # Move model to GPU when one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Set model to evaluation mode

    # Tokenize prompt
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Some tokenizers define no PAD token; fall back to EOS explicitly instead
    # of handing None to generate().
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Generate output (default decoding, with a penalty on repeating tokens)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=800,
            repetition_penalty=1.2,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
        )

    # Drop the prompt by token count. Cutting len(prompt) characters from the
    # decoded text is wrong whenever decoding does not reproduce the prompt
    # character for character.
    prompt_token_count = inputs["input_ids"].shape[-1]
    generated_text = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

    # Release cached GPU blocks. The model is owned by the caller, so it is not
    # deleted here (deleting the local name would free nothing).
    torch.cuda.empty_cache()

    return generated_text

* Quantized inference function for the quantized finetuned models.

In [10]:
def Quantized_Inference(model, tokenizer, prompt):
    """Complete ``prompt`` with a (possibly quantized) model.

    :param model: causal language model exposing ``eval`` and ``generate``
    :param tokenizer: callable tokenizer exposing ``decode``, ``eos_token_id``
        and ``pad_token_id``
    :param prompt: text to complete
    :return: decoded text of the newly generated tokens (at most 400 tokens)
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    already_placed = (
        getattr(model, "is_quantized", False)
        or getattr(model, "is_loaded_in_8bit", False)
        or getattr(model, "is_loaded_in_4bit", False)
    )
    if already_placed:
        # bitsandbytes-quantized models are put on their device at load time
        # and may reject .to(); reuse whatever device they report.
        device = getattr(model, "device", device)
    else:
        model.to(device)  # Ensure model is moved to the device
    model.eval()  # Set model to evaluation mode

    # Tokenize prompt
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Some tokenizers define no PAD token; fall back to EOS explicitly.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Generate output (default decoding, with a penalty on repeating tokens)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=400,
            repetition_penalty=1.2,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
        )

    # Drop the prompt by token count; slicing the decoded text by len(prompt)
    # is wrong whenever decoding does not reproduce the prompt exactly.
    prompt_token_count = inputs["input_ids"].shape[-1]
    generated_text = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

    # Release cached GPU blocks. The model is owned by the caller, so it is not
    # deleted here (deleting the local name would free nothing).
    torch.cuda.empty_cache()

    return generated_text

* Prompt creation function

In [11]:
# def create_prompt(question):
#     prompt = f"""Generator is an expert SPARQL query generator. For each question that the user supplies, the generator will convert it into a valid SPARQL query that can be used to answer the question. The query will be based on the Yago2Geo knowledge graph. The query should be enclosed by three backticks on new lines, denoting that it is a code block.
# Human: Where is Swansea located?
# Generator: ```select ?geoWKT where {{ yago:Swansea geo:hasGeometry ?o.  ?o geo:asWKT ?geoWKT. }}```
# Human: Which Greek regions have between 500000 and 1000000 inhabitants?
# Generator: ```select ?region where {{ ?region a y2geoo:GAG_Region . ?region y2geoo:hasGAG_Population ?pop. filter(?pop < 1000000). filter(?pop > 500000). }}```
# Human: Is Doolin to the south of Dublin?
# Generator: ```ASK {{ <http://yago-knowledge.org/resource/Doolin> geo:hasGeometry ?o. ?o geo:asWKT ?geoWKT. <http://yago-knowledge.org/resource/Dublin> geo:hasGeometry ?o1. ?o1 geo:asWKT ?geoWKT1. FILTER(strdf:below(?geoWKT,?geoWKT1)) }}```
# Human: {question}
# Generator: ```"""
#     return prompt

In [12]:
# def create_prompt(question, uris):
#     prompt = f"""Generator is an expert SPARQL query generator. For each question that the user supplies, the generator will convert it into a valid SPARQL query that can be used to answer the question. The query should be enclosed by three backticks on new lines, denoting that it is a code block.
# The generator is logical and creates each query by first explaining its thought process step-by-step.
# Human: Where is Swansea located?
# Generator: Let's think step by step. First, we want to find the location of Swansea, so we start with select ?geoWKT to specify that we need the geographic data.
# Next, we need to identify how Swansea's location is stored in the knowledge graph. We use yago:Swansea geo:hasGeometry ?o. to find the geometric data related to Swansea.
# Then, we need to extract the specific coordinates. We do this with ?o geo:asWKT ?geoWKT. to get the Well-Known Text (WKT) representation of Swansea's geometry.
# Finally, we wrap these patterns in a where clause to structure our query properly. The final result: ```select ?geoWKT where {{ yago:Swansea geo:hasGeometry ?o.  ?o geo:asWKT ?geoWKT. }}```
# Human: Which Greek regions have between 500000 and 1000000 inhabitants?
# Generator: Let's think step by step. First, we want to find Greek regions with a population between 500,000 and 1,000,000, so we start with select ?region to specify that we need the region names.
# Next, we need to identify which entities are Greek regions. We use ?region a y2geoo:GAG_Region to find entities classified as Greek regions.
# Then, we need to get the population of these regions. We do this with ?region y2geoo:hasGAG_Population ?pop to find the population data associated with each region.
# After that, we need to filter the results to only include regions with populations between 500,000 and 1,000,000. We use filter(?pop < 1000000) to exclude regions with more than 1,000,000 inhabitants and filter(?pop > 500000) to exclude regions with fewer than 500,000 inhabitants.
# Finally, we wrap these patterns in a where clause to structure our query properly. The final result: ```select ?region where {{ ?region a y2geoo:GAG_Region . ?region y2geoo:hasGAG_Population ?pop. filter(?pop < 1000000). filter(?pop > 500000). }}```
# Human: Is Doolin to the south of Dublin?
# Generator: Let's think step by step. Question asks for yes/no answer: Use ASK query
# Need to compare locations: Retrieve geometric data for both
# geo:hasGeometry and geo:asWKT predicates for Doolin and Dublin
# Check if one is south of the other: Use geospatial comparison function
# FILTER with strdf:below function
# Steps to build query:
# a. Get Doolin's geometry: http://yago-knowledge.org/resource/Doolin
# b. Get Dublin's geometry: http://yago-knowledge.org/resource/Dublin
# c. Compare using FILTER and strdf:below Resulting query:
# ```ASK {{ <http://yago-knowledge.org/resource/Doolin> geo:hasGeometry ?o. ?o geo:asWKT ?geoWKT. <http://yago-knowledge.org/resource/Dublin> geo:hasGeometry ?o1. ?o1 geo:asWKT ?geoWKT1. FILTER(strdf:below(?geoWKT,?geoWKT1)) }}```
# Human: {question}
# Generator: Let's think step by step."""
#     return prompt

* 6-shot prompt without CoT

In [13]:
def create_prompt(user_prompt, uris):
    """Build the 6-shot (no chain-of-thought) SPARQL generation prompt.

    The prompt holds six worked question / URI-list / query examples followed
    by the user's question and URI list, and ends with an opening code fence
    so that the model continues with the query itself.

    Note: later cells of this notebook define ``create_prompt`` again; the
    definition executed last is the one in effect.

    NOTE(review): the "largest island" example selects ``?x``, which its WHERE
    clause never binds, and orders ascending before ``LIMIT 1``. Left unchanged
    here; confirm against the dataset's reference query.

    :param user_prompt: natural-language question to translate
    :param uris: URI list (or its string form) inserted verbatim into the prompt
    :return: the complete prompt string
    """
    # The population example previously contained a stray apostrophe
    # (yago:'hasPopulation), which is not a valid prefixed name.
    prompt = f"""You are an expert SPARQL query generator. For each question that the user supplies, you will convert it into a valid SPARQL query that can be used to answer the question. The query will be based on the Yago2Geo knowledge graph. The query should be enclosed by three backticks on new lines, denoting that it is a code block. You will not provide further details.
Human: In Breckland district, which forests are south of streams?
The generator must use these URIs to answer the question: ['yago:Breckland_District', 'geo:hasGeometry', 'geo:asWKT', 'rdf:type', 'y2geoo:OSM_forest', 'geo:hasGeometry', 'geo:asWKT', 'rdf:type', 'y2geoo:OSM_stream', 'geo:hasGeometry', 'geo:asWKT', 'strdf:within', 'strdf:within', 'strdf:below']
Generator: ```SELECT DISTINCT ?forest WHERE {{ yago:Breckland_District geo:hasGeometry ?o1 . ?o1 geo:asWKT ?geoWKT1 . ?forest rdf:type y2geoo:OSM_forest . ?forest geo:hasGeometry ?o2 . ?o2 geo:asWKT ?geoWKT2 . ?stream rdf:type y2geoo:OSM_stream . ?stream geo:hasGeometry ?o3 . ?o3 geo:asWKT ?geoWKT3 . FILTER (strdf:within(?geoWKT2, ?geoWKT1) && strdf:within(?geoWKT3, ?geoWKT1) && strdf:below(?geoWKT2, ?geoWKT3)) }}```
Human: How many streams intersect with lakes?
The generator must use these URIs to answer the question: ['rdf:type', 'y2geoo:OSM_stream', 'geo:hasGeometry', 'geo:asWKT', 'rdf:type', 'y2geoo:OSM_lake', 'geo:hasGeometry', 'geo:asWKT', 'geof:sfIntersects']
Generator: ```SELECT (COUNT (DISTINCT ?p1) as ?streams) WHERE {{ ?p1 rdf:type y2geoo:OSM_stream; geo:hasGeometry ?p1geo. ?p1geo geo:asWKT ?p1WKT. ?p2 rdf:type y2geoo:OSM_lake; geo:hasGeometry ?p2geo. ?p2geo geo:asWKT ?p2WKT. FILTER(geof:sfIntersects(?p1WKT, ?p2WKT)) }}```
Human: Which Municipalities are on Thessaly's border?
The generator must use these URIs to answer the question: ['yago:Thessaly', 'geo:hasGeometry', 'geo:asWKT', 'rdf:type', 'y2geoo:GAG_Municipality', 'geo:hasGeometry', 'geo:asWKT', 'strdf:touches']
Generator: ```SELECT distinct ?rg where {{ yago:Thessaly geo:hasGeometry ?tgeo . ?tgeo geo:asWKT ?tgWKT . ?rg rdf:type y2geoo:GAG_Municipality . ?rg geo:hasGeometry ?rggeo . ?rggeo geo:asWKT ?rgWKT . FILTER (strdf:touches(?tgWKT,?rgWKT)) . }}```
Human: Which is the largest island in Ireland?
The generator must use these URIs to answer the question: ['strdf:area', 'yago:Republic_of_Ireland', 'geo:hasGeometry', 'geo:asWKT', 'y2geoo:OSM_island', 'geo:hasGeometry', 'geo:asWKT', 'geof:sfContains']
Generator: ```select distinct ?x (strdf:area(?lWKT) as ?area) where {{ yago:Republic_of_Ireland geo:hasGeometry ?geom . ?geom geo:asWKT ?mWKT . ?lake a y2geoo:OSM_island . ?lake geo:hasGeometry ?geol . ?geol geo:asWKT ?lWKT . FILTER (geof:sfContains(?mWKT, ?lWKT)) }} ORDER BY (?area) LIMIT 1```
Human: Is Crete south of Thessaly?
The generator must use these URIs to answer the question: ['http://yago-knowledge.org/resource/Crete', 'geo:hasGeometry', 'http://yago-knowledge.org/resource/Thessaly', 'geo:hasGeometry', 'geo:asWKT', 'geo:asWKT', 'strdf:below']
Generator: ```ASK {{ <http://yago-knowledge.org/resource/Crete> geo:hasGeometry ?geo1 . <http://yago-knowledge.org/resource/Thessaly> geo:hasGeometry ?geo2 . ?geo1 geo:asWKT ?geoWKT1 . ?geo2 geo:asWKT ?geoWKT2 . FILTER(strdf:below(?geoWKT1, ?geoWKT2)) }}```
Human: What is the population of Northern Ireland?
The generator must use these URIs to answer the question: ['xsd:integer', 'yago:Northern_Ireland', 'yago:hasPopulation']
Generator: ```SELECT (xsd:integer (?population) as ?pop) WHERE {{ yago:Northern_Ireland yago:hasPopulation ?population. }}```
Human: {user_prompt}
The generator must use these URIs to answer the question: {uris}
Generator: ```"""

    return prompt

In [14]:
def create_prompt(question, uris):
    """Build the zero-shot SPARQL generation prompt (no URI hints).

    :param question: natural-language question to translate
    :param uris: accepted for signature compatibility; not used in the prompt
    :return: instructions, a blank line, the question, and an opening code fence
    """
    instructions = (
        "Generator is an expert SPARQL query generator. "
        "For each question that the user supplies, the generator will convert it "
        "into a valid SPARQL query that can be used to answer the question. "
        "The query should be enclosed by three backticks on new lines, "
        "denoting that it is a code block."
    )
    prompt_lines = [
        instructions,
        "",
        f"Human: {question}",
        "Generator: ```",
    ]
    return "\n".join(prompt_lines)

In [28]:
def create_prompt(question, uris):
    """Build the zero-shot SPARQL generation prompt with URI hints.

    :param question: natural-language question to translate
    :param uris: URI hints, inserted verbatim after the question
    :return: instructions, a blank line, the question, the URI hint line, and
        an opening code fence
    """
    instructions = (
        "Generator is an expert SPARQL query generator. "
        "For each question that the user supplies, the generator will convert it "
        "into a valid SPARQL query that can be used to answer the question. "
        "The query should be enclosed by three backticks on new lines, "
        "denoting that it is a code block."
    )
    prompt_lines = [
        instructions,
        "",
        f"Human: {question}",
        f"The generator may use these URIs to answer the question: {uris}",
        "Generator: ```",
    ]
    return "\n".join(prompt_lines)

* Same CoT prompt, but designed for automatic URI injection.

In [16]:
# def create_prompt(question, uris):
#     prompt = f"""Generator is an expert SPARQL query generator. For each question that the user supplies, the generator will convert it into a valid SPARQL query that can be used to answer the question.
# The generator may be provided with a list of URIs. Some of these URIs are relevant, while others are not. The generator must carefully identify and use only the correct URIs when they are provided. If the correct URIs are not available, the generator will rely on its understanding to construct the appropriate query.
# The query should be enclosed by three backticks on new lines, denoting that it is a code block.
# The generator is logical and creates each query by first explaining its thought process step-by-step.

# Human: Where is Swansea located?
# Provided URIs: yago:Swansea,y2geoo:OS_UnitaryAuthority
# Generator: Let's think step by step. First, we want to find the location of Swansea, so we start with select ?geoWKT to specify that we need the geographic data.
# Next, we need to identify how Swansea's location is stored in the knowledge graph. We use yago:Swansea geo:hasGeometry ?o. to find the geometric data related to Swansea.
# Then, we need to extract the specific coordinates. We do this with ?o geo:asWKT ?geoWKT. to get the Well-Known Text (WKT) representation of Swansea's geometry.
# Finally, we wrap these patterns in a where clause to structure our query properly. The final result: ```select ?geoWKT where {{ yago:Swansea geo:hasGeometry ?o.  ?o geo:asWKT ?geoWKT. }}```

# Human: Which Greek regions have between 500000 and 1000000 inhabitants?
# Provided URIs: yago:Greece
# Generator: Let's think step by step. First, we want to find Greek regions with a population between 500,000 and 1,000,000, so we start with select ?region to specify that we need the region names.
# Next, we need to identify which entities are Greek regions. We use ?region a y2geoo:GAG_Region to find entities classified as Greek regions. We do not need the provided URI of Greece.
# Then, we need to get the population of these regions. We do this with ?region y2geoo:hasGAG_Population ?pop to find the population data associated with each region.
# After that, we need to filter the results to only include regions with populations between 500,000 and 1,000,000. We use filter(?pop < 1000000) to exclude regions with more than 1,000,000 inhabitants and filter(?pop > 500000) to exclude regions with fewer than 500,000 inhabitants.
# Finally, we wrap these patterns in a where clause to structure our query properly. The final result: ```select ?region where {{ ?region a y2geoo:GAG_Region . ?region y2geoo:hasGAG_Population ?pop. filter(?pop < 1000000). filter(?pop > 500000). }}```

# Human: Is Doolin to the south of Dublin?
# Provided URIs: yago:Doolin,yago:Dublin
# Generator: Let's think step by step. Question asks for yes/no answer: Use ASK query
# Need to compare locations: Retrieve geometric data for both
# geo:hasGeometry and geo:asWKT predicates for Doolin and Dublin
# Check if one is south of the other: Use geospatial comparison function
# FILTER with strdf:below function
# Steps to build query:
# a. Get Doolin's and Dublin's geometry from the provided URIs: yago:Doolin and yago:Dublin
# b. Compare using FILTER and strdf:below 
# The final result:
# ```ASK {{ <http://yago-knowledge.org/resource/Doolin> geo:hasGeometry ?o. ?o geo:asWKT ?geoWKT. <http://yago-knowledge.org/resource/Dublin> geo:hasGeometry ?o1. ?o1 geo:asWKT ?geoWKT1. FILTER(strdf:below(?geoWKT,?geoWKT1)) }}```

# Human: {question}
# Provided URIs: {uris}
# Generator: Let's think step by step."""
#     return prompt

* Query cleanup function

In [17]:
import re

def query_cleanup(results):
    """Extract the bare SPARQL query from raw model output.

    The prompts end with an opening code fence, so the query is expected to be
    everything up to the first closing fence. When no fence is present the
    whole text is used.

    :param results: raw text generated by the model
    :return: the query, stripped of surrounding whitespace and of a leading
        "SPARQL" language tag (matched case-insensitively)
    """
    # Text up to the first closing fence, if there is one.
    fence_match = re.search(r'(.*?)```', results, re.DOTALL)
    query = fence_match.group(1) if fence_match else results

    # Strip in both cases so the tag check below also works on unfenced output.
    query = query.strip()

    # Remove the language tag the model sometimes emits right after the fence.
    tag = "SPARQL"
    if query[:len(tag)].upper() == tag:
        query = query[len(tag):].strip()

    return query

## Endpoint inference functions

* Gost materialization.

In [18]:
def gost_materialize_query(query: str, timeout=None):
    """Send ``query`` to the materialization service and return the rewritten query.

    This is best effort: whenever the service cannot be reached or answers
    with a non-200 status, the failure is printed and the original query is
    returned unchanged.

    :param query: SPARQL query to materialize
    :param timeout: optional timeout in seconds passed to ``requests.post``
        (``None`` waits indefinitely, as before)
    :return: the response body on HTTP 200, otherwise ``query``
    """
    payload = {
        "query": query
    }

    headers = {
        'Content-Type': 'application/json'
    }

    try:
        response = requests.post(
            "materialize-api",
            headers=headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
    except requests.exceptions.RequestException as err:
        # Transport-level failures (bad URL, refused connection, timeout) get
        # the same fallback as a non-200 answer instead of aborting the caller.
        print("Materialize failed:", err)
        return query

    if response.status_code == 200:
        return response.text

    print("Materialize failed:", response.text)
    return query

* Query formatting function. This adds the correct prefixes and fixes some endpoint issues with regex.

In [19]:
def format_query(query):
    """Prepend the namespace prefixes and rewrite constructs for the endpoint.

    Four ``strdf:`` function names are replaced by ``geof:sf*`` names, and any
    ``strdf:buffer(?var, <digits>, uom:<unit>)`` call is reduced to ``?var``.

    :param query: SPARQL query without PREFIX declarations
    :return: the prefix block, a single space, and the rewritten query
    """
    prefix_lines = (
        "PREFIX geo: <http://www.opengis.net/ont/geosparql#>",
        "PREFIX geof: <http://www.opengis.net/def/function/geosparql/>",
        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
        "PREFIX yago: <http://yago-knowledge.org/resource/>",
        "PREFIX y2geor: <http://kr.di.uoa.gr/yago2geo/resource/>",
        "PREFIX y2geoo: <http://kr.di.uoa.gr/yago2geo/ontology/>",
        "PREFIX strdf: <http://strdf.di.uoa.gr/ontology#>",
        "PREFIX uom: <http://www.opengis.net/def/uom/OGC/1.0/>",
        "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
    )
    rewritten = "\n".join(prefix_lines) + ' ' + query

    # Function-name substitutions, applied in this order to the whole text.
    function_renames = (
        ('strdf:within', 'geof:sfWithin'),
        ('strdf:contains', 'geof:sfContains'),
        ('strdf:overlaps', 'geof:sfOverlaps'),
        ('strdf:distance', 'geof:sfDistance'),
    )
    for strdf_name, geof_name in function_renames:
        rewritten = rewritten.replace(strdf_name, geof_name)

    # Collapse strdf:buffer(...) calls down to their geometry variable.
    buffer_call = re.compile(r'strdf:buffer\((\?\w+),\s*\d+,\s*uom:\w+\)')
    return buffer_call.sub(r'\1', rewritten)

* Endpoint request function

In [20]:
import requests
import pandas as pd
from io import StringIO

def graphdb_send_request(query, endpoint_url="endpoint-url", accept_format='application/sparql-results+json', timeout=None):
    """
    Sends a SPARQL query to a GraphDB endpoint.

    The query is first passed through ``format_query`` and
    ``gost_materialize_query``. Failures are printed, not raised, and are
    reported to the caller as ``None``.

    :param query: SPARQL query to be sent
    :param endpoint_url: URL of the GraphDB SPARQL endpoint
    :param accept_format: Desired response format (default is JSON)
    :param timeout: Optional timeout in seconds for the endpoint request
        (``None`` waits indefinitely, as before)
    :return: CSV text for JSON results, the raw response text for any other
        format, or ``None`` when the request did not succeed
    """
    # Format the query, this means add the correct prefixes and fix some endpoint issues with regex.
    query = format_query(query)

    headers = {
        'Accept': accept_format,
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    try:
        # Materialization is a network call as well, so it runs inside the
        # guarded region: a failing service must not abort a whole evaluation.
        data = {
            'query': gost_materialize_query(query)
        }

        # NOTE(review): the basic-auth credentials are literal placeholders;
        # supply real ones from configuration rather than from source.
        response = requests.post(
            endpoint_url,
            headers=headers,
            data=data,
            auth=requests.auth.HTTPBasicAuth('username', 'password'),
            timeout=timeout,
        )

        if response.status_code != 200:
            # Raises HTTPError for 4xx/5xx; other non-200 codes yield None.
            response.raise_for_status()
            return None

        if accept_format == 'application/sparql-results+json':
            return convert_json_to_csv(response.json())
        return response.text
    except requests.exceptions.HTTPError:
        print("HTTP error (most likely invalid query)")
    except Exception as err:
        print(err)
        print("Endpoint error ENDPOINT DOWN")

    # Explicit failure value; callers test the result for truthiness.
    return None
        
def convert_json_to_csv(json_data):
    """
    Renders a SPARQL JSON result as CSV text.

    ASK results (a top-level ``boolean`` key) become a single ``value`` column
    with one row. SELECT results get one column per variable in ``head.vars``
    and one row per binding; unbound variables are written as empty strings.

    :param json_data: parsed SPARQL JSON results
    :return: CSV formatted data as a string (header row included, no index)
    """
    if 'boolean' in json_data:
        column_names = ['value']
        records = [[json_data['boolean']]]
    else:
        column_names = json_data['head']['vars']
        records = []
        for binding in json_data['results']['bindings']:
            record = {}
            for name in column_names:
                record[name] = binding.get(name, {}).get('value', '')
            records.append(record)

    # With no target path, to_csv returns the CSV text directly.
    frame = pd.DataFrame(records, columns=column_names)
    return frame.to_csv(index=False)

* General pipeline handler.

In [24]:
# Handler: takes a question and returns the endpoint's answer for the generated query.
def ask_pipeline(question, uris=None):
    """Run the full question -> prompt -> query -> endpoint pipeline.

    Uses the notebook-level ``model`` and ``tokenizer``. The cleaned query is
    printed between two ``----`` separator lines before it is sent.

    :param question: natural-language question
    :param uris: optional URI hints forwarded to ``create_prompt``
    :return: whatever ``graphdb_send_request`` returns for the cleaned query
    """
    # Prompt construction followed by generation. run_inference and
    # run_chat_inference are the other inference helpers in this notebook.
    raw_generation = Quantized_Inference(model, tokenizer, create_prompt(question, uris))

    # Keep only the query part of the generated text.
    cleaned_query = query_cleanup(raw_generation)
    print("----", cleaned_query, "----", sep="\n")

    # TO DO: visualize the results instead of just returning them.
    return graphdb_send_request(cleaned_query)

# Accuracy evaluation 

* Custom comparison function

In [25]:
def csv_to_columns(csv_data):
    """Parse CSV text and return its data columns (header row excluded).

    The text is parsed with the ``csv`` module, so quoted fields containing
    commas or line breaks (for example WKT geometries) stay in one cell.
    A plain ``split(',')`` would break such rows into a varying number of
    pieces, and the transposition below would then truncate the columns.

    :param csv_data: CSV text whose first row is the header
    :return: list of tuples, one per column, holding that column's cell
        strings in row order; empty when there are no data rows
    """
    import csv
    from io import StringIO

    parsed_rows = list(csv.reader(StringIO(csv_data.strip(), newline='')))
    # Skip the header row; ignore blank lines so they cannot empty the zip.
    data_rows = [row for row in parsed_rows[1:] if row]
    columns = list(zip(*data_rows))  # Transpose rows to columns
    return columns

def compare_csv_columns(csv1, csv2):
    """Tell whether two CSV results share at least one identical data column.

    Columns are compared by content only (headers are ignored): a column
    matches when its cells are equal, in the same order, to a column of the
    other result.

    :param csv1: first CSV text
    :param csv2: second CSV text
    :return: True when some column occurs in both results, otherwise False
    """
    first_columns = set(map(tuple, csv_to_columns(csv1)))
    second_columns = set(map(tuple, csv_to_columns(csv2)))

    # A non-empty intersection is the same thing as "not disjoint".
    return not first_columns.isdisjoint(second_columns)

* The evaluation on the 100 question dataset.

In [None]:
# 1-based position of the first question to evaluate; earlier questions are
# only counted, not run. Set to 1 to evaluate the whole dataset.
# Original author's note: "9 at 32" (presumably the number of correct answers
# already counted before resuming at question 32; TODO confirm).
RESUME_FROM = 32

detailed_comp = 0  # Number of questions whose generated result matched the ground truth

gt_results = []
gen_results = []

i = 0
for i, key in enumerate(original_dataset, start=1):
    print(i - 1)  # Progress: zero-based index of the current question
    if i < RESUME_FROM:
        continue

    query = original_dataset[key]['Query']

    # Ground-truth answer: run the reference query through the same endpoint path.
    gt_result = graphdb_send_request(query)
    gt_results.append(gt_result)

    question = original_dataset[key]['Question']
    uris = original_dataset[key]['Gen_URI']
    uris_string = ','.join(uris)

    gen_result = ask_pipeline(question, uris_string)
    gen_results.append(gen_result)

    # Only compare when both requests produced a result.
    if gen_result and gt_result:
        if compare_csv_columns(gt_result, gen_result):
            detailed_comp += 1
        print(detailed_comp)

# Accuracy over the whole dataset (skipped questions count as not matched);
# the dataset size replaces the previously hard-coded 100.
total_questions = len(original_dataset)
print(detailed_comp / total_questions if total_questions else 0.0)