In [None]:
import os
from openai import OpenAI
import csv
import json
from pathlib import Path
from random import shuffle, seed
import numpy as np
from tqdm import tqdm
from utils import *
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import os
import openai
import pandas as pd
import tiktoken
# Tokenizer for the chat model; later cells use it to measure how long a prompt is.
enc = tiktoken.encoding_for_model("gpt-3.5-turbo-0125")

# Take the API key from the environment rather than typing it into the notebook.
# Falls back to the previous empty-string placeholder when OPENAI_API_KEY is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
# Directory that holds the dataset files read by the following cells.
data_dir = '/RACOON'
# Answer template that is interpolated into every system prompt.
res_format = "{'type': []}"

In [None]:
# Read the JSON file test.table_col_type.json from data_dir and remember how many
# examples it contains.
examples_path = os.path.join(data_dir, 'test.table_col_type.json')
with open(examples_path, 'r') as handle:
    examples = json.load(handle)
num_examples = len(examples)

### Single-label baseline

In [None]:
OUTPUT = "GPT3.5_output_single_fulltest_baseline.csv"
results = []

# Baseline run: every column is typed from the serialized table alone, without
# any knowledge-graph hint. parse_example, response and type_vocab are not
# defined in this notebook (presumably star-imported from utils).
for example_id, example in enumerate(tqdm(examples)):
    (table_raw, table_with_en, label, headers, meta_data,
     num_col, max_num_row, colset) = parse_example(example)

    # Collect at most the first 10 rows column by column; None cells become "".
    n_rows = min(len(table_raw), 10)
    n_cols = len(table_raw[0])
    columns = [
        ["" if table_raw[r][c] == None else table_raw[r][c] for r in range(n_rows)]
        for c in range(n_cols)
    ]
    num_col = len(columns)
    table = list(zip(*columns))  # transpose back into row tuples
    headers = ('',) * num_col  # empty headers
    df = pd.DataFrame(table, columns=headers, index=None)
    CSV_like = ",\n".join(", ".join(row) for row in table)

    all_preds = []
    system_msg = {
        "role": "system",
        "content": f"Be a helpful, accurate assistant for data discovery and exploration desiged to output valid JSON in the format {res_format}",
    }
    # The whitespace inside the prompt is part of the text sent to the model.
    first_prompt = f"""Consider this table given in Comma-separated Values format:
                            ```
                            {CSV_like}
                            ```
                Your task is to assign only one semantic class to the first column that best represents all cells of this column. Solve this task by following these steps: 
                1. Look at the cells in the first column of the above table. 
                2. Choose only one valid type from the given list of types: {type_vocab.keys()}. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
    messages = [system_msg, {"role": "user", "content": first_prompt}]

    # One request per table column within a single growing conversation; stop
    # as soon as every entry of colset has been asked about.
    for col_pos, _ in enumerate(df.columns):
        reply = response(messages, 40)
        all_preds.append(reply.content)
        messages.append(dict(reply))
        if col_pos + 1 >= len(colset):
            break
        ordinal = col_pos + 2  # number of the next column as written in the prompt
        messages.append({
            "role": "user",
            "content": f"""Your task is to assign only one semantic class to the {ordinal} column that best represents all cells of this column. Solve this task by following these steps: 
                1. Look at the cells in the {ordinal} column of the above table.
                2. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format. 
                """,
        })

    # Keep the predictions in memory and append them to the CSV after every
    # example, so rows already written survive an interrupted run.
    rows = [[example_id, pos, pred] for pos, pred in enumerate(all_preds)]
    results.extend(rows)
    with open(OUTPUT, "a", newline="", encoding="UTF-8") as out_file:
        csv.writer(out_file).writerows(rows)

### Single-label Ground Truth Linker Entity Label

In [None]:
OUTPUT = "GPT3.5_output_single_fulltest_entity_gt.csv"
results = []

# Ground-truth-linker run: each column prompt carries a short list of hints that
# the prompt text describes as Wikidata entity labels. parse_example, get_info,
# response, type_vocab, pid_to_mid and pid_to_label are not defined in this
# notebook (presumably star-imported from utils).
for example_id, example in enumerate(tqdm(examples)):
    (table_raw, table_with_en, label, headers, meta_data,
     num_col, max_num_row, colset) = parse_example(example)

    # Collect at most the first 10 rows column by column; None cells become "".
    n_rows = min(len(table_raw), 10)
    n_cols = len(table_raw[0])
    columns = [
        ["" if table_raw[r][c] is None else table_raw[r][c] for r in range(n_rows)]
        for c in range(n_cols)
    ]
    num_col = len(columns)
    table = list(zip(*columns))  # transpose back into row tuples
    headers = ('',) * num_col  # empty headers
    df = pd.DataFrame(table, columns=headers, index=None)
    CSV_like = ",\n".join(", ".join(row) for row in table)

    # Hint for the first column: look up its first 10 entries, keep 5 results.
    hint = get_info(colset[0][:10], pid_to_mid, pid_to_label)
    hint = hint[:5]
    all_preds = []
    hints = [hint]

    system_msg = {
        "role": "system",
        "content": f"Be a helpful, accurate assistant for data discovery and exploration desiged to output valid JSON in the format {res_format}",
    }
    # The whitespace inside the prompts is part of the text sent to the model.
    messages = [
        system_msg,
        {
            "role": "user",
            "content": f"""Consider this table given in Comma-separated Values format:
                            ```
                            {CSV_like}
                            ```
                There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
                1. Look at the cells in the first column of the above table. Each cell is linked to an entity in the Wikidata knowledge graph. The entity labels correspond to the cells in the first column are presented as a list delimited by triple quotes.
                ```{hint}```
                2. Understand the entities in the first column
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """,
        },
    ]

    messages_str = ' '.join(map(str, messages))
    encoded_msg = enc.encode(messages_str)
    if len(encoded_msg) > 16000:
        # Over-length fallback for the FIRST column. The previous version
        # interpolated the loop index `i` here, before the loop below had
        # defined it (NameError on a fresh kernel, a stale column number
        # otherwise), and it still embedded the hint, so the prompt did not get
        # any shorter. It now asks about the first column and omits the hint,
        # as the other entity-label cells do. hints[0] still records the hint
        # that was looked up, even though this prompt does not include it.
        messages = [
            system_msg,
            {
                "role": "user",
                "content": f"""Consider this table given in Comma-separated Values format:
                            ```
                            {CSV_like}
                            ```
                There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
                1. Look at the cells in the first column of the above table. 
                2. Understand the entities in the first column
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """,
            },
        ]

    # One request per table column within a single growing conversation; stop
    # as soon as every entry of colset has been asked about.
    for i, _ in enumerate(df.columns):
        chatgpt_msg = response(messages, 40)
        all_preds.append(chatgpt_msg.content)
        messages.append(dict(chatgpt_msg))
        if i + 1 >= len(colset):
            break

        # Hint for the next column, built the same way as for the first one.
        hint = get_info(colset[i + 1][:10], pid_to_mid, pid_to_label)
        hint = hint[:5]
        hints.append(hint)

        messages.append({
            "role": "user",
            "content": f"""Your task is to choose only one type from the list to annotate the {i+2} column. Solve this task by following these steps: 
                1. Look at the cells in the {i+2} column of the above table. Each cell is linked to an entity in the Wikidata knowledge graph. The entity labels correspond to the cells in the {i+2} column are presented as a list delimited by triple quotes.
                ```{hint}```
                2. Understand the entities in the {i+2} column
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """,
        })

    # Keep the predictions (with the hint looked up for each column) in memory
    # and append them to the CSV after every example.
    rows = [[example_id, pos, pred, hints[pos]] for pos, pred in enumerate(all_preds)]
    results.extend(rows)
    with open(OUTPUT, "a", newline="", encoding="UTF-8") as output:
        csv.writer(output).writerows(rows)

### Single-label Ground Truth Linker Entity Triplet

In [None]:
OUTPUT = "GPT3.5_output_single_fulltest_triplet_gt.csv"
results = []

# Ground-truth-linker run with serialized triplet information as the hint.
# parse_example, get_triplets, serialize_dict, response and type_vocab are not
# defined in this notebook (presumably star-imported from utils).
for example_id, example in enumerate(tqdm(examples)):
    (table_raw, table_with_en, label, headers, meta_data,
     num_col, max_num_row, colset) = parse_example(example)

    # Collect at most the first 10 rows column by column; None cells become "".
    n_rows = min(len(table_raw), 10)
    n_cols = len(table_raw[0])
    columns = [
        ["" if table_raw[r][c] == None else table_raw[r][c] for r in range(n_rows)]
        for c in range(n_cols)
    ]
    num_col = len(columns)
    table = list(zip(*columns))  # transpose back into row tuples
    headers = ('',) * num_col  # empty headers
    df = pd.DataFrame(table, columns=headers, index=None)
    CSV_like = ",\n".join(", ".join(row) for row in table)

    # Hint for the first column.
    info_set = get_triplets(colset[0])
    hint = serialize_dict(info_set)
    all_preds = []
    hints = [hint]

    system_msg = {
        "role": "system",
        "content": f"Be a helpful, accurate assistant for data discovery and exploration desiged to output valid JSON in the format {res_format}",
    }
    # The whitespace inside the prompts is part of the text sent to the model.
    hinted_prompt = f"""Consider this table given in Comma-separated Values format:
                            ```
                            {CSV_like}
                            ```
                There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
                1. Look at the cells in the first column of the above table. 
                2. Consider this information carefully: {hint}
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
    messages = [system_msg, {"role": "user", "content": hinted_prompt}]

    # If the opening messages exceed 16000 tokens, ask again without the hint.
    token_count = len(enc.encode(' '.join(str(m) for m in messages)))
    if token_count > 16000:
        plain_prompt = f"""Consider this table given in Comma-separated Values format:
                            ```
                            {CSV_like}
                            ```
                There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
                1. Look at the cells in the first column of the above table. 
                2. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
        messages = [system_msg, {"role": "user", "content": plain_prompt}]

    # One request per table column within a single growing conversation; stop
    # as soon as every entry of colset has been asked about.
    for col_pos, _ in enumerate(df.columns):
        reply = response(messages, 40)
        all_preds.append(reply.content)
        messages.append(dict(reply))
        if col_pos + 1 >= len(colset):
            break

        info_set = get_triplets(colset[col_pos + 1])
        hint = serialize_dict(info_set)
        hints.append(hint)

        ordinal = col_pos + 2  # number of the next column as written in the prompt
        messages.append({
            "role": "user",
            "content": f"""Your task is to choose only one type from the list to annotate the {ordinal} column. Solve this task by following these steps: 
                1. Look at the cells in the {ordinal} column of the above table. 
                2. Consider this information carefully: {hint}
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """,
        })

    # Keep the predictions (with the hint looked up for each column) in memory
    # and append them to the CSV after every example.
    rows = [[example_id, pos, pred, hints[pos]] for pos, pred in enumerate(all_preds)]
    results.extend(rows)
    with open(OUTPUT, "a", newline="", encoding="UTF-8") as out_file:
        csv.writer(out_file).writerows(rows)

### Single-label MediaWiki API Entity Label

In [None]:
OUTPUT = "GPT3.5_output_single_fulltest_entity_wk.csv"
results = []

# MediaWiki-API run with entity labels as hints. A column whose lookup returns
# nothing is prompted without a hint. parse_example, get_info_wk, response and
# type_vocab are not defined in this notebook (presumably star-imported from utils).
for example_id, example in enumerate(tqdm(examples)):
    (table_raw, table_with_en, label, headers, meta_data,
     num_col, max_num_row, colset) = parse_example(example)

    # Collect at most the first 10 rows column by column; None cells become "".
    n_rows = min(len(table_raw), 10)
    n_cols = len(table_raw[0])
    columns = [
        ["" if table_raw[r][c] == None else table_raw[r][c] for r in range(n_rows)]
        for c in range(n_cols)
    ]
    num_col = len(columns)
    table = list(zip(*columns))  # transpose back into row tuples
    headers = ('',) * num_col  # empty headers
    df = pd.DataFrame(table, columns=headers, index=None)
    CSV_like = ",\n".join(", ".join(row) for row in table)

    # Hint for the first column: look up its first 10 entries, keep 5 results.
    hint = get_info_wk(colset[0][:10])
    hint = hint[:5]
    all_preds = []
    hints = [hint]

    system_msg = {
        "role": "system",
        "content": f"Be a helpful, accurate assistant for data discovery and exploration desiged to output valid JSON in the format {res_format}",
    }
    # The whitespace inside the prompts is part of the text sent to the model.
    hinted_prompt = f"""Consider this table given in Comma-separated Values format:
                        ```
                        {CSV_like}
                        ```
            There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
            1. Look at the cells in the first column of the above table. Each cell is linked to an entity in the Wikidata knowledge graph. The entity labels correspond to the cells in the first column are presented as a list delimited by triple quotes.
            ```{hint}```
            2. Understand the entities in the first column
            3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
            """
    messages = [system_msg, {"role": "user", "content": hinted_prompt}]

    # Drop the hint when the opening messages exceed 16000 tokens or when the
    # lookup came back empty.
    token_count = len(enc.encode(' '.join(str(m) for m in messages)))
    if token_count > 16000 or len(hint) == 0:
        plain_prompt = f"""Consider this table given in Comma-separated Values format:
                        ```
                        {CSV_like}
                        ```
            There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
            1. Look at the cells in the first column of the above table. 
            2. Understand the entities in the first column
            3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
            """
        messages = [system_msg, {"role": "user", "content": plain_prompt}]

    # One request per table column within a single growing conversation; stop
    # as soon as every entry of colset has been asked about.
    for col_pos, _ in enumerate(df.columns):
        reply = response(messages, 40)
        all_preds.append(reply.content)
        messages.append(dict(reply))
        if col_pos + 1 >= len(colset):
            break

        hint = get_info_wk(colset[col_pos + 1][:10])
        hint = hint[:5]
        hints.append(hint)

        ordinal = col_pos + 2  # number of the next column as written in the prompt
        if len(hint) == 0:
            # Nothing was found for this column: ask without a hint.
            follow_up = f"""Your task is to choose only one type from the list to annotate the {ordinal} column. Solve this task by following these steps: 
                1. Look at the cells in the {ordinal} column of the above table. 
                2. Understand the entities in the {ordinal} column
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
        else:
            follow_up = f"""Your task is to choose only one type from the list to annotate the {ordinal} column. Solve this task by following these steps: 
                1. Look at the cells in the {ordinal} column of the above table. Each cell is linked to an entity in the Wikidata knowledge graph. The entity labels correspond to the cells in the {ordinal} column are presented as a list delimited by triple quotes.
                ```{hint}```
                2. Understand the entities in the {ordinal} column
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
        messages.append({"role": "user", "content": follow_up})

    # Keep the predictions (with the hint looked up for each column) in memory
    # and append them to the CSV after every example.
    rows = [[example_id, pos, pred, hints[pos]] for pos, pred in enumerate(all_preds)]
    results.extend(rows)
    with open(OUTPUT, "a", newline="", encoding="UTF-8") as out_file:
        csv.writer(out_file).writerows(rows)

### Single-label MediaWiki API Entity Triplet

In [None]:
OUTPUT = "GPT3.5_output_single_fulltest_triplet_wk.csv"
results = []

# MediaWiki-API run with serialized triplet information as the hint. A column
# whose serialized hint is empty is prompted without it. parse_example,
# get_triplets_wk, serialize_dict, response and type_vocab are not defined in
# this notebook (presumably star-imported from utils).
for example_id, example in enumerate(tqdm(examples)):
    (table_raw, table_with_en, label, headers, meta_data,
     num_col, max_num_row, colset) = parse_example(example)

    # Collect at most the first 10 rows column by column; None cells become "".
    n_rows = min(len(table_raw), 10)
    n_cols = len(table_raw[0])
    columns = [
        ["" if table_raw[r][c] == None else table_raw[r][c] for r in range(n_rows)]
        for c in range(n_cols)
    ]
    num_col = len(columns)
    table = list(zip(*columns))  # transpose back into row tuples
    headers = ('',) * num_col  # empty header
    df = pd.DataFrame(table, columns=headers, index=None)
    CSV_like = ",\n".join(", ".join(row) for row in table)

    # Hint for the first column.
    info_set = get_triplets_wk(colset[0])
    hint = serialize_dict(info_set)
    all_preds = []
    hints = [hint]

    system_msg = {
        "role": "system",
        "content": f"Be a helpful, accurate assistant for data discovery and exploration desiged to output valid JSON in the format {res_format}",
    }
    # The whitespace inside the prompts is part of the text sent to the model.
    hinted_prompt = f"""Consider this table given in Comma-separated Values format:
                        ```
                        {CSV_like}
                        ```
            There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
            1. Look at the cells in the first column of the above table. 
            2. Consider this information carefully: {hint}
            3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
            """
    messages = [system_msg, {"role": "user", "content": hinted_prompt}]

    # Drop the hint when the opening messages exceed 16000 tokens or when the
    # hint is empty.
    token_count = len(enc.encode(' '.join(str(m) for m in messages)))
    if token_count > 16000 or len(hint) == 0:
        plain_prompt = f"""Consider this table given in Comma-separated Values format:
                        ```
                        {CSV_like}
                        ```
            There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
            1. Look at the cells in the first column of the above table. 
            2. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
            """
        messages = [system_msg, {"role": "user", "content": plain_prompt}]

    # One request per table column within a single growing conversation; stop
    # as soon as every entry of colset has been asked about.
    for col_pos, _ in enumerate(df.columns):
        reply = response(messages, 40)
        all_preds.append(reply.content)
        messages.append(dict(reply))
        if col_pos + 1 >= len(colset):
            break

        info_set = get_triplets_wk(colset[col_pos + 1])
        hint = serialize_dict(info_set)
        hints.append(hint)

        ordinal = col_pos + 2  # number of the next column as written in the prompt
        if len(hint) == 0:
            # No information for this column: ask without a hint.
            follow_up = f"""Your task is to choose only one type from the list to annotate the {ordinal} column. Solve this task by following these steps: 
                1. Look at the cells in the {ordinal} column of the above table. 
                2. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
        else:
            follow_up = f"""Your task is to choose only one type from the list to annotate the {ordinal} column. Solve this task by following these steps: 
                1. Look at the cells in the {ordinal} column of the above table.
                2. Consider this information carefully: {hint}
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
        messages.append({"role": "user", "content": follow_up})

    # Keep the predictions (with the hint looked up for each column) in memory
    # and append them to the CSV after every example.
    rows = [[example_id, pos, pred, hints[pos]] for pos, pred in enumerate(all_preds)]
    results.extend(rows)
    with open(OUTPUT, "a", newline="", encoding="UTF-8") as out_file:
        csv.writer(out_file).writerows(rows)

### Single-label ReFinED Linker Entity Label

In [None]:
OUTPUT = "GPT3.5_output_single_fulltest_entity_rfd.csv"
results = []

# ReFinED-linker run with entity labels as hints. A column whose lookup returns
# nothing is prompted without a hint. parse_example, get_info_re, response and
# type_vocab are not defined in this notebook (presumably star-imported from utils).
for example_id, example in enumerate(tqdm(examples)):
    (table_raw, table_with_en, label, headers, meta_data,
     num_col, max_num_row, colset) = parse_example(example)

    # Collect at most the first 10 rows column by column; None cells become "".
    n_rows = min(len(table_raw), 10)
    n_cols = len(table_raw[0])
    columns = [
        ["" if table_raw[r][c] == None else table_raw[r][c] for r in range(n_rows)]
        for c in range(n_cols)
    ]
    num_col = len(columns)
    table = list(zip(*columns))  # transpose back into row tuples
    headers = ('',) * num_col  # empty headers
    df = pd.DataFrame(table, columns=headers, index=None)
    CSV_like = ",\n".join(", ".join(row) for row in table)

    # Hint for the first column: look up its first 10 entries, keep 5 results.
    hint = get_info_re(colset[0][:10])
    hint = hint[:5]
    all_preds = []
    hints = [hint]

    system_msg = {
        "role": "system",
        "content": f"Be a helpful, accurate assistant for data discovery and exploration desiged to output valid JSON in the format {res_format}",
    }
    # The whitespace inside the prompts is part of the text sent to the model.
    hinted_prompt = f"""Consider this table given in Comma-separated Values format:
                            ```
                            {CSV_like}
                            ```
                There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
                1. Look at the cells in the first column of the above table. Each cell is linked to an entity in the Wikidata knowledge graph. The entity labels correspond to the cells in the first column are presented as a list delimited by triple quotes.
                ```{hint}```
                2. Understand the entities in the first column
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
    messages = [system_msg, {"role": "user", "content": hinted_prompt}]

    # Drop the hint when the opening messages exceed 16000 tokens or when the
    # lookup came back empty.
    token_count = len(enc.encode(' '.join(str(m) for m in messages)))
    if token_count > 16000 or len(hint) == 0:
        plain_prompt = f"""Consider this table given in Comma-separated Values format:
                            ```
                            {CSV_like}
                            ```
                There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
                1. Look at the cells in the first column of the above table. 
                2. Understand the entities in the first column
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
        messages = [system_msg, {"role": "user", "content": plain_prompt}]

    # One request per table column within a single growing conversation; stop
    # as soon as every entry of colset has been asked about.
    for col_pos, _ in enumerate(df.columns):
        reply = response(messages, 40)
        all_preds.append(reply.content)
        messages.append(dict(reply))
        if col_pos + 1 >= len(colset):
            break

        hint = get_info_re(colset[col_pos + 1][:10])
        hint = hint[:5]
        hints.append(hint)

        ordinal = col_pos + 2  # number of the next column as written in the prompt
        if len(hint) == 0:
            # Nothing was found for this column: ask without a hint.
            follow_up = f"""Your task is to choose only one type from the list to annotate the {ordinal} column. Solve this task by following these steps: 
                    1. Look at the cells in the {ordinal} column of the above table. 
                    2. Understand the entities in the {ordinal} column
                    3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                    """
        else:
            follow_up = f"""Your task is to choose only one type from the list to annotate the {ordinal} column. Solve this task by following these steps: 
                    1. Look at the cells in the {ordinal} column of the above table. Each cell is linked to an entity in the Wikidata knowledge graph. The entity labels correspond to the cells in the {ordinal} column are presented as a list delimited by triple quotes.
                    ```{hint}```
                    2. Understand the entities in the {ordinal} column
                    3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                    """
        messages.append({"role": "user", "content": follow_up})

    # Keep the predictions (with the hint looked up for each column) in memory
    # and append them to the CSV after every example.
    rows = [[example_id, pos, pred, hints[pos]] for pos, pred in enumerate(all_preds)]
    results.extend(rows)
    with open(OUTPUT, "a", newline="", encoding="UTF-8") as out_file:
        csv.writer(out_file).writerows(rows)

### Single-label ReFinED Linker Entity Triplet

In [None]:
# NOTE(review): this file name says "rdf" while the entity-label ReFinED cell
# writes "..._entity_rfd.csv" — confirm which spelling downstream code expects.
OUTPUT = "GPT3.5_output_single_fulltest_triplet_rdf.csv"
results = []

# ReFinED-linker run with serialized triplet information as the hint. A column
# whose serialized hint is empty is prompted without it. parse_example,
# get_triplets_re, serialize_dict, response and type_vocab are not defined in
# this notebook (presumably star-imported from utils).
for example_id, example in enumerate(tqdm(examples)):
    (table_raw, table_with_en, label, headers, meta_data,
     num_col, max_num_row, colset) = parse_example(example)

    # Collect at most the first 10 rows column by column; None cells become "".
    n_rows = min(len(table_raw), 10)
    n_cols = len(table_raw[0])
    columns = [
        ["" if table_raw[r][c] == None else table_raw[r][c] for r in range(n_rows)]
        for c in range(n_cols)
    ]
    num_col = len(columns)
    table = list(zip(*columns))  # transpose back into row tuples
    headers = ('',) * num_col  # empty headers
    df = pd.DataFrame(table, columns=headers, index=None)
    CSV_like = ",\n".join(", ".join(row) for row in table)

    # Hint for the first column.
    info_set = get_triplets_re(colset[0])
    hint = serialize_dict(info_set)
    all_preds = []
    hints = [hint]

    system_msg = {
        "role": "system",
        "content": f"Be a helpful, accurate assistant for data discovery and exploration desiged to output valid JSON in the format {res_format}",
    }
    # The whitespace inside the prompts is part of the text sent to the model.
    hinted_prompt = f"""Consider this table given in Comma-separated Values format:
                            ```
                            {CSV_like}
                            ```
                There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
                1. Look at the cells in the first column of the above table. 
                2. Consider this information carefully: {hint}
                3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
    messages = [system_msg, {"role": "user", "content": hinted_prompt}]

    # Drop the hint when the opening messages exceed 16000 tokens or when the
    # hint is empty.
    token_count = len(enc.encode(' '.join(str(m) for m in messages)))
    if token_count > 16000 or len(hint) == 0:
        plain_prompt = f"""Consider this table given in Comma-separated Values format:
                            ```
                            {CSV_like}
                            ```
                There are a list of 255 valid types for each column: {type_vocab.keys()}. Your task is to choose only one type from the list to annotate the first column. Solve this task by following these steps: 
                1. Look at the cells in the first column of the above table. 
                2. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                """
        messages = [system_msg, {"role": "user", "content": plain_prompt}]

    # One request per table column within a single growing conversation; stop
    # as soon as every entry of colset has been asked about.
    for col_pos, _ in enumerate(df.columns):
        reply = response(messages, 40)
        all_preds.append(reply.content)
        messages.append(dict(reply))
        if col_pos + 1 >= len(colset):
            break

        info_set = get_triplets_re(colset[col_pos + 1])
        hint = serialize_dict(info_set)
        hints.append(hint)

        ordinal = col_pos + 2  # number of the next column as written in the prompt
        if len(hint) == 0:
            # No information for this column: ask without a hint.
            follow_up = f"""Your task is to choose only one type from the list to annotate the {ordinal} column. Solve this task by following these steps: 
                    1. Look at the cells in the {ordinal} column of the above table. 
                    2. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                    """
        else:
            follow_up = f"""Your task is to choose only one type from the list to annotate the {ordinal} column. Solve this task by following these steps: 
                    1. Look at the cells in the {ordinal} column of the above table.
                    2. Consider this information carefully: {hint}
                    3. Choose only one valid type from the given list of types. Check that the type MUST be in the list. Give the answer in valid JSON format.
                    """
        messages.append({"role": "user", "content": follow_up})

    # Keep the predictions (with the hint looked up for each column) in memory
    # and append them to the CSV after every example.
    rows = [[example_id, pos, pred, hints[pos]] for pos, pred in enumerate(all_preds)]
    results.extend(rows)
    with open(OUTPUT, "a", newline="", encoding="UTF-8") as out_file:
        csv.writer(out_file).writerows(rows)