In [1]:
import os
import re
import pickle
import numpy as np
from dotenv import dotenv_values
from langchain import PromptTemplate, LLMChain, OpenAI
from langchain.chat_models import ChatOpenAI

In [2]:
# Load env file with API KEY using full path
config = dotenv_values(".env")
os.environ['OPENAI_API_KEY'] = config["OPENAI_API_KEY"]

In [3]:
labels_to_text = {
    "I dont'know": "I dont'know",
    "addressLocality": "locality of address",
    "postalCode": "postal code",
    "addressRegion": "region of address",
    "Country": "country",
    "priceRange": "price range",
    "Hotel/name": "name of hotel",
    "telephone": "telephone",
    "faxNumber": "fax number",
    "Date": "date",
    "Restaurant/name": "name of restaurant",
    "paymentAccepted": "payment accepted",
    "DayOfWeek": "day of week",
    "Review": "review",
    "Organization": "organization",
    "DateTime": "date and time",
    "MusicAlbum/name": "name of music album",
    "MusicArtistAT": "music artist",
    "MusicRecording/name": "name of music recording",
    "Photograph": "photograph",
    "CoordinateAT": "coordinate",
    "Event/name": "name of event",
    "EventAttendanceModeEnumeration": "event attendance mode",
    "EventStatusType": "event status",
    "currency": "currency",
    "email": "email",
    "Time": "time",
    "LocationFeatureSpecification": "location feature",
    "Duration": "duration",
    "Event/description": "description of event",
    "Restaurant/description": "description of restaurant",
    "Rating": "rating",
    "Hotel/description": "description of hotel"
}

In [4]:
# Dictionary to map ChatGPT answers to label set: synonyms can be added here
text_to_label = {
    "locality of address": "addressLocality",
    "postal code": "postalCode",
    "region of address": "addressRegion",
    "country": "Country",
    "price range": "priceRange",
    "name of hotel": "Hotel/name",
    "telephone": "telephone",
    "fax number": "faxNumber",
    "date": "Date",
    "name of restaurant": "Restaurant/name",
    "payment accepted": "paymentAccepted",
    "day of week": "DayOfWeek",
    "review": "Review",
    "organization": "Organization",
    "date and time": "DateTime",
    "music artist": "MusicArtistAT",
    "music album": "MusicAlbum/name",
    "name of music recording": "MusicRecording/name",
    "photograph": "Photograph",
    "coordinate": "CoordinateAT",
    "name of event": "Event/name",
    "event attendance mode": "EventAttendanceModeEnumeration",
    "event status": "EventStatusType",
    "currency": "currency",
    "email": "email",
    "time": "Time",
    "location feature": "LocationFeatureSpecification",
    "duration": "Duration",
    "description of event": "Event/description",
    "description of restaurant": "Restaurant/description",
    "description of hotel": "Hotel/description",
    "rating": "Rating",
    #Added
    "description of restaurants": "Restaurant/description",
    "name of music artist": "MusicArtistAT",
    "description of hotel amenities": "LocationFeatureSpecification",
    "amenities": "LocationFeatureSpecification",
    "name of album": "MusicAlbum/name",
    "i don't know": "-",
    "name of music album": "MusicAlbum/name",
    "music recording": "MusicRecording/name",
    "event name": "Event/name",
    "description of hotels": "Hotel/description",
    "name of hotels": "Hotel/name",
    "duration of music recording or video": "Duration",
    "name of organization": "Organization",
    "hotel amenities": "LocationFeatureSpecification",
    "amenities of hotel room": "LocationFeatureSpecification",
    "check-in time": "Time",
    "check-out time": "Time",
    "time of check-in": "Time",
    "time of check-out": "Time",
    "hotel features": "LocationFeatureSpecification",
    "name of aparthotel": "Hotel/name",
    "event description": "Event/description",
    "email address": "email",
    "room amenities": "LocationFeatureSpecification",
    "end date": "Date",
    "descriptions of events": "Event/description",
    "mode of attendance": "EventAttendanceModeEnumeration",
    "name of song": "MusicRecording/name"
}

## Load test set

In [5]:
with open('data/cta-test-table-wise.pkl', "rb") as f:
    test = pickle.load(f)

examples = [example[1] for example in test ]
labels = [l for example in test for l in example[2]]

In [6]:
labels[:10]

['telephone',
 'Restaurant/name',
 'postalCode',
 'addressRegion',
 'Country',
 'CoordinateAT',
 'CoordinateAT',
 'Time',
 'DayOfWeek',
 'telephone']

## Choose prompt template: without or with instructions

In [17]:
# Paper name: table
template = """

Answer the question based on the task below. If the question cannot be answered using the information provided answer with "I don't know".

Task: Classify the columns of a given table with only one of the following classes that are separated with comma: description of event, description of restaurant, locality of address, postal code, region of address, country, price range, telephone, date, name of restaurant, payment accepted, day of week, review, organization, date and time, coordinate, name of event, event attendance mode, event status, currency, time, description of hotel, name of hotel, location feature, rating, fax number, email, photograph, name of music recording, music artist, name of album, duration.

Table: {input}

Class:

"""

In [8]:
# Paper name: table + instructions
template = """

Answer the question based on the task and instructions below. If the question cannot be answered using the information provided answer with "I don't know".

Task: Classify the columns of a given table with only one of the following classes that are separated with comma: description of event, description of restaurant, locality of address, postal code, region of address, country, price range, telephone, date, name of restaurant, payment accepted, day of week, review, organization, date and time, coordinate, name of event, event attendance mode, event status, currency, time, description of hotel, name of hotel, location feature, rating, fax number, email, photograph, name of music recording, music artist, name of album, duration.

Instructions: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a class that best represents the meaning of all cells in the column. 4. Answer with the selected class for each columns with the format Column1: class.

Table:
{input}

Class:

"""

## Load LLM and run model

In [18]:
gpt_3_turbo = ChatOpenAI(model_name='gpt-3.5-turbo-0301', temperature=0)
prompt = PromptTemplate(template=template, input_variables=['input'])
llm_chain = LLMChain(prompt=prompt, llm=gpt_3_turbo)

#Zero-shot prediction
preds = [llm_chain.run({'input': example}) for example in examples ]

In [19]:
preds[:10]

['Column 1: Telephone\nColumn 2: Name of restaurant\nColumn 3: Postal code\nColumn 4: Region of address\nColumn 5: Country\nColumn 6: Coordinate\nColumn 7: Time\nColumn 8: Day of week\nColumn 9: Payment accepted',
 'Column 1: telephone\nColumn 2: name of restaurant\nColumn 3: description of restaurant\nColumn 4: locality of address\nColumn 5: day of week\nColumn 6: time',
 'Column 1: Name of restaurant\nColumn 2: Postal code\nColumn 3: Payment accepted\nColumn 4: Region of address\nColumn 5: Locality of address',
 'Column 1: Name of restaurant\nColumn 2: Telephone\nColumn 3: Description of restaurant\nColumn 4: Locality of address\nColumn 5: Region of address\nColumn 6: Payment accepted\nColumn 7: Postal code\nColumn 8: Coordinate\nColumn 9: Coordinate\nColumn 10: Time\nColumn 11: Day of week',
 'Column 1: Name of restaurant\nColumn 2: Price range\nColumn 3: Telephone',
 'description of restaurant, telephone, price range, country, region of address, postal code, coordinate, locality of

In [20]:
#Save predictions in a file:
file_name='predictions/prompt-table-without-instructions-og-2.pkl'
f = open(file_name,'wb')
pickle.dump(preds,f)
f.close()

## Evaluation

In [21]:
# Map predictions to label space
predictions = []
i=0
for j, table_preds in enumerate(preds):
    # How many columns does the table have? : To control cases when less/more classes are returned
    table_number = len(test[j][2])
    
    if "Class:" in table_preds:
        table_preds = table_preds.split("Class:")[1]
      
    #Break predictions into either \n or ,
    if ":" in table_preds or "-" in table_preds:
        if ":" in table_preds:
            separator = ":"
            start = 1
            end = table_number+1
        else:
            separator = "-"  
            start = 1
            end = table_number+1
    else:
        separator = ","
        start = 0
        end = table_number
        
    col_preds = table_preds.split(separator)[start:end]
    
    for pred in col_preds:
        i+=1
        
        # Remove break lines
        if "\n" in pred:
            pred = pred.split('\n')[0].strip()
        # Remove commas
        if "," in pred:
            pred = pred.split(",")[0].strip()
        # Remove paranthesis
        if '(' in pred:
            pred = pred.split("(")[0].strip()
        #Remove points
        if '.' in pred:
            pred = pred.split(".")[0].strip()
        # Lower-case prediction
        pred = pred.strip().lower()
        
        if pred in text_to_label:
            predictions.append(text_to_label[pred])
        else:
            print(f"For test example {i} out of label space prediction: {pred}")
            predictions.append('-')
        
    # If more/less predictions for table
    if len(col_preds) < table_number:
        for m in range(0, table_number-len(col_preds)):
            predictions.append('-')
            i+=1

For test example 88 out of label space prediction: description of restaurant/winery
For test example 226 out of label space prediction: so it is not possible to classify the columns based on the information given
For test example 241 out of label space prediction: restaurants
For test example 242 out of label space prediction: hotels


In [22]:
predictions[15]

'Restaurant/name'

### Calculate Precision, Recall, Macro-F1 and Micro-F1

In [13]:
def calculate_f1_scores(y_tests, y_preds, num_classes):
    
    y_tests = [types.index(y) for y in y_tests]
    y_preds = [types.index(y) for y in y_preds]
    
    #Confusion matrix
    cm = np.zeros(shape=(num_classes,num_classes))
    
    for i in range(len(y_tests)):
        cm[y_preds[i]][y_tests[i]] += 1
        
    report = {}
    
    for j in range(len(cm[0])):
        report[j] = {}
        report[j]['FN'] = 0
        report[j]['FP'] = 0
        report[j]['TP'] = cm[j][j]

        for i in range(len(cm)):
            if i != j:
                report[j]['FN'] += cm[i][j]
        for k in range(len(cm[0])):
            if k != j:
                report[j]['FP'] += cm[j][k]

        precision = report[j]['TP'] / (report[j]['TP'] + report[j]['FP'])
        recall = report[j]['TP'] / (report[j]['TP'] + report[j]['FN'])
        f1 = 2*precision*recall / (precision + recall)
        
        if np.isnan(f1):
            f1 = 0
        if np.isnan(precision):
            f1 = 0
        if np.isnan(recall):
            f1 = 0

        report[j]['p'] =  precision
        report[j]['r'] =  recall
        report[j]['f1'] = f1
    
    all_fn = 0
    all_tp = 0
    all_fp = 0

    for r in report:
        if r != num_classes-1:
            all_fn += report[r]['FN']
            all_tp += report[r]['TP']
            all_fp += report[r]['FP']
        
    class_f1s = [ report[class_]['f1'] for class_ in report]
    class_p = [ 0 if np.isnan(report[class_]['p']) else report[class_]['p'] for class_ in report]
    class_r = [ 0 if np.isnan(report[class_]['r']) else report[class_]['r'] for class_ in report]
    macro_f1 = sum(class_f1s[:-1]) / (num_classes-1)
    
    p =  sum(class_p[:-1]) / (num_classes-1)
    r =  sum(class_r[:-1]) / (num_classes-1)
    micro_f1 = all_tp / ( all_tp + (1/2 * (all_fp + all_fn) )) 
    
    per_class_eval = {}
    for index, t in enumerate(types[:-1]):
        per_class_eval[t] = {"Precision":class_p[index], "Recall": class_r[index], "F1": class_f1s[index]}
    
    evaluation = {
        "Micro-F1": micro_f1,
        "Macro-F1": macro_f1,
        "Precision": p,
        "Recall": r
    }
    
    return [ evaluation, per_class_eval]

In [23]:
types = list(set(labels))
types = types + ["-"]
evaluation, per_class_eval = calculate_f1_scores(labels, predictions, 33)

  f1 = 2*precision*recall / (precision + recall)
  recall = report[j]['TP'] / (report[j]['TP'] + report[j]['FN'])


In [24]:
evaluation

{'Micro-F1': 0.49099099099099097,
 'Macro-F1': 0.42716638720445294,
 'Precision': 0.5068557310744812,
 'Recall': 0.396773538961039}

In [16]:
per_class_eval

{'Organization': {'Precision': 1.0, 'Recall': 0.25, 'F1': 0.4},
 'Hotel/description': {'Precision': 0.875,
  'Recall': 0.7777777777777778,
  'F1': 0.823529411764706},
 'EventAttendanceModeEnumeration': {'Precision': 0.8571428571428571,
  'Recall': 0.8571428571428571,
  'F1': 0.8571428571428571},
 'MusicRecording/name': {'Precision': 1.0,
  'Recall': 0.8,
  'F1': 0.888888888888889},
 'Restaurant/description': {'Precision': 0.4166666666666667,
  'Recall': 1.0,
  'F1': 0.5882352941176471},
 'addressLocality': {'Precision': 0.625,
  'Recall': 0.8333333333333334,
  'F1': 0.7142857142857143},
 'Event/name': {'Precision': 1.0, 'Recall': 0.8, 'F1': 0.888888888888889},
 'Duration': {'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0},
 'email': {'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0},
 'LocationFeatureSpecification': {'Precision': 0.5,
  'Recall': 0.25,
  'F1': 0.3333333333333333},
 'MusicAlbum/name': {'Precision': 1.0,
  'Recall': 0.42857142857142855,
  'F1': 0.6},
 'faxNumber': {'Precision': 1.0

## Error Analysis

In [None]:
# "-" means the model replied with out of label or with I don't know
errors = 0
for i in range(len(predictions)):
    if predictions[i] != labels[i]:
        errors += 1
        print(f"Predicted as {predictions[i]} when it was {labels[i]}")
errors

### Re-load previous preds files

In [None]:
with open('predictions/prompt-table-without-instructions.pkl', "rb") as f:
    preds = pickle.load(f)