## Prep

### Basics

In [1]:
#https://colab.research.google.com/drive/1BEZ_qgtVqSmOmCTuhHs7lHiYB5M5_myg?usp=sharing

import pandas as pd
import shutil
from pathlib import Path
import json
import gzip
from tqdm.auto import tqdm
import subprocess
import time
import re
import requests
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from itertools import chain
import argparse
import os
import sys
from copy import deepcopy
import torch

In [2]:
from retry import retry

In [3]:
rand_seed=13
EST_CHARS_PER_TOKEN=4
MAX_LEN=2000*EST_CHARS_PER_TOKEN

## D4 Evaluation

In [4]:
NUM_SAMPLES = 100

In [5]:
D4_PATH = Path("/scratch/bf996/datasets/D4/groundtruth")
D4_files = list(D4_PATH.rglob("**/*.silver"))
d4_dfs = {}
for file in D4_files:
    df = pd.read_csv(file, sep='\t', names=["ID1", "ID2", "values"])
    for k in range(NUM_SAMPLES):
        dfs = df.sample(n=1 + np.random.randint(100), replace=True, random_state=k)
        d4_dfs[str(file.stem).lower() + "_" + str(k)] = dfs

In [6]:
D4_classes = list(f.stem.lower() for f in D4_files)
D4_classes
#D4_classes = [str(k).replace("-", "").replace("_", "") for k in d4_dfs.keys()]

['school-number',
 'ethnicity',
 'school-grades',
 'school-name',
 'school-dbn',
 'brooklyn',
 'bronx',
 'permit-types',
 'queens',
 'manhattan',
 'staten_island',
 'borough',
 'rental-building-class',
 'agency-short',
 'color',
 'agency-full',
 'other-states',
 'us-state',
 'month',
 'plate-type']

In [7]:
D4_renamed_classes = ['School ID',
 'Ethnicity',
 'Letter Grade',
 'Educational Organization',
 'School DBN',
 'Region in Brooklyn',
 'Region in Bronx',
 'Permit Type',
 'Region in Queens',
 'Region in Manhattan',
 'Region in Staten Island',
 'County',
 'Elevator or Staircase',
 'Short City Agency Name',
 'Color',
 'Full City Agency Name',
 'Country',
 'State',
 'Month',
 'License plate type']

In [8]:
D4_classname_map = {k1 : k2 for (k1, k2) in zip(D4_classes, D4_renamed_classes)}

sherlock_D4_map = {
  'school-number' : ['Code'],
  'ethnicity' : ['Nationality'],
  'school-grades' : ['Description', 'Grades'],
  'school-name' : ['Organisation', 'Education'],
  'school-dbn' : ['Description', 'Code'],
  'brooklyn' : ['Region', 'Address', 'Location'],
  'bronx' : ['Region', 'Address', 'Location'],
  'queens' : ['Region', 'Address', 'Location'],
  'manhattan' : ['Region', 'Address', 'Location'],
  'staten_island' : ['Region', 'Address', 'Location'],
  'borough' : ['County'],
  'color' : ['Description', 'Type', 'Category'],
  'permit-types' : ['Symbol', 'Code'],
  'rental-building-class' : ['Requirement', 'Operator'],
  'agency-short' : ['Organisation', 'Affiliate'],
  'agency-full' : ['Organisation', 'Affiliate'],
  'other-states' : ['Country'],
  'us-state' : ['State'],
  'month' : ['Birth date', 'Day'],
  'plate-type' : ['Symbol', 'Type'],
}

sotab_D4_map = {
  'school-number' : ['identifierNameAP', 'IdentifierAT', 'Text', 'Number', 'Integer', 'QuantitativeValue'],
  'ethnicity' : ['Person', 'Person/name', 'category'],
  'school-grades' : ['Text', 'category', 'CategoryCode', 'Offer'],
  'school-name' : ['Organization', 'EducationalOrganization'],
  'school-dbn' : ['Text', 'identifierNameAP', 'IdentifierAT', 'unitCode'],
  'brooklyn' : ['streetAddress', 'addressRegion', 'Place', 'addressLocality'],
  'bronx' : ['streetAddress', 'addressRegion', 'Place', 'addressLocality'],
  'queens' : ['streetAddress', 'addressRegion', 'Place', 'addressLocality'],
  'manhattan' : ['streetAddress', 'addressRegion', 'Place', 'addressLocality'],
  'staten_island' : ['streetAddress', 'addressRegion', 'Place', 'addressLocality'],
  'borough' : ['addressRegion', 'Place', 'addressLocality'],
  'color' : ['Text', 'category'],
  'permit-types' : ['Text', 'identifierNameAP', 'IdentifierAT', 'unitCode'],
  'rental-building-class' : ['Thing', 'category', "LocationFeatureSpecification"],
  'agency-short' : ['Organization', 'EducationalOrganization'],
  'agency-full' : ['Organization', 'EducationalOrganization'],
  'other-states' : ['Country'],
  'us-state' : ["addressRegion", "addressLocality"],
  'month' : ["DateTime", "Date"],
  'plate-type' : ['Text', 'category', 'CategoryCode', 'Offer'],
}

## TURL Variables

In [4]:
TURL_TRAIN = "/scratch/bf996/llm_er_std/proj/TURL/data/train.table_col_type.json"
TURL_TEST = "/scratch/bf996/llm_er_std/proj/TURL/data/test.table_col_type.json"

### SOTAB Variables

In [4]:
val_files = list(Path("/scratch/bf996/datasets/sotab/Validation").rglob("**/*.json.gz"))
train_files = list(Path("/scratch/bf996/datasets/sotab/Train").rglob("**/*.json.gz"))
test_files = list(Path("/scratch/bf996/datasets/sotab/Test").rglob("**/*.json.gz"))

In [5]:
os.chdir("/scratch/bf996/datasets/sotab")
gt_df = pd.read_csv("./CTA_validation_gt.csv")
gt_df_train = pd.read_csv("./CTA_training_gt.csv")
gt_df_test = pd.read_csv("./CTA_test_gt.csv")

In [6]:
train_save_path="/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/catboost_features_dataset_train.csv"
val_save_path="/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/catboost_features_dataset_val.csv"
test_save_path="/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/catboost_features_dataset_test.csv"
dfv = pd.read_csv(val_save_path)
dft = pd.read_csv(test_save_path)

### MAPPINGS

In [7]:
def fix_labels(label, label_set):
  label = label.lower().strip()
  ldm = {k.lower().strip() : v.lower().strip() for k, v in label_set['dict_map'].items()}
  if label_set.get("abbrev_map", -1) != -1:
    lda = {k.lower().strip() : v.lower().strip() for k, v in label_set['abbrev_map'].items()}
    ldares = lda.get(label, "")
    if ldares != "":
      label = ldares
  if label.endswith("/name"):
    label = label[:-5]
  remap = ldm.get(label, -1)
  if remap != -1:
    label = remap
  return label.lower()

In [8]:
label_dict_map = {'Location' : 'streetAddress', 'PostalAddress' : 'streetAddress', 'CreativeWorkSeries' : 'CreativeWork', 'DateTime' : 'Date', 'QuantitativeValue' : "Number", "Integer" : "Number", 
                  "faxNumber" : "telephone", "Email" : "email", "unitText" : "Text", "Mass" : "weight", "MusicRecording" : "MusicAlbum",
                  "MonetaryAmount" : "price", "ProductModel" : "Product", "CoordinateAT" : "Coordinates", 'OccupationalExperienceRequirements' : 'JobRequirements',
                  'Thing' : 'Text', "MusicArtistAT" : "MusicGroup", 'Action' : "WebHTMLAction", "Energy" : "Calories", 'postalCode' : 'zipCode', "LocalBusiness" : "Company",
                  "addressLocality" : "streetAddress", "addressRegion" : "Country", "Place" : "Organization", "WarrantyPromise" : "Text", "typicalAgeRange" : "Age",
                  "EducationalOccupationalCredential" : "JobRequirements", "EventStatusType" : "Event", "identifierNameAP" : "IdentifierAT", "ItemAvailability" : "category", "MusicGroup" : "Artist",
                  "SportsEvent" : "Event"}

label_dict_map_full = {
    'DateTime' : 'calendarvalue',
    'EventStatusType' : 'statustype',
    'EventAttendanceModeEnumeration' : 'attendenum',
    'priceRange' : 'costrange',
    'ItemAvailability' : 'availabilityofitem',
    "LocalBusiness" : "company",
    'addressRegion' : 'countyorstate',
    'addressLocality' : 'city',
    'SportsTeam' : 'athleticteam',
    'ProductModel' : 'modelobject',
    'BookFormatType' : 'formatofbook',
    'CreativeWorkSeries' : 'seriescreative',
    "Energy" : "calories",
    'Action' : "webhtmlaction",
    'Photograph' : 'photourl',
    'ProductModel' : 'modelnameorid',
    'QuantitativeValue' : 'quantityrange',
    'Place' : 'buildingname',
}

cll = list(pd.unique(gt_df_train['label']))

clt = ['WebHTMLAction',
 'Book',
 'Boolean',
 'Brand',
 'Coordinates',
 'Country',
 'CreativeWork',
 'Date',
 'DayOfWeek',
 'DeliveryMethod',
 'Distance',
 'Duration',
 'EducationalOrganization',
 'Calories',
 'Event',
 'GenderType',
 'Hotel',
 'IdentifierAT',
 'ItemList',
 'JobPosting',
 'Language',
 'Company',
 'Movie',
 'Museum',
 'MusicAlbum',
 'Artist',
 'Number',
 'JobRequirements',
 'Offer',
 'Organization',
 'Person',
 'Location',
 'PostalAddress',
 'Product',
 'Rating',
 'Recipe',
 'Restaurant',
 'Review',
 'SportsTeam',
 'TVEpisode',
 'Text',
 'Time',
 'URL',
 'category',
 'currency',
 'email',
 'paymentAccepted',
 'price',
 'streetAddress',
 'telephone',
 'Age',
 'weight',
 'zipCode']

cls = [
 'Boolean',
 'Coordinates',
 'Country',
 'CreativeWork',
 'Date',
 'Event',
 'Gender',
 'JobPosting',
 'Language',
 'Company',
 'Number',
 'Organization',
 'Person',
 'Product',
 'SportsTeam',
 'Text',
 'Time',
 'URL',
 'category',
 'currency',
 'email',
 'price',
 'streetAddress',
 'telephone',
 'Age',
 'weight',
 'zipCode']

abbrev_map = {**{s[1:] : s for s in cll}, **{s[1:] : s for s in clt}, **{s[2:] : s for s in cll if len(s) > 5}, **{s[2:] : s for s in clt if len(s) > 5}}

context_labels = {"name" : "context_labels", "label_set" : cll, "dict_map" : label_dict_map_full, 'abbrev_map' : abbrev_map}

context_labels_trim = {"name" : "context_labels_trim", "label_set" : clt, "dict_map" : label_dict_map, 'abbrev_map' : abbrev_map}

numeric_labels = ['currency', 'price', 'Number',
       'Integer', 'IdentifierAT', 'QuantitativeValue',
       'Duration',
       'priceRange', 'postalCode', 'MonetaryAmount', 'Mass', 'CategoryCode',
        'weight',
       'unitCode', 'Energy', 'Distance',
       'workHours', 'typicalAgeRange']

label_dict_map_small = {'Location' : 'streetAddress', 'PostalAddress' : 'streetAddress', 
                        'CreativeWorkSeries' : 'CreativeWork', 'Book' : 'CreativeWork', 'DateTime' : 'Date', 
                        'QuantitativeValue' : "Number", "Integer" : "Number", "GenderType" : "Gender", "IdentifierAT" : "Text",
                        "Hotel" : "Company", "ItemList" : "category", "Movie" : "CreativeWork", "Museum" : "Organization",
                        "JobRequirements" : "category", "Offer" : "Text", "Location" : "streetAddress", "PostalAddress" : "streetAddress",
                        "Rating" : "category", "Restaurant" : "Company", "Review" : "Text", "Recipe" : "Text",
                        "TVEpisode" : "CreativeWork", "paymentAccepted" : "category",
                        "faxNumber" : "telephone", "Email" : "email", "unitText" : "Text", "Mass" : "weight", 
                        "MusicRecording" : "MusicAlbum", "Brand" : "Product", "DayOfWeek" : "Date", "DeliveryMethod" : "Text",
                        "Distance" : "Number", "Duration" : "Time", "EducationalOrganization" : "Organization",
                        "MonetaryAmount" : "price", "ProductModel" : "Product", "CoordinateAT" : "Coordinates", 
                        'OccupationalExperienceRequirements' : 'JobRequirements',
                        'Thing' : 'Text', "MusicArtistAT" : "Person", 'Action' : "URL", 
                        "Energy" : "Number", 'postalCode' : 'zipCode', "LocalBusiness" : "Company",
                        "addressLocality" : "streetAddress", "addressRegion" : "Country", 
                        "Place" : "Organization", "WarrantyPromise" : "Text", "typicalAgeRange" : "Age",
                        "EducationalOccupationalCredential" : "JobRequirements", "EventStatusType" : "Event", 
                        "identifierNameAP" : "IdentifierAT", "ItemAvailability" : "category", "MusicGroup" : "Person",
                        "SportsEvent" : "Event", "Audience" : "Person", "Energy" : "Number"}

context_labels_small = {"name" : "context_labels_small", "label_set" : cls, "dict_map" : label_dict_map_small, 'abbrev_map' : abbrev_map}

#, **{s[3:] : s for s in context_labels if len(s) > 5}, **{s[3:] : s for s in context_labels_trim if len(s) > 5}, **{s[4:] : s for s in context_labels if len(s) > 5}

In [9]:
sotab_integer_labels = ['DateTime', 'Date', 'Integer', 'telephone', 'faxNumber', 'Energy']

sotab_float_labels = ['price',
 'Number',
 'QuantitativeValue',
 'Duration',
 'priceRange',
 'MonetaryAmount',
 'CoordinateAT',
 'Mass',
 'weight',
 'Distance']

sotab_other_labels = ['Identifier', 'email', 'URL', 'WebHTMLAction', 'Photograph', 'category', 'text']

sotab_top_hier = {"integer" : sotab_integer_labels, "float" : sotab_float_labels, "other" : sotab_other_labels}

sotab_identifier = ['IdentifierAT', 'CategoryCode', 'identifierNameAP', 'unitCode']

sotab_category = ['currency',
 'ItemList',
 'EventStatusType',
 'EventAttendanceModeEnumeration',
 'OfferItemCondition',
 'ItemAvailability',
 'category',
 'Rating',
 'Language',
 'OccupationalExperienceRequirements',
 'DeliveryMethod',
 'BookFormatType',
 'EducationalOccupationalCredential',
 'GenderType',
 'paymentAccepted',
 'typicalAgeRange']

sotab_text = ['Product/name',
 'Hotel/name',
 'Brand',
 'Text',
 'Recipe/name',
 'Event/name',
 'PostalAddress',
 'Place',
 'Organization',
 'Country',
 'Person',
 'LocalBusiness/name',
 'streetAddress',
 'addressRegion',
 'addressLocality',
 'Person/name',
 'Review',
 'Thing',
 'Place/name',
 'openingHours',
 'SportsEvent/name',
 'SportsTeam',
 'ProductModel',
 'Movie/name',
 'CreativeWork/name',
 'JobPosting/name',
 'Museum/name',
 'Book/name',
 'TVEpisode/name',
 'CreativeWorkSeries',
 'RestrictedDiet',
 'Restaurant/name',
 'LocationFeatureSpecification',
 'MusicArtistAT',
 'MusicAlbum',
 'Product',
 'workHours',
 'Time',
 'DayOfWeek',
 'MusicAlbum/name',
 'CreativeWork',
 'EducationalOrganization',
 'MusicRecording/name',
 'Offer',
 'unitText',
 'MusicRecording',
 'audience',
 'WarrantyPromise',
 'MusicGroup']

sotab_other_hier = {"Identifier" : sotab_identifier, "category" : sotab_category, "text" : sotab_text}

In [10]:
mappings = pd.read_csv(r'/scratch/bf996/llm_er_std/proj/metadata/wotab-mapping.csv',
                   # Set first column as rownames in data frame
                  #  index_col=0,
                  #  on_bad_lines='skip'
                  )
sherlock_to_cta = {}
cta_list = list(set(mappings['Sherlock CTA'].tolist()))
for mapping in cta_list:
  mapping_split = mapping.split(", ")
  for m in mapping_split:
    if not m:
      continue
    # df_idx = [i for i in range(len(cta_list)) if m in cta_list[i]]
    # print(df_idx)
    map_list = list(set(mappings[mappings['Sherlock CTA'].str.contains(m)]['CTA label'].tolist()))
    # map_list = mappings.iloc[df_idx, 'CTA Label']
    # map_list = map_list['CTA Label'].tolist()
    match_set = list(chain(*[k.split(", ") for k in map_list]))
    #FOR ALL LABELS
    #match_set = list(set([fix_labels(m, context_labels) for m in match_set]))
    #FOR TRIM LABELS
    match_set = list(set([fix_labels(m, context_labels_trim) for m in match_set]))
    if not match_set:
      match_set = ["NoMatch"]
    m = m.lower()
    if sherlock_to_cta.get(m, -1) != -1:
      sherlock_to_cta[m] = list(set(sherlock_to_cta[m] + match_set))
    else:
      sherlock_to_cta[m] = match_set

sherlock_labels = ["Address", "Affiliate", "Affiliation", "Age", "Album", "Area", "Artist", "Birth date", "Birth place", "Brand", "Capacity", "Category", "City", "Class", "Classification", "Club", "Code", "Collection ", "Command ", "Company", "Component", "Continent", "Country", "County", "Creator", "Credit", "Currency", "Day", "Depth", "Description", "Director", "Duration", "Education", "Elevation ", "Family ", "File size", "Format", "Gender", "Genre", "Grades", "ISBN", "Industry", "Jockey", "Language", "Location", "Manufacturer", "Name", "Nationality", "Notes", "Operator", "Order", "Organisation", "Origin", "Owner", "Person", "Plays", "Position", "Product", "Publisher", "Range", "Rank", "Ranking", "Region", "Religion", "Requirement", "Result", "Sales", "Service", "Sex", "Species", "State", "Status", "Symbol", "Team", "Team name", "Type", "Weight", "Year"]
sherlock_map_reverse = {s.lower() : i for i, s in enumerate(sherlock_labels)}
sherlock_map_forward = {i : s.lower() for i, s in enumerate(sherlock_labels)}

In [11]:
#VALID CLASSES USING SHERLOCK_TO_CTA
list(sherlock_to_cta.get(k.lower(), -1) for k in cls)

[-1,
 -1,
 ['country'],
 -1,
 -1,
 -1,
 ['gendertype'],
 -1,
 ['language'],
 ['restaurant', 'locationfeaturespecification', 'hotel', 'company'],
 -1,
 ['museum', 'organization', 'programmembership', 'person'],
 ['audience', 'organization', 'person'],
 ['text', 'product'],
 -1,
 -1,
 -1,
 -1,
 ['offeritemcondition', 'deliverymethod', 'category', 'categorycode'],
 ['currency', 'price'],
 -1,
 -1,
 -1,
 -1,
 ['age'],
 ['weight'],
 -1]

In [18]:
#D4 mappings

d4_zs_context_labels = {"name" : "d4_zs", "label_set" : D4_renamed_classes, "dict_map" : {c : c for c in D4_renamed_classes}, "d4_map" : D4_classname_map}

d4_sotab_labels = {"name" : "d4_sotab", "label_set" : cll, "dict_map" : {c : c for c in cll}, "d4_map" : sotab_D4_map}

d4_sherlock_labels = {"name" : "d4_sherlock", "label_set" : sherlock_labels, "dict_map" : {c : c for c in sherlock_labels}, "d4_map" : sherlock_D4_map}


## Model-Specific Prep

### OpenAI

In [None]:
import openai
import os
import IPython
from dotenv import load_dotenv

In [None]:
load_dotenv("/scratch/bf996/notebooks/.env")
openai.api_key = os.getenv("OPENAI_API_KEY")
assert openai.api_key != None, "api key did not load"

### Sherlock

In [None]:
%%capture
!git clone https://github.com/penfever/sherlock-project
!pip install pyfunctional

### DODUO

In [21]:
#os.chdir("/scratch/bf996/notebooks")
#!git clone https://github.com/megagonlabs/doduo

In [22]:
os.chdir("/scratch/bf996/notebooks/doduo")
#!wget https://doduo-data.s3-us-west-2.amazonaws.com/model.tar.gz
#!tar -zvxf model.tar.gz

In [23]:
#sys.path.insert(0,'/content/doduo')

In [24]:
#sys.path.insert(0,'/content/doduo/doduo')
#sys.path.remove('/content/doduo/doduo')

In [25]:
from doduo import Doduo

In [26]:
# Load Doduo model
args = argparse.Namespace
#args.model = "wikitable" (121 classes?)
args.model = "viznet" #78 classes (Sherlock labels)
doduo_model = Doduo(args)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMultiOutputClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForMultiOutputClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMultiOutputClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForMultiOutputClassification were not initialized from the model checkpoint at bert-base-uncased and

### PETALS

In [None]:
!pip install petals

from petals import DistributedBloomForCausalLM
from transformers import AutoTokenizer

model = DistributedBloomForCausalLM.from_pretrained("bigscience/bloomz-petals").to("cuda:0")

tokenizer = AutoTokenizer.from_pretrained('bigscience/tokenizer')

['currency',
 'Number',
 'QuantitativeValue',
 'Duration',
 'Mass',
 'CategoryCode',
 'unitCode',
 'Energy',
 'typicalAgeRange']

### CATBOOST

In [17]:
from catboost import CatBoostClassifier

catboost_path="/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/catboost_sotab.bin"
catboost_f1s="/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/catboost_label_f1.json"

f1_thresh = .75
# cbc = CatBoostClassifier()
# cbc.load_model(catboost_path)
with open(catboost_f1s, 'r', encoding='utf-8') as alt_f:
  catboost_f = json.load(alt_f)

catboost_cats = [k for k, v in catboost_f.items() if v > f1_thresh]
catboost_cats

['currency',
 'Number',
 'QuantitativeValue',
 'Duration',
 'Mass',
 'CategoryCode',
 'unitCode',
 'Energy',
 'typicalAgeRange']

## Zero-Shot Model Loader

In [12]:
from accelerate import infer_auto_device_map, init_empty_weights, load_checkpoint_and_dispatch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM, T5ForConditionalGeneration, LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
import langchain
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from sentence_transformers import SentenceTransformer, util

sent_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device='cpu')

def set_pipeline(k=1):
    pipe = pipeline(
        "text-generation",
        model=base_model, 
        tokenizer=tokenizer, 
        max_length=MAX_LEN,
        temperature=0.5*k,
        top_p=0.80-(0.1 * k),
        repetition_penalty=1.3
    )
    local_llm = HuggingFacePipeline(pipeline=pipe)
    llm_chain = LLMChain(prompt=pt, 
        llm=local_llm
    )
    return pipe, local_llm, llm_chain

curr_model = ""

def init_model(model):
    curr_model = model
    with torch.no_grad():
        torch.cuda.empty_cache()
    if model == "llama-65b":
        LLAMA_PATH = "/scratch/bf996/text-generation-webui/models/llama-65b-hf"
        MAX_LEN=2048
        tokenizer = LlamaTokenizer.from_pretrained(LLAMA_PATH)
        config = AutoConfig.from_pretrained(LLAMA_PATH,
                                            torch_dtype=torch.float16,
                                            load_in_8bit=True)
        with init_empty_weights():
            base_model = AutoModelForCausalLM.from_config(config)
        base_model.tie_weights()
        device_map = infer_auto_device_map(base_model, max_memory={0: "60GiB", "cpu": "96GiB"})
        base_model = load_checkpoint_and_dispatch(
            base_model, 
            LLAMA_PATH, 
            device_map=device_map
        )
    elif model == "alpaca-13b":
        MAX_LEN=2048
        tokenizer = LlamaTokenizer.from_pretrained("chavinlo/alpaca-native")
        base_model = LlamaForCausalLM.from_pretrained(
            "chavinlo/alpaca-native",
            torch_dtype=torch.float16,
            load_in_8bit=True,
            device_map='auto',
        )
    elif model == "vicuna-13b":
        MAX_LEN=2048
        tokenizer = AutoTokenizer.from_pretrained("eachadea/vicuna-13b")
        base_model = AutoModelForCausalLM.from_pretrained(
            "eachadea/vicuna-13b",
            torch_dtype=torch.float16,
            load_in_8bit=True,
            device_map='auto',
        )
    elif model == "gpt4-x-alpaca":
        MAX_LEN=2048
        tokenizer = AutoTokenizer.from_pretrained("chavinlo/gpt4-x-alpaca")
        base_model = AutoModelForCausalLM.from_pretrained("chavinlo/gpt4-x-alpaca", device_map="auto", load_in_8bit=True)
    elif model == "t0pp":
        MAX_LEN=512
        tokenizer = AutoTokenizer.from_pretrained("bigscience/T0pp")
        base_model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/T0pp", device_map="auto", torch_dtype=torch.float16, load_in_8bit=True)
    elif model == "flan-t5-xxl":
        MAX_LEN=512
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
        base_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-xxl", device_map="auto", torch_dtype=torch.float16, load_in_8bit=True)
    elif model == "flan-ul2":
        MAX_LEN=512
        base_model = T5ForConditionalGeneration.from_pretrained("google/flan-ul2", torch_dtype=torch.bfloat16, device_map="auto")                                                                 
        tokenizer = AutoTokenizer.from_pretrained("google/flan-ul2")
    elif model == "galpaca-30b":
        MAX_LEN=2048
        tokenizer = AutoTokenizer.from_pretrained("GeorgiaTechResearchInstitute/galpaca-30b", device_map="auto", torch_dtype=torch.float16, load_in_8bit=True)
        base_model = AutoModelForCausalLM.from_pretrained("GeorgiaTechResearchInstitute/galpaca-30b")
    elif model == "opt-iml-max-30b":
        MAX_LEN=2048
        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-iml-max-30b", use_fast=False, padding_side='left')
        base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-iml-max-30b", device_map="auto", torch_dtype=torch.float16)
    if model in ["flan-t5-xxl", "t0pp", "flan-ul2"]:
        template = """{instruction}"""
    else:
        template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

        ### Instruction: 
        {instruction}

        Answer:"""
    pt = PromptTemplate(template=template, input_variables=["instruction"])
    #Convert length from tokens to characters, leave room for model response
    MAX_LEN = MAX_LEN * EST_CHARS_PER_TOKEN - 200
    return base_model, tokenizer, template, pt, MAX_LEN

### Model-Specific Functions

In [13]:
def get_sherlock_resp(df, gt_df, prompt_dict, model, label_indices, base_prompt, lsd):
  isd4 = "d4" in lsd['name']
  if "sherlock" in model:
    model = sherlock_model
    data_m = pd.Series(df[label_indices].astype(str).T.values.tolist())
    extract_features(
        "../temporary.csv",
        data_m
    )
    feature_vectors = pd.read_csv("../temporary.csv", dtype=np.float32)
    predicted_labels = model.predict(feature_vectors, "sherlock")
    iter_len = len(data_m)
  elif "doduo" in model:
    model = doduo_model
    data_m = df[label_indices]
    try:
        annot_m = doduo_model.annotate_columns(data_m)
        predicted_labels = annot_m.coltypes
    except Exception as e:
        print(f"Exception {e} in Doduo, returning default \n")
        predicted_labels = ["text" for i in range(len(data_m))]
    iter_len = len(predicted_labels)
  predicted_labels_dict = {i : sherlock_to_cta.get(predicted_labels[i], [predicted_labels[i]]) for i in range(iter_len)}
  
  for idx, label_idx in zip(range(iter_len), label_indices):
    prompt = base_prompt + "_" + str(label_idx)
    if isd4:
        ans = predicted_labels[0]
        label = [s.lower() for s in lsd['d4_map'][gt_df]]
    else:
        gt_row = gt_df[gt_df['column_index'] == label_idx]
        if len(gt_row) != 1:
          continue
        label = fix_labels(gt_row['label'].item(), lsd)
        ans = [fix_labels(item, lsd) for item in predicted_labels_dict[idx]]
    if isd4:
        res = ans in label
    else:
        assert isinstance(ans, list), "ans should be a list"
        res = label in ans
    ans_dict = {"response" : ans, "context" : None, "ground_truth" : label, "correct" : res, "orig_model_label" : predicted_labels[idx]}
    prompt_dict[prompt] = ans_dict
  return prompt

@retry(Exception, tries=3, delay=3)
def get_chatgpt_resp(lsd: dict, context : str, ground_truth : str, prompt_dict : dict, response = True, session=None, method=["similarity"], max_len=15000):
  fixed_labels = [fix_labels(s, lsd) for s in lsd['label_set']]
  model = "gpt-3.5"
  context_labels = ", ".join(fixed_labels)
  fixed_labels = sorted(fixed_labels, key=len, reverse=True)
  prompt = prompt_context_insert(context_labels, context, max_len, "gpt-3.5")
  d_p = prompt_dict.get(prompt, -1)
  if d_p != -1 and "skip-existing" in method:
    #recompute_results(prompt_dict, prompt, model, cbc_pred=None, label_set=lsd)
    return prompt
  elif d_p != -1:
    while prompt_dict.get(prompt, -1) != -1:
        prompt = prompt + "*"
  if response:
    ans = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      messages=[
          {"role": "user", "content": prompt},
      ],
      temperature=0,
    ).choices[0]['message']['content']
    #print(f"Original ans is {ans}")
  ans_n = fuzzy_label_match(ans, fixed_labels, None, None, prompt, lsd, model, method=method)
  #print(f"Fuzzy ans is {ans_n}")
  res = ans_n == ground_truth
  ans_dict = {"response" : ans_n, "context" : context, "ground_truth" : ground_truth, "correct" : res, "original_model_answer" : ans}
  prompt_dict[prompt] = ans_dict
  return prompt

@retry(Exception, tries=5, delay=3)
def get_ada_resp(lsd: dict, context : str, ground_truth : str, prompt_dict : dict, response = True, session=None):
  prompt = prompt_context_insert(context_labels, context, MAX_LEN, "ada-personal")
  if prompt_dict.get(prompt, -1) != -1:
    #recompute_results(prompt_dict, prompt, "ada-personal", label_set=lsd)
    return prompt
  if response:
    proc = subprocess.run(["openai", "api", "completions.create", "-m", "ada:ft-personal:-2023-03-14-11-52-45", "-M", "3", "-p", prompt], capture_output=True, check=True)
    ans = proc.stdout.decode("utf-8")[len(prompt):].strip()
  else:
    ans = ""
  res = ans.lower().strip().startswith(ground_truth)
  ans_dict = {"response" : ans, "context" : context, "ground_truth" : ground_truth, "correct" : res}
  prompt_dict[prompt] = ans_dict
  return prompt

def call_llama_model(session, link, prompt, lsd, var_params):
    fixed_labels = [fix_labels(s, lsd) for s in lsd['label_set']]
    if session:
      ans = session.post(link, json=make_json(prompt, var_params))
    else:
      ans = requests.post(link, json=make_json(prompt, var_params))
    ans = ans.json()["data"]
    ans_n = fix_labels(ans[0][len(prompt):].strip(), lsd)
    return ans_n

@retry(Exception, tries=3, delay=3)
def get_topp_resp(prompt, k):
    inputs = tokenizer.encode(prompt, return_tensors="pt").cuda()
    outputs = base_model.generate(inputs, 
                                  max_length=MAX_LEN,
                                  temperature=0.1*k,
                                  top_p=0.90-(0.1 * k),
                                  repetition_penalty=1.3
                                  )
    orig_ans = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return orig_ans

@retry(Exception, tries=3, delay=3)
def get_llama_resp(lsd: dict, context : list, ground_truth : str, prompt_dict : dict, link : str, response = True, session=None, cbc=None, model="llama", limited_context=None, method = ["ans_contains_gt", "gt_contains_ans", "resample"]):
  #print(f"in get llama resp, gt is {ground_truth}, context is {context}")
  isd4 = "d4" in lsd['name']
  if isd4:
      gtv = lsd['d4_map'][ground_truth]
      if isinstance(gtv, str):
        gtv = [gtv]
      ground_truth = [s.lower() for s in gtv]
  if "hierarchical" in method and not isd4:
      dtype = get_base_dtype(limited_context)
      fixed_labels = sotab_top_hier[dtype]
  else:
      fixed_labels = list(set([fix_labels(s, lsd) for s in lsd['label_set']]))
  context_labels = ", ".join(fixed_labels)
  fixed_labels = sorted(fixed_labels, key=len, reverse=True)
  if model in ["llama-zs", "opt-iml-30b-zs"]:
    pipe, local_llm, llm_chain = set_pipeline(k=1)
  prompt = prompt_context_insert(context_labels, context, MAX_LEN, model)
  d_p = prompt_dict.get(prompt, -1)
  #skip existing logic
  if d_p != -1 and "skip-existing" in method:
    # recompute_results(prompt_dict, prompt, "llama", cbc, lsd)
    return prompt
  elif d_p != -1:
    while prompt_dict.get(prompt, -1) != -1:
        prompt = prompt + "*"
  #response logic
  if not response:
    orig_ans = ans_n = ""
  else:
    orig_ans = apply_basic_rules(limited_context, None)
    if orig_ans is None:
        orig_ans = query_correct_model(model, prompt, context_labels, context, session, link, lsd)
        #hierarchical matching logic
        if "hierarchical" in method and dtype == "other" and orig_ans not in ['email', 'URL', 'WebHTMLAction', 'Photograph']:
            next_label_set = sotab_other_hier.get(orig_ans, -1)
            if next_label_set == -1:
                print(f"Original answer {orig_ans} not found in hierarchy")
                next_label_set = sotab_other_hier['text']
            fixed_labels = list(set([fix_labels(s, lsd) for s in next_label_set])) 
            context_labels = ", ".join(fixed_labels)
            fixed_labels = sorted(fixed_labels, key=len, reverse=True)
            orig_ans = query_correct_model(model, prompt, context_labels, context, session, link, lsd)  
        #fuzzy matching logic
        ans_n = fuzzy_label_match(orig_ans, fixed_labels, session, link, prompt, lsd, model, method=method).lower()
    else:
        ans_n = orig_ans.lower()
  #print(f"final label set was {fixed_labels}, prediction was {ans_n}, ground truth was {ground_truth} \n")
  if isd4:
    res = ans_n in ground_truth
  else:
    res = ans_n == ground_truth
  ans_dict = {"response" : ans_n, "context" : context, "ground_truth" : ground_truth, "correct" : res, "original_model_answer" : orig_ans}
  prompt_dict[prompt] = ans_dict
  #recompute_results(prompt_dict, prompt, "llama", cbc, lsd)
  return prompt

@retry(Exception, tries=5, delay=3)
def get_bloomz_resp(lsd: dict, context : str, ground_truth : str, prompt_dict : dict, response = True, session=None):
  prompt = prompt_context_insert(context_labels, context, 2000, "bloomz")
  if prompt_dict.get(prompt, -1) != -1:
    return prompt
  if response:
    inputs = tokenizer(prompt, return_tensors="pt")["input_ids"].to("cuda:0")
    outputs = model.generate(inputs, max_new_tokens=5)
  else:
    response = ""
  ans = tokenizer.decode(outputs[0]).split()[-1]
  ans = ''.join(e for e in ans if e.isalnum()).lower()
  res = ans == ground_truth
  ans_dict = {"response" : ans, "context" : context, "ground_truth" : ground_truth, "correct" : res}
  prompt_dict[prompt] = ans_dict
  return prompt

## Functions

In [27]:
def to_integer(val):
    return pd.to_numeric(val, downcast='integer', errors='ignore')

def derive_meta_features(col):
  features = {}
  if not col.astype(str).apply(str.isnumeric).all():
    return {"std" : round(col.astype(str).str.len().std(), 2), "mean" : round(col.astype(str).str.len().mean(), 2), "mode" : col.astype(str).str.len().mode().iloc[0].item(), "median" : col.astype(str).str.len().median(), "max" : col.astype(str).str.len().max(), "min" : col.astype(str).str.len().min(), "rolling-mean-window-4" : [0.0]}
  col = col.dropna().astype(float)
  if col.apply(float.is_integer).all():
    col = col.astype(int)
  #print(f"Collecting metafeatures for column {col} \n")    
  features['std'] = round(col.std(), 2)
  features['mean'] = round(col.mean(), 2)
  features['mode'] = col.mode().iloc[0].item()
  features['median'] = col.median()
  features['max'] = col.max()
  features['min'] = col.min()
  indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=4)
  features['rolling-mean-window-4'] = list(col.rolling(window=indexer, min_periods=1).mean())
  return features

def fix_mode(d):
  if isinstance(d['mode'], pd.Series):
    d['mode'] = d['mode'].loc[0].item()
  return d

def split_meta_features(d):
  return pd.Series([d.get('std', "N/A"), d.get('mean', "N/A"), d.get('median', "N/A"), d.get('mode', "N/A"), d.get('max', "N/A"), d.get('min', "N/A")])

def prompt_context_insert(context_labels: str, context : str, max_len : int = 2000, model : str = "gpt-3.5"):
  if model == "bloomz":
    s = f'SYSTEM: You are an AI research assistant. You use a tone that is technical and scientific. USER: Please select the field from {context_labels} which best describes the context below. Respond with the name of the field and nothing else. \n CONTEXT: {context}'
  elif model == "gpt-3.5":
    s = f'SYSTEM: Please select the field from {context_labels} which best describes the context. Respond only with the name of the field. \n CONTEXT: {context}'
  elif model == "ada-personal":
    s = f'{context}$'
  elif model == "llama-old":
    s = f'INSTRUCTION: Select the field from the category which matches the input. \n CATEGORIES: {context_labels} \n INPUT:{context} \n OUTPUT: '
  elif "-zs" in model:
    ct = "[" + ", ".join(context).replace("[", "").replace("]", "").replace("'", "")[:MAX_LEN - 100 - len(context_labels)] + "]"
    lb = "\n".join(["- " + c for c in context_labels.split(", ")])
    #s = f'How might one classify the following input? \n INPUT: {ct} .\n OPTIONS:\n {lb} \n ANSWER:'
    if model == "opt-iml-max-30b-zs":
        s = f'Select the option which best describes the input. \n INPUT: {ct} .\n OPTIONS:\n {lb} \n'
    else:
        s = f'INSTRUCTION: Select the option which best describes the input. \n INPUT: {ct} .\n OPTIONS:\n {lb} \n ANSWER:'
  elif model == "llama":
    s = f'INSTRUCTION: Select the category which best matches the input. \n INPUT:{context} \n CATEGORY: '
  elif model == "llama-retry":
    s = f'INSTRUCTION: Select the category which best matches the input. \n INPUT:{context} \n CATEGORY: '
  #Truncate if prompt exceeds maximum length
  if len(s) > max_len:
    s = s[:max_len - 3]
  return s

def recompute_results(prompt_dict, prompt, model_str, cbc_pred, label_set):
  dict_val = prompt_dict.get(prompt, -1)
  dict_val['cbc_pred'] = cbc_pred
  if model_str == "llama":
    if cbc_pred and (cbc_pred in catboost_cats):
      print(f"using cbcpred label: {cbc_pred} \n")
      dict_val['response'] = fix_labels(cbc_pred, label_set)
    dict_val['correct'] = ((dict_val['ground_truth'] == dict_val['response']) or (dict_val['response'] and (dict_val['response']) in dict_val['ground_truth']))
  prompt_dict[prompt] = dict_val

def make_json(prompt, var_params):
  p = deepcopy(params)
  if var_params:
    for k, v in var_params.items():
      p[k] = v
  return {
      "data": [
              prompt,
              p['max_new_tokens'],
              p['do_sample'],
              p['temperature'],
              p['top_p'],
              p['typical_p'],
              p['repetition_penalty'],
              p['encoder_repetition_penalty'],
              p['top_k'],
              p['min_length'],
              p['no_repeat_ngram_size'],
              p['num_beams'],
              p['penalty_alpha'],
              p['length_penalty'],
              p['early_stopping'],
              p['seed'],     
          ]
      }

def ans_contains_gt(ans_n, fixed_labels):
    for fixed_label in fixed_labels:
      if fixed_label in ans_n:
        print(f"Fuzzy label {ans_n} contains gt label {fixed_label}: MATCH \n")
        ans_n = fixed_label
        return ans_n
    return None

def gt_contains_ans(ans_n, fixed_labels):
    if ans_n == "":
        return None
    for fixed_label in fixed_labels:
      if ans_n in fixed_label:
        print(f"GT label {fixed_label} contains fuzzy label {ans_n}: MATCH \n")
        ans_n = fixed_label
        return ans_n
    return None

def basic_contains(ans_n, fixed_labels, method):
    #TODO: not sure the order should be fixed like this, could be made flexible
    if ans_n in fixed_labels:
        return ans_n
    if "ans_contains_gt" in method:
        res = ans_contains_gt(ans_n, fixed_labels)
        if res:
            return res
    if "gt_contains_ans" in method:
        res = gt_contains_ans(ans_n, fixed_labels)
        if res:
            return res
    return None

def fuzzy_label_match(orig_ans, fixed_labels, session, link, prompt, lsd, model, method=["ans_contains_gt", "gt_contains_ans", "resample"]):
    #answer is already in label set, no fuzzy match needed
    ans_n = fix_labels(orig_ans, lsd)
    res = basic_contains(ans_n, fixed_labels, method)
    if res:
        return res
    if "similarity" in method:
        ans_embedding = sent_model.encode(ans_n)
        lbl_embeddings = sent_model.encode(fixed_labels)
        sims = {lbl : util.pytorch_cos_sim(ans_embedding, le) for lbl, le in zip(fixed_labels, lbl_embeddings)}
        return max(sims, key=sims.get)
    if "resample" in method:
        #fuzzy label matching strategy
        for k in range(2,6):
            if "gpt" in model:
                ans_n = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0 + k/10,
                ).choices[0]['message']['content'].lower()
            elif model in ["llama-zs", "opt-iml-30b-zs"]:
                pipe, local_llm, llm_chain = set_pipeline(k=k)
                ans_n = llm_chain.run(prompt)
            elif model in ["topp-zs", "flan-ul2-zs"]:
                ans_n = get_topp_resp(prompt, k)
            else:
                rep_pen = params['repetition_penalty']
                top_p = params['top_p']
                temp = params['temperature']
                ans_n = call_llama_model(session, link, prompt, lsd, {'no_repeat_ngram_size' : 1, 'top_p' : top_p - (0.1 * k), 'temperature' : 0.9})
                params['top_p'] = top_p
                params['temperature'] = temp
            res = basic_contains(ans_n, fixed_labels, method)
            if res:
                return res
    #print("Applying fallback label, 'text' \n")
    return 'text'

INTEGER_SET = set(r"0123456789,/\+-.^_()[] :")

def get_base_dtype(context):
    dtype = "integer"
    for item in context:
        if not all(char in INTEGER_SET for char in item):
          #print(f"String is OTHER because: {[char for char in item if char not in INTEGER_SET]}")
          return "other"
        try:
            if item.endswith(".0") or item.endswith(",0"):
              item = item[:-2]
              item = str(int(item))
            if item.endswith(".00") or item.endswith(",00"):
              item = item[:-3]
              item = str(int(item))
        except:
            return "float"
        temp_item = re.sub(r"[^a-zA-Z0-9.]", "", item)
        if not temp_item.isdigit():
          #print(f"string is FLOAT because {temp_item} is not an integer")
          dtype = "float"
    return dtype    

def query_correct_model(model, prompt, context_labels, context, session, link, lsd):
    if model in ["llama-zs", "opt-iml-max-30b-zs"]:
        orig_ans = llm_chain.run(prompt)
        if orig_ans is None:
            prompt = prompt_context_insert(context_labels, context, MAX_LEN, "llama-retry")
            orig_ans = llm_chain.run(prompt)
    elif model in ["topp-zs", "flan-ul2-zs"]:
        orig_ans = get_topp_resp(prompt, 1)
    else:
        orig_ans = call_llama_model(session, link, prompt, lsd, None)
        if orig_ans is None:
            prompt = prompt_context_insert(context_labels, context, MAX_LEN, "llama-retry")
            orig_ans = call_llama_model(session, link, prompt, lsd, None)
    return orig_ans

def get_df_sample_col(col, rand_seed, len_context, min_variance=2, replace=False):
    df = pd.Series(col)
    ignore_list = ["None", 'none', 'NaN', 'nan', 'N/A', 'na', '']
    sample_list = list(set(p[:75] for p in pd.unique(df.astype(str)[col]) if p not in ignore_list))
    if len(sample_list) < 1:
      return ["None"] * len_context
    if len(sample_list) < len_context:
      sample_list = sample_list * len_context
    if len(sample_list) > len_context:
      sample_list = sample_list[:len_context]
    assert len(sample_list) == len_context, f"An index in val_indices is length {len(sample_list)}"
    return sample_list

def check_substr_contains_only_set(str, acceptable_chars):
   validation = set(str)
   print("Checking if it contains only ",acceptable_chars)
   if validation.issubset(acceptable_chars):
      return True
   else:
      return False

def insert_source(context, fname):
  pattern = r"_([^_]*)_" # Matches substrings that start and end with "_"
  matcher = re.search(pattern, fname)
  addstr = str(matcher.group()).replace("_", "").split(".")[0]
  #context.insert(0, "SRC_FILE: " + addstr + "COL_VALS: ")
  context.insert(0, "SRC: " + addstr)
  return context    
    
def get_df_sample(df, rand_seed, val_indices, len_context, min_variance=1, replace=False, full=False, other_col=False, max_len=8000):
    column_samples = {}
    ignore_list = ["None", 'none', 'NaN', 'nan', 'N/A', 'na', '']
    for col in df.columns:
      sample_list = list(set(p[:max_len//(len_context*3)] for p in pd.unique(df.astype(str)[col]) if p not in ignore_list))
      #reformat integer samples
      sl_mod = []
      # Meta-features
      if full:
        meta_features = derive_meta_features(df[col])
        meta_features['rolling-mean-window-4'] = meta_features['rolling-mean-window-4'][:5]
      # Sampling from other columns
      if other_col:
        sample_list_fill_size = len_context - len(sample_list)
        nc = len(df.columns)
        per_column_context = max(1, sample_list_fill_size // nc)
        for idx, oc in enumerate(df.columns):
          items = df[oc].astype(str).iloc[0:per_column_context].tolist()
          sample_list = sample_list + ["OC: " + str(item) for item in items]
      if not sample_list:
        sample_list = ["None"]
      if len(sample_list) < len_context:
        sample_list = sample_list * len_context
      if len(sample_list) > len_context:
        sample_list = sample_list[:len_context]
      assert len(sample_list) == len_context, "An index in val_indices is length " + str(len(sample_list))
      if full:
        if meta_features['std'] == "N/A":
          sample_list = sample_list + ["" for k,v in meta_features.items()]
        else:
          sample_list = sample_list + [str(k) + ": " + str(v) for k,v in meta_features.items()]
      # print("sample list")
      # print(sample_list)
      column_samples[col] = sample_list
      # print("column samples")
      # print(column_samples)
    return pd.DataFrame.from_dict(column_samples)

NUMERIC_AND_COMMA = set('0123456789,')

BOOLEAN_SET = ["True", "true", "False", "false", "yes", "Yes", "No", "no"]

def apply_basic_rules(context, lbl):
  if not context:
    return lbl
  if not isinstance(context, list):
    return lbl
  try:
      if all(s.endswith(" g") for s in context):
        lbl = "weight"
      if all(s.endswith(" kg") for s in context):
        lbl = "weight"
      if all(s.endswith(" lb") for s in context):
        lbl = "weight"
      if all(s.endswith(" lbs") for s in context):
        lbl = "weight"
      if all(s.endswith(" pounds") for s in context):
        lbl = "weight"
      if all(s.endswith(" cal") for s in context):
        lbl = "calories"
      if all(s.endswith(" kcal") for s in context):
        lbl = "calories"
      if all(s.endswith(" calories") for s in context):
        lbl = "calories"
      if all("review" in s.lower() for s in context):
        lbl = "review"
      if all("recipe" in s.lower() for s in context):
        lbl = "recipe"
      if lbl and "openopen" in lbl:
        lbl = "openinghours"
      if all(s in BOOLEAN_SET for s in context):
        lbl = "boolean"
      return lbl
  except Exception as e:
      print(f"Exception {e} in apply_basic_rules with context {context}")
      return lbl

def get_cbc_pred(orig_label, numeric_labels):
    try:
      #FOR VALIDATION
      #cbc_filematch = dfv[dfv['df_path'] == str(f)]
      #FOR TEST SET
      cbc_filematch = dft[dft['df_path'] == str(f)]
      cbc_labelmatch = cbc_filematch[cbc_filematch['label'] == orig_label]
      if len(cbc_labelmatch) == 1:
        cbc_pred = numeric_labels[cbc_labelmatch['preds'].item()]
      else:
        cbc_pred = None
    except Exception as e:
      print("cbc excpetion: ")
      print(e)
      cbc_pred=None

def run_val(model : str, save_path : str, inputs : list, label_set : list, input_df : pd.DataFrame, resume : bool = True, results : bool = True, stop_early : int = -1, rand_seed : int = 13, sample_size : int = 5, link : str = None, response : bool = True, summ_stats : bool = False, table_src : bool = False, other_col : bool = False, skip_short : bool = False, min_var : int = 0, method : list = ["similarity"]):
  infmods = "sherlock" in model or "doduo" in model
  isd4 = "d4" in label_set['name']
  if resume and os.path.isfile(save_path):
    with open(save_path, 'r', encoding='utf-8') as f:
      prompt_dict = json.load(f)
  else:
    prompt_dict = {}
  s = requests.Session()
  if "-zs" in model:
    base_model.eval()
  if isinstance(inputs, dict):
    labels = ["_".join(k.split("_")[:-1]) for k in inputs.keys()]
    inputs = list(inputs.values())
  for idx, f in tqdm(enumerate(inputs), total=len(inputs)):
    if idx % 100 == 0:
      with open(save_path, 'w', encoding='utf-8') as alt_f:
        #print("pd", prompt_dict, "\n")
        json.dump(prompt_dict, alt_f, ensure_ascii=False, indent=4)
    if stop_early > -1 and idx == stop_early:
      break
    if isd4:
        f_df = f
        label_indices=[2]
        gt_labels = labels[idx]
    else:
        gt_labels = input_df[input_df['table_name'] == f.name]
        label_indices = pd.unique(gt_labels['column_index']).tolist()
        f_df = pd.read_json(f, compression='gzip', lines=True)
    if infmods:
        label_indices = ["values"]
        key = get_sherlock_resp(f_df, gt_labels, prompt_dict, model, label_indices, str(f), label_set)
        continue
    sample_df = get_df_sample(f_df, rand_seed, label_indices, sample_size, full=summ_stats, other_col=other_col, max_len=MAX_LEN)
    #print(f"in main loop, sample_df is {sample_df}")
    f_df_cols = f_df.columns
    for idx, col in enumerate(f_df_cols):
      if idx not in label_indices:
        continue
      #NOTE: skipping evaluation for columns with insufficient variance in the column
#       if len(pd.unique(sample_df.astype(str)[col])) < min_var:
#         continue
      if isd4:
        orig_label = gt_labels
      else:
        gt_row = gt_labels[gt_labels['column_index'] == idx]
        orig_label = gt_row['label'].item()
      label = fix_labels(orig_label, label_set)
      limited_context = sample_df[col].tolist()[:sample_size]
      #NOTE: could consider using min_var here
      #if full and len(pd.unique(sample_df[col].tolist())) < 3:
      if table_src:
        context = insert_source(sample_df[col].tolist(), f.name)
      else:
        context = sample_df[col].tolist()
      if "gpt-3.5" in model:
        key = get_chatgpt_resp(label_set, context, label, prompt_dict, response=response, session=s, method=method)
      elif "ada-personal" in model:
        key = get_ada_resp(label_set, context, label, prompt_dict, response=response, session=s)
      elif "bloomz" in model:
        key = get_bloomz_resp(label_set, context, label, prompt_dict, response=response, session=s)
      elif "llama" in model or "-zs" in model:
        #cbc_pred = get_cbc_pred(orig_label, numeric_labels)
        cbc_pred = None
        key = get_llama_resp(label_set, context, label, prompt_dict, link=link, response=response, session=s, cbc=cbc_pred, model=model, limited_context=limited_context, method=method)
        # print("Key: ", key, "\n")
        #print("pdk", prompt_dict[key], "\n")
      prompt_dict[key]['original_label'] = orig_label
      prompt_dict[key]['file+idx'] = str(f) + "_" + str(idx)
  with open(save_path, 'w', encoding='utf-8') as my_f:
    json.dump(prompt_dict, my_f, ensure_ascii=False, indent=4)
  if results:
    results_checker(save_path, skip_duplicates = False)

In [28]:
import json
from statistics import mean

ENDINGS = ["ANSWER:", "CATEGORY:"]

def results_checker_doduo(file_name, skip_duplicates = True):
    with open(file_name, "r") as f:
      d = json.load(f)
    correct = 0
    n = len(d)
    per_class_results = dict()
    for k, v in d.items():
        response_set = set(v["response"])
        for r in response_set:
            per_class_results.setdefault(r, {"TP": 0, "FP": 0, "FN": 0, "Total": 0})
        per_class_results.setdefault(v["ground_truth"], {"TP": 0, "FP": 0, "FN": 0, "Total": 0})
        if v['correct'] == True:
            correct += 1
            per_class_results[v["ground_truth"]]["TP"] += 1
        else:
            per_class_results[v["ground_truth"]]["FN"] += 1
            for r in response_set:
                per_class_results[r]["FP"] += 1
        per_class_results[v["ground_truth"]]["Total"] += 1

    for k, v in per_class_results.items():
        v['F1'] = (2 * v["TP"]) / (2 * v["TP"] + v["FP"] + v["FN"])

    weighted_f1 = sum([v["F1"] * v["Total"] for k, v in per_class_results.items()]) / n
    unweighted_f1 = mean([v["F1"] for k, v in per_class_results.items()])

    print(f"Total entries: {n} \n Accuracy: {round(correct/n, 4)} \n Weighted F1: {round(weighted_f1, 4)} \n Unweighted F1: {round(unweighted_f1, 4)}")

    
def results_checker(file_name, skip_duplicates = True):
    with open(file_name, "r") as f:
      d = json.load(f)
    if skip_duplicates:
      d = {k : v for k, v in d.items() if "CATEGORY: *" not in str(k)}

    correct = 0
    good_remap = 0
    total_remap = 0
    truncated = 0
    n = len(d)
    per_class_results = dict()

    for k, v in d.items():
        truncated_flag = True
        for ending in ENDINGS:
            if ending in str(k):
                truncated_flag = False
        if truncated_flag:
            truncated += 1
        per_class_results.setdefault(v["ground_truth"], {"TP": 0, "FP": 0, "FN": 0, "Total": 0})
        per_class_results.setdefault(v["response"], {"TP": 0, "FP": 0, "FN": 0, "Total": 0})
        if v["original_model_answer"] != v["response"]:
            total_remap += 1
            if v['correct'] == True:
                good_remap += 1
        if v['correct'] == True:
            correct += 1
            per_class_results[v["ground_truth"]]["TP"] += 1
        else:
            per_class_results[v["ground_truth"]]["FN"] += 1
            per_class_results[v["response"]]["FP"] += 1
        per_class_results[v["ground_truth"]]["Total"] += 1

    for k, v in per_class_results.items():
        v['F1'] = (2 * v["TP"]) / (2 * v["TP"] + v["FP"] + v["FN"])

    weighted_f1 = sum([v["F1"] * v["Total"] for k, v in per_class_results.items()]) / n
    unweighted_f1 = mean([v["F1"] for k, v in per_class_results.items()])

    print(f"Total entries: {n} \n Accuracy: {round(correct/n, 4)} \n Weighted F1: {round(weighted_f1, 4)} \n Unweighted F1: {round(unweighted_f1, 4)} \n Correct Remap: {good_remap} \n Total Remap: {total_remap} \n Truncated: {truncated}")


In [16]:
def missing_entries(f1, f2):
    with open(f1, "r") as file1:
      d1 = json.load(file1)    
    with open(f2, "r") as file2:
      d2 = json.load(file2)
    paths1 = set([v["file+idx"] for _, v in d1.items()])
    paths2 = set([v["file+idx"] for _, v in d2.items()])
    return paths1 - paths2

## Results Checking

In [60]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/opt-iml-old/opt-iml-max-30b-zs-sim-10sample-old.json", skip_duplicates=False)


Total entries: 12279 
 Accuracy: 0.357 
 Weighted F1: 0.3055 
 Unweighted F1: 0.2245 
 Correct Remap: 4249 
 Total Remap: 11931 
 Truncated: 12279


In [61]:
model_name = "flan-ul2"

filename = f"{model_name}-zs-sim-10sample-v4.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

results_checker(sp)

Total entries: 15040 
 Accuracy: 0.4214 
 Weighted F1: 0.377 
 Unweighted F1: 0.3114 
 Correct Remap: 20 
 Total Remap: 160 
 Truncated: 0


In [27]:
filename = "flanxxl-zs-cont+resam-5sample-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

results_checker(sp)

Total entries: 15040 
 Accuracy: 0.3773 
 Weighted F1: 0.3586 
 Unweighted F1: 0.2857 
 Correct Remap: 13 
 Total Remap: 23 
 Truncated: 4682


In [29]:
results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/flanxxl-zs-cont+resam-10sample-v2.json")

Total entries: 15040 
 Accuracy: 0.3832 
 Weighted F1: 0.3562 
 Unweighted F1: 0.2843 
 Correct Remap: 0 
 Total Remap: 0 
 Truncated: 0


In [30]:
results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-cont+resam-3sample.json", skip_duplicates=False)

Total entries: 15040 
 Accuracy: 0.7541 
 Weighted F1: 0.7591 
 Unweighted F1: 0.7493 
 Correct Remap: 1135 
 Total Remap: 2595 
 Truncated: 0


In [65]:
results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-cont+resam-5sample-v2.json", skip_duplicates=True)

Total entries: 14547 
 Accuracy: 0.7892 
 Weighted F1: 0.7914 
 Unweighted F1: 0.7745 
 Correct Remap: 915 
 Total Remap: 2033 
 Truncated: 0


In [32]:
results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-cont+resam-full-10sample-v3.json", skip_duplicates=False)

Total entries: 15040 
 Accuracy: 0.789 
 Weighted F1: 0.7913 
 Unweighted F1: 0.7798 
 Correct Remap: 981 
 Total Remap: 2120 
 Truncated: 0


In [33]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-cont+resam-fullplusothercol-5sample-v2.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.809 
 Weighted F1: 0.8075 
 Unweighted F1: 0.7996 
 Correct Remap: 983 
 Total Remap: 1711 
 Truncated: 0


In [34]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-cont+resam-fullplusothercol-10sample-v2.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.8144 
 Weighted F1: 0.8131 
 Unweighted F1: 0.8013 
 Correct Remap: 1081 
 Total Remap: 1807 
 Truncated: 5


In [18]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-cont+resam-fullplusothercol-10sample-v3.json", skip_duplicates=False)


ZeroDivisionError: division by zero

In [35]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-cont+resam-fullplusothercol-15sample-v2.json", skip_duplicates=False)


Total entries: 5447 
 Accuracy: 0.7979 
 Weighted F1: 0.7974 
 Unweighted F1: 0.775 
 Correct Remap: 455 
 Total Remap: 753 
 Truncated: 6


In [39]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-cont+resam-fullfalse-5sample.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.7664 
 Weighted F1: 0.7635 
 Unweighted F1: 0.7743 
 Correct Remap: 1575 
 Total Remap: 2760 
 Truncated: 0


In [40]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-cont+resam-summstatsonly-5sample.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.7682 
 Weighted F1: 0.7711 
 Unweighted F1: 0.7769 
 Correct Remap: 1335 
 Total Remap: 2607 
 Truncated: 0


In [66]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-sim-5sample.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.7685 
 Weighted F1: 0.7656 
 Unweighted F1: 0.742 
 Correct Remap: 792 
 Total Remap: 2199 
 Truncated: 0


In [45]:

results_checker_doduo("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/doduo-sherlock-sotab-small.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.2805 
 Weighted F1: 0.2381 
 Unweighted F1: 0.0879


In [46]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/flan-t5-xxl-zs-sim-5sample-full+othercol.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.2609 
 Weighted F1: 0.2288 
 Unweighted F1: 0.1947 
 Correct Remap: 0 
 Total Remap: 0 
 Truncated: 200


In [47]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/flan-t5-xxl-zs-sim-5sample-summ+tablesrc.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.311 
 Weighted F1: 0.2951 
 Unweighted F1: 0.2602 
 Correct Remap: 0 
 Total Remap: 0 
 Truncated: 0


In [48]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/flan-t5-xxl-zs-sim-5sample-tablesrc-v2.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.3235 
 Weighted F1: 0.3289 
 Unweighted F1: 0.2638 
 Correct Remap: 0 
 Total Remap: 0 
 Truncated: 0


In [51]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/flanxxl-zs-cont-3sample.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.3616 
 Weighted F1: 0.3483 
 Unweighted F1: 0.2679 
 Correct Remap: 6 
 Total Remap: 14 
 Truncated: 2762


In [57]:
results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/flanxxl-zs-cont+resam-3sample-v2.json")

Total entries: 15040 
 Accuracy: 0.3666 
 Weighted F1: 0.3583 
 Unweighted F1: 0.2805 
 Correct Remap: 0 
 Total Remap: 0 
 Truncated: 0


In [52]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/flanxxl-zs-cont-5sample.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.3865 
 Weighted F1: 0.3774 
 Unweighted F1: 0.2982 
 Correct Remap: 0 
 Total Remap: 1 
 Truncated: 0


In [53]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/flanxxl-zs-cont-10sample.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.3989 
 Weighted F1: 0.3837 
 Unweighted F1: 0.3037 
 Correct Remap: 0 
 Total Remap: 0 
 Truncated: 0


In [63]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/gpt-3.5-sim-10sample-full.json", skip_duplicates=True)


Total entries: 15236 
 Accuracy: 0.4462 
 Weighted F1: 0.4265 
 Unweighted F1: 0.4061 
 Correct Remap: 137 
 Total Remap: 1348 
 Truncated: 15236


In [64]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/gpt-3.5-sim-10sample-small.json", skip_duplicates=True)


Total entries: 12424 
 Accuracy: 0.5145 
 Weighted F1: 0.4813 
 Unweighted F1: 0.3747 
 Correct Remap: 93 
 Total Remap: 460 
 Truncated: 12424


In [73]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-contains-3sample.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.7276 
 Weighted F1: 0.7468 
 Unweighted F1: 0.7289 
 Correct Remap: 708 
 Total Remap: 2490 
 Truncated: 0


In [74]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-contains-5sample.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.7583 
 Weighted F1: 0.7722 
 Unweighted F1: 0.7601 
 Correct Remap: 631 
 Total Remap: 2105 
 Truncated: 0


In [75]:

results_checker("/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/llama-ft-contains-10sample.json", skip_duplicates=False)


Total entries: 15040 
 Accuracy: 0.7712 
 Weighted F1: 0.7836 
 Unweighted F1: 0.7741 
 Correct Remap: 677 
 Total Remap: 2102 
 Truncated: 0


## ZS Results

NOTES: ChatGPT is 1-2it/sec, but can be as slow as 2s/it. In high congestion times, can be slower because it drops connection. Estimated cost is 0.002 per query. 

Bloomz varies widely, from 1s / it to 45s / it. Ada-ft is 1-4s / it. Cost was ~$6.00 to tune it, .0016 per query (about the same as ChatGPT). Tested on 100-500 samples.

CHATGPT

~100 SAMPLES, FULL CLASS LIST, 10 CONTEXT: .49

~100 SAMPLES, TRIM CLASS LIST, 10 CONTEXT: .57

Eliminating datasets with low variance (require at least 3 unique entries per validated column), TRIM CLASS LIST, 10 CONTEXT: .66

ADA-FT

5 context samples, require 2 unique non-NaN entries per column: .7

### Other ZS Model Results

In [27]:
model_name = "opt-iml-max-30b"

base_model, tokenizer, template, pt, MAX_LEN = init_model(model_name)

pipe, local_llm, llm_chain = set_pipeline(k=1)

filename = f"{model_name}-zs-sim-10sample-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model=f"{model_name}-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=False, sample_size = 10)

results_checker(sp)

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]































../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [64,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [65,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [66,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [67,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [68,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [69,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasLtMatmul( ltHandle, computeDesc.descriptor(), &alpha_val, mat1_ptr, Adesc.descriptor(), mat2_ptr, Bdesc.descriptor(), &beta_val, result_ptr, Cdesc.descriptor(), result_ptr, Cdesc.descriptor(), &heuristicResult.algo, workspace.data_ptr(), workspaceSize, at::cuda::getCurrentCUDAStream())`

In [21]:
model_name = "flan-ul2"

base_model, tokenizer, template, pt, MAX_LEN = init_model(model_name)

filename = f"{model_name}-zs-sim-10sample-v4.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="flan-ul2-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=False, sample_size = 10)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]

#### Get Missing Samples

In [20]:
model_name = "flan-ul2"

filename = f"{model_name}-zs-sim-10sample-v4.json"

sp1 = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

sp2 = "/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/flanxxl-zs-cont+resam-3sample-v2.json"

me = sorted(missing_entries(sp2, sp1))

me

['/scratch/bf996/datasets/sotab/Test/Book_1jour-1jeu.com_September2020_CTA.json.gz_9',
 '/scratch/bf996/datasets/sotab/Test/Book_2d-market.com_September2020_CTA.json.gz_2',
 '/scratch/bf996/datasets/sotab/Test/Book_abebooks.co.uk_September2020_CTA.json.gz_3',
 '/scratch/bf996/datasets/sotab/Test/Book_abebooks.de_September2020_CTA.json.gz_3',
 '/scratch/bf996/datasets/sotab/Test/Book_act1diabetes.org_September2020_CTA.json.gz_2',
 '/scratch/bf996/datasets/sotab/Test/Book_agronews.com.pl_September2020_CTA.json.gz_5',
 '/scratch/bf996/datasets/sotab/Test/Book_allaboutmagazines.co.uk_September2020_CTA.json.gz_1',
 '/scratch/bf996/datasets/sotab/Test/Book_apfel-z.de_September2020_CTA.json.gz_3',
 '/scratch/bf996/datasets/sotab/Test/Book_apgaming.co.uk_September2020_CTA.json.gz_6',
 '/scratch/bf996/datasets/sotab/Test/Book_arlingtonva.us_September2020_CTA.json.gz_3',
 '/scratch/bf996/datasets/sotab/Test/Book_artforum.sk_September2020_CTA.json.gz_5',
 '/scratch/bf996/datasets/sotab/Test/Book_

### FLAN-XXL Main Results and Ablations

#### Hierarchical + Similarity, 5-sample (SOTAB 27-class)

In [26]:
base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-hier+sim-5sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity", "hierarchical"], resume=True, sample_size = 5)


  0%|          | 0/7026 [00:00<?, ?it/s]

Original answer Not needed not found in hierarchy
Original answer Add all ingredients to a blender and blend until not found in hierarchy


 Overall Accuracy score was 0.1214 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ['England Men vs Australia Men', 'Brisbane Heat vs Melbourne Renegades', 'Adelaide Strikers Women vs Melbourne Stars Women', 'Brisbane Heat vs Sydney Thunder', 'Sydney Thunder Women vs Brisbane Heat Women']
Label: event || Prediction: text


Sample Error: 
Context (500 chars):  ["['Melbourne Renegades Women', 'Brisbane Heat Women']", "['Hobart Hurricanes Women', 'Brisbane Heat Women']", "['Sydney Thunder Women', 'Brisbane Heat Women']", "['Brisbane Heat Women', 'Sydney Sixers Women']", "['NSW Blues', 'Tasmanian Tigers Men']"]
Label: sportsteam || Prediction: identifierat


Sample Error: 
Context (500 chars):  ['2020-03-24T21:19:52+00:00', '2020-05-29T22:23:07+00:00', '2019-02-24T18:28:08+00:00', '2018-07-12T12:20:47+00:00', '2020-08-06T17:4

#### Hierarchical + Similarity, 3-sample (SOTAB 27-class)

In [None]:
base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-hier+sim-3sample-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity", "hierarchical"], resume=True, sample_size = 3)


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]

#### Similarity, 5-sample (SOTAB 27-class)

In [29]:
model_name = "flan-t5-xxl"

base_model, tokenizer, template, pt, MAX_LEN = init_model(model_name)

filename = f"{model_name}-zs-sim-5sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=True, sample_size = 5)

results_checker(sp)

  0%|          | 0/7026 [00:00<?, ?it/s]



 Overall Accuracy score was 0.3362 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ["['Sydney Sixers', 'Melbourne Stars']", "['Sydney Sixers', 'Brisbane Heat']", "['Adelaide Strikers', 'Brisbane Heat']", "['Sydney Thunder Women', 'Sydney Sixers Women']", "['Hobart Hurricanes Women', 'Sydney Sixers Women']"]
Label: sportsteam || Prediction: organization


Sample Error: 
Context (500 chars):  ['2020-08-06T17:55:51+00:00', '2018-06-05T21:11:53+00:00', '2019-07-11T01:34:36+00:00', '2020-08-06T17:46:22+00:00', '2015-07-16T22:53:25+00:00']
Label: date || Prediction: time


Sample Error: 
Context (500 chars):  ['thehoth', 'Bail Man', 'mbowie', 'thehoth', 'Bail Man']
Label: person || Prediction: creativework


Sample Error: 
Context (500 chars):  ['kg', 'kg', 'kg', 'kg', 'kg']
Label: text || Prediction: weight


Sample Error: 
Context (500 chars):  ["['I have ordered DNA Testing for immigration purposes for my daughter with DNA Diagnostics. They sent me the result 

#### Similarity, 5-Sample + Summ Stats (SOTAB 27-class)

In [None]:
model_name = "flan-t5-xxl"

if curr_model != model_name:
    base_model, tokenizer, template, pt, MAX_LEN = init_model(model_name)

filename = f"{model_name}-zs-sim-5sample-summstats-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=True, summ_stats=True, sample_size = 5)

results_checker(sp)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.0
CUDA SETUP: Detected CUDA version 113
CUDA SETUP: Loading binary /ext3/miniconda3/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda113_nocublaslt.so...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (521 > 512). Running this sequence through the model will result in indexing errors


#### Similarity, 5-Sample + Table SRC (SOTAB 27-class)

In [17]:
model_name = "flan-t5-xxl"

if curr_model != model_name:
    base_model, tokenizer, template, pt, MAX_LEN = init_model(model_name)

filename = f"{model_name}-zs-sim-5sample-tablesrc-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=True, table_src=True, sample_size = 5)

results_checker(sp)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 113
CUDA SETUP: Loading binary /ext3/miniconda3/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda113.so...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (571 > 512). Running this sequence through the model will result in indexing errors




 Overall Accuracy score was 0.2889 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ['SRC: bailmanbailbonds', '2018-06-05T21:11:53+00:00', '2019-02-24T18:28:08+00:00', '2020-03-24T21:27:53+00:00', '2017-12-27T23:17:13+00:00', '2018-09-20T11:56:43+00:00']
Label: date || Prediction: time


Sample Error: 
Context (500 chars):  ['SRC: bailmanbailbonds', 'mbowie', 'Bail Man', 'thehoth', 'mbowie', 'Bail Man']
Label: person || Prediction: creativework


Sample Error: 
Context (500 chars):  ['SRC: gledopto', 'kg', 'kg', 'kg', 'kg', 'kg']
Label: text || Prediction: weight


Sample Error: 
Context (500 chars):  ['SRC: dnatestingchoice', "['It had been 2 years since I’d spoken to my last living grandparent so there was tension right from the start. Fortunately", "['The ‘‘dog ancestor’ report on our rescue dog appears very consistent with the look of and character of our dog. The repor", "['Oils not have asked for more!', 'A highly supportive team worked with us through

#### Similarity, 5-sample + Table src + summ stats (SOTAB 27-class)

In [18]:
# model_name = "flan-t5-xxl"

# if curr_model != model_name:
#     base_model, tokenizer, template, pt, MAX_LEN = init_model(model_name)

filename = f"{model_name}-zs-sim-5sample-summ+tablesrc.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

filename = f"{model_name}-zs-sim-5sample-summ+tablesrc.json"
run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=True, table_src=True, summ_stats=True, sample_size = 5)

results_checker(sp)

  0%|          | 0/7026 [00:00<?, ?it/s]



 Overall Accuracy score was 0.2777 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ['SRC: bailmanbailbonds', '2018-06-05T21:11:53+00:00', '2019-02-24T18:28:08+00:00', '2020-03-24T21:27:53+00:00', '2017-12-27T23:17:13+00:00', '2018-09-20T11:56:43+00:00', 'std: 0.0', 'mean: 25.0', 'mode: 25', 'median: 25.0', 'max: 25', 'min: 25', 'rolling-mean-window-4: [0.0]', '']
Label: date || Prediction: time


Sample Error: 
Context (500 chars):  ['SRC: bailmanbailbonds', 'mbowie', 'Bail Man', 'thehoth', 'mbowie', 'Bail Man', 'std: 0.55', 'mean: 7.22', 'mode: 7', 'median: 7.0', 'max: 8', 'min: 6', 'rolling-mean-window-4: [0.0]', '']
Label: person || Prediction: creativework


Sample Error: 
Context (500 chars):  ['SRC: gledopto', 'kg', 'kg', 'kg', 'kg', 'kg', 'std: 0.93', 'mean: 2.59', 'mode: 2', 'median: 2.0', 'max: 4', 'min: 2', 'rolling-mean-window-4: [0.0]', '']
Label: text || Prediction: weight


Sample Error: 
Context (500 chars):  ['SRC: dnatestingchoice', '5', '5

#### Similarity, 5-Sample + Other Columns (SOTAB 27-class)

In [None]:
# model_name = "flan-t5-xxl"

# if curr_model != model_name:
#     base_model, tokenizer, template, pt, MAX_LEN = init_model(model_name)

filename = f"{model_name}-zs-sim-5sample-full+othercol.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=True, table_src=True, summ_stats=True, other_col=True, sample_size = 5)

results_checker(sp)

  0%|          | 0/7026 [00:00<?, ?it/s]

#### Contains + Resample, 5-sample (SOTAB 27-class)

In [46]:
base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-cont+resam-5sample-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["ans_contains_gt", "gt_contains_ans", "resample"], resume=True, sample_size = 5)


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]

GT label event contains fuzzy label ev: MATCH 

GT label event contains fuzzy label ev: MATCH 

GT label event contains fuzzy label ev: MATCH 

GT label event contains fuzzy label ev: MATCH 

GT label event contains fuzzy label ev: MATCH 

GT label event contains fuzzy label ev: MATCH 

GT label event contains fuzzy label ev: MATCH 

GT label event contains fuzzy label ev: MATCH 



 Overall Accuracy score was 0.3369 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ['Adelaide Strikers Women vs Melbourne Stars Women', 'Brisbane Heat vs Melbourne Renegades', 'Perth Scorchers Women vs Sydney Sixers Women', 'New Zealand Men vs India Men', 'India vs South Africa']
Label: event || Prediction: sportsteam


Sample Error: 
Context (500 chars):  ['thehoth', 'mbowie', 'Bail Man', 'thehoth', 'mbowie']
Label: person || Prediction: creativework


Sample Error: 
Context (500 chars):  ['kg', 'kg', 'kg', 'kg', 'kg']
Label: text || Prediction: weight


Sample Error: 
Context (5

#### Contains and Resample, Sample Size 3 (SOTAB 27-class)

In [None]:
base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-cont+resam-3sample-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["ans_contains_gt", "gt_contains_ans", "resample"], resume=False, sample_size = 3)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 113
CUDA SETUP: Loading binary /ext3/miniconda3/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda113.so...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (577 > 512). Running this sequence through the model will result in indexing errors


#### Contains + Resample, Sample Size 10

In [16]:

base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-cont+res-10sample-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["ans_contains_gt", "gt_contains_ans", "resample"], resume=True, sample_size = 10)

results_checker(sp)



Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.0
CUDA SETUP: Detected CUDA version 113
CUDA SETUP: Loading binary /ext3/miniconda3/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda113_nocublaslt.so...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (572 > 512). Running this sequence through the model will result in indexing errors




 Overall Accuracy score was 0.3422 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ['2019-02-24T18:28:08+00:00', '2020-03-24T21:27:53+00:00', '2020-08-06T17:46:22+00:00', '2018-01-18T02:52:20+00:00', '2017-12-27T23:17:13+00:00', '2019-08-20T21:52:10+00:00', '2018-06-05T21:11:53+00:00', '2018-09-06T09:33:10+00:00', '2019-07-11T01:34:36+00:00', '2020-05-29T22:23:07+00:00']
Label: date || Prediction: time


Sample Error: 
Context (500 chars):  ['kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg']
Label: text || Prediction: weight


Sample Error: 
Context (500 chars):  ["['I signed up for the Individual plan, which came with 3 well", "['Misleading advertising – you need a doctor’s note to claim ", "['Well done to Alphabiolabs. We cannot fault their service.',", "['When it comes to versatility, FTDNA is your best option. Th", "['Some time ago I tested the Y Elite 2.1b with them after hav", "['We’re seeing our Collie in a whole new light! The DNA swabb", "

#### Contains, Sample Size 3 (SOTAB 27-class)

In [47]:
base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-cont-3sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["ans_contains_gt", "gt_contains_ans"], resume=True, sample_size = 3)

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]

GT label event contains fuzzy label ev: MATCH 



 Overall Accuracy score was 0.323 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ['thehoth', 'mbowie', 'Bail Man']
Label: person || Prediction: creativework


Sample Error: 
Context (500 chars):  ['kg', 'kg', 'kg']
Label: text || Prediction: weight


Sample Error: 
Context (500 chars):  ['Huge number of test carried out for what we paid and we got full colours too which just for them would have cost us more than this at other labs. Really glad our dog is clear! Took about 2 and a half weeks to get the results back. Really great at answering our questions on the live chat and I’ve alr', "['I signed up for the Individual plan, which came with 3 wellness reports and unlimited access to everything on the site for a year. I don’t know much about genetics, so at first, I had a bit of trouble understanding the information that was available. Fortunately, all the information I needed to un", "['Smoother than I was ex

In [None]:
filename = "flanxxl-zs-cont-5sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["ans_contains_gt", "gt_contains_ans"], resume=True, sample_size = 5)

results_checker(sp)

In [None]:
filename = "flanxxl-zs-cont-10sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["ans_contains_gt", "gt_contains_ans"], resume=True, sample_size = 10)

results_checker(sp)

#### Similarity, Sample Size 3 (SOTAB 27-class)

In [48]:
base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-sim-3sample-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=True, sample_size = 3)


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7026 [00:00<?, ?it/s]



 Overall Accuracy score was 0.3227 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ['thehoth', 'mbowie', 'Bail Man']
Label: person || Prediction: creativework


Sample Error: 
Context (500 chars):  ['kg', 'kg', 'kg']
Label: text || Prediction: weight


Sample Error: 
Context (500 chars):  ['Huge number of test carried out for what we paid and we got full colours too which just for them would have cost us more than this at other labs. Really glad our dog is clear! Took about 2 and a half weeks to get the results back. Really great at answering our questions on the live chat and I’ve alr', "['I signed up for the Individual plan, which came with 3 wellness reports and unlimited access to everything on the site for a year. I don’t know much about genetics, so at first, I had a bit of trouble understanding the information that was available. Fortunately, all the information I needed to un", "['Smoother than I was expecting, thank you Easy!', 'fell over themselve

#### Similarity, Sample Size 7 (SOTAB 27-class)

In [33]:

base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-sim-7sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=True, sample_size = 7)


  0%|          | 0/7026 [00:00<?, ?it/s]



 Overall Accuracy score was 0.3514 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ['Bail Man', 'thehoth', 'mbowie', 'Bail Man', 'thehoth', 'mbowie', 'Bail Man']
Label: person || Prediction: creativework


Sample Error: 
Context (500 chars):  ['kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg']
Label: text || Prediction: weight


Sample Error: 
Context (500 chars):  ['4.8', '5.0', '4.5', '3.8', '4.9', '4.7', '2.8']
Label: number || Prediction: product


Sample Error: 
Context (500 chars):  ["['Very professional at all times. Great customer service!', 'quick turn around and the responsiveness of the customer service was good', 'I telephoned on two occasions before going ahead with the analysis. On both occasions I was very pleased with the knowledge advice and service of the people who a", 'Excellent service they rushed it through for me as bitch was in season and my stud dog was favourite to be used all bit short notice but affinity dna saved the day thank you Deffo 

#### Similarity, Sample Size 10 (SOTAB 27-class)

In [None]:

base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-sim-10sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=True, sample_size = 10)


#### Similarity, Sample Size 2 (SOTAB 27-class)

In [34]:
filename = "flanxxl-zs-sim-2sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, method=["similarity"], resume=True, sample_size = 2)


  0%|          | 0/7026 [00:00<?, ?it/s]



 Overall Accuracy score was 0.3409 
 Pct Eval: 0.89 

 Example errors: 

Sample Error: 
Context (500 chars):  ['2018-06-05T21:11:53+00:00', '2018-01-18T02:52:20+00:00']
Label: date || Prediction: time


Sample Error: 
Context (500 chars):  ['Bail Man', 'thehoth']
Label: person || Prediction: creativework


Sample Error: 
Context (500 chars):  ['kg', 'kg']
Label: text || Prediction: weight


Sample Error: 
Context (500 chars):  ['4.8', '5.0']
Label: number || Prediction: boolean


Sample Error: 
Context (500 chars):  ["['Very professional at all times. Great customer service!', 'quick turn around and the responsiveness of the customer service was good', 'I telephoned on two occasions before going ahead with the analysis. On both occasions I was very pleased with the knowledge advice and service of the people who a", 'Excellent service they rushed it through for me as bitch was in season and my stud dog was favourite to be used all bit short notice but affinity dna saved the day thank 

#### Contains, Sample Size 3 (SOTAB 91-class)

In [None]:
# base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-cont+res-3sample-SOTAB91.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, method=["ans_contains_gt", "gt_contains_ans", "resample"], resume=True, sample_size = 3)

results_checker(sp)


#### Similarity (D4)

In [94]:
filename = "flanxxl-results-zs-D4-v7.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=d4_dfs, input_df=gt_df_test, label_set=d4_zs_context_labels, method=["similarity"], resume=False, sample_size = 5)


  0%|          | 0/2000 [00:00<?, ?it/s]



 Overall Accuracy score was 0.611 
 Pct Eval: 1.0 

 Example errors: 

Sample Error: 
Context (500 chars):  ['XAKR', 'X244', 'MAUC', 'KBTA', 'KBRL']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['XAYV', 'KBVL', 'K104', 'XAHS', 'QAYP']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['Q297', 'KBRL', 'KBWG', 'K054', 'KADG']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['K104', 'X294', 'KAXC', 'Q020', 'KBVE']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['XAYV', 'XACY', 'K733', 'KBJF', 'RAEW']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['X486', 'KCJR', 'K187', 'KBWG', 'KAAS']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['X543', 'XAOK', 'QBEG', 'X583', 'Q205']
Label: ['school id'] || Prediction: license plate type


Sample Error: 
Context (500 ch

In [26]:
base_model, tokenizer, template, pt, MAX_LEN = init_model("flan-t5-xxl")

filename = "flanxxl-zs-D4-full-10sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="topp-zs", save_path=sp, inputs=d4_dfs, input_df=gt_df_test, label_set=d4_zs_context_labels, method=["similarity"], resume=False, sample_size = 10)

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]



 Overall Accuracy score was 0.5625 
 Pct Eval: 1.0 

 Example errors: 

Sample Error: 
Context (500 chars):  ['X311', 'XAKZ', 'K785', 'Q505', 'X089', 'QBDD', 'KBJL', 'Q116', 'X254', 'Q284']
Label: ['school id'] || Prediction: license plate type


Sample Error: 
Context (500 chars):  ['KCJZ', 'X022', 'KAFB', 'Q076', 'X403', 'RABN', 'X169', 'QBHX', 'M340', 'K282']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['KCHN', 'K351', 'XAIB', 'QAVC', 'QBHW', 'X360', 'K558', 'RAGD', 'X445', 'KCOU']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['KBPT', 'KCII', 'M182', 'KAFO', 'RAFS', 'X114', 'X584', 'K209', 'M188', 'K627']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['KBGS', 'X355', 'K350', 'M046', 'X080', 'X337', 'KBJH', 'Q104', 'K486', 'QALP']
Label: ['school id'] || Prediction: school dbn


Sample Error: 
Context (500 chars):  ['XAJR', 'Q336', 'Z010', 'M517', 'M519', 'M110

NameError: name 'd_subset' is not defined

In [28]:
results_checker(sp)

1125
2000
Total entries: 2000 
 Correct Pct: 0.5625


### ChatGPT Results

#### Similarity, Sample Size 10 (SOTAB 27-class)

In [114]:
filename = "gpt-3.5_results_test_v12_zeroshot_small.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="gpt-3.5", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_small, resume=True, sample_size = 10)


#### Similarity, Sample Size 10 (SOTAB 91-class)

In [None]:
filename = "gpt-3.5_results_test_v12_zeroshot_small.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="gpt-3.5", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_full, resume=True, sample_size = 10)

#### Similarity, Sample Size 10 (D4)

In [None]:
filename = "gpt-3.5-sim-10sample-D4-v3.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="gpt-3.5", save_path=sp, inputs=d4_dfs, input_df=gt_df_test, label_set=d4_zs_context_labels, method=["similarity", "skip-existing"], resume=True, sample_size = 10)

## FT Results

### LLAMA-CTA

#### Contains + Resample, 5-sample

In [None]:
# #REFERENCE: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
# #https://huggingface.co/docs/transformers/generation_strategies

cur_url = "https://079c124ee590ac75fe.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-cont+resam-5sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, resume=True, link=f"{cur_url}/run/textgen", full=True, method=["ans_contains_gt", "gt_contains_ans", "resample"], sample_size = 5)


  0%|          | 0/7026 [00:00<?, ?it/s]

Fuzzy label 4integer contains gt label integer: MATCH 

Fuzzy label number orintegerorcostrange contains gt label costrange: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label educationaloccupationalcredential ( contains gt label educationaloccupationalcredential: MATCH 

Fuzzy label drugstore,categorycode"> contains gt label categorycode: MATCH 

Fuzzy label chatperson contains gt label person: MATCH 

Fuzzy label town orcityorcounty contains gt label city: MATCH 

Fuzzy label verbatimreviewitemlist contains gt label itemlist: MATCH 

Fuzzy label showrooms,buildingname"> contains gt label buildingname: MATCH 

Fuzzy label sportssevents`text contains gt label event: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label abouttimetoeat contains gt label time: MATCH 

Fuzzy label product #modelnameorid 

#### Contains + Resample, 3 Sample

In [None]:
# #REFERENCE: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
# #https://huggingface.co/docs/transformers/generation_strategies

cur_url = "https://079c124ee590ac75fe.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-cont+resam-3sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, resume=True, link=f"{cur_url}/run/textgen", full=True, method=["ans_contains_gt", "gt_contains_ans", "resample"], sample_size = 3)


#### Similarity, 5 Sample

In [None]:
# #REFERENCE: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
# #https://huggingface.co/docs/transformers/generation_strategies

cur_url = "https://079c124ee590ac75fe.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-sim-5sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, resume=True, link=f"{cur_url}/run/textgen", full=True, method=["similarity"], sample_size = 5)


#### Similarity, 10-Sample

In [67]:
cur_url = "https://799ecc85ff8fb2da5a.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-sim-10sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, summ_stats=True, table_src=True, link=f"{cur_url}/run/textgen", method=["similarity"], sample_size = 10)


  0%|          | 0/7026 [00:00<?, ?it/s]

Total entries: 15040 
 Accuracy: 0.779 
 Weighted F1: 0.776 
 Unweighted F1: 0.7526 
 Correct Remap: 794 
 Total Remap: 2102 
 Truncated: 0


#### Similarity, 3-Sample

In [68]:
cur_url = "https://799ecc85ff8fb2da5a.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-sim-3sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, summ_stats=True, table_src=True, link=f"{cur_url}/run/textgen", method=["similarity"], sample_size = 3)


  0%|          | 0/7026 [00:00<?, ?it/s]

Total entries: 15040 
 Accuracy: 0.7316 
 Weighted F1: 0.7296 
 Unweighted F1: 0.699 
 Correct Remap: 768 
 Total Remap: 2490 
 Truncated: 0


#### Contains, 3-Sample, 5-Sample, 10-Sample

In [None]:
cur_url = "https://799ecc85ff8fb2da5a.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-contains-3sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, summ_stats=True, table_src=True, link=f"{cur_url}/run/textgen", method=["ans_contains_gt", "gt_contains_ans"], sample_size = 3)


  0%|          | 0/7026 [00:00<?, ?it/s]

Fuzzy label 3dmodelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label beautifulcreativework contains gt label creativework: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label phcategory contains gt label category: MATCH 

Fuzzy label territoryorganization contains gt label organization: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label timedelta contains gt label time: MATCH 

Fuzzy label ndrproduct contains gt label product: MATCH 

Fuzzy label 3dmodelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label 12 inches distance contains gt label distance: MATCH 

Fuzzy label 1integer contains gt label integer: MATCH 

Fuzzy label 1integer contains gt label integer: MATCH 

Fuzzy label 2integer contains gt label integer: MATCH 

Fuzzy label gpcountyorstate contains gt label countyor

GT label unitcode contains fuzzy label tcode: MATCH 

Fuzzy label amitperson contains gt label person: MATCH 

Fuzzy label midentifierat contains gt label identifierat: MATCH 

Fuzzy label 2text contains gt label text: MATCH 

Fuzzy label energycalories contains gt label calories: MATCH 

Fuzzy label universityeducationalorganization contains gt label educationalorganization: MATCH 

Fuzzy label usaudience contains gt label audience: MATCH 

Fuzzy label ['streetaddress contains gt label streetaddress: MATCH 

Fuzzy label ['company contains gt label company: MATCH 

Fuzzy label ['company contains gt label company: MATCH 

Fuzzy label lipton® product contains gt label product: MATCH 

Fuzzy label asthonbuildingname contains gt label buildingname: MATCH 

Fuzzy label sportssevent contains gt label event: MATCH 

Fuzzy label (faxnumber contains gt label faxnumber: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

GT label educationaloccupationalcredential contains fuzzy labe

Fuzzy label 1800currency contains gt label currency: MATCH 

Fuzzy label garde_faxnumber contains gt label faxnumber: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

Fuzzy label 8category contains gt label category: MATCH 

Fuzzy label 8category contains gt label category: MATCH 

Fuzzy label 2text contains gt label text: MATCH 

Fuzzy label gigacompany contains gt label company: MATCH 

Fuzzy label 6integer contains gt label integer: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label msidentifierat contains gt label identifierat: MATCH 

Fuzzy label 2personjobposting contains gt label jobposting: MATCH 

Fuzzy label 5th saturday event contains gt label event: MATCH 

Fuzzy label (telephone,organization contains gt label organization: MATCH 

Fuzzy label 2price contains gt label price: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy 

Fuzzy label unitcode** contains gt label unitcode: MATCH 

Fuzzy label myrtha restaurant contains gt label restaurant: MATCH 

Fuzzy label 1postalcode contains gt label postalcode: MATCH 

Fuzzy label 29.price contains gt label price: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label gg/rating contains gt label rating: MATCH 

Fuzzy label 2quantityrange contains gt label quantityrange: MATCH 

Fuzzy label us,country contains gt label country: MATCH 

Fuzzy label theperson contains gt label person: MATCH 

Fuzzy label acompany contains gt label company: MATCH 

Fuzzy label ttvepisode contains gt label tvepisode: MATCH 

Fuzzy label 6postalcode contains gt label postalcode: MATCH 

Fuzzy label flcountyorstate contains gt label countyorstate: MATCH 

Fuzzy label namauto (organization) contains gt label organization: MATCH 

Fuzzy label 3postalcode contains gt label postalcode: MATCH 

Fuzzy label ndtphotourl contains gt lab

Fuzzy label unitcode** contains gt label unitcode: MATCH 

Fuzzy label 「event」 contains gt label event: MATCH 

Fuzzy label unnumberedinteger contains gt label integer: MATCH 

Fuzzy label unnumberedinteger contains gt label integer: MATCH 

Fuzzy label 2integer contains gt label integer: MATCH 

Fuzzy label buildingname* contains gt label buildingname: MATCH 

Fuzzy label fordbuildingname contains gt label buildingname: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label -integer contains gt label integer: MATCH 

Fuzzy label ttvepisode contains gt label tvepisode: MATCH 

Fuzzy label 2statustype contains gt label statustype: MATCH 

Fuzzy label kfcrestaurant contains gt label restaurant: MATCH 

Fuzzy label alhotel contains gt label hotel: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label esportsevent contains gt label sportsevent: MATCH 

Fuzzy label hotels contains gt label hotel: MATCH 

Fuzzy label ncountry contains gt label country: MATCH 

Fuzzy label 1faxnumber contains gt label faxnumber: MATCH 

Fuzzy label 4integer contains gt label integer: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label gendergendertype contains gt label gendertype: MATCH 

Fuzzy label 2musicalbum contains gt label musicalbum: MATCH 

Fuzzy label 1buildingname contains gt label buildingname: MATCH 

Fuzzy label educationorganization contains gt label organization: MATCH 

Fuzzy label 5organization contains gt label organization: MATCH 

Fuzzy label screative,person contains gt label person: MATCH 

Fuzzy label cardinalsfaxnumber contains gt label faxnumber: MATCH 

Fuzzy label uscurrency contains gt label currency: MATCH 

Fuzzy label 5text contains gt label text: MATCH 

Fuzzy label 2quantityrange contains gt label quantityrange: MATCH 

Fuzzy label lcompany contains gt label company: MATCH 

Fuzzy label 0fmodelnameorid conta

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label jidentifierat contains gt label identifierat: MATCH 

Fuzzy label ethomusicartistat contains gt label musicartistat: MATCH 

Fuzzy label 7seriescreative contains gt label seriescreative: MATCH 

Fuzzy label watchtvepisode contains gt label tvepisode: MATCH 

Fuzzy label gendergendertype contains gt label gendertype: MATCH 

Fuzzy label 2buildingname contains gt label buildingname: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label emptymonetaryamount contains gt label monetaryamount: MATCH 

Fuzzy label 4.review contains gt label review: MATCH 

Fuzzy label thrillist review of contains gt label review: MATCH 

Fuzzy label flatbreadorganization contain

Fuzzy label cityorganization contains gt label organization: MATCH 

Fuzzy label organization** contains gt label organization: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

GT label educationaloccupationalcredential contains fuzzy label ia: MATCH 

Fuzzy label massage contains gt label mass: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

Fuzzy label cunitcode contains gt label unitcode: MATCH 

Fuzzy label unitcode** contains gt label unitcode: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label nan s. russelperson contains gt label person: MATCH 

Fuzzy label ebook contains gt label book: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label liveperson contains gt label person: MATCH 

Fuzzy label chamberorganization contains gt label organization: MATCH 

GT label 

Fuzzy label commentwebhtmlaction contains gt label webhtmlaction: MATCH 

Fuzzy label 5organization contains gt label organization: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

Fuzzy label ppostalcode contains gt label postalcode: MATCH 

Fuzzy label commentwebhtmlaction contains gt label webhtmlaction: MATCH 

Fuzzy label lunitcode contains gt label unitcode: MATCH 

Fuzzy label buildingname** contains gt label buildingname: MATCH 

Fuzzy label ttvepisode contains gt label tvepisode: MATCH 

Fuzzy label commentwebhtmlaction contains gt label webhtmlaction: MATCH 

Fuzzy label 9/price contains gt label price: MATCH 

Fuzzy label 8integer contains gt label integer: MATCH 

Fuzzy label 2company contains gt label company: MATCH 

GT label deliverymethod contains fuzzy label h: MATCH 

Fuzzy label 6text contains gt label text: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

Fuzzy label unitcode** contains gt label unitcode: MATCH 

Fuzzy label dtf

In [None]:
cur_url = "https://799ecc85ff8fb2da5a.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-contains-5sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, summ_stats=True, table_src=True, link=f"{cur_url}/run/textgen", method=["ans_contains_gt", "gt_contains_ans"], sample_size = 5)


In [None]:
cur_url = "https://799ecc85ff8fb2da5a.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-contains-10sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, summ_stats=True, table_src=True, link=f"{cur_url}/run/textgen", method=["ans_contains_gt", "gt_contains_ans"], sample_size = 10)


#### 10-Sample, Other Columns

In [21]:
# #REFERENCE: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
# #https://huggingface.co/docs/transformers/generation_strategies

cur_url = "https://e5bff7cbf5b4d1925d.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-cont+resam-fullplusothercol-10sample-v3.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, resume=False, link=f"{cur_url}/run/textgen", summ_stats=True, table_src=True, other_col=True, method=["ans_contains_gt", "gt_contains_ans", "resample"], sample_size = 10)

results_checker(sp, skip_duplicates=False)

  0%|          | 0/7026 [00:00<?, ?it/s]

Fuzzy label sportsrecruitmentmovieurl contains gt label movie: MATCH 

Fuzzy label testinginteger | chooseat // contains gt label integer: MATCH 

Fuzzy label (faxnumber)[f contains gt label faxnumber: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label ['offer contains gt label offer: MATCH 

Fuzzy label sportsrecording ///organization contains gt label organization: MATCH 

Fuzzy label postaddressesfromemail address contains gt label email: MATCH 

Fuzzy label musicalbum* contains gt label musicalbum: MATCH 

Fuzzy label entertainmentthingyoridat contains gt label thing: MATCH 

Fuzzy label 0modelnameorid contains gt label modelnameorid: MATCH 



TypeError: unhashable type: 'Series'

#### 5-Sample, no table src

In [35]:
# #REFERENCE: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
# #https://huggingface.co/docs/transformers/generation_strategies

cur_url = "https://079c124ee590ac75fe.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-cont+resam-summstatsonly-5sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, resume=True, link=f"{cur_url}/run/textgen", summ_stats=True, table_src=False, other_col=False, method=["ans_contains_gt", "gt_contains_ans", "resample", "skip-existing"], sample_size = 5)

results_checker(sp)

  0%|          | 0/7026 [00:00<?, ?it/s]

Fuzzy label sportsevent #sport contains gt label sportsevent: MATCH 

Fuzzy label instructionitemlist`text`. contains gt label itemlist: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupational: MATCH 

Fuzzy label pharmacistcategorycode contains gt label categorycode: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label scity contains gt label city: MATCH 

Fuzzy label softwareorganization description ern contains gt label organization: MATCH 

Fuzzy label sportstext contains gt label text: MATCH 

Fuzzy label aircreativeworkhours contains gt label creativework: MATCH 

Fuzzy label territoryorganization contains gt label organization: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label universityorganization contains gt label organization: MATCH 

Fuzzy label utentypeitemlist, contains gt label item

Fuzzy label 8price contains gt label price: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label editorurl contains gt label url: MATCH 

Fuzzy label venue,company orbuilding contains gt label company: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label dyppeakspersonal contains gt label person: MATCH 

Fuzzy label attendenum** contains gt label attendenum: MATCH 

Fuzzy label _instruction*thingy contains gt label thing: MATCH 

Fuzzy label createorganization,organize contains gt label organization: MATCH 

Fuzzy label eproduct contains gt label product: MATCH 

Fuzzy label instructionat)offeridentifier contains gt label offer: MATCH 

Fuzzy label currency** contains gt label currency: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label 2buildingname contains gt label buildingname: MATCH 

Fuzzy label attendanceoffer contains gt label offer: MATCH 

Fuzzy label 50/price contains gt label

Fuzzy label baby event contains gt label event: MATCH 

Fuzzy label instructionitemlist`gend contains gt label itemlist: MATCH 

Fuzzy label kathperson contains gt label person: MATCH 

Fuzzy label statustype**************** contains gt label statustype: MATCH 

Fuzzy label ********attendenum contains gt label attendenum: MATCH 

Fuzzy label 8integer contains gt label integer: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label icedcreativeproductivity contains gt label product: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label logoffer contains gt label offer: MATCH 

Fuzzy label 8integer contains gt label integer: MATCH 

Fuzzy label retextor with monet contains gt label text: MATCH 

Fuzzy label fourorganization .text field contains gt label organization: MATCH 

Fuzzy label sedanmodelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label typicalagerange, typical contains gt label 

Fuzzy label motion picture,city the city contains gt label city: MATCH 

Fuzzy label restaurant chain,r contains gt label restaurant: MATCH 

Fuzzy label gümüsehotel contains gt label hotel: MATCH 

Fuzzy label url link format to user content contains gt label url: MATCH 

Fuzzy label brotextylebrandingorgan contains gt label brand: MATCH 

Fuzzy label jidentifierat contains gt label identifierat: MATCH 

Fuzzy label downloadsmodelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label componentat)url#identifier contains gt label url: MATCH 

Fuzzy label 3integer contains gt label integer: MATCH 

Fuzzy label educationorganization contains gt label organization: MATCH 

Fuzzy label 2organization contains gt label organization: MATCH 

Fuzzy label stdcarrierisperson contains gt label person: MATCH 

Fuzzy label dayofweek*** contains gt label dayofweek: MATCH 

Fuzzy label tiglfaxnumber contains gt label faxnumber: MATCH 

Fuzzy label company   |buildingname| contains gt label buil

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label paymentaccepted** contains gt label paymentaccepted: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label currency*****************integr contains gt label currency: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label organization****************** contains gt label organization: MATCH 

Fuzzy label eventsurl contains gt label event: MATCH 

Fuzzy label departmentalorganization the department contains gt label organization: MATCH 

Fuzzy label parkbuildingname contains gt label buildingname: MATCH 

Fuzzy label joysrredbook building contains gt label book: MATCH 

Fuzzy label sportsorganization contains gt label organization: MATCH 

Fuzzy label testsportsevent contains gt label sportsevent: MATCH 

Fuzzy l

Fuzzy label 588integer contains gt label integer: MATCH 

Fuzzy label statustype******** contains gt label statustype: MATCH 

Fuzzy label instructionalintegeratypical contains gt label integer: MATCH 

Fuzzy label kbuildingnameeventualite contains gt label buildingname: MATCH 

Fuzzy label dorganicbrand contains gt label brand: MATCH 

Fuzzy label prixofferitemcondition // contains gt label offeritemcondition: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label men and women healthcategory, contains gt label category: MATCH 

Fuzzy label 39integer contains gt label integer: MATCH 

Fuzzy label pcountyorstate contains gt label countyorstate: MATCH 

Fuzzy label (webhtmlaction) contains gt label webhtmlaction: MATCH 

Fuzzy label input:integer contains gt label integer: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label positionalcodecs#integer contains gt label integer: MATCH 

Fuzzy label sportssevents 

Fuzzy label number   instruciton select contains gt label number: MATCH 

Fuzzy label workhours*** contains gt label workhours: MATCH 

Fuzzy label fourpersonorganization -music contains gt label organization: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label dbuildingname contains gt label buildingname: MATCH 

Fuzzy label **attendenum* // contains gt label attendenum: MATCH 

Fuzzy label 2typicalcostrange contains gt label costrange: MATCH 

Fuzzy label wapellacity contains gt label city: MATCH 

Fuzzy label wcity contains gt label city: MATCH 

Fuzzy label instructionat)offeridentifier contains gt label offer: MATCH 

Fuzzy label _text **eventually******** contains gt label event: MATCH 

Fuzzy label city instabilityofbuildingname contains gt label buildingname: MATCH 

Fuzzy label unitcode*** contains gt label unitcode: MATCH 

Fuzzy label sportsrecruitment#text contains gt label text: MATCH 

Fuzzy label dproduct,th

Fuzzy label 2text contains gt label text: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label eicmodelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label descriptiontext contains gt label text: MATCH 

Fuzzy label 2integer contains gt label integer: MATCH 

Fuzzy label availabilityofitem*** contains gt label availabilityofitem: MATCH 

Fuzzy label csvidentifierat contains gt label identifierat: MATCH 

Fuzzy label instructionat) #integerinst contains gt label integer: MATCH 

Fuzzy label *number******************costrange contains gt label costrange: MATCH 

Fuzzy label itemlist***********/inst contains gt label itemlist: MATCH 

Fuzzy label inidentifierat contains gt label identifierat: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label **currency contains gt label currency: MATCH 

Fuzzy label instructio

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label instructioncode :postalcode contains gt label postalcode: MATCH 

Fuzzy label statustype**************** contains gt label statustype: MATCH 

Fuzzy label attendenum* attende contains gt label attendenum: MATCH 

Fuzzy label numberofintegeroratmodel contains gt label integer: MATCH 

Fuzzy label selfidentifierat contains gt label identifierat: MATCH 

Fuzzy label photourl,building contains gt label photourl: MATCH 

Fuzzy label sidentifiernameap contains gt label identifiernameap: MATCH 

Fuzzy label instructionat)integeroffer contains gt label integer: MATCH 

Fuzzy label dp sportstext contains gt label text: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label ❘faxnumber contains gt label faxnumber: MATCH 

Fuzzy label statustype**************** contains gt label statustype: MATCH 

Fuzzy label attendenum* attende contains gt label attendenum: MATCH 

Fuzzy lab

Fuzzy label sportssevent contains gt label event: MATCH 

Fuzzy label statustype**************** contains gt label statustype: MATCH 

Fuzzy label **attendenum*atten contains gt label attendenum: MATCH 

Fuzzy label sperson contains gt label person: MATCH 

Fuzzy label 6integer contains gt label integer: MATCH 

Fuzzy label 4integer contains gt label integer: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label attendenum** attende contains gt label attendenum: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label instructionintegerat[thing]( contains gt label integer: MATCH 

Fuzzy label person or persons unknown to le contains gt label person: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label number**************** contains gt label number: MATCH 

Fuzzy label *number******************costra

Fuzzy label the musicartistat facebook contains gt label musicartistat: MATCH 

Fuzzy label dts_eventlisting contains gt label event: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label *number**************** contains gt label number: MATCH 

Fuzzy label itemlist*******************/inst contains gt label itemlist: MATCH 

Fuzzy label ['product' contains gt label product: MATCH 

Fuzzy label realperson contains gt label person: MATCH 

Fuzzy label free entry ticket required /price contains gt label price: MATCH 

Fuzzy label attendenum* attende contains gt label attendenum: MATCH 

Fuzzy label ********date contains gt label date: MATCH 

Fuzzy label conorganization with contact information contains gt label organization: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label experiencerequirements: MATCH 

Fuzzy label commentwebhtmlaction 

Fuzzy label plaything contains gt label thing: MATCH 

Fuzzy label `company contains gt label company: MATCH 

Fuzzy label person   cmart on mar contains gt label person: MATCH 

Fuzzy label paymentaccepted** contains gt label paymentaccepted: MATCH 

Fuzzy label 3personsexmovie contains gt label person: MATCH 

Fuzzy label 5modelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label ********offeritemcondition contains gt label offeritemcondition: MATCH 

Fuzzy label dishmanbuildingname contains gt label buildingname: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label advancedbook contains gt label book: MATCH 

Fuzzy label */attendenum* attend contains gt label attendenum: MATCH 

Fuzzy label statustype***/stat contains gt label statustype: MATCH 

Fuzzy label 2text contains gt label text: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label foreignrestaurant contains gt label restaurant: MATCH 



Fuzzy label **itemlist*inte contains gt label itemlist: MATCH 

Fuzzy label designworkshopbranding // contains gt label brand: MATCH 

Fuzzy label 2integer contains gt label integer: MATCH 

Fuzzy label number*****************integr contains gt label number: MATCH 

Fuzzy label sycamores (buildingname contains gt label buildingname: MATCH 

Fuzzy label currency******integr contains gt label currency: MATCH 

Fuzzy label wordpress,textbookcre contains gt label text: MATCH 

Fuzzy label attendenum** attende contains gt label attendenum: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label nightlife, restaurant contains gt label restaurant: MATCH 

Fuzzy label nightclub,company orbrand contains gt label company: MATCH 

Fuzzy la

Fuzzy label proteins,masslessweight contains gt label weight: MATCH 

Fuzzy label fibermass contains gt label mass: MATCH 

Fuzzy label 2quantityrange contains gt label quantityrange: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label distance*** contains gt label distance: MATCH 

Fuzzy label thebuildingname contains gt label buildingname: MATCH 

Fuzzy label sportsseventsthingorgan contains gt label thing: MATCH 

Fuzzy label gamecreativeworkshop@ contains gt label creativework: MATCH 

Fuzzy label 8integer contains gt label integer: MATCH 

Fuzzy label 16integer contains gt label integer: MATCH 

Fuzzy label *number contains gt label number: MATCH 

Fuzzy label castoreumbranding // contains gt label brand: MATCH 

Fuzzy label streetaddress)city,count contains gt label streetaddress: MATCH 

Fuzzy label attendenum** attende contains gt label attendenum: MATCH 

Fuzzy label variousmusicalbum contains gt label musicalbum: MATCH 

Fuzzy label rembrandt' ho

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label *formatofbook contains gt label formatofbook: MATCH 

Fuzzy label diversifiedorganization contains gt label organization: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

Fuzzy label ttvepisode contains gt label tvepisode: MATCH 

Fuzzy label casualorganization contains gt label organization: MATCH 

Fuzzy label integer************* contains gt label integer: MATCH 

Fuzzy label number*******************integr contains gt label number: MATCH 

Fuzzy label numberofferitemconditioned contains gt label offeritemcondition: MATCH 

Fuzzy label fourpersonmodelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label httpurl contains gt label url: MATCH 

Fuzzy label number orintegerorcostrange contains gt label costrange: MATCH 

Fuzzy label number*****************integr contains gt label number: MATCH 

Fuzzy label itemlist*********

Fuzzy label 2statustype contains gt label statustype: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label gendergendertype contains gt label gendertype: MATCH 

Fuzzy label cassinaorganization contains gt label organization: MATCH 

Fuzzy label ttvepisode contains gt label tvepisode: MATCH 

Fuzzy label attendenum* the att contains gt label attendenum: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label kmodelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label ********availabilityofitem contains gt label availabilityofitem: MATCH 

Fuzzy label karencarrillo personal contains gt label person: MATCH 

Fuzzy label ninteger_price theof contains gt label integer: MATCH 

Fuzzy label closertohomemovie in contains gt label movie: MATCH 

Fuzzy label the number **number** of contains gt label number: MATCH 

Fuzzy label *unitcode contains gt label unitcode: MATCH 

Fuzzy label uni

Fuzzy label booksie contains gt label book: MATCH 

Fuzzy label number**********integr contains gt label number: MATCH 

Fuzzy label selkiebrand contains gt label brand: MATCH 

Fuzzy label url =organization -->url contains gt label organization: MATCH 

Fuzzy label cyclingnewsbuildingname"> contains gt label buildingname: MATCH 

Fuzzy label number    :integer   // contains gt label integer: MATCH 

Fuzzy label number    instruciton set contains gt label number: MATCH 

Fuzzy label product,thing #product thing contains gt label product: MATCH 

Fuzzy label product,thingorreviewed contains gt label product: MATCH 

Fuzzy label attendanceofferitemcondition contains gt label offeritemcondition: MATCH 

Fuzzy label ketperson contains gt label person: MATCH 

Fuzzy label /currency//itemlist // contains gt label itemlist: MATCH 

Fuzzy label -coordinateat contains gt label coordinateat: MATCH 

Fuzzy label 2integer contains gt label integer: MATCH 

Fuzzy label openopenopeninghours contains

Fuzzy label the number **number** of contains gt label number: MATCH 

Fuzzy label **instructionitemlist* contains gt label itemlist: MATCH 

Fuzzy label servername,urlusernameap contains gt label url: MATCH 

Fuzzy label instructionat)integeroffer contains gt label integer: MATCH 

Fuzzy label vegrestricteddiet contains gt label restricteddiet: MATCH 

Fuzzy label hotelhotel contains gt label hotel: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label sportsrecap #text then contains gt label text: MATCH 

Fuzzy label athenspostaladdress contains gt label postaladdress: MATCH 

Fuzzy label 326integer contains gt label integer: MATCH 

Fuzzy label 1postalcode contains gt label postalcode: MATCH 

Fuzzy label phphotourl contains gt label photourl: MATCH 

Fuzzy label railorganization contains gt label organization: MATCH 

Fuzzy label 2company contains gt label company: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label numberofferitemconditioned contains gt label offeritemcondition: MATCH 

Fuzzy label lotstreetbuildingname contains gt label buildingname: MATCH 

Fuzzy label 1sportsevent contains gt label sportsevent: MATCH 

Fuzzy label energizeworkhours contains gt label workhours: MATCH 

Fuzzy label mcountyorstate contains gt label countyorstate: MATCH 

Fuzzy label attendenum )attender contains gt label attendenum: MATCH 

Fuzzy label weavethroughweight contains gt label weight: MATCH 

Fuzzy label photourl the phot contains gt label photourl: MATCH 

Fuzzy label **offeritemcondition contains gt label offeritemcondition: MATCH 

Fuzzy label halongcity contains gt label city: MATCH 

Fuzzy label ictmovie :organization"> contains gt label organization: MATCH 

Fuzzy label hassnain ali,person contains gt label person: MATCH 

Fuzzy label productionbuildingname #eventcre contains gt label buildingname: MATCH 

Fuzzy label uidentifierat contains gt label identifierat: MATCH 

Fuzzy label 

10095
13181
Total entries: 13181 
 Correct Pct: 0.7659


#### 10-Sample

In [40]:
# #REFERENCE: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
# #https://huggingface.co/docs/transformers/generation_strategies

cur_url = "https://079c124ee590ac75fe.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-cont+resam-full-10sample-v2.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, resume=True, link=f"{cur_url}/run/textgen", summ_stats=True, table_src=True, other_col=False, method=["ans_contains_gt", "gt_contains_ans", "resample"], sample_size = 10)

results_checker(sp)

  0%|          | 0/7026 [00:00<?, ?it/s]

Fuzzy label sportsrecruitmenttexting contains gt label text: MATCH 

Fuzzy label number orintegerorcostrange contains gt label costrange: MATCH 

Fuzzy label fourthstreetaddressofbranded contains gt label streetaddress: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label phcategory contains gt label category: MATCH 

Fuzzy label expedia coordinateat contains gt label coordinateat: MATCH 

Fuzzy label number |coordinateatofnumber contains gt label coordinateat: MATCH 

Fuzzy label town orcityartistat contains gt label city: MATCH 

Fuzzy label verbatimrevieweditem contains gt label review: MATCH 

Fuzzy label lexusbuildingname contains gt label buildingname: MATCH 

Fuzzy label sportstext,sportse contains gt label text: MATCH 

Fuzzy label monuments andbuildingname ecre contains gt label buildingname: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label sectionedthingyoffer contains gt label offer: M

Fuzzy label ttvepisode contains gt label tvepisode: MATCH 

Fuzzy label gendertypecodecre contains gt label gendertype: MATCH 

Fuzzy label fortune teller datebook contains gt label book: MATCH 

Fuzzy label cowboy musicartistat contains gt label musicartistat: MATCH 

Fuzzy label 52faxnumber | contains gt label faxnumber: MATCH 

Fuzzy label 2identifiernameap contains gt label identifiernameap: MATCH 

Fuzzy label kunittext contains gt label unittext: MATCH 

Fuzzy label unitcode** contains gt label unitcode: MATCH 

Fuzzy label infoemailaddress # demolit contains gt label email: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label firstpostalcodebookbuilding contains gt label postalcode: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label attendanceofferitemcondition contains gt label offeritemcondition: MATCH 

Fuzzy label lexicalorganization contains gt label organization: MATCH 

Fuzzy label discountscalendarvalue // contains gt label calendarvalue: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

Fuzzy label spookshow inc.,brand contains gt label brand: MATCH 

Fuzzy label liveartistat -brand contains gt label brand: MATCH 

Fuzzy label 2faxnumber contains gt label faxnumber: MATCH 

Fuzzy label pretext contains gt label text: MATCH 

Fuzzy label exportedproductnameorid contains gt label product: MATCH 

Fuzzy label tcountyorstate contains gt label countyorstate: MATCH 

Fuzzy label exportcountry)or die; contains gt label country: MATCH 

Fuzzy label exported_product, chen contains gt label product: MATCH 

GT label warrantypromise contains fuzzy label warranty: MATCH 

Fuzzy label -coordinateat contains gt label coordinateat: MATCH 

Fuzzy label fitnesscompany contains gt label company: MATCH 

Fuzzy label thebrandreview contains gt label review: MATCH 

Fuzzy label children,audience or target contains gt label audience: MATCH 

Fuzzy lab

Fuzzy label brand newtext contains gt label brand: MATCH 

Fuzzy label brand new instruction**************** contains gt label brand: MATCH 

Fuzzy label brand new instruction**************** contains gt label brand: MATCH 

Fuzzy label brand newtext contains gt label brand: MATCH 

Fuzzy label encoded text contains gt label text: MATCH 

Fuzzy label 8quantityrange contains gt label quantityrange: MATCH 

Fuzzy label sportsrecruitmentbookjob contains gt label book: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

Fuzzy label unitcode** contains gt label unitcode: MATCH 

GT label itemlist contains fuzzy label list: MATCH 

Fuzzy label personalityitemlist`person contains gt label itemlist: MATCH 

Fuzzy label sportssevent contains gt label event: MATCH 

Fuzzy label evolutionaryorganization the e contains gt label organization: MATCH 

Fuzzy label ucsaperson contains gt label person: MATCH 

Fuzzy label gym,company contains gt label company: MATCH 

Fuzzy label qalibafc

Fuzzy label conditionatbestofferitem contains gt label offer: MATCH 

Fuzzy label 6integer contains gt label integer: MATCH 

Fuzzy label (statustype)stat contains gt label statustype: MATCH 

Fuzzy label designproduct contains gt label product: MATCH 

Fuzzy label stmbrand contains gt label brand: MATCH 

Fuzzy label firebrandtext contains gt label brand: MATCH 

Fuzzy label gendertype* contains gt label gendertype: MATCH 

Fuzzy label geek-branded building contains gt label brand: MATCH 

Fuzzy label 2typicalidentifiernameap contains gt label identifiernameap: MATCH 

Fuzzy label 4integer contains gt label integer: MATCH 

Fuzzy label itsbrohemptedbrand contains gt label brand: MATCH 

Fuzzy label luxurymhotelbrand contains gt label brand: MATCH 

Fuzzy label cordell hotel brand partners contains gt label brand: MATCH 

Fuzzy label escapistbrand contains gt label brand: MATCH 

Fuzzy label �text contains gt label text: MATCH 

Fuzzy label sportssevent contains gt label event: MATCH 


Fuzzy label openopen openinghours contains gt label openinghours: MATCH 

Fuzzy label person   : abodemus contains gt label person: MATCH 

Fuzzy label sportsrecitation #buildingname contains gt label buildingname: MATCH 

Fuzzy label wonderofbooks* contains gt label book: MATCH 

Fuzzy label glrestricteddiet contains gt label restricteddiet: MATCH 

Fuzzy label ttvepisode contains gt label tvepisode: MATCH 

Fuzzy label extrudeproduct # the product contains gt label product: MATCH 

Fuzzy label bkbrand contains gt label brand: MATCH 

Fuzzy label tt2seriescreative contains gt label seriescreative: MATCH 

Fuzzy label etheric_price the sou contains gt label price: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label vanmoofbuildingname contains gt label buildingname: MATCH 

GT label musicrecording contains fuzzy label us: MATCH 

Fuzzy label -coordinateat contains gt label coordinateat: MATCH 

Fuzzy label events contains gt label event: MATCH 


Fuzzy label city    buffalos population contains gt label city: MATCH 

Fuzzy label nycountyorstate contains gt label countyorstate: MATCH 

Fuzzy label sepcurrency contains gt label currency: MATCH 

Fuzzy label mangaonecreativework contains gt label creativework: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label createcreativework#url contains gt label creativework: MATCH 

Fuzzy label sonettsorokbook contains gt label book: MATCH 

Fuzzy label tantamount to a rating contains gt label rating: MATCH 

Fuzzy label offerphotourl contains gt label photourl: MATCH 

Fuzzy label 5 starrestaurant contains gt label restaurant: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label shapelockreviewed contains gt label review: MATCH 

Fuzzy label machine,modelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label printableintegeratpriceof c

Fuzzy label branding/identifierat contains gt label identifierat: MATCH 

Fuzzy label brandingitemlist contains gt label itemlist: MATCH 

Fuzzy label branding/identifierat contains gt label identifierat: MATCH 

Fuzzy label brandtext****************brand contains gt label brand: MATCH 

Fuzzy label branding/text contains gt label brand: MATCH 

Fuzzy label branding/text contains gt label brand: MATCH 

Fuzzy label branding/text contains gt label brand: MATCH 

Fuzzy label branding/text contains gt label brand: MATCH 

Fuzzy label branding/text contains gt label brand: MATCH 

Fuzzy label branding/text contains gt label brand: MATCH 

Fuzzy label branding/text contains gt label brand: MATCH 

Fuzzy label 2integer contains gt label integer: MATCH 

Fuzzy label lunitcode contains gt label unitcode: MATCH 

Fuzzy label reshotel contains gt label hotel: MATCH 

Fuzzy label 2musicalbum contains gt label musicalbum: MATCH 

Fuzzy label educationorganization contains gt label organization: MA

Fuzzy label inunitcode contains gt label unitcode: MATCH 

Fuzzy label hustonpostalcode contains gt label postalcode: MATCH 

Fuzzy label producturl contains gt label product: MATCH 

Fuzzy label webertext contains gt label text: MATCH 

Fuzzy label qmodelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label us musicartistat – contains gt label musicartistat: MATCH 

Fuzzy label city # chatbookcre contains gt label city: MATCH 

Fuzzy label recity contains gt label city: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

Fuzzy label obmusicalbum contains gt label musicalbum: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label consumingintegerofquantityrange contains gt label quantityrange: MATCH 

Fuzzy label 5 dayofweek contains gt label dayofweek: MATCH 

Fuzzy label boru sanati text of contains gt label text: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label vegrestricteddi

Fuzzy label coimbatorecity contains gt label city: MATCH 

Fuzzy label 2streetaddress contains gt label streetaddress: MATCH 

Fuzzy label 2musicalbum contains gt label musicalbum: MATCH 

Fuzzy label restartevent1728 contains gt label event: MATCH 

Fuzzy label ratatouille recipe contains gt label recipe: MATCH 

Fuzzy label ratio of city population to contains gt label city: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label availableofferitemcondition contains gt label offeritemcondition: MATCH 

Fuzzy label textcode)itemlist of contains gt label itemlist: MATCH 

Fuzzy label thing #text#durationitem contains gt label duration: MATCH 

Fuzzy label 5organization contains gt label organization: MATCH 

Fuzzy label lowcalories contains gt label calories: MATCH 

Fuzzy label wearmybabybrand contains gt label brand: MATCH 

Fuzzy label formatofbook # eformat contains gt label formatofbook: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label 876integer contains gt label integer: MATCH 

Fuzzy label paintunittext#modelname contains gt label unittext: MATCH 

Fuzzy label scheduling,calendarvalue // contains gt label calendarvalue: MATCH 

Fuzzy label moosehead marine museum building contains gt label museum: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label arthrostim date contains gt label date: MATCH 

Fuzzy label 6integer contains gt label integer: MATCH 

Fuzzy label inunitcode contains gt label unitcode: MATCH 

Fuzzy label 1integer contains gt label integer: MATCH 

Fuzzy label 2integer contains gt label integer: MATCH 

Fuzzy label unitcode** contains gt label unitcode: M

Fuzzy label hatfieldbuildingname contains gt label buildingname: MATCH 

Fuzzy label ggendertype contains gt label gendertype: MATCH 

Fuzzy label thebuildingname contains gt label buildingname: MATCH 

Fuzzy label 2categorycode contains gt label categorycode: MATCH 

Fuzzy label sportssevent contains gt label event: MATCH 

Fuzzy label gamecreativeworkshop@ contains gt label creativework: MATCH 

Fuzzy label postalcode #posting contains gt label postalcode: MATCH 

Fuzzy label urlat https://branded contains gt label brand: MATCH 

Fuzzy label castore edc, brand contains gt label brand: MATCH 

Fuzzy label hotels contains gt label hotel: MATCH 

Fuzzy label attendenum )),att contains gt label attendenum: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

Fuzzy label acididentifierat contains gt label identifierat: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

Fuzzy label unitcode** contains gt label unitcode: MATCH 

Fuzzy label heavyweight contains g

Fuzzy label horganization with hoos contains gt label organization: MATCH 

Fuzzy label organization* contains gt label organization: MATCH 

Fuzzy label hootsuitemidentifierat contains gt label identifierat: MATCH 

Fuzzy label educationaloccupationalcredential // contains gt label educationaloccupationalcredential: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label 8quantityrange contains gt label quantityrange: MATCH 

GT label categorycode contains fuzzy label code: MATCH 

Fuzzy label jazzmusicalbum contains gt label musicalbum: MATCH 

Fuzzy label coffeecompany contains gt label company: MATCH 

Fuzzy label countyorstate`scount contains gt label countyorstate: MATCH 

Fuzzy label as per quote costrange mel contains gt label costrange: MATCH 

Fuzzy label openopenopeninghours contains gt label openinghours: MATCH 

Fuzzy label 2number contains gt label number: MATCH 

Fuzzy label hsmodelnameorid contains gt label modelnameorid: MATCH 

Fuz

JSONDecodeError: Expecting value: line 2 column 1 (char 1)

### 10-Sample, Other Columns

In [30]:
# #REFERENCE: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
# #https://huggingface.co/docs/transformers/generation_strategies

cur_url = "https://c6fa2555f7518e0245.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-cont+resam-fullplusothercol-10sample-v4.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, resume=True, link=f"{cur_url}/run/textgen", summ_stats=True, table_src=True, other_col=True, method=["ans_contains_gt", "gt_contains_ans", "resample"], sample_size = 10, results=True)


  0%|          | 0/7026 [00:00<?, ?it/s]

Fuzzy label sportsrecruitmentmovieurl contains gt label movie: MATCH 

Fuzzy label testinginteger | chooseat // contains gt label integer: MATCH 

Fuzzy label ended some difficulties understanding certain things contains gt label thing: MATCH 

Fuzzy label ended some difficulties understanding certain things contains gt label thing: MATCH 

Fuzzy label (faxnumber)[f contains gt label faxnumber: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label ['offer contains gt label offer: MATCH 

Fuzzy label deliveryymethodofferitem contains gt label offer: MATCH 

Fuzzy label ‎dayofweek contains gt label dayofweek: MATCH 

Fuzzy label sportsrecording ///organization contains gt label organization: MATCH 

Fuzzy label postaddressesfromemail address contains gt label email: MATCH 

Fuzzy label musicalbum* contains gt label musicalbum: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label entertainmentthingyorida

Fuzzy label commentwebhtmlaction contains gt label webhtmlaction: MATCH 

Fuzzy label hotels contains gt label hotel: MATCH 

Fuzzy label 8.text contains gt label text: MATCH 

Fuzzy label ​itemlist contains gt label itemlist: MATCH 

Fuzzy label servicesoffer contains gt label offer: MATCH 

Fuzzy label ​itemlist contains gt label itemlist: MATCH 

Fuzzy label url #url#avail contains gt label url: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label villa,buildingname)city contains gt label buildingname: MATCH 

Fuzzy label ​itemlist contains gt label itemlist: MATCH 

Fuzzy label �brand contains gt label brand: MATCH 

Fuzzy label 83identifierat contains gt label identifierat: MATCH 

Fuzzy label jobposting .categorycode contains gt label categorycode: MATCH 

Fuzzy label educationaloccupationalcredential; contains gt label educationaloccupationalcredential: MATCH 

Fuzzy label 9availabilityofitem contains gt label availabilityofitem: MATCH 

Fuzzy label adida

Fuzzy label url,url#email| contains gt label email: MATCH 

Fuzzy label jidentifierat contains gt label identifierat: MATCH 

Fuzzy label educationorganization contains gt label organization: MATCH 

Fuzzy label acompany contains gt label company: MATCH 

Fuzzy label inavailabilityofitem contains gt label availabilityofitem: MATCH 

Fuzzy label 9.price contains gt label price: MATCH 

Fuzzy label moduleidentifierat contains gt label identifierat: MATCH 

Fuzzy label “creativework contains gt label creativework: MATCH 

Fuzzy label ​availabilityofitem contains gt label availabilityofitem: MATCH 

Fuzzy label registerat,textualorgan contains gt label text: MATCH 

GT label availabilityofitem contains fuzzy label il: MATCH 

Fuzzy label drawingwork orbuildingnameor contains gt label buildingname: MATCH 

Fuzzy label 2integer contains gt label integer: MATCH 

Fuzzy label ​availabilityofitem contains gt label availabilityofitem: MATCH 

Fuzzy label sportsorganization contains gt label orga

Fuzzy label #seriescreative contains gt label seriescreative: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

Fuzzy label 1unitcode contains gt label unitcode: MATCH 

GT label educationaloccupationalcredential contains fuzzy label ca: MATCH 

Fuzzy label numbernumber contains gt label number: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label books contains gt label book: MATCH 

Fuzzy label supertext contains gt label text: MATCH 

Fuzzy label ​calendarvalue contains gt label calendarvalue: MATCH 

Fuzzy label ['offer contains gt label offer: MATCH 

Fuzzy label skatebuildingname contains gt label buildingname: MATCH 

Fuzzy label -coordinateat contains gt label coordinateat: MATCH 

Fuzzy label raiffeisenpro

Fuzzy label paper,product #thingist contains gt label product: MATCH 

Fuzzy label brp_brand#organ contains gt label brand: MATCH 

Fuzzy label if it wereunitcode in contains gt label unitcode: MATCH 

Fuzzy label instructionitemlist */createdorgan contains gt label itemlist: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label 2quantityrange contains gt label quantityrange: MATCH 

Fuzzy label lunitcode contains gt label unitcode: MATCH 

Fuzzy label internalreview contains gt label review: MATCH 

Fuzzy label hkcurrency contains gt label currency: MATCH 

Fuzzy label 7postalcode contains gt label postalcode: MATCH 

Fuzzy label hongkongsportsevent contains gt label sportsevent: MATCH 

Fuzzy label uofferitemcondition contains gt label offeritemcondition: MATCH 

Fuzz

Fuzzy label bypostaddress[streetaddress contains gt label streetaddress: MATCH 

Fuzzy label parkstreetaddress contains gt label streetaddress: MATCH 

Fuzzy label themonetaryamountof contains gt label monetaryamount: MATCH 

Fuzzy label fulltimeworkhours contains gt label workhours: MATCH 

Fuzzy label educationaloccupationalcredentialso contains gt label educationaloccupationalcredential: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

Fuzzy label itemlistitemcondition contains gt label itemlist: MATCH 

Fuzzy label yesboolean contains gt label boolean: MATCH 

Fuzzy label ndfurl contains gt label url: MATCH 

Fuzzy label commentwebhtmlaction contains gt label webhtmlaction: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

GT label educationaloccupationalcredential contains fuzzy label de: MATCH 

Fuzzy label 8weight contains gt label weight: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

Fuzzy label racedaypostaladdress c

Fuzzy label 3text contains gt label text: MATCH 

Fuzzy label sportsrecordertext,s contains gt label text: MATCH 

Fuzzy label coverphotourl contains gt label photourl: MATCH 

Fuzzy label brokk+event contains gt label event: MATCH 

Fuzzy label ['itemlist contains gt label itemlist: MATCH 

Fuzzy label designthingy,creative contains gt label thing: MATCH 

Fuzzy label ['athleticteam, contains gt label athleticteam: MATCH 

Fuzzy label feedbackitemcondition #reviewlet contains gt label review: MATCH 

Fuzzy label ione orunitcode>brand contains gt label unitcode: MATCH 

Fuzzy label reshotel contains gt label hotel: MATCH 

Fuzzy label 6buildingname contains gt label buildingname: MATCH 

Fuzzy label etheridge_brand on brand contains gt label brand: MATCH 

Fuzzy label unitcode** contains gt label unitcode: MATCH 

Fuzzy label educationorganization contains gt label organization: MATCH 

Fuzzy label 2distance contains gt label distance: MATCH 

Fuzzy label ebayurl contains gt label url:

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

Fuzzy label ['itemlist contains gt label itemlist: MATCH 

Fuzzy label 6category contains gt label category: MATCH 

Fuzzy label 3company contains gt label company: MATCH 

Fuzzy label moa museum of art contains gt label museum: MATCH 

Fuzzy label webperson contains gt label person: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label jidentifierat contains gt label identifierat: MATCH 

Fuzzy label bay or city).streetaddress contains gt label streetaddress: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzz

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label educationaloccupationalcredentialst contains gt label educationaloccupationalcredential: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label ​unitcode contains gt label unitcode: MATCH 

Fuzzy label 8costrange contains gt label costrange: MATCH 

Fuzzy label widentifierat contains gt label identifierat: MATCH 

Fuzzy label 2itemlist contains gt label itemlist: MATCH 

Fuzzy label 1price contains gt label price: MATCH 

Fuzzy label outavailabilityofitem contains gt label availabilityofitem: MATCH 

Fuzzy label 8event contains gt label event: MATCH 

GT label gendertype contains fuzzy label gender: MATCH 

Fuzzy label ofoffer contains gt label offer: MATCH 

Fuzzy label �person contains gt label person: MATCH 

Fuzzy label //url contains gt label url: MATCH 

Fuzzy label br,country contains gt label country: MATCH

Fuzzy label 1price contains gt label price: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

Fuzzy label deliveryymethodofferitem contains gt label offer: MATCH 

Fuzzy label instructionitemlist */]]> contains gt label itemlist: MATCH 

Fuzzy label number |duration|text{ contains gt label duration: MATCH 

Fuzzy label 8integer contains gt label integer: MATCH 

Fuzzy label currency* contains gt label currency: MATCH 

Fuzzy label 2currency contains gt label currency: MATCH 

Fuzzy label educationaloccupationalcredential"> contains gt label educationaloccupationalcredential: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label jidentifierat contains gt label identifierat: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label experience: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label experience: MATC

Fuzzy label 3telephone contains gt label telephone: MATCH 

Fuzzy label edison (person) contains gt label person: MATCH 

Fuzzy label sportsbook contains gt label book: MATCH 

Fuzzy label educationaloccupationalcredentialso contains gt label educationaloccupationalcredential: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label 6quantityrange contains gt label quantityrange: MATCH 

Fuzzy label 7integer contains gt label integer: MATCH 

Fuzzy label ['itemlist contains gt label itemlist: MATCH 

Fuzzy label ['modelnameorid contains gt label modelnameorid: MATCH 

Fuzzy label ​weight contains gt label weight: MATCH 

GT label educationaloccupationalcredential contains fuzzy label ia: MATCH 

Fuzzy label 0weightitem contains gt label weight: MATCH 

Fuzzy label bebe,product the of contains gt label product: MATCH 

Fuzzy label ['offer, ofr contains gt label offer: MATCH 

Fuzzy label pcidentifierat contains gt label identifi

Fuzzy label corporatizeeventbookcre contains gt label event: MATCH 

Fuzzy label pdc (currency)currency contains gt label currency: MATCH 

Fuzzy label attendance_attendenum contains gt label attendenum: MATCH 

Fuzzy label instructionitemlist */]]> contains gt label itemlist: MATCH 

Fuzzy label pastmusicalbum contains gt label musicalbum: MATCH 

Fuzzy label ​itemlist contains gt label itemlist: MATCH 

Fuzzy label ['offer contains gt label offer: MATCH 

Fuzzy label bankbuildingname ) the building contains gt label buildingname: MATCH 

Fuzzy label sportsbook contains gt label book: MATCH 

Fuzzy label 20product contains gt label product: MATCH 

Fuzzy label 3creativecreativework contains gt label creativework: MATCH 

Fuzzy label ['itemlist contains gt label itemlist: MATCH 

Fuzzy label thetvepisode contains gt label tvepisode: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label kidentifierat contains gt label identif

Fuzzy label jobposting #categorycode contains gt label categorycode: MATCH 

Fuzzy label lbr (brand) contains gt label brand: MATCH 

Fuzzy label etheratome #unitcode contains gt label unitcode: MATCH 

Fuzzy label thehotel contains gt label hotel: MATCH 

Fuzzy label +telephone contains gt label telephone: MATCH 

Fuzzy label 2integer contains gt label integer: MATCH 

Fuzzy label monumentsat wrote this monumenttext contains gt label text: MATCH 

Fuzzy label 5price contains gt label price: MATCH 

Fuzzy label kunitcode contains gt label unitcode: MATCH 

Fuzzy label commentwebhtmlaction contains gt label webhtmlaction: MATCH 

Fuzzy label city the city is divided into contains gt label city: MATCH 

Fuzzy label upcountyorstate contains gt label countyorstate: MATCH 

Fuzzy label rrestaurant contains gt label restaurant: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

Fuzzy label ​availabilityofitem contains gt label availability

Fuzzy label laweducationalorganization contains gt label educationalorganization: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

GT label occupationalexperiencerequirements contains fuzzy label occupationalexperienc: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label jidentifierat contains gt label identifierat: MATCH 

Fuzzy label true/boolean contains gt label boolean: MATCH 

Fuzzy label truckcompany contains gt label company: MATCH 

Fuzzy label https://calendarvalue.url contains gt label calendarvalue: MATCH 

Fuzzy label 3postaladdress contains gt label postaladdress: MATCH 

Fuzzy label #musicalbum contains gt label musicalbum: MATCH 

Fuzzy label rstreetaddress,county contains gt label streetaddress: MATCH 

Fuzzy label product,thing # theproduct contains gt label product: MATCH 

Fuzzy label 2modelnameorid contains gt label modelnameorid: MATCH 

Fuzzy

Fuzzy label pduration contains gt label duration: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

GT label educationaloccupationalcredential contains fuzzy label educationaloccupationalcred: MATCH 

Fuzzy label 2product contains gt label product: MATCH 

Fuzzy label 10price contains gt label price: MATCH 

Fuzzy label ​availabilityofitem contains gt label availabilityofitem: MATCH 

Fuzzy label 3price contains gt label price: MATCH 

Fuzzy label gunittext contains gt label unittext: MATCH 

Total entries: 15040 
 Accuracy: 0.814 
 Weighted F1: 0.812 
 Unweighted F1: 0.799 
 Correct Remap: 1106 
 Total Remap: 1857 
 Truncated: 5


### LLAMA-CTA D4 (zero-shot)

In [None]:
# #REFERENCE: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
# #https://huggingface.co/docs/transformers/generation_strategies

cur_url = "https://079c124ee590ac75fe.gradio.live"

params = {
    'max_new_tokens': 6,
    'do_sample': True,
    'temperature': 0.2,
    'top_p': 0.8,
    'typical_p': 1,
    'repetition_penalty': 1.3,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 3,
    'no_repeat_ngram_size': 3,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': rand_seed,
}

filename = "llama-ft-cont+resam-3sample.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, resume=True, link=f"{cur_url}/run/textgen", full=False, method=["ans_contains_gt", "gt_contains_ans", "resample"], sample_size = 5)


## Sherlock Eval on SOTAB

COMMENTS: Sherlock ~3-4it / s

Sherlock gets a very generous mapping; 60 WOTAB classes overlap on 39 Sherlock classes, so many Sherlock predictions are marked correct if they're in a set of 2-4 possible matches in WOTAB.  Of course, many other Sherlock classes had no overlap with WOTAB at all. 

Sherlock achieves about 21% accuracy on 1000 samples.

Accuracy around 10% using pretrained model with manual remappings to classes

In [None]:
import os
os.chdir("sherlock-project")
import numpy as np
import pandas as pd
import pyarrow as pa

from sherlock import helpers
from sherlock.deploy.model import SherlockModel
from sherlock.features.paragraph_vectors import initialise_pretrained_model, initialise_nltk
from sherlock.features.preprocessing import (
    extract_features,
    convert_string_lists_to_lists,
    prepare_feature_extraction,
    load_parquet_values,
)
from sherlock.features.word_embeddings import initialise_word_embeddings
from sherlock.functional import extract_features_to_csv
os.environ["PYTHONHASHSEED"] = "13"

In [None]:
import shutil

shutil.copyfile(r"/content/drive/MyDrive/School/NYU/Dataset Search/proj/sherlock/glove.6B.50d.txt", r"/content/sherlock-project/sherlock/features/glove.6B.50d.txt")
shutil.copyfile(r"/content/drive/MyDrive/School/NYU/Dataset Search/proj/sherlock/par_vec_trained_400.pkl.docvecs.vectors_docs.npy", r"/content/sherlock-project/sherlock/features/par_vec_trained_400.pkl.docvecs.vectors_docs.npy")
shutil.copyfile(r"/content/drive/MyDrive/School/NYU/Dataset Search/proj/sherlock/par_vec_trained_400.pkl.trainables.syn1neg.npy", r"/content/sherlock-project/sherlock/features/par_vec_trained_400.pkl.trainables.syn1neg.npy")
shutil.copyfile(r"/content/drive/MyDrive/School/NYU/Dataset Search/proj/sherlock/par_vec_trained_400.pkl.wv.vectors.npy", r"/content/sherlock-project/sherlock/features/par_vec_trained_400.pkl.wv.vectors.npy")

'/content/sherlock-project/sherlock/features/par_vec_trained_400.pkl.wv.vectors.npy'

In [None]:
# prepare_feature_extraction()
initialise_word_embeddings(path=r"/content/drive/MyDrive/School/NYU/Dataset Search/proj/sherlock/")
initialise_pretrained_model(400, path=r"/content/sherlock-project/sherlock/features/")
initialise_nltk()
sherlock_model = SherlockModel(path="/content/sherlock-project/")
sherlock_model.initialize_model_from_json(with_weights=True, model_id="sherlock")

Initialising word embeddings
Initialise Word Embeddings process took 0:00:06.654745 seconds.
Initialise Doc2Vec Model, 400 dim, process took 0:00:04.147899 seconds. (filename = /content/sherlock-project/sherlock/features/par_vec_trained_400.pkl)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Initialised NLTK, process took 0:00:00.854763 seconds.




In [None]:

run_val(model="sherlock", save_path="/content/drive/MyDrive/School/NYU/Dataset Search/proj/CTA_CPA_Benchmarks/wotab/sherlock-results_v1.json", resume=True)


Output hidden; open in https://colab.research.google.com to view.

## DoDuo Eval on SOTAB

https://github.com/megagonlabs/doduo

RESULTS

~100 SAMPLES, ALL CONTEXT LABELS: ~24%, .58it/s on CPU

~100 SAMPLES, TRIM CONTEXT LABELS: ~24%, .58it/s on CPU

ALL SAMPLES, SMALL CONTEXT LABELS: ~28%, 7it/s on GPU?


In [None]:
# os.chdir("/scratch/bf996/notebooks")

# filename = "doduo-sherlock-to-sotab-trim-full-v1.json"

# sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

# #run_val(model="doduo", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels, resume=False, stop_early=100)
# run_val(model="doduo", save_path=sp, inputs=test_files, input_df=gt_df_test, label_set=context_labels_trim, resume=True)

  0%|          | 0/7026 [00:00<?, ?it/s]

### D4

In [95]:
os.chdir("/scratch/bf996/notebooks")

filename = "doduo-sherlock-to-d4-trim-full-v4.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="doduo", save_path=sp, inputs=d4_dfs, input_df=gt_df_test, label_set=d4_sherlock_labels)


  0%|          | 0/2000 [00:00<?, ?it/s]



 Overall Accuracy score was 0.322 
 Pct Eval: 1.0 

 Example errors: 

Sample Error: 
Label: ['code'] || Prediction: name


Sample Error: 
Label: ['code'] || Prediction: name


Sample Error: 
Label: ['code'] || Prediction: name


Sample Error: 
Label: ['code'] || Prediction: name


Sample Error: 
Label: ['code'] || Prediction: name


Sample Error: 
Label: ['code'] || Prediction: name


Sample Error: 
Label: ['code'] || Prediction: name


Sample Error: 
Label: ['code'] || Prediction: name


Sample Error: 
Label: ['code'] || Prediction: name


Sample Error: 
Label: ['code'] || Prediction: name




## Build Training Dataset for fine tuning GPT / LLAMA / Sherlock

### Sherlock

In [None]:
def train_sherlock(df, gt_df, prompt_dict, model, label_indices, base_prompt):
  data_m = pd.Series(df[label_indices].astype(str).T.values.tolist())
  extract_features(
      "../temporary.csv",
      data_m
  )
  feature_vectors = pd.read_csv("../temporary.csv", dtype=np.float32)
  predicted_labels = model.predict(feature_vectors, "sherlock")
  predicted_labels_dict = {i : sherlock_to_cta.get(predicted_labels[i], [predicted_labels[i]]) for i in range(len(data_m))}
  for idx, label_idx in zip(range(len(data_m)), label_indices):
    prompt = base_prompt + "_" + str(label_idx)
    gt_row = gt_df[gt_df['column_index'] == label_idx]
    if len(gt_row) != 1:
      continue
    label = fix_labels(gt_row['label'].item())
    ans = predicted_labels_dict[idx]
    assert isinstance(ans, list), "ans should be a list"
    res = label in ans
    ans_dict = {"response" : ans, "context" : None, "ground_truth" : label, "correct" : res}
    prompt_dict[prompt] = ans_dict

### LLAMA

#### Old Command

In [None]:

run_val(model="llama", save_path="/content/drive/MyDrive/School/NYU/Dataset Search/proj/CTA_CPA_Benchmarks/wotab/llama_results_prompt_v9.json", inputs=train_files, input_df=gt_df_train, label_set=context_labels, resume=False, response=False, full=True, sample_size=5)


46790it [1:33:10,  8.37it/s]




 Overall Accuracy score was 0.0 
 Pct Eval: 7.49 

 Example errors: 

Sample Error: 
Context (500 chars):  ['SRC: virginityrocks', '$25.00', 'Sold Out', '$25.00', 'Sold Out', '$25.00', 'std: 0.9', 'mean: 7.5', 'mode: 8', 'median: 8.0', 'max: 8', 'min: 6', 'rolling-mean-window-4: [0.0]', '']
Label: price || Prediction: 


Sample Error: 
Context (500 chars):  ['SRC: scvs', "['2.0E1', '1.5E2', '0.0E0']", '0.0E0', "['0.0E0', '7.5E1', '1.0E1']", "['2.0E1', '1.5E2', '0.0E0']", '0.0E0', 'std: 7.21', 'mean: 6.58', 'mode: 4', 'median: 4.0', 'max: 27', 'min: 4', 'rolling-mean-window-4: [0.0]', '']
Label: price || Prediction: 


Sample Error: 
Context (500 chars):  ['SRC: scvs', "['GBP', 'GBP', 'GBP']", 'GBP', "['GBP', 'GBP', 'GBP']", 'GBP', "['GBP', 'GBP', 'GBP']", 'std: 5.43', 'mean: 5.63', 'mode: 4', 'median: 4.0', 'max: 21', 'min: 3', 'rolling-mean-window-4: [0.0]', '']
Label: currency || Prediction: 


Sample Error: 
Context (500 chars):  ['SRC: scvs', "['https://schema.org/InStock', 'http

#### OC + TN + SS

In [23]:
cur_url = "https://079c124ee590ac75fe.gradio.live"

filename = "train-llama-oc+tn+ss-v1.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

run_val(model="llama", save_path=sp, inputs=train_files, input_df=gt_df_train, response=False, label_set=context_labels, resume=True, table_src=True, summ_stats=True, other_col=True, link=f"{cur_url}/run/textgen", method=["similarity"], sample_size = 5)

  0%|          | 0/46790 [00:00<?, ?it/s]



 Overall Accuracy score was 0.0 
 Pct Eval: 7.75 

 Example errors: 



#### Dataset Prep

In [24]:
# Calling DataFrame constructor after zipping
# both lists, with columns specified
import pandas as pd
from pathlib import Path
import io, json

filename = "train-llama-oc+tn+ss-v1.json"

sp = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

with open(sp, 'r') as jf:
  prompt_dict = json.load(jf)

values = []
kl = list(prompt_dict.keys())
for k in kl:
    values.append(prompt_dict[k]['ground_truth'])

df = pd.DataFrame(list(zip(kl, values)),
               columns =['prompt', 'completion'])
#df['prompt'] = df['prompt'].apply(lambda s : s + "$")
#df.to_csv("/content/drive/MyDrive/School/NYU/Dataset Search/proj/CTA_CPA_Benchmarks/gpt_train_v2.csv", index=False)

In [25]:
filename = "train-llama-oc+tn+ss-v1-formatted.json"

target_path = f"/scratch/bf996/llm_er_std/proj/CTA_CPA_Benchmarks/wotab/{filename}"

def find_context(s):
    idx = s.find("INPUT:")
    len_c = len("INPUT:")
    s = s[idx + len_c:] + "\n"
    return s

df['instruction'] = "Select the category which best matches the input. \n"
df['input'] = df['prompt'].apply(lambda x : find_context(x))
df['output'] = df['completion'] + "\n"
df = df.drop(columns=['prompt', 'completion'])
df.to_json(target_path, orient='records', indent=4)

In [26]:
def _make_w_io_base(f, mode: str):
    if not isinstance(f, io.IOBase):
        f_dirname = os.path.dirname(f)
        if f_dirname != "":
            os.makedirs(f_dirname, exist_ok=True)
        f = open(f, mode=mode)
    return f


def _make_r_io_base(f, mode: str):
    if not isinstance(f, io.IOBase):
        f = open(f, mode=mode)
    return f


def jdump(obj, f, mode="w", indent=4, default=str):
    """Dump a str or dictionary to a file in json format.

    Args:
        obj: An object to be written.
        f: A string path to the location on disk.
        mode: Mode for opening the file.
        indent: Indent for storing json dictionaries.
        default: A function to handle non-serializable entries; defaults to `str`.
    """
    f = _make_w_io_base(f, mode)
    if isinstance(obj, (dict, list)):
        json.dump(obj, f, indent=indent, default=default)
    elif isinstance(obj, str):
        f.write(obj)
    else:
        raise ValueError(f"Unexpected type: {type(obj)}")
    f.close()


def jload(f, mode="r"):
    """Load a .json file into a dictionary."""
    f = _make_r_io_base(f, mode)
    jdict = json.load(f)
    f.close()
    return jdict

In [27]:
d = jload(target_path)

In [28]:
d

[{'instruction': 'Select the category which best matches the input. \n',
  'input': '[\'SRC: sacristy\', \'Catholic Bishops of Great Britain: A Reference to Roman Catholic Bishops from 1850 to 2015\', "Into the Depths: A Chaplain\'s Reflections on Death, Dying and Pastoral Care", \'Secret Lives (part 2)\', \'All Hail the Glorious Night (and other Christmas poems): The Complete Christmas Poetry of Kevin Carey\', \'The Writing on the Wall: Everyday Phrases from the King James Bible\', \'std: 20.9\', \'mean: 56.37\', \'mode: 56\', \'median: 58.0\', \'max: 101\', \'min: 16\', \'rolling-mean-window-4: [0.0]\', \'\'] \n CATEGORY: \n',
  'output': 'book\n'},
 {'instruction': 'Select the category which best matches the input. \n',
  'input': "['SRC: sacristy', '1st April 2016', '1st June 2019', '1st October 2016', '1st September 2018', '1st October 2015', 'std: 1.89', 'mean: 15.58', 'mode: 17', 'median: 16.0', 'max: 19', 'min: 12', 'rolling-mean-window-4: [0.0]', ''] \n CATEGORY: \n",
  'outpu

#### OpenAI Fine Tuning

In [None]:
#!yes | openai tools fine_tunes.prepare_data -f "/content/drive/MyDrive/School/NYU/Dataset Search/proj/CTA_CPA_Benchmarks/gpt_train.csv"

## Fit a Random Forest for Numeric Column Classification

### Build Dataset

In [None]:
gt_df_train_num = gt_df_train[gt_df_train['label'].isin(numeric_labels)]
gt_df_val_num = gt_df[gt_df['label'].isin(numeric_labels)]
gt_df_test_num = gt_df_test[gt_df_test['label'].isin(numeric_labels)]

In [None]:
len_context=10
mfs = ["std", "mean", "median", "mode", "max", "min"]
cft_list = [f"col_feature_sample_{i}" for i in range(len_context)]

In [None]:
df_dict = {}
for idx, f in tqdm(enumerate(train_files)):
    #print(f)
    gt_labels = gt_df_train[gt_df_train['table_name'] == f.name]
    if len(gt_labels) < 1:
      continue
    label_indices = pd.unique(gt_labels['column_index']).tolist()
    f_df = pd.read_json(f, compression='gzip', lines=True)
    f_df_cols = f_df.columns
    df_features = [f_df[col].tolist() for col in f_df_cols]
    meta_features = {}
    for idx, col in enumerate(f_df_cols):
      gt_row = gt_labels[gt_labels['column_index'] == idx]
      if len(gt_row) != 1:
        #print(f"skipping {gt_row} with non-matching column qty {len(gt_row)}")
        continue
      meta_features = derive_meta_features(f_df[col])
      for k, v in meta_features.items():
        if isinstance(v, str):
          meta_features[k] = 0.0
      d_name = f.name + "_" + str(col)
      assert df_dict.get(d_name, -1) == -1, "Don't overwrite entries"
      df_dict[d_name] = {'df_path' : f, 'df_name' : f.name, 'col_features' : f_df[col].tolist(), 'label' : gt_row['label'].item(), 'meta_features' : meta_features}

46790it [28:02, 27.81it/s]


In [None]:
# for k, v in df_dict.items():
#   v['df_path'] = str(v['df_path'])
# with open(save_path, 'w', encoding='utf-8') as alt_f:
#   json.dump(df_dict, alt_f, ensure_ascii=False, indent=4)
df = pd.DataFrame(df_dict).T

In [None]:
print("Length of dataframe will be ", len(df_dict))
df = df[df['label'].notnull()]
df['col_feature_sample'] = df['col_features'].apply(get_df_sample_col, rand_seed=13, len_context=len_context, replace=True)
df[cft_list] = pd.DataFrame(df['col_feature_sample'].tolist(), index=df.index)
df['meta_features'] = df['meta_features'].apply(lambda x : x if x else {"std" : 0.0, "mean" : 0.0, "median" : 0.0, "mode" : 0.0, "max" : 0.0, "min" : 0.0, 'rolling-mean-window-4' : [0.0]})
df['meta_features'] = df['meta_features'].apply(fix_mode)
df[mfs] = df['meta_features'].apply(split_meta_features)

Length of dataframe will be  130471


ValueError: ignored

In [None]:
df['label_idx'] = df['label'].apply(lambda x : context_labels['label_set'].index(x))

In [None]:
df.head()

Unnamed: 0,df_path,col_features,label,meta_features,col_feature_sample,col_feature_sample_0,col_feature_sample_1,col_feature_sample_2,col_feature_sample_3,col_feature_sample_4,...,col_feature_sample_7,col_feature_sample_8,col_feature_sample_9,std,mean,median,mode,max,min,label_idx
MusicRecording_mikkidaniel.com_September2020_CTA.json.gz_0,/content/Train/MusicRecording_mikkidaniel.com_...,"[Heaven In The West, ...Thinkin' of You, In It...",MusicRecording/name,"{'std': 4.528292700633286, 'mean': 16.375, 'mo...","[Heaven In The West, ...Thinkin' of You, In It...",Heaven In The West,...Thinkin' of You,In It for the Ride,I Met Jesus In Texas,Polka Dots and Moonbeams,...,Medina Mules,Girl From Kentucky,Texas Plains,4.528293,16.375,16.5,18.0,28.0,8.0,82
MusicRecording_mikkidaniel.com_September2020_CTA.json.gz_2,/content/Train/MusicRecording_mikkidaniel.com_...,"[Mikki Daniel, Mikki Daniel/Doug Figgs/ Marian...",MusicArtistAT,"{'std': 9.841865617888699, 'mean': 16.08333333...","[Mikki Daniel, Mikki Daniel/Doug Figgs/ Marian...",Mikki Daniel,Mikki Daniel/Doug Figgs/ Marian Funke,Doug Figgs,Doug Figgs/Rusty Battenfield/Mariam Funke,Doug Figgs/Todd Carter/Mariam Funke,...,Doug Figgs,Doug Figgs/Rusty Battenfield/Mariam Funke,Doug Figgs/Todd Carter/Mariam Funke,9.841866,16.083333,12.0,12.0,41.0,10.0,70
Product_buttoncare.com_September2020_CTA.json.gz_9,/content/Train/Product_buttoncare.com_Septembe...,[[http://purl.org/goodrelations/v1#FederalExpr...,DeliveryMethod,"{'std': 38.376128944009885, 'mean': 175.818181...",[['http://purl.org/goodrelations/v1#FederalExp...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,38.376129,175.818182,184.0,184.0,184.0,4.0,56
Product_buttoncare.com_September2020_CTA.json.gz_13,/content/Train/Product_buttoncare.com_Septembe...,"[[http://purl.org/goodrelations/v1#DinersClub,...",paymentAccepted,"{'std': 96.36672379273594, 'mean': 435.4545454...",[['http://purl.org/goodrelations/v1#DinersClub...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,96.366724,435.454545,456.0,456.0,456.0,4.0,83
Product_pccables.com_September2020_CTA.json.gz_6,/content/Train/Product_pccables.com_September2...,"[72 Inch, 72 Inch, 72 Inch, 72 Inch, 120 Inch,...",Distance,"{'std': 0.8260419974130604, 'mean': 6.99938949...","[72 Inch, 120 Inch, 2 Inch, 300 Inch, 180 Inch...",72 Inch,120 Inch,2 Inch,300 Inch,180 Inch,...,36 Inch,600 Inch,48 Inch,0.826042,6.999389,7.0,7.0,11.0,6.0,73


In [None]:
df.to_csv(train_save_path)

In [None]:
df_dict = {}
for idx, f in tqdm(enumerate(val_files)):
    #print(f)
    gt_labels = gt_df_val_num[gt_df_val_num['table_name'] == f.name]
    if len(gt_labels) < 1:
      continue
    label_indices = pd.unique(gt_labels['column_index']).tolist()
    f_df = pd.read_json(f, compression='gzip', lines=True)
    f_df_cols = f_df.columns
    df_features = [f_df[col].tolist() for col in f_df_cols]
    meta_features = {}
    for idx, col in enumerate(f_df_cols):
      gt_row = gt_labels[gt_labels['column_index'] == idx]
      if len(gt_row) != 1:
        #print(f"skipping {gt_row} with non-matching column qty {len(gt_row)}")
        continue
      meta_features = derive_meta_features(f_df[col])
      for k, v in meta_features.items():
        if isinstance(v, str):
          meta_features[k] = 0.0
      d_name = f.name + "_" + str(col)
      assert df_dict.get(d_name, -1) == -1, "Don't overwrite entries"
      df_dict[d_name] = {'df_path' : f, 'df_name' : f.name, 'col_features' : f_df[col].tolist(), 'label' : gt_row['label'].item(), 'meta_features' : meta_features}

4811it [01:04, 123.91it/s]

In [None]:
print("Length of dataframe will be ", len(df_dict))
dfv = pd.DataFrame(df_dict).T
dfv = dfv[dfv['label'].notnull()]
dfv['col_feature_sample'] = dfv['col_features'].apply(get_df_sample_col, rand_seed=13, len_context=len_context, replace=True)
cft_list = [f"col_feature_sample_{i}" for i in range(len_context)]
dfv[cft_list] = pd.DataFrame(dfv['col_feature_sample'].tolist(), index=dfv.index)
dfv['meta_features'] = dfv['meta_features'].apply(lambda x : x if x else {"std" : 0.0, "mean" : 0.0, "median" : 0.0, "mode" : 0.0, "max" : 0.0, "min" : 0.0, 'rolling-mean-window-4' : [0.0]})
dfv['meta_features'] = dfv['meta_features'].apply(fix_mode)
dfv[mfs] = dfv['meta_features'].apply(split_meta_features)
dfv['label_idx'] = dfv['label'].apply(lambda x : context_labels['label_set'].index(x))
#dfv['label_idx'] = dfv['label'].apply(lambda x : numeric_labels.index(x))

In [None]:
dfv.to_csv(val_save_path)

In [None]:
df_dict = {}
for idx, f in tqdm(enumerate(test_files)):
    #print(f)
    gt_labels = gt_df_test_num[gt_df_test_num['table_name'] == f.name]
    if len(gt_labels) < 1:
      continue
    label_indices = pd.unique(gt_labels['column_index']).tolist()
    f_df = pd.read_json(f, compression='gzip', lines=True)
    f_df_cols = f_df.columns
    df_features = [f_df[col].tolist() for col in f_df_cols]
    meta_features = {}
    for idx, col in enumerate(f_df_cols):
      gt_row = gt_labels[gt_labels['column_index'] == idx]
      if len(gt_row) != 1:
        #print(f"skipping {gt_row} with non-matching column qty {len(gt_row)}")
        continue
      meta_features = derive_meta_features(f_df[col])
      for k, v in meta_features.items():
        if isinstance(v, str):
          meta_features[k] = 0.0
      d_name = f.name + "_" + str(col)
      assert df_dict.get(d_name, -1) == -1, "Don't overwrite entries"
      df_dict[d_name] = {'df_path' : f, 'df_name' : f.name, 'col_features' : f_df[col].tolist(), 'label' : gt_row['label'].item(), 'meta_features' : meta_features}

NameError: ignored

In [None]:
print("Length of dataframe will be ", len(df_dict))
dft = pd.DataFrame(df_dict).T
dft = dft[dft['label'].notnull()]
dft['col_feature_sample'] = dft['col_features'].apply(get_df_sample_col, rand_seed=13, len_context=len_context, replace=True)
cft_list = [f"col_feature_sample_{i}" for i in range(len_context)]
dft[cft_list] = pd.DataFrame(dft['col_feature_sample'].tolist(), index=dft.index)
dft['meta_features'] = dft['meta_features'].apply(lambda x : x if x else {"std" : 0.0, "mean" : 0.0, "median" : 0.0, "mode" : 0.0, "max" : 0.0, "min" : 0.0, 'rolling-mean-window-4' : [0.0]})
dft['meta_features'] = dft['meta_features'].apply(fix_mode)
dft[mfs] = dft['meta_features'].apply(split_meta_features)
#dft['label_idx'] = dft['label'].apply(lambda x : numeric_labels.index(x))
dft['label_idx'] = dft['label'].apply(lambda x : context_labels['label_set'].index(x))

Length of dataframe will be  3778


In [None]:
dft.to_csv(test_save_path)

In [None]:
df = pd.read_csv(train_save_path)
dft = pd.read_csv(test_save_path)

In [None]:
y_train = df['label_idx'].to_frame()
X_train = df[cft_list + mfs]
# y_val = dfv['label_idx'].to_frame()
# X_val = dfv[cft_list + mfs]
for col in mfs:
  X_train[col] = MinMaxScaler().fit_transform(np.array(X_train[col]).reshape(-1,1))
  #X_val[col] = MinMaxScaler().fit_transform(np.array(X_val[col]).reshape(-1,1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train[col] = MinMaxScaler().fit_transform(np.array(X_train[col]).reshape(-1,1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train[col] = MinMaxScaler().fit_transform(np.array(X_train[col]).reshape(-1,1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train[col] = MinMaxScaler().fit_transfo

In [None]:
y_test = dft['label_idx'].to_frame()
X_test = dft[cft_list + mfs]
for col in mfs:
  X_test[col] = MinMaxScaler().fit_transform(np.array(X_test[col]).reshape(-1,1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test[col] = MinMaxScaler().fit_transform(np.array(X_test[col]).reshape(-1,1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test[col] = MinMaxScaler().fit_transform(np.array(X_test[col]).reshape(-1,1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test[col] = MinMaxScaler().fit_transform(np

In [None]:
X_train[cft_list] = X_train[cft_list].astype(str)
X_test[cft_list] = X_test[cft_list].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train[cft_list] = X_train[cft_list].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test[cft_list] = X_test[cft_list].astype(str)


In [None]:
X_train.head()

Unnamed: 0,col_feature_sample_0,col_feature_sample_1,col_feature_sample_2,col_feature_sample_3,col_feature_sample_4,col_feature_sample_5,col_feature_sample_6,col_feature_sample_7,col_feature_sample_8,col_feature_sample_9,std,mean,median,mode,max,min
0,Heaven In The West,...Thinkin' of You,In It for the Ride,I Met Jesus In Texas,Polka Dots and Moonbeams,One More Ride,Viejo Amigo,Medina Mules,Girl From Kentucky,Texas Plains,1.517848e-15,1.0247170000000001e-17,1.032214e-17,1.1282850000000002e-17,1.7497270000000002e-17,5.0146e-18
1,Mikki Daniel,Mikki Daniel/Doug Figgs/ Marian Funke,Doug Figgs,Doug Figgs/Rusty Battenfield/Mariam Funke,Doug Figgs/Todd Carter/Mariam Funke,Mikki Daniel,Mikki Daniel/Doug Figgs/ Marian Funke,Doug Figgs,Doug Figgs/Rusty Battenfield/Mariam Funke,Doug Figgs/Todd Carter/Mariam Funke,3.298916e-15,1.0064650000000001e-17,7.507007e-18,7.5219e-18,2.562101e-17,6.26825e-18
2,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,['http://purl.org/goodrelations/v1#FederalExpr...,1.286338e-14,1.100237e-16,1.151074e-16,1.153358e-16,1.149821e-16,2.5073e-18
3,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,['http://purl.org/goodrelations/v1#DinersClub'...,3.230137e-14,2.724993e-16,2.852663e-16,2.858322e-16,2.849556e-16,2.5073e-18
4,72 Inch,120 Inch,2 Inch,300 Inch,180 Inch,900 Inch,40 Inch,36 Inch,600 Inch,48 Inch,2.768828e-16,4.380087e-18,4.379088e-18,4.387775e-18,6.873929e-18,3.76095e-18


In [None]:
len(X_train)

130471

In [None]:

# X_val[cft_list] = X_val[cft_list].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train[cft_list] = X_train[cft_list].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val[cft_list] = X_val[cft_list].astype(str)


### Fit XGB / Catboost

In [None]:


# ct = ColumnTransformer([
#         ('somename', StandardScaler(), mfs)
#     ], remainder='passthrough')
# print("dtypes before: ")
# for col in X_train.columns:
#   print(col, X_train[col].dtype)
# X_train_pt = ct.fit_transform(X_train)
# print("dtypes after: ")
# print([X_train_pt[i].dtype for i in range(X_train_pt.shape[1])])
# X_val_pt = ct.transform(X_val)

dtypes before: 
col_feature_sample_0 object
col_feature_sample_1 object
col_feature_sample_2 object
col_feature_sample_3 object
col_feature_sample_4 object
col_feature_sample_5 object
col_feature_sample_6 object
col_feature_sample_7 object
col_feature_sample_8 object
col_feature_sample_9 object
std float64
mean float64
median float64
mode float64
max float64
min float64
dtypes after: 
[dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O')]


In [None]:
# for i in range(10):
#   X_train_pt[i] = X_train_pt[i].astype('|S25')
#   X_val_pt[i] = X_val_pt[i].astype('|S25')
X_train_slice = X_train[:20000]
y_train_slice = y_train[:20000]

In [None]:
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
# Initialize CatBoostClassifier
#learn: A list of metric values calculated for the learning dataset.
cat_features = [0,1,2,3,4,5,6,7,8,9]

model = CatBoostClassifier(iterations=1000,
                           depth=8, 
                           used_ram_limit="10gb", 
                           max_ctr_complexity=5,
                           task_type="GPU",
                           learning_rate=0.2,
                           devices='0:1')

In [None]:
# Fit model
import sys

model.fit(X_train, y_train, cat_features)

model.save_model(catboost_path)

0:	learn: 3.6967643	total: 951ms	remaining: 15m 50s
1:	learn: 3.5482181	total: 1.79s	remaining: 14m 53s
2:	learn: 3.6123178	total: 2.68s	remaining: 14m 51s
3:	learn: 3.0610325	total: 3.78s	remaining: 15m 40s
4:	learn: 3.4674923	total: 4.83s	remaining: 16m
5:	learn: 3.0114103	total: 5.67s	remaining: 15m 39s
6:	learn: 2.8781645	total: 6.35s	remaining: 15m
7:	learn: 2.8241139	total: 6.98s	remaining: 14m 25s
8:	learn: 2.7518230	total: 7.68s	remaining: 14m 5s
9:	learn: 2.6224597	total: 8.33s	remaining: 13m 44s
10:	learn: 2.5375694	total: 9.02s	remaining: 13m 31s
11:	learn: 2.5224940	total: 9.69s	remaining: 13m 17s
12:	learn: 2.4610634	total: 10.4s	remaining: 13m 6s
13:	learn: 2.4158712	total: 11s	remaining: 12m 55s
14:	learn: 2.3907727	total: 11.7s	remaining: 12m 45s
15:	learn: 2.3530938	total: 12.3s	remaining: 12m 38s
16:	learn: 2.3339279	total: 13s	remaining: 12m 29s
17:	learn: 2.3178920	total: 13.6s	remaining: 12m 23s
18:	learn: 2.2985132	total: 14.3s	remaining: 12m 19s
19:	learn: 2.2882

In [None]:

# load pretrained model
model = CatBoostClassifier()
model.load_model(catboost_path)
# Get predicted classes
y_pred = model.predict(X_test)
# Get predicted probabilities for each class
#preds_proba = model.predict_proba(X_test)
# Get predicted RawFormulaVal
# preds_raw = model.predict(eval_data, prediction_type='RawFormulaVal')
#acc = accuracy_score(y_test, y_pred)
#print(acc)
#classif_rept = classification_report(y_test, y_pred, target_names=cll)
#print(classif_rept)
f1s = f1_score(y_test, y_pred, average=None)
label_to_f1 = {k : v for k, v in zip(cll, f1s)}
# with open(catboost_f1s, 'w', encoding='utf-8') as alt_f:
#   json.dump(label_to_f1, alt_f, ensure_ascii=False, indent=4)
label_to_f1

{'currency': 0.30617283950617286,
 'Product/name': 0.24155578300921188,
 'price': 0.0,
 'DateTime': 0.3968072976054732,
 'Date': 0.09420289855072464,
 'Number': 0.0,
 'Integer': 0.2541743970315399,
 'Hotel/name': 0.0,
 'Brand': 0.0,
 'Text': 0.0,
 'IdentifierAT': 0.0,
 'ItemList': 0.0,
 'Recipe/name': 0.0,
 'QuantitativeValue': 0.0,
 'Event/name': 0.045627376425855515,
 'Duration': 0.0,
 'telephone': 0.0,
 'EventStatusType': 0.038461538461538464,
 'PostalAddress': 0.044444444444444446,
 'Place': 0.0,
 'EventAttendanceModeEnumeration': 0.0,
 'Organization': 0.0,
 'priceRange': 0.02515723270440252,
 'Country': 0.0,
 'Person': 0.0,
 'OfferItemCondition': 0.0,
 'ItemAvailability': 0.0,
 'email': 0.0,
 'LocalBusiness/name': 0.0}

In [None]:
type(classif_rept)

str

In [None]:
dft['preds'] = y_pred 
dft.to_csv(test_save_path, index=False)

In [None]:
# import xgboost as xgb


# mod = xgb.XGBRegressor(
#     gamma=1,                 
#     learning_rate=0.01,
#     max_depth=3,
#     n_estimators=10000,                                                                    
#     subsample=0.8,
#     random_state=34
# ) 

# mod.fit(X_train, y_train)
# predictions = mod.predict(X_val)
# rmse = sqrt(mean_squared_error(y_val, predictions))
# print("score: {0:,.0f}".format(rmse))

ValueError: ignored