In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to local modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import os
import sys

In [4]:
# Make the parent directory importable (presumably where the local `utils`
# package lives - confirm against the repository layout).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path.
project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)

In [5]:
import numpy as np
import pandas as pd
from utils.constants import question_ids_map
from utils.experiment import Experiment
from utils.questions import (
    construct_single_question_for_ai,
)


  from tqdm.autonotebook import tqdm


In [6]:
# Resource names are read from the environment; an unset variable falls back
# to an empty string rather than raising.
bucket_name, index_name = (
    os.environ.get(env_var, "") for env_var in ("BUCKET_NAME", "INDEX_NAME")
)

In [7]:
# Echo the resolved configuration, one value per line, as a sanity check.
print(f"Bucket name: {bucket_name}", f"Index name: {index_name}", sep="\n")

Bucket name: gaaprt-reader-dev
Index name: development-gaaprt-reader-openai


Retrieve questions


In [8]:
# Load the question catalogue, drop the "encoding" column and turn NaN cells
# into None so missing values are plain Python nulls downstream.
questions_df = (
    pd.read_excel("../data/asc_842/grt_ai_reader_questions_v1.xlsx")
    .drop(columns=["encoding"])
    .replace({np.nan: None})
)

Retrieve answers file


In [9]:
# Answers spreadsheet handed to every experiment below (presumably the
# expected values the AI answers are compared against - confirm in Experiment).
answers_path = "../data/asc_842/lease_agreements_info/answers.xlsx"
answers_df = pd.read_excel(answers_path)

Build the question to send to the AI (the `end_date` question)


In [10]:
# Select the question registered under "end_date" in question_ids_map and
# convert its spreadsheet row into the payload passed to every experiment.
question_id = question_ids_map["end_date"]
question_df_filtered = questions_df[questions_df["id"] == question_id]

# Fail with an explicit message instead of an opaque IndexError from `[0]`
# when the questions spreadsheet has no row for this id.
if question_df_filtered.empty:
    raise ValueError(f"No question with id {question_id!r} found in questions_df")

# If several rows share the id, the first one is used (same as before).
question_as_json = question_df_filtered.to_dict(orient="records")[0]
question_for_ai = construct_single_question_for_ai(question_as_json)

## Experiment 1:

- gpt-3.5-turbo
- text-embedding-ada-002
- unstructured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-3.5-turbo",
    embedding_model="text-embedding-ada-002",
    use_structured_outputs=False,
    index_name="eafit-text-embedding-ada-002",
    date_question=True,
    csv_results_filename="results/end_date_experiment1.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 2022-01-25
Real answer: 2022-01-25
CORRECT
----------------------------------------------------
Answer: 2024-03-01
Real answer: 2024-03-01
CORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 2005-04-01
Real answer: 1995-04-01
INCORRECT
----------------------------------------------------
Answer: 2023-01-01
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: 2024-03-01
Real answer: 2024-03-01
CORRECT
----------------------------------------------------
Answer: null
Real answer: 2015-09-01
INCORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
-------------

## Experiment 2:

- gpt-3.5-turbo
- text-embedding-3-small
- unstructured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-3.5-turbo",
    embedding_model="text-embedding-3-small",
    use_structured_outputs=False,
    index_name="eafit-text-embedding-3-small",
    date_question=True,
    csv_results_filename="results/end_date_experiment2.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 2022-01-25
Real answer: 2022-01-25
CORRECT
----------------------------------------------------
Answer: 2024-04-01
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 2007-05-01
Real answer: 1995-04-01
INCORRECT
----------------------------------------------------
Answer: unknown
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: N/A
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
---------------

## Experiment 3:

- gpt-3.5-turbo
- text-embedding-3-large
- unstructured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-3.5-turbo",
    embedding_model="text-embedding-3-large",
    use_structured_outputs=False,
    index_name="eafit-text-embedding-3-large",
    date_question=True,
    csv_results_filename="results/end_date_experiment3.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 2023-07-30
Real answer: 2022-01-25
INCORRECT
----------------------------------------------------
Answer: 2024-04-01
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 2007-05-01
Real answer: 1995-04-01
INCORRECT
----------------------------------------------------
Answer: 2023-01-01
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: 2027-02-28
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
---

## Experiment 4:

- gpt-3.5-turbo
- text-embedding-ada-002
- structured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-3.5-turbo",
    embedding_model="text-embedding-ada-002",
    use_structured_outputs=True,
    index_name="eafit-text-embedding-ada-002",
    date_question=True,
    csv_results_filename="results/end_date_experiment4.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 2022-01-25
Real answer: 2022-01-25
CORRECT
----------------------------------------------------
Error processing lease002: '25'
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 2005-04-01
Real answer: 1995-04-01
INCORRECT
----------------------------------------------------
Answer: 2023-01-01
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: 2024-03-01
Real answer: 2024-03-01
CORRECT
----------------------------------------------------
Answer: null
Real answer: 2015-09-01
INCORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
----------------------------------------------------
Answer: 2022-10-01
Real answer: 

## Experiment 5:

- gpt-3.5-turbo
- text-embedding-3-small
- structured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-3.5-turbo",
    embedding_model="text-embedding-3-small",
    use_structured_outputs=True,
    index_name="eafit-text-embedding-3-small",
    date_question=True,
    csv_results_filename="results/end_date_experiment5.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 2022-01-25
Real answer: 2022-01-25
CORRECT
----------------------------------------------------
No valid JSON found in the string.
No answer found
Answer: None
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 2007-05-01
Real answer: 1995-04-01
INCORRECT
----------------------------------------------------
Answer: 2023-01-01
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: N/A
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01

## Experiment 6:

- gpt-3.5-turbo
- text-embedding-3-large
- structured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-3.5-turbo",
    embedding_model="text-embedding-3-large",
    use_structured_outputs=True,
    index_name="eafit-text-embedding-3-large",
    date_question=True,
    csv_results_filename="results/end_date_experiment6.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 2023-07-30
Real answer: 2022-01-25
INCORRECT
----------------------------------------------------
Answer: 2024-04-01
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 2007-05-01
Real answer: 1995-04-01
INCORRECT
----------------------------------------------------
Answer: 2023-01-01
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: 2027-02-28
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
---

## Experiment 7:

- gpt-4o-mini
- text-embedding-3-small
- unstructured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-4o-mini",
    embedding_model="text-embedding-3-small",
    use_structured_outputs=False,
    index_name="eafit-text-embedding-3-small",
    date_question=True,
    csv_results_filename="results/end_date_experiment7.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 2022-01-25
Real answer: 2022-01-25
CORRECT
----------------------------------------------------
Answer: 2024-04-01
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 2005-04-01
Real answer: 1995-04-01
INCORRECT
----------------------------------------------------
Answer: 
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: 
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
-------------------------

## Experiment 8:

- gpt-4o-mini
- text-embedding-3-large
- unstructured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-4o-mini",
    embedding_model="text-embedding-3-large",
    use_structured_outputs=False,
    index_name="eafit-text-embedding-3-large",
    date_question=True,
    csv_results_filename="results/end_date_experiment8.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 
Real answer: 2022-01-25
INCORRECT
----------------------------------------------------
Answer: 2024-04-01
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 1995-04-01
Real answer: 1995-04-01
CORRECT
----------------------------------------------------
Answer: 
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: 
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
-----------------------------------

## Experiment 9:

- gpt-4o-mini
- text-embedding-3-small
- structured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-4o-mini",
    embedding_model="text-embedding-3-small",
    use_structured_outputs=True,
    index_name="eafit-text-embedding-3-small",
    date_question=True,
    csv_results_filename="results/end_date_experiment9.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 2022-01-25
Real answer: 2022-01-25
CORRECT
----------------------------------------------------
Answer: 2024-04-01
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 2005-04-01
Real answer: 1995-04-01
INCORRECT
----------------------------------------------------
Answer: YYYY-MM-DD
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: None
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
-----------

## Experiment 10:

- gpt-4o-mini
- text-embedding-3-large
- structured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-4o-mini",
    embedding_model="text-embedding-3-large",
    use_structured_outputs=True,
    index_name="eafit-text-embedding-3-large",
    date_question=True,
    csv_results_filename="results/end_date_experiment10.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: YYYY-MM-DD
Real answer: 2022-01-25
INCORRECT
----------------------------------------------------
Answer: 2024-03-01
Real answer: 2024-03-01
CORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 1995-04-01
Real answer: 1995-04-01
CORRECT
----------------------------------------------------
Answer: YYYY-MM-DD
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: 2024-03-01
Real answer: 2024-03-01
CORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
---------

## Experiment 11:

- gpt-4o-2024-08-06
- text-embedding-3-small
- structured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-4o-2024-08-06",
    embedding_model="text-embedding-3-small",
    use_structured_outputs=True,
    index_name="eafit-text-embedding-3-small",
    date_question=True,
    csv_results_filename="results/end_date_experiment11.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: 2022-01-25
Real answer: 2022-01-25
CORRECT
----------------------------------------------------
Answer: 2024-04-01
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 2005-04-01
Real answer: 1995-04-01
INCORRECT
----------------------------------------------------
Answer: None
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: None
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
-----------------

## Experiment 12:

- gpt-4o-2024-08-06
- text-embedding-3-large
- structured outputs


In [None]:
# Settings for this run, gathered in one mapping and unpacked into Experiment.
experiment_kwargs = dict(
    answers_df=answers_df,
    question_id=question_id,
    question_for_ai=question_for_ai,
    model="gpt-4o-2024-08-06",
    embedding_model="text-embedding-3-large",
    use_structured_outputs=True,
    index_name="eafit-text-embedding-3-large",
    date_question=True,
    csv_results_filename="results/end_date_experiment12.csv",
)
experiment = Experiment(**experiment_kwargs)
experiment.run()

Answer: None
Real answer: 2022-01-25
INCORRECT
----------------------------------------------------
Answer: 2024-03-01
Real answer: 2024-03-01
CORRECT
----------------------------------------------------
Answer: 2010-10-01
Real answer: 2010-10-01
CORRECT
----------------------------------------------------
Answer: 1995-04-01
Real answer: 1995-04-01
CORRECT
----------------------------------------------------
Answer: 2023-10-01
Real answer: 2021-08-01
INCORRECT
----------------------------------------------------
Answer: 2010-01-31
Real answer: 2010-01-31
CORRECT
----------------------------------------------------
Answer: 2018-02-01
Real answer: 2018-02-01
CORRECT
----------------------------------------------------
Answer: None
Real answer: 2024-03-01
INCORRECT
----------------------------------------------------
Answer: 2015-09-01
Real answer: 2015-09-01
CORRECT
----------------------------------------------------
Answer: 2017-10-01
Real answer: 2017-10-01
CORRECT
-------------------