In [6]:
from src.db.database import session_scope
from src.db import models

# List every distinct value of Firms.sector.
with session_scope() as session:
    # Each row is a one-element tuple, so unpack it to get the bare sector value.
    res = [
        sector
        for (sector,) in session.query(models.Firms.sector).distinct().all()
    ]

res

['Electric Utilities',
 'Communications Equipment',
 'Life Sciences Tools & Services',
 'Health Care REITs',
 'Electronic Equipment & Instruments',
 'Application Software',
 'Asset Management & Custody Banks',
 'IT Consulting & Other Services',
 'Casinos & Gaming',
 'Health Care Distributors',
 'Air Freight & Logistics',
 'Automobile Manufacturers',
 'Diversified Banks',
 'Specialty Chemicals',
 'Publishing',
 'Integrated Telecommunication Services',
 'Packaged Foods & Meats',
 'Construction Machinery & Heavy Transportation Equipment',
 'Consumer Finance',
 'Passenger Airlines',
 'Financial Exchanges & Data',
 'Health Care Equipment',
 'Oil & Gas Exploration & Production',
 'Restaurants',
 'Life & Health Insurance',
 'Industrial Machinery & Supplies & Components',
 'Insurance Brokers',
 'Homebuilding',
 'Heavy Electrical Equipment',
 'Biotechnology',
 'Rail Transportation',
 'Hotel & Resort REITs',
 'Multi-Utilities',
 'Oil & Gas Refining & Marketing',
 'Tobacco',
 'Systems Software',


# Llama-Index Text-To-SQL Retrieval Agent
### Thoughts:
- Too inconsistent in its performance
- Easily makes up facts in the absence of results
- Isn't really able to grasp the full context of the data structure and meaning
- The underlying functionality is difficult to modify, particularly the prompt template used for the text-to-SQL step that runs before response synthesis.

In [7]:
import os
from dotenv import load_dotenv
from IPython.display import Markdown, display
import pandas as pd

from llama_index.core import SQLDatabase
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.struct_store.sql_query import (
    SQLTableRetrieverQueryEngine,
)
from llama_index.core.objects import (
    SQLTableNodeMapping,
    ObjectIndex,
    SQLTableSchema,
)
from llama_index.core import VectorStoreIndex, PromptTemplate

from src.db.database import engine
from src.db import models


# Load environment variables via python-dotenv before reading the API key.
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# LLM handle; later cells in this notebook reuse `llm`.
llm = OpenAI(temperature=0.1, model="gpt-4o-mini", api_key=OPENAI_API_KEY)

# Wrap the project's SQLAlchemy engine for llama-index.
sql_database = SQLDatabase(engine)
table_node_mapping = SQLTableNodeMapping(sql_database)

# One schema object per attribute of `models` that declares `__tablename__`.
# `__context_str__` is read defensively: the filter only checks for
# `__tablename__`, so an attribute without a context string is indexed with
# no extra context instead of aborting the whole cell with AttributeError.
table_schema_objs = [
    SQLTableSchema(
        table_name=table.__tablename__,
        context_str=getattr(table, "__context_str__", None),
    )
    for table in vars(models).values()
    if hasattr(table, "__tablename__")
]

# The index class is passed by keyword so it cannot be bound to a different
# positional parameter of `from_objects` (the positional order is not the same
# across llama-index releases -- NOTE(review): confirm against the installed version).
obj_index = ObjectIndex.from_objects(
    table_schema_objs,
    table_node_mapping,
    index_cls=VectorStoreIndex,
)

# Template used to turn the SQL result into the final natural-language answer.
# Built from adjacent string literals: a backslash continuation inside a single
# literal would embed the next source line's indentation into the prompt text.
response_synthesis_prompt_str = (
    "Given an input question, synthesize a response from the query results. "
    "You must ensure your response is completely factual.\n"
    "<query>{query_str}</query>\n"
    "<sql>{sql_query}</sql>\n"
    "<sql response>SQL Response: {context_str}</sql response>\n"
    "Response: "
)
response_synthesis_prompt = PromptTemplate(response_synthesis_prompt_str)

# `similarity_top_k=1` means only the single best-matching table schema is
# retrieved for each question.
# `llm=llm` makes the engine use the model configured in this notebook rather
# than whatever global default llama-index would otherwise pick.
query_engine = SQLTableRetrieverQueryEngine(
    sql_database,
    obj_index.as_retriever(similarity_top_k=1),
    llm=llm,
    response_synthesis_prompt=response_synthesis_prompt,
)

# Alternative questions tried against the engine (kept for reference):
# query = "What are the fields in the meetings table and what do they represent contextually?"
# query = "Using just your provided system messaging and without using SQL, \
#     What are the fields in the meetings table and what do they represent contextually?"
# query = "What is the name of the firm that has the most meetings and how many meets do they have?"
# query = "Can you show me the first 5 rows of meetings?"
query = (
    "Fetch the first 5 meetings and their content which have a firm attended "
    "that are in the IT Consulting & Other Services sector."
)
response = query_engine.query(query)

# Show the generated SQL first, then the synthesized answer.
print("SQL Query:")
print(f"```\n{response.metadata['sql_query']}\n```")
print("Response:")
display(Markdown(f"<b>{response}</b>"))

# When the raw rows are present in the metadata, render them as a table too.
if "result" in response.metadata:
    display(
        pd.DataFrame(
            data=response.metadata["result"],
            columns=response.metadata["col_keys"],
        )
    )

SQL Query:
```
SELECT m.meeting_id, m.title, m.content
FROM meetings m
JOIN firms f ON m.firm_attended_id = f.firm_id
WHERE f.sector = 'IT Consulting & Other Services'
ORDER BY m.date
LIMIT 5;
```
Response:


<b>The first 5 meetings attended by firms in the IT Consulting & Other Services sector are as follows:
1. Meeting with Accenture: Discussed potential synergies, tech innovations, data analytics, regulatory changes, and improved communication channels.
2. Call with Cognizant: Discussed collaboration opportunities, growth in digital services, key sectors for investment, and regulatory challenges.
3. Call with Accenture: Discussed Verizon's market performance, investment in 5G technology, competition, customer retention strategies, and potential for M&A.
4. Call with Cognizant: Discussed Incyte's pipeline developments, GE HealthCare's market position, Entergy's regulatory challenges, and Gilead Sciences' acquisitions.
5. Email with Cognizant: Discussion on potential collaborations with IQVIA, Yum! Brands, Host Hotels & Resorts, and Cummins.</b>

Unnamed: 0,meeting_id,title,content
0,b1df9bfc-febd-44d2-9cf4-ae5e905953d5,Meeting with Accenture,- Discussed potential synergies between our fi...
1,93ee4cf5-ca97-48ce-ac42-e9b01e83c605,Call with Cognizant,- Discussed potential collaboration opportunit...
2,6539b388-0b71-4ed4-b792-a3088d7496c6,Call with Accenture,- Discussed Verizon's recent market performanc...
3,18d357db-b908-4360-8054-3c16bccbba8f,Call with Cognizant,- Discussed Incyte's recent pipeline developme...
4,8cb7004e-07f8-4180-b1cd-acf71a3a6a2b,Email with Cognizant,Subject: Discussion on Potential Collaboration...


# Custom Simplified Implementation
- Much slower
- Has chain of thought reasoning with verbosity
- Still has issues constructing queries
- Need to consider how the information is presented back to the User in a memory-friendly way
    - Could the retrieval return just the beam_ids?
        - Could these then be added to the user's 'meetings in-focus' view?
    - Could return the results as markdown (BIG CONTEXT ISSUE)

In [8]:
from src.db.database import session_scope
from src.rag.sql_retriever import SQLAgent


# Custom agent built from the shared `llm` and the path to the models file,
# with verbose output switched on.
agent = SQLAgent(llm, "src/db/models.py", verbose=True)

# query = "I need all meetings between 2022-01-01 and 2023-01-01 where the firms that attended are in the Energy sector."
query = (
    "Return the beam_ids of all meetings between 2022-01-01 and 2023-01-01 "
    "where the firms that attended are in the IT Consulting & Other Services sector."
)

with session_scope() as session:
    response = agent.complete(session, query)

# Render the result as a markdown table, index included.
# NOTE(review): `to_markdown` suggests `response` is a pandas DataFrame -- confirm.
response_md = response.to_markdown(index=True)
print(response_md)

CHAIN OF THOUGHTS:
Thoughts: I need to retrieve the beam_ids from the meetings table where the date of the meeting is between 2022-01-01 and 2023-01-01. Additionally, I need to filter the results based on the firms that attended the meetings, specifically those in the 'IT Consulting & Other Services' sector.
Outcome: I will need to join the meetings table with the firms table through the firm_attended_id to access the sector information. 

Thoughts: The date filter will be applied to the date column in the meetings table. I will also need to ensure that the sector filter is applied to the firms table.
Outcome: I will add a filter for the date range and the sector in the SQL query. 

Thoughts: The final query should select the beam_id from the meetings table, joining it with the firms table to filter by sector and applying the date range filter.
Outcome: The fields will include meetings.beam_id, and I will join meetings with firms on firm_attended_id and firm_id. 

SQL QUERY:
```
SELECT

# Some additional tests

In [45]:
# Pick a test user: the employee who appears most often as an attendee of
# meetings dated between 2025-01-15 and 2025-02-15 (inclusive).
with session_scope() as session:
    meetings_in_window = (
        session.query(models.Meetings)
        .filter(models.Meetings.date >= "2025-01-15", models.Meetings.date <= "2025-02-15")
        .all()
    )
    # Read the relationship and the employee ids while the session is still
    # open, so nothing has to be loaded from detached objects afterwards.
    res = [
        str(employee.employee_id)
        for meeting in meetings_in_window
        for employee in meeting.employees
    ]

# Fail with a clear message instead of an IndexError on `.index[0]` below.
if not res:
    raise ValueError(
        "No employee attendance found for meetings between 2025-01-15 and 2025-02-15."
    )

# One id per attendance, so the most frequent value is the busiest attendee.
user_id = pd.Series(res).value_counts().sort_values(ascending=False).index[0]

In [46]:
# Meetings in the same date window that `user_id` attended.
with session_scope() as session:
    res = (
        session.query(models.Meetings)
        # Successive filter() calls are combined with AND.
        .filter(models.Meetings.date >= "2025-01-15")
        .filter(models.Meetings.date <= "2025-02-15")
        .filter(models.Meetings.employees.any(models.Employees.employee_id == user_id))
        .all()
    )

len(res)

2

In [47]:
# Ask the agent for a summary, supplying the employee id and a fixed
# "current date" inside the question text itself.
query = (
    "Give me a summary of meetings I have attended in the last month "
    f"(employee_id: {user_id}, current date: 2025-02-15)."
)

with session_scope() as session:
    response = agent.complete(session, query)

# Render the result as a markdown table, index included.
response_md = response.to_markdown(index=True)
print(response_md)

CHAIN OF THOUGHTS:
Thoughts: I need to retrieve a summary of meetings attended by a specific employee within the last month. The employee's ID is provided, and the current date is given as 2025-02-15. I will need to filter the meetings based on the date range from 2025-01-15 to 2025-02-15.
Outcome: I will filter the meetings based on the date column to include only those that fall within the last month. 

Thoughts: The meetings attended by employees are stored in the 'employee_meetings' association table, which links employees to meetings. I will need to join this table with the 'meetings' table to get the meeting details.
Outcome: I will join the 'employee_meetings' table with the 'meetings' table to retrieve the relevant meeting information. 

Thoughts: I need to select relevant fields from the meetings table, such as title, content, date, and created_at, to provide a summary of the meetings attended.
Outcome: I will select the title, content, date, and created_at fields from the mee