In [None]:
%pip install 'vanna[chromadb,openai,postgres]'

In [1]:
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore
from openai import OpenAI
import os
from dotenv import load_dotenv

from contextlib import redirect_stdout, redirect_stderr
import io

## You will need to have the OpenAI key in the .env file, along with having it set to NEW_OPENAI_API_KEY=sk-....

In [2]:
# Load the .env file
load_dotenv()

# Retrieve the values using os.environ
api_key = os.getenv('NEW_OPENAI_API_KEY')

In [None]:
print(f"The value of the api_key is the {api_key}")

## Running this code will generate a chroma.sqlite3 file, which is likely the ChromaDB?

The code below creates a chroma.sqlite3 file

In [3]:
class MyVanna(ChromaDB_VectorStore, OpenAI_Chat):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        OpenAI_Chat.__init__(self, config=config)

# Running this code creates the chroma.sqlite3 file
vn = MyVanna(config={'api_key': api_key, 'model': 'gpt-3.5-turbo'})

In [None]:
import requests
ip = requests.get('https://api.ipify.org').text
print(f"Your Colab runtime IP is: {ip}")

## Remember the host number might change! Change that whenever the VM stops running

A sample of what the database looks like is in the csv of this code or in /Vanna/courses.csv

In [4]:
vn.connect_to_postgres(host='34.9.3.158', dbname='ece_day_db', user='postgres', password='butlar', port='5432')

## Running this code will create the information schema (Directories full of training data and embeddings?)

In [5]:
# The information schema query may need some tweaking depending on your database. This is a good starting point.

# This SQL code retrieves all the information across all the tables in all tables within a database.
df_information_schema = vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS")


# This will break up the information schema into bite-sized chunks that can be referenced by the LLM
plan = vn.get_training_plan_generic(df_information_schema)

In [6]:
vn.train(plan=plan)

In [7]:
# At any time you can inspect what training data the package is able to reference
training_data = vn.get_training_data()
training_data

Unnamed: 0,id,question,content,training_data_type
0,7458e153-9934-539e-a38e-abd7448607c9-doc,,The following columns are in the pg_type table...,documentation
1,5939b086-28e7-591b-a300-ef5c130ced98-doc,,The following columns are in the pg_language t...,documentation
2,802fdd13-31bc-56a0-a856-67aecc7390e2-doc,,The following columns are in the pg_class tabl...,documentation
3,144df4b3-3295-588f-9b1a-06641a83b46b-doc,,The following columns are in the pg_stat_sys_i...,documentation
4,eb3d7405-6031-558c-ab12-83b536ed6f5e-doc,,The following columns are in the pg_stat_ssl t...,documentation
...,...,...,...,...
205,e93b3320-1224-56a2-8ad9-4f6f5e216f9f-doc,,The following columns are in the sql_parts tab...,documentation
206,b8b56f68-f2b4-5429-9bb8-5a31818fda90-doc,,The following columns are in the collations ta...,documentation
207,c064f41b-6cd8-549a-b5de-c8f8ba89b7f7-doc,,The following columns are in the enabled_roles...,documentation
208,039b55f7-a0de-5a72-9262-37f995b65c9f-doc,,The following columns are in the team_members ...,documentation


In [8]:
vn.train(documentation="Associate location as where. So for example, 'Where is Team 12?' should respond with 'Table 12'")

Adding documentation....


'674f636d-fd7a-5a99-9d6b-2cfe54843276-doc'

In [9]:
vn.train(documentation="My database outlines each group within Boston University senior Electrical and Computer Engineering (ECE) design course. The ECE Senior Design capstone course serves as an opportunity " \
"for students to execute the education they have gained in the" \
"classroom throughout their undergraduate careers, in order to" \
"produce prototypes for real-world clients. Student teams serve" \
"volunteer customers drawn from industry, government, small" \
"businesses, non-profits, schools, artists, faculty, and staff. ")

Adding documentation....


'808468bd-5748-54a2-a9b7-6b4526c76830-doc'

In [10]:
vn.train(documentation="Asking about one person's project would return all of the group members. So when I ask 'Who are Ivan Garcia's" \
"group mates, it should respond Rohan Kumar, Harlan Jones, Karl Naba'")

Adding documentation....


'22426660-764f-520f-945c-6e1ca9441954-doc'

In [11]:
vn.train(documentation="When I ask about someone's groupmates, make sure that you list the client as the client, so for team 15, the client would be Emily Lam")

Adding documentation....


'0abb6086-e669-5bf1-8291-073bbeb20801-doc'

In [6]:
vn.train(documentation="Associate each person with their project. It is possessive. So when I ask what is John Smith's project, you should parse through the abstract summary and give me their" \
"associated project.")

Adding documentation....


'50a15d98-006f-59cc-b913-cc214247e5e6-doc'

In [None]:
# You can remove training data if there's obsolete/incorrect information.
vn.remove_training_data(id='66230b0b-b604-5bad-96fe-99bc906f0502-doc')
training_data

In [None]:
# %%capture captured_output
my_question = "How many student thesis projects are there? Can you give me an example about one of them?"
response = vn.ask(question=my_question, allow_llm_to_see_data=True)

In [None]:
capt_vanna_ans= captured_output.stdout
if "ERROR" not in capt_vanna_ans:
    lines = capt_vanna_ans.splitlines()

    SQL_query_commands = ["SELECT", "AND", "WHERE", "FROM", ";", "gpt", "SQL", "LLM", "`"]
    filtered_lines = [line for line in lines if not any(substring in line for substring in SQL_query_commands)]
    vanna_ans_tables = "\n".join(filtered_lines)

    print(vanna_ans_tables)
else:
    print("Bruh happened")

In [None]:
client = OpenAI()
client.api_key = api_key

completion = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "developer", "content": "You are a helpful assistant in interpreting data tables into complete sentences and an intelligible response."},
    {"role": "user", "content": f"Answer the question of: {my_question}, given this data table of{vanna_ans_tables}"}
  ]
)

print(completion.choices[0].message.content)


In [None]:
class MyVanna(ChromaDB_VectorStore, OpenAI_Chat):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        OpenAI_Chat.__init__(self, config=config)

def answer_course_question_new(question: str):
    vn = MyVanna(config={'api_key': api_key, 'model': 'gpt-3.5-turbo'})
    vn.connect_to_postgres(host='34.134.126.254', dbname='tutorialDB', user='postgres', password='butlar', port='5432')
    # training_data = vn.get_training_data()
    # print(training_data)

    dummy_output = io.StringIO()
    dummy_error = io.StringIO()
    with redirect_stdout(dummy_output), redirect_stderr(dummy_error):
        response = vn.ask(question=question, allow_llm_to_see_data=True, print_results=False)
    
    if response[1]:
        print(f"The response is {response[1]}")
    return response[1]

newple = answer_course_question_new("What courses does Tali Moreshet teach?")

In [None]:
print(newple[1])

In [None]:
from vanna.flask import VannaFlaskApp
app = VannaFlaskApp(vn)
app.run()