In [1]:
# Install Vanna together with its ChromaDB, OpenAI and Postgres extras.
# TODO: pin an exact version (e.g. vanna==X.Y.Z) so a fresh run installs the same code.
%pip install 'vanna[chromadb,openai,postgres]'

Note: you may need to restart the kernel to use updated packages.


In [1]:
from vanna.openai import OpenAI_Chat
from vanna.chromadb import ChromaDB_VectorStore
from openai import OpenAI
import os
from dotenv import load_dotenv

## Store your OpenAI API key in a `.env` file as a line of the form `NEW_OPENAI_API_KEY=sk-....`; the next cell loads it from there. Do not commit the `.env` file.

In [2]:
# Load the variables defined in the .env file into the process environment.
load_dotenv()

# Read the OpenAI key; os.getenv returns None when the variable is not set.
api_key = os.getenv('NEW_OPENAI_API_KEY')
if not api_key:
    # Fail here with an actionable message instead of much later inside an
    # API call, where a missing key is harder to diagnose.
    raise RuntimeError(
        "NEW_OPENAI_API_KEY is not set. Add a line 'NEW_OPENAI_API_KEY=sk-...' "
        "to your .env file and re-run this cell."
    )

In [3]:
# Never print the key itself: cell outputs are saved inside the notebook file
# and travel with it wherever it is shared. Only confirm that a key is present.
print(f"OpenAI API key loaded: {bool(api_key)}")

The value of the api_key is the [REDACTED: the key was exposed in this saved output; revoke and rotate it]


## Running the next cell creates a `chroma.sqlite3` file in the working directory; this is where the ChromaDB vector store persists its data.

In [39]:
class MyVanna(ChromaDB_VectorStore, OpenAI_Chat):
    """Combine the ChromaDB_VectorStore and OpenAI_Chat base classes in one object."""

    def __init__(self, config=None):
        # Initialise each base class explicitly, vector store first, handing
        # both the same config object.
        for parent in (ChromaDB_VectorStore, OpenAI_Chat):
            parent.__init__(self, config=config)

# Settings handed to both base-class initialisers: the OpenAI key and the chat model name.
vanna_settings = {
    'api_key': api_key,
    'model': 'gpt-3.5-turbo',
}
vn = MyVanna(config=vanna_settings)

In [40]:
import requests

# Ask an external service for the public IP address of the machine running
# this kernel (presumably needed to allow-list it on the database host - confirm).
# The timeout stops the cell hanging forever if the service is unreachable, and
# raise_for_status() prevents an HTTP error page from being reported as the IP.
ip_response = requests.get('https://api.ipify.org', timeout=10)
ip_response.raise_for_status()
ip = ip_response.text
print(f"Your Colab runtime IP is: {ip}")

Your Colab runtime IP is: [REDACTED]


## Remember that the host IP address may change! Update it whenever the VM is stopped and restarted.

In [41]:
# Connection settings are read from the environment (filled from .env by the
# earlier load_dotenv() call) so that credentials are not stored in the notebook.
# The non-secret settings fall back to the values this notebook was written
# with; the password deliberately has no fallback.
pg_password = os.getenv('POSTGRES_PASSWORD')
if not pg_password:
    raise RuntimeError(
        "POSTGRES_PASSWORD is not set. Add a line 'POSTGRES_PASSWORD=...' to your "
        ".env file, re-run the cell that calls load_dotenv(), then re-run this cell."
    )

vn.connect_to_postgres(
    host=os.getenv('POSTGRES_HOST', '34.31.179.146'),
    dbname=os.getenv('POSTGRES_DB', 'tutorialDB'),
    user=os.getenv('POSTGRES_USER', 'postgres'),
    password=pg_password,
    port=os.getenv('POSTGRES_PORT', '5432'),
)


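## Running this cell reads the database's information schema (the catalogue describing every table and column), turns it into a training plan, and trains Vanna on that plan so the LLM knows the table structure.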

In [6]:
# The information-schema query may need adjusting for your database; this is a good starting point.

# Fetch one row per column of every table visible in INFORMATION_SCHEMA.COLUMNS.
df_information_schema = vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS")

# Break the information schema into bite-sized chunks that can be referenced by the LLM.
plan = vn.get_training_plan_generic(df_information_schema)
# NOTE(review): a bare expression is only displayed when it is the last statement of a
# cell, so the line below shows nothing here; move it to its own cell to inspect the plan.
plan


# NOTE: this call is active, so the plan is applied as soon as the cell runs.
# Comment it out if you want to review the plan before training.
vn.train(plan=plan)

In [7]:
# Give Vanna a plain-language description of what the tables contain.
# The text is built from adjacent string literals inside parentheses; every
# fragment except the last ends with a space so that words are not fused
# together at the joins.
vn.train(documentation=(
    "My tables outline the courses for a college university. In each row you can see the college "
    "course with its respective course number and name, which professor teaches it, what times it runs from, "
    "where it meets, how many students can attend, and how many units the course is worth."
))

Adding documentation....


'66230b0b-b604-5bad-96fe-99bc906f0502-doc'

In [None]:
# At any time you can inspect the training data the package is able to reference.
# The result is kept in `training_data` and, as the last expression of the cell,
# is also displayed.
training_data = vn.get_training_data()
training_data

In [None]:
# To delete an obsolete or incorrect training entry, uncomment the call below and
# replace the id with the id of the entry you want to remove.
# vn.remove_training_data(id='5dc9b25e-6409-51b3-9ae7-1e51152309b8-doc')
# NOTE(review): `training_data` was fetched before the removal, so it presumably still
# lists the deleted entry; call vn.get_training_data() again to see the current state.
# training_data

In [30]:
%%capture captured_output
# Everything this cell writes is stored in `captured_output` instead of being shown;
# the %%capture magic has to stay on the first line of the cell.
my_question = "What classes does Tali Moreshet teach?"
# NOTE(review): allow_llm_to_see_data=True presumably permits query results to be sent
# to the LLM - confirm against the Vanna documentation before using sensitive data.
response = vn.ask(question=my_question, allow_llm_to_see_data=True)

In [None]:
import re

# SQL keywords are matched as whole words so that a data row which merely
# contains those letters (e.g. "BRAND", "RANDOM") is not discarded.
_SQL_KEYWORD = re.compile(r"\b(?:SELECT|AND|WHERE|FROM)\b")


def extract_result_table(captured_text):
    """Return the lines of captured vn.ask() output that look like table rows.

    A line is dropped when it contains one of the SQL keywords SELECT, AND,
    WHERE or FROM as a whole word, a semicolon, or the substring "gpt".
    Everything else is kept, in order, joined with newlines.

    This is a heuristic: SQL lines made only of other clauses (JOIN, ORDER BY,
    lower-case keywords, ...) are not recognised and would be kept.
    """
    kept_lines = [
        line
        for line in captured_text.splitlines()
        if not _SQL_KEYWORD.search(line) and ";" not in line and "gpt" not in line
    ]
    return "\n".join(kept_lines)


capt_vanna_ans = captured_output.stdout
if "error" in capt_vanna_ans:
    # Stop here: without a result table the following cell would either fail on
    # an undefined `vanna_ans_tables` or silently reuse one left over from an
    # earlier run. Include the captured text so the cause is visible.
    raise RuntimeError(
        "vn.ask() reported an error, so there is no result table to interpret. "
        "Captured output:\n" + capt_vanna_ans
    )

vanna_ans_tables = extract_result_table(capt_vanna_ans)
print(vanna_ans_tables)

  college subject  catalog_nbr prof_first prof_last building room_num  \
0     ENG      EC          311       Tali  Moreshet      PHO      210   
1     ENG      EC          413       Tali  Moreshet      EPC      209   

   class_capacity                         descr units meeting_days  \
0              82  Introduction to Logic Design     4         TuTh   
1              65         Computer Organization     4         MoWe   

  meeting_times  
0   0900 - 1045  
1   1010 - 1155  


In [None]:
# Pass the key to the constructor. OpenAI() checks for credentials when the
# client is created, so with the key stored under NEW_OPENAI_API_KEY (not the
# default OPENAI_API_KEY) a bare OpenAI() fails before client.api_key could be
# assigned afterwards.
client = OpenAI(api_key=api_key)

# Ask the chat model to turn the extracted result table into a sentence-form
# answer to the original question. The table starts on its own line so that it
# is not glued onto the end of the instruction text.
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "developer", "content": "You are a helpful assistant in interpreting data tables into complete sentences and an intelligible response."},
        {"role": "user", "content": f"Answer the question of: {my_question}, given this data table of:\n{vanna_ans_tables}"},
    ],
)

print(completion.choices[0].message.content)


Tali Moreshet teaches the classes "Introduction to Logic Design" and "Computer Organization" for the subjects of Engineering at the college.


In [None]:
from vanna.flask import VannaFlaskApp
# Wrap the configured `vn` object in Vanna's Flask application and start it.
# NOTE(review): app.run() presumably blocks this cell until the server is stopped - confirm.
app = VannaFlaskApp(vn)
app.run()