## ADB DDL

In [None]:
from databases.adbddl import ADBDDL

In [None]:
# Instantiate the ADBDDL helper with its default arguments; a later cell calls
# its query_lakehouse() method.
adb_ddl = ADBDDL()

In [None]:
# Top 10 sites ranked by the sum of `Value` in telemetry_processed_silver.
# The table must be introduced with FROM for the statement to be valid SQL.
query = """

SELECT _SiteId, SUM(Value) as total_production
FROM telemetry_processed_silver
--WHERE _TelemetryType = '17' -- 17 is the telemetry type for Total Flow
GROUP BY _SiteId
ORDER BY total_production DESC
LIMIT 10

"""

In [None]:
# Select every row of system.information_schema.columns whose table_catalog is 'system'.
# NOTE: this rebinds `query`, replacing the query string assigned in the previous cell.
query = """
select 
* 
from system.information_schema.columns 
where table_catalog = 'system'

"""

In [None]:
# Run the current `query` string through the ADBDDL helper and keep the result as `df`.
df = adb_ddl.query_lakehouse(query)

In [None]:
# Plain-text dump of the query result.
print(df)

In [None]:
# Bare expression: the notebook displays `df` as the cell output.
df

In [None]:
# streamlit_app.py

import streamlit as st

# Initialize connection: ask Streamlit for the connection named 'snowpark'.
# NOTE(review): connection settings are presumably read from Streamlit's secrets — confirm.
conn = st.experimental_connection('snowpark')

In [None]:
# Column inventory: joins information_schema.tables to information_schema.columns
# on schema and table name and returns one row per column, ordered by schema,
# table and ordinal position. `max_length` is numeric_precision when present,
# otherwise character_maximum_length.
query = """
select t.table_schema,
       t.table_name,
       c.column_name,
       c.ordinal_position,
       c.data_type,
       case 
            when c.numeric_precision is not null
                then c.numeric_precision
            when c.character_maximum_length is not null
                then c.character_maximum_length
       end as max_length,
       c.numeric_scale, 
       c.is_identity,
       c.is_nullable
from information_schema.tables t
inner join information_schema.columns c on 
         c.table_schema = t.table_schema and c.table_name = t.table_name   
order by table_schema,
       table_name,
       ordinal_position;
"""

In [None]:
# Rebind `conn` to the Streamlit connection named 'snowpark'.
conn = st.experimental_connection('snowpark')

# Perform query. Note this uses a literal statement, not the `query` variable.
# NOTE(review): ttl=600 is presumably a cache lifetime in seconds — confirm in the Streamlit docs.
df = conn.query('SELECT * from mytable;', ttl=600)

In [None]:

# Load a query result as a dataframe using the Snowpark Session.
@st.cache_data(ttl=600)
def load_table(conn, query):
    """Run ``query`` on a Snowpark session obtained from ``conn``.

    Args:
        conn: Connection object exposing ``safe_session()`` as a context manager.
        query: SQL text to execute.

    Returns:
        The query result converted with ``to_pandas()``.
    """
    # NOTE(review): st.cache_data hashes every argument. If hashing `conn`
    # raises UnhashableParamError, rename the parameter to `_conn` so that
    # Streamlit skips it when building the cache key.
    with conn.safe_session() as session:
        # A Snowpark session executes SQL through sql(); result caching is
        # Streamlit's job, so the ttl lives on the decorator above.
        return session.sql(query).to_pandas()


# The loop below reads the NAME and PET columns, so load `mytable`.
df = load_table(conn, 'SELECT * from mytable;')

# Print results.
for row in df.itertuples():
    st.write(f"{row.NAME} has a :{row.PET}:")

## Explorer Agent

In [None]:
from agents import ExplorerAgent
# Build the explorer agent with its `database` argument set to "default".
agent = ExplorerAgent(database="default")

In [None]:
# Send a natural-language request to the agent and keep whatever run() returns.
result = agent.run("can you return a dataframe with the most recent record per WELL_HID in telemetry_table?")

In [None]:
# Show what the agent returned.
print(result)

In [None]:
import logging
from typing import List  # NOQA: UP035

import openai
import streamlit as st


@st.cache_data()
def create_gpt_completion(ai_model: str, messages: List[dict]) -> dict:
    """Request a chat completion from OpenAI for ``messages`` using ``ai_model``.

    The API key is looked up in Streamlit secrets. When the lookup fails, the
    localized "empty API key" message is shown with ``st.error`` and the request
    is still attempted. The outgoing messages and the returned completion are
    both logged at INFO level.

    Args:
        ai_model: Model name forwarded as ``model``.
        messages: Chat messages forwarded as ``messages``.

    Returns:
        The object returned by ``openai.ChatCompletion.create``.
    """
    try:
        api_key = st.secrets.api_credentials.api_key
    except (KeyError, AttributeError):
        st.error(st.session_state.locale.empty_api_handler)
    else:
        openai.api_key = api_key

    logging.info(f"{messages=}")
    # Optional request settings left disabled in this notebook:
    # stream=True, temperature=0.7
    request = {"model": ai_model, "messages": messages}
    completion = openai.ChatCompletion.create(**request)
    logging.info(f"{completion=}")
    return completion


def calc_cost(usage: dict) -> None:
    """Record the token count and estimated cost of one completion.

    Appends one entry to ``st.session_state.total_tokens`` and one to
    ``st.session_state.costs``. A ``None`` usage mapping, or a missing/``None``
    token field, is counted as zero tokens so the function never fails with a
    ``TypeError`` after it has already updated ``total_tokens``.

    Args:
        usage: Mapping with ``total_tokens``, ``prompt_tokens`` and
            ``completion_tokens`` entries.
    """
    usage = usage or {}
    total_tokens = usage.get("total_tokens") or 0
    prompt_tokens = usage.get("prompt_tokens") or 0
    completion_tokens = usage.get("completion_tokens") or 0
    st.session_state.total_tokens.append(total_tokens)
    # pricing logic: https://openai.com/pricing#language-models
    if st.session_state.model == "gpt-3.5-turbo":
        # Single rate applied to the total token count (per 1000 tokens).
        cost = total_tokens * 0.002 / 1000
    else:
        # Any other model: separate rates for prompt and completion tokens.
        cost = (prompt_tokens * 0.03 + completion_tokens * 0.06) / 1000
    st.session_state.costs.append(cost)

In [None]:
# Request a completion for the model and conversation kept in session state.
completion = create_gpt_completion(st.session_state.model, st.session_state.messages)
# Text of the first choice's message.
ai_content = completion.get("choices")[0].get("message").get("content")
# Record token usage and cost for this completion.
calc_cost(completion.get("usage"))
# The reply is appended to the history even when it is empty; only the rendering below is guarded.
st.session_state.messages.append({"role": "assistant", "content": ai_content})
if ai_content:
    # NOTE(review): show_chat is not defined in the visible cells — confirm it is
    # defined or imported before this cell runs.
    show_chat(ai_content, st.session_state.user_text)
    st.divider()

## HFT Agent Chat

## Tools

So far we've been using the tools that the agent has access to. These tools are the following:

- **Document question answering**: given a document (such as a PDF) in image format, answer a question on this document (Donut)
- **Text question answering**: given a long text and a question, answer the question in the text (Flan-T5)
- **Unconditional image captioning**: Caption the image! (BLIP)
- **Image question answering**: given an image, answer a question on this image (VILT)
- **Image segmentation**: given an image and a prompt, output the segmentation mask of that prompt (CLIPSeg)
- **Speech to text**: given an audio recording of a person talking, transcribe the speech into text (Whisper)
- **Text to speech**: convert text to speech (SpeechT5)
- **Zero-shot text classification**: given a text and a list of labels, identify to which label the text corresponds the most (BART)
- **Text summarization**: summarize a long text in one or a few sentences (BART)
- **Translation**: translate the text into a given language (NLLB)

We also support the following community-based tools:

- **Text downloader**: to download a text from a web URL
- **Text to image**: generate an image according to a prompt, leveraging stable diffusion
- **Image transformation**: transforms an image

We can therefore use a mix and match of different tools by explaining in natural language what we would like to do.

But what about adding new tools? Let's take a look at how to do that.

### Adding new tools

We'll add a very simple tool so that the demo remains simple: we'll use the awesome cataas (Cat-As-A-Service) API to get random cats on each run.

We can get a random cat with the following code:

In [None]:
%pip install huggingface_hub>0.14 git+https://github.com/huggingface/transformers@$transformers_version -q diffusers accelerate datasets torch soundfile sentencepiece opencv-python openai

In [None]:
#@title Setup
transformers_version = "v4.29.0" #@param ["main", "v4.29.0"] {allow-input: true}

print(f"Setting up everything with transformers version {transformers_version}")

%pip install huggingface_hub git+https://github.com/huggingface/transformers@v4.29.0 -q diffusers accelerate datasets torch soundfile sentencepiece opencv-python openai streamlit


In [None]:
from agents import HFTAgent

In [None]:
#create .streamlit/secrets.toml file using % and bash commands
#from agents import HFTAgent

In [None]:
# Rebind `agent` to an HFTAgent built with default arguments (it previously held the ExplorerAgent).
agent = HFTAgent()

In [None]:
# Ask the agent to propose a product name; the prompt is sent exactly as written.
text = agent.run("crete a a name for a web service solution that uses larga language models to build a self service big data platform that helps on creating queries and jobs to process data and chat interaction to analyze and visualize data.")

In [None]:
# Show the agent's answer.
print(text)

In [None]:
# Ask the agent for a logo of the product; the prompt is sent exactly as written.
# NOTE(review): presumably returns an image object — confirm against HFTAgent.run.
image = agent.run("crete a logo of a web service solution that uses larga language models to build a self service big data platform that helps on creating queries and jobs to process data and chat interaction to analyze and visualize data. It is called, LLM Explorer")

In [None]:
# Bare expression: the notebook displays `image` as the cell output.
image

In [None]:
# Ask the agent to caption the image produced earlier.
# NOTE(review): only the prompt text is passed here. If HFTAgent wraps the
# transformers agent API, the object probably has to be supplied explicitly
# (e.g. as an `image=image` keyword) — confirm against HFTAgent.run.
caption = agent.run("Can you caption the `image`?")

In [None]:
# Uses the agent's chat() entry point instead of run(); the return value is the cell output.
agent.chat("Show me an an image of a capybara")

## Langchain Huggingface Hub

In [None]:
import os
import streamlit as st
# Copy the Hugging Face API key from Streamlit secrets into the process environment.
# NOTE(review): presumably this is the variable LangChain's HuggingFaceHub reads — confirm.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets.connections.huggingface.api_key

In [None]:
from langchain import HuggingFaceHub

# Hugging Face Hub repository of the model to use.
repo_id = "databricks/dolly-v2-3b" # "mosaicml/mpt-7b" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options

# LLM wrapper for that repository; model_kwargs carries the generation settings.
llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0, "max_length":128})

In [None]:
from langchain import PromptTemplate, LLMChain

# Prompt text with a single {question} placeholder; the answer is primed with
# "Let's think step by step."
template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])
# Chain that combines the prompt with the `llm` created in the previous cell.
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [None]:
# Question passed to the chain's run() call below.
question = "what is a quantum computer?"

# Run the chain on the question and keep its output.
response = llm_chain.run(question)

In [None]:
# Show the chain's answer.
print(response)