## Approach

Take the user question, summarize it for keywords, then use those words for an embedding search.




In [1]:
%cd '/home/pedram/projects/llm-support-bot/llm-support-bot' 
import storage as s
!unset OPENAI_API_KEY

/home/pedram/projects/llm-support-bot/llm-support-bot
INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.


In [2]:
import logging
import sys
import openai
import os
from IPython.display import Markdown, display
from llama_index.response.notebook_utils import display_response, display_source_node, display_metadata

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
openai.log = "info"
os.environ['NUMEXPR_MAX_THREADS'] = '20'
os.unsetenv('OPENAI_API_KEY')

In [3]:
from llama_index.llms import OpenAI
from llama_index import ServiceContext
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import get_response_synthesizer, set_global_service_context

# model_url = 'https://huggingface.co/TheBloke/CodeLlama-13B-Instruct-GGUF/resolve/main/codellama-13b-instruct.Q4_K_M.gguf'
model_url = 'https://huggingface.co/TheBloke/CodeLlama-13B-Instruct-GGUF/resolve/main/codellama-13b-instruct.Q6_K.gguf'
llm = LlamaCPP(
    model_url=model_url,
    temperature=0.1,
    max_new_tokens=2000,
    context_window=3900,
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": -1},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=False,
)

openai = OpenAI()
embed_model = s.get_embedding_model()
service_context = ServiceContext.from_defaults(llm=openai, embed_model=embed_model)
set_global_service_context(service_context)

ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4080, compute capability 8.9
llama_model_loader: loaded meta data with 20 key-value pairs and 363 tensors from /home/pedram/projects/llm-cache/models/codellama-13b-instruct.Q6_K.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q6_K     [  5120, 32016,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 13824,  5120,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q6_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q6_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q6_

In [20]:
from llama_index.output_parsers import LangchainOutputParser
from llama_index.llm_predictor import StructuredLLMPredictor
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
response_schemas = [
    ResponseSchema(
        name="Education",
        description="Describes the author's educational experience/background.",
    ),
    ResponseSchema(
        name="Work", description="Describes the author's work experience/background."
    ),
]

llm_predictor = StructuredLLMPredictor()

In [22]:
from llama_index.prompts import PromptTemplate
from llama_index.prompts.default_prompts import (
    DEFAULT_TEXT_QA_PROMPT_TMPL,
    DEFAULT_REFINE_PROMPT_TMPL,
)
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List


In [29]:
class Summary(BaseModel):
    keyword: List[str] = Field(description="list of keywords related to the issue")
    quesdtions: List[str] = Field(descripton="list of related questions this issue could answer")
    
parser = PydanticOutputParser(pydantic_object=Summary)
output_parser = LangchainOutputParser(parser)

fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)
fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL)

qa_prompt = PromptTemplate(fmt_qa_tmpl, output_parser=output_parser)
refine_prompt = PromptTemplate(fmt_refine_tmpl, output_parser=output_parser)
print(fmt_refine_tmpl)

The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema:
```
{{"properties": {{"keyword": {{"title": "Keyword", "description": "list of keywords related to the issue", "type": "array", "items": {{"type

In [51]:
from llama_index.prompts import PromptTemplate
template = """
[INST]
You are an AI language model assistant. Your task is to generate {queryCount} different versions of a
question to retrieve relevant documents from a vector database.

The user's question is: {query_str}

""".strip()
fmt_template = output_parser.format(template)
qa_prompt = PromptTemplate(fmt_template, output_parser=output_parser)
print(fmt_template)

[INST]
You are an AI language model assistant. Your task is to generate {queryCount} different versions of a
question to retrieve relevant documents from a vector database.

The user's question is: {query_str}

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema:
```
{{"properties": {{"keyword": {{"title": "Keyword", "description": "list of keywords related to the issue", "type": "array", "items": {{"type": "string"}}}}, "quesdtions": {{"title": "Quesdtions", "descripton": "list of related questions this issue could answer", "type": "array", "items": {{"type": "string"}}}}}}, "required

In [52]:
question="""
I have successfully integrated a dbt model into Dagster. Description and metadata are correctly pulled in by Dagster for my tables and visible both in the graph (the description) and the corresponding asset detail pages . However, source details do not seem to be populated when I click on the asset details. The model runs nicely via Dagster and sources appear in the graph.

My dummy dbt schema.yml is as follows:

version: 2

models:
  - name: test_model_1
    description: "This model contains user-level revenue data aggregated at the monthly level."
    columns:
      - name: n_of_rows
    config:
      meta:
        dagster:
          group: revenue

sources:
  - name: external
    description: "This is an external source"
    database: ext
    schema: ext
    tables:
    - name: payments
      description: "User payment data"
      meta:
        dagster:
          group: revenue
"""
prompt = qa_prompt.format(queryCount=4, query_str=question)


In [53]:
response = openai.complete(prompt)

In [54]:
print(response.text)

{"keyword": ["dbt model integration", "Dagster", "metadata", "asset details", "source details", "graph", "model runs", "schema.yml"], "questions": ["How can I integrate a dbt model into Dagster?", "How does Dagster pull in description and metadata for tables?", "Why are source details not populated when clicking on asset details?", "What appears in the graph when running the model via Dagster?"]}


In [55]:
response = llm.complete(prompt)

In [49]:
print(response.text)

  Based on the given user question and schema, here are four different versions of the output that conform to the JSON schema:

1.
```json
{
    "keyword": ["dbt", "Dagster", "source details"],
    "quesdtions": ["Why are source details not populated when I click on the asset details?", "How can I populate source details in Dagster?"]
}
```
2.
```json
{
    "keyword": ["dbt", "Dagster", "source details"],
    "quesdtions": ["What are the possible reasons for not populating source details when clicking on asset details?", "How can I troubleshoot issues with source details in Dagster?"]
}
```
3.
```json
{
    "keyword": ["dbt", "Dagster", "source details"],
    "quesdtions": ["What are the best practices for populating source details in Dagster?", "How can I ensure that source details are correctly populated when clicking on asset details?"]
}
```
4.
```json
{
    "keyword": ["dbt", "Dagster", "source details"],
    "quesdtions": ["What are the common issues related to source details in 

In [None]:
index = s.load_index()

In [None]:
retriever = index.as_retriever(
    similarity_top_k=5,
    vector_store_query_mode="default",
)

In [None]:
retrieved_summaries = retriever.retrieve(response.text.strip())


In [None]:
[display_source_node(i, source_length=200, show_source_metadata=True) for i in retrieved_summaries]

In [None]:
display_metadata(retrieved_summaries)