In [None]:
# Notebook-wide imports.
import pathlib
import weave
# NOTE(review): presumably points Weave at a development build of its frontend — confirm,
# and consider removing before sharing this notebook.
weave.use_frontend_devmode()
# After this import the bare name `langchain` refers to weave.ecosystem.langchain.
# Later `from langchain.<module> import ...` statements are still resolved by the import
# system against the real langchain package, not against this local name.
from weave.ecosystem import langchain

In [None]:
# Ten evaluation questions about the Weights & Biases platform.
# NOTE(review): the variable name suggests these were generated with GPT-4 — confirm provenance.
gpt4_basic_10 = [
    "What is the purpose of experiment tracking in machine learning, and how does the Weights & Biases platform facilitate this process?",
    "How does the Weights & Biases platform help to improve the reproducibility and transparency of machine learning experiments?",
    "What are some of the key features of the Weights & Biases platform that make it a popular choice for experiment tracking in machine learning?",
    "How does the Weights & Biases platform allow users to compare and evaluate the performance of different machine learning models?",
    "What are some of the benefits of using Weights & Biases for experiment tracking, and how do these benefits translate into improved outcomes for machine learning projects?",
    "How does Weights & Biases enable collaboration and sharing of experiments across teams working on different machine learning projects?",
    "Can the Weights & Biases platform be integrated with other tools commonly used in machine learning workflows, such as Jupyter notebooks or TensorBoard?",
    "What are some best practices for using Weights & Biases for experiment tracking, and how can users maximize the platform's potential?",
    "How does Weights & Biases help users to identify and troubleshoot issues that may arise during the training and evaluation of machine learning models?",
    "How does Weights & Biases help users to optimize the hyperparameters of machine learning models, and what strategies can be employed to maximize the effectiveness of this feature?"
]
# Only the first two questions are saved to Weave (under the name 'questions');
# the remaining eight are not used by this cell.
questions = weave.save(gpt4_basic_10[:2], 'questions')

In [None]:
# TODO: uses StringHistogram by default. Prefer Each or Table
#questions

In [None]:
import os

from langchain.docstore.document import Document
from langchain.text_splitter import (
    MarkdownTextSplitter,
    PythonCodeTextSplitter,
    TokenTextSplitter,
)
# Get markdown files from our docs repo

# Checkout of our docs repo: https://github.com/wandb/docodile/
# Set the DOCODILE_DIR environment variable to point at your own checkout; the fallback
# is the original author's local path and will not exist on other machines.
DOC_DIR = os.environ.get('DOCODILE_DIR', '/Users/shawn/code2/docodile')
DOC_SUFFIX = '.md'

# Sort the matches so the document order (and everything derived from it) is the same on
# every run instead of depending on filesystem enumeration order.
doc_paths = sorted(pathlib.Path(DOC_DIR).glob('**/*' + DOC_SUFFIX))
if not doc_paths:
    # Globbing a missing directory silently yields nothing; fail here with a clear message
    # rather than building an empty corpus that breaks later cells in confusing ways.
    raise FileNotFoundError(
        f'No {DOC_SUFFIX} files found under {DOC_DIR!r}; '
        'set DOCODILE_DIR to a checkout of the docs repo.'
    )

# Store each file as a langchain Document. The metadata 'path' is the bare file name
# (no directories), so files with the same name in different folders share a 'path' value.
# Read as UTF-8 explicitly so the result does not depend on the platform's default encoding.
raw_docs = [
    Document(page_content=doc_path.read_text(encoding='utf-8'), metadata={'path': doc_path.name})
    for doc_path in doc_paths
]

# Two-stage split: markdown-aware first, then by token count.
markdown_chunks = MarkdownTextSplitter().split_documents(raw_docs)
token_chunks = TokenTextSplitter().split_documents(markdown_chunks)

# From here on `docs` is whatever weave.save returns for the chunk list saved as 'wandb-docs'.
docs = weave.save(token_chunks, 'wandb-docs')
#docs

In [None]:
# TODO: crash due to a panel error, need to fix auto table state to work on Object Type
#docs

In [None]:
from langchain.vectorstores import VectorStore, FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
# NOTE(review): OpenAIEmbeddings presumably needs OpenAI credentials in the environment — confirm.
embeddings = OpenAIEmbeddings()
# Build a FAISS index directly with langchain. `docs` is the value returned by weave.save
# in the loading cell, so it is passed through weave.use() before being handed to langchain.
vector_store = FAISS.from_documents(weave.use(docs), embeddings)

In [None]:
# Show the langchain FAISS vector store with weave.show.
# Stuff you can do:
#   - .similarity_search(<query>)
#     - but currently there is a crash because Row.Group tries to render and fails. Need to switch the
#       panel to Table and then change column to row.__getattr__('page_content')
#   - .document_embeddings
#     - this gets the embeddings out of FAISS, and also performs FAISS' k-means with 20 clusters
#     - switch to projection.plot
# TODO:
#   - no default panel!
#   - fix Row.Group crash
#   - automatically show Table instead of Row.Group
#   - make PanelTable automatically render Object correctly.
#   - make PanelObject handle Objects
#   - fix __getattr__ rendering
#   - give control over k for k-means
#   - figure out dragging region on embeddings view to perform drilldown (won't work because of projector wrapping)
weave.show(vector_store)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Baseline retrieval-QA chain: gpt-3.5-turbo at temperature 0.7, using the 'stuff' chain type
# and a retriever obtained from the FAISS vector store.
model_gpt_35_temp07 = RetrievalQA.from_chain_type(
    retriever=vector_store.as_retriever(),
    chain_type='stuff',
    llm=ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo'),
)

In [None]:
# run with langchain
# Calls the chain object directly (no Weave involved); the returned value is shown as the cell output.
model_gpt_35_temp07.run('hello')

In [None]:
# run with weave
# Saves the langchain chain to Weave; no name is passed to weave.save here.
# The .run call through the saved object is left commented out below.
model = weave.save(model_gpt_35_temp07)
#model.run('hello')

In [None]:
# Two retrieval-QA chains that differ only in sampling temperature (0.7 first, then 0.2),
# each with its own retriever from the FAISS vector store, saved to Weave as 'docbot-models'.
models = weave.save(
    [
        RetrievalQA.from_chain_type(
            llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=temperature),
            chain_type='stuff',
            retriever=vector_store.as_retriever(),
        )
        for temperature in (0.7, 0.2)
    ],
    'docbot-models',
)
# TODO: can't view this

In [None]:
# You can run multiple models at once
# `models` is the object returned by weave.save for the list of chains, so .run is invoked
# on that saved list rather than on a single chain.
# TODO: this is the wrong view, need to show which model it was as well!
models.run('hello')

In [None]:
# You can map over questions
# TODO:
#   - crash due to document size. Need to do a split stage1
#   - default output format is wrong. Want table of Question v. Model
#questions.map(lambda q: models.run(q))

In [None]:
# This defines the whole thing using Weave ops
# TODO:
#   - how do we naturally get from the style above where we use langchain directly
#     and save to weave, to here where we use weave ops to construct everything?
#     (I think the answer has to do with object constructors!)
#   - what if we want to save our own evaluations instead of letting Weave do it?
#     (ie make standard manual flow work)
#
# The Weave-built store gets its own name so it does not overwrite the langchain FAISS
# object bound to `vector_store` earlier in the notebook; the cells that call
# `vector_store.as_retriever()` therefore keep working if they are re-run after this one.
weave_vector_store = langchain.faiss_from_documents(docs, langchain.openai_embeddings())
llm = langchain.chat_openai('gpt-3.5-turbo', 0.7)
qa = langchain.retrieval_qa_from_chain_type(llm, 'stuff', weave_vector_store)
# Last expression of the cell, so its value is what the notebook displays.
qa.run('what is weave?')

In [None]:
# TODO:
#   - use latest
#   - fix projections
#   - change how projector works
#   - super ugly projection config
#   - why is similarity search so slow now?
#     - because the openai API is being slow, and we need to embed the query _and_ we can't cache the
#       query embedding step because it happens down in langchain. Better to build this stuff up in a Weave
#       way!
#   - make projection drilldown
#   - make table of question v model
#   - auto table for projection output does source.cluster, but it seems the . notation doesn't work
#     in weave1 for some reason? ['source']['cluster'] is fine
#   - wordclouds / stacey plots
#   - selection on plot gives us the output plot data, not the input data! so we can't see anything useful
#   - selection on plot doesn't "stick"


def _document_table(documents):
    """Return a Table panel over `documents` with a page-content column and a path column."""
    return weave.panels.Table(
        documents,
        columns=[
            lambda doc: doc.page_content,
            lambda doc: doc.metadata['path'],
        ],
    )


# Board wiring: `vars` entries that are lambdas name other vars as their parameters, and each
# panel lambda names the var (or panel id) it reads. Key order and parameter names are kept as-is.
weave.panels.Board(
    vars={
        'documents': docs,
        'questions': questions.limit(2),
        'embeddings': langchain.openai_embeddings(),
        'vector_store': lambda embeddings, documents: langchain.faiss_from_documents(
            documents, embeddings
        ),
        'doc_embeddings': lambda vector_store: vector_store.document_embeddings(),
        # Two QA chains over the same store, at temperatures 0.2 and 0.7.
        'models': lambda vector_store: weave.ops.make_list(
            a=langchain.retrieval_qa_from_chain_type(
                langchain.chat_openai('gpt-3.5-turbo', 0.2), 'stuff', vector_store
            ),
            b=langchain.retrieval_qa_from_chain_type(
                langchain.chat_openai('gpt-3.5-turbo', 0.7), 'stuff', vector_store
            ),
        ),
        # NOTE(review): presumably 'pca' selects the active algorithm and the dict carries
        # per-algorithm settings — confirm against projection2D.
        'projection': lambda doc_embeddings: doc_embeddings.projection2D(
            'pca',
            'single',
            ['embedding'],
            {
                'pca': {},
                'tsne': {'perplexity': 30, 'learningRate': 10, 'iterations': 25},
                'umap': {'neighbors': 15, 'minDist': 0.1, 'spread': 1.0},
            },
        ),
        # Every question is run against the saved list of models.
        'eval_results': lambda questions, models: questions.map(
            lambda question: models.run(question)
        ),
    },
    panels=[
        # All documents.
        weave.panels.BoardPanel(
            lambda documents: _document_table(documents),
            layout=weave.panels.BoardPanelLayout(x=0, y=0, w=12, h=6),
        ),
        # Documents returned by a similarity search for 'weave'.
        weave.panels.BoardPanel(
            lambda vector_store: _document_table(vector_store.similarity_search('weave')),
            layout=weave.panels.BoardPanelLayout(x=12, y=0, w=12, h=6),
        ),
        # Plot of the 2D projection, colored by cluster.
        weave.panels.BoardPanel(
            id='docs_projection',
            panel=lambda projection: weave.panels.Plot(
                projection,
                x=lambda row: row['projection.x'],
                y=lambda row: row['projection.y'],
                color=lambda row: row['source.cluster'],
            ),
            layout=weave.panels.BoardPanelLayout(x=0, y=6, w=12, h=12),
        ),
        # Reads the plot panel above through its id, `docs_projection`.
        weave.panels.BoardPanel(
            lambda docs_projection: docs_projection.rows_selected(),
            layout=weave.panels.BoardPanelLayout(x=12, y=6, w=12, h=12),
        ),
        # Table of the evaluation results.
        weave.panels.BoardPanel(
            lambda eval_results: weave.panels.Table(eval_results),
            layout=weave.panels.BoardPanelLayout(x=0, y=18, w=24, h=12),
        ),
    ],
)