## Installation

In [1]:
!pip install "langchain>=0.1.0" langchain-community "arize-phoenix[evals]" tiktoken nest-asyncio langchain-groq sentence-transformers umap-learn hdbscan plotly faiss-cpu pyarrow

Collecting langchain>=0.1.0
  Downloading langchain-0.2.7-py3-none-any.whl (983 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.6/983.6 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-community
  Downloading langchain_community-0.2.7-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting arize-phoenix[evals]
  Downloading arize_phoenix-4.9.0-py3-none-any.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-groq
  Downloading langchain_groq-0.1.6-py3-none-any.whl (14 kB)
Collecting sentence-transfo

In [73]:
import warnings
warnings.filterwarnings('ignore')

# Basics Understanding

## Step 1: Import Libraries
We import Sentence Transformers for creating embeddings, UMAP for dimensionality reduction, HDBSCAN for clustering, and Plotly for interactive visualization.

## Step 2: Create Embeddings
We generate embeddings for our sample sentences using a pre-trained Sentence Transformer model.

## Step 3: Visualize Embeddings with UMAP and Plotly
We use UMAP to reduce the dimensionality of the embeddings to 2D for visualization and Plotly to create an interactive plot.

## Step 4: Cluster Embeddings with HDBSCAN
We use HDBSCAN to cluster the embeddings and visualize the clusters interactively with Plotly.


In [8]:
from sentence_transformers import SentenceTransformer
from langchain.embeddings import SentenceTransformerEmbeddings
import umap
import hdbscan
import plotly.express as px
import pandas as pd
import numpy as np

In [3]:
sentences = ["I love machine learning",
             "Artificial intelligence is fascinating",
             "Deep learning is a subset of AI",
             "Natural language processing is a part of AI",
             "Machine learning algorithms include supervised learning and unsupervised learning",
             "AI is transforming industries"]

In [4]:
embed_function = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')

  warn_deprecated(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [9]:
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')



In [10]:
# Generate embeddings for the sentences
embeddings = embedding_model.encode(sentences)

In [11]:
embeddings

array([[-0.04363633, -0.05905434,  0.08201236, ...,  0.09771021,
         0.01688082, -0.09040587],
       [-0.02487095, -0.06024501,  0.05528266, ...,  0.0958535 ,
         0.02723549, -0.0703013 ],
       [-0.05394987, -0.05611948,  0.06270038, ...,  0.06852759,
         0.07129659, -0.03704014],
       [ 0.00581621, -0.01208333,  0.05307851, ...,  0.1195265 ,
         0.076396  , -0.04039066],
       [-0.04218372, -0.0511343 ,  0.02060528, ...,  0.02296887,
         0.07036246, -0.0144029 ],
       [-0.00107005, -0.02945988,  0.0180009 , ..., -0.06836102,
         0.06417272, -0.02449267]], dtype=float32)

In [12]:
# Apply UMAP
umap_reducer = umap.UMAP(n_components=2, random_state=0)
embeddings_2d = umap_reducer.fit_transform(embeddings)

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  warn(


In [13]:
df = pd.DataFrame(embeddings_2d, columns=['x', 'y'])

In [14]:
df['sentence'] = sentences

In [15]:
df

Unnamed: 0,x,y,sentence
0,18.45787,-3.909374,I love machine learning
1,19.430281,-4.339714,Artificial intelligence is fascinating
2,19.368591,-3.207176,Deep learning is a subset of AI
3,20.187126,-3.403855,Natural language processing is a part of AI
4,18.40135,-3.052598,Machine learning algorithms include supervised...
5,20.323513,-4.238095,AI is transforming industries


In [16]:
fig = px.scatter(df, x='x', y='y', text='sentence', title='Sentence Embeddings Visualization with UMAP')
fig.update_traces(textposition='top center')
fig.update_layout(hovermode='closest')

In [17]:
fig.show()

In [18]:
# Apply HDBSCAN
clusterer = hdbscan.HDBSCAN(min_cluster_size=2)
cluster_labels = clusterer.fit_predict(embeddings)

In [19]:
# Add cluster labels to the DataFrame
df['cluster'] = cluster_labels

In [20]:
fig = px.scatter(df, x='x', y='y', color='cluster', text='sentence', title='Sentence Embeddings Clustering with HDBSCAN')
fig.update_traces(textposition='top center')
fig.update_layout(hovermode='closest')

# Show the plot
fig.show()

## Modular

In [21]:
# Function to generate embeddings
def generate_embeddings(sentences, model_name='all-MiniLM-L6-v2'):
    model = SentenceTransformer(model_name)
    embeddings = model.encode(sentences)
    return embeddings

# Function to reduce dimensionality using UMAP
def reduce_dimensionality(embeddings, n_components=2):
    umap_reducer = umap.UMAP(n_components=n_components, random_state=0)
    embeddings_2d = umap_reducer.fit_transform(embeddings)
    return embeddings_2d

# Function to cluster embeddings using HDBSCAN
def cluster_embeddings(embeddings):
    clusterer = hdbscan.HDBSCAN(min_cluster_size=2)
    cluster_labels = clusterer.fit_predict(embeddings)
    return cluster_labels

# Function to visualize embeddings with Plotly
def visualize_embeddings(embeddings_2d, sentences, cluster_labels=None, title='Sentence Embeddings Visualization'):
    df = pd.DataFrame(embeddings_2d, columns=['x', 'y'])
    df['sentence'] = sentences
    if cluster_labels is not None:
        df['cluster'] = cluster_labels

    fig = px.scatter(df, x='x', y='y', color='cluster' if cluster_labels is not None else None, text='sentence', title=title)
    fig.update_traces(textposition='top center')
    fig.update_layout(hovermode='closest')
    fig.show()

In [22]:
def process_and_visualize(sentences):
    embeddings = generate_embeddings(sentences)
    embeddings_2d = reduce_dimensionality(embeddings)
    cluster_labels = cluster_embeddings(embeddings)
    visualize_embeddings(embeddings_2d, sentences, cluster_labels, title='Sentence Embeddings Clustering with HDBSCAN')

In [23]:
# Example usage
sentences = [
    "I love machine learning", "Machine learning is great", "I enjoy studying AI",
    "AI and machine learning are fascinating", "Artificial intelligence is the future",
    "Deep learning is a subset of machine learning", "AI can transform industries",
    "Machine learning algorithms include supervised and unsupervised learning",
    "Natural language processing is a part of AI", "AI is impacting many fields",
    "Machine learning is a field of AI", "AI applications are growing rapidly",
    "Supervised learning requires labeled data", "Unsupervised learning finds hidden patterns",
    "Reinforcement learning is a type of machine learning", "Deep learning uses neural networks",
    "Neural networks are a key technology in AI", "AI research is advancing quickly",
    "Machine learning models can be complex", "AI technology is evolving"
]

In [24]:
process_and_visualize(sentences)


`resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



## Setting up LLM

In [25]:
import os
from google.colab import userdata

In [26]:
os.environ['GROQ_API_KEY'] = userdata.get('GROQ_API_KEY')

In [27]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

In [28]:
llm = ChatGroq(
    temperature=0.1,
    model="llama3-70b-8192"
)

In [29]:
system = "You are a helpful assistant."
human = "{text}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

chain = prompt | llm
chain.invoke({"text": "Explain the importance of low latency for LLMs."})

AIMessage(content="Low latency is crucial for Large Language Models (LLMs) because it directly impacts the user experience, model performance, and overall efficiency of language-based applications. Here are some reasons why low latency is essential for LLMs:\n\n1. **Real-time Interaction**: LLMs are often used in applications that require real-time interaction, such as chatbots, virtual assistants, and language translation systems. Low latency ensures that the model responds quickly to user input, providing a seamless and engaging experience.\n2. **Conversational Flow**: In conversational AI, latency can disrupt the natural flow of conversation. High latency can lead to awkward pauses, making the interaction feel unnatural and frustrating. Low latency helps maintain a smooth conversation, allowing users to engage more naturally with the model.\n3. **Model Performance**: LLMs rely on complex algorithms and massive datasets to generate responses. High latency can lead to increased comput

## Arize AI Phoenix

In [30]:
import json
from getpass import getpass
from urllib.request import urlopen

import nest_asyncio
import numpy as np
import pandas as pd
import phoenix as px
from langchain.chains import RetrievalQA
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from phoenix.trace import DocumentEvaluations, SpanEvaluations
from phoenix.trace.langchain import LangChainInstrumentor
from tqdm import tqdm

In [31]:
nest_asyncio.apply()  # needed for concurrent evals in notebook environments

## You can run Phoenix in the background to collect trace data emitted by any LangChain application that has been instrumented with the OpenInferenceTracer.

Launch Phoenix and follow the instructions in the cell output to open the Phoenix UI (the UI should be empty because we have yet to run a LangChain application).

In [32]:
session = px.launch_app()

🌍 To view the Phoenix app in your browser, visit https://dcs6dfbphnb2-496ff2e9c6d22116-6006-colab.googleusercontent.com/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


## Building RAG

In [33]:
df = pd.read_parquet(
    "http://storage.googleapis.com/arize-phoenix-assets/datasets/"
    "unstructured/llm/context-retrieval/langchain/database.parquet"
)

In [34]:
df

Unnamed: 0,text,text_vector
0,\nAccess tutorials of what's possible with Ari...,"[-0.0036313518733896957, 0.0026182831234325553..."
1,"\n{% hint style=""info"" %}\nYour model type det...","[-0.006279695251247202, -0.0005314257337369415..."
2,\nExamples for logging explainability metrics....,"[-0.007306394403318994, 0.0007199545863166625,..."
3,\n| Google Cloud Services | Link |\n| ------...,"[0.002060703379662884, -0.0037287262540240764,..."
4,\n| Sending 10 Million Inferences to Arize in ...,"[-0.008325811390756237, -0.008001045242455413,..."
...,...,...
1081,\nContact us at `support@arize.com` to discuss...,"[0.01593316514702935, -0.0024252962047531914, ..."
1082,\nThe installation requires a release's TAR fi...,"[-0.0009749303156775049, -0.01393693439926567,..."
1083,\nThe Arize team can help size the cluster bas...,"[0.012797655540632846, -0.005487701856846996, ..."
1084,"\nTo get started quickly, you can use the scri...","[0.008342131870142003, -0.0008487129130762056,..."


In [54]:
df.columns

Index(['text', 'text_vector'], dtype='object')

In [55]:
# Add an Id column
df['Id'] = range(1, len(df) + 1)

In [56]:
df.head()

Unnamed: 0,text,text_vector,Id
0,\nAccess tutorials of what's possible with Ari...,"[-0.0036313518733896957, 0.0026182831234325553...",1
1,"\n{% hint style=""info"" %}\nYour model type det...","[-0.006279695251247202, -0.0005314257337369415...",2
2,\nExamples for logging explainability metrics....,"[-0.007306394403318994, 0.0007199545863166625,...",3
3,\n| Google Cloud Services | Link |\n| ------...,"[0.002060703379662884, -0.0037287262540240764,...",4
4,\n| Sending 10 Million Inferences to Arize in ...,"[-0.008325811390756237, -0.008001045242455413,...",5


In [60]:
from langchain import FAISS
import time

In [57]:
start=time.time()
metadatas = []
for index, row in df.iterrows():
    doc_meta = {
        "id": row['Id'],
        "language": row['text_vector']
    }
    metadatas.append(doc_meta)

In [58]:
metadatas

[{'id': 1,
  'language': array([-0.00363135,  0.00261828,  0.01131498, ...,  0.00180943,
         -0.02351461, -0.04157862])},
 {'id': 2,
  'language': array([-0.0062797 , -0.00053143,  0.02610683, ...,  0.02038727,
         -0.0460627 , -0.03824178])},
 {'id': 3,
  'language': array([-0.00730639,  0.00071995,  0.04121973, ...,  0.00105407,
         -0.02168174, -0.04634845])},
 {'id': 4,
  'language': array([ 0.0020607 , -0.00372873,  0.01930736, ..., -0.00675897,
         -0.01410869,  0.01020274])},
 {'id': 5,
  'language': array([-0.00832581, -0.00800105, -0.00374588, ..., -0.02725079,
         -0.02660126, -0.03660995])},
 {'id': 6,
  'language': array([-0.0176872 , -0.00670446,  0.04048646, ...,  0.00408692,
         -0.01641602, -0.01920442])},
 {'id': 7,
  'language': array([-0.00460523, -0.00803392, -0.00536059, ..., -0.00141934,
         -0.04720104, -0.04923389])},
 {'id': 8,
  'language': array([ 0.00527384, -0.00071348,  0.01357212, ...,  0.00017116,
          0.00246124, 

In [61]:
faiss = FAISS.from_texts(df['text'].tolist(), embed_function, metadatas)
print("Time Taken --> ", time.time()-start)

Time Taken -->  279.5139207839966


In [62]:
faiss.save_local("LangChain_FAISS", "YT")

In [64]:
load_faiss = faiss.load_local("LangChain_FAISS",embed_function, "YT", allow_dangerous_deserialization=True)

In [67]:
start=time.time()
load_faiss.similarity_search_with_score("What is Machine Learning?",5)

[(Document(metadata={'id': 335, 'language': array([-0.00971158, -0.00024339,  0.01485453, ..., -0.03267737,
         -0.03848496, -0.00109941])}, page_content='\n\n\nMachine learning is a subset of AI, and it consists of the techniques that enable computers to figure things out from the data and deliver AI applications. Deep learning, meanwhile, is a subset of machine learning that enables computers to solve more complex problems inspired by the human brain’s network of neurons.&#x20;\n\n'),
  0.60838413),
 (Document(metadata={'id': 342, 'language': array([-0.02882825, -0.00399296,  0.02946261, ..., -0.0063542 ,
         -0.01685994, -0.01724056])}, page_content='\nA machine learning infrastructure tool to monitor and improve model performance. Think of them as the ledger or log of model activities/inferences. Evaluation stores are used to:\n\n* Surface up performance metrics in aggregate (or slice) for any model, in any environment\u200a—\u200aproduction, validation, training\n* Monit

In [68]:
retriever = load_faiss.as_retriever()

In [69]:
chain_type = "stuff"

In [70]:
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=chain_type,
    retriever=retriever,
    metadata={"application_type": "question_answering"},
)

In [71]:
chain

RetrievalQA(metadata={'application_type': 'question_answering'}, combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=['context', 'question'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="Use the following pieces of context to answer the user's question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{context}")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))]), llm=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x7d0d4a7eb010>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x7d0d4a7e8370>, model_name='llama3-70b-8192', temperature=0.1, groq_api_key=SecretStr('**********'))), document_variable_name='context'), retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_communi

In [72]:
question = "What is Machine Learning"
result = chain({"query": question})
result["result"]


The method `Chain.__call__` was deprecated in langchain 0.1.0 and will be removed in 0.3.0. Use invoke instead.



'According to the provided context, Machine Learning is a subset of AI that consists of techniques that enable computers to figure things out from the data and deliver AI applications.'

In [74]:
LangChainInstrumentor().instrument()

In [75]:
url = "http://storage.googleapis.com/arize-phoenix-assets/datasets/unstructured/llm/context-retrieval/arize_docs_queries.jsonl"
queries = []
with urlopen(url) as response:
    for line in response:
        line = line.decode("utf-8").strip()
        data = json.loads(line)
        queries.append(data["query"])
queries[:10]

['How do I use the SDK to upload a ranking model?',
 'What drift metrics are supported in Arize?',
 'Does Arize support batch models?',
 'Does Arize support training data?',
 'How do I configure a threshold if my data has seasonality trends?',
 'How are clusters in the UMAP calculated? When are the clusters refreshed?',
 'How does Arize calculate AUC?',
 'Can I send truth labels to Arize separtely? ',
 'How do I send embeddings to Arize?',
 'Can I copy a dashboard']

In [76]:
for query in tqdm(queries[:5]):
    chain.invoke(query)

100%|██████████| 5/5 [00:02<00:00,  1.82it/s]


In this case, we will export our retriever spans into two separate dataframes:

queries_df, in which the retrieved documents for each query are concatenated into a single column,
retrieved_documents_df, in which each retrieved document is "exploded" into its own row to enable the evaluation of each query-document pair in isolation.

This will enable us to compute multiple kinds of evaluations, including:

relevance: Are the retrieved documents grounded in the response?

Q&A correctness: Are your application's responses grounded in the retrieved context?

hallucinations: Is your application making up false information?

In [77]:
queries_df = get_qa_with_reference(px.Client())
retrieved_documents_df = get_retrieved_documents(px.Client())

In [79]:
os.environ['OPENAI_API_KEY'] = userdata.get("OPENAI_API_KEY")

In [81]:
!pip install langchain_openai

Collecting langchain_openai
  Downloading langchain_openai-0.1.16-py3-none-any.whl (46 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━[0m [32m41.0/46.1 kB[0m [31m1.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.1/46.1 kB[0m [31m910.2 kB/s[0m eta [36m0:00:00[0m
Collecting openai<2.0.0,>=1.32.0 (from langchain_openai)
  Downloading openai-1.35.13-py3-none-any.whl (328 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m328.5/328.5 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai, langchain_openai
Successfully installed langchain_openai-0.1.16 openai-1.35.13


In [82]:
eval_model = OpenAIModel(
    model="gpt-4-turbo-preview",
)

In [None]:
hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_correctness_evaluator = QAEvaluator(eval_model)
relevance_evaluator = RelevanceEvaluator(eval_model)

hallucination_eval_df, qa_correctness_eval_df = run_evals(
    dataframe=queries_df,
    evaluators=[hallucination_evaluator, qa_correctness_evaluator],
    provide_explanation=True,
)
relevance_eval_df = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[relevance_evaluator],
    provide_explanation=True,
)[0]

px.Client().log_evaluations(
    SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_eval_df),
    SpanEvaluations(eval_name="QA Correctness", dataframe=qa_correctness_eval_df),
    DocumentEvaluations(eval_name="Relevance", dataframe=relevance_eval_df),
)

run_evals |          | 0/10 (0.0%) | ⏳ 00:00<? | ?it/s