# Install required libraries

In [None]:
!pip install -qU \
    datasets \
    apache_beam \
    mwparserfromhell \
    tiktoken \
    langchain openai

# Load dataset

In [None]:
from datasets import load_dataset

# Load only the first 100 rows of the train split of the "20220301.simple"
# configuration of the "wikipedia" dataset, keeping the walkthrough small.
dataset_name = "wikipedia"
dataset_config = "20220301.simple"
data = load_dataset(dataset_name, dataset_config, split='train[:100]')
data

In [None]:
import tiktoken

# Ask tiktoken which encoding it associates with the chat model; the cell
# output shows the result.
chat_model_name = 'gpt-3.5-turbo'
tiktoken.encoding_for_model(chat_model_name)

# Tokenize and split into chunks

In [None]:
import tiktoken

# Encoding shared by the length function below.
tokenizer = tiktoken.get_encoding('cl100k_base')


def tiktoken_len(text):
    """Return how many tokens `tokenizer` produces for `text`.

    An empty `disallowed_special` is passed so that no special-token string
    in the input is treated as disallowed during encoding.
    """
    return len(tokenizer.encode(text, disallowed_special=()))

# Quick check of the length function on a short sample sentence.
sample_text = ("hello I am a chunk of text and using the tiktoken_len function "
               "we can find the length of this chunk of text in tokens")
tiktoken_len(sample_text)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunk sizes are measured with tiktoken_len (tokens, not characters). The
# separators are listed from coarsest (blank line) to finest (empty string).
splitter_settings = {
    "chunk_size": 400,
    "chunk_overlap": 20,
    "length_function": tiktoken_len,
    "separators": ["\n\n", "\n", " ", ""],
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

In [None]:
# Preview the first three chunks produced for one sample article (index 6).
sample_article_text = data[6]['text']
chunks = text_splitter.split_text(sample_article_text)[:3]
chunks

# Load the OpenAI embedding model

In [None]:
import os
from getpass import getpass

from langchain.embeddings.openai import OpenAIEmbeddings

model_name = 'text-embedding-ada-002'

# Reuse an OPENAI_API_KEY that is already set in the environment; otherwise
# prompt for one. This replaces the bare <OPENAI-API-KEY> placeholder, which
# was not valid Python, and keeps the secret out of the saved notebook source.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

# Embedding model used for both indexing and querying.
embed = OpenAIEmbeddings(
    model=model_name
)

In [None]:
# Embed two short sample strings, then display how many vectors came back and
# the length of the first vector.
first_text = 'this is the first chunk of text'
second_text = 'then another second chunk of text is here'
texts = [first_text, second_text]

res = embed.embed_documents(texts)
(len(res), len(res[0]))

In [None]:
from langchain.docstore.document import Document

# Minimal example of a Document: the text goes in page_content and any extra
# fields go in the metadata dict.
doc = Document(
    page_content="text",
    metadata={"source": "local"},
)

# Create documents to store in VectorDB

In [None]:
from tqdm.auto import tqdm
from uuid import uuid4

# NOTE(review): batch_limit is not referenced by the loop below; it is kept
# only in case a later cell relies on it — confirm and remove if unused.
batch_limit = 100

# One Document per chunk, accumulated across all records.
docs = []

for record in tqdm(data):
    # metadata fields shared by every chunk of this record
    metadata = {
        'wiki-id': str(record['id']),
        'source': record['url'],
        'title': record['title']
    }
    # split the record text into token-bounded chunks
    record_texts = text_splitter.split_text(record['text'])
    # each chunk gets its own metadata dict: its position within the record,
    # a copy of its text, and the shared record-level fields
    docs.extend(
        Document(
            page_content=text,
            metadata={"chunk": j, "text": text, **metadata},
        )
        for j, text in enumerate(record_texts)
    )

# Store it in vectorDB

In [None]:
from qdrant_client import QdrantClient
from langchain.vectorstores import Qdrant

In [None]:
# Embed every document and index it in the "my_documents" collection, with
# Qdrant storage kept under the given local path.
local_store_options = {
    "path": "/tmp/local_qdrant",
    "collection_name": "my_documents",
}
qdrant = Qdrant.from_documents(docs, embed, **local_store_options)

# Create QA using Langchain APIs

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Chat model that writes the final answer.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0)

# Retrieval QA chain backed by the local Qdrant store built above.
local_retriever = qdrant.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=local_retriever,
)

In [None]:
# Ask the chain a sample question; the cell output shows the answer.
question = 'who is ethel'
qa.run(question)

# Productionizing QA - starting with Qdrant

In [None]:
# Replace these two parts with your own Qdrant deployment; the address has the
# form <name-of-deployment>.<workspace-name>.svc.cluster.local
deployment_name = 'qdrant'
workspace_name = 'abhi-test'
qdrant_url = f'{deployment_name}.{workspace_name}.svc.cluster.local'

In [None]:
# Index the same documents in the remote Qdrant deployment at qdrant_url,
# using the same collection name as the local store.
remote_store_options = dict(
    url=qdrant_url,
    prefer_grpc=True,
    collection_name="my_documents",
)
qdrant_remote = Qdrant.from_documents(docs, embed, **remote_store_options)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Chat model that writes the final answer.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0)

# Same QA chain as before, but retrieving from the remote Qdrant store.
remote_retriever = qdrant_remote.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=remote_retriever,
)

In [None]:
# Repeat the sample question against the chain that uses the remote store.
question = 'who is ethel'
qa.run(question)

# Replace the ChatGPT LLM with an open-source LLM

In [None]:
# Install the servicefoundry package; it provides both the `servicefoundry`
# CLI and the `servicefoundry.langchain` module used in the cells below.
!pip install servicefoundry

In [None]:
# Log the servicefoundry CLI in to the host given by --host; replace the URL
# with your own host if it differs.
!servicefoundry login --host https://avmc.truefoundry.cloud/

In [None]:
from servicefoundry.langchain import TruefoundryPlaygroundLLM
import os

# Note: Login using servicefoundry login --host <https://example-domain.com>
# Generation settings passed through to the hosted model.
generation_parameters = {
    "maximumLength": 100,
    "temperature": 0.7,
    "topP": 0.9,
    "repetitionPenalty": 1,
}
model = TruefoundryPlaygroundLLM(
    model_name="llama-2-7b-chat",
    provider="truefoundry-self-hosted",
    parameters=generation_parameters,
)
# Send one placeholder prompt to check that the model responds.
response = model.predict("Enter the prompt here")

In [None]:
# Rebuild the QA chain with the TrueFoundry-hosted model in place of the
# OpenAI chat model, still retrieving from the remote Qdrant store.
open_source_retriever = qdrant_remote.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=open_source_retriever,
)

In [None]:
# Repeat the sample question against the chain that uses the open-source LLM.
question = 'who is ethel'
qa.run(question)

# Let's create a service
1. Create `main.py` and `requirements.txt`
2. Write the TrueFoundry (TFY) deployment spec as `servicefoundry.yaml`
3. Deploy the service to your workspace

In [None]:
%%writefile main.py

import os
import servicefoundry
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain.chains import RetrievalQA
from langchain.vectorstores import Qdrant
from langchain.embeddings.openai import OpenAIEmbeddings
from servicefoundry.langchain import TruefoundryPlaygroundLLM
from qdrant_client import QdrantClient
from servicefoundry.lib.auth.servicefoundry_session import ServiceFoundrySession

app = FastAPI()

# Configuration comes from environment variables. The three accessed with
# os.environ[...] are required and raise KeyError at startup when missing.
EMBEDDING_API_KEY = os.environ['OPENAI_API_KEY']
QDRANT_URL = os.environ['QDRANT_URL']
COLLECTION_NAME = os.environ['COLLECTION_NAME']
# The model names fall back to the values used earlier in this walkthrough, so
# a deployment that does not set them still starts.
EMBED_MODEL_NAME = os.environ.get('EMBED_MODEL_NAME', 'text-embedding-ada-002')
MODEL_NAME = os.environ.get('MODEL_NAME', 'llama-2-7b-chat')

# Initialize components
# `openai_api_key` is the field name itself, so it is accepted both by
# LangChain releases that predate the `api_key` alias and by newer ones.
embed = OpenAIEmbeddings(model=EMBED_MODEL_NAME, openai_api_key=EMBEDDING_API_KEY)
# NOTE(review): despite its name, QDRANT_URL is passed as `host`, so it
# presumably has to be a bare hostname without a scheme — confirm against the
# qdrant-client documentation.
client = QdrantClient(host=QDRANT_URL, port=6333)
# Attach to the existing collection; nothing is indexed here.
qdrant_remote = Qdrant(
    client=client, collection_name=COLLECTION_NAME, embeddings=embed
)

# Answering LLM; the model is chosen by MODEL_NAME instead of being hard-coded.
model = TruefoundryPlaygroundLLM(
  model_name=MODEL_NAME,
  provider="truefoundry-self-hosted",
  parameters={
    "maximumLength": 100,
    "temperature": 0.7,
    "topP": 0.9,
    "repetitionPenalty": 1
  }
)
# Retrieval QA chain used by the /query/ endpoint.
qa = RetrievalQA.from_chain_type(llm=model, chain_type="stuff", retriever=qdrant_remote.as_retriever())

@app.get("/")
def read_root():
    # Fixed greeting payload returned for GET /.
    greeting = {"Hello": "World"}
    return greeting

# Define the request model
# Body accepted by POST /query/.
class QueryModel(BaseModel):
    query: str  # question text that get_answer passes to the QA chain

@app.post("/query/")
def get_answer(body: QueryModel):
    # Reject an empty query with HTTP 400 before calling the chain.
    if not body.query:
        raise HTTPException(status_code=400, detail="Query not provided")

    # Run the retrieval QA chain and wrap its result for the JSON response.
    return {"answer": qa.run(body.query)}


In [None]:
%%writefile requirements.txt

fastapi
uvicorn
langchain
servicefoundry
tiktoken
datasets
apache-beam
mwparserfromhell
openai
qdrant-client
python-dotenv


In [None]:
%%writefile servicefoundry.yaml

# TrueFoundry deployment spec for the FastAPI QA service defined in main.py.
type: service
image:
  type: build
  build_source:
    type: local
  build_spec:
    type: tfy-python-buildpack
    build_context_path: ./
    command: uvicorn main:app --host 0.0.0.0 --port 8000 --root-path /fastapi-mine/ # replace --root-path with your path
    python_version: '3.9'
    requirements_path: requirements.txt
name: fastapi-app-mine
ports:
  - expose: true
    port: 8000 # must match the --port passed to uvicorn above
    protocol: TCP
    host: ml.avmc.truefoundry.cloud
    path: /fastapi-mine/ # replace this with a unique path for your service
    app_protocol: http
replicas: 1
# Environment variables read by main.py at startup.
env:
  OPENAI_API_KEY: <OPENAI-API-KEY>
  QDRANT_URL: <QDRANT-URL> # your qdrant URL
  EMBED_MODEL_NAME: text-embedding-ada-002 # read by main.py; use the model the collection was indexed with
  MODEL_NAME: llama-2-7b-chat # change model name if you deployed something else
  COLLECTION_NAME: my_documents
  TFY_HOST: https://avmc.truefoundry.cloud
  TFY_API_KEY: <API-KEY> # From TrueFoundry dashboard, go to Settings > API Key to fetch one

In [None]:
# Deploy to the workspace named by --workspace-fqn. Replace the
# <YOUR-WORKSPACE-FQN> placeholder, angle brackets included, with your
# workspace FQN before running.
!servicefoundry deploy --workspace-fqn=<YOUR-WORKSPACE-FQN>