In [None]:
!pwd
!pip install --upgrade pip

# Install required libraries
!python3 -m pip -q install redis
!pip install -U langchain gradio
!pip install -U langchain-google-vertexai


In [None]:
## Uncomment & execute the following code in case Redis Enterprise is not available
##################################################################################

# %%sh
# curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg
# echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list
# sudo apt-get update  > /dev/null 2>&1
# sudo apt-get install redis-stack-server  > /dev/null 2>&1
# redis-stack-server --daemonize yes

In [1]:
## Update the 'host' field with the correct Redis host URL
host = ''           # hostname of the Redis endpoint to connect to
port = 6379         # Redis' default port; replace with the port of your database
password = ''       # fill in locally; avoid committing a real password
requirePass = True  # True: connect to host/port with the password above

## For redis-stack-server, comment out the above code and uncomment the following:
# host = 'localhost'
# port = 6379
# password = ''
# requirePass = False

In [2]:
import redis
from urllib.parse import quote

# Build the client and the matching connection URL together so both always
# describe the same server.
if requirePass:
    client = redis.Redis(host=host, port=port, decode_responses=True, password=password)
    # Percent-encode the password so characters such as '@', ':' or '/' cannot
    # corrupt the URL; redis-py decodes it again when parsing the URL.
    REDIS_URL = f"redis://:{quote(password, safe='')}@{host}:{port}"
else:
    # Local redis-stack-server: no password, redis-py's default port (6379).
    # The URL must not reference `password`/`port`, which may be undefined here.
    client = redis.Redis(host='localhost', decode_responses=True)
    REDIS_URL = "redis://localhost:6379"

print(client.ping())
# Clear Redis database (optional)
# WARNING: this deletes every key in the selected database, including any
# previously built index; comment it out to keep existing data.
client.flushdb()

INDEX_NAME = "idx_qna"

True


In [None]:
## Authenticate with GCP & set project id and region
# This cell only works inside Google Colab: `google.colab` is imported below.
from google.colab import auth
from getpass import getpass

# Authenticate the current Colab user before any Vertex AI call is made.
auth.authenticate_user()
print('Authenticated')

# input your GCP project ID and region for Vertex AI
# The project ID is read with getpass; note that it is still printed below.
PROJECT_ID = getpass("PROJECT_ID:")
# Region is fixed; replace the literal with the commented input() call to prompt for it.
REGION = 'us-central1' #input("REGION:")

print(f'PROJECT_ID: {PROJECT_ID} & REGION: {REGION}')


In [None]:
# Download the source PDF and save it as report.pdf, the file name the loader cell reads.
!wget https://storage.googleapis.com/abhi-data-2024/how_india_shops_online.pdf -O report.pdf


In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.chains import RetrievalQA

from langchain.document_loaders import PyPDFLoader


file = "report.pdf"

# Splitter settings: chunks of up to 2500 characters with a 50-character
# overlap; add_start_index asks the splitter to record each chunk's start offset.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2500,
    chunk_overlap=50,
    add_start_index=True,
)

# Read the PDF into LangChain documents.
loader = PyPDFLoader(file)
documents = loader.load()

# `chunks` is what the vector store cell embeds and indexes.
chunks = text_splitter.split_documents(documents)
#chunked_docs = [doc.page_content for doc in chunks]

# Create text embeddings with Vertex AI embedding model

Use the Vertex AI API for text embeddings, developed by Google.

Text embeddings are a dense vector representation of a piece of content such that, if two pieces of content are semantically similar, their respective embeddings are located near each other in the embedding vector space. This representation can be used to solve common NLP tasks, such as:


*   Semantic search: Search text ranked by semantic similarity.
*   Recommendation: Return items with text attributes similar to the given text.
*   Classification: Return the class of items whose text attributes are similar to the given text.
*   Clustering: Cluster items whose text attributes are similar to the given text.
*   Outlier Detection: Return items where text attributes are least related to the given text.

The Vertex AI text-embeddings API lets you create a text embedding using Generative AI on Vertex AI. The textembedding-gecko model accepts a maximum of 3,072 input tokens (a token is a word or a fragment of a word, so this is not the same as a word count) and outputs 768-dimensional vector embeddings.

In [6]:
from langchain.vectorstores.redis import Redis
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.chains import RetrievalQA
from langchain_google_vertexai import VertexAIEmbeddings

from langchain.document_loaders import UnstructuredFileLoader

# Vertex AI embedding client; get_vectordb() passes it to the Redis vector store.
embeddings = VertexAIEmbeddings(
    model_name="textembedding-gecko@003",
    project=PROJECT_ID,
    location=REGION,
)

def get_vectordb() -> Redis:
    """Return a Redis vector store for ``INDEX_NAME``, building it if needed.

    The function first tries to attach to an index that already exists at
    ``REDIS_URL``. If that attempt fails, the reason is reported and the
    index is built by embedding ``chunks`` with ``embeddings``.

    Returns:
        Redis: the LangChain Redis vector store bound to ``INDEX_NAME``.

    Raises:
        Exception: whatever ``Redis.from_documents`` raises when the index
            cannot be built (for example, the server is unreachable).
    """
    try:
        return Redis.from_existing_index(
            embedding=embeddings,
            index_name=INDEX_NAME,
            redis_url=REDIS_URL,
        )
    except Exception as exc:
        # Reuse is best-effort, so fall through to a rebuild, but report why
        # instead of swallowing the error. Catching Exception (not a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        # NOTE(review): from_existing_index appears to require a `schema`
        # argument in current LangChain releases; if so, this call always
        # fails and the index is rebuilt on every run - confirm and pass it.
        print(
            f"Could not reuse index '{INDEX_NAME}' "
            f"({type(exc).__name__}: {exc}); loading documents instead."
        )

    # Load Redis with documents
    return Redis.from_documents(
        documents=chunks,
        embedding=embeddings,
        index_name=INDEX_NAME,
        redis_url=REDIS_URL,
    )


# NOTE(review): this rebinds the name `redis`, which the connection cell bound
# to the redis-py module via `import redis`. From here on `redis` is the vector
# store (the QA chain cell calls `redis.as_retriever`), so `redis.Redis` only
# works again after the connection cell re-runs its import.
redis = get_vectordb()

#embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)

# Include RAG

We're going to build a complete RAG pipeline from scratch incorporating the following components:

*   Standard retrieval and chat completion
*   Dense content representation to improve accuracy
*   Query re-writing to improve accuracy
*   Semantic caching to improve performance
*   Conversational session history to improve personalization

### Define Prompt template
PromptTemplate defines the exact text of the prompt that is fed to the LLM. This step is optional: the default prompt usually works well for OpenAI models but might fall short for other models.

In [7]:
#@title Function to define prompt template

def create_prompt():
    """Build the prompt template used by the question-answering chain.

    The template takes two variables: ``context`` (the retrieved text) and
    ``question`` (the user's query). It tells the model to answer only from
    the context and to say so when it does not know the answer.

    Returns:
        PromptTemplate: template with input variables ``context`` and
        ``question``.
    """
    from langchain.prompts import PromptTemplate

    # Define our prompt
    # The leading spaces on each line are part of the string sent to the model;
    # they are kept unchanged so the prompt text stays exactly as before.
    prompt_template = """Use only the following pieces of context to answer the question. If you don't know the answer, say that you don't know, don't try to make up an answer.

    This should be in the following format:

    Question: [question here]
    Answer: [answer here]

    Begin!

    Context:
    ---------
    {context}
    ---------
    Question: {question}
    Answer:"""

    return PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )


In [8]:
#@title Invoke Google Vertex LLM using Langchain
# This is where the Langchain brings all the components together in a form of a simple QnA chain
from langchain_google_vertexai import VertexAI

# Pass the project and region explicitly, exactly as the embeddings client
# does, so both Vertex AI clients target the project entered in the auth cell
# rather than whatever default the runtime resolves.
llm = VertexAI(
    model_name="gemini-1.5-pro-preview-0409",
    project=PROJECT_ID,
    location=REGION,
    max_output_tokens=2048,
    temperature=0.5,
    verbose=False,
)

# "stuff" places all retrieved chunks into the single prompt from create_prompt().
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # Retrieval is filtered by a vector distance threshold of 0.5.
    retriever=redis.as_retriever(
        search_type="similarity_distance_threshold",
        search_kwargs={"distance_threshold": 0.5},
    ),
    #return_source_documents=True,
    chain_type_kwargs={"prompt": create_prompt()},
    #verbose=True
)

In [15]:
# Sample query: invoke() returns a mapping; 'result' holds the generated answer text.
qa.invoke('What are some motivations for shopping online?')['result']

'Answer: Some motivators for shopping online include the absence of physical stores for premium brands, stockouts of certain products, and a lack of knowledgeable staff in offline stores.  Additional motivators include the lack of discounts and special offers in physical stores, along with large crowds in malls during weekends. \n'

In [16]:
# Sample query about payment preferences; only the 'result' answer text is displayed.
qa.invoke('How do Indians like to pay for shopping online?')['result']

'Answer: Urban dwellers and those who live in the rest of India are similar in their preference for using UPI for online payments. However, cash on delivery is still the most preferred option among those in the rest of India. \n'

In [19]:
# Sample query about online-shopping challenges; only the 'result' answer text is displayed.
qa.invoke('What are some known challenges in shopping online?')['result']

'Question: What are some known challenges in shopping online?\nAnswer: Some known challenges of shopping online are payment fraud, credibility of unfamiliar websites, and doubts regarding product quality matching the images shown.  Also, urban delivery faces logistical overload, regulations, and access restrictions, causing inconsistent experiences to the consumers. \n'

In [20]:
# Sample query about the home and kitchen segment; only the 'result' answer text is displayed.
qa.invoke('How home and kitchen segment is growing?')['result']

'Question: How home and kitchen segment is growing?\nAnswer: In the Indian market, the home furnishing and kitchen sector is growing at a CAGR of 10%. \n'

In [21]:
# Sample query about social media's influence; only the 'result' answer text is displayed.
qa.invoke('What are the effects of social media on online shopping?')['result']

'Answer: Social media has amplified awareness and aspirations of products. 62% of respondents tried products after seeing them on Facebook and Instagram. Social media is the most preferred channel for encouraging trials of new products. \n'

In [22]:
# Sample query about product categories; only the 'result' answer text is displayed.
qa.invoke('What are some relevant items that are shopped online?')['result']

'    Answer: Grocery, Fashion and Accessories, Electronics and Consumer Durables, Beauty and Personal Care, Health and Wellness, Home and Kitchen, Sports and Fitness \n'

In [None]:
import gradio as gr

def handle(query):
    """Answer one question from the Gradio text box with the QA chain.

    Args:
        query: the text typed by the user; may be empty or ``None``.

    Returns:
        str: the chain's answer, or a short prompt to enter a question when
        the input is empty or only whitespace.
    """
    # Skip the retrieval + LLM round trip when there is nothing to ask.
    if not query or not query.strip():
        return "Please enter a question."
    # Same call style as the sample cells: invoke() and read the 'result' key
    # (replaces the deprecated Chain.run).
    return qa.invoke(query)['result']

# Minimal text-in / text-out UI that forwards each submission to handle().
iface = gr.Interface(fn=handle, inputs="text", outputs="text")
# NOTE(review): share=True asks Gradio for a publicly reachable share link per
# its documentation - confirm that exposing this demo outside the runtime is intended.
iface.launch(share=True)

In [None]:
# Shut down the Gradio server started by iface.launch() in the previous cell.
iface.close()

Closing server running on port: 7860
