In [None]:
!pwd
!pip install --upgrade pip

# Install required libraries
!python3 -m pip -q install redis
!pip install -U langchain gradio
!pip install -U langchain-core
!pip install -U langchain-google-vertexai
!pip install -U langchain-community


In [13]:
## Update the 'host' field with the correct Redis host URL
host = ''
port = 15337
password = 'admin'


In [14]:
import redis

client = redis.Redis(host = host, port=port, decode_responses=True, password=password)

print(client.ping())

REDIS_URL = f"redis://:{password}@{host}:{port}"


True


In [None]:

from redis.commands.search.field import NumericField, TextField, TagField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import NumericFilter, Query
import redis.commands.search.aggregation as aggregations
import redis.commands.search.reducers as reducers


INDEX_NAME = f"idx_scan_docs"

schema = (TextField("$.accountNo", as_name="accountNo"),
          NumericField("$.date", as_name="date", sortable=True),
          TextField("$.desc", as_name="desc"),
          VectorField("$.desc_vector", "HNSW",
              {
              "TYPE": "FLOAT32",
              "DISTANCE_METRIC": "COSINE",
              #"as_name": "desc_vector",
              "DIM": 768,
              }))

client.ft(INDEX_NAME).create_index(schema,
      definition=IndexDefinition(prefix=["trading:securitylot:"], index_type=IndexType.JSON))


In [16]:
from langchain_google_vertexai import VertexAIEmbeddings
from google.colab import auth
from getpass import getpass

from typing import Generator, List, Any
import numpy as np

import vertexai

auth.authenticate_user()
print('Authenticated')

# input your GCP project ID and region for Vertex AI
PROJECT_ID = getpass("PROJECT_ID:")
REGION = input("REGION:")

print(f'PROJECT_ID: {PROJECT_ID} & REGION: {REGION}')

vertexai.init(project=PROJECT_ID, location=REGION)

def embed_text(text=[]):
   embeddings = VertexAIEmbeddings(model_name="text-embedding-004")
   return embeddings.embed_documents(text)

def convert_embedding(emb: List[float]):
   return np.array(emb).astype(np.float32).tobytes()


Authenticated
PROJECT_ID: central-beach-194106 & REGION: us-central1


In [27]:
import datetime
import json
import time
from redis.commands.search.query import Query


qry = '@embeddings:{false}'
limit = 1000
query = (Query(qry).paging(0, limit).sort_by("date"))
docs = client.ft("idx_trading_security_lot").search(query).docs

doc_array = []
desc_vector_list = []
count = 0
while len(docs) > 0:
    for doc in docs:
        temp = json.loads(doc.json)
        temp['id'] = doc.id
        doc_array.append(temp)
        count += 1

        if len(doc_array) == 1000:
            vectors = embed_text([d['desc']for d in doc_array])

            desc_vector_list.extend(vectors)
            print(desc_vector_list)
            pipeline = client.pipeline()
            for index, vec in enumerate(desc_vector_list):
                d = doc_array[index]
                d['desc_vector'] = vec
                d['embeddings'] = True
                if index < 4:
                  print(d)
                pipeline.json().set(d['id'], "$", d)
            pipeline.execute()

            desc_vector_list = []
            doc_array = []
            time.sleep(5)

    if len(doc_array) > 0:
        print(f"Inside second block {len(doc_array)}")
        vectors = embed_text([d['desc']for d in doc_array])
        desc_vector_list.extend(vectors)
        pipeline = client.pipeline()
        index = 0
        for index, vec in enumerate(desc_vector_list):
            d = doc_array[index]
            d['desc_vector'] = vec
            d['embeddings'] = True
            pipeline.json().set(d['id'], "$", d)
        pipeline.execute()
        print(f"total rec --> {index}")
        desc_vector_list = []
        doc_array = []
        time.sleep(5)

    query = (Query(qry).paging(0, limit).sort_by("date"))
    docs = client.ft("idx_trading_security_lot").search(query).docs

print(f"Total record count: {count}")


Total record count: 0


In [17]:
import numpy as np
import re
from redis.commands.search.query import Query

def get_accNo(query):
  accNo = None
  pattern = r'ACC\d+'
  match = re.search(pattern, query)
  if match:
      accNo = match.group()

  return accNo

def get_contexts(accNo):
  query = (
      Query(f'(@accountNo:{accNo})')
      .paging(0, 200)
      .return_fields('desc')
      .dialect(2)
  )

  result_docs = client.ft(INDEX_NAME).search(query).docs

  desc_collection = []
  for d in result_docs:
    desc_collection.append(d.desc)

  contexts = "-" + "\n-".join([str for str in desc_collection])
  print(contexts)
  return contexts


In [18]:
#@title Invoke Google Vertex LLM using Langchain
from langchain_google_vertexai import VertexAI


def create_prompt(prompt_template: str, **kwargs) -> str:
  return prompt_template.format(**kwargs)

PROMPT = """You are a helpful virtual financial & investment assistant. Use the provided context to answer the questions related to stocks that gets credited to the account number. Don't blindly make things up.

    Context:
    {context}

    QUESTION:
    {query}?

    ANSWER:"""


def get_response(query):
    accNo = get_accNo(query)
    if accNo is None:
      return "Please provide a valid account number (e.g ACC10000) to fetch the details"

    contexts = get_contexts(accNo)

    query1 = "How many stocks has been credited to account ACC10000 between 1st Nov, 2023 and 30th Nov, 2023?"
    query2 = "What was the total portfolio value of ABCBANK stock till Aug 31st, 2023 for account ACC10000?"
    query3 = "Suppose portfolio against account ACC10000 had 0 financial value on July 13th, 2023 and user has not sold any stocks so far. Which stock has the most financial value?"
    query4 = "How many stocks has been credited to accountNo (account number) ACC10000?"
    query5 = "Which is the most profitable stock in terms of total financial value for ACC10000?"

    full_prompt = create_prompt(
            prompt_template=PROMPT,
            context=contexts,
            query=query
          )

    llm = VertexAI(
        model_name="gemini-1.5-pro-002",
        max_output_tokens=2048,
        temperature=0.5,
        verbose=True,
    )
    return llm.invoke(full_prompt)


In [None]:
import gradio as gr

def handle(query):
    response = get_response(query)
    return response

iface = gr.Interface(fn=handle, inputs="text", outputs="text")
iface.launch(share=True)

In [15]:
iface.close()

Closing server running on port: 7860
