In [None]:
!pwd
!pip install --upgrade pip

# Install required libraries
!python3 -m pip -q install redis
!pip install -U langchain gradio
!pip install -U langchain-core
!pip install -U langchain-google-vertexai
!pip install -U langchain-community


In [None]:
import os

## Redis connection settings.
## Prefer setting REDIS_HOST / REDIS_PORT / REDIS_PASSWORD in the environment
## (e.g. with `%env`) so that real credentials never have to be typed into the
## notebook. The literals below are only fallbacks; update the 'host' fallback
## with the correct Redis host URL if the environment variable is not set.
host = os.environ.get("REDIS_HOST", "")
port = int(os.environ.get("REDIS_PORT", "15337"))
password = os.environ.get("REDIS_PASSWORD", "admin")


In [None]:
import redis
from urllib.parse import quote

# Open the connection used by every later cell. decode_responses=True makes
# the client return str instead of bytes.
client = redis.Redis(host=host, port=port, password=password, decode_responses=True)

# Fail fast: ping() raises if the host, port or password is wrong.
print(client.ping())

# Connection URL for components that take a URL instead of a client object.
# The password is percent-encoded so characters such as '@', ':' or '/' cannot
# break the URL structure.
REDIS_URL = f"redis://:{quote(password, safe='')}@{host}:{port}"


True


In [None]:

from redis.commands.search.field import NumericField, TextField, TagField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import NumericFilter, Query
import redis.commands.search.aggregation as aggregations
import redis.commands.search.reducers as reducers


from redis.exceptions import ResponseError

INDEX_NAME = "idx_scan_docs"

# Search schema over the JSON documents stored under the prefix below.
schema = (
    TextField("$.accountNo", as_name="accountNo"),
    NumericField("$.date", as_name="date", sortable=True),
    TextField("$.desc", as_name="description"),
    VectorField(
        "$.desc_vector",
        "HNSW",
        {
            "TYPE": "FLOAT32",
            "DISTANCE_METRIC": "COSINE",
            # Must match the length of the vectors written to $.desc_vector.
            "DIM": 768,
        },
        # The alias is a keyword argument of the field, not a vector attribute;
        # it lets queries address the field as @desc_vector.
        as_name="desc_vector",
    ),
)

# Creating an index that already exists raises, which breaks re-running this
# cell. Probe for the index first and only create it when it is missing.
# NOTE: an index created by an earlier run keeps its old schema; drop it
# manually if the schema above has changed.
try:
    client.ft(INDEX_NAME).info()
    print(f"Index {INDEX_NAME} already exists; skipping creation")
except ResponseError:
    client.ft(INDEX_NAME).create_index(
        schema,
        definition=IndexDefinition(
            prefix=["trading:securitylot:"],
            index_type=IndexType.JSON,
        ),
    )


In [None]:
from langchain_google_vertexai import VertexAIEmbeddings
from google.colab import auth
from getpass import getpass

from typing import Generator, List, Any
import numpy as np

import vertexai

# Run the Colab user-authentication flow before any Google Cloud call.
auth.authenticate_user()
print("Authenticated")

# input your GCP project ID and region for Vertex AI
# (the project ID is read with getpass, so it is not echoed while typing)
PROJECT_ID = getpass("PROJECT_ID:")
REGION = input("REGION:")

# Echo the chosen values back so they can be checked.
print("PROJECT_ID: {} & REGION: {}".format(PROJECT_ID, REGION))

# Initialise the Vertex AI SDK with the values entered above.
vertexai.init(location=REGION, project=PROJECT_ID)

def embed_text(text=None, model_name="text-embedding-004"):
    """Embed a batch of texts with a Vertex AI embedding model.

    Args:
        text: Iterable of strings to embed. ``None`` or an empty iterable
            yields an empty result without contacting the service.
        model_name: Name of the Vertex AI embedding model to use.

    Returns:
        The value returned by ``VertexAIEmbeddings.embed_documents`` for the
        given texts, or ``[]`` when there is nothing to embed.
    """
    # A None default replaces the shared mutable [] default; materialise the
    # input once so generators are supported as well.
    texts = list(text) if text is not None else []
    if not texts:
        return []

    embeddings = VertexAIEmbeddings(model_name=model_name)
    return embeddings.embed_documents(texts)

def convert_embedding(emb: List[float]):
    """Serialise an embedding into the raw bytes of a float32 array.

    Args:
        emb: Sequence of numbers making up the embedding.

    Returns:
        ``bytes`` holding the values as consecutive 32-bit floats.
    """
    as_float32 = np.array(emb).astype(np.float32)
    return as_float32.tobytes()


In [27]:
import datetime
import json
import time
from redis.commands.search.query import Query


# Backfill embeddings: repeatedly fetch documents whose `embeddings` tag is
# still false, embed their `desc` text and write them back with the vector.
SOURCE_INDEX_NAME = "idx_trading_security_lot"  # index queried for pending documents
qry = '@embeddings:{false}'
limit = 1000        # page size; every fetched page is embedded and stored as one batch
PAUSE_SECONDS = 5   # pause after each batch (presumably to stay under API quotas)


def _fetch_pending_docs():
    """Return the first page of documents matching `qry`, ordered by date."""
    pending_query = Query(qry).paging(0, limit).sort_by("date")
    return client.ft(SOURCE_INDEX_NAME).search(pending_query).docs


def _embed_and_store(batch):
    """Embed the `desc` of every document in `batch` and write them back.

    Each document gets its vector under `desc_vector` and `embeddings` set to
    True, and is stored at its own key (`id`) in a single pipeline round trip.
    Returns the number of documents written.
    """
    vectors = embed_text([record['desc'] for record in batch])
    if len(vectors) != len(batch):
        raise RuntimeError(
            f"Expected {len(batch)} embeddings but received {len(vectors)}"
        )

    pipeline = client.pipeline()
    for record, vector in zip(batch, vectors):
        record['desc_vector'] = vector
        record['embeddings'] = True
        pipeline.json().set(record['id'], "$", record)
    pipeline.execute()
    return len(batch)


count = 0
processed_ids = set()
docs = _fetch_pending_docs()

while len(docs) > 0:
    # The query always reads page 0, so progress depends on stored documents
    # dropping out of the result set. Skip anything already handled in this
    # run so a stale index cannot cause the same records to be re-embedded.
    doc_array = []
    for doc in docs:
        if doc.id in processed_ids:
            continue
        record = json.loads(doc.json)
        record['id'] = doc.id  # the key is also kept inside the stored document
        doc_array.append(record)

    if not doc_array:
        print("Query returned only documents that were already processed; stopping.")
        break

    written = _embed_and_store(doc_array)
    processed_ids.update(record['id'] for record in doc_array)
    count += written
    print(f"Embedded and stored {written} records (running total: {count})")

    time.sleep(PAUSE_SECONDS)
    docs = _fetch_pending_docs()

print(f"Total record count: {count}")


Total record count: 0


In [39]:
import numpy as np

accNo = 'ACC10000'

# Filter expression shared by the plain query and the KNN variant below.
account_filter = f'(@accountNo:{accNo})'

# Hybrid (filter + KNN) form of the query; it is defined here but the search
# below runs the plain filter query only.
queryString = f'{account_filter}=>[KNN 100 @desc_vector $query_vector ]'

# Up to 200 matching documents, returning only the 'desc' field.
query = Query(account_filter).paging(0, 200).return_fields('desc').dialect(2)

# Example question; the search below does not use it.
qry = "How many stocks credited to accountNo ACC10000 so far?"

result_docs = client.ft(INDEX_NAME).search(query).docs

# One description per matching document, in result order.
desc_collection = [doc.desc for doc in result_docs]

# Render the descriptions as a dash-prefixed list, one entry per line.
contexts = "-" + "\n-".join(desc_collection)

print(contexts)


-14-Jul-2023: 2 ABCMOTORS stocks having unit price of INR 623.0 credited to accountNo ACC10000. The transaction value is INR 1246.0
-19-Jul-2023: 2 ABCMOTORS stocks having unit price of INR 614.0 credited to accountNo ACC10000. The transaction value is INR 1228.0
-21-Jul-2023: 10 ABCMOTORS stocks having unit price of INR 621.0 credited to accountNo ACC10000. The transaction value is INR 6210.0
-26-Jul-2023: 5 ABCFOOD stocks having unit price of INR 22790.9 credited to accountNo ACC10000. The transaction value is INR 113954.5
-02-Aug-2023: 1 ABCFOOD stocks having unit price of INR 22690.0 credited to accountNo ACC10000. The transaction value is INR 22690.0
-02-Aug-2023: 14 ABCBANK stocks having unit price of INR 1642.0 credited to accountNo ACC10000. The transaction value is INR 22988.0
-08-Aug-2023: 2 ABCBANK stocks having unit price of INR 1651.7 credited to accountNo ACC10000. The transaction value is INR 3303.4
-08-Aug-2023: 1 ABCMOTORS stocks having unit price of INR 612.55 credite

In [43]:
#@title Invoke Google Vertex LLM using Langchain
# This is where LangChain brings all the components together in the form of a simple Q&A flow
from langchain_google_vertexai import VertexAI


def create_prompt(prompt_template: str, **kwargs) -> str:
  """Fill the named placeholders of ``prompt_template`` with ``kwargs``.

  Args:
    prompt_template: Text containing ``str.format`` style ``{name}`` fields.
    **kwargs: Values substituted for the fields of the same name.

  Returns:
    The template with every placeholder replaced.
  """
  rendered = prompt_template.format(**kwargs)
  return rendered

# Prompt template: {context} receives the retrieved transaction descriptions
# and {query} the user's question. The question is inserted as-is; it is
# expected to carry its own question mark, so none is appended here.
PROMPT = """You are a helpful virtual financial & investment assistant. Use the provided context to answer the questions related to stocks that gets credited to the account number. Don't blindly make things up.

Context:
{context}

QUESTION:
{query}

ANSWER:"""


# Sample questions; pass any of them as `query` below.
query1 = "How many stocks has been credited to accountNo (account number) ACC10000?"
query2 = "Which is the most profitable stock in terms of total financial value for ACC10000?"
query3 = "Which stock has the most financial value for ACC10000?"


# `contexts` is the dash-prefixed list of descriptions built in the retrieval cell.
full_prompt = create_prompt(
    prompt_template=PROMPT,
    context=contexts,
    query=query3,
)


llm = VertexAI(
    model_name="gemini-1.5-pro-002",
    max_output_tokens=2048,
    temperature=0.5,
    verbose=True,
)

# Last expression of the cell, so the model's answer is shown as the cell output.
llm.invoke(full_prompt)


"Here's how to determine which stock has the most financial value for account ACC10000:\n\n1. **Calculate total value per stock:**  We need to sum the total transaction values for each stock type.\n\n    * **ABCMOTORS:** 1246.0 + 1228.0 + 6210.0 + 612.55 + 7330.8 + 1237.4 + 10540.0 + 6586.0 + 11038.95 + 674.0 + 6546.6 + 6387.2 + 7199.55 + 1772.0 + 936.75 + 966.15 + 13905.5 + 1929.9 + 968.4 + 950.0 + 7548.0 + 4815.0 =  118,549.85\n\n    * **ABCFOOD:** 113954.5 + 22690.0 + 21954.45 + 133653.3 + 68880.0 + 48600.0 + 48400.0 + 365535.75 + 24340.0 + 195600.0 + 99520.0 + 276584.0 + 50972.0 + 77609.55 + 330000.0 + 15480.0 + 29688.0 + 35840.0 + 5202.0 + 10120.0 + 15365.7 + 2606.05 + 2590.0 + 2551.0 + 5134.1 + 5246.6 + 7518.3 + 19760.0 = 2,005,747.35\n\n    * **ABCBANK:** 22988.0 + 3303.4 + 4737.0 + 15270.0 + 10559.5 + 8418.0 + 4363.65 + 4338.0 + 8352.9 + 8310.0 + 16104.0 + 5654.2 + 1465.15 + 12032.0 + 15574.0 + 1522.0 + 10653.65 + 2919.4 + 8676.3 + 13015.8 + 1466.0 + 19891.3 + 4554.0 + 9389.4 +

In [None]:
import gradio as gr

def handle(query):
    """Answer a question typed into the Gradio UI.

    The question is placed into the notebook's PROMPT template together with
    the context retrieved earlier (`contexts`) and sent to the Vertex AI
    model (`llm`).

    Args:
        query: The user's question as plain text.

    Returns:
        The model's answer, or a short hint when the question is blank.
    """
    # Do not spend a model call on an empty submission.
    if not query or not query.strip():
        return "Please enter a question."

    full_prompt = create_prompt(
        prompt_template=PROMPT,
        context=contexts,
        query=query,
    )
    response = llm.invoke(full_prompt)
    return response

# Minimal text-in / text-out web UI around `handle`.
iface = gr.Interface(
    fn=handle,
    inputs="text",
    outputs="text",
)

# share=True asks Gradio for a shareable link in addition to the local server.
iface.launch(share=True)

In [None]:
# Shut down the Gradio server that was started by iface.launch().
iface.close()

Closing server running on port: 7860
