In [1]:
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Data ingestion: read the local text file into a list of LangChain documents.

from langchain_community.document_loaders import TextLoader

loader = TextLoader("stockology.txt")
text_documents = loader.load()

# Display what was loaded.
text_documents




In [3]:
#Import Google and Groq credentials

from dotenv import load_dotenv
import os

# Read key/value pairs from a local .env file (when one exists) into the environment.
load_dotenv()

## Load the GROQ and Google API keys
groq_api_key=os.getenv('GROQ_API_KEY')

# os.environ only accepts str values, so assigning the result of os.getenv()
# directly would raise an opaque TypeError when the key is missing.
# Fail early with an actionable message instead.
google_api_key=os.getenv("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment or in a .env file"
    )
os.environ["GOOGLE_API_KEY"]=google_api_key



In [4]:
# Break the loaded documents into small, overlapping chunks.

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
documents = text_splitter.split_documents(text_documents)

# Preview the first five chunks.
documents[:5]


[Document(metadata={'source': 'stockology.txt'}, page_content='Article 1: July 28, 2024 / 15:50 IST'),
 Document(metadata={'source': 'stockology.txt'}, page_content='Title: Stockology: Volatility would be high, and hammering will be stock-specific'),
 Document(metadata={'source': 'stockology.txt'}, page_content='While markets are entering the slightly expensive zone, volatility is set to increase more.'),
 Document(metadata={'source': 'stockology.txt'}, page_content='Review:'),
 Document(metadata={'source': 'stockology.txt'}, page_content='As expected, below 24290, buying emerged in most of the sectors and stocks.')]

In [5]:
# Create the Google embedding model and embed the text of every chunk.
# NOTE(review): `embeddings` is not read by any later cell visible here; the
# vector stores below re-embed the chunks via `embeddings_model` — confirm it is needed.
embeddings_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
chunk_texts = [chunk.page_content for chunk in documents]
embeddings = embeddings_model.embed_documents(chunk_texts)

In [6]:
# Embed the chunks with the Google embedding model and index them in a Chroma store.

from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

DB = Chroma.from_documents(documents=documents, embedding=embeddings_model)


In [7]:
# Query the Chroma store directly (no LLM involved) and show every returned chunk.
Query = "What was the market view on july 29 2024?"
result = DB.similarity_search(Query)

result


[Document(metadata={'source': 'stockology.txt'}, page_content='After scaling above 25,000, markets gave a gap down big corrective move on Friday, July 26th. After'),
 Document(metadata={'source': 'stockology.txt'}, page_content='July 26th. After four negative days, the market gave a single big candle which recovered losses of'),
 Document(metadata={'source': 'stockology.txt'}, page_content='August 07, 2024: Wednesday: S.3: Purva: Bullish day, Book Profits:'),
 Document(metadata={'source': 'stockology.txt'}, page_content='As expected, below 24290, buying emerged in most of the sectors and stocks.')]

In [8]:
# Query the Chroma store directly (no LLM involved) and show only the text of the first hit.
Query = "What was the market view on july 29 2024?"
result = DB.similarity_search(Query)

result[0].page_content

'After scaling above 25,000, markets gave a gap down big corrective move on Friday, July 26th. After'

In [9]:
# Index chunks in a FAISS store, embedding them with the same Google model.
# Only the first 20 chunks are passed in, so this store covers a subset of the text.

from langchain_community.vectorstores import FAISS

DB2 = FAISS.from_documents(
    documents[:20],
    embeddings_model,
)

In [10]:
#Direct Search on FAISS db without LLM
Query="What was the market view on july 29 2024?"
# Query DB2 (the FAISS store); the original queried DB, which is the Chroma store,
# so this cell never exercised FAISS. Re-run the cell to refresh its recorded output.
result=DB2.similarity_search(Query)
result[0].page_content

'After scaling above 25,000, markets gave a gap down big corrective move on Friday, July 26th. After'