In [1]:
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the Groq and Google API keys from the environment / .env file

from dotenv import load_dotenv
import os
load_dotenv()

groq_api_key = os.getenv('GROQ_API_KEY')

# os.environ only accepts str values: assigning the None returned by
# os.getenv() for a missing key would raise an opaque TypeError, so fail
# early with an actionable message instead.
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key is None:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment or in the .env file"
    )
os.environ["GOOGLE_API_KEY"] = google_api_key

In [3]:
# Read the article PDF into LangChain Document objects
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(
    'Stockology Article 2.pdf',
)
docs = loader.load()

In [4]:
# Preview only the first couple of loaded documents; dumping the whole list
# floods the cell output with the full article text.
docs[:2]

 Document(metadata={'source': 'Stockology Article 2.pdf', 'page': 1}, page_content='guideline for the upcoming week. While as previously mentioned, Time is still not showing big risk, it \nhas started giving indications that slow wealth destruction has begun. Major panic sell -off or crisis is \nnot indicated. Sun 60 Jupiter on 7th is a very positive aspect, and big movement in index stocks \ncannot be ruled out. The next day it is V enus 0 Mercury aspect. Thursday seems very important day \nof the week.  \nWe are looking at Cement and Auto Ancillary selectively, and MNC -specialised capital goods sector \ncompanies to do extremely well.  \nFrom TimeMap perspective, many individual companies, esp ecially the defence companies will see \nbeginning of a major correction, while FMCG and IT might show a positive breakout.  \n \nAugust 05, 2024: Monda y: S.1: Ashlesha: Good after 11  \nGap down and recovery could be the best scenario. Will the markets recovery to close in positive? \nOur sy

In [6]:
# Break the loaded documents into small, overlapping chunks
# (100 characters per chunk, 20 characters shared between neighbours).

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
documents = text_splitter.split_documents(docs)

# Show the first five chunks as a sanity check
documents[:5]

[Document(metadata={'source': 'Stockology Article 2.pdf', 'page': 0}, page_content='Stockology: FMCG and IT sector could show a positive \nbreakout'),
 Document(metadata={'source': 'Stockology Article 2.pdf', 'page': 0}, page_content='breakout  \n \nMAHESH  GOWANDE  AUGUST  04, 2024  / 08:04 IST'),
 Document(metadata={'source': 'Stockology Article 2.pdf', 'page': 0}, page_content='Major panic sell -off or crisis is not indicated in the coming week.  \n \nReview:'),
 Document(metadata={'source': 'Stockology Article 2.pdf', 'page': 0}, page_content='After scaling above 25,000, markets gave a gap down big corrective move on Friday, July 26th. After'),
 Document(metadata={'source': 'Stockology Article 2.pdf', 'page': 0}, page_content='July 26th. After')]

In [7]:
# Embedding model (Google Generative AI) used to vectorise the chunks
embeddings_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

In [8]:
# Embed every chunk and store the vectors in a Chroma vector store

from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma

DB = Chroma.from_documents(
    documents=documents,
    embedding=embeddings_model,
)

In [16]:
# Query the Chroma store directly (no LLM involved) and display the text
# of the first hit.
Query = "What is Nakshatra on August 09, 2024?"
result = DB.similarity_search(Query)
result[0].page_content

'August 09, 2024:  Friday: S.5: Hasta: Auspicious Day'

In [10]:
# Embed the chunks and store the vectors in a FAISS index.
#
# Index *all* chunks rather than only the first 20: with a truncated index
# any passage beyond the opening of the article can never be retrieved, and
# the FAISS store would not be comparable with the Chroma store, which is
# built from the complete `documents` list.

from langchain_community.vectorstores import FAISS

DB2 = FAISS.from_documents(documents, embeddings_model)

In [15]:
# Query the FAISS store directly (no LLM involved) and display the text of
# the first hit. The search must go through DB2 (the FAISS index); DB is the
# Chroma store, so querying it here would not exercise FAISS at all.
Query = "What is Nakshatra on August 09, 2024?"
result = DB2.similarity_search(Query)
result[0].page_content

'August 09, 2024:  Friday: S.5: Hasta: Auspicious Day'