In [2]:
import os
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader


In [3]:
from dotenv import load_dotenv
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')

In [4]:
model = ChatGroq(groq_api_key = groq_api_key, model = 'llama-3.3-70b-versatile')
model

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000002F5D4390090>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002F5D413FAD0>, model_name='llama-3.3-70b-versatile', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [5]:
embeddings = HuggingFaceEmbeddings(model_name = 'all-MiniLM-L6-v2')

  embeddings = HuggingFaceEmbeddings(model_name = 'all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


In [6]:
loader = PyPDFLoader("Text Book dir/CN.pdf")
docs = loader.load()
docs

[Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 0}, page_content=''),
 Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 1}, page_content='Computer\xa0NetworksTanenbaum\xa0•Feamster\xa0•WetherallSIXTH\xa0EDITION\nGLOBAL\xa0EDITION'),
 Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 2}, page_content='COMPUTER NETWORKSSIXTH EDITION'),
 Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 3}, page_content='This page is intentionally left blank'),
 Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 4}, page_content='COMPUTER NETWORKSSIXTH EDITIONGlobal EditionANDREW S. TANENBAUMVrije UniversiteitAmsterdam, The NetherlandsNICK FEAMSTERUniversity of ChicagoChicago, ILDAVID WETHERALLGoogle'),
 Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 5}, page_content=' \nPlease\n \ncontact\n \nhttps://support.pearson.com/getsupport/s/contactsupport\n \nPearson\n \nEducation\n \nLimited\nKAO\n \nTwo\nKAO\n \nPark\nHockham\n \nWay\nHarl

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000 , chunk_overlap = 250)
final_docs = text_splitter.split_documents(docs)
final_docs


[Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 1}, page_content='Computer\xa0NetworksTanenbaum\xa0•Feamster\xa0•WetherallSIXTH\xa0EDITION\nGLOBAL\xa0EDITION'),
 Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 2}, page_content='COMPUTER NETWORKSSIXTH EDITION'),
 Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 3}, page_content='This page is intentionally left blank'),
 Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 4}, page_content='COMPUTER NETWORKSSIXTH EDITIONGlobal EditionANDREW S. TANENBAUMVrije UniversiteitAmsterdam, The NetherlandsNICK FEAMSTERUniversity of ChicagoChicago, ILDAVID WETHERALLGoogle'),
 Document(metadata={'source': 'Text Book dir/CN.pdf', 'page': 5}, page_content='Please\n \ncontact\n \nhttps://support.pearson.com/getsupport/s/contactsupport\n \nPearson\n \nEducation\n \nLimited\nKAO\n \nTwo\nKAO\n \nPark\nHockham\n \nWay\nHarlow\nCM17\n \n9SR\nUnited\n \nKingdom\nand\n \nAssociated\n \nCompanies\n \nthroughout\n

In [8]:
vector_db = FAISS.from_documents(final_docs, embeddings)

In [9]:
vector_db.save_local('faiss_index')