In [2]:
import os
# Step up one directory so relative paths used later (e.g. 'Data/') resolve
# from the parent folder. The target is resolved once and remembered in a
# kernel global: a bare os.chdir('../') would climb one more level every
# time this cell is re-run.
if '_PROJECT_ROOT' not in globals():
    _PROJECT_ROOT = os.path.abspath('../')
os.chdir(_PROJECT_ROOT)

In [3]:
# Load API keys from the environment (python-dotenv reads a .env file if present)
from dotenv import load_dotenv
load_dotenv()

# Fail fast with a readable message when a key is absent. Copying the result
# of os.getenv() back into os.environ is a no-op when the variable is set and
# raises an opaque "TypeError: str expected, not NoneType" when it is not.
_REQUIRED_ENV_VARS = ('GROQ_API_KEY', 'HF_TOKEN')
_missing_env_vars = [key for key in _REQUIRED_ENV_VARS if os.getenv(key) is None]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in the shell or in a .env file."
    )



In [4]:
#load pdf files
from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader
def load_pdf_files(data):
    """Load the PDF files found under a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``; files are
            selected with the ``*.pdf`` glob and parsed with ``PyPDFLoader``.

    Returns:
        The documents produced by ``DirectoryLoader.load()``.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [None]:
# Read every PDF from the 'Data/' folder (relative to the current working directory).
extracted_data = load_pdf_files('Data/')



In [None]:
#Split data into multiple chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

def text_split(documents, chunk_size=500, chunk_overlap=20):
    """Split documents into smaller chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split; passed unchanged to
            ``splitter.split_documents``.
        chunk_size: ``chunk_size`` forwarded to the splitter. Defaults to 500,
            the value that used to be hard-coded here.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter. Defaults
            to 20, the value that used to be hard-coded here.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

In [8]:
# Chunk the loaded PDF documents for embedding.
text_chunks = text_split(documents=extracted_data)

In [11]:
#text_chunks

In [10]:
# Report how many chunks the splitter produced.
print(len(text_chunks))

1661


In [12]:
#Download embedding from huggingface
from langchain_huggingface import HuggingFaceEmbeddings

def download_hugging_face_embedding(model_name="all-MiniLM-L6-v2"):
    """Create a ``HuggingFaceEmbeddings`` instance for the given model.

    Args:
        model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.
            Defaults to "all-MiniLM-L6-v2", the model that used to be
            hard-coded here, so existing zero-argument calls are unaffected.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` object.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [24]:
# Build the embedding model once; it is reused below for the sample query and for the FAISS index.
embeddings= download_hugging_face_embedding()

In [42]:
# Smoke test: embed one sample query. The bare `result` on the last line
# echoes the entire embedding vector as the cell output.
result=embeddings.embed_query("what are the procedure of installation")
result

[0.06454586982727051,
 0.047256309539079666,
 0.008665105327963829,
 -0.0368228405714035,
 -0.054572075605392456,
 -0.027574801817536354,
 0.012743386439979076,
 0.015847496688365936,
 -0.032862406224012375,
 -0.015028310939669609,
 0.059374067932367325,
 -0.0036433388013392687,
 0.028818678110837936,
 -0.036067716777324677,
 -0.05594169348478317,
 -0.009238719940185547,
 -0.0065774074755609035,
 0.017495736479759216,
 -0.005373092368245125,
 -0.06804537028074265,
 0.005668764002621174,
 -0.07171773165464401,
 -0.03552819415926933,
 -0.10623595118522644,
 0.0021670793648809195,
 -0.006932201329618692,
 0.004591924604028463,
 0.09452936798334122,
 0.06651078909635544,
 0.017112528905272484,
 0.025502918288111687,
 -0.06677965074777603,
 0.047925785183906555,
 0.03311958909034729,
 -0.016251584514975548,
 -0.0013320702128112316,
 0.084822878241539,
 -0.0507761687040329,
 -0.025731341913342476,
 -0.045903269201517105,
 0.06137659028172493,
 -0.09385813772678375,
 -0.05660498887300491,
 -0

In [27]:
from langchain_community.vectorstores import FAISS

# Embed every chunk and index the vectors in an in-memory FAISS store.
vectordb = FAISS.from_documents(
    documents=text_chunks,
    embedding=embeddings,
)

In [28]:
# Ad-hoc check: query the index with a sample question and display the matching documents.
vectordb.similarity_search("explain 3d explorer")

[Document(id='3b572895-25d8-4944-af3f-b256f2f2c5c4', metadata={'producer': 'þÿMicrosoft® Word 2010; modified using iText 2.1.7 by 1T3XT', 'creator': 'Microsoft® Word 2010', 'creationdate': '2024-12-16T04:40:43-06:00', 'viewable': '', 'moddate': '2024-12-17T07:25:05-08:00', 'marketsegment': 'Service Provider', 'doctype': 'TSD Products TOC User Guide', 'author': 'Wendy Hirschfeld (whirschf)', 'title': 'Cisco Crosswork Hierarchical Controller 10.0 Network Visualization Guide', 'campaigns': '', 'alfrescodocversion': '', 'date': '2024-12-16T02:37:22.519-08:00', 'concept': 'Cisco Crosswork Hierarchical Controller', 'topics': '', 'iapath': 'cisco.com#Products#Cisco Products#Cloud and Systems Management#Routing and Switching Management#Cisco Crosswork Network Automation#Cisco Crosswork Hierarchical Controller', 'language': 'en', 'entitlementexpression': 'contains( &quot;0,1,2,3,4,7&quot; , $profileField[3] )', 'country': 'US', 'secondaryconcept': '', 'documentid': '1734449105679408', 'contentt

In [29]:
# Expose the FAISS index as a retriever: similarity search with k=3.
retriever = vectordb.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [37]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [38]:
# Chat model served through Groq.
llm = ChatGroq(model="qwen-2.5-32b")

# System instructions for the model. `{context}` is a template placeholder —
# presumably filled with the retrieved documents by the stuff-documents chain
# (confirm against the LangChain docs). The text is sent to the model verbatim.
system_prompt='''You are a helpful Telecommunication network management system  test engineer. 
                 your goals is to genenate details test case given user input
                 
                 ##Example ##
                 if user ask to generate network inventory test case .Generate testcase in tabular format including devices,link,connection ,port  
                 
                 {context}
                 
                 '''

# Two-message chat template: the system instructions plus the user's `{input}`.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

# Wire the pieces together: the document chain combines llm and prompt, and the
# retrieval chain puts the retriever in front of it.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)
