In [23]:
from dotenv import load_dotenv
import os
import getpass
from groq import Groq

# Copy any variables defined in a local .env file into the process environment.
load_dotenv()

# os.getenv takes the NAME of an environment variable, not the name of the
# .env file; looking up ".env" would simply return None.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    # Fall back to a hidden interactive prompt so the key is never hardcoded
    # in the notebook or echoed into a saved cell output.
    api_key = getpass.getpass("Enter your GROQ_API_KEY: ")


In [14]:
from langchain_community.document_loaders import PyPDFLoader

# Path to the textbook PDF, relative to the notebook's working directory.
file_path = "./test_textbook/Compilers Principles, Techniques, & Tools 2nd Ed.pdf"

# Build the loader, read the PDF into a list of documents, and report how
# many documents came back as a quick sanity check.
loader = PyPDFLoader(file_path)
docs = loader.load()
doc_count = len(docs)
print(doc_count)

947


In [15]:
from langchain_ollama import OllamaEmbeddings

# Name of the Ollama model used to embed text chunks and queries.
# NOTE(review): llama3.2 is a general-purpose chat model; a dedicated
# embedding model may give better retrieval quality - confirm before relying
# on the search results.
EMBEDDING_MODEL = "llama3.2"

embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)

In [16]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Chunking parameters: chunks of up to 1000 characters, with 200 characters
# shared between neighbours, and the start offset of each chunk recorded.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded documents into chunks; the bare last expression displays
# the resulting chunk count as the cell output.
all_splits = text_splitter.split_documents(docs)
len(all_splits)

3201

In [17]:
from langchain_core.vectorstores import InMemoryVectorStore
# Create an in-memory vector store backed by the embedding model configured
# earlier in the notebook.
vector_store = InMemoryVectorStore(embeddings)

# Add every chunk to the store; the call returns the ids assigned to the
# stored documents.
ids = vector_store.add_documents(all_splits)

In [39]:
# Question to answer from the textbook.
query = "Explain lexical analyzer"

# Retrieve the chunks the vector store considers most similar to the query,
# then show the first hit for a quick look at what was matched.
results = vector_store.similarity_search(query)
top_hit = results[0]
print(top_hit)

page_content='T o see ho w the GOTO /'s are computed/, consider GOTO /#28 I
/3/6
/;C /#29/. In the original
set of LR/#28/1/#29 items/, GOTO /#28 I
/3
/; C /#29/= I
/8
/, and I
/8
is no w part of I
/8/9
/,s o w e mak e
GOTO /#28 I
/3/6
/;C /#29 be I
/8/9
/. W e could ha v e arriv ed at the same conclusion if w e
considered I
/6
/, the other part of I
/3/6
/. That is/, GOTO /#28 I
/6
/;C /#29 /= I
/9
/, and I
/9
is
no w part of I
/8/9
/. F or another example/, consider GOTO /#28 I
/2
/;c /#29/, an en try that is
exercised after the shift action of I
/2
on input c /. In the original sets of LR/#28/1/#29
items/, GOTO /#28 I
/2
/;c /#29/= I
/6
/. Since I
/6
is no w part of I
/3/6
/, GOTO /#28 I
/2
/;c /#29 b ecomes I
/3/6
/.
Th us/, the en try in Fig/. /4/./4/3 for state /2 and input c is made s/3/6/, meaning shift
and push state /3/6 on to the stac k/. /2
When presen ted with a string from the language c
/#03
dc
/#03
d /, b oth the LR parser' metadata={'source': './test_textbook/Compilers

In [38]:
# Build the context from the text of every retrieved chunk. Interpolating
# results[0] directly would send the Document's full string form (including
# the "page_content=..." wrapper and metadata) and would drop all other hits,
# which leaves the model with little relevant material when the top hit is
# off-topic.
context = "\n\n".join(doc.page_content for doc in results)

client = Groq(api_key=api_key)
# NOTE(review): confirm that the model id below is still served by Groq;
# hosted model ids get retired over time.
completion = client.chat.completions.create(
    messages=[
        {'role':'system','content':'''Answer the questions from the prompt and the context given by the user. If the answer is not
         found, reply "Cannot provide answer", don't give any additional explanation about the question.Give the answer with proper headings,subheadings and bullet points if it is a long answer.
         You have to help the user understand the answer to the question and format it for notes making.Do not give answers from outside provided context'''},
         {'role':'user','content':f"Context:{context},Question:{query}"}
    ],
    model="llama3-8b-8192",
)
# Print only the assistant's reply text from the first returned choice.
print(completion.choices[0].message.content)

**Lexical Analysis: Understanding Lexical Analyzer**

**Contextual Analysis**

The given context is related to the construction of LR parser tables and the computation of GOTOs. The lexical analyzer is a crucial component of this process, as it breaks the input source code into a stream of tokens, which are then used to build the parser.

**Function of Lexical Analyzer**

A lexical analyzer, also known as a lexer or scanner, performs the following functions:

• **Tokenization**: breaks the input source code into a stream of tokens, such as keywords, identifiers, literals, symbols, and other special characters.

• **Pattern Matching**: matches the tokens against a set of predefined patterns or regular expressions to identify the types of tokens.

• **Error Handling**: checks for any errors in the input source code, such as syntax errors or invalid characters, and reports them to the parser.

**Example: Tokenization**

In the given context, the input string `T o see ho w the GOTO /'s are