### ü§ñ Chat with PDF

In [5]:
# Importing Needed library 
import os
from pypdf import PdfReader
import dotenv
from openai import OpenAI
import chromadb
from IPython.display import display, Markdown

dotenv.load_dotenv()

True

### ‚öôÔ∏è Configuration

In [6]:
PDF_FILE_PATH = "../../data/02-RAG_Systems/simple_rag/Classic_Airent-3.pdf"
CHROMA_COLLECTION_NAME = "datasheet_rag"
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_ENDPOINT = os.getenv("OPENAI_ENDPOINT")

CHAT_DEPLOYMENT = "o3"
EMBEDDING_DEPLOYMENT = "text-embedding-3-small"

### üöÄ Initiating OpenAI Client & Chroma DB (In-Memory)

In [7]:
client = OpenAI(
    base_url=OPENAI_ENDPOINT,
    api_key=OPENAI_KEY
)

chroma_client = chromadb.Client()
collection = chroma_client.create_collection(name=CHROMA_COLLECTION_NAME)

### üìö Helper Functions

In [8]:
def get_embedding(text):
    """Generates vector embedding for a string using Azure OpenAI."""
    text = text.replace("\n", " ") 
    response = client.embeddings.create(
        input=[text, text],
        model=EMBEDDING_DEPLOYMENT
    )
    return response.data[0].embedding

In [9]:
def split_sections(text: str):
    sections = []
    current_header = None
    current_lines = []

    for line in text.splitlines():
        if line.strip().endswith(":"):  # header line
            # save previous section
            if current_header is not None:
                sections.append({
                    "header": current_header.replace(":",""),
                    "content": "\n".join(current_lines).strip().replace("\uf0b7", "")
                })
            # start new section
            current_header = line.strip()
            current_lines = []
        else:
            current_lines.append(line)

    # last section
    if current_header is not None:
        sections.append({
            "header": current_header,
            "content": "\n".join(current_lines).strip().replace("\uf0b7", "")
            
        })

    return sections

### 1Ô∏è‚É£  Loading PDF...

In [10]:
reader = PdfReader(PDF_FILE_PATH)
full_text = ""
for page in reader.pages:
    full_text += page.extract_text()

### ‚úÇÔ∏è  Chunking Text...

In [11]:
sections = split_sections(full_text)
product_name = PDF_FILE_PATH.split('/')[-1].split('.')[0].replace('_', ' ')
print(f"Total sections in {product_name}: {len(sections)}")
for section in sections:
    header = section['header']
    content = section['content']
    print(f"{header} - Length of the content: {len(content)}")

chunks = [f"{sect['header']} of {product_name}:\n{sect['content']}" for sect in sections if sect['content'] != '']

Total sections in Classic Airent-3: 18
Description - Length of the content: 353
Applications - Length of the content: 362
Advantages - Length of the content: 669
Compatibility - Length of the content: 98
Technical Properties - Length of the content: 375
Standards - Length of the content: 87
Method of Use - Length of the content: 214
Dosage - Length of the content: 504
Package - Length of the content: 71
Storage - Length of the content: 283
Air Void Characteristics - Length of the content: 413
Cleaning - Length of the content: 55
Package - Length of the content: 71
Storage - Length of the content: 211
Cautions - Length of the content: 0
Health & Safety - Length of the content: 211
Fire - Length of the content: 34
Variety of Products: - Length of the content: 735


### üíæ Generating Embeddings & Storing...

In [12]:
chunks[0]

'Description of Classic Airent-3:\nClassic Airent-3 is a liquid air entraining \nadmixture with plasticizing properties specially \ndesigned to create microscopic air bubbles that are \nuniformly distributed in the concrete mix. It also \npermits reductions in the free water content to be \nmade. Classic Airent-3 is formulated from selected \nwater reducer polymers and synthetic surfactants.'

In [13]:
ids = [str(i) for i in range(len(chunks))]
ids = [str(i) for i in range(len(chunks))]
embeddings = []

# Loop through chunks and generate embeddings (Batching is better for production)
for i, chunk in enumerate(chunks):
    vec = get_embedding(chunk)
    embeddings.append(vec)
    if i % 5 == 0: print(f"   -> Processed {i+1}/{len(chunks)} chunks...", end="\r")


   -> Processed 16/17 chunks...

In [14]:
collection.add(
    documents=chunks,
    embeddings=embeddings,
    ids=ids
)
print("\n   -> Indexing complete!")


   -> Indexing complete!


### üß† RETRIEVAL & GENERATION LOOP

In [15]:
user_queries = [
    "What is the packing variants of airent -3?",
    "What is the dosing of classic airent 3 needed for 25 kg cement?",
    "How to use classic airent?",
]
rag_system_prompt = """You are a helpful assistant. Use the provided context to answer the question.
    If the answer is not in the context, say you don't know."""
    

common_system_prompt = """You are a helpful assistant who has vast experience in construction and construction chemical field. Using your knowledge, answer the question."""


In [16]:
def _md_escape(text: str) -> str:
    return text.strip().replace("\n", "<br>")


for query in user_queries:
    query_vec = get_embedding(query)
    results = collection.query(
        query_embeddings=[query_vec],
        n_results=3
    )
    retrieved_context = "\n\n".join(results['documents'][0])

    rag_user_message = f"""
        Context:
        {retrieved_context}

        Question: 
        {query}
        """
    rag_chat_response = client.chat.completions.create(
        model=CHAT_DEPLOYMENT,
        messages=[
            {"role": "system", "content": rag_system_prompt},
            {"role": "user", "content": rag_user_message}
        ],
    )
    rag_answer = rag_chat_response.choices[0].message.content

    common_user_message = f"""Question: {query}"""
    common_chat_response = client.chat.completions.create(
        model=CHAT_DEPLOYMENT,
        messages=[
            {"role": "system", "content": common_system_prompt},
            {"role": "user", "content": common_user_message}
        ],
    )
    common_answer = common_chat_response.choices[0].message.content

    display(Markdown(
        f"""
---
### üôã‚Äç‚ôÇÔ∏è Question
`{query}`

<details>
<summary><strong>Retrieved Context</strong></summary>

{_md_escape(retrieved_context)}

</details>

| Mode | Response |
|------|----------|
| **üîç RAG (Context Grounded)** | {_md_escape(rag_answer)} |
| **üåê Common LLM (No Context)** | {_md_escape(common_answer)} |
"""
    ))


---
### üôã‚Äç‚ôÇÔ∏è Question
`What is the packing variants of airent -3?`

<details>
<summary><strong>Retrieved Context</strong></summary>

Package of Classic Airent-3:<br>Classic Airent-3 is available in 50 and 250 <br>Kg drums and bulks supply.<br><br>Package of Classic Airent-3:<br>Classic Airent-3 is available in 50 and 250 <br>Kg drums and bulks supply.<br><br>Description of Classic Airent-3:<br>Classic Airent-3 is a liquid air entraining <br>admixture with plasticizing properties specially <br>designed to create microscopic air bubbles that are <br>uniformly distributed in the concrete mix. It also <br>permits reductions in the free water content to be <br>made. Classic Airent-3 is formulated from selected <br>water reducer polymers and synthetic surfactants.

</details>

| Mode | Response |
|------|----------|
| **üîç RAG (Context Grounded)** | Classic Airent-3 is supplied in three packing variants:<br>‚Ä¢ 50 kg drums  <br>‚Ä¢ 250 kg drums  <br>‚Ä¢ Bulk supply |
| **üåê Common LLM (No Context)** | Airent-3 is supplied ready-for-use in the following standard packs:  <br>‚Ä¢ Small packs ‚Äì 1 L / 1 kg plastic bottles (handy for site trials and minor works)  <br>‚Ä¢ Medium packs ‚Äì 5 L and 20‚Äì25 L HDPE jerry cans  <br>‚Ä¢ Bulk packs ‚Äì 200 L HDPE / MS drums (and, on request, 1 000 L IBCs for large projects) |



---
### üôã‚Äç‚ôÇÔ∏è Question
`What is the dosing of classic airent 3 needed for 25 kg cement?`

<details>
<summary><strong>Retrieved Context</strong></summary>

Dosage of Classic Airent-3:<br>The guidance dosage of Classic Airent-3 to <br>achieve air content of 4.5 +/- 1.5 % in the concrete <br>mix and 10-15% water reduction is 0.10 ‚Äì 0.40 litre <br>per 100 Kg of cementitious material in the mix, <br>including GGBFS, PFA or microsilica. We can go <br>below or above the mentioned dose based on site <br>concrete trials. Representative trials should have <br>conducted to determine the optimum dosage of <br>Classic Airent-3  to meet the performance <br>requirements by using the materials and conditions <br>in actual use.<br><br>Method of Use of Classic Airent-3:<br>Classic Airent-3 should be added to the <br>concrete with the mixing water to achieve optimum <br>performance. An automatic dispenser should be used <br>to dispense the correct quantity of Classic Airent-3 <br>to the concrete.<br><br>Package of Classic Airent-3:<br>Classic Airent-3 is available in 50 and 250 <br>Kg drums and bulks supply.

</details>

| Mode | Response |
|------|----------|
| **üîç RAG (Context Grounded)** | The recommended dosage is proportional to the cementitious material:<br><br>0.10 ‚Äì 0.40 L per 100 kg cement  <br>= 0.001 ‚Äì 0.004 L per kg cement.<br><br>For 25 kg of cement:<br>0.001 L/kg √ó 25 kg = 0.025 L (25 mL)  <br>0.004 L/kg √ó 25 kg = 0.100 L (100 mL)<br><br>Therefore, you would use roughly 0.025 ‚Äì 0.10 litre (25 ‚Äì 100 mL) of Classic Airent-3 for 25 kg of cement, subject to confirmation by site trials. |
| **üåê Common LLM (No Context)** | Classic Airent-3 is normally used at roughly 0.30 ‚Äì 0.50 % of the weight of cement.<br><br>25 kg cement √ó 0.004 (0.4 %) ‚âà 0.10 kg  <br>‚âà 100 g (or about 100 ml, since its density is close to 1 g/ml)<br><br>So, for a 25 kg bag of cement allow about 100 ml of Classic Airent-3.  <br>If a little more or less air is required you can vary this within the range 75 ‚Äì 125 ml (0.3 ‚Äì 0.5 % by weight of cement). |



---
### üôã‚Äç‚ôÇÔ∏è Question
`How to use classic airent?`

<details>
<summary><strong>Retrieved Context</strong></summary>

Method of Use of Classic Airent-3:<br>Classic Airent-3 should be added to the <br>concrete with the mixing water to achieve optimum <br>performance. An automatic dispenser should be used <br>to dispense the correct quantity of Classic Airent-3 <br>to the concrete.<br><br>Description of Classic Airent-3:<br>Classic Airent-3 is a liquid air entraining <br>admixture with plasticizing properties specially <br>designed to create microscopic air bubbles that are <br>uniformly distributed in the concrete mix. It also <br>permits reductions in the free water content to be <br>made. Classic Airent-3 is formulated from selected <br>water reducer polymers and synthetic surfactants.<br><br>Applications of Classic Airent-3:<br> In concrete mixes for roadways, airports <br>runways and other concrete exposed to <br>potential frost damage. <br> To increase the durability of concrete and its <br>resistance to damage by frost and de-icing <br>salts. <br> To increase cohesion of concrete mixes to <br>overcome bleed, segregation and sand runs <br>where poorly graded aggregates with fine <br>content needs to be used.

</details>

| Mode | Response |
|------|----------|
| **üîç RAG (Context Grounded)** | Add Classic Airent-3 to the concrete together with the mixing water, and use an automatic dispenser to meter the correct dosage into the mix. |
| **üåê Common LLM (No Context)** | Below-ground it is normally specified as an ‚Äúair-entraining admixture for concrete and mortar‚Äù.  <br>Although every producer will issue its own technical data sheet, the way you use any of the ‚ÄúClassic Airent / Classic-AirEntrainer‚Äù type products is practically the same.<br><br>1. Identify why you need it  <br>   ‚Ä¢ Improve workability without adding extra water.  <br>   ‚Ä¢ Produce a stable system of microscopic air bubbles (3-7 % total air) so concrete or mortar can resist freeze‚Äìthaw and de-icing salts.  <br><br>2. Determine the target air content  <br>   ‚Ä¢ Plain concrete 4 % ¬±1 % (medium workability, 20-25 mm agg.)  <br>   ‚Ä¢ Severe exposure / paving 6 % ¬±1 % (‚â§19 mm agg.)  <br>   ‚Ä¢ Mortar/plaster normally 10-18 % depending on project specification.  <br><br>3. Decide the trial-mix dosage  <br>Typical guide values:  <br>   ‚Ä¢ 0.05‚Äì0.15 % by mass of cement (50‚Äì150 m‚Ñì/100 kg cement)  <br>   ‚Ä¢ Start at mid-range (0.08 %) and adjust after you have measured the actual air content in a pressure-meter.  <br>Never exceed the maximum value given on the manufacturer‚Äôs sheet.<br><br>4. Mixing sequence  <br>   a. Charge 70 % of the total water in the mixer.  <br>   b. Add all the ‚ÄúClassic Airent‚Äù with that water.  <br>   c. Add cement, sand, coarse aggregate.  <br>   d. Add remaining water in stages until the specified slump/flow is reached.  <br>   e. Mix for a further 60‚Äì90 s to make sure the air bubbles are uniformly dispersed.  <br>If batching plant is used, meter the admixture through a calibrated dispenser; do not just ‚Äúhand dose‚Äù.<br><br>5. Combine with other admixtures carefully  <br>   ‚Ä¢ Water-reducers/plasticisers: usually compatible, but they often lower air content, so you may have to increase Classic Airent slightly.  <br>   ‚Ä¢ Accelerators or retarders: do trial mixes.  <br>   ‚Ä¢ Never premix different liquid admixtures in the same container.<br><br>6. Quality control on site  <br>   ‚Ä¢ Check air content every 50 m¬≥ (or at least once per shift).  <br>   ‚Ä¢ Check slump. Do not add extra water after discharge; that destroys the bubble system.  <br>   ‚Ä¢ If pumping, keep pressure <8 MPa and use smooth elbows to minimise air loss.  <br>   ‚Ä¢ Finish and cure in the normal manner; the entrained air does not remove the need for curing.<br><br>7. Storage and shelf life  <br>   ‚Ä¢ Keep containers tightly closed, out of direct sunlight, 5‚Äì35 ¬∞C.  <br>   ‚Ä¢ Protect from frost‚Äîif frozen, most air-entrainers are ruined.  <br>   ‚Ä¢ Shelf life is usually 12 months.<br><br>8. Safety  <br>   ‚Ä¢ Mildly alkaline; wear gloves and goggles.  <br>   ‚Ä¢ Wash off splashes with plenty of water.<br><br>Quick reference example (for 25 MPa paving mix):  <br>Cement 330 kg, water 170 ‚Ñì, w/c 0.52, sand 700 kg, 19 mm agg. 1 050 kg.  <br>Target air 6 %.  <br>Start with Classic Airent 300 m‚Ñì (‚âà0.09 % of cement).  <br>Measure air‚Äîif you get only 4.5 %, raise dose to 380 m‚Ñì on the next truck.<br><br>In short: measure, adjust, don‚Äôt overdose, and always verify the air content with a pressure meter before the concrete is poured. |
