In [None]:
# Print a fixed marker string (looks like a quick check that the kernel runs — confirm intent).
print("init")


init


In [2]:
import os
import time
from llm_handler import analyze_rcc_drawing
from prompt import INITIAL_EXTRACTION_PROMPT, REFINEMENT_PROMPT_TEMPLATE

# --- Configuration ---
# Working directories, given as relative paths (resolved against the current
# working directory when the cell runs).
REPORTS_DIR = "reports"
UPLOADS_DIR = "uploads"
FIRST_EXTRACT_DIR = "first_extract"

# Create each directory if it is missing; directories that already exist are left untouched.
for _directory in (REPORTS_DIR, UPLOADS_DIR, FIRST_EXTRACT_DIR):
    os.makedirs(_directory, exist_ok=True)


# Echo the imported extraction prompt so it can be reviewed before the analysis
# cell passes it to analyze_rcc_drawing(); print() renders its line breaks.
print(INITIAL_EXTRACTION_PROMPT)


You are an expert assistant specializing in Indian civil engineering standards for Reinforced Concrete Cement (RCC) design.
Your task is to analyze an RCC structural drawing PDF and check its compliance against IS 456:2000 and SP 34.

Here is the step-by-step process you must follow:
**Step 0: Initial Document Check**
- **0.1:** Verify that the document is an RCC structural drawing of "FOUNDATIONS" only. If not, mention that it is not a valid drawing and exit.
- **0.2:** Find the site location. This is crucial for many checks. If it's not mentioned, flag this as "Missing Information". Do not confuse the consultant's or architect's location with the site location.
- **0.3:** Confirm that all compliance checks are based *only* on IS 456:2000 and SP 34.

**Step 1: Locate the "NOTES" Section**
- Find the specific "NOTES" section in the drawing. Do not confuse it with "GENERAL NOTES". If this section is missing, flag it.

**Step 2 & 3: Extract and Verify Design Parameters from "NOTES"**
- 

In [5]:

# --- 1. Enter the PDF Filename ---
# Replace "foundations.pdf" with the name of the PDF file you want to analyze from the 'uploads' folder.
pdf_filename = "foundations.pdf"

# --- 2. Run the Initial Extraction ---
pdf_path = os.path.join(UPLOADS_DIR, pdf_filename)
# Stays empty when the file is missing or the analysis call fails.
initial_report = ""

if not os.path.exists(pdf_path):
    print(f"❌ ERROR: The file '{pdf_filename}' was not found in the '{UPLOADS_DIR}/' directory.")
    print("Please add the file to the uploads folder and try again.")
else:
    print(f"🚀 Starting initial analysis of '{pdf_filename}'...")
    try:
        initial_report = analyze_rcc_drawing(pdf_path, INITIAL_EXTRACTION_PROMPT)
        print("✅ Initial analysis complete.")
        # Emit a real blank line before the banner; the earlier "\\n" literal
        # printed a backslash followed by the letter n.
        print("\n" + "="*50)
        print("INITIAL COMPLIANCE REPORT")
        print("="*50)
        print(initial_report)

        # Save the initial report to the 'first_extract' directory.
        # The epoch-seconds suffix keeps successive runs from overwriting each other.
        timestamp = int(time.time())
        initial_filename = f"initial_report_{os.path.basename(pdf_filename)}_{timestamp}.md"
        initial_filepath = os.path.join(FIRST_EXTRACT_DIR, initial_filename)
        with open(initial_filepath, 'w', encoding='utf-8') as f:
            f.write(initial_report)
        print(f"✅ Initial report saved to {initial_filepath}")

    except Exception as e:
        # Best-effort cell: report the problem and leave whatever value
        # `initial_report` had when the failure happened.
        print(f"An error occurred during the analysis: {e}")



🚀 Starting initial analysis of 'foundations.pdf'...
Converting PDF 'uploads/foundations.pdf' to images...
Successfully converted 1 pages to images.
Sending request to OpenRouter...
Received response from OpenRouter.
✅ Initial analysis complete.
INITIAL COMPLIANCE REPORT
As an expert assistant specializing in Indian civil engineering standards (IS 456:2000 and SP 34) for RCC design, I have analyzed the provided structural drawing.

---

## Step 0: Initial Document Check

| Check | Verification | Status |
| :--- | :--- | :--- |
| **0.1 Document Scope** | The drawing title is "FOUNDATION LAYOUT" and depicts footings (F1, RF1, RF2), section details (Section X, Lift Pit), and a column schedule from the foundation up to the terrace level. Confirmed as an RCC structural drawing focused on **FOUNDATIONS**. | Compliant |
| **0.2 Site Location** | The project name is "NH OFFICE". The consultant/approval authority locations (Mangaluru, Shivamogga, Belthangady) suggest a location in Karnataka

In [7]:
# --- 3. Provide Missing Information ---
# Review the "Missing or Wrong Information" section from the report above.
# Provide the missing details clearly in the variable below.
# The text is free-form; a later cell passes it as `user_input` both to
# REFINEMENT_PROMPT_TEMPLATE.format(...) and to generate_compliance_report(...).


user_provided_info = """

Site Location : Mangalore, Karnataka, India.
environment condition : severe 

"""

In [5]:
from data_loader import read_md_files_from_folder
# Sanity check of the markdown loader before the ingestion cell uses it.
try:
    # Path to your folder
    path_to_data = "SP34_md"

    # Each returned entry is indexed by 'file_name' below, so the loader is
    # expected to yield a sequence of dict-like records.
    all_data = read_md_files_from_folder(path_to_data)

    print(f"\nSuccessfully read {len(all_data)} files.")
    if all_data:
        # Example: print the name of the first file
        print(f"First file: {all_data[0]['file_name']}")
    else:
        # An empty result would otherwise raise an uncaught IndexError on all_data[0].
        print(f"No files were returned for '{path_to_data}'.")

except (FileNotFoundError, ValueError) as e:
    # Error types the loader is expected to raise for a bad folder — confirm
    # against data_loader.read_md_files_from_folder.
    print(e)


Successfully read 5 files.
First file: SP_34_OCR_p0001-0050.md


In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

from data_loader import read_md_files_from_folder
from embedding_service import embedding_model
from vector_db import VectorStore


# Module-level vector store handle used by extract_and_save_data() below
# (collection "is_codes_docs", storage folder ./chroma_db).
# NOTE(review): presumably a persistent ChromaDB collection — confirm against vector_db.VectorStore.
vectordb = VectorStore(collection_name="is_codes_docs", folder_path="./chroma_db")


def extract_and_save_data(folder_path="SP34_md", chunk_size=500, chunk_overlap=50):
    """Split, embed and store every markdown file found in ``folder_path``.

    Each record returned by ``read_md_files_from_folder`` must provide the keys
    ``"file_name"``, ``"folder_name"`` and ``"content"``. Non-blank contents are
    split into chunks, embedded one file at a time with ``embedding_model``, and
    saved through the module-level ``vectordb`` in a single ``save_documents``
    call.

    Args:
        folder_path: Folder handed to ``read_md_files_from_folder``.
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.

    Returns:
        str: A human-readable status message. Failures are not raised; they are
        printed and returned as an error message instead.
    """
    try:
        # 1. Read all .md files from the folder
        folder_file_data = read_md_files_from_folder(folder_path)
        if not folder_file_data:
            return "No files found to process"

        print(f"\n✅ Successfully read {len(folder_file_data)} files.")

        # 2. Setup text splitter
        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n##", "\n###", "\n", " "]
        )

        all_chunks, all_ids, all_embeddings, all_metadatas = [], [], [], []

        # 3. Process each markdown file
        for file_data in folder_file_data:
            file_name = file_data["file_name"]
            file_content = file_data["content"]
            if not file_content.strip():
                continue

            # Split into text chunks
            chunks = text_splitter.split_text(file_content)
            print(f"📄 File {file_name} → {len(chunks)} chunks")
            if not chunks:
                # Nothing to embed for this file.
                continue

            # Batch embed all chunks of this file in one call
            embeddings = embedding_model.embed_documents(chunks)
            if len(embeddings) != len(chunks):
                # zip() would silently drop the unmatched tail and store an
                # incomplete index, so fail loudly instead.
                raise ValueError(
                    f"Embedding count mismatch for {file_name}: "
                    f"{len(chunks)} chunks but {len(embeddings)} embeddings"
                )

            for chunk_number, (chunk, emb) in enumerate(zip(chunks, embeddings), start=1):
                all_chunks.append(chunk)
                all_embeddings.append(emb)
                # Ids are 1-based and unique per file name.
                all_ids.append(f"{file_name}_part_{chunk_number}")

                # Attach useful metadata
                all_metadatas.append({
                    "source_file": file_name,
                    "source_folder": file_data["folder_name"],
                    "chunk_id": chunk_number,
                })

        if not all_chunks:
            return "No valid documents were processed"

        # 4. Save all chunks + embeddings + metadata into ChromaDB
        vectordb.save_documents(
            documents=all_chunks,
            ids=all_ids,
            embeddings=all_embeddings,
            metadatas=all_metadatas
        )

        return f"✅ Successfully processed {len(all_chunks)} chunks across {len(folder_file_data)} files"

    except Exception as e:
        # Notebook-friendly failure mode: report and return the message rather than raise.
        error_message = f"❌ Error during data extraction and saving: {str(e)}"
        print(error_message)
        return error_message
###########################################

# Run the ingestion. As the last expression of the cell, the returned status
# string is what the notebook displays as the cell result.
# NOTE(review): every run re-embeds all files and saves the same ids again;
# how VectorStore.save_documents treats existing ids is not visible here — confirm.
extract_and_save_data()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


✅ Successfully read 5 files.
📄 File SP_34_OCR_p0001-0050.md → 52 chunks
📄 File SP_34_OCR_p0051-0100.md → 45 chunks
📄 File SP_34_OCR_p0101-0150.md → 32 chunks
📄 File SP_34_OCR_p0151-0200.md → 38 chunks
📄 File SP_34_OCR_p0201-0250.md → 56 chunks


'✅ Successfully processed 223 chunks across 5 files'

In [8]:
#############################################
from embedding_service import embedding_model
from vector_db import VectorStore
from llm_service import generate_compliance_report


# Handle to the "is_codes_docs" collection stored under ./chroma_db.
vectordb = VectorStore(
    folder_path="./chroma_db",
    collection_name="is_codes_docs",
)

# Fill the refinement template with the first-pass report and the details the
# user supplied in the earlier cell.
_refinement_fields = {
    "previous_analysis": initial_report,
    "user_input": user_provided_info,
}
refinement_prompt = REFINEMENT_PROMPT_TEMPLATE.format(**_refinement_fields)


def Report_Generate():
    """Generate, print and return the refined compliance report.

    Reads the notebook globals ``vectordb``, ``embedding_model``,
    ``refinement_prompt``, ``initial_report`` and ``user_provided_info`` and
    forwards them to ``generate_compliance_report``.

    Returns:
        The value returned by ``generate_compliance_report`` (it is printed
        here, so presumably the report text — confirm against llm_service).

    Raises:
        ValueError: If ``initial_report`` is empty, i.e. the initial analysis
            cell did not produce a report, so there is nothing to refine.
    """
    # The analysis cell leaves initial_report == "" when the PDF is missing or
    # the call fails; refining an empty report would only waste an LLM request.
    if not initial_report or not str(initial_report).strip():
        raise ValueError(
            "initial_report is empty - run the initial analysis cell successfully "
            "before generating the refined report."
        )

    report = generate_compliance_report(
        vectordb=vectordb,
        embedding_model=embedding_model,
        # The filled-in refinement prompt is what gets passed as `Initial_report`.
        Initial_report=refinement_prompt,
        previous_analysis=initial_report,
        user_input=user_provided_info,
    )
    print("report generated 😭😭😭")
    print(report)
    # Hand the report back so it can be saved or reused instead of only printed.
    return report

##############################################
# Bind the result so the notebook does not echo the report a second time as
# the cell's output value.
final_report = Report_Generate()

response generated.
report generated 😭😭😭
**UPDATED COMPLIANCE REPORT**  
*RCC Structural Drawing – “FOUNDATION LAYOUT”*  
*Project: NH OFFICE*  

Prepared by: **Senior Civil Engineer (India)**  
Standards referenced: **IS 456:2000**, **SP 34**, and the additional site‑specific data supplied by the client.  

---

## 0. Initial Document Check  

| Check | Verification | Status |
|------|--------------|--------|
| **0.1 Document Scope** | Title “FOUNDATION LAYOUT”, shows footings, column schedule, lift‑pit section – clearly a foundation‑focused RCC drawing. | **Compliant** |
| **0.2 Site Location** | Now supplied: **Mangalore, Karnataka, India**. | **Compliant** |
| **0.3 Environmental Exposure** | Supplied: **Severe** exposure condition (coastal, high humidity). | **Compliant** |
| **0.4 Standards Used** | Checks performed against IS 456:2000 and SP 34 (as required). | **Compliant** |

---

## 1‚Äë22‚ÄØDesign Parameters ‚Äì Verification wit