In [31]:
import json
import os
import time
from pathlib import Path

import toml
import yaml
from google import genai
from google.genai import types
from jinja2 import Template

In [2]:
class PromptLoader:
    """Load named Jinja2 prompt templates from a YAML file and render them.

    The YAML document is used as a mapping from prompt name to template
    source; the parsed result is kept in ``self.prompts``.
    """

    def __init__(self, path: str = "prompts.yaml"):
        """Parse the YAML file at ``path`` and store its content in ``self.prompts``.

        Args:
            path: Location of the YAML file holding the prompt templates.
        """
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
        # An empty YAML document parses to None; normalise it to an empty
        # mapping so that render() reports a missing prompt instead of
        # failing with "'NoneType' object is not subscriptable".
        self.prompts = {} if loaded is None else loaded

    def render(self, name: str, **kwargs) -> str:
        """Render a named prompt with given variables.

        Args:
            name: Key of the prompt inside the loaded YAML mapping.
            **kwargs: Variables made available to the Jinja2 template.

        Returns:
            The rendered prompt text.

        Raises:
            KeyError: If ``name`` is not one of the loaded prompts; the
                message lists the names that are available.
        """
        # Look the template up before touching Jinja2 so that a typo in the
        # prompt name produces an actionable message.
        try:
            source = self.prompts[name]
        except KeyError:
            available = ", ".join(sorted(map(str, self.prompts))) or "<none>"
            raise KeyError(
                f"Unknown prompt {name!r}; available prompts: {available}"
            ) from None
        template = Template(source)
        return template.render(**kwargs)

In [39]:
# Identifier of the source document; both file names below are derived from it
# so the input and output always refer to the same document.
document_stem = "bfp-a3447q"
# Chunked document to read (parsed as JSON by the loading cell, despite the .txt suffix).
input_path = f"data/{document_stem}_chunked.txt"
# Destination file for the generated per-chunk contexts.
output_path = f"data/{document_stem}_context.txt"

In [4]:
# Load the chunked document. The file carries a .txt suffix but its content is JSON.
# Decode explicitly as UTF-8 (as PromptLoader does for the prompt file) so the result
# does not depend on the platform's locale encoding.
# NOTE(review): assumes the chunking step wrote the file as UTF-8 — confirm.
json_read = Path(input_path).read_text(encoding="utf-8")
# `data` is the parsed sequence of chunk records that the context loop iterates over.
data = json.loads(json_read)

In [21]:
# Accumulators filled by the generation loop, both keyed by the chunk's position in `data`:
#   paragraph_dict -> the joined neighbour text that was sent along with the chunk
#   context_dict   -> the text returned by the model for that chunk
# NOTE: re-running this cell resets both, discarding anything generated so far.
paragraph_dict, context_dict = {}, {}
# Prompt templates used by the generation loop.
loader = PromptLoader("data/prompts.yaml")

In [10]:
# The API key lives in the Streamlit secrets file under [gemini] -> GOOGLE_API_KEY.
# It is exported as GEMINI_API_KEY so it never has to appear in the notebook itself.
secrets_file = "../../.streamlit/secrets.toml"
config = toml.load(secrets_file)
os.environ["GEMINI_API_KEY"] = config["gemini"]["GOOGLE_API_KEY"]

In [13]:
# Create the Gemini client. No key is passed explicitly; the SDK is expected to pick
# it up from the GEMINI_API_KEY environment variable — confirm against the SDK docs.
client = genai.Client()
print("Client initialized")

Client initialized


In [35]:
# Generate a context text for every non-empty chunk, using up to `window_size`
# neighbouring chunks on each side as supporting material for the model.
# NOTE(review): chunk[0] looks like the heading level, chunk[1] the chapter title and
# chunk[-1] the chunk text — confirm against the chunking step.
root_chapter_ix = 0  # position of the most recent chunk whose chunk[0] == 1
window_size = 3 # max number of chunks to be taken before and ahead of selected chunk
request_pause_s = 2  # pause before every request; presumably to stay under the API rate limit

for index, chunk in enumerate(data):
    # Track the current top-level chapter BEFORE the resume check below. If this
    # update were skipped for already-processed chunks, a resumed run would keep a
    # stale root index and the window could reach back into a previous chapter.
    if chunk[0] == 1:
        root_chapter_ix = index
    # Resume support: chunks that already have a context are not sent again.
    if index in context_dict:
        continue
    if 2 * len(chunk[1]) > len(chunk[-1]): # if chapter is empty, skip its context creation
        print(f'{index}) Paragraphs not generated for chapter: {chunk[1]}')
        continue
    # The window never starts before the current top-level chapter; its end is only
    # clamped to the end of `data`.
    paragraph_start = max(root_chapter_ix, index - window_size)
    paragraph_end = min(index + window_size + 1, len(data))
    paragraphs = '\n'.join(d[-1] for d in data[paragraph_start:paragraph_end])
    print(f'{index}) Paragraphs generated in range [{paragraph_start}:{paragraph_end-1}] for chapter: {chunk[1]}')
    paragraph_dict[index] = paragraphs
    prompt = loader.render(
        "context_extension",
        chunk=chunk[-1],
        neighbors=paragraphs
    )
    print("\rPrompting standby...",end='')
    time.sleep(request_pause_s)
    print("\rWaiting for generation...",end='')
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        config=types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(thinking_budget=0)
        ),
        contents=prompt
    )
    context_text = response.text
    # The response may carry no text at all; slicing None would abort the whole loop.
    # Leave the chunk out of context_dict so that re-running this cell retries it.
    if context_text is None:
        print(f'\r\tNo text returned for chunk {index}; left unset so a re-run retries it')
        continue
    print(f'\r\tContext generated with end: {context_text[-50:]}')
    context_dict[index] = context_text

0) Paragraphs not generated for chapter: 1 General configuration
20) Paragraphs not generated for chapter: 1.6 Contents of the structural equipment
25) Paragraphs not generated for chapter: 2 Robot arm
26) Paragraphs not generated for chapter: 2.1 Standard specifications
27) Paragraphs not generated for chapter: 2.1.1 Basic specifications
48) Paragraphs not generated for chapter: 2.2.7 Protection specifications
51) Paragraphs not generated for chapter: 2.4 Outside dimensions / Operating range diagram
52) Paragraphs not generated for chapter: 2.4.1 Outside dimensions / Operating range diagram
56) Paragraphs not generated for chapter: 2.4.2 Outside dimensions of machine cables
59) Paragraphs not generated for chapter: 2.5 Tooling
61) Paragraphs not generated for chapter: 2.5.2 Internal wiring and piping
72) Paragraphs not generated for chapter: 3 Controller
73) Paragraphs not generated for chapter: 3.1 Standard specifications
77) Paragraphs not generated for chapter: 3.2 Names of each pa

In [40]:
# Serialise the generated contexts. JSON object keys are always strings, so the integer
# chunk indices are stored as "0", "1", ...; convert them back with int() if this file
# is ever loaded into context_dict again.
json_file = json.dumps(context_dict, indent=2)
# Exporting data to output file for storage
# Plain write mode is enough (the file is never read back here); the encoding is
# stated explicitly instead of relying on the platform default.
with open(output_path, mode='w', encoding='utf-8') as f_out:
    f_out.write(json_file)