In [22]:
# Root of the NHS week-by-week pregnancy guide; per-week paths are appended to it.
base_url = "https://www.nhs.uk/start-for-life/pregnancy/week-by-week-guide-to-pregnancy/"

# Maps trimester number -> {"url": path fragment, "weeks": weeks listed under it}.
# Each source row is (path fragment, first week, exclusive stop week).
trimester = {
    number: {"url": url_part, "weeks": list(range(first_week, stop_week))}
    for number, (url_part, first_week, stop_week) in enumerate(
        (
            ("1st-trimester/week-", 4, 13),
            ("2nd-trimester/week-", 13, 28),
            ("3rd-trimester/week-", 28, 42),
        ),
        start=1,
    )
}

import requests
from bs4 import BeautifulSoup
from langchain.docstore.document import Document

def url_builder(week: int):
    """Build the guide URL for a given pregnancy week.

    The week is looked up in the module-level ``trimester`` table; the result
    is ``base_url`` + that trimester's URL fragment + the week number + "/".

    Args:
        week: Pregnancy week number; it must appear in one of the
            ``trimester[...]["weeks"]`` lists.

    Returns:
        The page URL as a string.

    Raises:
        ValueError: If ``week`` is not listed under any trimester.
    """
    # Validity comes from the table itself rather than from separate
    # hard-coded bounds, so a value that is in no "weeks" list (including a
    # non-integer such as 4.5) raises ValueError instead of an IndexError.
    for entry in trimester.values():
        if week in entry.get("weeks", []):
            return base_url + entry["url"] + str(week) + "/"
    raise ValueError("Week out of bound!")

def scrape_week_content(week_number: int, timeout: float = 10.0):
    """Download one week's guide page and split it into ``Document`` objects.

    One document is created for the intro block (first section under
    ``#maincontent``) and one for each ``<section>`` from the one with id
    ``whats-happening`` through the one with id ``action-stations``
    (inclusive), following sibling order. If the end section is not found,
    every following sibling section is included.

    Each document's metadata holds ``week`` (the requested week number) and
    ``section``: "intro", the section's ``id``, or, when the section has no
    id, the text of its first heading, or "No ID" if it has no heading.

    Args:
        week_number: Week to fetch; passed to ``url_builder``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of documents; empty when the response status is not 200.

    Raises:
        ValueError: From ``url_builder`` for an unsupported week.
        Exceptions raised by ``requests.get`` (including a timeout) are not
        caught here and propagate to the caller.
    """
    docs = []

    # requests waits indefinitely unless a timeout is given, which would
    # stall the whole scraping loop on a single unresponsive request.
    response = requests.get(url_builder(week_number), timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch page. Status code: {response.status_code}")
        return docs

    soup = BeautifulSoup(response.text, 'html.parser')

    intro = soup.select_one("#maincontent > section:first-of-type div.nhsuk-u-reading-width")
    if intro:
        docs.append(
            Document(
                page_content=intro.get_text(separator=" ", strip=True),
                metadata={"week": week_number, "section": "intro"}
            )
        )
    else:
        print("Intro not found.")

    current_section = soup.find("section", id="whats-happening")
    end_section = soup.find("section", id="action-stations")

    while current_section:
        section_id = current_section.get("id")
        if not section_id:
            # Fall back to the section's first heading text as its label.
            heading_tag = current_section.find(["h1", "h2", "h3", "h4", "h5", "h6"])
            section_id = heading_tag.get_text(strip=True) if heading_tag else "No ID"
        docs.append(
            Document(
                page_content=current_section.get_text(separator=" ", strip=True),
                metadata={"week": week_number, "section": section_id}
            )
        )
        # Identity, not ==: bs4 tag equality compares markup, so a different
        # section with identical markup would otherwise end the walk early.
        if current_section is end_section:
            break  # Stop after including the end section
        current_section = current_section.find_next_sibling("section")

    return docs

In [23]:
# Scrape weeks 4 through 41 and collect every returned document in one flat list.
all_docs = []
for week in range(4, 42):
    all_docs += scrape_week_content(week)

In [24]:
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Embedding model, selected by name through the HuggingFace wrapper.
embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Build a FAISS vector store from all scraped documents using that model.
vectordb = FAISS.from_documents(all_docs, embeddings_model)

In [37]:
import re

def search(query, vectordb=vectordb, k=20):
    """Run a scored similarity search for ``query``.

    Thin wrapper around ``vectordb.similarity_search_with_score``. The default
    store is whatever ``vectordb`` was bound to when this function was defined.

    Args:
        query: Text to search for.
        vectordb: Vector store to query.
        k: Number of results requested from the store.

    Returns:
        Whatever ``similarity_search_with_score`` returns for the query.
    """
    return vectordb.similarity_search_with_score(query, k=k)
    

def search_filter(results_with_score=None, vectordb=vectordb, week_filter=None, section_filter=None):
    """Filter (document, score) pairs by week and/or section metadata.

    Args:
        results_with_score: Iterable of ``(doc, score)`` pairs, e.g. the output
            of ``search``. When ``None``, every document in ``vectordb`` is
            used, paired with a score of ``None``.
        vectordb: Store to read all documents from when no results are given.
        week_filter: Keep only documents whose ``week`` metadata equals this
            value; ``None`` disables the check.
        section_filter: Keep only documents whose ``section`` metadata equals
            this value; ``None`` disables the check.

    Returns:
        A list of the ``(doc, score)`` pairs that pass both checks.
    """
    if results_with_score is None:
        # NOTE(review): this reads the docstore's private ``_dict`` mapping,
        # which is not a public API and may change between library versions.
        documents = [(doc, None) for doc in vectordb.docstore._dict.values()]
    else:
        # An explicitly passed empty result list stays empty; it must not be
        # mistaken for "no results given" and replaced by the whole store.
        documents = results_with_score
    return [
        (doc, score) for doc, score in documents
        if (week_filter is None or doc.metadata.get("week") == week_filter)
        and (section_filter is None or doc.metadata.get("section") == section_filter)
    ]

def get_content_by_week(week: int):
    """Return the stored text of every section for ``week`` as one string.

    Each matching document is rendered as a 50-character "=" rule, a
    "WEEK: ...; SECTION: ..." title, the document text with one sentence per
    line, and a closing rule. Blocks are concatenated in the order returned by
    ``search_filter``.

    Args:
        week: Week number to match against each document's ``week`` metadata.

    Returns:
        The formatted text, or an empty string when no document matches.
    """
    line = "=" * 50
    blocks = []

    for doc, _score in search_filter(week_filter=week):
        title = f"\nWEEK: {doc.metadata['week']}; SECTION: {doc.metadata['section']}\n\n"
        # Break only where sentence punctuation is followed by whitespace or
        # ends the text. Allowing zero whitespace would also split decimals,
        # URLs and abbreviations ("5.4cm" -> "5.\n4cm").
        sentences = re.sub(r'([.!?])(?:\s+|$)', r'\1\n', doc.page_content)
        blocks.append("\n" + line + title + sentences + line)

    return "".join(blocks)

In [7]:
# Sanity check: number of stored documents, and any documents whose section
# was labelled with the "No ID" placeholder.
len(search_filter()), search_filter(section_filter="No ID")

(277, [])

In [36]:
# Show the formatted text of every stored section for week 10.
print(get_content_by_week(10))


WEEK: 10; SECTION: intro

Week 10 Welcome to week 10.
Pregnancy is divided into 3 chunks, called "trimesters".
You are nearly at the end of your 1st trimester.
By the 2nd trimester you will probably have lots more energy and all those signs of early pregnancy will gradually fade away.
Around now, you may have a booking appointment with a midwife.
You'll be asked lots of questions about your health and medical history.
You can ask lots of questions too.
WEEK: 10; SECTION: whats-happening

What's happening in my body?
You may be struggling to do up your jeans.
Your uterus (womb) is around the size of a large orange, while your baby is more like the size of an apricot.
You may be feeling bloated and you might find yourself burping or passing wind – this is due to your hormones.
The female hormone progesterone is just doing its job – relaxing the muscles in your womb so that it can expand along with your growing baby.
However, in the process, the muscles in your digestive tract also becom

In [None]:
# Save the vector store to ./vectordb on disk.
vectordb.save_local("./vectordb")

In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate


import os

# The OpenRouter key is read from the environment so that no credential is
# stored in the notebook; set OPENROUTER_API_KEY before running this cell
# (a missing variable raises KeyError here).
# SECURITY: a literal key was previously committed in this cell and must be
# treated as compromised -- revoke and rotate it.
llm = ChatOpenAI(
  openai_api_key=os.environ["OPENROUTER_API_KEY"],
  openai_api_base="https://openrouter.ai/api/v1",
  model_name="deepseek/deepseek-r1"
)


  llm = ChatOpenAI(


In [44]:
# Prompt that passes the user's question together with the guide text as
# context and asks the model to reason step by step.
template = """
Question: {question}

Context: {context}

Answer: Let's think step by step.
"""
prompt = PromptTemplate(template=template, input_variables=["question", "context"])
# `prompt | llm` builds a runnable pipeline. It is executed with .invoke();
# .run() only exists on the legacy LLMChain class, not on this object.
llm_chain = prompt | llm

# The chat model returns a message object; keep only its text so that
# llm_result is a plain string.
llm_result = llm_chain.invoke({
    "question": "what I need to do",
    "context": get_content_by_week(10)
}).content
llm_result

  llm_chain = LLMChain(prompt=prompt, llm=llm)
  llm_result = llm_chain.run({


'**Step-by-Step Guide for Week 10 of Pregnancy**  \n\nHere’s a concise, organized list of actionable steps based on your Week 10 context:  \n\n---\n\n### **1. Medical Appointments**  \n- **Schedule/Attend a Booking Appointment**:  \n  - Arrange this with a midwife or GP (between weeks 8–12). It includes health screenings, discussing birth options, and tests for infections/Down’s syndrome.  \n  - Ask about continuity of care (seeing the same carer throughout pregnancy).  \n- **Prepare for Your First Dating Scan**:  \n  - Typically occurs between weeks 8–14. Confirm the date.  \n\n---\n\n### **2. Lifestyle Adjustments**  \n- **Avoid Harmful Substances**:  \n  - Quit smoking and alcohol. Reduce caffeine (tea, coffee, etc.). Seek GP/midwife support if needed.  \n- **Combat Bloating & Digestive Issues**:  \n  - Eat 6 small meals daily, avoid late-night eating, and reduce spicy/fatty foods or coffee.  \n  - Take short walks after meals.  \n\n---\n\n### **3. Nutrition & Supplements**  \n- **P

In [45]:
# Print the answer so its newlines appear as line breaks instead of escaped "\n".
print(llm_result)

**Step-by-Step Guide for Week 10 of Pregnancy**  

Here’s a concise, organized list of actionable steps based on your Week 10 context:  

---

### **1. Medical Appointments**  
- **Schedule/Attend a Booking Appointment**:  
  - Arrange this with a midwife or GP (between weeks 8–12). It includes health screenings, discussing birth options, and tests for infections/Down’s syndrome.  
  - Ask about continuity of care (seeing the same carer throughout pregnancy).  
- **Prepare for Your First Dating Scan**:  
  - Typically occurs between weeks 8–14. Confirm the date.  

---

### **2. Lifestyle Adjustments**  
- **Avoid Harmful Substances**:  
  - Quit smoking and alcohol. Reduce caffeine (tea, coffee, etc.). Seek GP/midwife support if needed.  
- **Combat Bloating & Digestive Issues**:  
  - Eat 6 small meals daily, avoid late-night eating, and reduce spicy/fatty foods or coffee.  
  - Take short walks after meals.  

---

### **3. Nutrition & Supplements**  
- **Prenatal Vitamins**:  
  - 