In [1]:
import json

def clean_text(text, drop_phrases=None):
    """Remove boilerplate lines (footers, ads, policy blocks) from article text.

    The text is split on newlines. A line is discarded when it is empty or
    whitespace-only, or when it contains any of the drop phrases as a
    case-sensitive substring. The surviving lines are re-joined with newlines
    and the result is stripped of leading/trailing whitespace.

    Args:
        text: Raw article text. ``None`` or an empty string yields ``""``.
        drop_phrases: Optional iterable of substrings; any line containing one
            of them is removed. When omitted, a built-in list of footer/ad
            phrases is used.

    Returns:
        The cleaned text as a single newline-joined string.
    """
    if not text:
        # Missing content (e.g. a JSON null) is treated as an empty document.
        return ""

    if drop_phrases is None:
        drop_phrases = (
            "Sponsorship", "Policy", "Opportunities", "Ad Choices",
            "Mayo Clinic Press", "Check out these best-sellers",
            "FREE Mayo Clinic Diet Assessment",
            "Final days of", "The challenge ends", "My Gift!",
            "All rights reserved", "cookies", "advertising",
        )
    # Materialize once so a generator argument can be re-scanned for every line.
    phrases = tuple(drop_phrases)

    kept = []
    for line in text.split("\n"):
        if not line.strip():
            continue
        if any(phrase in line for phrase in phrases):
            continue
        kept.append(line)
    return "\n".join(kept).strip()

# Load the scraped articles (one JSON object per line, each with "url" and
# "content" keys) and clean every article body with clean_text.
docs = []
with open("../gut_health_articles_clean.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            continue
        entry = json.loads(line)
        docs.append({"url": entry["url"], "content": clean_text(entry["content"])})
print(f"Loaded and cleaned {len(docs)} documents.")
print(docs[0:1])  # Print the first document to verify


Loaded and cleaned 230 documents.


In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configuration: adjust chunk_size for your LLM; the small
# chunk_overlap carries a little context across neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)

# One record per chunk, each tagged with the URL of the article it came from.
chunks = [
    {"url": doc["url"], "content_chunk": piece}
    for doc in docs
    for piece in text_splitter.split_text(doc["content"])
]


In [3]:
# Persist the chunks as JSON Lines: one serialized chunk record per line.
with open("gut_health_article_chunks.jsonl", "w", encoding="utf-8") as f:
    f.writelines(json.dumps(chunk) + "\n" for chunk in chunks)


In [4]:
from langchain.vectorstores import FAISS
from langchain.embeddings import SentenceTransformerEmbeddings
import json

# Read the chunk records back from the JSONL file (one JSON object per line).
docs = []
with open("gut_health_article_chunks.jsonl", "r", encoding="utf-8") as f:
    docs.extend(json.loads(line) for line in f)

# Parallel lists: the chunk text to embed, and the source URL kept as metadata.
texts = [record["content_chunk"] for record in docs]
metadatas = [{"url": record["url"]} for record in docs]

# Sentence-transformers model used to embed every chunk.
embedding_model_name = "all-mpnet-base-v2"
embeddings = SentenceTransformerEmbeddings(model_name=embedding_model_name)

# Embed the chunks and build the FAISS vector store, then write it to disk so
# it can be reloaded later without re-embedding.
vector_store = FAISS.from_texts(texts, embeddings, metadatas=metadatas)
vector_store.save_local("gut_health_faiss_index")


  embeddings = SentenceTransformerEmbeddings(model_name=embedding_model_name)
  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

In [1]:
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Same embedding model name as the one used when the index was built.
embedding_model = HuggingFaceEmbeddings(
    model_name="all-mpnet-base-v2",
)

# NOTE(review): allow_dangerous_deserialization=True permits pickle-based
# loading of the saved index; only point this at an index directory you
# created yourself.
vectorstore = FAISS.load_local(
    "gut_health_faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True,
)

# Retriever over the index, configured with k=4.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 4},
)

  embedding_model = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Re-creates the retriever with the same settings as the previous cell:
# k=4 returns the 4 most relevant chunks.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 4},
)


In [3]:
from dotenv import load_dotenv
import os

# Load variables from the .env file, then read the Google API key from the
# process environment (None when the variable is not set).
load_dotenv()
api_key = os.environ.get("GOOGLE_API_KEY")


In [4]:
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used to generate answers, authenticated with the key loaded above.
llm = ChatGoogleGenerativeAI(
    model="models/gemini-flash-lite-latest",
    temperature=0,
    api_key=api_key,
)
# Alternative noted by the author: HuggingFaceHub for local models.


In [5]:
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate

# Persona and ground rules for the assistant, joined into one system message.
system_prompt = " ".join([
    "You are August, a friendly, warm gut health coach.",
    "Always answer empathetically and cite trustworthy sources.",
    "If unsure, say so cheerfully.",
])

# {context} and {input} are template variables filled in when the chain runs;
# the doubled braces keep {context} literal inside the f-string.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", f"{system_prompt}\nContext: {{context}}"),
        ("human", "{input}"),
    ]
)
from langchain.chains.combine_documents import create_stuff_documents_chain

# Assemble the pipeline: the documents chain feeds retrieved chunks and the
# question to the LLM through `prompt`; the retrieval chain puts the retriever
# in front of it.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
chain = create_retrieval_chain(
    retriever,
    question_answer_chain,
)

# Usage: result = chain.invoke({"input": "<question>"}); the next cell reads
# result["answer"] and result["context"] from the returned mapping.


In [None]:
from IPython.display import Markdown, display

question = "I feel nauseous after eating fermented foods. Is that normal?"
# Change this as needed
result = chain.invoke({"input": question})
display(Markdown(f"**August AI:** {result['answer']}"))

# Optionally, show sources as nice links. Several retrieved chunks can come
# from the same article, so list each URL only once, in retrieval order.
source_urls = []
for doc in result.get("context") or []:
    url = (getattr(doc, "metadata", None) or {}).get("url")
    if url and url not in source_urls:
        source_urls.append(url)

if source_urls:
    links = [f"- [{url}]({url})" for url in source_urls]
    display(Markdown("**Sources:**\n" + "\n".join(links)))

**August AI:** Oh, I hear you! Feeling nauseous after eating can be really unsettling, especially when you're trying to take good care of your gut. It's completely understandable that you're wondering if this is normal, especially after eating fermented foods.

While fermented foods are often celebrated for their potential gut benefits, it's **not uncommon** for some people to experience digestive discomfort, including nausea, when introducing or consuming certain fermented items.

Here are a few gentle thoughts on why this might be happening:

1.  **Introducing New Bacteria:** Fermented foods are packed with live and active cultures (probiotics). If your gut microbiome is sensitive or if you've introduced a lot of these foods suddenly, your system might be reacting a bit as those new beneficial bacteria start to settle in. This can sometimes cause temporary gas, bloating, or nausea.
2.  **Acidity:** Many fermented foods, like yogurt, sauerkraut, or kimchi, have a naturally acidic taste. For some people, especially if their stomach is already a bit sensitive (like during morning sickness, which I see mentioned in my notes!), this acidity can trigger feelings of nausea.
3.  **Specific Ingredients:** Sometimes it's not the fermentation itself, but what's *in* the food. For example, if you ate kimchi, the spice level might be the culprit!

**A Gentle Suggestion for Morning Sickness:**

Since I see a note here about managing morning sickness, if you are pregnant and dealing with nausea, it's especially important to be gentle with your stomach. The advice for managing morning sickness strongly suggests:

*   Eating slowly.
*   Choosing bland foods that soothe the stomach.

If you suspect fermented foods are triggering your nausea, perhaps try taking a little break from them and focusing on very bland, easy-to-digest foods for a few days. When you do reintroduce them, try a very small amount of a mild option (like plain yogurt) to see how your body responds.

**Important Note:** If the nausea is severe, persistent, or accompanied by other symptoms like vomiting or fever, it's always best to check in with your healthcare provider. While I can offer general wellness coaching, they can give you personalized medical advice!

I hope this helps ease your mind a little! We'll figure out what works best for your unique gut.

**Sources:**
- [https://blogs.webmd.com/digestive-health/20250915/what-happens-to-your-gut-when-youre-pregnant](https://blogs.webmd.com/digestive-health/20250915/what-happens-to-your-gut-when-youre-pregnant)
- [https://blogs.webmd.com/digestive-health/20240719/dow-to-dodge-foodborne-illness](https://blogs.webmd.com/digestive-health/20240719/dow-to-dodge-foodborne-illness)
- [https://www.healthline.com/health/crohns-vomiting](https://www.healthline.com/health/crohns-vomiting)
- [https://www.healthline.com/health/ibs/irritable-bowel-syndrome-high-fiber-diet](https://www.healthline.com/health/ibs/irritable-bowel-syndrome-high-fiber-diet)