# Backend Configuration

In [1]:
# Mount Google Drive at /content/drive so files stored there can be read by
# later cells in this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Install Packages

In [2]:
!pip install -qU crewai crewai-tools langchain-openai langchain-community beautifulsoup4 faiss-cpu selenium python-dateutil google-api-python-client google-auth-oauthlib

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m366.3/366.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m67.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m72.2 MB/s[0m eta [3

## Configure Selenium Development

In [3]:
# Refresh the apt package index, install the chromium-chromedriver package, and
# copy the chromedriver binary into /usr/bin.
!sudo apt-get update -y
!sudo apt-get install -y chromium-chromedriver
!sudo cp /usr/lib/chromium-browser/chromedriver /usr/bin

0% [Working]            Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
0% [Connecting to archive.ubuntu.com (185.125.190.81)] [1 InRelease 5,484 B/129                                                                               Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:9 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,775 kB]
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 http://archive.ubuntu.com/ubuntu jammy-ba

## Load Secrets & Credentials

In [25]:
import os
from google.colab import userdata

# Azure credentials: each environment variable is filled from the Colab secret
# listed next to it (insertion order is preserved).
_ENV_FROM_SECRET = {
    "AZURE_API_KEY": 'AZURE_OPENAI_API_KEY',
    "AZURE_API_BASE": 'AZURE_OPENAI_ENDPOINT',
    "AZURE_API_VERSION": 'OPENAI_API_VERSION',
    "AZURE_DEPLOYMENT_ID": 'AZURE_OPENAI_CHAT_DEPLOYMENT_NAME',
    "AZURE_EMBEDDING_DEPLOYMENT_NAME": 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME',
}
for _env_name, _secret_name in _ENV_FROM_SECRET.items():
    os.environ[_env_name] = userdata.get(_secret_name)

# Explicitly declare the provider type for LiteLLM.
os.environ["OPENAI_API_TYPE"] = 'azure'

# Sender account used as the "FROM" address for the proactive newsletters.
EMAIL_ADDRESS = userdata.get('EMAIL_ADDRESS')
EMAIL_PASSWORD = userdata.get('EMAIL_PASSWORD')

print("All secrets loaded successfully!")

All secrets loaded successfully!


## Recipients for Newsletter

In [5]:
%%writefile recipients.csv
name,email
Aditya Bayhaqie,adityabayhaqie@gmail.com
Nabila Nurhusna Yap,nabilanurhusnayap@gmail.com

Writing recipients.csv


## Web Scraping

In [6]:
import requests
import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException
from urllib.parse import urljoin, urldefrag
from dateutil.parser import parse as parse_date
from datetime import datetime
from dateutil.parser import parse as parse_date
from dateutil.relativedelta import relativedelta
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [7]:
class Document:
    """Lightweight record pairing a piece of scraped text with its metadata.

    Attributes:
        page_content: The text of the document.
        metadata: Mapping of descriptive fields supplied by the scraper.
    """

    def __init__(self, page_content, metadata):
        self.page_content, self.metadata = page_content, metadata

    def __repr__(self):
        # Only the metadata is shown; the text itself is omitted from the repr.
        return "Document(metadata={})".format(self.metadata)

In [8]:
def extract_and_format_date(text):
    """Find the first calendar date written in `text` and return it as a datetime.

    Two layouts are recognised, with Indonesian or English month names (full or
    three-letter abbreviations, case-insensitive):

    * day-first:   "12 September 2024", "5 Agu 2023"
    * month-first: "September 4th, 2024", "May 22, 2025"

    Args:
        text: Free text such as a page title or heading. May be None or empty.

    Returns:
        A `datetime` at midnight of the matched date, or None when no date is
        found or the matched day/month/year is not a valid calendar date.
    """
    if not text:
        return None

    # Month number keyed by the first three letters of the Indonesian or
    # English month name; the abbreviations differ only for May, August,
    # October and December.
    month_numbers = {
        'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'mei': 5, 'may': 5,
        'jun': 6, 'jul': 7, 'agu': 8, 'aug': 8, 'sep': 9,
        'okt': 10, 'oct': 10, 'nov': 11, 'des': 12, 'dec': 12,
    }
    month_names = (
        r"jan(?:uar[iy])?|feb(?:ruar[iy])?|mar(?:et|ch)?|apr(?:il)?|mei|may|"
        r"jun[ie]?|jul[iy]?|agu(?:stus)?|aug(?:ust)?|sep(?:t(?:ember)?)?|"
        r"okt(?:ober)?|oct(?:ober)?|nov(?:ember)?|des(?:ember)?|dec(?:ember)?"
    )
    date_pattern = (
        # day-first, e.g. "12 September 2024"
        r"(?<!\d)(?P<d1>\d{1,2})\s+(?P<m1>" + month_names + r")\s+(?P<y1>\d{4})(?!\d)"
        # month-first, e.g. "September 4th, 2024"
        r"|\b(?P<m2>" + month_names + r")\s+(?P<d2>\d{1,2})(?:st|nd|rd|th)?,?\s+(?P<y2>\d{4})(?!\d)"
    )

    match = re.search(date_pattern, text, re.IGNORECASE)
    if not match:
        return None

    day = match.group("d1") or match.group("d2")
    month_token = match.group("m1") or match.group("m2")
    year = match.group("y1") or match.group("y2")
    try:
        return datetime(int(year), month_numbers[month_token[:3].lower()], int(day))
    except ValueError:
        # e.g. "31 Februari 2024" matches the pattern but is not a real date.
        return None

In [9]:
def parse_last_updated(update_text):
    """Convert a relative "Last updated ... ago" phrase into an approximate datetime.

    Recognises "<N> day(s)/week(s)/month(s)/year(s) ago" as well as the
    singular forms "a month ago" / "an ... ago" (case-insensitive).

    Args:
        update_text: Text that may contain a relative age. May be None or empty.

    Returns:
        `datetime.now()` shifted back by the stated amount, or None when no
        relative age is found in the text.
    """
    if not update_text:
        return None

    match = re.search(
        r'(\d+|\ban?)\s+(day|week|month|year)s?\s+ago',
        update_text,
        re.IGNORECASE,
    )
    if not match:
        return None

    quantity_text = match.group(1).lower()
    quantity = 1 if quantity_text in ('a', 'an') else int(quantity_text)
    # relativedelta takes plural keyword names: days=, weeks=, months=, years=.
    unit_keyword = match.group(2).lower() + 's'
    return datetime.now() - relativedelta(**{unit_keyword: quantity})

In [10]:
def clean_text(text):
    """Strip site chrome from scraped text and normalise blank lines.

    Removes the fixed UI phrases listed in `artifacts` when they appear as
    standalone words or phrases (so "Copy" is removed but "Copyright" is left
    intact), collapses any run of blank or whitespace-only lines into a single
    blank line, and trims leading and trailing whitespace.

    Args:
        text: Raw text of a scraped chunk.

    Returns:
        The cleaned text.
    """
    artifacts = ["Was this helpful?", "Powered by GitBook", "Copy", "Next", "Previous", "Last updated"]
    for artifact in artifacts:
        # Lookarounds keep the match from firing inside a longer word.
        text = re.sub(r'(?<!\w)' + re.escape(artifact) + r'(?!\w)', '', text)
    # Collapse blank lines after artifact removal, since removing a phrase that
    # sat on its own line leaves an empty line behind.
    text = re.sub(r'\n\s*\n', '\n\n', text)
    return text.strip()

In [11]:
import requests
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
from selenium.common.exceptions import TimeoutException
from selenium import webdriver

def scrape_github_releases(api_url, per_page=15):
    """Fetch recent releases from the GitHub REST API and wrap them as Documents.

    Args:
        api_url: Releases endpoint, e.g.
            "https://api.github.com/repos/<owner>/<repo>/releases".
        per_page: Maximum number of releases to request (default 15).

    Returns:
        A list of `Document` objects, one per release. The text is the release
        title as a markdown heading followed by the release body; the metadata
        holds the repository's releases page as "source" and the publication
        date (YYYY-MM-DD, or 'unknown') as "release_date". An empty list is
        returned when the request fails or the response is not a list.
    """
    # Human-facing releases page that corresponds to the API endpoint.
    releases_page = api_url.replace("https://api.github.com/repos/", "https://github.com/", 1)

    try:
        response = requests.get(api_url, params={"per_page": per_page}, timeout=15)
        response.raise_for_status()
        releases = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching GitHub releases from {api_url}: {e}")
        return []

    if not isinstance(releases, list):
        print(f"Error fetching GitHub releases from {api_url}: unexpected response format")
        return []

    documents = []
    for release in releases:
        # The API sends JSON null for a missing name/body, so dict.get defaults
        # are not enough; `or` also covers the None case.
        title = release.get('name') or release.get('tag_name') or 'Untitled Release'
        body = release.get('body') or 'No description.'
        published_at = release.get('published_at') or ''
        documents.append(Document(
            page_content=f"## {title}\n\n{body}",
            metadata={
                "source": releases_page,
                "release_date": published_at.split('T')[0] if published_at else 'unknown',
            },
        ))
    print(f"Scraped {len(documents)} documents from: Langflow")
    return documents

In [12]:
def scrape_simplidots_with_selenium(base_url):
    """Scrape SimpliDots release-note articles with a headless Chrome session.

    The index page at `base_url` is loaded, every link whose href contains
    '/202' is collected, and each linked page is visited in turn. For each page
    the text of its <main> element becomes the document content. The release
    date is taken from the page title, then from the content, and finally from
    a "Last updated ... ago" footer; it is 'unknown' if none of those yields a
    date. Pages with 100 characters of text or fewer are skipped.

    Args:
        base_url: URL of the SimpliDots release-notes index page.

    Returns:
        A list of `Document` objects with "source" and "release_date" metadata.
        The list is empty when the index page cannot be loaded in time; a
        failure on an individual article only skips that article.
    """
    options = webdriver.ChromeOptions()
    for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
        options.add_argument(flag)
    driver = webdriver.Chrome(options=options)
    all_documents = []
    try:
        print("Finding all unique article links on SimpliDots...")
        driver.get(base_url)
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.TAG_NAME, "a")))
        links = driver.find_elements(By.XPATH, "//a[contains(@href, '/202')]")
        hrefs = (link.get_attribute("href") for link in links)
        # Drop "#fragment" suffixes so in-page anchors of one article are not
        # scraped as separate documents; sort for a reproducible visiting order.
        urls_to_visit = sorted({urldefrag(href).url for href in hrefs if href})
        print(f"Found {len(urls_to_visit)} potential article links. Now extracting content...")
        for url in urls_to_visit:
            try:
                driver.get(url)
                WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.TAG_NAME, "main")))
                title = driver.title
                content_text = driver.find_element(By.TAG_NAME, "main").text.strip()
                page_source = driver.page_source

                # Date lookup order: title, then body text, then footer.
                date_obj = extract_and_format_date(title) or extract_and_format_date(content_text)
                if not date_obj and "Last updated" in page_source:
                    footer_elements = driver.find_elements(By.XPATH, "//*[contains(text(), 'Last updated')]")
                    if footer_elements:
                        date_obj = parse_last_updated(footer_elements[0].text)
                release_date_str = date_obj.strftime('%Y-%m-%d') if date_obj else 'unknown'

                if len(content_text) > 100:
                    all_documents.append(
                        Document(page_content=content_text, metadata={"source": url, "release_date": release_date_str})
                    )
            except Exception as e:
                # Best effort: one broken article must not stop the whole crawl.
                print(f"Warning: Could not process SimpliDots page {url}. Error: {e}")
    except TimeoutException:
        # Same policy as the Anthropic scraper: report and return what we have
        # (nothing) instead of aborting the whole scraping run.
        print(f"Error: Timed out loading the SimpliDots index page {base_url}.")
    finally:
        driver.quit()
    print(f"Scraped {len(all_documents)} documents from: SimpliDots")
    return all_documents

In [13]:
from bs4 import BeautifulSoup
from selenium.common.exceptions import TimeoutException
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_anthropic_with_selenium(url):
    """Scrape dated release-note entries from the Anthropic release-notes page.

    The page is loaded in headless Chrome; after <body> is present the function
    pauses five seconds so dynamically rendered content can finish loading.
    Every <h4> heading whose text contains a recognisable date is paired with
    the next sibling <ul>, whose list items (joined by newlines) become the
    document text.

    Args:
        url: URL of the release-notes page.

    Returns:
        A list of `Document` objects with "source" and "release_date"
        (YYYY-MM-DD) metadata, or an empty list if the page does not load
        within 30 seconds.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    # Present a regular desktop browser user-agent.
    options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')

    driver = webdriver.Chrome(options=options)
    documents = []

    try:
        driver.get(url)

        # Deliberately loose wait condition: only require that <body> exists.
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        # Fixed pause to let JavaScript-rendered content settle before parsing.
        print("Page has loaded, pausing for 5 seconds to let content settle...")
        time.sleep(5)

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        for heading in soup.find_all('h4'):
            date_obj = extract_and_format_date(heading.text)
            if not date_obj:
                continue
            content_node = heading.find_next_sibling('ul')
            if not content_node:
                continue
            # A real newline between list items (previously the two-character
            # sequence backslash + "n" was inserted into the text).
            content_text = content_node.get_text(separator='\n', strip=True)
            documents.append(Document(
                page_content=content_text,
                metadata={"source": url, "release_date": date_obj.strftime('%Y-%m-%d')},
            ))

        print(f"Scraped {len(documents)} dated entries from: Anthropic")
        return documents

    except TimeoutException:
        print("Error: Timed out after 30 seconds. The site may be blocking automated access or is currently down.")
        return []
    finally:
        driver.quit()

In [14]:
# Entry points for the three release-note sources.
URLS = {
    "simplidots": "https://fitur-sap.simplidots.id/",
    "langflow": "https://api.github.com/repos/langflow-ai/langflow/releases",
    "anthropic": "https://docs.anthropic.com/en/release-notes/api"
}
print("Starting data scraping...")
# Run every scraper and pool the results into a single list of Documents.
all_documents = []
all_documents.extend(scrape_simplidots_with_selenium(URLS["simplidots"]))
all_documents.extend(scrape_github_releases(URLS["langflow"]))
all_documents.extend(scrape_anthropic_with_selenium(URLS["anthropic"]))
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print(f"\nScraping complete. Total documents found: {len(all_documents)}")

Starting data scraping...
Finding all unique article links on SimpliDots...
Found 57 potential article links. Now extracting content...
Scraped 57 documents from: SimpliDots
Scraped 15 documents from: Langflow
Page has loaded, pausing for 5 seconds to let content settle...
Scraped 40 dated entries from: Anthropic
\nScraping complete. Total documents found: 112


## Advanced Preprocessing

In [15]:
print("\nStarting data preprocessing (cleaning and chunking)...")

# Split every scraped document into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
chunks = text_splitter.split_documents(all_documents)

# Clean each chunk in place and keep only those with more than 50 characters left.
processed_docs = []
for chunk in chunks:
    chunk.page_content = clean_text(chunk.page_content)
    if len(chunk.page_content) <= 50:
        continue
    processed_docs.append(chunk)

print(f"Preprocessing complete. Total processed chunks: {len(processed_docs)}")


Starting data preprocessing (cleaning and chunking)...
Preprocessing complete. Total processed chunks: 542


## Data Ingestion & Vectorization

In [16]:
from langchain_openai import AzureOpenAIEmbeddings
from langchain_community.vectorstores import FAISS

print("Initializing Azure OpenAI Embeddings model...")
# Read the same environment variables that the secrets cell populates
# (AZURE_API_*, AZURE_EMBEDDING_DEPLOYMENT_NAME) and pass them explicitly.
# The previously used AZURE_OPENAI_* names are never set in this notebook, so
# os.getenv returned None for both the deployment and the API version.
azure_embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.environ["AZURE_API_BASE"],
    api_key=os.environ["AZURE_API_KEY"],
    api_version=os.environ["AZURE_API_VERSION"],
    azure_deployment=os.environ["AZURE_EMBEDDING_DEPLOYMENT_NAME"],
)

print("Creating FAISS vector store from processed documents...")
# Only build the index when the preprocessing cell produced at least one chunk.
if 'processed_docs' in locals() and processed_docs:
    vector_store = FAISS.from_documents(processed_docs, azure_embeddings)
    # Return the 12 nearest chunks for each query.
    retriever = vector_store.as_retriever(search_kwargs={'k': 12})

    print("Vector store and retriever created successfully with improved settings.")
else:
    print("No documents were processed. The Q&A bot will not have any knowledge.")

Initializing Azure OpenAI Embeddings model...
Creating FAISS vector store from processed documents...
Vector store and retriever created successfully with improved settings.


# Proactive Weekly Newsletter - 1st Feature

## Filter for Recent Updates

In [17]:
from datetime import datetime, timedelta

# Cut-off for "recent": midnight seven days ago. Release dates carry no time of
# day (they parse to midnight), so the cut-off is truncated to midnight as well;
# otherwise notes dated exactly on the cut-off day would be dropped even though
# the summary below reports the window as starting on that date.
seven_days_ago = (datetime.now() - timedelta(days=7)).replace(
    hour=0, minute=0, second=0, microsecond=0
)

weekly_docs = []
for doc in processed_docs:
    release_date_str = doc.metadata.get('release_date')
    if not release_date_str or release_date_str == 'unknown':
        continue
    try:
        release_date = datetime.strptime(release_date_str, '%Y-%m-%d')
    except ValueError:
        # Skip chunks whose date is not in YYYY-MM-DD form.
        continue
    if release_date >= seven_days_ago:
        weekly_docs.append(doc)

print(f"Found {len(weekly_docs)} documents from the last 7 days (since {seven_days_ago.strftime('%Y-%m-%d')}).")

# Prepare the context for the crew: one block per chunk, separated by a rule.
# Real newlines are used here; the doubled backslashes used before put the
# literal characters "\n" into the text handed to the LLM.
if weekly_docs:
    newsletter_context = "\n\n---\n\n".join(
        f"Source: {doc.metadata.get('source', 'N/A')}\n"
        f"Date: {doc.metadata.get('release_date', 'N/A')}\n\n"
        f"{doc.page_content}"
        for doc in weekly_docs
    )
else:
    newsletter_context = "No new release notes found in the last 7 days."

Found 7 documents from the last 7 days (since 2025-07-23).


## Configure LLM and Tools

In [27]:
import csv
import smtplib
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from langchain_openai import AzureChatOpenAI
from crewai.tools import BaseTool

# Build the chat model used by the newsletter agents. Any failure (missing
# environment variable, rejected configuration, ...) is reported instead of raised.
try:
    llm_settings = {
        "azure_endpoint": os.environ["AZURE_API_BASE"],
        "azure_deployment": os.environ["AZURE_DEPLOYMENT_ID"],
        "api_key": os.environ["AZURE_API_KEY"],
        "api_version": os.environ["AZURE_API_VERSION"],
        "model": f"azure/{userdata.get('AZURE_OPENAI_CHAT_DEPLOYMENT_NAME')}",
    }
    llm = AzureChatOpenAI(**llm_settings)
    print("Azure LLM initialized successfully.")
except Exception as init_error:
    print(f"Error initializing Azure LLM: {init_error}")
    print("Please ensure your Azure OpenAI credentials are set correctly in the cell above.")

Azure LLM initialized successfully.


In [28]:
import csv
import smtplib
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from crewai.tools import BaseTool

class CSVEmailTool(BaseTool):
    """CrewAI tool that e-mails an HTML newsletter to every recipient in recipients.csv.

    The CSV file must have a header row with `name` and `email` columns. Each
    recipient gets an individually addressed message sent through Gmail's SMTP
    server using the module-level EMAIL_ADDRESS / EMAIL_PASSWORD credentials.
    """

    name: str = "Personalized HTML Email Dispatcher"
    description: str = "Reads 'recipients.csv' and sends a personalized email with a custom subject to everyone on the list. The input must be the subject and the newsletter body."

    def _run(self, subject: str, newsletter_body_html: str) -> str:
        """Send the newsletter to all recipients.

        Args:
            subject: Subject line for every message. Line breaks are collapsed
                to single spaces so the value is a valid one-line header.
            newsletter_body_html: HTML fragment inserted into the message body
                as-is (it is expected to be markup, so it is not escaped).

        Returns:
            A status string: either an error description when the recipient
            file cannot be used, or a "sent N/M" summary. Individual send
            failures are printed and do not stop the remaining deliveries.
        """
        from html import escape  # local import keeps this cell self-contained

        # These credentials are loaded from the notebook secrets.
        sender_email = EMAIL_ADDRESS
        sender_password = EMAIL_PASSWORD
        sent_count = 0
        recipient_list = []

        try:
            with open('recipients.csv', mode='r', encoding='utf-8', newline='') as csvfile:
                reader = csv.DictReader(csvfile, skipinitialspace=True)
                for row in reader:
                    # Trim stray whitespace around headers and values
                    # (e.g. "name, email@example.com").
                    recipient_list.append({
                        key.strip(): value.strip()
                        for key, value in row.items()
                        if isinstance(key, str) and isinstance(value, str)
                    })
            if not recipient_list:
                return "Error: recipients.csv is empty or not found."
        except FileNotFoundError:
            return "Error: recipients.csv not found. Please create it first."
        except Exception as e:
            return f"Error reading CSV file: {e}"

        # A header value must be a single line.
        subject = " ".join(str(subject).split())

        for recipient in recipient_list:
            recipient_name = recipient.get("name") or "there"
            recipient_email = recipient.get("email")

            if not recipient_email:
                continue

            # The name comes from a data file, so escape it before embedding in HTML.
            safe_name = escape(recipient_name)

            # Create the full HTML document for each recipient.
            full_html_content = f"""
            <!DOCTYPE html>
            <html lang="en">
            <head>
                <meta charset="UTF-8">
                <meta name="viewport" content="width=device-width, initial-scale=1.0">
                <style>
                    body {{ font-family: sans-serif; line-height: 1.6; color: #333; }}
                    .container {{ max-width: 600px; margin: 20px auto; padding: 20px; border: 1px solid #ddd; border-radius: 8px; }}
                    h2 {{ color: #0056b3; border-bottom: 1px solid #eee; padding-bottom: 5px;}}
                    h3 {{ color: #444; }}
                    ul {{ padding-left: 20px; }}
                    li {{ margin-bottom: 10px; }}
                    .footer {{ margin-top: 20px; font-size: 0.8em; color: #888; text-align: center; }}
                </style>
            </head>
            <body>
                <div class="container">
                    <p>Hi {safe_name},</p>
                    <p>Here are the latest updates for this week:</p>
                    {newsletter_body_html}
                    <hr>
                    <p class="footer">To unsubscribe, please reply to this email.</p>
                </div>
            </body>
            </html>
            """

            message = MIMEMultipart()
            message['From'] = f"SimpliDOTS Tech Updates <{sender_email}>"
            message['To'] = recipient_email
            message['Subject'] = subject
            message.attach(MIMEText(full_html_content, 'html'))

            try:
                print(f"Sending '{subject}' to {recipient_name} at {recipient_email}...")
                # The context manager closes the connection even when
                # starttls/login/sendmail raises.
                with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
                    server.starttls()
                    server.login(sender_email, sender_password)
                    server.sendmail(sender_email, recipient_email, message.as_string())
                sent_count += 1
                time.sleep(2)  # Wait 2 seconds before sending the next email
            except Exception as e:
                print(f"Failed to send email to {recipient_email}. Error: {e}")

        return f"Successfully sent personalized HTML emails to {sent_count}/{len(recipient_list)} recipients."

# Instantiate the tool so it can be handed to the dispatcher agent.
email_tool = CSVEmailTool()

## Define Agents

In [29]:
# ONE CONSOLIDATED EMAIL
# This is an alternative workflow that sends a single email containing updates from all products.
# To use this method, uncomment this cell and the corresponding "Assemble and Run" cell below,
# and comment out the "Main Execution Loop" cell.

# from crewai import Agent, Task # Assuming Agent and Task are already imported

# The same agents (analyst_agent, expert_agent, dispatcher_agent) defined for the loop method can be used here.

# consolidated_analysis_task = Task(
#     description=(
#         "Analyze the provided text which contains software release notes from the last 7 days from multiple companies. "
#         "Your primary job is to group all findings by company (e.g., SimpliDots, Langflow, Anthropic). "
#         "Under each company, create sub-categories for 'New Features', 'Bug Fixes', etc., and list the specific updates."
#         "\n\nCONTEXT:\n---\n{context}\n---"
#     ),
#     expected_output=(
#         "A single, structured report. The top-level categories must be the company names. "
#         "Under each company, there should be sub-categories with bulleted lists of the specific updates."
#     ),
#     agent=analyst_agent
# )

# consolidated_summarization_task = Task(
#     description=(
#         "Take the analyst's consolidated report, which is categorized by company, and transform it into a single, polished HTML newsletter body. "
#         "Create a main heading (<h2>) for each company (e.g., 'Langflow Updates'). "
#         "Under each company heading, create subheadings (<h3>) for the update types (e.g., 'New Features'). "
#         "Format the details for each update as an unordered list (<ul> with <li> items). "
#         "Do NOT include the <html>, <head>, or <body> tags."
#     ),
#     expected_output=(
#         "A single string containing the well-formatted HTML for the complete newsletter body, organized with <h2> tags for each company."
#     ),
#     agent=expert_agent,
#     context=[consolidated_analysis_task]
# )

# consolidated_email_task = Task(
#     description=(
#         "Take the single composed newsletter body and use the Personalized HTML Email Dispatcher tool. "
#         "The subject line for the email MUST be 'Weekly Tech Release Notes Digest'."
#     ),
#     expected_output="A confirmation message stating how many emails were successfully sent.",
#     agent=dispatcher_agent,
#     context=[consolidated_summarization_task]
# )

In [30]:
# --- Three emails: one newsletter per product's release notes

from crewai import Crew, Process, Agent, Task

# Products that each receive their own newsletter. `source_keyword` is matched
# against a document's "source" metadata to select that product's documents.
products_to_process = [
    dict(name="Langflow", source_keyword="github.com/langflow-ai/langflow"),
    dict(name="SimpliDots", source_keyword="simplidots.id"),
    dict(name="Anthropic", source_keyword="anthropic.com"),
]

# Agents are (re)defined here so they are in scope for the per-product loop.
# The `llm` object must already exist from an earlier cell.
analyst_agent = Agent(
    role="Principal Technology Analyst",
    goal="Analyze provided release notes to identify critical updates.",
    backstory="You are an expert analyst who extracts impactful information.",
    llm=llm,
    verbose=True,
)
expert_agent = Agent(
    role="Expert Tech Newsletter Writer",
    goal="Craft an engaging newsletter from an analyst's report.",
    backstory="You are a famous tech writer known for making complex topics exciting.",
    llm=llm,
    verbose=True,
)
# Only the dispatcher is given the e-mail tool.
dispatcher_agent = Agent(
    role="Communications Dispatch Officer",
    goal="Use the email tool to send the newsletter to all recipients defined in the system's data file.",
    backstory="You are a reliable specialist ensuring important updates are dispatched correctly.",
    tools=[email_tool],
    llm=llm,
    verbose=True,
)

## Assemble the Crew

In [31]:
# ASSEMBLE AND RUN THE CONSOLIDATED CREW
# To use this, uncomment this cell and the task definitions above, and comment out the main loop.

# from crewai import Crew, Process

# consolidated_crew = Crew(
#     agents=[analyst_agent, expert_agent, dispatcher_agent],
#     tasks=[consolidated_analysis_task, consolidated_summarization_task, consolidated_email_task],
#     process=Process.sequential,
#     verbose=1
# )

# print("\nKicking off the Consolidated Weekly Newsletter Crew...")
# # We use the 'newsletter_context' variable which contains all weekly updates combined.
# if newsletter_context != "No new release notes found in the last 7 days.":
#     try:
#         # The context containing all updates is passed here.
#         result = consolidated_crew.kickoff(inputs={'context': newsletter_context})
#         print("\nConsolidated crew execution finished successfully!")
#         print(f"Final Result: {result}")
#     except Exception as e:
#         print(f"\nAn error occurred during the consolidated crew execution: {e}")
# else:
#     print("No weekly updates found to process for the consolidated email.")

In [32]:
# One newsletter e-mail per product.
#
# For every entry in `products_to_process`, pick this week's documents whose
# source contains the product keyword, then run a three-step sequential crew:
# analyse -> write HTML body -> dispatch by e-mail. Products without updates
# are skipped; a failing crew is reported and the loop continues.
for product in products_to_process:
    product_name = product["name"]
    keyword = product["source_keyword"]

    print(f"\n{'='*60}")
    print(f"Starting process for: {product_name}")
    print(f"{'='*60}")

    # 1. Filter documents for the current product from the current week
    product_docs = [doc for doc in weekly_docs if keyword in doc.metadata.get('source', '')]

    if not product_docs:
        print(f"No weekly updates found for {product_name}. Skipping.")
        continue

    print(f"Found {len(product_docs)} document(s) for {product_name}.")

    # 2. Create the context string for this product only.
    #    Real newlines are used; the doubled backslashes used before inserted
    #    the literal characters "\n" into the prompt.
    product_context = "\n\n---\n\n".join(
        f"Source: {doc.metadata.get('source', 'N/A')}\n"
        f"Date: {doc.metadata.get('release_date', 'N/A')}\n\n"
        f"{doc.page_content}"
        for doc in product_docs
    )

    # 3. Create dynamic tasks specifically for the current product
    analysis_task = Task(
        description=f"Analyze the provided release notes for '{product_name}' from the last 7 days. "
                    f"Create a clear, bulleted list of all new features, bug fixes, and other important announcements."
                    f"\n\nCONTEXT:\n---\n{product_context}\n---",
        expected_output=f"A structured report summarizing all weekly updates for {product_name}.",
        agent=analyst_agent
    )

    summarization_task = Task(
        description=f"Take the analyst's report for '{product_name}' and transform it into a polished HTML newsletter body. "
                    f"Use <h3> tags for sub-categories (e.g., 'New Features', 'Bug Fixes'). Use <ul> and <li> for the details. "
                    "Do NOT include <html>, <head>, or <body> tags, only the content that goes inside the body.",
        expected_output=f"A string containing the well-formatted HTML for the {product_name} newsletter body.",
        agent=expert_agent,
        context=[analysis_task]
    )

    # The tool is referred to by its registered name
    # ("Personalized HTML Email Dispatcher") so the agent can match it exactly.
    email_task = Task(
        description=f"Take the composed newsletter body for '{product_name}' and use the Personalized HTML Email Dispatcher tool. "
                    f"The subject line for the email MUST be 'Weekly {product_name} Release Notes'.",
        expected_output="A confirmation message stating how many emails were successfully sent.",
        agent=dispatcher_agent,
        context=[summarization_task]
    )

    product_crew = Crew(
        agents=[analyst_agent, expert_agent, dispatcher_agent],
        tasks=[analysis_task, summarization_task, email_task],
        process=Process.sequential,
        verbose=1 # Using verbose=1 for cleaner logs in a loop
    )

    print(f"\nKicking off the crew for {product_name}...")
    try:
        result = product_crew.kickoff()
        print(f"\nCrew execution for {product_name} finished successfully!")
        print(f"Final Result: {result}")
    except Exception as e:
        # Best effort: a failure for one product must not block the others.
        print(f"\nAn error occurred during the {product_name} crew execution: {e}")


Starting process for: Langflow
No weekly updates found for Langflow. Skipping.

Starting process for: SimpliDots
Found 5 document(s) for SimpliDots.

Kicking off the crew for SimpliDots...


Output()

Output()

Output()

Output()


Crew execution for SimpliDots finished successfully!
Final Result: Successfully sent personalized HTML emails to 2/2 recipients.

Starting process for: Anthropic
Found 2 document(s) for Anthropic.

Kicking off the crew for Anthropic...


Output()

Output()

Output()

Output()


Crew execution for Anthropic finished successfully!
Final Result: Successfully sent personalized HTML emails to 2/2 recipients.


# Reactive Q&A Email Bot

## RAG Chain

In [141]:
from langchain_openai import AzureChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Chat model for the Q&A bot. NOTE: this rebinds the global `llm` that the
# newsletter cells above also use; from here on `llm` has temperature=0.2.
llm = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_API_BASE"],
    azure_deployment=os.environ["AZURE_DEPLOYMENT_ID"],
    api_key=os.environ["AZURE_API_KEY"],
    api_version=os.environ["AZURE_API_VERSION"],
    model=f"azure/{userdata.get('AZURE_OPENAI_CHAT_DEPLOYMENT_NAME')}",
    temperature=0.2
)

# Prompt for the reply e-mail. It has two placeholders: {context} (retrieved
# release-note text) and {question} (the user's question). The model is told to
# answer only from the context and to return an HTML fragment.
prompt_template = """
You are a product support specialist from SimpliDOTS who replies to user questions via email in a friendly and professional tone.
Your task is to answer the user's question based ONLY on the provided release note context below.
You MUST format your entire response as a clean, professional HTML snippet that will be embedded in an email.

**Instructions:**
1.  **Analyze the Context:** Read the provided release notes to fully understand the answer.
2.  **Format as HTML:** Structure your response using HTML tags.
    * Start with a friendly greeting like "<p>Halo, Kawan Simpli!</p>".
    * Use `<p>` tags for paragraphs.
    * Use `<h3>` for main points or titles.
    * Use `<ul>` and `<li>` for bullet points.
    * Use `<strong>` to highlight key terms.
    * End with a professional closing.
3.  **Synthesize and Explain:** Create a helpful, detailed, and easy-to-understand summary. Do not just list facts.
4.  **Language:** Write the response in the same language as the user's question (e.g., Bahasa Indonesia).
5.  **Important:** Do NOT include `<html>`, `<head>`, or `<body>` tags. Your entire output should be the HTML for the email body content only.

**Context from Release Notes:**
---
{context}
---

**User's Question:**
{question}

**Your HTML Email Response:**
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

def format_docs(docs):
    """Render retrieved documents as one context string for the prompt.

    Each document becomes a block with its source, release date and text;
    blocks are separated by a horizontal-rule line. Missing metadata fields
    are shown as 'N/A'.

    Args:
        docs: Iterable of objects exposing `metadata` (a mapping) and
            `page_content` (text).

    Returns:
        The combined context string (empty when `docs` is empty).
    """
    blocks = []
    for doc in docs:
        source = doc.metadata.get('source', 'N/A')
        release_date = doc.metadata.get('release_date', 'N/A')
        header = f"Document Source: {source}\nRelease Date: {release_date}\n\n"
        blocks.append(header + f"{doc.page_content}")
    return "\n\n---\n\n".join(blocks)

# Q&A pipeline: the incoming question is passed through unchanged as "question"
# and is also sent to the retriever, whose results are rendered by format_docs
# into "context"; both fill the prompt, which goes to the LLM, and the reply is
# parsed to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print("RAG Q&A Chain is ready.")

RAG Q&A Chain is ready.


## Gmail API Functions

### Get Gmail Service

In [142]:
from google.colab import drive
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from google.auth.transport.requests import Request
import pickle
import os

def get_gmail_service():
    """
    Authenticate against the Gmail API from Colab and return a service client.

    Steps:
      1. Force-remount Google Drive so the OAuth client-secrets file is readable.
      2. Reuse cached credentials from ``token.pickle`` when the file exists and
         can be loaded; an unreadable cache is ignored.
      3. If the cached credentials are expired and carry a refresh token, try to
         refresh them; if the refresh fails, fall back to manual authorization.
      4. Otherwise run the manual copy/paste authorization flow (out-of-band
         redirect URI) and exchange the pasted code for a token.
      5. Persist the resulting credentials to ``token.pickle``.

    Returns:
        The object returned by ``build('gmail', 'v1', credentials=creds)``.
    """
    # Local import: the surrounding cell does not import this name.
    from google.auth.exceptions import RefreshError

    drive.mount('/content/drive', force_remount=True)
    CREDENTIALS_PATH = '/content/drive/MyDrive/CollabData/credential_cloud/credential_releasenotes.json'
    SCOPES = ['https://www.googleapis.com/auth/gmail.modify']
    TOKEN_PATH = 'token.pickle'

    creds = None
    if os.path.exists(TOKEN_PATH):
        # NOTE(security): pickle.load can execute arbitrary code embedded in the
        # file. Only keep this if token.pickle is always written by this notebook.
        try:
            with open(TOKEN_PATH, 'rb') as token:
                creds = pickle.load(token)
        except (pickle.UnpicklingError, EOFError, AttributeError, ImportError) as e:
            # A truncated or incompatible cache should not block authentication.
            print(f"Ignoring unreadable {TOKEN_PATH}: {e}")
            creds = None

    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                refreshed = True
            except RefreshError as e:
                # Typically a revoked or expired refresh token; re-authorize instead.
                print(f"Token refresh failed ({e}); falling back to manual authentication.")

        if not refreshed:
            # Load credentials from your file
            flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_PATH, SCOPES)
            # NOTE(review): Google has announced deprecation of the out-of-band
            # redirect URI; confirm it is still accepted for this OAuth client.
            flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'

            # Generate the authorization URL with the correct URI now embedded
            auth_url, _ = flow.authorization_url(prompt='consent')

            print('--- MANUAL AUTHENTICATION REQUIRED ---')
            print('Please go to this URL to authorize the application:')
            print(auth_url)

            # Pasted codes often carry stray whitespace/newlines; strip them so
            # the token exchange does not fail on an otherwise valid code.
            code = input('Enter the authorization code you receive here: ').strip()

            # Exchange the code for a token
            flow.fetch_token(code=code)
            creds = flow.credentials

        # Save the credentials for the next run
        with open(TOKEN_PATH, 'wb') as token:
            pickle.dump(creds, token)

    service = build('gmail', 'v1', credentials=creds)
    print("Gmail service initialized successfully.")
    return service

### Check for New Emails

In [143]:
import base64

def _decode_body_data(data):
    """Decode a base64url-encoded body payload to text ("" for empty input)."""
    if not data:
        return ""
    # Re-add any stripped '=' padding so the decoder does not raise.
    padded = data + "=" * (-len(data) % 4)
    # Replace undecodable bytes instead of failing on non-UTF-8 mail.
    return base64.urlsafe_b64decode(padded).decode('utf-8', errors='replace')


def _find_plain_text(parts):
    """Depth-first search of MIME parts for the first non-empty text/plain body."""
    for part in parts:
        if part.get('mimeType') == 'text/plain':
            text = _decode_body_data(part.get('body', {}).get('data', ''))
            if text:
                return text
        # Multipart containers keep their children under their own 'parts' key.
        text = _find_plain_text(part.get('parts') or [])
        if text:
            return text
    return ""


def _get_header(headers, name):
    """Return the first header value whose name matches case-insensitively, else None."""
    wanted = name.lower()
    for header in headers:
        if header.get('name', '').lower() == wanted:
            return header.get('value')
    return None


def check_for_new_emails(service):
    """
    Fetch unread inbox emails whose subject matches one of the accepted
    subjects, extract each one's text, and mark it as read.

    The body is taken from the first non-empty ``text/plain`` part (searching
    nested multipart containers too), or from the top-level body for a
    non-multipart message. When no body text is found, the message ``snippet``
    is used. Messages without a ``From`` header are skipped and left unread.

    Args:
        service: Gmail API client exposing ``users().messages()`` with
            ``list``, ``get`` and ``modify``.

    Returns:
        A list of dicts with keys ``id``, ``sender`` and ``question``
        (whitespace-stripped). Empty when nothing matched.
    """
    accepted_subjects = [
        "Release Note Question",
        "Pertanyaan Rilis",
        "Help with Release Notes",
        "Tanya Fitur Baru"
    ]
    search_query = " OR ".join([f'subject:("{s}")' for s in accepted_subjects])
    print("Searching for unread emails with subjects matching your list...")

    results = service.users().messages().list(
        userId='me',
        labelIds=['INBOX', 'UNREAD'],
        q=search_query
    ).execute()

    messages = results.get('messages', [])

    questions = []
    if not messages:
        print("No new questions found.")
        return questions

    print(f"Found {len(messages)} new email(s) to process.")
    for message in messages:
        msg = service.users().messages().get(userId='me', id=message['id']).execute()
        payload = msg.get('payload', {})
        headers = payload.get('headers', [])

        sender = _get_header(headers, 'From')
        if not sender:
            # Without a sender there is nobody to reply to; do not abort the batch.
            print(f"Skipping message {message['id']}: no From header found.")
            continue

        parts = payload.get('parts')
        if parts:
            question_body = _find_plain_text(parts)
        else:
            # Non-multipart message: the content sits directly on the payload body.
            question_body = _decode_body_data(payload.get('body', {}).get('data', ''))

        # If no body is found, use the snippet as a last resort
        if not question_body:
            question_body = msg.get('snippet', '')

        questions.append({'id': message['id'], 'sender': sender, 'question': question_body.strip()})

        # Mark the email as read
        service.users().messages().modify(userId='me', id=message['id'], body={'removeLabelIds': ['UNREAD']}).execute()

    return questions

### Send Reply Email

In [144]:
def send_reply_email(service, to, subject, body):
    """Sends a reply email correctly formatted as HTML.

    Args:
        service: Gmail API client exposing ``users().messages().send``.
        to: Recipient, either a bare address or ``"Name <address>"``.
        subject: Subject of the message being answered. ``"Re: "`` is
            prepended unless the subject already starts with it.
        body: HTML used as the message body.

    Returns:
        The response of the send call on success, or ``None`` when sending
        failed (the error is printed rather than raised).
    """
    # Local imports: the surrounding cell does not import these names.
    from email.mime.text import MIMEText
    from email.utils import formataddr, getaddresses

    message = MIMEText(body, 'html')

    # Re-format the recipient so a non-ASCII display name is encoded on its
    # own; assigning the raw string would encode the address along with it.
    pairs = [(name, addr) for name, addr in getaddresses([to]) if addr]
    try:
        recipient = ", ".join(formataddr(pair) for pair in pairs) if pairs else to
    except UnicodeError:
        # formataddr rejects non-ASCII addresses; fall back to the caller's value.
        recipient = to

    # Avoid stacking prefixes ("Re: Re: ...") when the subject is already a reply.
    if subject.strip().lower().startswith("re:"):
        reply_subject = subject
    else:
        reply_subject = "Re: " + subject

    message['to'] = recipient
    message['subject'] = reply_subject
    raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode()

    try:
        response = service.users().messages().send(userId='me', body={'raw': raw_message}).execute()
        print(f"HTML reply sent successfully to {to}")
        return response
    except Exception as e:
        print(f"An error occurred while sending reply to {to}: {e}")
        return None

## Run QnA Bot

## Credentials Refresh

In [145]:
# For Re-authentication

# import os

# # Define the paths to Colab's credential cache files
# adc_path = '/content/.config/gce/application_default_credentials.json'
# token_path = 'token.pickle'

# # Delete the files if they exist
# if os.path.exists(adc_path):
#     os.remove(adc_path)
#     print("Removed old cached credentials (adc.json).")

# if os.path.exists(token_path):
#     os.remove(token_path)
#     print("Removed old token.pickle file.")

# print("\nCredential cache is clear. You can now re-authenticate.")

In [149]:
import re

def _strip_markdown_fence(text):
    """Remove a wrapping Markdown code fence (``` or ```html) from ``text``.

    Tolerates whitespace before the opening fence and after the closing one,
    and a fence without the ``html`` language tag.
    """
    return re.sub(r'^\s*```(?:html)?\s*|\s*```\s*$', '', text, flags=re.IGNORECASE).strip()


print("Initializing Gmail Service...")
gmail_service = get_gmail_service()
print("Checking for new questions...")

new_questions = check_for_new_emails(gmail_service)

if new_questions:
    print(f"Found {len(new_questions)} new question(s). Answering them now...")
    for item in new_questions:
        print(f"  > Answering question from {item['sender']}: '{item['question']}'")

        # The emails were already marked as read when they were fetched, so a
        # failure on one question must not abort the remaining ones.
        try:
            # Get the raw answer from the RAG chain
            raw_answer = rag_chain.invoke(item['question'])
        except Exception as e:
            print(f"  ! Failed to generate an answer for {item['sender']}: {e}")
            continue

        # The model sometimes wraps its HTML in a Markdown code fence; drop it.
        clean_answer = _strip_markdown_fence(raw_answer)
        if not clean_answer:
            print(f"  ! Empty answer generated for {item['sender']}; no reply sent.")
            continue

        # Send the cleaned reply
        send_reply_email(gmail_service, item['sender'], "Release Note Question", clean_answer)

Initializing Gmail Service...
Mounted at /content/drive
Gmail service initialized successfully.
Checking for new questions...
Searching for unread emails with subjects matching your list...
Found 1 new email(s) to process.
Found 1 new question(s). Answering them now...
  > Answering question from Nabila Nurhusna Yap <nabilanurhusnayap@gmail.com>: 'Hello! bisa jelasin dengan lebih detail ngga ya untuk release Live Mode
with Reset/No Reset Data Options? thank you!'
HTML reply sent successfully to Nabila Nurhusna Yap <nabilanurhusnayap@gmail.com>
