In [1]:
# Load variables from a local .env file into the process environment before
# crewai is imported (presumably this is where the Gemini API key lives —
# confirm against your .env).
from dotenv import load_dotenv

load_dotenv()

from crewai import LLM

# Model handle used by this cell; a low temperature is requested so answers
# vary less between runs.
llm = LLM(model="gemini/gemini-2.5-flash", temperature=0.1)

# Smoke test: as the last expression, the reply is shown as the cell output.
llm.call("Who invented transcendental meditation.")

'Transcendental Meditation (TM) was founded by **Maharishi Mahesh Yogi**.\n\nHe introduced the technique to the world in the late 1950s and early 1960s, based on his interpretation of ancient Vedic traditions from India.'

In [None]:

# Initialize the tool to read any file the agent knows or learns the path for


# Initialize the tool with a specific file path, so the agent can only read the content of the specified file

# Initialize the tool


In [5]:
from crewai import Agent, Task, Crew
from crewai_tools import FileReadTool, FileWriterTool

# Tools given to the agent: a reader constructed with 'text.txt' as its file
# path, and a writer with no preset target.
file_read_tool = FileReadTool(file_path='text.txt')
file_writer_tool = FileWriterTool()

# The only agent in this crew: it reads the source text, condenses it, and
# produces the markdown rewrite using the two tools above.
content_summarizer_agent = Agent(
    role="Text Summarization and Markdown Formatting Expert",
    goal=(
        "Read a text file, summarize its key ideas, "
        "and rewrite the content in a well-structured "
        "and professional markdown format."
    ),
    backstory=(
        "An experienced technical writer and summarization specialist "
        "who can distill long content into concise, readable markdown documents."
    ),
    llm=llm,
    tools=[file_read_tool, file_writer_tool],
)

# The single unit of work: read 'text.txt', summarize it, and ask the agent to
# save the markdown rewrite through its writer tool.
summarize_and_rewrite_task = Task(
    description=(
        "Read the content from 'text.txt' using the FileReadTool, "
        "summarize its main points, and rewrite the entire content "
        "in a professional and polished markdown format. "
        "Save the final markdown output as 'summarized_output.txt' "
        "using FileWriterTool."
    ),
    expected_output=(
        "A professionally written and formatted markdown version "
        "of the original content, saved as a .txt file."
    ),
    agent=content_summarizer_agent,
)

# One agent, one task — the crew only wires the two together.
crew = Crew(
    tasks=[summarize_and_rewrite_task],
    agents=[content_summarizer_agent],
)

# Execute the workflow
result = crew.kickoff()

# Save the final answer with plain file I/O rather than through the tool's
# private `_run` hook. Per the crewai_tools documentation the writer expects a
# `filename` argument (not `file_name`) and string content, and it reports
# problems through its return value — which was never inspected — so a bad
# call would go unnoticed and the success banner below would still print.
# Writing directly raises immediately if the file cannot be created.
from pathlib import Path

summary_path = Path('output_files') / 'summarized_output.txt'
summary_path.parent.mkdir(parents=True, exist_ok=True)
# str() turns the kickoff result into its text form before writing.
summary_path.write_text(str(result), encoding='utf-8')

print("✅ Summarization and markdown rewrite completed successfully!")
print(result)


Would you like to view your execution traces? [y/N] (20s timeout): ✅ Summarization and markdown rewrite completed successfully!
# Transformer: An Electrical Device

*(For other uses, see Transformer (disambiguation).)*

A transformer is a passive electrical component that facilitates the transfer of electrical energy between two or more circuits through electromagnetic induction. This process occurs without a direct metallic connection between the circuits.

## Key Characteristics

*   **Component Type:** Passive
*   **Working Principle:** Electromagnetic induction
*   **Inventor:** Michael Faraday
*   **Invention Year:** 1831

## Fundamental Principle

The operation of a transformer relies on Faraday's law of induction. A varying current in the primary coil generates a varying magnetic flux within the transformer's core. This changing magnetic flux, in turn, induces a varying electromotive force (EMF) across any other coils wound around the same core (secondary coils).

## Application

In [11]:
import os
import re
import asyncio
import aiofiles
from dotenv import load_dotenv
from crewai import Agent, Task, Crew, Process, LLM
from crewai_tools import ScrapeWebsiteTool
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode, DefaultMarkdownGenerator

# ==============================================================
# Utility: Remove duplicate lines
# ==============================================================
def remove_duplicate_lines_in_memory(text: str) -> str:
    """Drop repeated lines from ``text``, keeping each line's first occurrence.

    Lines are compared exactly (case- and whitespace-sensitive), and blank
    lines count as lines, so only the first blank line survives. The result is
    re-joined with ``"\\n"``; a trailing newline in the input is not preserved.

    Args:
        text: The multi-line string to clean.

    Returns:
        The surviving lines, in their original order, joined by newlines.
    """
    # dict keys are unique and keep insertion order, which gives an
    # order-preserving de-duplication in a single pass.
    unique_lines = dict.fromkeys(text.splitlines())
    return "\n".join(unique_lines)


# ==============================================================
# Crawl4AI Async Scraper
# ==============================================================
async def quick_parallel_example(urls, output_file):
    """Crawl ``urls`` with Crawl4AI and save their merged markdown to ``output_file``.

    Every successfully crawled page contributes a ``--- Content from <url> ---``
    header followed by its markdown. Failed pages are reported on stdout and
    skipped. The merged text is passed through
    ``remove_duplicate_lines_in_memory`` before being written, and the file is
    written even when no page succeeded.

    Args:
        urls: The URLs handed to ``AsyncWebCrawler.arun_many``.
        output_file: Path of the UTF-8 text file to (over)write.
    """
    # Markdown is generated from the "fit_html" content source, the cache is
    # bypassed, and results are collected rather than streamed.
    run_config = CrawlerRunConfig(
        markdown_generator=DefaultMarkdownGenerator(content_source="fit_html"),
        cache_mode=CacheMode.BYPASS,
        stream=False,
    )

    sections = []
    async with AsyncWebCrawler() as crawler:
        for page in await crawler.arun_many(urls, config=run_config):
            if not page.success:
                print(f"[ERROR] {page.url} => {page.error_message}")
                continue
            print(f"[OK] {page.url}, length: {len(page.markdown)}")
            sections += [f"\n\n--- Content from {page.url} ---\n\n", page.markdown]

    # De-duplicate across all pages at once, so a line repeated on several
    # pages is kept only the first time it appears.
    deduped = remove_duplicate_lines_in_memory("\n".join(sections))

    async with aiofiles.open(output_file, 'w', encoding='utf-8') as out:
        await out.write(deduped)

    print(f"\n✅ Cleaned content saved to {output_file}")


# ==============================================================
# CrewAI Setup
# ==============================================================
OUTPUT_DIR = "web_files"
DEFAULT_FILE_STEM = "scraped_site"  # used when the prompt is left empty
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Keep only the last path component of what the user typed. A name such as
# "banks/texas" would otherwise point into a sub-directory that does not
# exist, and the failure would only surface when the file is opened for
# writing — after the whole crew run and crawl. An empty answer falls back to
# a default stem instead of producing a hidden ".txt" file.
file_stem = os.path.basename(input('Enter the file name: ').strip())
file_name = (file_stem or DEFAULT_FILE_STEM) + ".txt"
output_path = os.path.join(OUTPUT_DIR, file_name)

_ = load_dotenv()

llm = LLM(
    model="gemini/gemini-2.5-flash",
    temperature=0.5,
)

website_url = input("Enter the website URL to scrape: ").strip()
if not website_url:
    # Fail here rather than part-way through the agent run, which has no URL
    # to work from.
    raise ValueError("A website URL is required.")

# Tool to scrape the base website and get links
url_discovery_tool = ScrapeWebsiteTool(
    website_url=website_url,
    include_links=True,
)

# ==============================================================
# CrewAI Agent: URL Discovery
# ==============================================================
# Agent that inspects the scraped base page and picks out the company-related
# internal pages; its only tool is the scraper bound to the entered URL.
url_discovery_agent = Agent(
    role="Website Structure Analyzer",
    goal=(
        "Find the most important internal pages of the website that provide "
        "information about the company, such as About, Services, Products, "
        "Contact, Team, Careers, or FAQs. "
        "Return their **direct URLs only**, not names or JSON."
    ),
    backstory=(
        "You are an expert in analyzing website structures and identifying "
        "the most relevant pages for understanding the company, "
        "its mission, offerings, and people."
    ),
    llm=llm,
    tools=[url_discovery_tool],
    verbose=True,
)

# ==============================================================
# Task: Extract Important URLs
# ==============================================================
# Ask for plain, one-per-line absolute URLs so the answer can be parsed with a
# simple regex after kickoff.
# NOTE: the former `output_variable="important_urls"` keyword was dropped —
# it is not among the Task attributes in the CrewAI documentation and nothing
# in this notebook reads it; the URLs are taken from the kickoff result text.
url_discovery_task = Task(
    description=(
        "Analyze the website and extract the full URLs of all important internal pages "
        "that contain company-related information such as About, Services, Products, Contact, "
        "Team, Careers, or FAQ. Return ONLY full URLs (e.g., https://example.com/about), "
        "one per line, with no JSON formatting or extra text. Ignore external links."
    ),
    expected_output=(
        "A plain list of internal URLs that describe the company and its offerings."
    ),
    agent=url_discovery_agent,
)

# Single-agent, single-task crew, run sequentially with verbose logging.
crew = Crew(
    agents=[url_discovery_agent],
    tasks=[url_discovery_task],
    process=Process.sequential,
    verbose=True,
)


# ==============================================================
# Run CrewAI → Extract URLs → Crawl4AI Deep Scrape
# ==============================================================
def _extract_urls(text):
    """Return the distinct http(s) URLs found in ``text``, in order of first appearance."""
    # Characters that free-form or markdown answers commonly leave glued to
    # the end of a URL (sentence punctuation, emphasis markers, backticks,
    # closing angle/square brackets).
    trailing_junk = ".,;:!?*`>]"

    urls = []
    for match in re.findall(r'https?://[^\s"\')]+', text):
        url = match.rstrip(trailing_junk)
        # Skip matches that are nothing but a scheme once the junk is removed.
        if url.split("://", 1)[1]:
            urls.append(url)

    # dict.fromkeys removes duplicates while keeping the original order.
    return list(dict.fromkeys(urls))


async def main():
    """Run the URL-discovery crew, then crawl every URL it reported.

    The crew's answer is converted to text and scanned for http(s) URLs.
    Trailing punctuation is stripped from each match so that, for example,
    ``https://example.com/about.`` and ``https://example.com/about`` are
    treated as the same page instead of producing a broken duplicate. If no
    URL is found the function prints a notice and returns without crawling;
    otherwise the URLs are passed to ``quick_parallel_example`` together with
    the module-level ``output_path``.
    """
    print("\n🚀 Discovering important URLs with CrewAI...")
    result = crew.kickoff()

    # Extract URLs from text output
    important_urls = _extract_urls(str(result))

    print("\n🌐 Important URLs Found:")
    for url in important_urls:
        print(" -", url)

    if not important_urls:
        print("\n⚠️ No URLs found. Exiting.")
        return

    print("\n🕷️ Starting Crawl4AI deep scraping for extracted URLs...")
    await quick_parallel_example(important_urls, output_path)


# ==============================================================
# Execute
# ==============================================================

# Top-level `await` is valid here because IPython/Jupyter executes cell code
# inside an already-running event loop; in a plain Python script this would
# need asyncio.run(main()) instead.
await main()



🚀 Discovering important URLs with CrewAI...



🌐 Important URLs Found:
 - https://www.texasbank.com/about-us
 - https://www.texasbank.com/our-history
 - https://www.texasbank.com/careers
 - https://www.texasbank.com/our-team
 - https://www.texasbank.com/meet-our-mortgage-team
 - https://www.texasbank.com/meet-our-commercial-lenders
 - https://www.texasbank.com/contact-us
 - https://www.texasbank.com/locations-atms
 - https://www.texasbank.com/personal
 - https://www.texasbank.com/personal/spend/compare-checking
 - https://www.texasbank.com/personal/save/compare-savings
 - https://www.texasbank.com/personal/borrow/mortgage-loans
 - https://www.texasbank.com/mortgage
 - https://www.texasbank.com/mortgage/borrow/home-equity-loans
 - https://www.texasbank.com/business
 - https://www.texasbank.com/business/spend/compare-checking
 - https://www.texasbank.com/business/save/compare-savings
 - https://www.texasbank.com/business/borrow/commercial-real-estate-loans
 - https://www.texasbank.com/business/specialty-banking
 - https://www.texasb

[OK] https://www.texasbank.com/personal/borrow/mortgage-loans, length: 3660
[OK] https://www.texasbank.com/personal/save/compare-savings, length: 3660
[OK] https://www.texasbank.com/business/save/compare-savings, length: 3660
[OK] https://www.texasbank.com/locations-atms, length: 3660
[OK] https://www.texasbank.com/mortgage, length: 3660
[OK] https://www.texasbank.com/business/borrow/commercial-real-estate-loans, length: 3660
[OK] https://www.texasbank.com/community-reinvestment-act-public-file-2024, length: 3660
[OK] https://www.texasbank.com/our-history, length: 3660
[OK] https://www.texasbank.com/our-team, length: 3660
[OK] https://www.texasbank.com/business/spend/compare-checking, length: 3925
[OK] https://www.texasbank.com/business/specialty-banking, length: 4339
[OK] https://www.texasbank.com/about-us, length: 3925
[OK] https://www.texasbank.com/careers, length: 5292
[OK] https://www.texasbank.com/meet-our-commercial-lenders, length: 3925
[OK] https://www.texasbank.com/meet-our-m