In [None]:
!pip install "crewai[tools]" beautifulsoup4 requests transformers ddgs

In [None]:
!pip install --upgrade ipywidgets

In [None]:
import warnings
# Suppress every Python warning for the rest of the session, presumably to keep
# cell output readable. NOTE(review): this is a blanket filter, so it also hides
# deprecation/runtime warnings that could matter - narrow it by category or
# module if a warning ever needs to be seen.
warnings.filterwarnings("ignore")

In [None]:
import re
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from crewai import Agent, Task, Crew, Process, LLM
from crewai.tools import BaseTool
from ddgs import DDGS
from google.colab import userdata

# The API key is read from the Colab secrets store so it never appears in the
# notebook source or its saved outputs.
gemini_api_key = userdata.get("GEMINI_API_KEY")

# Single LLM handle shared by every agent defined further down.
gemini_llm = LLM(
    api_key=gemini_api_key,
    model="gemini/gemini-2.5-pro",
    temperature=0.8,
)

In [None]:
# Domains whose full article body is downloaded and mined in addition to the
# search-result snippet. Matching is exact against the normalised host name.
_DEEP_FETCH_DOMAINS = frozenset({"techcrunch.com", "wired.com", "mit.edu"})

# Maximum number of entries listed under each heading of the summary.
_MAX_SECTION_ITEMS = 5

# Compiled once; every pattern is applied case-insensitively.
_RESEARCH_PATTERNS = {
    # A figure followed by a unit, e.g. "5%", "3.5 billion", "12 months".
    # The number part accepts a single digit so small figures are not dropped.
    "stats": re.compile(
        r'\b\d[\d,\.]*\s*(?:percent|%|million|billion|years?|months?|\$)',
        re.IGNORECASE,
    ),
    # From a trend keyword up to the end of that sentence.
    "trends": re.compile(
        r'\b(?:growth|decline|increase|decrease|trend|adoption|emerging)\b.*?[\.\!\?]',
        re.IGNORECASE,
    ),
    # From a release/announcement keyword up to the end of that sentence.
    "devs": re.compile(
        r'\b(?:announce|launch|release|introduce|new|update|version)\b.*?[\.\!\?]',
        re.IGNORECASE,
    ),
    # A quoted passage followed by an em-dash attribution; only the quoted
    # text (the single capture group) is returned by findall().
    "opinions": re.compile(r'["“](.*?)[”"]\s*—\s*\w+\s*\w+', re.IGNORECASE),
}


def _source_domain(url: str) -> str:
    """Return the host part of ``url`` without a leading ``www.``, or ``"unknown"``."""
    host = urlparse(url).netloc
    if host.startswith("www."):
        # Strip only the prefix; a plain replace() would also hit "www." mid-host.
        host = host[len("www."):]
    return host or "unknown"


def _normalize_result(hit: dict) -> dict:
    """Map one raw search hit onto the title/url/snippet/source record used below."""
    # ddgs text results carry the link under "href"; "url" is kept as a
    # fallback so hits that do use that key keep working.
    url = hit.get("href") or hit.get("url") or ""
    return {
        "title": hit.get("title") or "",
        "url": url,
        "snippet": hit.get("body") or "",
        "source": _source_domain(url),
    }


def _collect_matches(text: str, findings: dict) -> None:
    """Append every not-yet-seen pattern match found in ``text`` to ``findings`` in place."""
    for key, pattern in _RESEARCH_PATTERNS.items():
        bucket = findings[key]
        for match in pattern.findall(text):
            if match not in bucket:
                bucket.append(match)


def _fetch_article_text(url: str) -> str:
    """Download ``url`` and return the visible text of its main content area.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            4xx/5xx response (so error pages are never mined as content).
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Drop page chrome and non-content markup before extracting text.
    for tag in ("script", "style", "header", "footer", "nav"):
        for element in soup.find_all(tag):
            element.decompose()

    main_content = soup.find('main') or soup.find('article') or soup.body
    return main_content.get_text(separator=' ', strip=True) if main_content else ""


def _format_section(heading: str, items: list, empty_message: str) -> str:
    """Render one summary section: a heading line plus up to ``_MAX_SECTION_ITEMS`` bullets."""
    lines = [f"{heading}:"]
    shown = items[:_MAX_SECTION_ITEMS]
    if shown:
        lines.extend(f"  - {item}" for item in shown)
    else:
        lines.append(f"  - {empty_message}")
    return "\n".join(lines) + "\n"


class BlogResearchTool(BaseTool):
    """Web-search tool that condenses search hits into a plain-text research summary.

    A DuckDuckGo text search is run for the topic, regular expressions pull
    statistics, trend sentences, development sentences and attributed quotes
    out of each snippet (and out of the full page for a few selected domains),
    and the findings are returned as a sectioned text report.
    """

    name: str = "Blog Research Assistant"
    description: str = "Retrieves up-to-date information and data from the web for blog content creation"

    def _run(self, topic: str, max_results: int = 5) -> str:
        """Search the web for ``topic`` and return a formatted research summary.

        Args:
            topic: Search query.
            max_results: Maximum number of search hits to request.

        Returns:
            A text report with the sections "Key Trends", "Latest
            Developments", "Expert Opinions", "Statistics" and "Sources",
            each limited to five entries and carrying a "No ... found"
            placeholder when empty.

        Errors while downloading or parsing an individual article are printed
        and skipped; errors raised by the search itself are not caught here.
        """
        with DDGS() as ddgs:
            results = [
                _normalize_result(hit)
                for hit in ddgs.text(topic, max_results=max_results)
            ]

        findings = {key: [] for key in _RESEARCH_PATTERNS}
        # One URL per domain; hits without a usable URL are left out so the
        # report never lists an empty "unknown ()" source.
        sources = {r["source"]: r["url"] for r in results if r["url"]}

        for r in results:
            _collect_matches(r["snippet"], findings)

            if r["source"] in _DEEP_FETCH_DOMAINS:
                try:
                    _collect_matches(_fetch_article_text(r["url"]), findings)
                except Exception as e:
                    # Best effort: one unreachable or malformed page must not
                    # abort the whole research run.
                    print(f"Error processing {r['url']}: {str(e)}")

        sections = [
            _format_section("Key Trends", findings["trends"], "No trends found"),
            _format_section("Latest Developments", findings["devs"], "No developments found"),
            _format_section("Expert Opinions", findings["opinions"], "No opinions found"),
            _format_section("Statistics", findings["stats"], "No statistics found"),
            _format_section(
                "Sources",
                [f"{domain} ({url})" for domain, url in sources.items()],
                "No sources found",
            ),
        ]
        # Sections are separated by one blank line; the last one ends with a
        # single newline.
        return f"Research Summary for '{topic}':\n\n" + "\n".join(sections)

In [None]:
# Researcher: the only agent equipped with the web-research tool.
research_agent = Agent(
    llm=gemini_llm,
    tools=[BlogResearchTool()],
    role='Lead Deep Research Specialist',
    goal='Conduct exhaustive, multi-source investigations to uncover novel insights and hidden patterns',
    backstory="""A forensic researcher with 15+ years experience in investigative technology analysis.
    Known for digging beyond surface-level data to reveal groundbreaking connections.
    Former head of research at MIT's Emerging Tech Lab.""",
    max_iter=5,
    allow_delegation=False,
    verbose=True,
)

# Writer: has no tools of its own and works from the material handed to it.
writer_agent = Agent(
    llm=gemini_llm,
    role='Technical Content Architect',
    goal='Transform complex research findings into authoritative, evidence-based publications',
    backstory="""PhD in Scientific Communication with a specialty in making deep technical research
    accessible to executive audiences. Developed the 'Pyramid of Evidence' writing framework
    used by leading tech publications.""",
    max_iter=3,
    allow_delegation=False,
    verbose=True,
)

In [None]:
# Research step, assigned to the research agent. The '{topic}' placeholder is
# filled from the inputs passed to kickoff().
# NOTE(review): the task is flagged asynchronous, yet the writing task consumes
# it through `context`, so presumably no real parallelism is gained - confirm.
research_task = Task(
    agent=research_agent,
    async_execution=True,
    description="""Conduct exhaustive investigation on '{topic}' covering:
    - Emerging patterns and underreported trends (last 6 months)
    - Competitive landscape analysis (top 5 players benchmarking)
    - Technical white papers and academic research (last 2 years)
    - Regulatory developments and standardization efforts
    - Patent analysis and R&D directions
    - Case studies of successful/unsuccessful implementations""",
    expected_output="""Technical research dossier containing:
    1. Executive Summary (100 words)
    2. Market Thermometer: Adoption metrics + growth vectors (300 words)
    3. Technology Deep Dive: Architectural comparisons (400 words)
    4. Implementation Matrix: Cost/benefit analysis (200 words)
    5. Risk Assessment: Technical debt + adoption barriers (200 words)
    6. Future Projections: Roadmap to 2030 (300 words)
    7. Annotated Bibliography (10+ sources)""",
)

# Writing step, assigned to the writer agent. It receives the research task's
# output through `context`; '{topic}' is filled from the kickoff() inputs.
writing_task = Task(
    description="""Transform the research dossier into an authoritative industry report with:
    - Technical depth suitable for CTO/CIO audience
    - Evidence-based arguments with proper citations [IEEE format]
    - Comparative analysis frameworks (tables/graphs where applicable)
    - Implementation decision trees
    - Risk/reward evaluation matrices
    - Vendor-neutral technology assessment""",
    expected_output="""Structured technical report (1000-1200 words) containing:
    1. Catchy Heading (Phrase, h1 heading)
    2. Disruptive Potential of {topic} (300 words, h2 heading)
    3. Architectural Evolution Timeline (200 - 300 words, h2 heading)
    4. Implementation Decision Framework (200 words, h2 heading)
    5. Total Cost of Ownership Analysis (comparative table, h2 heading)
    6. Risk Assessment Matrix (likelihood/impact grid, h2 heading)
    7. Strategic Adoption Roadmap (phased approach, h2 heading)
    8. Conclusion: Preparing Your Tech Stack (200 words, h3 heading)
    9. Appendix: Methodology & Data Sources: (each source in different line, h3 heading)""",
    agent=writer_agent,
    context=[research_task],
    # Optional: uncomment to also have the report written to disk.
    # output_file="industry_report.md",
)

In [None]:
# Assemble the two-step pipeline: tasks run in list order (research, then writing).
# NOTE(review): each agent already carries its own `llm`; whether Crew itself
# uses the `llm` keyword is not visible from this notebook - confirm or drop it.
blog_crew = Crew(
    tasks=[research_task, writing_task],
    agents=[research_agent, writer_agent],
    process=Process.sequential,
    verbose=True,
    llm=gemini_llm,
)

# Topic substituted for the '{topic}' placeholder in the task prompts.
blog_topic = "Computer Science"
result = blog_crew.kickoff(inputs={"topic": blog_topic})

# Banner and report are emitted on consecutive lines, exactly as two print() calls would.
print("\n\n-------------------- FINAL BLOG POST -------------------", result, sep="\n")

In [None]:
from IPython.display import Markdown, display
# Render the crew result's `raw` text as formatted Markdown in the cell output
# (headings, tables, lists) instead of the plain-text dump printed earlier.
display(Markdown(result.raw))

In [None]:
from google.colab import files

# Save the report and hand it to the browser as a download.
# The encoding is fixed to UTF-8 because LLM-generated text can contain
# non-ASCII characters (curly quotes, em-dashes) and the platform default
# encoding is not guaranteed to handle them.
with open('research_results.md', 'w', encoding='utf-8') as f:
    f.write(result.raw)

files.download('research_results.md')