In [1]:
!pip install -q wikipedia requests

import os
import json
import wikipedia
import requests
from datetime import datetime
from typing import List, Dict
from urllib.parse import quote_plus

# Startup banner, printed once when this cell/script is executed.
print(" UNIVERSAL WIKIPEDIA CONTENT GENERATOR")

# Setup
# Create the folder that generate_content() writes its .txt files into;
# exist_ok=True keeps re-running the cell from failing when it already exists.
# NOTE(review): '/content' looks like the Google Colab working directory --
# confirm/adjust this path before running the script anywhere else.
os.makedirs('/content/outputs', exist_ok=True)
# Select English Wikipedia for all calls made through the `wikipedia` package.
wikipedia.set_lang("en")

# ------------------ UNIVERSAL RESEARCH AGENT ------------------
class UniversalResearchAgent:
    """Collect Wikipedia material about a topic.

    Every source is a plain dict with at least ``title``, ``url`` and
    ``summary``.  Main-page sources fetched through the ``wikipedia`` package
    also carry ``content`` (truncated page text) and ``method`` (how the page
    was found); API-fallback sources carry ``method`` but no ``content``;
    related-topic sources carry ``is_related: True``.
    """

    # Endpoints of English Wikipedia used by the direct-API fallback.
    _API_URL = "https://en.wikipedia.org/w/api.php"
    _PAGE_URL_PREFIX = "https://en.wikipedia.org/wiki/"

    def __init__(self):
        pass

    def research(self, topic: str) -> List[Dict]:
        """Return the sources found for ``topic`` (an empty list if none).

        The main page is looked up through the ``wikipedia`` package first;
        a disambiguation page resolves to its first option, and a missing
        page falls back to a direct MediaWiki API search.  Up to two related
        pages are appended afterwards.  Errors are reported with ``print``
        and never propagate to the caller.
        """
        print(f" Researching: {topic}")
        sources = []

        # Special handling for "Machine Learning"
        if topic.lower() == "machine learning":
            return self._research_machine_learning()

        try:
            # Method 1: Try Wikipedia library
            try:
                sources.append(
                    self._fetch_page_source(topic, 12, 4000, "wikipedia_library")
                )

            except wikipedia.exceptions.DisambiguationError as e:
                # Use first option from disambiguation
                sources.append(
                    self._fetch_page_source(e.options[0], 12, 4000, "disambiguation")
                )

            except wikipedia.exceptions.PageError:
                # Method 2: Use Wikipedia API directly
                print(" Using Wikipedia API directly...")
                api_sources = self._wikipedia_api_search(topic)
                if api_sources:
                    sources.extend(api_sources)
                else:
                    print(" No Wikipedia content found")
                    return []

            # Get related topics.  Titles already collected are excluded so a
            # redirect/disambiguation target is not listed a second time as
            # "related" to itself.
            already_found = [source["title"] for source in sources]
            sources.extend(
                self._get_related_topics(topic, exclude_titles=already_found)
            )

        except Exception as e:
            print(f" Research error: {e}")
            # Final fallback: API search
            api_sources = self._wikipedia_api_search(topic)
            if api_sources:
                sources.extend(api_sources)

        print(f" Found {len(sources)} sources")
        return sources

    def _fetch_page_source(self, title: str, sentences: int,
                           content_chars: int, method: str) -> Dict:
        """Fetch one page via the ``wikipedia`` package and build its source dict.

        ``sentences`` limits the summary length, ``content_chars`` truncates
        the page text, and ``method`` is stored verbatim in the result.
        Exceptions raised by the ``wikipedia`` package propagate to the caller.
        """
        page = wikipedia.page(title, auto_suggest=False)
        summary = wikipedia.summary(title, sentences=sentences, auto_suggest=False)
        return {
            "title": page.title,
            "url": page.url,
            "summary": summary,
            "content": page.content[:content_chars],
            "method": method,
        }

    def _research_machine_learning(self) -> List[Dict]:
        """Special research for Machine Learning.

        Tries a few candidate page names in order and keeps the first one
        that loads; falls back to the direct API search when none does.
        """
        print(" Using special method for Machine Learning...")
        sources = []

        try:
            # Try different page names for Machine Learning
            possible_names = [
                "Machine learning",
                "ML (machine learning)",
                "Machine Learning (field)"
            ]

            for name in possible_names:
                try:
                    main_source = self._fetch_page_source(
                        name, 15, 5000, "special_handling"
                    )
                except Exception:
                    # This candidate name did not resolve; try the next one.
                    continue

                sources.append(main_source)
                # Get related topics
                sources.extend(
                    self._get_related_topics(
                        "machine learning",
                        exclude_titles=[main_source["title"]],
                    )
                )
                break

            if not sources:
                # Fallback to API
                api_sources = self._wikipedia_api_search("machine learning")
                if api_sources:
                    sources.extend(api_sources)

        except Exception as e:
            print(f" Machine Learning research failed: {e}")

        return sources

    @classmethod
    def _page_url(cls, title: str) -> str:
        """Return the article URL for ``title``.

        Wikipedia page paths use ``_`` for spaces; every other reserved
        character is percent-encoded (``C++`` -> ``C%2B%2B``).
        """
        return cls._PAGE_URL_PREFIX + quote_plus(title.replace(" ", "_"))

    @staticmethod
    def _shorten_extract(extract: str, limit: int = 500) -> str:
        """Trim ``extract`` to ``limit`` characters.

        ``...`` is appended only when text was actually cut off, so an empty
        extract stays empty and the caller can substitute a placeholder.
        """
        text = (extract or "").strip()
        if len(text) <= limit:
            return text
        return text[:limit] + "..."

    def _wikipedia_api_search(self, topic: str) -> List[Dict]:
        """Use Wikipedia API directly as fallback.

        Runs a full-text search and returns a source dict (with an intro
        extract as summary) for each of the first three hits.  Any failure is
        printed and results in an empty list.
        """
        try:
            params = {
                "action": "query",
                "list": "search",
                "srsearch": topic,
                "format": "json",
                "srlimit": 5
            }

            response = requests.get(self._API_URL, params=params, timeout=10)
            # Report HTTP errors as such instead of as a JSON decoding failure.
            response.raise_for_status()
            data = response.json()

            sources = []
            search_results = data.get("query", {}).get("search", [])

            for result in search_results[:3]:
                title = result["title"]

                # Get summary using API
                summary_params = {
                    "action": "query",
                    "prop": "extracts",
                    "exintro": True,
                    "explaintext": True,
                    "titles": title,
                    "format": "json"
                }

                summary_response = requests.get(
                    self._API_URL, params=summary_params, timeout=10
                )
                summary_response.raise_for_status()
                pages = summary_response.json().get("query", {}).get("pages", {})

                # Only one title was requested, so only the first page matters.
                extract = next(
                    (page_data.get("extract", "") for page_data in pages.values()),
                    "",
                )
                summary = self._shorten_extract(extract)

                sources.append({
                    "title": title,
                    "url": self._page_url(title),
                    "summary": summary if summary else f"Information about {title}",
                    "method": "api_fallback"
                })

            return sources

        except Exception as e:
            print(f" API search failed: {e}")
            return []

    def _get_related_topics(self, topic: str, max_related: int = 2,
                            exclude_titles=None) -> List[Dict]:
        """Get related topics.

        Searches Wikipedia for ``topic`` and returns at most ``max_related``
        source dicts flagged with ``is_related``.  ``exclude_titles`` is an
        optional iterable of page titles to skip (compared case-insensitively,
        like ``topic`` itself).  This is best-effort: pages that fail to load
        are skipped and a failed search yields an empty list.
        """
        related_sources = []
        if max_related <= 0:
            return related_sources

        skip = {topic.lower()}
        skip.update(title.lower() for title in (exclude_titles or ()))

        try:
            # Ask for a couple of spare hits because some will be skipped.
            search_results = wikipedia.search(topic, results=max_related + 2)
        except Exception as e:
            print(f" Related topic search failed: {e}")
            return related_sources

        for result in search_results:
            if len(related_sources) >= max_related:
                # Enough collected; do not fetch pages that would be discarded.
                break
            if result.lower() in skip:
                continue
            try:
                page = wikipedia.page(result, auto_suggest=False)
                if page.title.lower() in skip:
                    # The hit redirected to a page that is already covered.
                    continue
                summary = wikipedia.summary(result, sentences=4, auto_suggest=False)
            except Exception:
                # Ambiguous, missing or unreachable page: skip this hit.
                continue

            skip.add(page.title.lower())
            related_sources.append({
                "title": page.title,
                "url": page.url,
                "summary": summary,
                "is_related": True
            })

        return related_sources

# ------------------ CONTENT ORGANIZER ------------------
class ContentOrganizer:
    """Assemble research sources into a Markdown-style article."""

    def __init__(self):
        """Nothing to configure; the organizer keeps no state."""

    def create_article(self, topic: str, sources: List[Dict]) -> str:
        """Build the article text for ``topic`` from ``sources``.

        The first source supplies the overview (its ``summary``) and the
        detail sections (parsed from its ``content``).  Sources flagged with
        ``is_related`` are listed under "Related Topics", and every source is
        cited under "References".  An empty ``sources`` list yields a short
        "no content" message instead.
        """
        if not sources:
            return self._create_no_content_message(topic)

        primary = sources[0]

        # Title
        out = [f"# {topic}\n"]

        # Overview comes from the first source's summary
        if 'summary' in primary:
            out += ["## Overview\n", primary['summary'], ""]

        # Detail sections parsed out of the first source's page text
        if 'content' in primary:
            for part in self._extract_sections(primary['content']):
                out += [f"## {part['title']}\n", part['content'], ""]

        # Related topics
        related = [entry for entry in sources if entry.get('is_related')]
        if related:
            out.append("## Related Topics\n")
            for entry in related:
                out += [f"### {entry['title']}\n", entry['summary'], ""]

        # References: one numbered entry per source
        out.append("## References\n")
        for number, entry in enumerate(sources, start=1):
            out += [f"[{number}] {entry['title']}", f"    {entry['url']}"]
            if 'method' in entry:
                out.append(f"    Source: {entry['method']}")
            out.append("")

        return "\n".join(out)

    def _extract_sections(self, content: str) -> List[Dict]:
        """Split wiki-style plain text into at most six titled sections.

        A stripped line that both starts and ends with ``==`` opens a new
        section named after it (all ``=`` removed).  Text before the first
        heading goes into a section called "Details".  Only lines longer than
        20 characters that do not start with ``{``, ``[`` or ``|`` are kept,
        each followed by a single space.  Sections without text are dropped.
        """
        found = []
        heading, body = "Details", ""

        for raw in content.split('\n'):
            text = raw.strip()

            if text.startswith('==') and text.endswith('=='):
                # Close the section collected so far, then open the next one.
                if body.strip():
                    found.append({"title": heading, "content": body})
                heading, body = text.replace('=', '').strip(), ""
                continue

            if len(text) <= 20 or text.startswith(('{', '[', '|')):
                # Too short to be prose, or looks like markup/table residue.
                continue

            body += text + " "

        if body.strip():
            found.append({"title": heading, "content": body})

        return found[:6]

    def _create_no_content_message(self, topic: str) -> str:
        """Return the placeholder article used when nothing was found."""
        return f"# {topic}\n\nNo Wikipedia content could be retrieved for this topic."

# ------------------ MAIN EXECUTION ------------------
def _topic_to_file_stem(topic: str) -> str:
    """Turn a user-supplied topic into a safe file name stem.

    Spaces become underscores and the text is lower-cased.  Path separators,
    control characters and characters that are invalid in file names on
    common platforms are replaced by ``_`` so a topic such as ``AC/DC`` or
    ``../x`` cannot point outside the output directory.
    """
    unsafe = set('/\\:*?"<>|')
    stem = topic.replace(' ', '_').lower()
    stem = "".join("_" if ch in unsafe or ord(ch) < 32 else ch for ch in stem)
    return stem or "untitled"


def generate_content(topic: str, output_dir: str = '/content/outputs'):
    """Generate content for any topic.

    Researches ``topic``, builds the article and saves it as a UTF-8 text
    file (with a small header) inside ``output_dir``, which is created when
    missing.  An existing file for the same topic is overwritten.

    Returns:
        A tuple ``(article, sources, filename)``: the article text, the list
        of source dicts it was built from, and the path of the saved file.
    """
    print(f"🚀 Generating: {topic}")

    researcher = UniversalResearchAgent()
    organizer = ContentOrganizer()

    sources = researcher.research(topic)
    article = organizer.create_article(topic, sources)

    # Save
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, f"{_topic_to_file_stem(topic)}.txt")

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"TOPIC: {topic}\n")
        f.write(f"SOURCES: {len(sources)}\n")
        f.write(f"GENERATED: {datetime.now().isoformat()}\n")
        f.write("=" * 70 + "\n\n")
        f.write(article)

    word_count = len(article.split())
    print(f" Generated {word_count} words")
    return article, sources, filename

def display_content(article: str, topic: str):
    """Print a framed preview of ``article`` under a heading for ``topic``.

    Only the first 20 lines of the article are considered and blank ones are
    skipped; when the article is longer, a note points to the saved file.
    """
    preview_lines = 20
    rule = "=" * 60

    print(f"\n CONTENT: {topic}")
    print(rule)

    rows = article.split('\n')
    for row in rows[:preview_lines]:
        if not row.strip():
            continue
        print(row)

    truncated = len(rows) > preview_lines
    if truncated:
        print("\n... [full content in saved file] ...")
    print(rule)

# ------------------ RUN ------------------
# Interactive driver: show a few example topics, then keep generating
# articles until the user quits, declines to continue, or input ends.
print("🎯 ENTER ANY TOPIC")
print("=====================")
print("Test with:")
print("• Machine Learning")
print("• Artificial Intelligence")
print("• Quantum Computing")
print("• Blockchain")
print("• Renewable Energy")
print("=====================")

while True:
    try:
        topic = input("\n Enter topic (or 'quit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input is exhausted or the prompt was interrupted: leave the loop
        # cleanly instead of ending with a traceback.
        break

    if topic.lower() == 'quit':
        break

    if not topic:
        continue

    try:
        article, sources, filename = generate_content(topic)

        if sources:
            display_content(article, topic)
            # The preview may be truncated, so tell the user where the full
            # article was written.
            print(f" Saved to: {filename}")
            print(f" Sources: {len(sources)}")
            print("\n Pages:")
            for i, source in enumerate(sources, 1):
                print(f"   {i}. {source['title']}")
        else:
            print(" No content found")

    except Exception as e:
        # Top-level boundary: report the problem and keep the session alive.
        print(f" Error: {e}")

    print("\n" + "="*50)
    try:
        cont = input(" Another topic? (y/n): ").strip().lower()
    except (EOFError, KeyboardInterrupt):
        break
    if cont not in ('y', 'yes'):
        break

print(" Thank you!")

  Preparing metadata (setup.py) ... done
  Building wheel for wikipedia (setup.py) ... done
 UNIVERSAL WIKIPEDIA CONTENT GENERATOR
🎯 ENTER ANY TOPIC
Test with:
• Machine Learning
• Artificial Intelligence
• Quantum Computing
• Blockchain
• Renewable Energy

 Enter topic (or 'quit'): Machine Learning
🚀 Generating: Machine Learning
 Researching: Machine Learning
 Using special method for Machine Learning...
 Generated 1137 words

 CONTENT: Machine Learning
# Machine Learning
## Overview
Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalise to unseen data, and thus perform tasks without explicit instructions. Within a subdiscipline in machine learning, advances in the field of deep learning have allowed neural networks, a class of statistical algorithms, to surpass many previous machine learning approaches in performance.
ML finds application in