In [None]:
!pip install langchain_google_genai
!pip install -U langchain
!pip install requests beautifulsoup4


In [None]:
import requests
from bs4 import BeautifulSoup
from langchain_google_genai import ChatGoogleGenerativeAI
from typing import List, Dict, Tuple
import re
import json
from collections import Counter

In [None]:


#@title ExtractorAgent Class
class ExtractorAgent:
    """
    Agent 1: Responsible for extracting the list of papers from GitHub.
    """

    def __init__(self, llm):
        self.llm = llm
        self.github_urls = [
            "https://github.com/kyegomez/awesome-multi-agent-papers",
            "https://github.com/shizhl/Multi-Agent-Papers",
        ]

    def fetch_github_content(self, url: str, timeout: float = 30.0) -> str:
        """
        Fetch the HTML content of a GitHub page.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The response body as text, or an empty string when the request
            fails (the error is printed rather than raised).
        """
        try:
            # Without an explicit timeout a stalled connection blocks forever.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error while fetching {url}: {e}")
            return ""
        return response.text

    def extract_paper_titles(self, html_content: str) -> List[str]:
        """
        Extract candidate paper titles from an HTML page.

        Two heuristics are combined: the text of every <strong> element, and
        the text of every link whose href mentions "arxiv.org" or "pdf".
        Only texts longer than 10 characters are kept.

        Args:
            html_content: Raw HTML of the page.

        Returns:
            Unique titles in the order they were first seen, so repeated runs
            over the same page give the same list.
        """
        soup = BeautifulSoup(html_content, "html.parser")

        candidates: List[str] = []

        # Heuristic 1: bold text, which often corresponds to paper titles.
        for strong in soup.find_all("strong"):
            text = strong.get_text().strip()
            # Skip short fragments and header-like text starting with "#".
            if len(text) > 10 and not text.startswith("#"):
                candidates.append(text)

        # Heuristic 2: links that point to arXiv or to a PDF file.
        for link in soup.find_all("a", href=True):
            href = link["href"]
            if "arxiv.org" in href or "pdf" in href.lower():
                text = link.get_text().strip()
                if len(text) > 10:
                    candidates.append(text)

        # dict.fromkeys removes duplicates while keeping first-seen order;
        # a set would return the titles in a different order on every run.
        return list(dict.fromkeys(candidates))

    def categorize_papers_with_llm(self, papers: List[str]) -> Dict:
        """
        Use the LLM to get a first, coarse-grained categorization of papers.
        """
        # Limit to first 50 papers for the LLM prompt
        papers_text = "\n".join(
            [f"{i + 1}. {paper}" for i, paper in enumerate(papers[:50])]
        )

        prompt = f"""
You are an expert in Multi-Agent Systems.
You have received the following list of paper titles.

Papers:
{papers_text}

Please:
1. Identify the main topic of each paper.
2. Group the papers into high-level categories (e.g., Collaboration, Frameworks, Healthcare, Coding, etc.).
3. Output ONLY the category names and the number of papers in each category.

Output format:
- Category Name: Count
"""

        response = self.llm.invoke(prompt)
        return response.content

    def run(self) -> List[str]:
        """
        Run Agent 1 end-to-end.
        """
        print("üîç Agent 1 (Extractor) started...\n")

        all_papers: List[str] = []

        for url in self.github_urls:
            print(f"üì• Fetching: {url}")
            html = self.fetch_github_content(url)

            if html:
                papers = self.extract_paper_titles(html)
                all_papers.extend(papers)
                print(f"‚úÖ Number of extracted papers: {len(papers)}\n")

        print(f"üìä Total extracted papers (including duplicates): {len(all_papers)}\n")

        # Remove duplicates
        unique_papers = list(set(all_papers))
        print(f"üìä Number of unique papers: {len(unique_papers)}\n")

        # Show first 10 samples
        print("üîñ Sample papers:")
        for i, paper in enumerate(unique_papers[:10], 1):
            print(f"   {i}. {paper}")

        return unique_papers


In [None]:
#@title Run Agent 1
# NOTE: `llm` must already exist when this cell runs. In this notebook it is
# only created in the "run agent 2" cell further down, so execute that model
# initialization first.
extractor = ExtractorAgent(llm)
papers = extractor.run()


In [None]:
#@title Save results for the next Agent
print("\n‚úÖ Agent 1 finished!")
print(f"üì¶ Total number of papers: {len(papers)}")
print("üì§ Ready for Agent 2 (Categorizer)")

# Persist the titles, one per line, so they can be reloaded later.
with open("extracted_papers.txt", "w", encoding="utf-8") as f:
    f.writelines(f"{title}\n" for title in papers)

print("\nüíæ File 'extracted_papers.txt' has been saved.")


In [None]:
#@title CategorizerAgent
class CategorizerAgent:
    """
    Agent 2: Categorize papers into main research topics.
    """

    def __init__(self, llm):
        self.llm = llm

        # Main categories based on inspection of GitHub lists
        self.main_categories: Dict[str, str] = {
            "Multi-Agent Collaboration": "Collaboration and coordination between agents",
            "Frameworks & Tools": "Frameworks and development tools",
            "Software Engineering": "Software engineering and coding applications",
            "Healthcare & Medical": "Medical and healthcare applications",
            "Data & ML": "Data analysis and machine learning",
            "Evaluation & Benchmarks": "Evaluation and benchmarking",
            "Social Simulation": "Social simulation and human behavior",
            "Reasoning & Problem Solving": "Reasoning and problem solving",
            "Communication": "Communication and information exchange",
            "Architecture & Design": "System architecture and design",
            "Security": "Cybersecurity",
            "Multimodal": "Multimodal systems",
            "Other Applications": "Other applications",
        }

    def categorize_single_paper(self, paper_title: str) -> str:
        """
        Categorize a single paper using the LLM.
        """
        categories_list = "\n".join(
            [f"- {cat}: {desc}" for cat, desc in self.main_categories.items()]
        )

        prompt = f"""
You are an expert in Multi-Agent Systems research.

Given this paper title:
"{paper_title}"

Assign it to ONE of these categories:
{categories_list}

Return ONLY the category name (e.g., "Multi-Agent Collaboration"), nothing else.
"""

        try:
            response = self.llm.invoke(prompt)
            category = response.content.strip()

            # If the returned category is not in the list, map it to the closest or Other
            if category not in self.main_categories:
                for cat in self.main_categories.keys():
                    if cat.lower() in category.lower():
                        return cat
                return "Other Applications"

            return category
        except Exception as e:
            print(f"Error while classifying '{paper_title}': {e}")
            return "Other Applications"

    def categorize_batch(self, papers: List[str], batch_size: int = 10) -> Dict[str, List[str]]:
        """
        Batch categorization of papers (faster, fewer API calls).
        """
        print(f"üìä Starting categorization of {len(papers)} papers...\n")

        categorized: Dict[str, List[str]] = {cat: [] for cat in self.main_categories.keys()}

        for i in range(0, len(papers), batch_size):
            batch = papers[i : i + batch_size]

            papers_text = "\n".join([f"{j + 1}. {paper}" for j, paper in enumerate(batch)])

            prompt = f"""
You are an expert in Multi-Agent Systems research.

Categorize each paper below into ONE category from this list:
{', '.join(self.main_categories.keys())}

Papers:
{papers_text}

Return your answer in this EXACT format (one per line):
1. Category Name
2. Category Name
...

Example:
1. Multi-Agent Collaboration
2. Software Engineering
"""

            try:
                response = self.llm.invoke(prompt)
                categories = response.content.strip().split("\n")

                for idx, category_line in enumerate(categories):
                    if idx >= len(batch):
                        break

                    # Extract category name from "N. Category Name"
                    category = category_line.split(". ", 1)[-1].strip()

                    # Validate category
                    if category in self.main_categories:
                        categorized[category].append(batch[idx])
                    else:
                        # Try to find the closest category
                        found = False
                        for cat in self.main_categories.keys():
                            if cat.lower() in category.lower():
                                categorized[cat].append(batch[idx])
                                found = True
                                break
                        if not found:
                            categorized["Other Applications"].append(batch[idx])

                print(
                    f"‚úÖ Classified {min(i + batch_size, len(papers))} "
                    f"out of {len(papers)} papers"
                )

            except Exception as e:
                print(f"‚ö†Ô∏è Error in batch {i // batch_size + 1}: {e}")
                # If something fails, put all papers in this batch into Other
                for paper in batch:
                    categorized["Other Applications"].append(paper)

        return categorized

    def generate_statistics(self, categorized: Dict[str, List[str]]) -> Dict:
        """
        Generate statistics (count, percentage, description) for each category.
        """
        stats: Dict[str, Dict] = {}
        total = sum(len(ps) for ps in categorized.values())

        for category, ps in categorized.items():
            count = len(ps)
            percentage = (count / total * 100) if total > 0 else 0.0
            stats[category] = {
                "count": count,
                "percentage": round(percentage, 2),
                "description": self.main_categories[category],
            }

        return stats

    def run(self, papers: List[str], batch_size: int = 10) -> Tuple[Dict[str, List[str]], Dict[str, Dict]]:
        """
        Run Agent 2 end-to-end.

        Args:
            papers: List of paper titles
            batch_size: Number of papers per batch (default: 10)
        """
        print("üîç Agent 2 (Categorizer) started...\n")
        print(f"üì¶ Batch size: {batch_size}\n")

        # Categorize all papers with custom batch_size
        categorized = self.categorize_batch(papers, batch_size=batch_size)  # ‚Üê ÿ≠ÿßŸÑÿß ŸÇÿßÿ®ŸÑ ÿ™ŸÜÿ∏€åŸÖ!

        # Compute statistics
        stats = self.generate_statistics(categorized)


        # Print results
        print("\n" + "=" * 60)
        print("üìä Categorization results:")
        print("=" * 60 + "\n")

        # Sort by count (descending)
        sorted_stats = sorted(stats.items(), key=lambda x: x[1]["count"], reverse=True)

        for category, data in sorted_stats:
            if data["count"] > 0:
                print(f"üìÅ {category}")
                print(f"   Count: {data['count']} ({data['percentage']}%)")
                print(f"   Description: {data['description']}\n")

        return categorized, stats


In [None]:
#@title run agent 2

import os

from langchain_google_genai import ChatGoogleGenerativeAI

# Read the Gemini API key from the environment instead of committing it to
# the notebook; set GOOGLE_API_KEY before running this cell.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this cell."
    )

# Initialize model with gemini-2.5-flash
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    api_key=api_key,
    temperature=0
)

batch_size = 30
# Ceiling division: number of LLM calls needed to cover every paper.
expected_batches = -(-len(papers) // batch_size)

print("üöÄ Starting categorization with gemini-2.5-flash")
print(f"   Papers: {len(papers)}")
print(f"   Batch size: {batch_size}")
print(f"   Expected batches: {expected_batches}\n")

# Run categorization
categorizer = CategorizerAgent(llm)
categorized_papers, statistics = categorizer.run(papers, batch_size=batch_size)

# Save results
import json

with open('categorized_papers_final.json', 'w', encoding='utf-8') as f:
    json.dump(categorized_papers, f, ensure_ascii=False, indent=2)

with open('statistics_final.json', 'w', encoding='utf-8') as f:
    json.dump(statistics, f, ensure_ascii=False, indent=2)

print("\n" + "="*60)
print("‚úÖ All papers categorized successfully!")
print("üíæ Files saved:")
print("   - categorized_papers_final.json")
print("   - statistics_final.json")
print("\nüéØ Ready for Agent 3!")
print("="*60)


In [None]:
#@title Save results for later use
banner = "=" * 60
print("\n" + banner)
print("üíæ Saving results...")
print(banner + "\n")

# Save as JSON: one file for the categorized titles, one for the statistics.
for target, payload in (
    ("categorized_papers.json", categorized_papers),
    ("statistics.json", statistics),
):
    with open(target, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)

print("‚úÖ The following files have been saved:")
print("   - categorized_papers.json (categorized papers)")
print("   - statistics.json (category statistics)")

non_empty = sum(1 for s in statistics.values() if s["count"] > 0)

print("\nüìä Final summary:")
print(f"   - Total number of papers: {len(papers)}")
print(f"   - Number of non-empty categories: {non_empty}")
print("\n‚úÖ Agent 2 has finished!")


In [None]:
#@title Generate output for plotting
import matplotlib.pyplot as plt

def visualize_categories(statistics: Dict):
    """
    Draw a bar chart of paper counts per category.

    Empty categories are left out and bars run from the largest count to the
    smallest. The chart is saved as 'paper_distribution.png' and then shown.
    """
    # Keep only populated categories, largest first.
    ranked = sorted(
        ((name, info["count"]) for name, info in statistics.items() if info["count"] > 0),
        key=lambda pair: pair[1],
        reverse=True,
    )
    names = [name for name, _ in ranked]
    counts = [count for _, count in ranked]
    positions = range(len(ranked))

    # Bar chart
    plt.figure(figsize=(14, 8))
    bars = plt.bar(positions, counts, color="steelblue")
    plt.xticks(positions, names, rotation=45, ha="right")
    plt.xlabel("Category", fontsize=12)
    plt.ylabel("Number of papers", fontsize=12)
    plt.title(
        "Distribution of Papers across LLM Multi-Agent Categories",
        fontsize=14,
        fontweight="bold",
    )
    plt.tight_layout()

    # Write each bar's value just above it.
    for bar in bars:
        top = bar.get_height()
        center_x = bar.get_x() + bar.get_width() / 2.0
        plt.text(center_x, top, f"{int(top)}", ha="center", va="bottom", fontsize=10)

    plt.savefig("paper_distribution.png", dpi=300, bbox_inches="tight")
    print("\nüìä Chart saved as 'paper_distribution.png'")
    plt.show()


# Draw the chart; `statistics` is the per-category stats dict returned by
# CategorizerAgent.run in the "run agent 2" cell.
visualize_categories(statistics)


In [None]:
# Install squarify for treemap (if not already installed)
!pip install squarify -q


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import textwrap
from typing import Dict


In [None]:
#@title ReporterAgent
class ReporterAgent:
    """
    Agent 3: Generate a comprehensive report with various charts and textual analysis.
    """

    def __init__(self, llm):
        """
        Store the LLM client and prepare the report output directory.

        Side effects: creates the "report_output" directory when it is
        missing and sets matplotlib's global font family to "DejaVu Sans".
        """
        import os

        self.llm = llm
        self.output_dir = "report_output"

        # exist_ok=True creates the directory only when it is missing, with no
        # separate existence check that could race with another process.
        os.makedirs(self.output_dir, exist_ok=True)

        # Font settings (optional - useful for Persian text in plots)
        plt.rcParams["font.family"] = "DejaVu Sans"

    def plot_bar_chart(self, statistics: Dict, filename: str = "bar_chart.png"):
        """
        Draw a vertical bar chart of paper counts per category.

        Empty categories are skipped and bars are ordered from the largest
        count to the smallest. The image is saved under ``self.output_dir``
        with the given filename and the figure is closed afterwards.
        """
        ranked = sorted(
            ((name, info["count"]) for name, info in statistics.items() if info["count"] > 0),
            key=lambda pair: pair[1],
            reverse=True,
        )
        names = [name for name, _ in ranked]
        counts = [count for _, count in ranked]
        positions = range(len(ranked))

        plt.figure(figsize=(16, 10))
        palette = plt.cm.viridis(np.linspace(0.3, 0.9, len(ranked)))

        bars = plt.bar(
            positions,
            counts,
            color=palette,
            edgecolor="black",
            linewidth=1.5,
            alpha=0.8,
        )

        # Wrap long category names so the rotated tick labels stay compact.
        wrapped_names = [textwrap.fill(name, 15) for name in names]
        plt.xticks(positions, wrapped_names, rotation=45, ha="right", fontsize=11)
        plt.ylabel("Number of Papers", fontsize=14, fontweight="bold")
        plt.xlabel("Category", fontsize=14, fontweight="bold")
        plt.title(
            "Distribution of Multi-Agent LLM Papers by Category",
            fontsize=16,
            fontweight="bold",
            pad=20,
        )
        plt.grid(axis="y", alpha=0.3, linestyle="--")

        # Write each bar's value just above it.
        for bar in bars:
            top = bar.get_height()
            center_x = bar.get_x() + bar.get_width() / 2.0
            plt.text(
                center_x,
                top,
                f"{int(top)}",
                ha="center",
                va="bottom",
                fontsize=11,
                fontweight="bold",
            )

        plt.tight_layout()
        plt.savefig(f"{self.output_dir}/{filename}", dpi=300, bbox_inches="tight")
        plt.close()
        print(f"  ‚úÖ {filename}")

    def plot_pie_chart(self, statistics: Dict, filename: str = "pie_chart.png"):
        """
        Draw a pie chart of the eight largest categories.

        Any remaining non-empty categories are merged into a single "Others"
        slice. The image is saved under ``self.output_dir`` and the figure is
        closed afterwards.
        """
        ranked = sorted(
            ((name, info["count"]) for name, info in statistics.items() if info["count"] > 0),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Top 8 categories + Others
        slices = dict(ranked[:8])
        remainder = sum(count for _, count in ranked[8:])
        if remainder > 0:
            slices["Others"] = remainder

        plt.figure(figsize=(14, 10))
        palette = plt.cm.Set3(np.linspace(0, 1, len(slices)))

        _, _, autotexts = plt.pie(
            list(slices.values()),
            labels=[textwrap.fill(name, 20) for name in slices],
            autopct="%1.1f%%",
            colors=palette,
            startangle=90,
            textprops={"fontsize": 11, "weight": "bold"},
            explode=[0.05] * len(slices),  # separate slices slightly
        )

        # Restyle the percentage labels drawn inside the slices.
        for pct_label in autotexts:
            pct_label.set_color("white")
            pct_label.set_fontsize(12)
            pct_label.set_weight("bold")

        plt.title(
            "Category Distribution (Top 8 + Others)",
            fontsize=16,
            fontweight="bold",
            pad=20,
        )
        plt.tight_layout()
        plt.savefig(f"{self.output_dir}/{filename}", dpi=300, bbox_inches="tight")
        plt.close()
        print(f"  ‚úÖ {filename}")

    def plot_horizontal_bar(self, statistics: Dict, filename: str = "horizontal_bar.png"):
        """
        Draw a horizontal bar chart of paper counts per category.

        Empty categories are skipped; bars are sorted by ascending count. The
        image is saved under ``self.output_dir`` and the figure is closed
        afterwards.
        """
        ranked = sorted(
            ((name, info["count"]) for name, info in statistics.items() if info["count"] > 0),
            key=lambda pair: pair[1],
        )
        names = [name for name, _ in ranked]
        counts = [count for _, count in ranked]
        rows = range(len(ranked))

        plt.figure(figsize=(12, 10))
        palette = plt.cm.coolwarm(np.linspace(0.2, 0.8, len(ranked)))

        bars = plt.barh(
            rows,
            counts,
            color=palette,
            edgecolor="black",
            linewidth=1.2,
            alpha=0.85,
        )

        plt.yticks(rows, [textwrap.fill(name, 35) for name in names], fontsize=11)
        plt.xlabel("Number of Papers", fontsize=14, fontweight="bold")
        plt.title(
            "Papers Count by Category (Horizontal View)",
            fontsize=16,
            fontweight="bold",
            pad=20,
        )
        plt.grid(axis="x", alpha=0.3, linestyle="--")

        # Write each bar's value slightly to the right of its end.
        for bar in bars:
            length = bar.get_width()
            center_y = bar.get_y() + bar.get_height() / 2.0
            plt.text(
                length + 1,
                center_y,
                f"{int(length)}",
                ha="left",
                va="center",
                fontsize=10,
                fontweight="bold",
            )

        plt.tight_layout()
        plt.savefig(f"{self.output_dir}/{filename}", dpi=300, bbox_inches="tight")
        plt.close()
        print(f"  ‚úÖ {filename}")

    def plot_treemap(self, statistics: Dict, filename: str = "treemap.png"):
        """
        Draw a treemap whose tiles are sized by paper count per category.

        Needs the optional ``squarify`` package; when it cannot be imported a
        warning is printed and nothing is drawn. Otherwise the image is saved
        under ``self.output_dir`` and the figure is closed afterwards.
        """
        try:
            import squarify
        except ImportError:
            print("  ‚ö†Ô∏è squarify is not installed. Treemap will not be generated.")
            return

        ranked = sorted(
            ((name, info["count"]) for name, info in statistics.items() if info["count"] > 0),
            key=lambda pair: pair[1],
            reverse=True,
        )

        plt.figure(figsize=(16, 10))

        tile_sizes = [count for _, count in ranked]
        # Each tile shows the category name with its count underneath.
        tile_labels = [f"{name}\n({count})" for name, count in ranked]
        palette = plt.cm.Spectral(np.linspace(0.2, 0.8, len(ranked)))

        squarify.plot(
            sizes=tile_sizes,
            label=tile_labels,
            color=palette,
            alpha=0.8,
            text_kwargs={"fontsize": 10, "weight": "bold"},
            edgecolor="white",
            linewidth=3,
        )

        plt.axis("off")
        plt.title(
            "Treemap: Research Areas in Multi-Agent LLM",
            fontsize=18,
            fontweight="bold",
            pad=20,
        )
        plt.tight_layout()
        plt.savefig(f"{self.output_dir}/{filename}", dpi=300, bbox_inches="tight")
        plt.close()
        print(f"  ‚úÖ {filename}")

    def plot_percentage_comparison(self, statistics: Dict, filename: str = "percentage_chart.png"):
        """
        Draw two side-by-side horizontal bar charts: percentages and counts.

        Empty categories are skipped and categories are ordered by descending
        percentage. The image is saved under ``self.output_dir`` and the
        figure is closed afterwards.
        """
        ranked = sorted(
            ((name, info) for name, info in statistics.items() if info["count"] > 0),
            key=lambda pair: pair[1]["percentage"],
            reverse=True,
        )

        _fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))

        categories = [name for name, _ in ranked]
        percentages = [info["percentage"] for _, info in ranked]
        counts = [info["count"] for _, info in ranked]
        palette = plt.cm.plasma(np.linspace(0.2, 0.9, len(categories)))

        # One entry per panel: axis, bar lengths, x label, title, bar captions.
        panels = (
            (
                ax1,
                percentages,
                "Percentage (%)",
                "Percentage Distribution",
                [f"{value:.1f}%" for value in percentages],
            ),
            (
                ax2,
                counts,
                "Number of Papers",
                "Absolute Count",
                [f"{value}" for value in counts],
            ),
        )

        for axis, lengths, x_label, heading, captions in panels:
            bars = axis.barh(
                categories,
                lengths,
                color=palette,
                edgecolor="black",
                linewidth=1.2,
                alpha=0.85,
            )
            axis.set_xlabel(x_label, fontsize=12, fontweight="bold")
            axis.set_title(heading, fontsize=14, fontweight="bold")
            axis.grid(axis="x", alpha=0.3)

            # Caption each bar just past its right end.
            for bar, caption in zip(bars, captions):
                axis.text(
                    bar.get_width() + 0.5,
                    bar.get_y() + bar.get_height() / 2.0,
                    caption,
                    ha="left",
                    va="center",
                    fontsize=9,
                    fontweight="bold",
                )

        plt.tight_layout()
        plt.savefig(f"{self.output_dir}/{filename}", dpi=300, bbox_inches="tight")
        plt.close()
        print(f"  ‚úÖ {filename}")

    def plot_top_categories_donut(self, statistics: Dict, filename: str = "donut_chart.png"):
        """
        Draw a donut chart of the five largest non-empty categories.

        The image is saved under ``self.output_dir`` and the figure is closed
        afterwards.
        """
        ranked = sorted(
            ((name, info["count"]) for name, info in statistics.items() if info["count"] > 0),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Top 5 categories only
        leaders = dict(ranked[:5])

        plt.figure(figsize=(12, 10))
        palette = ["#ff9999", "#66b3ff", "#99ff99", "#ffcc99", "#ff99cc"]

        _, _, autotexts = plt.pie(
            list(leaders.values()),
            labels=list(leaders),
            autopct="%1.1f%%",
            colors=palette,
            startangle=90,
            textprops={"fontsize": 12, "weight": "bold"},
            pctdistance=0.85,
            wedgeprops=dict(width=0.5),  # ring width < 1 leaves the hole
        )

        # Restyle the percentage labels drawn on the ring.
        for pct_label in autotexts:
            pct_label.set_color("white")
            pct_label.set_fontsize(13)
            pct_label.set_weight("bold")

        plt.title("Top 5 Research Categories", fontsize=16, fontweight="bold", pad=20)
        plt.tight_layout()
        plt.savefig(f"{self.output_dir}/{filename}", dpi=300, bbox_inches="tight")
        plt.close()
        print(f"  ‚úÖ {filename}")

    def generate_text_analysis(self, statistics: Dict, categorized: Dict) -> str:
        """
        Generate textual analysis with the LLM.
        """
        total_papers = sum(v["count"] for v in statistics.values())

        stats_text = "\n".join(
            [
                f"- {cat}: {data['count']} papers ({data['percentage']}%)"
                for cat, data in sorted(
                    statistics.items(), key=lambda x: x[1]["count"], reverse=True
                )
                if data["count"] > 0
            ]
        )

        prompt = f"""You are a research analyst specializing in Multi-Agent LLM Systems.

Based on the following categorization of {total_papers} research papers from two major GitHub repositories:

{stats_text}

Please provide a comprehensive analysis in Persian (Farsi) covering:

1. **ÿÆŸÑÿßÿµŸá ⁄©ŸÑ€å** (Overview):
   - ÿ™ÿπÿØÿßÿØ ⁄©ŸÑ ŸÖŸÇÿßŸÑÿßÿ™ Ÿà ÿØÿ≥ÿ™Ÿá‚ÄåŸáÿß
   - ÿ™Ÿàÿ≤€åÿπ ⁄©ŸÑ€å ÿ™ÿ≠ŸÇ€åŸÇÿßÿ™

2. **ÿØÿ≥ÿ™Ÿá‚ÄåŸáÿß€å ÿßÿµŸÑ€å** (Main Categories):
   - 5 ÿØÿ≥ÿ™Ÿá ÿ®ÿ±ÿ™ÿ± Ÿà ÿßŸáŸÖ€åÿ™ ÿ¢ŸÜŸáÿß
   - ⁄Üÿ±ÿß ÿß€åŸÜ ÿØÿ≥ÿ™Ÿá‚ÄåŸáÿß Ÿæÿ±⁄©ÿßÿ±ÿ™ÿ± Ÿáÿ≥ÿ™ŸÜÿØÿü

3. **ÿ±ŸàŸÜÿØŸáÿß€å ÿ™ÿ≠ŸÇ€åŸÇÿßÿ™€å** (Research Trends):
   - ⁄©ÿØÿßŸÖ ÿ≠Ÿàÿ≤Ÿá‚ÄåŸáÿß ÿØÿßÿ∫‚Äåÿ™ÿ± Ÿáÿ≥ÿ™ŸÜÿØÿü
   - ÿ¥⁄©ÿßŸÅ‚ÄåŸáÿß€å ÿ™ÿ≠ŸÇ€åŸÇÿßÿ™€å (⁄©ÿØÿßŸÖ ÿ≠Ÿàÿ≤Ÿá‚ÄåŸáÿß ⁄©ŸÖÿ™ÿ± ⁄©ÿßÿ± ÿ¥ÿØŸá‚ÄåÿßŸÜÿØ)

4. **ŸÜ⁄©ÿßÿ™ ⁄©ŸÑ€åÿØ€å** (Key Insights):
   - ⁄ÜŸá ÿßŸÑ⁄ØŸàŸáÿß€å€å ÿØÿ± ÿ™Ÿàÿ≤€åÿπ ŸÖÿ¥ÿßŸáÿØŸá ŸÖ€å‚Äåÿ¥ŸàÿØÿü
   - ÿ™ŸÖÿ±⁄©ÿ≤ ÿ¨ÿßŸÖÿπŸá ÿ™ÿ≠ŸÇ€åŸÇÿßÿ™€å ÿ±Ÿà€å ⁄ÜŸá ŸÖÿ≥ÿßÿ¶ŸÑ€å ÿßÿ≥ÿ™ÿü

5. **Ÿæ€åÿ¥ŸÜŸáÿßÿØÿßÿ™ ÿ®ÿ±ÿß€å Ÿæÿ±Ÿà⁄òŸá** (Recommendations):
   - ÿ®ÿ±ÿß€å ŸÖÿ≠ŸÇŸÇ ŸÖÿ®ÿ™ÿØ€å ⁄©ÿØÿßŸÖ ÿØÿ≥ÿ™Ÿá ŸÖŸÜÿßÿ≥ÿ®‚Äåÿ™ÿ± ÿßÿ≥ÿ™ÿü
   - ÿ≠Ÿàÿ≤Ÿá‚ÄåŸáÿß€å ŸÜŸàÿ∏ŸáŸàÿ± Ÿà ŸÅÿ±ÿµÿ™‚ÄåŸáÿß€å ÿ™ÿ≠ŸÇ€åŸÇÿßÿ™€å

Write in a professional, academic Persian style. Be concise but insightful.
Use proper Persian academic terminology. Keep it around 400-500 words."""

        print("  ü§ñ Generating analysis with LLM...")
        response = self.llm.invoke(prompt)
        print("  ‚úÖ Textual analysis generated")
        return response.content

    def generate_summary_table(self, statistics: Dict) -> str:
        """
        Generate a text-based summary table.
        """
        table = "=" * 90 + "\n"
        table += f"{'Category':<45} {'Count':>12} {'Percentage':>18}\n"
        table += "=" * 90 + "\n"

        sorted_stats = sorted(
            statistics.items(), key=lambda x: x[1]["count"], reverse=True
        )

        for category, data in sorted_stats:
            if data["count"] > 0:
                table += (
                    f"{category:<45} {data['count']:>12} "
                    f"{data['percentage']:>17.2f}%\n"
                )

        table += "=" * 90 + "\n"
        total = sum(v["count"] for v in statistics.values())
        table += f"{'TOTAL':<45} {total:>12} {'100.00%':>18}\n"
        table += "=" * 90 + "\n"

        return table

    def create_full_report(self, statistics: Dict, categorized: Dict):
        """
        Create the full report (charts + text).
        """
        print("\n" + "=" * 90)
        print("üìä Agent 3 (Reporter) is generating the full report...")
        print("=" * 90 + "\n")

        # Generate charts
        print("üé® Drawing charts...")
        self.plot_bar_chart(statistics)
        self.plot_pie_chart(statistics)
        self.plot_horizontal_bar(statistics)
        self.plot_treemap(statistics)
        self.plot_percentage_comparison(statistics)
        self.plot_top_categories_donut(statistics)
        print()

        # Textual analysis (LLM disabled here due to quota limits)
        print("üìù Generating textual analysis...")
        # analysis = self.generate_text_analysis(statistics, categorized)
        analysis = (
            "[LLM-based textual analysis is disabled in this notebook due to "
            "quota limitations; the analysis will be written manually in the final report.]"
        )
        # print("  ‚ö†Ô∏è LLM-based analysis not executed (quota limitation)\n")

        # Summary table
        summary_table = self.generate_summary_table(statistics)

        # Build final report string
        report = f"""
{'=' * 90}
MULTI-AGENT LLM RESEARCH ANALYSIS REPORT
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
Sources:
  - https://github.com/kyegomez/awesome-multi-agent-papers
  - https://github.com/shizhl/Multi-Agent-Papers
{'=' * 90}

{summary_table}

{'=' * 90}
DETAILED ANALYSIS (ÿ™ÿ≠ŸÑ€åŸÑ ÿ™ŸÅÿµ€åŸÑ€å)
{'=' * 90}

{analysis}

{'=' * 90}
VISUALIZATIONS
{'=' * 90}

The following charts have been generated in '{self.output_dir}/':

1. bar_chart.png - Vertical bar chart showing paper distribution
2. pie_chart.png - Pie chart showing top 8 categories + others
3. horizontal_bar.png - Horizontal bar chart for easy comparison
4. treemap.png - Treemap visualization of research areas
5. percentage_chart.png - Dual comparison (percentage vs absolute count)
6. donut_chart.png - Donut chart for top 5 categories

{'=' * 90}
METHODOLOGY
{'=' * 90}

This analysis was conducted using a Hybrid Multi-Agent System:

Agent 1 (Extractor): Extracted paper titles from GitHub repositories
Agent 2 (Categorizer): Used a hybrid approach (Keyword Matching + LLM)
Agent 3 (Reporter): Generated visualizations and analysis

Total Processing: ~5-7 minutes for ~500 papers

{'=' * 90}
END OF REPORT
{'=' * 90}
"""

        # Save report to file
        with open(f"{self.output_dir}/full_report.txt", "w", encoding="utf-8") as f:
            f.write(report)

        print("üíæ Reports have been saved:")
        print(f"  ‚úÖ {self.output_dir}/full_report.txt")

        return report

    def run(self, statistics: Dict, categorized: Dict):
        """
        Run Agent 3 end-to-end.
        """
        report = self.create_full_report(statistics, categorized)

        print("\n" + "=" * 90)
        print("üéâ Agent 3 finished successfully!")
        print("=" * 90)
        print(f"\nüìÇ Output files in folder '{self.output_dir}':\n")
        print("   üìä 6 high-quality charts")
        print("   üìÑ 1 comprehensive text report")
        print("   ü§ñ (Optional) LLM-generated analysis\n")

        return report

# ============================================================================
# Run Agent 3
# ============================================================================

reporter = ReporterAgent(llm)
final_report = reporter.run(statistics, categorized_papers)

# Show only the beginning of the report; the full text is in full_report.txt.
preview_rule = "=" * 90
preview_chars = 1500
print(preview_rule)
print("üìÑ Report preview:")
print(preview_rule)
print(final_report[:preview_chars] + "\n...\n[continued in full_report.txt]")



In [None]:
#@title extract data to drive
# First, create the project folder
!mkdir -p "/content/drive/MyDrive/project_phase1"

# Copy everything except .config and drive
!cp -r /content/report_output "/content/drive/MyDrive/project_phase1/"
!cp -r /content/sample_data "/content/drive/MyDrive/project_phase1/"
!cp /content/*.json "/content/drive/MyDrive/project_phase1/"
!cp /content/*.txt "/content/drive/MyDrive/project_phase1/"
!cp /content/*.png "/content/drive/MyDrive/project_phase1/"


In [None]:
#@title mounting
# Mount Google Drive at /content/drive. The "extract data to drive" cell
# copies into /content/drive/MyDrive, so run this cell before that one.
from google.colab import drive
drive.mount('/content/drive')
