# AI Agents

In [2]:
class PlannerAgent:
    def __init__(self):
        self.tasks = []

    def plan_task(self, topic):
        self.tasks = [
            {"task": "Search for articles about the topic", "agent": "Retriever"},
            {"task": "Extract key points from articles", "agent": "Executor"},
            {"task": "Summarize the extracted points", "agent": "Executor"},
        ]
        print(f"Planner: Created tasks for '{topic}'")
        return self.tasks


class RetrieverAgent:
    def retrieve_data(self, topic):
        # Simulate data retrieval (replace with actual web scraping or API calls)
        print(f"Retriever: Fetching articles for '{topic}'...")
        return [
            "Article 1: AI is transforming industries like healthcare and finance.",
            "Article 2: AI advancements include NLP, computer vision, and robotics.",
        ]


class ExecutorAgent:
    def extract_key_points(self, articles):
        print("Executor: Extracting key points...")
        key_points = []
        for article in articles:
            key_points.append(article.split(": ")[1])  # Simulated extraction
        return key_points

    def summarize(self, key_points):
        print("Executor: Summarizing key points...")
        return f"Summary: {' '.join(key_points)}"


# Centralized Communication
class CentralizedCoordinator:
    def __init__(self):
        self.planner = PlannerAgent()
        self.retriever = RetrieverAgent()
        self.executor = ExecutorAgent()

    def execute_task(self, topic):
        tasks = self.planner.plan_task(topic)

        for task in tasks:
            if task["agent"] == "Retriever":
                articles = self.retriever.retrieve_data(topic)
            elif task["agent"] == "Executor" and "Extract" in task["task"]:
                key_points = self.executor.extract_key_points(articles)
            elif task["agent"] == "Executor" and "Summarize" in task["task"]:
                summary = self.executor.summarize(key_points)

        print(f"Final Output: {summary}")


# Run the system
if __name__ == "__main__":
    topic = "Artificial Intelligence"
    coordinator = CentralizedCoordinator()
    coordinator.execute_task(topic)


Planner: Created tasks for 'Artificial Intelligence'
Retriever: Fetching articles for 'Artificial Intelligence'...
Executor: Extracting key points...
Executor: Summarizing key points...
Final Output: Summary: AI is transforming industries like healthcare and finance. AI advancements include NLP, computer vision, and robotics.


In [None]:
%pip install transformers datasets accelerate

In [1]:
class PlannerAgent:
    def __init__(self, model_name="Qwen/Qwen2.5-1.5B-Instruct"):
        from transformers import pipeline
        self.planner = pipeline(
            "text-generation", 
            model=model_name, 
            max_length=150, 
            num_return_sequences=1, 
            temperature=0.3, 
            top_k=10, 
            top_p=0.9
        )

    def plan_task(self, input_task):
        response = self.planner(
            f"Please break down the following task into a clear and concise list of subtasks: {input_task}"
        )
        if not response or 'generated_text' not in response[0]:
            raise ValueError("Model did not generate a valid response.")
        
        generated_text = response[0]['generated_text'].strip()
        print("Raw response from model:", generated_text)  # Debugging
        
        subtasks = generated_text.split("\n")
        return [task.strip() for task in subtasks if task.strip()]
    



In [2]:


class RetrieverAgent:
    def __init__(self, model_name="deepset/roberta-base-squad2"):
        from transformers import pipeline
        self.retriever = pipeline(
            "question-answering",
            model=model_name,
            top_k=5,  # Retrieve up to 5 answers
            handle_impossible_answer=True,
        )

    def retrieve_data(self, question, context):
        max_context_length = 512
        if len(context.split()) > max_context_length:
            context_chunks = self._split_context(context, max_context_length)
            answers = []
            for chunk in context_chunks:
                try:
                    response = self.retriever(question=question, context=chunk)
                    answers.extend(self._process_response(response))
                except Exception as e:
                    answers.append(f"Error: {str(e)}")
            return " ".join(answers).strip()
        else:
            response = self.retriever(question=question, context=context)
            return " ".join(self._process_response(response))

    def _process_response(self, response):
        """Process the response list and extract answers."""
        if not response:
            return ["No relevant data found for this subtask."]
        answers = []
        for item in response:
            answer = item.get("answer", "").strip()
            if answer:
                answers.append(answer)
        return answers if answers else ["No relevant data found for this subtask."]

    def _split_context(self, context, max_length):
        words = context.split()
        return [
            " ".join(words[i:i + max_length])
            for i in range(0, len(words), max_length)
        ]


In [3]:
class SummarizerAgent:
    def __init__(self, model_name="facebook/bart-large-cnn"):
        from transformers import pipeline
        self.summarizer = pipeline("summarization", model=model_name)

    def _split_text(self, text, max_tokens=1024):
        """Split text into chunks if it exceeds the model's token limit."""
        words = text.split()
        return [
            " ".join(words[i:i + max_tokens])
            for i in range(0, len(words), max_tokens)
        ]

    def summarize(self, text, max_length=130, min_length=30, do_sample=False):
        """Summarize the input text, handling edge cases and long inputs."""
        try:
            # Handle empty or whitespace-only text
            if not text.strip():
                return "The input text is empty or contains only whitespace."

            # Debugging input text
            print(f"Input text: '{text}'")
            print(f"Word count: {len(text.split())}")

            # Handle short texts
            if len(text.split()) < 30:
                return "The text is too short to summarize meaningfully."

            # Handle long texts by splitting and summarizing chunks
            if len(text.split()) > 1024:
                chunks = self._split_text(text, max_tokens=1024)
                summaries = []
                for chunk in chunks:
                    response = self.summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=do_sample)
                    summaries.append(response[0]["summary_text"])
                return " ".join(summaries)

            # Summarize normally for medium-length texts
            response = self.summarizer(text, max_length=max_length, min_length=min_length, do_sample=do_sample)
            print("Raw summarizer response:", response)  # Debugging
            if not response or "summary_text" not in response[0]:
                return "No summary could be generated."
            return response[0]["summary_text"]

        except Exception as e:
            return f"Error in summarization: {str(e)}"


In [4]:
class RoleManager:
    def __init__(self):
        self.planner = PlannerAgent()
        self.retriever = RetrieverAgent()
        self.summarizer = SummarizerAgent()

    def execute(self, input_task, context):
        # Step 1: Plan the task
        subtasks = self.planner.plan_task(input_task)
        # print(f"Subtasks:\n {subtasks}")

        # Step 2: Retrieve data for each subtask
        retrieved_data = []
        # for task in subtasks:
        #     data = self.retriever.retrieve_data(task, context)
        #     retrieved_data.append(data)
        
        
        def simplify_task(task):
            # Remove verbose instructions and keep the core question
            return task.split(":")[-1].strip()
                
        for task in subtasks:
            simple_task = simplify_task(task).strip()
            if not simple_task:
                print("Skipping empty or invalid subtask.")
                continue
            print(f"Simplified subtask: '{simple_task}'")
            data = self.retriever.retrieve_data(simple_task, context)
            retrieved_data.append(data)

            
        print(f"Orginal context: {context}")    
        print(f"Retrieved data for '{retrieved_data}")

        # Step 3: Summarize the data
        full_text = " ".join(retrieved_data)
        print(f"Full text: {full_text}")
        summary = self.summarizer.summarize(full_text)
        return summary


In [5]:
if __name__ == "__main__":
    # Context data for retrieval (e.g., an article or knowledge base)
    context = """
The internet, a revolutionary technology, began as a research project in the 1960s with the creation of ARPANET, a U.S. Department of Defense initiative. The invention of the World Wide Web by Tim Berners-Lee in 1989 transformed the internet into a global communication tool. The 1990s saw rapid expansion with the rise of email, search engines, and e-commerce platforms like Amazon and eBay. Social media platforms like Facebook, Twitter, and Instagram reshaped how people connect and share information in the 2000s. Today, the internet is an integral part of daily life, enabling everything from online education to virtual reality experiences.
    """

    # Input task
    task = "Summarize the advancements in Internet Technology."

    # Run the multi-agent system
    manager = RoleManager()
    result = manager.execute(task, context)
    print("\n[Final Output]:", result)


  from .autonotebook import tqdm as notebook_tqdm
2025-01-05 17:35:51.647881: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-05 17:35:51.844439: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736076951.925769  137936 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736076951.945996  137936 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-05 17:35:52.089683: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorF

Raw response from model: Please break down the following task into a clear and concise list of subtasks: Summarize the advancements in Internet Technology. To summarize the advancements in Internet Technology, you can follow these steps:

1. Identify key technologies that have been developed or improved over time.
2. Analyze how these technologies have impacted various aspects of society, such as communication, commerce, education, and entertainment.
3. Consider any new developments or innovations that are currently being researched or tested.
4. Evaluate the potential future impact of these advancements on society and technology.

By breaking down the task into these subtasks, it becomes easier to approach and complete the summarization process systematically. Each step involves analyzing specific areas within Internet Technology and considering their broader implications for society and technology
Skipping empty or invalid subtask.
Simplified subtask: '1. Identify key technologies th

Your max_length is set to 130, but your input_length is only 82. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=41)


Orginal context: 
The internet, a revolutionary technology, began as a research project in the 1960s with the creation of ARPANET, a U.S. Department of Defense initiative. The invention of the World Wide Web by Tim Berners-Lee in 1989 transformed the internet into a global communication tool. The 1990s saw rapid expansion with the rise of email, search engines, and e-commerce platforms like Amazon and eBay. Social media platforms like Facebook, Twitter, and Instagram reshaped how people connect and share information in the 2000s. Today, the internet is an integral part of daily life, enabling everything from online education to virtual reality experiences.
    
Retrieved data for '['internet The internet The internet', 'Social media platforms Social media platforms like Facebook, Twitter, and Instagram reshaped how people connect and share information reshaped how people connect and share information in the 2000s.', 'The internet internet The internet The internet, a revolutionary tech

In [None]:
# class RetrieverAgent:
#     def __init__(self, model_name="deepset/roberta-base-squad2"):
#         from transformers import pipeline
#         self.retriever = pipeline(
#             "question-answering",
#             model=model_name,
#             top_k=5,
#             handle_impossible_answer=True,
#         )

#     def retrieve_data(self, question, context):
#         max_context_length = 512
#         if len(context.split()) > max_context_length:
#             context_chunks = self._split_context(context, max_context_length)
#             answers = []
#             for chunk in context_chunks:
#                 try:
#                     response = self.retriever(question=question, context=chunk)
#                     answer = response.get("answer", "").strip()
#                     if not answer:
#                         answer = "No relevant data found for this subtask."
#                     answers.append(answer)
#                 except Exception as e:
#                     answers.append(f"Error: {str(e)}")
#             return " ".join(answers).strip()
#         else:
#             response = self.retriever(question=question, context=context)
#             answer = response.get("answer", "").strip()
#             return answer if answer else "No relevant data found for this subtask."

#     def _split_context(self, context, max_length):
#         words = context.split()
#         return [
#             " ".join(words[i : i + max_length])
#             for i in range(0, len(words), max_length)
#         ]
