In [1]:
# Make Google Drive available to this Colab runtime under /content/drive
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [11]:
# Change the working directory to the project folder in Google Drive.
# Relative paths used later in the notebook (e.g. "translation.png") are resolved
# against this directory, so this cell must run before the image is loaded.
# NOTE(review): presumably the image file lives in this folder - confirm.
%cd /content/drive/MyDrive/GenAI/AI Agents/Capstone Project - the AI Product Manager

/content/drive/MyDrive/GenAI/AI Agents/Capstone Project - the AI Product Manager


In [3]:
# Install the 'crewai' library and its tools, along with 'openai'
!pip install crewai
!pip install openai
!pip install 'crewai[tools]'

Collecting crewai-tools>=0.37.0 (from crewai[tools])
  Downloading crewai_tools-0.37.0-py3-none-any.whl.metadata (5.4 kB)
Collecting docker>=7.1.0 (from crewai-tools>=0.37.0->crewai[tools])
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting embedchain>=0.1.114 (from crewai-tools>=0.37.0->crewai[tools])
  Downloading embedchain-0.1.127-py3-none-any.whl.metadata (9.3 kB)
Collecting lancedb>=0.5.4 (from crewai-tools>=0.37.0->crewai[tools])
  Downloading lancedb-0.21.1-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (4.1 kB)
Collecting pyright>=1.1.350 (from crewai-tools>=0.37.0->crewai[tools])
  Downloading pyright-1.1.396-py3-none-any.whl.metadata (6.6 kB)
Collecting pytube>=15.0.0 (from crewai-tools>=0.37.0->crewai[tools])
  Downloading pytube-15.0.0-py3-none-any.whl.metadata (5.0 kB)
Collecting alembic<2.0.0,>=1.13.1 (from embedchain>=0.1.114->crewai-tools>=0.37.0->crewai[tools])
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting chromadb>=0.5

In [4]:
# Read the OpenAI API key from the Colab secret store instead of hardcoding it.
# The secret is looked up under the name 'genai_course'.
from google.colab import userdata

api_key = userdata.get("genai_course")

In [5]:
# Import essential libraries for image processing and agent creation
import os
from PIL import Image  # For image manipulation
from crewai_tools import VisionTool  # Specialized tool for image analysis
from crewai import Agent, Task, Crew, Process  # Core components of the crewai library
from langchain_openai import ChatOpenAI  # Interface to interact with OpenAI's language models
from IPython.display import display, Markdown  # For displaying outputs in Jupyter notebooks

/usr/local/lib/python3.11/dist-packages/pydantic/_internal/_config.py:295: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  warn(
/usr/local/lib/python3.11/dist-packages/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py:34: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  @validator("website_url")
/usr/local/lib/python3.11/dist-packages/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py:26: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to

In [6]:
# Publish the key through the process environment under OPENAI_API_KEY
os.environ.update(OPENAI_API_KEY=api_key)

In [7]:
# Initialize the vision tool for image analysis.
# This single instance is shared: it is passed in the `tools` list of every
# agent defined further down in the notebook.
vision_tool = VisionTool()

In [12]:
# Path of the screenshot to analyse. It is relative, so it resolves against the
# current working directory.
image_path = "translation.png"

# Open the image with Pillow. Image.open() is lazy: it only identifies the file and
# keeps the file handle open. Calling load() straight away reads the pixel data, so a
# truncated or corrupt file fails here rather than in the middle of the crew run, and
# Pillow can release the file handle (for single-frame images such as PNG).
# The trailing ';' keeps the return value of load() out of the cell output.
image = Image.open(image_path)
image.load();

# AGENT 1 - Image Description Agent

In [13]:
# Agent 1: equipped with the vision tool; its goal is to describe the image and its purpose
description_agent = Agent(
    llm=ChatOpenAI(temperature=0.8, model_name="gpt-4o"),
    tools=[vision_tool],
    verbose=True,
    role="Image Description Agent",
    goal=(
        f"Fully describe the digital image ({image_path}), "
        "of a B2B Digital Menu startup, "
        "including its visible elements, design, and intended purpose."
    ),
    backstory=(
        "You are responsible for analyzing images "
        "and describing their purpose in detail."
    ),
)

In [14]:
# Task 1: handled by the description agent; no upstream context is supplied
description_task = Task(
    agent=description_agent,
    expected_output="A complete description of the image and its purpose.",
    description=(
        "Identify and fully describe the digital image "
        "and explain its purpose."
    ),
)

# AGENT 2 - Critique Agent

In [15]:
# Agent 2: UX reviewer whose goal is to critique the image using the description agent's findings
critique_agent = Agent(
    llm=ChatOpenAI(temperature=0.8, model_name="gpt-4o"),
    tools=[vision_tool],
    verbose=True,
    role="UX Critique Agent",
    goal=(
        f"Critique the image {image_path} "
        "based on its description and intended purpose "
        "provided by the Image Description Agent."
    ),
    backstory=(
        "You critically evaluate images, specifically UX designs, "
        "and point out flaws, weaknesses, and areas of improvement."
    ),
)

In [16]:
# Task 2: handled by the critique agent; receives the description task as context
critique_task = Task(
    agent=critique_agent,
    context=[description_task],
    expected_output=(
        "A complete critique of the image, "
        "highlighting design flaws and areas of improvement."
    ),
    description=(
        "Critically analyze the image "
        "based on its description and intended purpose."
    ),
)

# AGENT 3 - UX Suggestion Agent

In [18]:
# Agent 3: its goal is to propose design and layout suggestions from the description and critique
ux_agent = Agent(
    llm=ChatOpenAI(temperature=0.8, model_name="gpt-4o"),
    tools=[vision_tool],
    verbose=True,
    role="UX Suggestion Agent",
    goal=(
        f"Provide design and layout suggestions for the image {image_path} "
        "based on the context from the Image Description Agent "
        "and UX Critique Agent."
    ),
    backstory=(
        "You specialize in providing actionable suggestions "
        "to improve the design of website images."
    ),
)

In [19]:
# Task 3: handled by the UX suggestion agent; receives the description and critique tasks as context
ux_task = Task(
    agent=ux_agent,
    context=[description_task, critique_task],
    expected_output=(
        "A list of actionable suggestions for improving the image design and layout "
        "based on the image's purpose and critiques."
    ),
    description=(
        "Provide suggestions for improving the image design and layout, "
        "based on the context from the description and critique agents."
    ),
)

# AGENT 4 - AI Product Manager

In [20]:
# Agent 4: product manager whose goal is to write user stories and prioritize the UX suggestions
pm_agent = Agent(
    llm=ChatOpenAI(temperature=0.8, model_name="gpt-4o"),
    tools=[vision_tool],
    verbose=True,
    role="AI Product Manager",
    goal=(
        "Write user stories based on the suggestions from the UX agent "
        f"for the image {image_path} "
        "and prioritize the suggestions based on probable customer feedback."
    ),
    backstory=(
        "You act as a product manager for a digital company, "
        "prioritizing suggestions and creating user stories to guide improvements."
    ),
)

In [21]:
# Task 4: handled by the product manager agent; receives all three earlier tasks as context
pm_task = Task(
    agent=pm_agent,
    context=[description_task, critique_task, ux_task],
    expected_output=(
        "A list of prioritized improvements "
        "based on expected impact on the customer, and the user stories."
    ),
    description=(
        "Write user stories based on the suggestions from the UX agent for the image "
        "and prioritize the suggestions based on probable customer feedback."
    ),
)

# Run the AI Product Manager

In [22]:
# Assemble the four agents and their tasks into one crew using the sequential process
crew = Crew(
    process=Process.sequential,
    verbose=True,
    tasks=[
        description_task,
        critique_task,
        ux_task,
        pm_task,
    ],
    agents=[
        description_agent,
        critique_agent,
        ux_agent,
        pm_agent,
    ],
)

# Run the crew and keep the returned result for display in the next cell
result = crew.kickoff()

[1m[94m 
[2025-03-11 19:43:52][🚀 CREW 'CREW' STARTED, 4E3F420D-E6F6-481D-8698-4836F44B04E2]: 2025-03-11 19:43:52.285134[00m
[1m[94m 
[2025-03-11 19:43:52][📋 TASK STARTED: IDENTIFY AND FULLY DESCRIBE THE DIGITAL IMAGE AND EXPLAIN ITS PURPOSE.]: 2025-03-11 19:43:52.305103[00m
[1m[94m 
[2025-03-11 19:43:52][🤖 AGENT 'IMAGE DESCRIPTION AGENT' STARTED TASK]: 2025-03-11 19:43:52.307240[00m
[1m[95m# Agent:[00m [1m[92mImage Description Agent[00m
[95m## Task:[00m [92mIdentify and fully describe the digital image and explain its purpose.[00m
[1m[94m 
[2025-03-11 19:43:52][🤖 LLM CALL STARTED]: 2025-03-11 19:43:52.308014[00m
[1m[94m 
[2025-03-11 19:43:54][✅ LLM CALL COMPLETED]: 2025-03-11 19:43:54.349704[00m
[1m[94m 
[2025-03-11 19:43:54][🤖 TOOL USAGE STARTED: 'VISION TOOL']: 2025-03-11 19:43:54.350734[00m
[1m[94m 
[2025-03-11 19:44:09][✅ TOOL USAGE FINISHED: 'VISION TOOL']: 2025-03-11 19:44:09.199505[00m


[1m[95m# Agent:[00m [1m[92mImage Description Agent[00m


In [23]:
# Render each task's result as Markdown: a heading naming the agent, then its raw output
for agent_number, task_output in enumerate(result.tasks_output, start=1):
    heading = f"### Agent {agent_number}: {task_output.agent}"
    display(Markdown(f"{heading}\n{task_output.raw}"))

### Agent 1: Image Description Agent
The image "translation.png" is a screenshot of a digital interface for a restaurant menu translation tool, likely part of a B2B digital menu startup platform. The platform, labeled "Bitte," is designed to assist in the management and translation of restaurant menus to different languages, facilitating a smoother dining experience for diverse clientele.

Visible elements in the image include a sidebar on the left, featuring various sections like "Edit Menu," "Categories," "Items," and a "Translation Center." These sections suggest the platform's purpose of providing comprehensive menu management capabilities, allowing users to categorize and organize menu items efficiently.

The main focus of the interface is the translation section, highlighting its primary utility. It lists menu items under categories such as "Entradas" (Starters) and "Pizzas." Specific menu items displayed include "Bruschetta al Pomodoro," "Sopa de Mexilhão," "Margherita," "Diavola," and "ai Quattro Formaggi." This section allows users to view and enter translations for each item, with visible language options indicating support for Portuguese and English. 

The presence of input fields for translations and the language toggle feature reflect the platform's core function: enabling businesses, particularly restaurants, to offer multilingual menu options, thereby catering to a broader audience and enhancing customer experience. This capability is crucial for businesses operating in multicultural or tourist-heavy areas, aiming to bridge communication gaps and accommodate non-native speakers effectively.

### Agent 2: UX Critique Agent
1. **Interface Design:**
   - **Strengths:**
     - The sidebar organization is intuitive, with clear sections for "Edit Menu," "Categories," "Items," and "Translation Center," indicating a well-structured hierarchy.
     - The main area is focused on translations, which aligns with the platform’s primary purpose.
   
   - **Weaknesses:**
     - The overall aesthetic could benefit from more visual appeal. The design appears quite functional but lacks engaging elements that could enhance user interaction.
     - The interface may look cluttered if there are many menu items, which might overwhelm the user. Consider implementing accordion-style collapsible menus for categories to improve navigation.
   
   - **Areas for Improvement:**
     - Introduce visual icons or color coding for each section to quickly differentiate between tasks.
     - Implement a more modern design approach, such as flat design or material design principles, to make the interface more visually appealing.

2. **Usability:**
   - **Strengths:**
     - The presence of input fields for translations directly next to the menu items is user-friendly, as it reduces navigation time.
     - Language toggle options are clearly visible, which is critical for a translation tool.
   
   - **Weaknesses:**
     - If the interface doesn’t support auto-saving, users might lose their translations if they forget to manually save.
     - The language switcher could benefit from additional languages if not already present, improving utility for a global audience.
   
   - **Areas for Improvement:**
     - Consider adding a progress indicator to show how many items have been translated vs. pending translations.
     - Provide tooltips or a help icon next to features for users unfamiliar with the platform, enhancing onboarding.

3. **Functionality:**
   - **Strengths:**
     - The platform’s core feature of enabling multilingual menu options is well-highlighted.
     - The translation interface supports both Portuguese and English, which is a good start for engaging diverse clientele.
   
   - **Weaknesses:**
     - There’s no indication of a preview feature that allows users to see the menu as the customer would. This could be crucial for quality assurance.
   
   - **Areas for Improvement:**
     - Implement a preview mode that lets users view the fully translated menu as it would appear to customers.
     - Incorporate feedback mechanisms where users can suggest corrections or improvements for translations.

By addressing these areas, the "Bitte" platform can significantly enhance its usability, aesthetic appeal, and functionality, making it a more robust tool for restaurant menu management and translation.

### Agent 3: UX Suggestion Agent
Based on the description and critique of the image "translation.png," here are actionable suggestions for improving the image design and layout:

1. **Interface Design Improvements:**
   - **Visual Appeal:** 
     - Introduce visual icons or color coding for each section (e.g., "Edit Menu," "Categories") to enhance differentiation and visual appeal.
     - Adopt a modern design approach, such as flat design or material design principles, to make the interface more attractive and engaging.
   - **Reduce Clutter:**
     - Implement accordion-style collapsible menus for categories to prevent the interface from looking cluttered when there are numerous menu items.
     - Use whitespace effectively to separate sections and reduce cognitive load.

2. **Usability Enhancements:**
   - **Auto-Save Feature:**
     - Integrate an auto-save function for translation entries to prevent data loss if users forget to save manually.
   - **Progress Indicator:**
     - Add a progress indicator to show the percentage of menu items translated, helping users track their progress.
   - **User Assistance:**
     - Provide tooltips or a help icon with brief explanations next to features to assist new users and improve onboarding.

3. **Functionality Additions:**
   - **Preview Mode:**
     - Implement a preview mode that allows users to view the complete translated menu as it would appear to customers, facilitating quality checks.
   - **Language Expansion:**
     - Consider adding more language options to cater to a broader global audience, increasing the platform's utility.
   - **Feedback Mechanism:**
     - Introduce a feedback option where users can suggest corrections or improvements for translations, promoting continuous improvement.

By implementing these suggestions, the "Bitte" platform can enhance its usability, aesthetics, and functionality, thereby providing a better user experience and supporting its purpose of efficient restaurant menu management and translation.

### Agent 4: AI Product Manager
1. **Interface Design Improvements:**
   - **Visual Appeal:**
     - Introduce visual icons or color coding for each section (e.g., "Edit Menu," "Categories") to enhance differentiation and visual appeal. 
     - Adopt a modern design approach, such as flat design or material design principles, to make the interface more attractive and engaging.
     - **User Story:** As a user, I want a visually appealing interface with clear icons or color codes for each section so that I can easily navigate and differentiate among various tasks.

   - **Reduce Clutter:**
     - Implement accordion-style collapsible menus for categories to prevent the interface from looking cluttered when there are numerous menu items. 
     - Use whitespace effectively to separate sections and reduce cognitive load.
     - **User Story:** As a user, I want collapsible menus that organize my items efficiently, to avoid feeling overwhelmed by too much information at once.

2. **Usability Enhancements:**
   - **Auto-Save Feature:**
     - Integrate an auto-save function for translation entries to prevent data loss if users forget to save manually.
     - **User Story:** As a user, I want an auto-save feature for my translations so that I don't lose any data if I forget to save manually.

   - **Progress Indicator:**
     - Add a progress indicator to show the percentage of menu items translated, helping users track their progress.
     - **User Story:** As a user, I want to see a progress indicator that shows how much of my menu has been translated, to manage my time effectively.

   - **User Assistance:**
     - Provide tooltips or a help icon with brief explanations next to features to assist new users and improve onboarding.
     - **User Story:** As a new user, I want tooltips or a help icon to understand how to use different features better, ensuring a smoother onboarding experience.

3. **Functionality Additions:**
   - **Preview Mode:**
     - Implement a preview mode that allows users to view the complete translated menu as it would appear to customers, facilitating quality checks.
     - **User Story:** As a user, I want a preview mode that shows me how the translated menu will look to customers, so I can ensure it's accurate and appealing.

   - **Language Expansion:**
     - Consider adding more language options to cater to a broader global audience, increasing the platform's utility.
     - **User Story:** As a restaurant manager, I want more language options available, so I can serve a diverse clientele effectively.

   - **Feedback Mechanism:**
     - Introduce a feedback option where users can suggest corrections or improvements for translations, promoting continuous improvement.
     - **User Story:** As a user, I want a feedback mechanism to suggest translation improvements, contributing to the platform's quality enhancement.

Prioritization:
1. **Auto-Save Feature** - High priority as it directly prevents data loss and enhances user peace of mind.
2. **Preview Mode** - High priority for ensuring translation quality and customer satisfaction.
3. **Progress Indicator** - Medium priority, as it aids user motivation and task management.
4. **Language Expansion** - Medium priority for broadening market reach and utility.
5. **Visual Appeal and Clutter Reduction** - Medium priority as they enhance user engagement and navigation.
6. **Feedback Mechanism** - Lower priority but valuable for platform improvement.
7. **User Assistance** - Lower priority, though beneficial for new users.

By implementing these improvements, the "Bitte" platform can significantly enhance its usability, aesthetic appeal, and functionality, making it a more robust tool for restaurant menu management and translation.