# Google Image
This notebook builds an agent that searches Google Images for a given prompt, then returns links to the images it finds along with a brief description of each one.

In [63]:
from langchain.agents import AgentType, Tool, AgentExecutor, create_tool_calling_agent
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain.tools import tool
from typing import List, Dict, Any
from openai import AsyncOpenAI
import asyncio
from nest_asyncio import apply
apply()

from dotenv import load_dotenv
load_dotenv()

True

In [64]:
# Instructions for the image-finding agent. A later cell installs this text as
# the template of the first message of the prompt pulled from the hub, so every
# character here (including the blank lines) is sent to the model as-is.
agent_prompt = """
You are agent in charge of finding good images for lecture slides given the topic of the slide.
You have access to google images.

Remember, the images should be relevant to the topic and helpful for the students to understand the topic better.

First find four images that you think are good for the given topic.
Next, use get_descriptions tool to get the descriptions of the images.

"""

In [65]:
@tool
def get_descriptions(prompt: str, images: List[str]) -> str:
    """
    This function takes in a list of images and a prompt, and explains why the image is relevant to the prompt.

    Args:
        prompt: The topic the images should be relevant to.
        images: URLs of the images to describe.

    Returns:
        The string form of a list of "<image url>: <description>" entries,
        one per input image, in the same order as `images`.
    """
    # NOTE: this function is wrapped with @tool, so the docstring above is
    # presumably what the agent sees as the tool description -- keep it
    # accurate and concise.

    # Nothing to describe: avoid building a client and starting an event loop.
    if not images:
        return str([])

    client = AsyncOpenAI()

    # The instruction refers to "the prompt", so the prompt itself has to be
    # part of the message; otherwise the model cannot judge relevance.
    instruction = (
        "Explain the contents of this image and how it is relevant to the prompt in two sentences."
        f"\n\nPrompt: {prompt}"
    )

    async def get_one_description(image: str) -> str:
        """Ask the vision model for a two-sentence description of one image URL."""
        response = await client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": instruction},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": image,
                            },
                        },
                    ],
                }
            ],
            max_tokens=300,
        )

        return response.choices[0].message.content

    async def get_all_descriptions() -> List[str]:
        """Describe every image concurrently, preserving input order."""
        # asyncio.run() requires a coroutine object; asyncio.gather() returns a
        # Future, so the gather call is wrapped in this coroutine instead of
        # being handed to asyncio.run() directly.
        return await asyncio.gather(*(get_one_description(image) for image in images))

    descriptions = asyncio.run(get_all_descriptions())

    # Link image urls with descriptions
    labelled_descriptions = [
        f"{image}: {description}" for image, description in zip(images, descriptions)
    ]
    return str(labelled_descriptions)

In [66]:
# Pull the tool-calling agent prompt from the hub and replace the template of
# its first message (presumably the system message) with our own instructions.
prompt = hub.pull("hwchase17/openai-tools-agent")
prompt.messages[0].prompt.template = agent_prompt

llm = ChatOpenAI(temperature=0, model_name="gpt-4-turbo")

# Serper wrapper configured for image search; its raw results feed the agent.
search = GoogleSerperAPIWrapper(type="images")
image_search_tool = Tool(
    name="image_search",
    func=search.results,
    description="Useful for finding images",
)
tools = [image_search_tool, get_descriptions]

# Assemble the agent, then wrap it in an executor that logs each step.
agent = create_tool_calling_agent(llm, tools, prompt)
verbose_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
executor = verbose_executor.with_config({"run_name": "Assistant"})

In [71]:
topic = "Slide on stacks in data structures"

response = await executor.ainvoke({
    "input": topic,
})
print(response['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `image_search` with `stack data structure diagram`


[0m[32;1m[1;3m
Invoking: `image_search` with `stack operations push and pop`


[0m[32;1m[1;3m
Invoking: `image_search` with `LIFO (Last In First Out) concept`


[0m[32;1m[1;3m
Invoking: `image_search` with `real-life example of stack data structure`


[0m[36;1m[1;3m{'searchParameters': {'q': 'stack operations push and pop', 'gl': 'us', 'hl': 'en', 'type': 'images', 'num': 10, 'engine': 'google'}, 'images': [{'title': 'Explain stack operations PUSH and POP with examples ...', 'imageUrl': 'https://homework.study.com/cimages/multimages/16/stack_push_operation2294814496370743228.jpg', 'imageWidth': 400, 'imageHeight': 246, 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQdGi8LA93QuNL56jy7xrDBvjl9v5gquCxRCTwK-7YKHTgqNk6b&s', 'thumbnailWidth': 286, 'thumbnailHeight': 176, 'source': 'Homework.Study.com', 'domain': 'homework.study.com',

In [74]:
num_images = 3

client = AsyncOpenAI()

output_format = """
Return in the following json format:

{
    images:[
        {
            src: "image_url",
            description: "description of the image"
        },
        {
            ...
        }
    ]
}
"""

images = await client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {
            "role": "user",
            "content": f"Images: {response['output']} \n\n Topic: {topic} \n\n From the images above, please select {num_images} images that you think are good for the given topic. \n\n {output_format}"
        }
    ],
    max_tokens=2000,
)

print(images.choices[0].message.content)

{
    "images":[
        {
            "src": "https://media.geeksforgeeks.org/wp-content/cdn-uploads/20230726165552/Stack-Data-Structure.png",
            "description": "This image illustrates the concept of a stack data structure, showing the 'push' operation where an element 'C' is added to the top of a stack containing elements 'A' and 'B', and the 'pop' operation where the top element 'C' is removed from the stack. It effectively visualizes the Last-In-First-Out (LIFO) principle of the stack, where the last element added is the first one to be removed, relevant to understanding basic data structures in computer science."
        },
        {
            "src": "https://homework.study.com/cimages/multimages/16/stack_push_operation2294814496370743228.jpg",
            "description": "The image illustrates a 'Push Operation' in a data structure known as a stack, showing how a new element ('E') is added to the top of the stack. On the left side, element 'E' is outside the stack; afte