In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from langchain_core.prompts.image import ImagePromptTemplate
from langchain.prompts import PromptTemplate
from langchain_core.prompt_values import ImageURL
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain.output_parsers import PydanticOutputParser

# Read the Gemini API key from the environment so the secret never has to be
# written into (or scrubbed from) this file. Falls back to an empty string,
# which is the value that was previously hard-coded here.
import os

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

SyntaxError: unterminated string literal (detected at line 9) (1209622849.py, line 9)

In [2]:
class TagsForAspects(BaseModel):
    """Structured tags describing a room, as produced by the output parser.

    Every aspect is declared as a list, so several candidate tags can be
    returned for the same aspect. All fields are required (``...``).

    NOTE(review): ``enum`` is passed to ``Field`` as an extra keyword. The
    sample outputs further down this file contain values outside these lists
    (e.g. 'Living room', 'Office'), so the enums do not appear to be enforced
    at validation time -- confirm before relying on them.
    """

    # Mood / atmosphere words for the room.
    room_sentiment: list[str] = Field(
        ...,
        description="The sentiments of the room",
    )

    # Colour words associated with the room.
    room_color: list[str] = Field(
        ...,
        description="The overall atmospheres that depicted to a color",
    )

    # Room size in square meters; the advertised range is 1..99.
    room_size: list[int] = Field(
        ...,
        description="room size expressed in square meters",
        enum=list(range(1, 100)),
    )

    # Kind of dwelling the room belongs to.
    housing_type: list[str] = Field(
        ...,
        description="The type of housing",
        enum=['studio', 'apartment', 'house'],
    )

    # Functional type of the room.
    room_type: list[str] = Field(
        ...,
        description="The type of room",
        enum=[
            'living room',
            'kitchen',
            'home office',
            'bedroom',
            'bathroom',
            'dining room',
            'office',
            'garage',
            'basement',
            'attic',
            'laundry room',
            'pantry',
            'family room',
            'foyer',
        ],
    )


class SentenceToAspect:
    """Extract structured room aspects from a free-text description.

    The text is placed into a prompt together with the parser's format
    instructions, sent to the 'gemini-pro' chat model, and the reply is parsed
    into a ``TagsForAspects`` instance.
    """

    def __init__(self):
        """Create the text model, the output parser and the prompt template."""

        self.GOOGLE_API_KEY = GOOGLE_API_KEY
        # temperature = 0 keeps the extraction as repeatable as the model allows.
        self.llm = ChatGoogleGenerativeAI(
            model='gemini-pro',
            google_api_key=self.GOOGLE_API_KEY,
            temperature=0,
        )

        self.parser = PydanticOutputParser(pydantic_object=TagsForAspects)

        # Rule 3 previously read "If you don't think that there is no
        # appropriate words ...": a double negative that states the opposite of
        # the intended fallback. It now says what was meant.
        # NOTE(review): the literal word 'none' cannot be parsed into the
        # integer-typed ``room_size`` field of ``TagsForAspects`` -- confirm
        # how a missing size should be reported.
        self.prompt = PromptTemplate(
            template="""Answer the user query. \n {format_instructions}\n{query}\n
            
            1. You must extract the formatted aspect from each word or keyword within query sentence, rather than the sentence as a whole.
            2. Please concentrate metric information if it is considerated in the original human message.
            3. If you think that there is no appropriate word for an aspect, you must return the word 'none' for that aspect
            """,
            input_variables=["query"],
            # The format instructions are fixed per parser, so bind them once.
            partial_variables={
                "format_instructions": self.parser.get_format_instructions(),
            },
        )

    def query(self, query_sentence: str) -> "TagsForAspects":
        """Run ``query_sentence`` through prompt -> model -> parser.

        Args:
            query_sentence: Free-text description to extract aspects from.

        Returns:
            The result of the output parser, i.e. the parsed
            ``TagsForAspects`` object.
        """
        # The chain is assembled per call so it always uses the current
        # ``prompt`` / ``llm`` / ``parser`` attributes of this instance.
        chain = self.prompt | self.llm | self.parser
        return chain.invoke({"query": query_sentence})


class ImageToAspect(SentenceToAspect):
    """Extract structured room aspects from an image.

    The image is first described in text by the 'gemini-pro-vision' model;
    that description is then passed to ``SentenceToAspect.query`` to obtain
    the structured tags.
    """

    def __init__(self):
        """Set up the inherited text pipeline plus the vision model."""
        super().__init__()

        self.GOOGLE_API_KEY = GOOGLE_API_KEY
        # Fixed: the keyword was misspelled ``temporary``; ``temperature`` is
        # the sampling parameter (the text model in the parent class is
        # configured with ``temperature = 0`` as well).
        self.lmm = ChatGoogleGenerativeAI(
            model='gemini-pro-vision',
            google_api_key=self.GOOGLE_API_KEY,
            temperature=0,
            top_p=0.5,
        )

    def _describe_and_extract(self, instructions: str, url: str):
        """Describe the image at ``url`` per ``instructions``, then parse it.

        Stores the outgoing message in ``self.message`` and the vision model's
        reply in ``self.return_sentence_img`` (as the public methods always
        did), then returns the result of ``self.query`` on the reply text.

        Raises:
            ValueError: If ``url`` is ``None`` or empty.
        """
        if not url:
            # Fail early with a clear message instead of deep inside the client.
            raise ValueError("url must be a non-empty image URL")

        self.message = HumanMessage(
            content=[
                {'type': 'text', 'text': instructions},
                {'type': 'image_url', 'image_url': url},
            ]
        )

        self.return_sentence_img = self.lmm.invoke([self.message])
        return self.query(query_sentence=self.return_sentence_img.content)

    def query_room_img(self, url: str = None):
        """Tag a photo of a room.

        Args:
            url: Location of the room image. Required despite the ``None``
                default, which is kept only for signature compatibility.

        Returns:
            The parsed tags returned by ``self.query``.

        Raises:
            ValueError: If ``url`` is ``None`` or empty.
        """
        instructions = """Here is how you must depict an image of a room in English, making sure to include the following elements:
                 
                 1. Include at least one word that conveys the sentiment of the room.
                 If there are multiple sentiments that describe the image, feel free to include those all.
                 **You must suggest at least two sentiment terms, at most five.**
                 2. Describe the overall color scheme of the room.
                 **You must suggest at least two color terms, at most five.**
                 3. The more depiction of sentiment is suggested, the better.
                 4. Estimate how much size of the room is fit to the item **numerically**.
                 **The unit must be in integer square meters**.
                 5. Determine whether the room belongs to an apartment, studio, or house, and specify which one.
                 6. Guess which part of the house the room might be, such as the living room, bedroom, etc."""
        return self._describe_and_extract(instructions, url)

    def query_item_img(self, url: str = None):
        """Tag a photo of an item with the kind of room it would suit.

        Args:
            url: Location of the item image. Required despite the ``None``
                default, which is kept only for signature compatibility.

        Returns:
            The parsed tags returned by ``self.query``.

        Raises:
            ValueError: If ``url`` is ``None`` or empty.
        """
        instructions = """Here is how you must depict an image of an item in English. **you must think a room and housing related to the item**.
                 making sure to include the following elements:
                 
                 1. You must use sentiment words that conveys the sentiment of the room.
                 **You must suggest at least two sentiment terms, at most five.**
                 2. Recommend **background colors** that match the color of the item.
                 **You must not depict the color of the item itself!**
                 **You must suggest at least two color terms, at most five.**
                 3. The more depiction of color and sentiment is suggested, the better.
                 4. Estimate how much size of the room is fit to the item **numerically**.
                 **The unit must be in integer square meters**.
                 5. Determine whether the item matches a room belongs to an apartment, studio, or house, and specify which one.
                 6. Guess which part of the house the room might be, such as the living room, bedroom, etc."""
        return self._describe_and_extract(instructions, url)


In [3]:
# Build the image pipeline once; later cells reuse this same `ita` instance.
ita = ImageToAspect()
# Tag a sample room photo: the vision model is invoked on the image and its
# reply is then parsed into a TagsForAspects object.
output_ita_room = ita.query_room_img(url = 'https://image.ohou.se/i/bucketplace-v2-development/uploads/cards/snapshots/169087905318351049.jpeg')


In [4]:
# Display the parsed tags for the room photo.
output_ita_room

TagsForAspects(room_sentiment=['bright', 'airy', 'peaceful', 'tranquil'], room_color=['warm', 'neutral', 'white'], room_size=[20], housing_type=['apartment'], room_type=['living room'])

In [5]:
# Tag a sample item photo with the kind of room it would suit.
output_ita_item = ita.query_item_img(url = 'https://yekun.com/data/goodsImages/ADD_GOODS1_159832306020211231164545.jpg')

In [6]:
# Display the parsed tags for the first item photo.
output_ita_item

TagsForAspects(room_sentiment=['warm', 'inviting', 'comfortable', 'stylish', 'versatile'], room_color=['white', 'light gray', 'beige', 'pale yellow', 'light blue'], room_size=[12], housing_type=[], room_type=['living room', 'dining room', 'bedroom', 'home office'])

In [7]:
# Tag a second item photo; this rebinds `output_ita_item`, replacing the
# result of the previous item query.
output_ita_item = ita.query_item_img(url = 'https://i.namu.wiki/i/u8Qv8l2kwXCT7K_M5Vt2c_i_-O6UwVK470gwec4GcarM2KKRz5vV6SPylgG82Y2V6hvbLIaSnSGk5SF6Q7P3IQ.webp')

In [8]:
# Display the parsed tags for the second item photo.
output_ita_item

TagsForAspects(room_sentiment=['Modern', 'Elegant', 'Simple'], room_color=['White', 'Gray', 'Beige'], room_size=[20], housing_type=[], room_type=['Living room', 'Office'])