In [45]:
import sys
sys.path.append('..')
import constants.constants as const
import constants.file_handler_constants as fh
from constants.attraction_constants import *

from packages.attraction.Attraction import *
from packages.file_handler_package.file_handler import *

import os
import glob
import time
import pandas as pd
import numpy as np
import ast

import json
import requests
import google.generativeai as genai
from google.generativeai.types import ContentType
from PIL import Image
from IPython.display import Markdown

In [None]:
def getScorefromGeminiAPI(name:str, latitude:float, longitude:float, all_img_url:list[str]) -> dict:
    """
    Gets tag scores for a given attraction using the Gemini API.

    Up to three of the attraction's images are downloaded into a temporary
    directory, attached to a text prompt, and sent to the model. The first
    ``{...}`` span of the model's reply is parsed as JSON. The temporary
    directory is removed before returning, whether or not the call succeeded.

    Args:
        name: Name of the attraction.
        latitude: Latitude of the attraction.
        longitude: Longitude of the attraction.
        all_img_url: List of image URLs of the attraction. Iteration stops at
            the first empty string.

    Returns:
        Dictionary representing scores for all tags, or an empty dictionary
        when the request to the Gemini API (or parsing of its reply) fails.
    """

    # use a maximum of 3 images in the prompt to reduce token usage.
    max_prompt_images = 3
    download_timeout_sec = 30

    # create a 'temp' directory to store temporarily downloaded images, which will be used in requests to the Gemini API
    temp_dir = os.path.join(fh.STORE_FILL_ATTRACTION_TAG_SCORE, 'temp')
    createDirectory(fh.STORE_FILL_ATTRACTION_TAG_SCORE, 'temp')
    # NOTE(review): createDirectory is assumed to create `temp_dir`; makedirs is a no-op in that case
    # and guarantees the directory exists for the writes below.
    os.makedirs(temp_dir, exist_ok=True)

    res_score_dict = {}
    try:
        # Download only as many images as the prompt will use, and remember exactly which files
        # were written so that leftovers from an earlier run are never sent for this attraction.
        downloaded_paths = []
        for Idx, cur_url in enumerate(all_img_url):
            if cur_url == '' or len(downloaded_paths) == max_prompt_images:
                break
            try:
                response = requests.get(cur_url, timeout=download_timeout_sec)
            except requests.RequestException as e:
                # best effort: a single unreachable image should not abort the scoring
                print("failed to download image: ", cur_url, e)
                continue
            if response.status_code == 200:
                # write into the same directory that is cleaned up below (not the current working directory)
                filename = os.path.join(temp_dir, 'temp_img_{0}.jpeg'.format(Idx))
                with open(filename, 'wb') as file:
                    file.write(response.content)
                downloaded_paths.append(filename)

        # send API request to retrieve the score for the current attraction (including a query and the main image).
        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        model = genai.GenerativeModel('gemini-1.5-flash-latest')
        # model = genai.GenerativeModel('gemini-1.5-pro-latest')

        text_prompt = "Provide place name, latitude, and longitude. I will return a JSON string containing scores (0-1) for following attributes(nothing else no other sentences)" + \
        "\nfor example: \'{\"Tourism\":0,\"Adventure\":0,\"Meditation\":0,\"Art\":0,\"Cultural\":0,\"Landscape\":0,\"Nature\":0,\"Historical\":0,\"Cityscape\":0,\"Beach\":0,\"Mountain\":0,\"Architecture\":0,\"Temple\":0,\"WalkingStreet\":0,\"Market\":0,\"Village\":0,\"NationalPark\":0,\"Diving\":0,\"Snuggle\":0,\"Waterfall\":0,\"Island\":0,\"Shopping\":0,\"Camping\":0,\"Fog\":0,\"Cycling\":0,\"Monument\":0,\"Zoo\":0,\"Waterpark\":0,\"Hiking\":0,\"Museum\":0,\"Riverside\":0,\"NightLife\":0,\"Family\":0,\"Kid\":0,\"Landmark\":0,\"Forest\":0}" + \
        "\n{0}, {1}, {2} give me score for this".format(name, latitude, longitude)

        # send a prompt to the model
        prompt = [text_prompt]
        for cur_path_img in downloaded_paths:
            try:
                # Image.open keeps the file handle open lazily; copy the pixels into memory and close
                # the file right away, otherwise the temp directory cannot be deleted on Windows.
                with Image.open(cur_path_img) as opened_img:
                    prompt.append(opened_img.copy())
            except OSError as e:
                # the server answered 200 but the payload is not a readable image; skip it
                print("failed to read image: ", cur_path_img, e)

        print("total_tokens: ", model.count_tokens(prompt))

        response = model.generate_content(prompt)
        # The reply may be wrapped in a markdown code fence, so cut out the first {...} span.
        # The expected object is flat, so the first '}' closes it.
        res_start_Idx = response.text.find('{')
        res_end_Idx = response.text.find('}')
        res_score_dict =  json.loads(response.text[res_start_Idx:res_end_Idx+1])

    except Exception as e:
        print("failed to use gemini api", e)

    finally:
        # remove directory 'temp'
        removeNoneEmptyDir(temp_dir)

    return res_score_dict

In [47]:
# First run only: start from the output of the merge-attraction-scraping module instead.
# path_to_res_merge_attraction_phuket = os.path.join(fh.STORE_MERGE_ATTRACTION_SCRAPING, 'res_merge_attraction', 'res_merge_attraction_Phuket.csv')

# Subsequent runs: resume from the result previously saved by this notebook.
path_to_res_merge_attraction_phuket = os.path.join(
    fh.STORE_FILL_ATTRACTION_TAG_SCORE,
    'res_merge_attraction',
    'res_merge_attraction_Phuket.csv',
)

# Load the attractions and append a helper column holding the row-wise total of every
# tag column from 'Tourism' through 'Forest'.
merge_attraction_phuket_df = pd.read_csv(path_to_res_merge_attraction_phuket).assign(
    sum_tag_score=lambda loaded_df: loaded_df.loc[:, 'Tourism':'Forest'].sum(axis=1)
)
merge_attraction_phuket_df

Unnamed: 0,name,type,description,latitude,longitude,imgPath,phone,website,openingHour,address,...,Hiking,Museum,Riverside,NightLife,Family,Kid,Landmark,Forest,Fitness,sum_tag_score
0,เลิฟ ไดวิ่ง ภูเก็ต,['การดำน้ำลึกและการดำน้ำตื้น'],,7.893529,98.297264,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '12:00-21:00', 'อังคาร': '12:00-21:...",V7VW+CW6 ตำบลป่าตอง อำเภอกะทู้ ภูเก็ต,...,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,,5.6
1,Andaman Sea Kayaks,['การพายเรือคายัคและการพายเรือแคนู'],,7.888660,98.380020,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '08:00-20:00', 'อังคาร': '08:00-20:...",V9QJ+F26 ตำบล วิชิต อำเภอเมืองภูเก็ต ภูเก็ต,...,0.0,0.0,0.2,0.0,0.7,0.5,0.2,0.1,,6.2
2,The Junk Liveaboards,['การดำน้ำลึกและการดำน้ำตื้น'],,7.859957,98.352960,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '09:00-17:00', 'อังคาร': '09:00-17:...",V953+X5P ตำบล ฉลอง อำเภอเมืองภูเก็ต ภูเก็ต,...,0.2,0.1,0.2,0.2,0.5,0.4,0.2,0.2,,10.2
3,ACDC Diving,['การดำน้ำลึกและการดำน้ำตื้น'],,7.840755,98.345090,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '08:00-21:00', 'อังคาร': '08:00-21:...",R8RW+824 ตำบล ฉลอง อำเภอเมืองภูเก็ต ภูเก็ต,...,0.0,0.0,0.0,0.0,0.2,0.1,0.0,0.0,,4.4
4,พาราไดส์ ไดวิ่ง เอเซีย,['การดำน้ำลึกและการดำน้ำตื้น'],,8.087546,98.297670,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '10:00-16:00', 'อังคาร': '10:00-16:...",37QX+237 ตำบลสาคู อำเภอถลาง ภูเก็ต,...,0.0,0.0,0.0,0.0,0.7,0.5,0.0,0.0,,7.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
378,คันทรีบาร์ บางลา,['บาร์/คลับ'],,7.894073,98.297030,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '19:00-02:00', 'อังคาร': '19:00-02:...",V7VW+JRC ตำบลป่าตอง อำเภอกะทู้ ภูเก็ต,...,0.0,0.0,0.0,0.7,0.0,0.0,0.0,0.0,,2.4
379,Kid's Club,['เกมและแหล่งรวมความบันเทิง'],ที่ฮอลิเดย์อินน์รีสอร์ทภูเก็ตหาดไม้ขาวมี Kid's...,7.996620,98.349390,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,{},"81 หมู่ 3 ฮอลิเดย์ อินน์ รีสอร์ต ภูเก็ต, ไม้ขา...",...,0.0,0.0,0.0,0.8,0.7,0.9,0.0,0.0,,2.5
380,Yellow Clock Tower,['สถานที่สำคัญ/จุดที่น่าสนใจ'],,7.883170,98.395370,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,{},"Intersection of Phuket Rd and, Phangnga Rd, Mu...",...,0.1,0.0,0.1,0.4,0.1,0.1,0.4,0.0,,4.2
381,The Garden Phuket Luxury Mall,['ศูนย์การค้า'],เดอะ การ์เด้น : ภูเก็ต ลักชัวรี่ มอลล์ ศูนย์กา...,8.015935,98.337840,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '09:00-18:00', 'อังคาร': '09:00-18:...","124/1 หมู่ 1 ถ.เทพกษัตรี, เมืองภูเก็ต, จังหวัด...",...,0.0,0.2,0.0,0.0,0.3,0.3,0.2,0.0,,4.8


In [None]:
# Ask the Gemini API for tag scores of every attraction whose tag scores currently sum to zero,
# and write the returned scores back into the DataFrame.
cnt_debug = 0  # number of attractions scored during this run

# Only the tag columns may be written: the model occasionally returns keys that were not requested,
# and assigning those through .loc would silently add new columns to the DataFrame.
tag_columns = set(merge_attraction_phuket_df.loc[:, 'Tourism':'Forest'].columns)

for Idx, cur_row in merge_attraction_phuket_df.iterrows():

    # already scored
    if(cur_row['sum_tag_score'] != 0):
        continue

    # imgPath holds the string form of a Python list; a missing value is read by pandas as NaN (a float),
    # which ast.literal_eval cannot parse
    raw_img_path = cur_row['imgPath']
    all_img_url = ast.literal_eval(raw_img_path) if isinstance(raw_img_path, str) else []

    res_score_dict = getScorefromGeminiAPI(
        name = cur_row['name'],
        latitude = cur_row['latitude'],
        longitude = cur_row['longitude'],
        all_img_url = all_img_url
    )

    # An empty result means the API call failed; stop here so the remaining rows can be retried
    # on a later run instead of issuing more failing requests.
    if(not len(res_score_dict)):
        print("failed at name --> ", cur_row['name'])
        break

    print(cur_row['name'])
    print("check res_score_dict")
    print(res_score_dict)

    ignored_keys = [key for key in res_score_dict if key not in tag_columns]
    if ignored_keys:
        print("ignored unexpected tags --> ", ignored_keys)

    for key, value in res_score_dict.items():
        if key in tag_columns:
            merge_attraction_phuket_df.loc[Idx, key] = value

    cnt_debug += 1

check IDx :  328
Directory temp created successfully
total_tokens:  total_tokens: 477

can not remove C:\Users\user\git\CEPPWebScraping\fill_attractionTag_score\temp
prn response : 
response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "```json\n{\"Tourism\":0.8,\"Adventure\":0.2,\"Meditation\":0.1,\"Art\":0.1,\"Cultural\":0.2,\"Landscape\":0.3,\"Nature\":0.6,\"Historical\":0.1,\"Cityscape\":0.0,\"Beach\":0.9,\"Mountain\":0.0,\"Architecture\":0.2,\"Temple\":0.0,\"WalkingStreet\":0.1,\"Market\":0.0,\"Village\":0.0,\"NationalPark\":0.0,\"Diving\":0.0,\"Snuggle\":0.1,\"Waterfall\":0.0,\"Island\":0.8,\"Shopping\":0.1,\"Camping\":0.0,\"Fog\":0.0,\"Cycling\":0.0,\"Monument\":0.0,\"Zoo\":0.0,\"Waterpark\":0.0,\"Hiking\":0.0,\"Museum\":0.0,\"Riverside\":0.0,\"NightLife\":0.3,\"Family\":0.3,\"Kid\":0.2,\"Landmark\":0.1,\

In [49]:
# Remove the helper column before saving. errors='ignore' plus reassignment keeps this cell
# re-runnable: a second execution no longer raises KeyError for the already-dropped column.
merge_attraction_phuket_df = merge_attraction_phuket_df.drop(columns=['sum_tag_score'], errors='ignore')
merge_attraction_phuket_df

Unnamed: 0,name,type,description,latitude,longitude,imgPath,phone,website,openingHour,address,...,Waterpark,Hiking,Museum,Riverside,NightLife,Family,Kid,Landmark,Forest,Fitness
0,เลิฟ ไดวิ่ง ภูเก็ต,['การดำน้ำลึกและการดำน้ำตื้น'],,7.893529,98.297264,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '12:00-21:00', 'อังคาร': '12:00-21:...",V7VW+CW6 ตำบลป่าตอง อำเภอกะทู้ ภูเก็ต,...,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,
1,Andaman Sea Kayaks,['การพายเรือคายัคและการพายเรือแคนู'],,7.888660,98.380020,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '08:00-20:00', 'อังคาร': '08:00-20:...",V9QJ+F26 ตำบล วิชิต อำเภอเมืองภูเก็ต ภูเก็ต,...,0.0,0.0,0.0,0.2,0.0,0.7,0.5,0.2,0.1,
2,The Junk Liveaboards,['การดำน้ำลึกและการดำน้ำตื้น'],,7.859957,98.352960,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '09:00-17:00', 'อังคาร': '09:00-17:...",V953+X5P ตำบล ฉลอง อำเภอเมืองภูเก็ต ภูเก็ต,...,0.0,0.2,0.1,0.2,0.2,0.5,0.4,0.2,0.2,
3,ACDC Diving,['การดำน้ำลึกและการดำน้ำตื้น'],,7.840755,98.345090,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '08:00-21:00', 'อังคาร': '08:00-21:...",R8RW+824 ตำบล ฉลอง อำเภอเมืองภูเก็ต ภูเก็ต,...,0.0,0.0,0.0,0.0,0.0,0.2,0.1,0.0,0.0,
4,พาราไดส์ ไดวิ่ง เอเซีย,['การดำน้ำลึกและการดำน้ำตื้น'],,8.087546,98.297670,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '10:00-16:00', 'อังคาร': '10:00-16:...",37QX+237 ตำบลสาคู อำเภอถลาง ภูเก็ต,...,0.0,0.0,0.0,0.0,0.0,0.7,0.5,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
378,คันทรีบาร์ บางลา,['บาร์/คลับ'],,7.894073,98.297030,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '19:00-02:00', 'อังคาร': '19:00-02:...",V7VW+JRC ตำบลป่าตอง อำเภอกะทู้ ภูเก็ต,...,0.0,0.0,0.0,0.0,0.7,0.0,0.0,0.0,0.0,
379,Kid's Club,['เกมและแหล่งรวมความบันเทิง'],ที่ฮอลิเดย์อินน์รีสอร์ทภูเก็ตหาดไม้ขาวมี Kid's...,7.996620,98.349390,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,{},"81 หมู่ 3 ฮอลิเดย์ อินน์ รีสอร์ต ภูเก็ต, ไม้ขา...",...,0.0,0.0,0.0,0.0,0.8,0.7,0.9,0.0,0.0,
380,Yellow Clock Tower,['สถานที่สำคัญ/จุดที่น่าสนใจ'],,7.883170,98.395370,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,{},"Intersection of Phuket Rd and, Phangnga Rd, Mu...",...,0.0,0.1,0.0,0.1,0.4,0.1,0.1,0.4,0.0,
381,The Garden Phuket Luxury Mall,['ศูนย์การค้า'],เดอะ การ์เด้น : ภูเก็ต ลักชัวรี่ มอลล์ ศูนย์กา...,8.015935,98.337840,['https://dynamic-media-cdn.tripadvisor.com/me...,,https://th.tripadvisor.com/Attraction_Review-g...,"{'จันทร์': '09:00-18:00', 'อังคาร': '09:00-18:...","124/1 หมู่ 1 ถ.เทพกษัตรี, เมืองภูเก็ต, จังหวัด...",...,0.0,0.0,0.2,0.0,0.0,0.3,0.3,0.2,0.0,


In [50]:
# Spot-check one record: show every field of the row whose index label is 3.
merge_attraction_phuket_df.loc[3]

name                                                       ACDC Diving
type                                    ['การดำน้ำลึกและการดำน้ำตื้น']
description                                                        NaN
latitude                                                      7.840755
longitude                                                     98.34509
imgPath              ['https://dynamic-media-cdn.tripadvisor.com/me...
phone                                                              NaN
website              https://th.tripadvisor.com/Attraction_Review-g...
openingHour          {'จันทร์': '08:00-21:00', 'อังคาร': '08:00-21:...
address                     R8RW+824 ตำบล ฉลอง อำเภอเมืองภูเก็ต ภูเก็ต
province                                                        ภูเก็ต
district                                                   เมืองภูเก็ต
subDistrict                                                        NaN
province_code                                                       83
distri

In [51]:
# Persist the scored attractions so that a later run can resume from this file.
createDirectory(fh.STORE_FILL_ATTRACTION_TAG_SCORE, 'res_merge_attraction')

res_path = os.path.join(fh.STORE_FILL_ATTRACTION_TAG_SCORE, 'res_merge_attraction', 'res_merge_attraction_Phuket.csv') 
# Index by attraction name so the CSV has no extra integer index column.
# The guard keeps the cell re-runnable: once 'name' is the index it is no longer a column,
# and calling set_index again would raise KeyError.
if merge_attraction_phuket_df.index.name != 'name':
    merge_attraction_phuket_df = merge_attraction_phuket_df.set_index(['name'])
merge_attraction_phuket_df.to_csv(res_path, encoding="utf-8")

Directory res_merge_attraction created successfully
