In [None]:
import os
import textwrap
from urllib.parse import quote_plus

import google.generativeai as genai
import pandas as pd
from dotenv import load_dotenv
from IPython.display import display
from IPython.display import Markdown

In [None]:
# Load environment variables (python-dotenv) before reading the key below.
load_dotenv()

# Look up the API key (None when the variable is not set) and pass it to the
# google.generativeai client configuration.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

LLM 改寫 Query

In [None]:
# Prompt template for rewriting the user's input into search keywords.
def make_prompt_user(query):
    """Return the keyword-generation prompt with ``query`` filled in.

    The template tells the model to work out the user's intent, produce at
    least five keywords in both the input language and English, and answer
    with a fixed refusal sentence when the request is not about AI tools.
    ``query`` is substituted into the ``QUESTION:`` line via ``str.format``.
    """
    template = """
                You are excel at extracting main points from a paragraph and further generating keywords for googling. 
                Your job is to discern the users' intent from the paragraph and formulate keywords in both input language and English, 
                producing at least 5 distinct results seperatly.
                These keywords are for getting most relevant results from Google search.
                Optimizing these keywords according to the rules of each different language systems.
                If users' intent is not clear, you should ask for more information.
                If the user requests information unrelated to AI tool recommendations, return 
                "Sorry, I can only search for information about AI tools".
                For information related to tutorials, append Medium, Reddit, Youtube, Wiki or other reputable journalism sites to the 
                end of the search query. Ensure that the search results of queries are relevant to the paragraph and 
                include tool tutorials. Finally, return the finalized keywords in following type.
                QUESTION: '{query}'
                ANSWER: keyword1\nkeyword2\nkeyword3
                """
    return template.format(query=query)

In [None]:
# Gemini model handle; web_to_md() further down reads it through the global
# name `model`.
model = genai.GenerativeModel('gemini-1.5-pro-latest')

# Sample user request. The two commented-out alternatives are Chinese versions
# of similar requests (trip-planning AI tool, tutorial wanted, preferably free)
# -- presumably kept for trying non-English input.
# prompt = "我需要一個AI工具來幫助我安排行程，需要詳細的教學，有什麼推薦的工具嗎？最好是免費的工具"
# prompt = "我要去韓國玩五天，需要一個可以幫我安排行程的AI工具，有什麼推薦的工具嗎？須附上使用步驟，最好是免費的"
prompt = "I need an AI tool to help me plan my trip. I need detailed tutorials. Any recommended tools? Preferably free tools."

# Send the keyword-generation prompt built from the request to the model.
response = model.generate_content(
    make_prompt_user(prompt),
    generation_config=dict(temperature=0.5, top_k=90),
)

In [None]:
# Show the text of the model's reply to the keyword prompt as the cell output.
response.text

LLM 將爬蟲結果整理成 markdown 格式

In [None]:
# libraries for web scraping
import requests
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

Web crawler

In [None]:
# Prompt template for turning scraped website text into a markdown summary.
def make_prompt_mdfile(query):
    """Return the markdown-summary prompt with ``query`` filled in.

    The template asks the model for a markdown document with a title and five
    sections (Introduction, Recommend List, Installation Guide, Usage, FAQ).
    ``query`` is substituted into the ``Website_html:`` line via ``str.format``.
    """
    template = """
                You are good at summarizing information of the website and return the results in markdown format.
                A markdown document should outline the main features of the answer for users. 
                The document should be structured as follows:
                Title: Use an H1 tag (#) for the title.
                Use H3 tags (###) for the sections and H4 tags (####) for sub-sections.
                Sections should be clearly labeled and easy to navigate.
                Section 1. Introduction
                Provide a brief overview of the user queries and information of the results.
                Section 2. Recommend List
                Each feature should be a bullet point, and any sub-features should be sub-bullets.
                Section 3. Installation Guide
                The steps for installation should be listed in a numbered list. 
                If there are any code snippets required for installation, please include them using code blocks.
                Section 4. Usage
                This section should explain how to use the software product. 
                Any steps should be in a numbered list, and include code snippets where necessary.
                Section 5. FAQ
                Each question should be a bullet point with an H5 tag (#####) for the question, and the answer should be written directly below the question without any special formatting.

                Please ensure that the document is easy to read and navigate, with clear headings and subheadings, bullet points for key points, and numbered lists for any step-by-step instructions."
                Website_html: '{query}'
                ANSWER:
                """
    return template.format(query=query)

In [None]:
# Build a Google search URL from keywords.
def search_webpage(query):
    """Return the Google search URL for ``query``.

    The keywords are percent-encoded with ``quote_plus`` so that spaces,
    non-ASCII text and reserved characters such as ``&``, ``#`` or ``+`` stay
    inside the ``q`` parameter instead of truncating or corrupting the URL.

    Args:
        query: Search keywords as a string.

    Returns:
        The search URL as a string.
    """
    return 'https://www.google.com/search?q=' + quote_plus(query)

# Fetch the page content from the given URL.
def fetch_webpage(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            one, a server that never answers would block the cell forever.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.HTTPError: From ``raise_for_status`` on an error status.
        requests.Timeout: When the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text

# Extract the readable text from an HTML document.
def parse_html(html):
    """Return the page title followed by the text of every ``<p>`` element.

    Args:
        html: HTML markup as a string.

    Returns:
        ``"<title> <paragraphs>"`` where the paragraphs are joined by single
        spaces. When the document has no ``<title>`` tag, or the title has no
        single string (``.string`` is ``None``), only the paragraph text is
        returned instead of raising.
    """
    soup = BeautifulSoup(html, 'html.parser')

    content = ' '.join(p.text for p in soup.find_all('p'))

    # soup.title is None for pages without <title>; .string is None for an
    # empty title or one with nested markup. Both used to crash this function.
    title_tag = soup.title
    title = title_tag.string if title_tag is not None else None
    if not title:
        return content
    return title + ' ' + content

def web_to_md(url):
    """Fetch ``url``, extract its text and return the model's markdown summary.

    Chains ``fetch_webpage`` -> ``parse_html`` -> ``make_prompt_mdfile`` and
    sends the resulting prompt to the module-level ``model``. Returns the
    ``.text`` of the model response.
    """
    page_text = parse_html(fetch_webpage(url))
    md_prompt = make_prompt_mdfile(page_text)

    # Sampling settings for the summary; output length is capped at 1280 tokens.
    generation_config = {
        'temperature': 0.5,
        'top_k': 50,
        'max_output_tokens': 1280,
    }
    reply = model.generate_content(md_prompt, generation_config=generation_config)
    return reply.text

In [None]:
# options = Options()
# options.binary_location = '../work/'
# driver = webdriver.Chrome(options = options)

# driver.get(search_webpage('AI tools for trip planning tutorial'))

# # Wait for the page to load
# wait = WebDriverWait(driver, 10)

In [None]:
# Run the fetch -> parse -> summarize pipeline on a sample article; the
# markdown text returned by the model is kept in `res_md`.
res_md = web_to_md('https://www.geeksforgeeks.org/ai-chatbots-for-travel-planning/')