In [2]:
# Install runtime dependencies: OpenCC, the Google Generative AI SDK, and Gradio.
# NOTE(review): nothing is version-pinned here, and `-U` upgrades google.generativeai
# while the other install cell pins 'google-generativeai==0.3.2' -- confirm which
# version is intended. Gradio is not imported in the visible cells.
! pip install opencc
! pip install -U -q google.generativeai
! pip install gradio


Collecting opencc
  Downloading OpenCC-1.1.7-cp310-cp310-manylinux1_x86_64.whl (779 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.8/779.8 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opencc
Successfully installed opencc-1.1.7
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m146.8/146.8 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.5/664.5 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gradio
  Downloading gradio-4.27.0-py3-none-any.whl (17.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.2-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.9/91

In [1]:
# Install llama-index and the Gemini SDK at pinned versions, plus matplotlib and qdrant_client.
# NOTE(review): matplotlib and qdrant_client are not imported in the visible cells --
# confirm they are still needed.
!pip install 'llama-index==0.9.37' 'google-generativeai==0.3.2' matplotlib qdrant_client



In [18]:
#test image url:https://img.onl/8mQXU4
#test poem= 野幕蔽瓊筵，羌戎賀勞旋。
from opencc import OpenCC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import google.generativeai as genai
import google.ai.generativelanguage as glm
from google.colab import userdata
from IPython.display import Markdown #用來印出data的

# Model configuration.
# The API key is read from the Colab secret named 'API_KEY' and handed to the
# google.generativeai client. (read_image reads a separate secret, 'GOOGLE_API_KEY'.)
API_KEY=userdata.get('API_KEY')
genai.configure(api_key=API_KEY)
# Generative model handle for the model named 'gemini-pro'.
model = genai.GenerativeModel('gemini-pro')

# scraping poem data
def scrape_poem_data():
  """Download the poem pages and return one cleaned text entry per poem.

  Fetches pages 001.htm through 011.htm from millionbook.net, decodes each
  response as Big5, collects the text of every <span class="swy1"> element,
  converts it with OpenCC's 's2twp' profile, and splits the concatenated
  text on the "=============================" separator.

  Returns:
    list[str]: Every piece after the first separator, with carriage returns
    and newlines removed.

  Raises:
    requests.RequestException: If a page cannot be fetched within the
      timeout or the server answers with an HTTP error status.
  """
  import requests
  from bs4 import BeautifulSoup

  # Anchor name appended as the URL fragment of each page, in page order.
  anchors = ["001", "034", "041", "070", "083", "163", "216", "217", "246", "254", "305"]
  converter = OpenCC('s2twp')
  pieces = []
  for page_no, anchor in enumerate(anchors, start=1):
    # Page files are numbered with three zero-padded digits (001 .. 011).
    url = f"https://www.millionbook.net/gd/h/hengtangtuishi/tssb/{page_no:03d}.htm#{anchor}"
    # A timeout keeps the notebook from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of silently parsing an HTTP error page as poem text.
    response.raise_for_status()
    # NOTE(review): the sample output contains U+FFFD replacement characters, which
    # suggests some bytes are not valid in Python's 'big5' codec -- confirm whether
    # 'cp950' decodes the pages more completely before changing this.
    response.encoding = 'big5'
    soup = BeautifulSoup(response.text, "html.parser")
    for span in soup.find_all("span", {"class": "swy1"}):
      pieces.append(converter.convert(span.text))
  data_store = "".join(pieces)
  # The text before the first separator is dropped.
  documents = data_store.split("=============================")[1:]
  return [entry.replace('\r', '').replace('\n', '') for entry in documents]

# prompt template
def make_prompt(query, relevant_passage):
  """Build the prompt sent to the text model.

  Args:
    query: The question text; inserted into the template unchanged.
    relevant_passage: Reference passage; single and double quotes are
      removed and each newline becomes a space before insertion.

  Returns:
    str: The filled-in prompt ending with an "ANSWER:" cue.
  """
  # One pass over the passage: delete both quote characters, flatten newlines.
  cleanup_table = str.maketrans({"'": None, '"': None, "\n": " "})
  flattened_passage = relevant_passage.translate(cleanup_table)

  template = """你現在是一個中文學習者的小幫手，你需要根據下方提供的段落幫忙回答有關唐詩的問題。請盡量以口語、好懂的方式表達，這樣可以幫助學習者更容易理解。
  如果提供的段落無關問題你可以忽略。
  QUESTION: '{query}'
  PASSAGE: '{relevant_passage}'

    ANSWER:
  """
  return template.format(query=query, relevant_passage=flattened_passage)

# CV
def read_image(url):
  """Ask the Gemini vision model to read the poem text shown in an image.

  Args:
    url: URL of the image; it is passed to load_image_urls as a
      one-element list.

  Returns:
    str: The model's response text with every newline replaced by '，'
    and every space removed.
  """
  from llama_index.multi_modal_llms.gemini import GeminiMultiModal

  from llama_index.multi_modal_llms.generic_utils import (
      load_image_urls,)
  import os
  from google.colab import userdata

  # Copy the Colab secret 'GOOGLE_API_KEY' into the process environment;
  # presumably GeminiMultiModal picks the key up from there -- TODO confirm.
  os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

  # Load the image first, then build the model wrapper.
  loaded_images = load_image_urls([url])
  vision_llm = GeminiMultiModal(model_name="models/gemini-pro-vision")
  vision_response = vision_llm.complete(
      prompt="請讀出圖片中的文字，這是一首詩。",
      image_documents=loaded_images,
  )

  # Line breaks become full-width commas and spaces are dropped.
  recognized_text = str(vision_response.text)
  return recognized_text.translate(str.maketrans({"\n": "，", " ": None}))

# main
# Interactive flow: get a poem (typed text, or text read from an image URL), find the
# most similar scraped passage, and ask the text model to explain and translate it.
# Only the way the poem is obtained differs between the two modes, so everything after
# that step is shared instead of being duplicated per branch.

# Strip the answer so that input such as "1 " still selects text mode.
searching_choice=input("input \"1\" if you want to insert a poem with text or input \"2\" if you wanna upload a image").strip()
if searching_choice == "1":
  search_poem=input("請輸入想要查找的詩。Please input a poem you want to search")
  poem=search_poem
else:
  # Any answer other than "1" selects image mode, as before.
  # Stray whitespace around a pasted URL would otherwise become part of the URL.
  search_poem=input("上傳包含詩詞的圖片的URL。Please upload an URL of an image including a poem").strip()
  poem=read_image(search_poem)

language=input("the language you want to translate to:")
query = f"請給我這首詩的詩名、作者、解析以及這首詩想表達的意義，除了以繁體中文表示外，請同時將所有的內容翻譯成{language}。:{poem}"

# Find the most relevant passage in the scraped documents.
documents = scrape_poem_data()
if not documents:
  # Without this guard the similarity call below fails with a much less helpful error.
  raise RuntimeError("scrape_poem_data() returned no passages; cannot look up the poem.")

# Convert the scraped passages plus the poem (last row) into TF-IDF vectors.
# NOTE(review): TfidfVectorizer's default word tokenizer does not segment Chinese, so
# a whole punctuation-delimited clause probably becomes one token; a character n-gram
# analyzer may tolerate OCR errors better -- confirm before changing.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(documents + [poem])

# Cosine similarity between the poem (last row) and every scraped passage.
similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])

# Index of the best-matching passage.
most_relevant_index = similarities.argmax()

print("Most relevant passage:")
print(documents[most_relevant_index])

# Build the prompt from the query and the retrieved passage.
prompt = make_prompt(query, documents[most_relevant_index])

# Generate the answer with the already-configured `model` from the configuration
# section above; there is no need to instantiate it again here.
answer = model.generate_content(prompt)
print(answer.text)





input "1" if you want to insert a poem with text or input "2" if you wanna upload a image2
上傳包含詩詞的圖片的URL。Please upload an URL of an image including a poemhttps://img.onl/8mQXU4
the language you want to translate to:泰文
Most relevant passage:
《登鸛雀樓》作者：王之渙白日依山盡，黃河入海流。欲窮千里目，更上一層樓。【註解】：１、鸛雀樓：在今山西省蒲縣西南，傳說鸛雀經常棲息於此。【韻譯】：夕陽依傍著西山慢慢地沉沒，滔滔黃河朝著東海洶湧奔流。若想把千里的風光景物看夠，那就要登上更高的一層城樓。【評析】：�U�U這是一首登高望遠詩。寥寥數語，把景色寫得浩瀚壯闊，氣魄雄渾，放眼宇宙之無限，寓寄哲理之深沉。詩的兩聯皆用對仗，而且對得順乎自然，氣勢充沛，浩大無邊，渾然天成。“欲窮千里目，更上一層樓”，被作為追求理想境界的座右銘，遺芳千古。
**詩名**：登鸛雀樓

**作者**：王之渙

**解析**：

* 夕陽慢慢落到西山上。
* 滔滔黃河向著大海奔流。
* 如果想要看到更遠的地方，那就登上更高的樓層吧。

**這首詩想表達的意義**：

這首詩表達了詩人登上高樓後，看到壯闊的景色，激發他想要追求更高、更遠的理想。

**泰文翻譯**：

**ชื่อบทกวี**： เติ้งกุ้นเฉวียโหลว

**ผู้แต่ง**： หวังจื้อฮวน

**การวิเคราะห์**：

* พระอาทิตย์ตกดินอยู่หลังภูเขา
* แม่น้ำฮวงโหไหลลงทะเล
* หากต้องการชมทัศนียภาพที่ไกลออกไป จึงต้องขึ้นไปยังชั้นบน

**ความหมายของบทกวี**：

บทกวีนี้สื่อถึงกวีที่ขึ้นไปบนหอคอยที่สูงแล้วมองเห็นทัศนียภาพอันกว้างใหญ่ สิ่งนี้ทำให้เขาได้รับแรงบันดาลใจให้แสวงหาอุดมคติที่สูงขึ้นและไกลออกไ