In [1]:
!pip install sagemaker boto3 langchain langchain-community pypdf faiss-cpu --quiet

[0m

In [3]:
!mkdir forex_report/
!curl https://www.fubon.com/banking/document/Corporate/Financial_Market/TW/Weekly%20Review%20and%20Outlook%20for%202024%205%2020-5%2024.pdf -o ./forex_report/qa.pdf

mkdir: cannot create directory ‘forex_report/’: File exists
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  670k  100  670k    0     0   368k      0  0:00:01  0:00:01 --:--:--  368k


In [4]:
import json
from typing import Dict
from langchain_community.llms import SagemakerEndpoint
from langchain_community.llms.sagemaker_endpoint import LLMContentHandler

class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json[0]["generated_text"]
    
content_handler = ContentHandler()

In [5]:
from langchain_community.embeddings import SagemakerEndpointEmbeddings
from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler

class EmbeddingsHandler(EmbeddingsContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs={}) -> bytes:
        input_str = json.dumps({"queries": prompt, "mode": "nn_train_data", **model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        embeddings = [[x[0]["score"]] for x in response_json]
        return embeddings

embeddings_handler = EmbeddingsHandler()
    
embeddings = SagemakerEndpointEmbeddings(
    endpoint_name='jumpstart-dft-multilingual-e5-base',
    region_name='us-west-2',
    content_handler=embeddings_handler
)

In [6]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma, FAISS

loader = PyPDFDirectoryLoader('./forex_report/')
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=0
)

docs = text_splitter.split_documents(documents)

In [7]:
len(docs)

42

In [8]:
db = FAISS.from_documents(docs, embeddings)
db.save_local("qa.db")

In [9]:
from langchain_core.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

prompt_template = "Search Content:\n{context}\nQuestion:{question}:\nAnswer:"

PROMPT = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

chain = load_qa_chain(
    llm = SagemakerEndpoint(
        endpoint_name="jumpstart-dft-meta-textgeneration-llama-2-7b-f",
        region_name="us-west-2",
        content_handler=content_handler
    ),
    prompt=PROMPT
)

In [10]:
query = "美元/日圓"
search_results = db.similarity_search(query, k=3)

result = chain.run(input_documents=search_results, question=query)
print(result)

  warn_deprecated(


Search Content:
2. 截至 5/16，5月以來外資僅 2日賣超台股，累計淨買超約 44.76億美元，台股
加權指數於 5/16創新高、達 21516，自 5/13 -5/16，台股加權指數上漲 2.88%，
所有類股齊漲，台積電財報亮眼， 權重約 64%資訊科技類股上漲 3.86%，健護、
房地產類股領漲 5.74%、4.9%，房市熱度升溫、獲利前景樂觀，營建類股回神。

6  
 美元 /日圓   
本週區間： 153.60 -156.74  
下週預估區間： 153.90 -157.00  收盤價： 155.39  
方向：→ 
日本 10年公債殖利率   
本週區間： 0.9090% -0.9740%  
下週預估區間： 0.8900% -0.9800%  收盤價： 0.9270%  
方向：→

因彭博報導中國擬令地方政府購買房屋並轉為經濟適用房，為房市注入強心針，
澳洲礦業類股本週在 BHP、RIO、FMG等權值股走高帶動下，上漲 2.10%。 
3. 截至 5/16，美債殖利率劇烈修正，抵銷澳洲公債殖利率回落， 美澳 10年期利
差整理於約 15個基本點 ，全球風險偏好回升， 澳幣突破前高壓力點位， 1週風
Question:美元/日圓:
Answer:本週區間：153.60-156.74，下週預估區間：153.90-157.00，收盤價：155.39，方向：→

Based on the given information, the exchange rate between US dollar and Japanese yen is expected to move towards a higher level in the near future,
