In [1]:
import os

import pandas as pd
from langchain.chains.summarize.refine_prompts import prompt_template
from openai import OpenAI, api_key
from dotenv import load_dotenv
from pathlib import Path
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI 
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DataFrameLoader
from langchain.vectorstores import Chroma 
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from sympy.physics.units import temperature
import base64



In [2]:
# Load environment variables (presumably from a local .env file) so that
# OPENAI_API_KEY can be read with os.getenv in a later cell.
load_dotenv()

True

In [3]:
# Read the system prompt that is prepended to the retrieved context.
# A raw string keeps the Windows backslashes literal (the old literal relied on
# invalid escape sequences such as "\C"), and Path.read_text() closes the file
# handle instead of leaking it.
# NOTE(review): the platform default encoding is still used, as before; pass
# encoding="utf-8" if the prompt file is stored as UTF-8 — confirm.
init_prompt = Path(r'C:\Code\III_2024_golf\data\rag_init_prompt.txt').read_text()
type(init_prompt)

str

# RAG建構 

In [4]:
# Load the feedback-rule spreadsheet.
# Raw string: the previous literal contained invalid escape sequences
# ("\C", "\I", "\d", "\回"), which emit SyntaxWarning on recent Python versions.
file_path = r"C:\Code\III_2024_golf\data\回饋規則.xlsx"
data = pd.read_excel(file_path)

# Preview the first rows.
print(data.head())

             球路類型  結果                        原因                      改善建議
0         Pull左飛球  失誤          上桿(P2~3)時，角度過於陡峭          上桿(P2~3)時，肩膀往右轉動
1         Pull左飛球  失誤                    桿頭頂點過高                    肩膀往右轉動
2         Pull左飛球  失誤  下桿角度過於陡峭，左手腕過度外展，肩關節伸展抬起  下桿時，左手臂打直、左手腕維持固定、肩膀自然旋轉
3         Pull左飛球  失誤            桿面關閉，擊球點位於球的外側       擊球時，左手腕維持固定，注意擊球點位置
4  Pull Hook左拉左曲球  失誤          上桿(P2~3)時，角度過於陡峭          上桿(P2~3)時，肩膀往右轉動


In [5]:
# Convert each spreadsheet row into one Document: the cell values are joined
# with " | " as the text, and the row's index label is kept as metadata.
documents = [
    Document(
        page_content=" | ".join(str(cell) for cell in record.values),
        metadata={"row_index": row_label},
    )
    for row_label, record in data.iterrows()
]

print(f"生成了 {len(documents)} 個文檔")
print(documents[0])

生成了 28 個文檔
page_content='Pull左飛球 | 失誤 | 上桿(P2~3)時，角度過於陡峭 | 上桿(P2~3)時，肩膀往右轉動' metadata={'row_index': 0}


In [6]:
# Embed the rule documents with OpenAI embeddings.
import chromadb
openai_api_key = os.getenv("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Build the vector store for the feedback rules.
# An explicit collection name keeps these documents apart from any other Chroma
# store created in the same kernel with the default collection name (the
# few-shot example selector also builds a Chroma store); in-memory Chroma
# clients appear to share collections by name within a process.
vectorstore = Chroma.from_documents(
    documents,
    embeddings,
    collection_name="golf_feedback_rules",
)


  embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)


In [7]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode, base64-encoded, and the encoded bytes are
    decoded as UTF-8 so the result can be embedded in text.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

In [8]:
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.prompts import FewShotChatMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
import json

# Few-shot prompt setup.
# Raw strings keep the Windows backslashes literal; the previous literals
# relied on invalid escape sequences such as "\C" and "\I".
example_csv = r"C:\Code\III_2024_golf\data\few_shot_example.csv"
example_json = r"C:\Code\III_2024_golf\data\few_shot_example.json"

# Convert the CSV to a JSON records file, then load it back as a list of dicts.
# The JSON file is still written to disk, as before.
df = pd.read_csv(example_csv)
df.to_json(example_json, orient='records')
with open(example_json, encoding="utf-8") as example_file:
    # The context manager closes the handle that used to be leaked.
    examples = json.load(example_file)

# Each example renders as a human turn followed by an AI turn.
example_prompt = (
    HumanMessagePromptTemplate.from_template("{question}")
    + AIMessagePromptTemplate.from_template("{answer}")
)

# Select the single most similar example for a given input.
# Extra keyword arguments are forwarded to the vector store constructor; an
# explicit collection name keeps the examples out of the default Chroma
# collection, which another default-configured Chroma store in this kernel
# could otherwise share.
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(model="text-embedding-3-small"),
    Chroma,
    k=1,
    collection_name="few_shot_examples",
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    example_selector=example_selector,
    input_variables=["input"],
)



In [9]:
# Display the assembled few-shot prompt template to inspect its configuration.
few_shot_prompt

FewShotChatMessagePromptTemplate(example_selector=SemanticSimilarityExampleSelector(vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001C6A8166320>, k=1, example_keys=None, input_keys=None, vectorstore_kwargs=None), input_variables=['input'], input_types={}, partial_variables={}, example_prompt=ChatPromptTemplate(input_variables=['answer', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={}), AIMessagePromptTemplate(prompt=PromptTemplate(input_variables=['answer'], input_types={}, partial_variables={}, template='{answer}'), additional_kwargs={})]))

In [11]:


# Initialise the chat model used for answer generation.
client = ChatOpenAI(api_key= openai_api_key,model = "chatgpt-4o-latest",temperature=1)

# Build the retrieval chain: the retriever supplies {context}, while the caller
# supplies {example}, {input} and the base64-encoded {image}.
retriever = vectorstore.as_retriever()
prompt = ChatPromptTemplate.from_messages([
    ('system', init_prompt+'{context}'),
    ('system', "example:"+'{example}'),
    ('user', [
                {"type": "text", "text": "{input}"},
                {
                    "type": "image_url",
                    "image_url": {
                            # The base64 payload must follow the comma directly.
                            # The previous template wrapped {image} in literal
                            # single quotes, which produced a malformed data URL.
                            "url": "data:image/jpeg;base64,{image}",
                            "detail": "high",
                        },
                },
            ],
        ),
    ])

# Stuff the retrieved documents into the prompt, then wrap with retrieval.
document_chain = create_stuff_documents_chain(client, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)



In [19]:
# User question: shot type, launch-monitor readings and front/side feature values.
query = """
Pull Hook 左拉左曲球
球速:66.43、發射角度:7.198、發射方向:-6.172、飛行距離:179、ClubAngleFace:-15、ClubAnglePath:6.901
front:{A: [0.0, 0.0, -0.0, 0.0, -0.0, -0.0], F: [-0.01, -0.008, 0.0, 0.048, -0.018, -0.011], I: [-0.019, -0.04, 0.184, -0.063, -0.041, -0.023], T: [0.002, 0.0, 0.0, -0.001, -0.001, -0.001]}
side:{A: [0.002, -0.001, 0.001, -0.0, -0.002, 0.0], F: [0.001, 0.004, 0.0, -0.001, -0.003, 0.0], I: [0.217, -0.065, -0.013, -0.068, -0.054, -0.013], T: [-0.049, -0.043, 0.253, -0.083, -0.055, -0.013]}
"""
# Raw string: the previous literal relied on invalid escape sequences ("\C", "\I", "\d").
image = encode_image(r"C:\Code\III_2024_golf\data/combined/combined_186382.jpg")

# Select the few-shot example once and reuse it for both the prompt and the
# debug print, instead of issuing the same embedding lookup twice.
selected_examples = example_selector.select_examples({"question": query})

# 'context' is not passed: the retrieval chain fills it with the documents it
# retrieves for 'input', overwriting any value supplied here.
response = retrieval_chain.invoke({
    'image': image,
    'input': query,
    'example': selected_examples,
})

print(response['answer'])
print(selected_examples)

{
        "球路": "Pull Hook 左拉左曲球",
        "原因": "下桿角度過於陡峭，手腕過度彎曲，擊球時桿面過度關閉",
        "建議": "在下桿（P5-P6）時，試著讓右肩與右手肘保持外旋，避免過度手腕翻轉，並確保手腕在P5.5時維持適當的屈曲角度，這有助於讓球桿的路徑更穩定。此外，擊球時（P7），確保左手腕恢復至較中立的狀態，以防止桿面過度關閉，造成左拉左曲球。"  
}
[{'answer': '上桿時維持軸心，P2時桿身平行於雙腳之平行線，保持桿面角度，P2至P4胸椎充分旋轉，下桿時於P3重心往左，左半身帶領的同時，P5至P6右肩和右手肘維持外旋，左手腕在P5.5時屈曲，維持手腕角度過球，轉身至P10收桿。', 'question': 'Push Hook右拉左曲球 上桿往內側，然後再由內側透過手腕的翻轉使球往左旋'}]


In [190]:

# Send the same request ten times and save every answer to CSV.
# The query, the encoded image and the selected example do not change between
# iterations, so they are prepared once outside the loop.
query = "球速:66.43、發射角度:7.198、發射方向:-6.172、飛行距離:179、ClubAngleFace:-15、ClubAnglePath:6.901front:{A: [0.0, 0.0, -0.0, 0.0, -0.0, -0.0], F: [-0.01, -0.008, 0.0, 0.048, -0.018, -0.011], I: [-0.019, -0.04, 0.184, -0.063, -0.041, -0.023], T: [0.002, 0.0, 0.0, -0.001, -0.001, -0.001]}side:{A: [0.002, -0.001, 0.001, -0.0, -0.002, 0.0], F: [0.001, 0.004, 0.0, -0.001, -0.003, 0.0], I: [0.217, -0.065, -0.013, -0.068, -0.054, -0.013], T: [-0.049, -0.043, 0.253, -0.083, -0.055, -0.013]}"
# Raw string: the previous literal relied on invalid escape sequences.
image = encode_image(r"C:\Code\III_2024_golf\data/combined/combined_186382.jpg")
# The prompt declares an {example} variable, so it must be supplied; the
# previous payload omitted it and fails against the prompt defined in this
# notebook.
selected_examples = example_selector.select_examples({"question": query})

# Collect plain records and build the frame once, instead of growing a
# DataFrame row by row through the private DataFrame._append.
records = []
for i in range(10):
    # 'context' is filled in by the retrieval chain from the retrieved documents.
    response = retrieval_chain.invoke({
        'image': image,
        'input': query,
        'example': selected_examples,
    })
    records.append({'query': query, 'response': response['answer']})

df = pd.DataFrame(records)
df.to_csv(r'C:\Code\III_2024_golf\data\rag_output.csv',encoding='utf-8', index=False)
    