In [1]:
import hashlib
import os
import uuid

import google.generativeai as genai
import gspread
import pandas as pd
import requests
from oauth2client.service_account import ServiceAccountCredentials
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from sentence_transformers import SentenceTransformer, util

  from .autonotebook import tqdm as notebook_tqdm


<h3>Google Sheets API</h3>

In [2]:
# Fetch the FAQ question/answer pairs from Google Sheets and build `qna_df`
# with the columns: point_id, question, answer, content.
SPREADSHEET_ID = "1iv7ZYCF5tMRzsnghHTQcotWcqXv2E9SrdYKph8i72fI"
RANGE = "EN!A2:B1000"
# The key is read from the environment so it is never stored in the notebook.
API_KEY = os.environ["GOOGLE_SHEETS_API_KEY"]

# The key is sent as a query parameter instead of being baked into `url`,
# so displaying or logging `url` cannot leak it.
url = f"https://sheets.googleapis.com/v4/spreadsheets/{SPREADSHEET_ID}/values/{RANGE}"

response = requests.get(url, params={"key": API_KEY}, timeout=30)
response.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error body
data = response.json()

# "values" is absent when the range holds no data, and the Sheets API drops
# trailing empty cells, so a row can have fewer than two entries.
# Keep only rows that carry both a non-blank question and a non-blank answer.
sheet_rows = data.get("values", [])
complete_rows = [
    row[:2]
    for row in sheet_rows
    if len(row) >= 2 and str(row[0]).strip() and str(row[1]).strip()
]

qna_df = pd.DataFrame(complete_rows, columns=['question', 'answer'])
qna_df['content'] = qna_df['question'] + ' ' + qna_df['answer']
# point_id is the md5 hex digest of the content: a stable, content-derived id
# (32 hex characters, later parsed with uuid.UUID).
qna_df.insert(
    loc=0,
    column='point_id',
    value=qna_df['content'].apply(lambda text: hashlib.md5(text.encode()).hexdigest()),
)

In [3]:
# Display the assembled Q&A table (point_id, question, answer, content) for a visual check.
qna_df

Unnamed: 0,point_id,question,answer,content
0,a7cce980648bbef34970b99d9f680663,How to unlock the car without a key?,You can unlock the car via HAUP app or tap the...,How to unlock the car without a key? You can u...
1,fac4f746c08383c4aea6ed258f0b94a0,How does a keycard or an in-app unlock functio...,We suggest you unlock the car via the app or u...,How does a keycard or an in-app unlock functio...
2,5f763f0290b28002a3551049f76a15fe,What are the Power/Fuel Conditions?,You have 2 choices: Return as Received and P...,What are the Power/Fuel Conditions? You have 2...
3,125cbd8899d49287f8834d58286e691d,What to know before enter Bangkok expressways ...,Most tollways cost anywhere from THB10 to THB1...,What to know before enter Bangkok expressways ...
4,079a32ccfb12a426ee877ff714e4fda2,What to know about M-Flow?,You don't have to stop and pay the toll right ...,What to know about M-Flow? You don't have to s...
5,b2b67fadb69a2eed694a216e146974d8,How to deal with the traffic tickets?,"A ticket ranges from THB500 to THB 2,000 depen...",How to deal with the traffic tickets? A ticket...
6,ccda1540848541606b5ba81e65f5a2c4,Where to park the car?,Many shopping malls in Bangkok offer free park...,Where to park the car? Many shopping malls in ...
7,afa2aabf4e1a241dbe74e5abd04ec0b4,When to request the reimbursement?,"In case of fuel payment, please contact Custom...",When to request the reimbursement? In case of ...
8,f5045de09322e33195cd33aebe6a7979,What are HAUP services?,HAUP is a shared mobility company established ...,What are HAUP services? HAUP is a shared mobil...


<h3>Google Sheets -> Embed -> Check for duplicate in Qdrant -> push new Q&A to Qdrant</h3>

In [None]:
# Sync step: find which sheet Q&As are already stored in Qdrant (by point id)
# and embed + upsert only the ones that are missing.
#
# Connection settings come from the environment so credentials never live in
# the notebook. QDRANT_URL must point at the cluster that holds the collection.
client = QdrantClient(
    url=os.environ["QDRANT_URL"],
    api_key=os.environ["QDRANT_API_KEY"],
)
collection_name = "RAG-HAUP-FAQ"
# NOTE(review): this cell assumes the collection already exists — confirm, or
# create it before the first run.

# check for duplicate
all_QnA = list(qna_df["point_id"])

# The ids read back from Qdrant are compared in canonical hyphenated UUID form,
# so the md5 hex digests are normalised once and reused for both the lookup
# and the comparison.
point_uuids = qna_df["point_id"].map(lambda digest: str(uuid.UUID(digest)))

# Only the ids are needed here, so payloads and vectors are not fetched.
# An empty sheet skips the round trip entirely.
if len(point_uuids) > 0:
    qdrant_pts = client.retrieve(
        collection_name=collection_name,
        ids=point_uuids.tolist(),
        with_payload=False,
        with_vectors=False,
    )
else:
    qdrant_pts = []
duplicate_pts = [str(point.id) for point in qdrant_pts]

# Rows whose id is not in Qdrant yet. Identical content yields an identical id,
# so repeated sheet rows are collapsed to avoid embedding and upserting twice.
new_qna_df = qna_df[~point_uuids.isin(duplicate_pts)].drop_duplicates(subset="point_id")

print(f"📑 Google Sheets has {len(all_QnA)} Q&A(s).")
print(f"💾 Qdrant has {len(duplicate_pts)} duplicated Q&A(s).")
print(f"💾 Pushing new {len(new_qna_df)} Q&A(s) to Qdrant.")

if len(new_qna_df) > 0:
    # The model is loaded only when there is something to embed.
    emded_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
    embeddings = emded_model.encode(new_qna_df["content"].tolist()).tolist()

    # Rows and embeddings are in the same order, so they can be paired directly.
    points = [
        PointStruct(
            id=str(uuid.UUID(row.point_id)),
            vector=vector,
            payload={
                "question": row.question,
                "answer": row.answer,
            },
        )
        for row, vector in zip(new_qna_df.itertuples(index=False), embeddings)
    ]

    client.upsert(collection_name=collection_name, points=points)

    print(f"💾 Pushed new {len(new_qna_df)} Q&A(s) to Qdrant.")
else:
    print('✅ No new data to pushed to Qdrant Cloud.')

📑 Google Sheets has 9 Q&A(s).
💾 Qdrant has 9 duplicated Q&A(s).
💾 Pushing new 0 Q&A(s) to Qdrant.
✅ No new data to pushed to Qdrant Cloud.


<h3>User Question -> Embed -> Find Similar Q&A</h3>

In [44]:
# Load the sentence-embedding model used to vectorise user questions.
emded_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# Connect to Qdrant. The URL and API key are read from the environment so that
# no credential is stored in the notebook; QDRANT_URL must point at the
# cluster that holds the collection named below.
client = QdrantClient(
    url=os.environ["QDRANT_URL"],
    api_key=os.environ["QDRANT_API_KEY"],
)
collection_name = "RAG-HAUP-FAQ"

In [45]:
# Embed the user's question, look up the closest stored Q&A pairs in Qdrant,
# and assemble the prompt (instructions + retrieved context + question).
user_question = "how to unlock a car?"
print(f"👩🏻 User Ask : {user_question}")

query_vector = emded_model.encode(user_question).tolist()

# Fetch the 3 nearest points together with their payloads.
search_results = client.query_points(
    collection_name=collection_name,
    query=query_vector,
    limit=3,
    with_payload=True,
)

# Render every hit as a "Q: ... / A: ..." block followed by a blank line,
# then concatenate the blocks in result order.
qa_blocks = []
for result in search_results.points:
    qa_blocks.append(
        f"Q: {result.payload['question']}\nA: {result.payload['answer']}\n\n"
    )
similar_points = "".join(qa_blocks)

system_prompt = "You are a helpful assistant that answers based only on the provided context with more short detail you think it benefits for user understandings. Respond in the same language as the user's question\n\n"

# Final prompt layout: instructions, retrieved Q&A context, then the question.
gemini_prompt = f"""
{system_prompt} \n
{similar_points} \n
Question : {user_question}"""

👩🏻 User Ask : how to unlock a car?


<h3>Gemini 2.0 Flash API to Assist and Humanize the Answer</h3>

In [42]:
# Configure the Gemini SDK and create the model handle used for generation.
# The API key is read from the environment so it is never stored in the notebook.
Gemini_API_key = os.environ["GEMINI_API_KEY"]
genai.configure(api_key=Gemini_API_key)
model = genai.GenerativeModel("gemini-2.0-flash")

In [43]:
# Send the assembled prompt to the Gemini model and print the generated text.
response = model.generate_content(gemini_prompt)
# Debug aid: uncomment to inspect the exact prompt that was sent.
# print(f"💻 Prompting \n\n {gemini_prompt}")
print(response.text)


คุณสามารถปลดล็อกรถได้ผ่านแอป HAUP หรือแตะบัตรคีย์

(โดยปกติคุณสามารถปลดล็อกรถผ่านแอปพลิเคชันบนมือถือที่เชื่อมต่อกับรถ หรือใช้คีย์การ์ดที่ตั้งโปรแกรมไว้สำหรับรถคันนั้น)

