# RAG (main)

## Dependencies

In [1]:
import os
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

In [2]:
# Load the environment variables (python-dotenv) before the setup cell reads
# the LangChain and OpenAI settings via os.getenv.
load_dotenv() 

True

In [3]:
# setup
#
# `os.environ[name] = os.getenv(name)` only re-assigns a value to itself when
# the variable is already set, and raises an opaque
# "TypeError: str expected, not NoneType" when it is not. Check for the
# variables explicitly instead, so a missing entry is reported by name.

# LangChain
os.environ["LANGCHAIN_TRACING_V2"] = "true"

required_env_vars = (
    # LangChain
    "LANGCHAIN_API_KEY",
    "LANGCHAIN_ENDPOINT",
    "LANGCHAIN_PROJECT",
    # OpenAI
    "OPENAI_API_KEY",
)

# Same failure condition as before (variable absent from the environment),
# but with an actionable message listing every missing name at once.
missing_env_vars = [name for name in required_env_vars if name not in os.environ]
if missing_env_vars:
    raise RuntimeError(
        "Missing environment variables: "
        + ", ".join(missing_env_vars)
        + ". Define them in your .env file or shell before running this notebook."
    )

## Documents

Things to Consider
- chunking level
- data cleaning
- metadata

In [4]:
from langchain_community.document_loaders import PyPDFLoader

pdf_paths = [
    "https://www.ph.emb-japan.go.jp/files/100508281.pdf", # TOURISM
    "https://www.ph.emb-japan.go.jp/files/100412012.pdf", # PACKAGE TOUR
    "https://www.ph.emb-japan.go.jp/files/100508282.pdf", # BUSINESS, CONFERENCE or CULTURAL EXCHANGE, etc.
    "https://www.ph.emb-japan.go.jp/files/100508283.pdf", # VISITING RELATIVES
    "https://www.ph.emb-japan.go.jp/files/100508284.pdf", # VISITING FRIENDS OR DISTANT RELATIVES
    "https://www.ph.emb-japan.go.jp/files/100508285.pdf", # VISITING US MILITARY PERSONNEL
    "https://www.ph.emb-japan.go.jp/files/100585068.pdf", # SPOUSE OR CHLID OF JAPANESE NATIONAL RESIDING IN THE PHILIPPINES
    "https://www.ph.emb-japan.go.jp/files/100508287.pdf", # TRANSIT
    "https://www.ph.emb-japan.go.jp/files/100508288.pdf", # MULTIPLE-ENTRY VISA FOR TEMPORARY VISITOR
    "https://www.ph.emb-japan.go.jp/files/100674192.pdf", # MULTIPLE-ENTRY TEMPORARY VISITOR VISA (PHILIPPINE NATIONALS WITH CONSIDERABLE FINANCIAL CAPACITY) 
    "https://www.ph.emb-japan.go.jp/files/100404404.pdf", # MULTIPLE-ENTRY VISA FOR TEMPORARY VISITOR
    "https://www.ph.emb-japan.go.jp/files/100479463.pdf", # STUDENT, WORKER AND DEPENDENT
    "https://www.ph.emb-japan.go.jp/files/100415046.pdf", # OFFICIAL 
    "https://www.ph.emb-japan.go.jp/files/100415047.pdf", # HOUSEKEEPER OF DIPLOMAT/OFFICIAL
    "https://www.ph.emb-japan.go.jp/files/100415048.pdf", # NIKKEI-JIN (JAPANESE DESCENDANT)
    "https://www.ph.emb-japan.go.jp/files/100508289.pdf", # FILIPINO PARENTS TRAVELLING TO JAPAN WITH JAPANESE-FILIPINO CHILDREN
]
headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Mobile Safari/537.36"
    }

pages = []
for pdf_path in pdf_paths:
    loader = PyPDFLoader(pdf_path, headers=headers)
    async for page in loader.alazy_load():
        pages.append(page)

In [13]:
# Inspect the first loaded page as a Document repr (metadata + page_content).
pages[0]

Document(metadata={'source': 'https://www.ph.emb-japan.go.jp/files/100508281.pdf', 'page': 0}, page_content='TOURISM   \nMay 2024  \n  \nA. PURPOSE  \nVisit Japan for tourism.  \n  \nB. Requirements（Details→https://www.ph.emb-japan.go.jp/itpr_ja/11_000001_00898.html）  \n    \n※ Downloadable from this website   \n      \n(1) Passport（Holder’s signature required）  \n(2) Application Form ※（A facial Photo (4.5×3.5cm) must be attached.）  \n(3) PSA issued Birth Certificate and Marriage Certificate (for married applicants), issued within 1 year \n☞ Unnecessary if there is used Japan Visa on passport. \n【ADDITIONAL REQUIREMENTS】  \n- If (3) is unreadable, submit Birth/Marriage certificate issued by Local Civil Registrar.  \n- If Birth Certificate is “LATE REGISTRATION”, submit Baptismal Certificate and School Record (Form 137).  \n-If there is no record of Birth/Marriage in PSA, submit Birth Certificate issued by Local Civil Registrar and Negative  \nCertificate issued by PSA.  \n(4) Itinerary

In [5]:
# Print the first page so its text is rendered with real line breaks.
print(pages[0])

page_content='TOURISM   
May 2024  
  
A. PURPOSE  
Visit Japan for tourism.  
  
B. Requirements（Details→https://www.ph.emb-japan.go.jp/itpr_ja/11_000001_00898.html）  
    
※ Downloadable from this website   
      
(1) Passport（Holder’s signature required）  
(2) Application Form ※（A facial Photo (4.5×3.5cm) must be attached.）  
(3) PSA issued Birth Certificate and Marriage Certificate (for married applicants), issued within 1 year 
☞ Unnecessary if there is used Japan Visa on passport. 
【ADDITIONAL REQUIREMENTS】  
- If (3) is unreadable, submit Birth/Marriage certificate issued by Local Civil Registrar.  
- If Birth Certificate is “LATE REGISTRATION”, submit Baptismal Certificate and School Record (Form 137).  
-If there is no record of Birth/Marriage in PSA, submit Birth Certificate issued by Local Civil Registrar and Negative  
Certificate issued by PSA.  
(4) Itinerary in Japan  
 
【In case that applicant will shoulder part/all of travel expense】   
 
(5) Applicant’s Bank Certific

## Embeddings

Things to Consider
- Evaluation Metric
- dataset
- model

In [6]:
# Embedding model used to build the vector store from the loaded pages.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

## Vector Database

Things to Consider
- Evaluation Metric
- HNSW parameters

In [7]:
from langchain_core.vectorstores import InMemoryVectorStore

# Build the in-memory vector store from the loaded pages and the embedding model.
vector_store = InMemoryVectorStore.from_documents(
    documents=pages,
    embedding=embeddings,
)

In [8]:
# Sanity check: retrieve the single closest page for a sample question and
# print its page number and text.

sample_query = "What is the requirements for tourism?"
docs = vector_store.similarity_search(sample_query, k=1)
for doc in docs:
    page_number = doc.metadata["page"]
    print(f'Page {page_number}: {doc.page_content}\n')

Page 0: TOURISM   
May 2024  
  
A. PURPOSE  
Visit Japan for tourism.  
  
B. Requirements（Details→https://www.ph.emb-japan.go.jp/itpr_ja/11_000001_00898.html）  
    
※ Downloadable from this website   
      
(1) Passport（Holder’s signature required）  
(2) Application Form ※（A facial Photo (4.5×3.5cm) must be attached.）  
(3) PSA issued Birth Certificate and Marriage Certificate (for married applicants), issued within 1 year 
☞ Unnecessary if there is used Japan Visa on passport. 
【ADDITIONAL REQUIREMENTS】  
- If (3) is unreadable, submit Birth/Marriage certificate issued by Local Civil Registrar.  
- If Birth Certificate is “LATE REGISTRATION”, submit Baptismal Certificate and School Record (Form 137).  
-If there is no record of Birth/Marriage in PSA, submit Birth Certificate issued by Local Civil Registrar and Negative  
Certificate issued by PSA.  
(4) Itinerary in Japan  
 
【In case that applicant will shoulder part/all of travel expense】   
 
(5) Applicant’s Bank Certificate (b

## Prompt Template

In [9]:
# System instruction: answer from the retrieved {context}, otherwise point the
# user to the embassy's visa information page.
system_template = """
Answer the following based on this {context}, 
otherwise just give this url (https://www.ph.emb-japan.go.jp/itpr_en/00_000035.html) 
for more information about Japan Visa"""

# Two-message chat prompt: the system instruction followed by the user's question.
chat_messages = [
    ("system", system_template),
    ("user", "{question}"),
]
prompt_template = ChatPromptTemplate.from_messages(chat_messages)

## LLM

In [10]:
# Chat model that generates the final answer from the augmented prompt.
llm = ChatOpenAI(model="gpt-4o-mini")

## RAG

Things to Consider
- Evaluation Metric

In [11]:
# USER QUESTION
question = input("question:")

# search similarity to the vector database (context)
context = vector_store.similarity_search(question, k=1)

# Pass only the text of the retrieved pages to the prompt. Formatting the list
# of Document objects directly would put its repr (metadata, escaped "\n")
# into the system message instead of clean page text.
context_text = "\n\n".join(doc.page_content for doc in context)

# augmented prompt
prompt = prompt_template.invoke({
    "context": context_text,
    "question": question
})

# generate response
response = llm.invoke(prompt)

In [12]:
# Show only the text content of the model's reply.
print(response.content)

The requirements for a Nikkei-Jin Japan visa include the following:

1. Background history about the 1st and 2nd Generation in English and its Japanese translation.
2. Birth Certificates for the spouse of the 1st Generation, the 2nd Generation, and their spouses.
3. Marriage Certificates for the 1st and 2nd Generation.
4. Death Certificates if the 1st or 2nd Generation is deceased (if the certificate is unreadable, submit a Death Certificate issued by the Local Civil Registrar; if no record of death in PSA, submit a Death Certificate from Local Civil Registrar and a Negative Certificate from PSA).
5. Family Photo of the 1st and 2nd Generation and their descendants.
6. If the 2nd Generation is not registered in Koseki-tohon, submit documents to indicate the 1st Generation’s migration to the Philippines (such as the 1st Generation’s Wedding Photo, passport, or Koseki-tohon).
7. Documents to indicate the relationship between the descendants of the 2nd Generation in the Philippines and the