# ✈️ Planning to Study in Switzerland? Just ask!

### Where is the data from?
- blog.naver.com/imyourbest (89% 직장인 일지)

### Why is it useful?
- 초코빵 finished her master's in Zurich (2020-2025) and documented her full preparation journey on her Naver blog.
- So many people have asked her for tips — so she trained a little Q&A bot to answer your questions!

## Setup

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


working directory

In [None]:
import os
# Run from the project folder on the mounted Drive so that the relative paths
# used later (requirements.txt, swiss_study_abroad_prep.pdf) resolve.
project_path = '/content/drive/MyDrive/Projects/01swiss_study_abroad_prep_QA'
os.chdir(project_path)

packages

In [None]:
# Quietly install the packages listed in requirements.txt (read from the working directory).
!pip install -qr requirements.txt

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m96.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m91.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.8/265.8 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m303.4/303.4 kB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.3/129.3 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver d

API key

In [None]:
# Fetch the OpenAI API key through the project's helper module.
# NOTE(review): OPENAI_API_KEY is not referenced again in the cells shown here;
# presumably the helper also makes the key available to the OpenAI client
# (e.g. via the environment) - confirm.
from helper import get_openai_api_key
OPENAI_API_KEY = get_openai_api_key()

parallel processing

In [None]:
# Patch asyncio so an already-running event loop can be re-entered.
# The summary query engine further down is created with use_async=True.
import nest_asyncio
nest_asyncio.apply()

## Prepare contents of blog postings

※ If this step raises an error, restart the session ('Restart session') and try again.

In [None]:
from llama_index.core import SimpleDirectoryReader
# Load the single PDF (presumably the exported blog posts - confirm) from the working directory.
documents = SimpleDirectoryReader(input_files=["swiss_study_abroad_prep.pdf"]).load_data()

## Set up language and embedding models for answering study-abroad questions

In [None]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into nodes with a SentenceSplitter (chunk_size=1024);
# both indexes built later are constructed from these nodes.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [None]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Register the LLM and the embedding model on llama_index's global Settings object.
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

## Set up two Q&A tools based on the type of question:
- summary_tool: when you want a summary
- vector_tool: when you have a specific question

In [None]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# Build two indexes over the same nodes: one backs the summary tool,
# the other backs the specific-question (vector) tool.
summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

In [None]:
# Summary engine: uses the "tree_summarize" response mode with async enabled.
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
# Vector engine: created with the library's default options.
vector_query_engine = vector_index.as_query_engine()

In [None]:
from llama_index.core.tools import QueryEngineTool

# Wrap each query engine as a tool. The description is the text attached to the
# tool for the selector configured in the router cell.
summary_tool = QueryEngineTool.from_defaults(
    description="When you want a summarization",
    query_engine=summary_query_engine,
)
vector_tool = QueryEngineTool.from_defaults(
    description="When you have a specific question",
    query_engine=vector_query_engine,
)

## Automatically select the appropriate Q&A tool based on the question

In [None]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
import logging
# Only let ERROR-level (and above) llama_index log records through.
logging.getLogger("llama_index").setLevel(logging.ERROR)

# Router that picks exactly one of the two tools per question via an LLM-based
# single selector. NOTE(review): a `verbose=True` argument was left commented
# out here in the original cell; presumably it can be re-enabled for debugging.
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[summary_tool, vector_tool],
)

## Q&A examples

In [None]:
# Example 1: ask which documents are needed (question is in Korean).
response = query_engine.query("스위스 유학 준비 서류는?")
print(response)  # print() renders the response through str()

학부 졸업증명서(원본/영문판), 학부 성적표(원본/영문판), 고등학교 졸업증명서(원본/영문판), 고등학교 성적표(원본/영문판), 여권, 영어성적, CV, Motivation Letter, 재정증명서, Course description.


In [None]:
# Example 2: the question itself asks for the answer to be given in Korean.
response = query_engine.query("유학을 준비하면서 가장 힘들었던 건 뭐였어? 한국어로 대답해줘.")
print(response)  # print() renders the response through str()

숙소를 구하는 것이 가장 힘들었던 것 같아요.


# Your turn! 🤖 Ask what you want!
🔎 답변이 영어로 나온다면 포스팅에서 찾지 못한 내용일 확률이 높으므로 hallucination 답변을 피할 수 있답니다! 👍🏻

In [None]:
# Quietly install Gradio, used below for the interactive Q&A web UI.
!pip install -q gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.1/54.1 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.9/322.9 kB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m110.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import gradio as gr

def ask_bot(user_question) -> str:
    """Answer one question from the Gradio UI using the routed query engine.

    Args:
        user_question: Text submitted by the user. May be None, empty, or
            whitespace-only, in which case no query is sent.

    Returns:
        The query engine's response rendered as a string, or a fixed Korean
        prompt asking the user to type a question when the input is blank.
    """
    # A textbox value can arrive as None (e.g. a null payload); calling
    # .strip() on it would raise AttributeError, so treat it like blank input.
    if user_question is None or not user_question.strip():
        return "❗질문을 입력해주세요."
    return str(query_engine.query(user_question))

# Single-textbox web UI: the question goes to ask_bot, the answer is shown as text.
iface = gr.Interface(
    fn=ask_bot,
    title="스위스 유학준비 Q&A 봇",
    description="초코빵 블로그를 기반으로 만든 스위스 유학 Q&A봇입니다. 블로그에 작성된 내용에 한해 정확한 답변을 제공해 드립니다.",
    inputs=gr.Textbox(
        label="질문",
        lines=2,
        placeholder="예: 스위스 유학 서류는 뭐가 필요해요?",
    ),
    outputs=gr.Textbox(label="답변"),
)

# share=True also publishes a temporary public *.gradio.live URL.
# NOTE(review): anyone holding that link can submit questions, which presumably
# consume the configured OpenAI quota - confirm this exposure is intended.
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ff3335e3e9ee1932d0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


