In [None]:
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

# Do you have questions about studying abroad to Switzerland? Just ask!

### Where is the data from?
- blog.naver.com/imyourbest (89% 직장인 일지)

### Why useful?
- 초코빵, who finished her master succesfully at University of Zurich, Switzerland, shares her own preparation process to study abroad on her blog. She has been getting many questions about it, so she decided to create a Q&A bot!

## Setup

In [4]:
# GitHub connection
!git config --global user.name "lim-hyebin"
!git config --global user.email "hyebin.lim@uzh.ch"

In [3]:
# 패키지 설치
!pip install -qU llama-index llama-index-llms-openai llama-index-embeddings-openai python-dotenv==1.0.0 numpy==1.26.4 pandas==2.2.2

# API 키 로드
import helper
from helper import get_openai_api_key
OPENAI_API_KEY = get_openai_api_key()

# 병렬처리
import nest_asyncio
nest_asyncio.apply()

ImportError: cannot import name 'get_openai_api_key' from 'helper' (unknown location)

## Load Data

In [None]:
from llama_index.core import SimpleDirectoryReader

# load documents
documents = SimpleDirectoryReader(input_files=["swiss_study_abroad_prep.pdf"]).load_data()

## Define LLM and Embedding model

In [None]:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [None]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

## Define Summary Index and Vector Index

In [None]:
from llama_index.core import SummaryIndex, VectorStoreIndex

summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

## Define Query Engines and Set Metadata

In [None]:
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
vector_query_engine = vector_index.as_query_engine()

In [None]:
from llama_index.core.tools import QueryEngineTool


summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description=(
        "When you want a summarization"
    ),
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "When you have a specific question"
    ),
)

## Define Router Query Engine

In [None]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector


query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        summary_tool,
        vector_tool,
    ],
    verbose=True
)

In [None]:
response = query_engine.query("스위스 유학 준비 서류는?")
print(str(response))

[1;3;38;5;200mSelecting query engine 0: The question is asking for a summarization of the documents needed for studying abroad in Switzerland..
[0m학사 학위증명서, 성적 증명서, 영어능력시험 성적증명서, 추천서, 자기소개서, 비자 신청서, 계좌정리, 모바일 OTP 설정, 외국환은행 지정 등록, 장기체류 보험, 원화 결제 차단 등록, 당장 가서 쓸 돈 환전, Motivation Letter, 합격 발표 확인서, UZH지원포탈 확인, 36학점 리스트, 학부 졸업증명서, 학부 성적표, 고등학교 졸업증명서, 고등학교 성적표, 여권, 영어성적, CV, 재정증명서, Course description, Letter of recommendation, 학교 지원서류, 재정증빙, Mastersportal 위시리스트, 대학 랭킹 참고, 대학리스트, 레딧, 헬로우톡 활용, 대학교이름+ranking 검색.


In [None]:
print(len(response.source_nodes))

278


In [None]:
response = query_engine.query(
    "유학을 준비하면서 가장 힘들었던 건 뭐였어? 한국어로 대답해줘."
)
print(str(response))

[1;3;38;5;200mSelecting query engine 0: This choice is most relevant as it is asking for a summarization of the most difficult aspect of preparing for studying abroad..
[0m음식에 대한 걱정, 학부 전공과 석사 전공 간의 연관성, 항공권과 수하물 선택, 서류 준비, 합격률이 낮은 학교에 대한 긴장과 불안이 유학을 준비하면서 가장 힘들었던 것들이었어.
