In [17]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the text file
with open('Example.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Define a text splitter to divide the text into manageable chunks
text_splitter = RecursiveCharacterTextSplitter(  
    separators=["\n", "\n\n"],    
    chunk_size=100,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)

# Split the text into chunks
texts = text_splitter.create_documents([text])

# Print first chunk to verify
print(texts[0].page_content)


한국폴리텍대학 학칙 제정 
1998. 02. 11
개정 
2000. 03. 01
개정 
2000. 09. 14
개정 
2001. 03. 01
개정 
2002. 03. 01
개정


In [18]:
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

# Create an embedding function using a sentence-transformer model
embedding_function = SentenceTransformerEmbeddings(model_name="jhgan/ko-sroberta-multitask")

# Print a test embedding from the first text chunk (optional)
test_embedding = embedding_function.embed_query(texts[0].page_content)
print(test_embedding[:10])  # Printing first 10 elements to see the embedding




[-0.5240543484687805, -0.3548991084098816, -0.16854596138000488, -0.13652780652046204, -0.055925656110048294, -0.5044718980789185, 0.02095988020300865, 0.4780733585357666, 0.27940991520881653, -0.007464896887540817]


In [19]:
from langchain.vectorstores import FAISS

# Create FAISS vector store from the documents and their embeddings
db = FAISS.from_documents(texts, embedding_function)

# Verify the store by performing a quick similarity search
query = "학생에 관한 학칙들 설명해줘."
docs = db.similarity_search(query)
print(docs[0].page_content)  # Print the first retrieved document



제2조(경과조치) 이 학칙 시행당시 시각디자인과, 컴퓨터애니메이션과에 재적중인 학생은 커뮤니케이션디자인과에, 멀티미디어과, 컴퓨터게임과에 재적중인 학생은 미디어콘텐츠과에, 패션디자인과에 재적중인 학생은 패션메이킹과에 재적하고 있는 것으로 본다. 부     칙


In [21]:
# Create a retriever for retrieving relevant documents
retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 5, 'fetch_k': 50})

# Test the retriever with a query
retrieved_docs = retriever.get_relevant_documents("학생에 관한 학칙들 설명해줘.")
print(retrieved_docs[0].page_content)  # Print the first relevant document



제2조(경과조치) 이 학칙 시행당시 시각디자인과, 컴퓨터애니메이션과에 재적중인 학생은 커뮤니케이션디자인과에, 멀티미디어과, 컴퓨터게임과에 재적중인 학생은 미디어콘텐츠과에, 패션디자인과에 재적중인 학생은 패션메이킹과에 재적하고 있는 것으로 본다. 부     칙


In [23]:
from langchain.prompts import ChatPromptTemplate

# Define a prompt template for answering the question based on the context
template = """
Answer the question based only on the following context:
{context}

Question: {question}
"""

# Create a prompt template object
prompt = ChatPromptTemplate.from_template(template)


In [25]:
from langchain_community.chat_models import ChatOllama

# Initialize the LLM model (adjust URL if needed)
llm = ChatOllama(model="gemma2:9b", temperature=0, base_url="http://127.0.0.1:11434/")

# Test the LLM with a sample question
response = llm("학생에 관한 학칙")
print(response.content)


ValueError: Received unsupported message type for Ollama.