In [5]:
import os
import json
from tqdm import tqdm

In [6]:
# 파일 위치 지정
keys_file_path = os.path.join('data', 'api_keys.txt')

# 파일에서 API 키를 로드하는 함수
def load_api_keys(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        keys = json.load(file)
    return keys

# API 키 사용
api_keys = load_api_keys(keys_file_path)
pinecone_keys = api_keys['pinecone_key1']

In [7]:
from pinecone import Pinecone, ServerlessSpec

# Pinecone 연결 및 index 설정
pc = Pinecone(api_key=pinecone_keys)
index = pc.Index('minilm')

index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.41754,
 'namespaces': {'': {'vector_count': 41754}},
 'total_vector_count': 41754}

In [8]:
file_path = os.path.join(os.getcwd(), "data", "processed_data.txt")

# 문장을 저장할 배열 초기화
sentences = []

# 파일 열기
with open(file_path, "r", encoding="utf-8") as file:
    # 파일의 각 줄을 읽어와 배열에 추가
    for line in file:
        sentences.append(line.strip())

In [13]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Assume sentences is a list of input sentences
embedded_sentences = []

# Encode each sentence and store the embeddings
for sentence in tqdm(sentences):
    embedded_sentences.append(model.encode(sentence))

print(embedded_sentences[0]) # 186개 문장을 전부 임베딩

100%|██████████| 186/186 [00:01<00:00, 96.12it/s] 


[ 3.11310752e-03 -1.56439617e-02  5.08689322e-02  1.52268374e-04
 -1.67714953e-02 -2.43715756e-02  4.28456999e-02 -6.47027120e-02
  3.78207788e-02 -2.82120947e-02  1.98299158e-02 -5.90517521e-02
 -3.56922857e-02 -2.47333087e-02 -1.91530194e-02 -1.31397828e-01
 -1.82549655e-02 -1.22621441e-02 -1.36688203e-02 -9.34328660e-02
  1.46321645e-02  2.18940224e-03  3.63208801e-02 -2.29838751e-02
  1.04088530e-01  4.17623436e-03  1.18183769e-01  2.40357127e-02
  5.30165993e-02  5.28994761e-02  1.07629538e-01  7.27605773e-03
 -9.59043652e-02 -1.77922398e-02 -5.71395867e-02 -2.48129833e-02
 -1.59236938e-02  6.53056726e-02 -3.06698177e-02  1.14739798e-02
 -2.08299793e-02 -1.98709555e-02  1.04911640e-01 -4.63728141e-03
  4.30373959e-02  9.54070129e-03 -4.88798656e-02 -8.10768902e-02
  8.56835581e-03 -4.16736156e-02 -8.08678120e-02 -3.28446589e-02
  5.54848760e-02  2.29463130e-02  4.26597521e-02 -2.31313426e-02
  3.42331380e-02  8.57734680e-02 -1.16089091e-03  5.46230525e-02
 -1.00753181e-01  1.86795

In [14]:
# Initialize an empty list to collect all retrieved assertions
all_assertions = []

# Loop through each encoded sentence and query the index for matches
for i, encoded_sentence in enumerate(tqdm(embedded_sentences)):
    # Query the index using the current encoded sentence
    matches = index.query(
        vector=encoded_sentence.tolist(),
        top_k=5,  # Adjust the number of matches to retrieve as needed
        include_values=False,
        include_metadata=True,
    )
    
    # Extract the assertions metadata from each match
    assertions_list = [match['metadata']['assertion'].strip() for match in matches['matches']]

    # Collect all results in the overall list
    all_assertions.append(' '.join(assertions_list))

# If you want to store or process the combined assertions list
print(all_assertions)

100%|██████████| 186/186 [00:47<00:00,  3.89it/s]

['Koreans usually eat rice with a spoon, which is different from other Asian countries. Koreans use chopsticks and spoons to eat their meals. Stir-frying is a quick and easy way to cook food that is popular in Asian cuisine. Korean chopsticks are typically made of metal, are flat, and have blunt or sharp ends. Korean food is meant to be shared by family and friends, and is often served in large portions.', 'Turkey is known for its spices, which are used in many different ways in Turkish cuisine. Turkish cuisine uses a variety of spices to enhance the flavor of their dishes. Turkish cuisine has a variety of meat dishes that are cooked in different ways. Turkish cuisine features a variety of soups which are often served with Turkish bread. Turkish cuisine is a fusion of Turkish, Arabic, Persian, and American cuisines.', 'The cuisine of Egypt has a long and rich history, with many dishes dating back to ancient times. Ethiopian dishes use many exotic spice blends, which can vary in spice l




In [15]:
sentence = model.encode("korea famous food")

matches = index.query(
    vector=sentence.tolist(),
    top_k=5,  # Adjust the number of matches to retrieve as needed
    include_values=False,
    include_metadata=True,
)

# Extract the assertions metadata from each match
assertions_list = [match['metadata']['assertion'].strip() for match in matches['matches']]
print(assertions_list)

['The staple food of Korea is rice.', 'Korea is a great place for food with many unique and delicious options.', 'Korean food is healthy and delicious.', 'Korea is a country with a rich culture and cuisine, with a variety of traditional and modern dishes.', 'Korean food is delicious and affordable.']


In [16]:
len(all_assertions)

186

In [17]:
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser

# LangChain이 지원하는 다른 채팅 모델을 사용합니다. 여기서는 Ollama를 사용합니다.
llm = ChatOllama(model="llama2")

In [18]:
from langchain.prompts import PromptTemplate

template = """Information: {retrieval}
Question: {query}"""

prompt = PromptTemplate.from_template(template=template)

In [19]:
# LangChain 표현식 언어 체인 구문을 사용합니다.
# LCEL에 대한 자세한 내용은 https://python.langchain.com/docs/expression_language/why 에서 확인할 수 있습니다.
chain = prompt | llm | StrOutputParser()

In [20]:
query = "Just answer the questions and don't say anything else. \n### Reconstruct the first as the main sentence, using the rest as supporting information, into 2-3 sentences."

In [21]:
with open("data/result.txt", "w") as file:
    for retrieval in tqdm(list(zip(sentences, all_assertions))[:5]):
        result = chain.invoke({"retrieval": retrieval, "query":query})
        print(f"retrieval: {retrieval}\nreulst: {result}\n############################\n")
        file.write(result + '\n')

 20%|██        | 1/5 [00:07<00:31,  7.98s/it]

retrieval: ('The young man is stirring his pot of korean food with a wooden spoon.', 'Koreans usually eat rice with a spoon, which is different from other Asian countries. Koreans use chopsticks and spoons to eat their meals. Stir-frying is a quick and easy way to cook food that is popular in Asian cuisine. Korean chopsticks are typically made of metal, are flat, and have blunt or sharp ends. Korean food is meant to be shared by family and friends, and is often served in large portions.')
reulst: Sure! Here is a reconstructed version of the text that answers your request:

The young man is stirring his pot of Korean food with a wooden spoon, which is a common practice in Korean cuisine. Koreans typically use chopsticks and spoons to eat their meals, and stir-frying is a popular cooking method in Asian cuisine. The Korean chopsticks are made of metal and have blunt or sharp ends, which are designed for ease of use and comfort during meals.
############################



 40%|████      | 2/5 [00:12<00:17,  5.70s/it]

retrieval: ('The young man is stirring his pot of turkish food with a wooden spoon.', 'Turkey is known for its spices, which are used in many different ways in Turkish cuisine. Turkish cuisine uses a variety of spices to enhance the flavor of their dishes. Turkish cuisine has a variety of meat dishes that are cooked in different ways. Turkish cuisine features a variety of soups which are often served with Turkish bread. Turkish cuisine is a fusion of Turkish, Arabic, Persian, and American cuisines.')
reulst: Sure! Here is the reconstructed sentence:

The young man is stirring his pot of Turkish food with a wooden spoon, indicative of the rich flavors and spices used in Turkish cuisine. Turkey is known for its diverse use of spices, which are fundamental to many dishes in Turkish cooking. From meat dishes cooked in various ways to soups served with Turkish bread, the variety of dishes in Turkish cuisine reflects the blending of Turkish, Arabic, Persian, and American culinary traditions.

 60%|██████    | 3/5 [00:16<00:09,  4.96s/it]

retrieval: ('The young man is stirring his pot of egyptian food with a wooden spoon.', 'The cuisine of Egypt has a long and rich history, with many dishes dating back to ancient times. Ethiopian dishes use many exotic spice blends, which can vary in spice level from mild to hot. He enjoys Italian, French, and Egyptian cuisine. The cuisine of Egypt is a very rich cuisine that has many unique customs. Ethiopian cuisine is characterized by its use of spices, which gives it its characteristic taste.')
reulst: Sure! Here is a reconstructed version of the text with the requested supporting information:

The young man stirs his pot of Egyptian food with a wooden spoon, enjoying the rich and varied cuisine of this ancient land. Ethiopian dishes use many exotic spice blends that can range in spice level from mild to hot, while Italian and French cuisines also hold a special place in his heart. The unique customs surrounding Egyptian food make it a truly distinctive culinary experience.
########

 80%|████████  | 4/5 [00:20<00:04,  4.67s/it]

retrieval: ('The young man is stirring his pot of thai food with a wooden spoon.', 'Thai food is eaten with a spoon and fork. Thai cuisine is known for its use of spices. Thai people generally use a fork and spoon to eat, rather than chopsticks. Thai food is characterized by the use of fresh herbs and spices. Thai cooking is a unique and exciting cuisine that is famous for its fragrant, tangy flavours.')
reulst: Sure! Here is a possible reconstruction of the text as a main sentence with supporting information:

The young man stirs his pot of Thai food with a wooden spoon, illustrating the common utensil used in Thai cuisine. Thai food is typically eaten with a fork and spoon, rather than chopsticks, and is known for its bold flavors derived from fresh herbs and spices. Thai cooking is renowned for its unique blend of fragrant and tangy tastes, which sets it apart from other cuisines.
############################



100%|██████████| 5/5 [00:24<00:00,  4.95s/it]

retrieval: ('The young man is stirring his pot of mexican food with a wooden spoon.', 'Mexican dishes are typically filled with spices, which give them their characteristic flavor. Mexican cuisine is a blend of many different cultures and influences. The man simply knows authentic Mexican food. Mexican food is a blend of Spanish and indigenous cuisine. Mexican cuisine is a blend of various European countries food particularly Spanish food.')
reulst: Sure! Here is a reconstructed version of the text with the requested changes:

The young man is stirring his pot of Mexican food with a wooden spoon, demonstrating his knowledge of authentic Mexican cuisine. Mexican dishes are typically filled with spices, which give them their characteristic flavor, and Mexican cuisine is a blend of many different cultures and influences. The man's expertise in Mexican food reflects the rich cultural heritage of the country, where Spanish and indigenous cuisines have combined to create a unique and flavorf


