https://openai.com/blog/introducing-text-and-code-embeddings  
https://platform.openai.com/docs/guides/embeddings

In [5]:
from dotenv import load_dotenv
import os
# Copy the variables declared in the local .env file into the process environment.
load_dotenv()

# Look the API key up in the environment; the result is None when the variable is unset.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [6]:
import openai
import numpy as np # numpy is imported for vector operations.
# Give the OpenAI client the key that was read from the environment.
openai.api_key = OPENAI_API_KEY

In [7]:
def talk_with(persona, ask_user, n=3, model="gpt-3.5-turbo"):
    """Ask a chat model one question under a persona and collect every answer.

    Args:
        persona: Text sent as the "system" message.
        ask_user: Text sent as the "user" message.
        n: Number of completions requested from the API. Defaults to 3,
            the value this function previously hard-coded.
        model: Chat model name passed to the API. Defaults to
            "gpt-3.5-turbo", the value this function previously hard-coded.

    Returns:
        A list holding the message content of each choice in the response.
    """
    # The conversation consists of the persona followed by the user's question.
    messages = [
        {"role": "system", "content": persona},
        {"role": "user", "content": ask_user},
    ]

    # Send the conversation to the chat completion endpoint.
    result = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        n=n,
    )

    # Keep only the text of each returned choice.
    return [choice["message"]["content"] for choice in result["choices"]]

In [8]:
# The question is defined once so that the chat prompt and the embedded query
# are guaranteed to be the same text.
query = "Who do you think is the best soccer player and why"

# Engine used for every embedding request in this cell.
# NOTE(review): "text-similarity-davinci-001" is a first-generation embedding
# model; check OpenAI's deprecation notices and substitute a current embedding
# model if this one is no longer served.
EMBEDDING_ENGINE = "text-similarity-davinci-001"


def embed_texts(texts):
    """Return one embedding per input text, in the same order as `texts`.

    All texts are sent in a single Embedding request instead of one request
    per text. The returned items are sorted by their "index" field so the
    output order matches the input order.
    """
    response = openai.Embedding.create(
        input=list(texts),
        engine=EMBEDDING_ENGINE
    )
    ordered = sorted(response["data"], key=lambda item: item["index"])
    return [item["embedding"] for item in ordered]


def cosine_between(vec_a, vec_b):
    """Return the cosine similarity of two vectors."""
    return np.dot(vec_a, vec_b) / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))


# Get the chat model's answers to the question.
gpt_responses = talk_with(
    persona="""You are the best sports reporter, the audience will ask you to evaluate a soccer player. You have to be logical. You have to answer in one sentence""",
    ask_user=query
)

# The answers are the documents that get ranked against the query.
documents = gpt_responses

# Create the embeddings for the documents and for the query.
doc_embeddings = embed_texts(documents)
query_embedding = embed_texts([query])[0]

# Compute and print the cosine similarity between the query and each document.
cosine_similarities = []
for document, doc_embedding in zip(documents, doc_embeddings):
    cosine_similarity = cosine_between(query_embedding, doc_embedding)
    cosine_similarities.append(cosine_similarity)

    print('   query : ' + query)
    print('document : ' + document)
    print('cosine_similarity : ' + str(cosine_similarity))
    print('\n')

# Find the document with the highest similarity to the query.
max_index = np.argmax(cosine_similarities)
most_similar_document = documents[max_index]

# Print the result.
print("The most similar document to the query is:")
print('   query : ' + query)
print('document : ' + most_similar_document)


   query : Who do you think is the best soccer player and why
document : I cannot definitively name the best soccer player as it is a subjective topic and varies based on individual preferences and opinions.
cosine_similarity : 0.7451253144966979


   query : Who do you think is the best soccer player and why
document : It is difficult to pinpoint one individual as the best soccer player as it is subjective to personal preference and opinions, but players such as Lionel Messi and Cristiano Ronaldo have consistently showcased incredible skills, talent and tremendous success throughout their careers.
cosine_similarity : 0.6982998247370353


   query : Who do you think is the best soccer player and why
document : I cannot provide a definitive answer to who the best soccer player is because it is subjective and dependent on individual opinions and criteria.
cosine_similarity : 0.7431996705912162


The most similar document to the query is:
   query : Who do you think is the best soccer pla