In [7]:
import os
import pandas as pd
import numpy as np
import dotenv
from dotenv import load_dotenv
import openai
from openai import OpenAI
# Load variables from the local .env file before any client is created.
load_dotenv()
from sklearn.metrics.pairwise import cosine_similarity
# Client object used further down for the chat completion request.
client = OpenAI()
# Read the API key from the environment (populated from the .env file above)
# and set it on the openai module, which get_embedding calls directly.
openai.api_key = os.getenv("OPENAI_API_KEY")

### Generate embeddings using an encoder model (text-embedding-ada-002)

In [8]:
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: String to embed. Newlines are replaced with spaces before the
            request is sent.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` field of the first item in the response's ``data``.

    Raises:
        TypeError: If ``text`` is not a string (for example a NaN produced by
            an empty CSV cell).
        ValueError: If ``text`` is an empty string.
    """
    # Fail early with a clear message instead of an AttributeError on .replace
    # when a non-string value (e.g. NaN) comes out of a DataFrame column.
    if not isinstance(text, str):
        raise TypeError(
            f"text must be a string, got {type(text).__name__}"
        )
    # An empty input cannot be embedded; avoid spending a request on it.
    if not text:
        raise ValueError("text must be a non-empty string")

    cleaned = text.replace("\n", " ")
    response = openai.embeddings.create(input=cleaned, model=model)
    return response.data[0].embedding

### Reading in your data and information

In [9]:
# Only the first 10 rows are read; raise nrows to generate more embeddings,
# at the cost of more API credits.
input_data = pd.read_csv(filepath_or_buffer="economics_terms.csv", nrows=10)

### Sample Print Out

In [10]:
# Preview the first five rows of the loaded terms.
input_data.head(n=5)

Unnamed: 0,Description
0,Absolute advantage A concept that helps to exp...
1,Active management A branch ofinvestment manage...
2,Activist investing Fund managers who take a st...
3,Adverse selection A risk associated withinsura...
4,Agency costs The expense involved in using a t...


### Generate embeddings per row of the data

In [11]:
# One get_embedding call per description; the results are stored in a new
# "Embeddings" column on the same frame.
input_data["Embeddings"] = input_data["Description"].apply(get_embedding)

In [12]:
# Show the descriptions next to their embeddings. Capped at 10 rows so the
# output stays readable if more rows are loaded.
input_data.head(10)

Unnamed: 0,Description,Embeddings
0,Absolute advantage A concept that helps to exp...,"[-0.007806450128555298, -0.019596705213189125,..."
1,Active management A branch ofinvestment manage...,"[-0.026157651096582413, -0.02481149509549141, ..."
2,Activist investing Fund managers who take a st...,"[-0.016879232600331306, -0.041712164878845215,..."
3,Adverse selection A risk associated withinsura...,"[0.001439624116756022, -0.0018853959627449512,..."
4,Agency costs The expense involved in using a t...,"[0.01102621853351593, -0.020206065848469734, 0..."
5,"Aggregate demand The flow of spending, across ...","[-0.001824539969675243, -0.031350355595350266,..."
6,Agriculture The cultivation of crops and the t...,"[0.010470718145370483, -0.018756134435534477, ..."
7,Alpha That part of an investment return that i...,"[0.024826332926750183, -0.023480484262108803, ..."
8,Amortisation The gradual reduction in the valu...,"[-0.029106151312589645, -0.005032192450016737,..."
9,Animal spirits Term used by John Maynard Keyne...,"[-0.02226892113685608, -0.025230351835489273, ..."


### Ask your question here 

In [13]:
# Natural-language query to match against the term descriptions.
question = "Any terms related towards food source?"

### Generate an embedding for the question

In [14]:
# Embed the question with the same function (and default model) used for the descriptions.
question_embed = get_embedding(question)

### Find best suited matches via cosine similarity

In [15]:
# Score every description against the question with cosine similarity.
# The question vector becomes a single-row matrix; the stored embeddings
# become a matrix with one row per description.
question_embed_array = np.array(question_embed)
question_embed_reshaped = question_embed_array.reshape((1, -1))
embeddings_array = np.array(input_data["Embeddings"].to_list())

similarities = cosine_similarity(X=question_embed_reshaped, Y=embeddings_array)

# Collapse the similarity matrix to a 1-D array of scores.
similarities_flattened = similarities.flatten()

# argsort orders indices by ascending score, so reversing it puts the best
# match first; the leading four entries are then the top 4.
top_4_indices = np.argsort(similarities_flattened)[::-1][:4]

# Look up the matching descriptions by position, best match first.
top_4_descriptions = input_data["Description"].iloc[top_4_indices]

# Report the matches with a 1-based rank.
for rank, description in enumerate(top_4_descriptions, 1):
    print(f"Top {rank} Description: {description}")

Top 1 Description: Agriculture The cultivation of crops and the tending of animals for the purpose of supplying food. For millennia, this was mankind’s primary economic activity.
Top 2 Description: Animal spirits Term used by John Maynard Keynes to describe sentiment among businesspeople and consumers. If sentiment is depressed, economies may struggle to escape fromrecession. For more detail, readthis article. See alsoKeynesian economics.
Top 3 Description: Absolute advantage A concept that helps to explain international trade. If country A is better at making toasters than country B, and B is better at making kettles than A, it makes sense for each country to focus on the area where they have this advantage, and then trade toasters for kettles. But see, more importantly,comparative advantage.
Top 4 Description: Adverse selection A risk associated withinsurance, and linked toasymmetric information. People who are worried about their health will be more inclined to pay for health insura

In [20]:
# Join the retrieved descriptions into plain paragraphs so the prompt does not
# carry a Python list repr (brackets, quotes, escapes).
context_text = "\n\n".join(top_4_descriptions.tolist())

# Ask the chat model to answer the question from the retrieved context only.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "system",
            "content": (
                "You are a helpful assistant. Answer the question using only the "
                "context provided. If the answer is not in the context, say "
                "\"I don't know\"."
            ),
        },
        {
            "role": "user",
            "content": f"Context:\n{context_text}\n\nQuestion: {question}",
        },
    ],
)

In [23]:
# Extract the content text
content_text = response.choices[0].message.content
# Present the text
print(content_text)

The term "Agriculture" is related to the cultivation of crops and the tending of animals for the purpose of supplying food.
