In [6]:
import json

with open('credentials.json') as f:
    data = json.load(f)
    api_key = data['api_key']
    openai_api_key = data['openai_api_key']

In [7]:
from pinecone import Pinecone

pc = Pinecone(api_key=api_key)


In [8]:

with open("emoji.txt") as f:
    emoji = [line.split("#")[1].strip().split(" ", 2) for line in f if "fully-qualified" in line and not line.startswith("#")]

print(len(emoji))

1898


In [9]:
index = pc.Index("emoji")

In [10]:
from openai import AsyncOpenAI

async_client = AsyncOpenAI(
    api_key=openai_api_key,
)

In [11]:
import asyncio
from asynciolimiter import Limiter

limiter = Limiter(500/60)

async def analyze(e):
	await limiter.wait()
	chat = await async_client.chat.completions.create(messages=[
		{"role": "system", "content": "You are a helpful assistant that analyzes emoji. You will receive a message with an emoji and it's canonical name. You will respond with a description of the emoji and a short 1-2 sentence description of how it is used."},
		{"role": "user", "content": f"emoji: {e[0]}\name: {e[2]}"},
	], model="gpt-3.5-turbo", temperature=0.3)
	resp = chat.choices[0].message.content
	embedding = await async_client.embeddings.create(input=resp, model="text-embedding-3-small")

	return {
		"id": e[2],
		"values": embedding.data[0].embedding,
		"metadata": { "edition": float(e[1].removeprefix("E")), "name": e[2], "emoji": e[0], "description":  resp }
	}

tasks = [analyze(emo) for emo in emoji]
completed_tasks = await asyncio.gather(*tasks, return_exceptions=True)


In [12]:
print(len(completed_tasks))

5


In [13]:
for v in completed_tasks:
    v["id"] = v["id"].replace("’", "-").replace("ñ", "n").replace("“", "\"").replace("”", "\"").replace("Å", "A").replace("é", "e").replace("ô", "o").replace("ç", "c").replace("ã", "a").replace("í", "i").replace("ü", "u")

print(completed_tasks[0])

{'id': 'grinning face', 'values': [0.029121619, -0.0015660091, -0.011394021, ... , -0.010671864, -0.006253338], 'metadata': {'edition': 1.0, 'name': 'grinning face', 'emoji': '😀', 'description': 'The emoji 😀 is a grinning face with smiling eyes. It is often used to convey happiness, joy, or a friendly greeting in text messages or social media posts.'}}


In [31]:
index.upsert(vectors=completed_tasks, batch_size=100)

# print(len(c))

Upserted vectors: 100%|██████████| 1897/1897 [00:17<00:00, 109.13it/s]


{'upserted_count': 1897}

In [15]:
query = "Class pet"
from openai import OpenAI
client = OpenAI(
    api_key=openai_api_key
)
v = client.embeddings.create(input=[query], model="text-embedding-3-small").data[0].embedding

results = index.query(vector=v, top_k=10, include_metadata=True, include_values=False)

for result in results["matches"]:
    print(result["metadata"]["emoji"], result["score"])
    
print(results["matches"][0])

🐈 0.387731433
🐕 0.346858114
🐩 0.346812248
🐾 0.325074822
🐱 0.324121416
🐹 0.301057756
🐶 0.295934945
🐕‍🦺 0.295025021
🐈‍⬛ 0.290965855
😺 0.28406617
{'id': 'cat',
 'metadata': {'description': 'The emoji 🐈 depicts a cute and playful cat. It '
                             'is often used to represent pet cats, love for '
                             'cats, or to convey feelings of playfulness or '
                             'mischief.',
              'edition': 0.7,
              'emoji': '🐈',
              'name': 'cat'},
 'score': 0.387731433,
 'values': []}
