# In [1]:
import os
import openai
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import euclidean

# Read the OpenAI API key from the OPENAI_API_KEY environment variable and
# hand it to the openai module.
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Stop immediately when the key is unset or empty.
if not openai.api_key:
    raise ValueError(
        "The OpenAI API key is missing. Please set the environment variable 'OPENAI_API_KEY'."
    )

def get_embedding(text, model="text-embedding-3-small"):
    """Request an embedding for ``text`` via ``openai.embeddings.create``.

    Args:
        text: Value passed as ``input`` to the embeddings call.
        model: Embedding model name forwarded to the call.

    Returns:
        The ``embedding`` attribute of the first entry in the response's
        ``data``, or ``None`` when an ``AttributeError`` was raised while
        making the call or reading the response.
    """
    try:
        result = openai.embeddings.create(input=text, model=model)
        return result.data[0].embedding
    except AttributeError as err:
        # Only AttributeError is handled here; any other exception propagates
        # to the caller.
        print(f"Error: {err}")
    return None

def save_embedding(text, embedding, filename='embeddings.json'):
    """Append a ``{'text', 'embedding'}`` record to the JSON list in ``filename``.

    The file is read (if it exists), the new record is appended to the list
    it contains, and the whole list is written back.

    Args:
        text: The text the embedding belongs to.
        embedding: The embedding vector. Objects exposing ``tolist()`` (such
            as NumPy arrays) are converted to plain lists before being stored.
        filename: Path of the JSON file holding the list of records.

    Raises:
        TypeError: If the record cannot be serialized to JSON. The existing
            file content is left untouched in that case.
        json.JSONDecodeError: If the existing file is non-empty but does not
            contain valid JSON.
    """
    # NumPy arrays are not JSON serializable as-is; store them as lists.
    if hasattr(embedding, 'tolist'):
        embedding = embedding.tolist()
    record = {
        'text': text,
        'embedding': embedding
    }

    all_data = []
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as file:
            content = file.read()
        # An existing but empty file is treated as "no records yet".
        if content.strip():
            all_data = json.loads(content)
    all_data.append(record)

    # Serialize BEFORE opening for writing: mode 'w' truncates the file, so a
    # serialization error must surface while the old data is still intact.
    serialized = json.dumps(all_data)
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(serialized)

def load_embeddings(filename='embeddings.json'):
    """Load the stored embedding records from ``filename``.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON content of the file, or an empty list when the file
        does not exist or contains only whitespace.

    Raises:
        json.JSONDecodeError: If the file is non-empty but not valid JSON.
    """
    if not os.path.exists(filename):
        return []
    with open(filename, 'r', encoding='utf-8') as file:
        content = file.read()
    # Treat an empty file the same as a missing one instead of failing to
    # decode it.
    if not content.strip():
        return []
    return json.loads(content)

def visualize_embeddings(embeddings):
    """Scatter-plot the first two components of each embedding record.

    Only components 0 and 1 of each vector are plotted; no dimensionality
    reduction is applied.

    Args:
        embeddings: Iterable of records with an ``'embedding'`` sequence and
            a ``'text'`` value, which is used as the legend label.
    """
    plt.figure(figsize=(10, 7))
    for record in embeddings:
        vector = record['embedding']
        plt.scatter(vector[0], vector[1], label=record['text'])
    plt.legend()
    plt.title('2D Visualization of Embeddings')
    plt.show()

def calculate_difference(embedding1, embedding2):
    """Return the element-wise difference ``embedding1 - embedding2``.

    Args:
        embedding1: Vector to subtract from.
        embedding2: Vector to subtract.

    Returns:
        The result of ``np.subtract`` applied to the two inputs.
    """
    delta = np.subtract(embedding1, embedding2)
    return delta

def add_vector_to_embeddings(embeddings, vector):
    """Add ``vector`` element-wise to the embedding of every record.

    Args:
        embeddings: Iterable of records with an ``'embedding'`` entry.
        vector: Vector added to each record's embedding.

    Returns:
        A list holding one ``np.add`` result per record, in input order. The
        input records are not modified.
    """
    shifted = []
    for record in embeddings:
        shifted.append(np.add(record['embedding'], vector))
    return shifted

def calculate_distances(embedding1, embedding2):
    """Compute the cosine similarity and Euclidean distance of two vectors.

    Args:
        embedding1: First vector.
        embedding2: Second vector.

    Returns:
        A ``(cosine_similarity, euclidean_distance)`` tuple.
    """
    # Each vector is wrapped in a list so it is passed as a single-row input;
    # the similarity of the pair is then the [0][0] entry of the result.
    similarity_matrix = cosine_similarity([embedding1], [embedding2])
    distance = euclidean(embedding1, embedding2)
    return similarity_matrix[0][0], distance

# Main code to interact with embeddings
embeddings_file = 'embeddings.json'
embeddings_data = load_embeddings(embeddings_file)

# Prompt for texts until the user enters 'exit' (any case) or an empty line;
# each successfully embedded text is appended to the embeddings file.
while True:
    text = input("Enter text to get embedding (or 'exit' to quit): ").strip()
    if text.lower() == 'exit' or text == '':
        print("Exiting the program.")
        break
    embedding = get_embedding(text)
    if embedding:
        # Write explicitly to the same file that is reloaded below.
        save_embedding(text, embedding, embeddings_file)
        print(f"Embedding for '{text}' saved.")

# Reload so the records saved during the loop above are included.
embeddings_data = load_embeddings(embeddings_file)

# Visualization
embeddings_to_visualize = embeddings_data[:2]  # Only taking the first 2 for visualization purposes
visualize_embeddings(embeddings_to_visualize)

if len(embeddings_to_visualize) < 2:
    # The comparison below needs a pair of vectors; without this guard the
    # script would fail with an IndexError when fewer than two are stored.
    print("At least two stored embeddings are required to calculate differences and distances.")
else:
    # Calculating differences
    embedding1 = embeddings_to_visualize[0]['embedding']
    embedding2 = embeddings_to_visualize[1]['embedding']
    difference_vector = calculate_difference(embedding1, embedding2)

    # Adding the resulting vector to other embeddings
    updated_embeddings = add_vector_to_embeddings(embeddings_data, difference_vector)

    # Calculating distances
    cos_sim, euc_dist = calculate_distances(embedding1, embedding2)
    print(f"Cosine Similarity: {cos_sim}")
    print(f"Euclidean Distance: {euc_dist}")
