# OpenAI Vector Embedding

https://platform.openai.com/docs/guides/embeddings

In [None]:
import requests
from dotenv import load_dotenv
import os
import json
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load variables from a local .env file into the process environment, then
# read the OpenAI API key from it.
load_dotenv()
key = os.getenv('OPENAI_API_KEY')
# os.getenv returns None when the variable is unset, so test truthiness
# instead of calling len() on a value that may be None.
if key:
    print("success")
else:
    print("OPENAI_API_KEY is not set")

In [None]:

# Endpoint of the OpenAI embeddings API.
url = 'https://api.openai.com/v1/embeddings'

# Request body: the embedding model to use and the text to embed.
data = {
    'model': 'text-embedding-3-small',
    'input': 'Hello World!'
}

# The API key is passed as a Bearer token.
headers = {
    'Authorization': f'Bearer {key}',
    'Content-Type': 'application/json'
}

# requests applies no timeout by default; set one so the cell cannot hang
# forever when the API is unreachable.
response = requests.post(url, json=data, headers=headers, timeout=30)
print(response.text)


In [None]:
# Fail with a clear HTTP error instead of an opaque KeyError on 'data' when
# the request was rejected (non-2xx status).
response.raise_for_status()
data = response.json()
first_item = data['data'][0]

# The vector itself is stored under the 'embedding' key of the first item.
embedding = first_item['embedding']
embedding


In [None]:
def getAllEmbeddings(sentences, key, model='text-embedding-3-small', timeout=30):
    """Fetch one embedding per sentence from the OpenAI embeddings API.

    Each sentence is sent in its own POST request to
    ``https://api.openai.com/v1/embeddings``.

    Args:
        sentences: Iterable of input texts to embed.
        key: OpenAI API key, sent as a Bearer token.
        model: Name of the embedding model to request.
        timeout: Seconds to wait for each HTTP request; without it
            ``requests`` would block indefinitely on an unresponsive server.

    Returns:
        A list with one entry per input sentence, in input order. Each entry
        is the ``embedding`` value of the first item in the response's
        ``data`` array, or an empty list when the request did not return
        HTTP 200. Callers that need a rectangular array must filter out
        these empty placeholders first.
    """
    url = 'https://api.openai.com/v1/embeddings'
    headers = {
        'Authorization': f'Bearer {key}',
        'Content-Type': 'application/json'
    }

    embeddings = []
    for sentence in sentences:
        payload = {
            'model': model,
            'input': sentence
        }
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)

        if response.status_code != 200:
            print(f"Fehler bei der Anfrage für Satz: {sentence}")
            # Keep a placeholder so result indices stay aligned with the input.
            embeddings.append([])
            continue

        embeddings.append(response.json()['data'][0]['embedding'])

    return embeddings

In [None]:
def reduce_dimensions(embeddings, components=2):
    """Reduce embedding vectors to a few dimensions with PCA.

    Args:
        embeddings: Sequence of embedding vectors; converted with
            ``np.array`` before fitting (presumably all vectors have the
            same length so the result is 2-D — verify against caller).
        components: Number of principal components to keep.

    Returns:
        The result of ``PCA(n_components=components).fit_transform`` applied
        to the converted array.
    """
    return PCA(n_components=components).fit_transform(np.array(embeddings))

In [None]:
def plot_embeddings(reduced_embeddings, labels=None):
    """Draw a labelled scatter plot of the first two embedding dimensions.

    Args:
        reduced_embeddings: Array indexed as ``[:, 0]`` and ``[:, 1]`` for
            the x and y coordinates of each point.
        labels: Optional annotation for each point, in point order. When
            omitted, points are annotated with their index.
    """
    plt.figure(figsize=(10, 8))
    xs, ys = reduced_embeddings[:, 0], reduced_embeddings[:, 1]
    plt.scatter(xs, ys, alpha=0.5)

    # Fall back to the running index when the caller supplies no labels.
    point_labels = range(len(reduced_embeddings)) if labels is None else labels
    for idx, text in enumerate(point_labels):
        # Place each annotation 10 points above its marker, centred.
        plt.annotate(
            text,
            (xs[idx], ys[idx]),
            textcoords="offset points",
            xytext=(0, 10),
            ha='center',
        )

    plt.title('Visualisierung der 2D Embeddings')
    plt.xlabel('Dimension 1')
    plt.ylabel('Dimension 2')
    plt.show()

In [None]:
# Experiment: embed four short German sentences and plot them in 2D.
# No labels are passed, so the plot falls back to its default annotation.
sentences = [
    "Der Sommer ist schön",
    "Fußballspielen macht Spaß",
    "Eis im Sommer schmeckt am besten",
    "Ferien sind das schönste",
]
embeddings = getAllEmbeddings(sentences, key)
reduced_embeddings = reduce_dimensions(embeddings)
plot_embeddings(reduced_embeddings)

In [None]:
# Experiment: the same four sentences plus one sentence on an unrelated
# topic, each point annotated with its sentence.
sentences = [
    "Der Sommer ist schön",
    "Fußballspielen macht Spaß",
    "Eis im Sommer schmeckt am besten",
    "Ferien sind das schönste",
    "Der Erste Weltkrieg begann offiziell am 28. Juli 1914 mit der Kriegserklärung Österreich-Ungarns an Serbien",
]
embeddings = getAllEmbeddings(sentences, key)
reduced_embeddings = reduce_dimensions(embeddings)
plot_embeddings(reduced_embeddings, labels=sentences)

In [None]:
# Experiment: single German words (king / queen / man / woman), each point
# annotated with its word.
sentences = [
    "König",
    "Königin",
    "Mann",  # German noun for "man"; "Man" is a different word
    "Frau",
]
embeddings = getAllEmbeddings(sentences=sentences, key=key)
reduced_embeddings = reduce_dimensions(embeddings)
plot_embeddings(reduced_embeddings=reduced_embeddings, labels=sentences)

In [None]:
# Experiment: the four German words (king / queen / man / woman) plus a
# made-up word, a random letter sequence and a string of digits and symbols.
sentences = [
    "König",
    "Königin",
    "Mann",  # German noun for "man"; "Man" is a different word
    "Frau",
    "Umpalumpa",
    "aoiufhaoiwfhjaipwf",
    "2348562=)(§/%"
]
embeddings = getAllEmbeddings(sentences=sentences, key=key)
reduced_embeddings = reduce_dimensions(embeddings)
plot_embeddings(reduced_embeddings=reduced_embeddings, labels=sentences)