In [1]:
import openai
import pandas as pd
import decouple

# Embedding model name and the CSV files this notebook reads from / writes to.
EMBEDDING_MODEL = "text-embedding-ada-002"
INPUT_DB_NAME = "Gx_reviews_database.csv"
OUTPUT_DB_NAME = "Gx_review_embeddings.csv"

# Look the API key up through python-decouple instead of hardcoding it here.
# NOTE(review): the " " search path looks like a workaround to anchor the
# settings lookup near the working directory — confirm before changing it.
config = decouple.AutoConfig(" ")
openai.api_key = config("CHATGPT_API_KEY")

In [2]:
# Load only the three columns used below, capped at the first 500 rows.
reviews_raw = pd.read_csv(
    INPUT_DB_NAME, usecols=["Summary", "Text", "Score"], nrows=500
)

# Drop reviews scored exactly 3 (presumably the neutral ones — confirm) and
# rows missing a title or body: concatenating a string with a missing value
# yields NaN, which is not text that can be embedded.
# The trailing .copy() makes `df` an independent frame, so adding a column
# below does not trigger pandas' SettingWithCopyWarning.
df = (
    reviews_raw[reviews_raw["Score"] != 3]
    .dropna(subset=["Summary", "Text"])
    .copy()
)

# Combine title and body into the single string that gets embedded.
df["Summ_and_Text"] = "Title: " + df["Summary"] + "; Content: " + df["Text"]
df.head(5)

Unnamed: 0,Score,Summary,Text,Summ_and_Text
0,5,Good Quality Dog Food,I have bought several of the Vitality canned d...,Title: Good Quality Dog Food; Content: I have ...
1,1,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...,Title: Not as Advertised; Content: Product arr...
2,4,"""Delight"" says it all",This is a confection that has been around a fe...,"Title: ""Delight"" says it all; Content: This is..."
3,2,Cough Medicine,If you are looking for the secret ingredient i...,Title: Cough Medicine; Content: If you are loo...
4,5,Great taffy,Great taffy at a great price. There was a wid...,Title: Great taffy; Content: Great taffy at a ...


In [3]:
# Running totals that get_embedding() updates and reports in its progress lines.
total_token_usage, embeddings_generated = 0, 0
# Number of rows to embed; the denominator of the progress percentage.
total_data_rows = len(df)


def get_embedding(item):
    """Return the embedding for ``item`` and update the usage counters.

    Sends ``item`` to ``openai.Embedding.create`` using ``EMBEDDING_MODEL``,
    adds the token count reported in the response to the module-level
    ``total_token_usage``, increments ``embeddings_generated``, and prints a
    progress line after every tenth embedding.

    Args:
        item: The input forwarded to the embeddings endpoint (the notebook
            passes one combined review string per call).

    Returns:
        The ``embedding`` field of the first entry in the response's ``data``.
    """
    global total_token_usage, embeddings_generated

    api_result = openai.Embedding.create(model=EMBEDDING_MODEL, input=item)

    total_token_usage += api_result["usage"]["total_tokens"]
    embeddings_generated += 1

    # Report only on every tenth call to keep the cell output short.
    report_due = embeddings_generated % 10 == 0
    if report_due:
        print(
            f"Generated {embeddings_generated} embeddings so far with a total of {total_token_usage} tokens used. ({int((embeddings_generated / total_data_rows) * 100)}%)"
        )

    first_entry = api_result["data"][0]
    return first_entry["embedding"]

In [4]:
# Embed every combined review string; get_embedding is called once per row.
df["Embedding"] = df["Summ_and_Text"].apply(get_embedding)

# Persist the reviews together with their embeddings, without the index column.
df.to_csv(OUTPUT_DB_NAME, index=False)

# Final summary built from the counters that get_embedding maintains.
summary_message = f"""
Generated {embeddings_generated} embeddings with a total of {total_token_usage} tokens used. (Done!)
Successfully saved embeddings to {OUTPUT_DB_NAME}.

    """
print(summary_message)

df.head(10)

Generated 10 embeddings so far with a total of 680 tokens used. (2%)
Generated 20 embeddings so far with a total of 1531 tokens used. (4%)
Generated 30 embeddings so far with a total of 2313 tokens used. (6%)
Generated 40 embeddings so far with a total of 3559 tokens used. (8%)
Generated 50 embeddings so far with a total of 4806 tokens used. (10%)
Generated 60 embeddings so far with a total of 5567 tokens used. (12%)
Generated 70 embeddings so far with a total of 6788 tokens used. (15%)
Generated 80 embeddings so far with a total of 8079 tokens used. (17%)
Generated 90 embeddings so far with a total of 9046 tokens used. (19%)
Generated 100 embeddings so far with a total of 9809 tokens used. (21%)
Generated 110 embeddings so far with a total of 11220 tokens used. (23%)
Generated 120 embeddings so far with a total of 12171 tokens used. (25%)
Generated 130 embeddings so far with a total of 12873 tokens used. (28%)
Generated 140 embeddings so far with a total of 13764 tokens used. (30%)
Ge

Unnamed: 0,Score,Summary,Text,Summ_and_Text,Embedding
0,5,Good Quality Dog Food,I have bought several of the Vitality canned d...,Title: Good Quality Dog Food; Content: I have ...,"[9.45914798649028e-05, 0.0025626542046666145, ..."
1,1,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...,Title: Not as Advertised; Content: Product arr...,"[-0.010502041317522526, -0.005715508479624987,..."
2,4,"""Delight"" says it all",This is a confection that has been around a fe...,"Title: ""Delight"" says it all; Content: This is...","[0.0007722130976617336, 0.006629731971770525, ..."
3,2,Cough Medicine,If you are looking for the secret ingredient i...,Title: Cough Medicine; Content: If you are loo...,"[0.010979381389915943, 0.01641949638724327, -0..."
4,5,Great taffy,Great taffy at a great price. There was a wid...,Title: Great taffy; Content: Great taffy at a ...,"[-0.01659574918448925, -0.0036598483566194773,..."
5,4,Nice Taffy,I got a wild hair for taffy and ordered this f...,Title: Nice Taffy; Content: I got a wild hair ...,"[-0.0024112274404615164, -0.003537999931722879..."
6,5,Great! Just as good as the expensive brands!,This saltwater taffy had great flavors and was...,Title: Great! Just as good as the expensive b...,"[-0.0027381693944334984, -0.008508416824042797..."
7,5,"Wonderful, tasty taffy",This taffy is so good. It is very soft and ch...,"Title: Wonderful, tasty taffy; Content: This t...","[-0.005907095968723297, -0.0032475811894983053..."
8,5,Yay Barley,Right now I'm mostly just sprouting this so my...,Title: Yay Barley; Content: Right now I'm most...,"[-0.007997295819222927, -0.011915299110114574,..."
9,5,Healthy Dog Food,This is a very healthy dog food. Good for thei...,Title: Healthy Dog Food; Content: This is a ve...,"[0.0029935531783849, 0.005754504352807999, 0.0..."
