In [None]:
import json 
import os 
import pandas as pd    
import time  
from time import sleep
from dotenv import load_dotenv 
from tenacity import retry, stop_after_attempt, wait_random_exponential
from openai import OpenAI  
from supabase import create_client, Client

# Load variables from a local .env file into the process environment so the
# credentials below never have to be written into the notebook itself.
load_dotenv('.env')

# Credentials / endpoints; each is None when the variable is not set.
OPENAI_KEY = os.environ.get("OPENAI_KEY")
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
# NOTE: the constant name is misspelled ("EMBDDING"); it is kept as-is because
# the embedding cell further down refers to it by this exact name.
EMBDDING_MODEL = "text-embedding-ada-002"

# OpenAI client used for the embedding requests.
client = OpenAI(
    api_key=OPENAI_KEY
)

# Supabase client used to insert the embedded reviews.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# CSV cache for the generated embeddings. The location can be overridden with
# the EMB_FILE_PATH environment variable (e.g. in .env) so the notebook is not
# tied to one machine; when unset, the original absolute path is used.
emb_file_path = os.environ.get(
    "EMB_FILE_PATH",
    '/Users/baileygimpel/Desktop/Review_Search/review_data/airline_review_embeddings.csv',
)

In [None]:
# Read the review table from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='processed_airline_reviews.csv')

# Print the (rows, columns) tuple, then leave the first rows as the cell output.
print(df.shape)

df.head()

In [None]:
# Request budget; presumably the account's requests-per-minute limit for the
# embeddings endpoint -- TODO confirm against the OpenAI account settings.
RPM = 500
# Seconds to wait before each request so that at most RPM requests are issued
# per minute: 60 s / 500 requests = 0.12 s. (The previous RPM / 60 gave ~8.3 s
# per request, i.e. roughly 7 requests per minute.)
sleep_time = 60 / RPM


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def get_embedding(text, model):
    """Return the embedding vector for ``text`` from the OpenAI embeddings API.

    Args:
        text: String to embed. Newlines are replaced with spaces before the
            request is sent.
        model: Name of the embedding model to pass to the API.

    Returns:
        The ``embedding`` attribute of the first item in the response data.

    Raises:
        tenacity.RetryError: if all 6 attempts fail. Any exception raised in
            the body (including one caused by a non-string ``text``) triggers
            a retry with random exponential backoff between 1 and 60 seconds.
    """
    # Pace requests; this sits inside the retried function, so every retry
    # attempt is also delayed by sleep_time on top of the backoff.
    time.sleep(sleep_time)
    text = text.replace("\n", " ")
    response = client.embeddings.create(input=text, model=model)
    return response.data[0].embedding


# Compute the embeddings once and cache them as CSV; later runs reuse the file.
# Either way the result is exposed as `emb_df` for the cells that follow.
if not os.path.exists(emb_file_path):

    # Make sure the target directory exists *before* paying for the embedding
    # requests, so a missing folder cannot discard the results at save time.
    emb_dir = os.path.dirname(emb_file_path)
    if emb_dir:
        os.makedirs(emb_dir, exist_ok=True)

    df['embedding'] = df.embedding_input.apply(lambda x: get_embedding(x, model=EMBDDING_MODEL))

    df.to_csv(emb_file_path, index=False)
    print("Embeddings generated and saved to CSV.")

    # Previously `emb_df` was only bound in the else-branch, so a first run
    # failed with NameError in the insert cell. Re-reading the file we just
    # wrote gives exactly the same frame a cached run would see.
    emb_df = pd.read_csv(emb_file_path)

else:

    emb_df = pd.read_csv(emb_file_path)
    print("CSV file with embeddings loaded.")

In [None]:
# Upload the embedded reviews to the "airline_reviews" table.
# Rows are sent in batches instead of one HTTP request per row; the payload
# of each record is the same as before (review text + its embedding value).
INSERT_BATCH_SIZE = 100  # rows per request; keeps each request body moderate

# .tolist() yields plain Python objects, which the client can serialise.
records = [
    {
        "input_content": content,
        "embedding": embedding
    }
    for content, embedding in zip(
        emb_df['embedding_input'].tolist(),
        emb_df['embedding'].tolist(),
    )
]

# An empty frame produces no batches and therefore no requests.
for start in range(0, len(records), INSERT_BATCH_SIZE):
    batch = records[start:start + INSERT_BATCH_SIZE]
    supabase.table("airline_reviews").insert(batch).execute()