In [None]:
from src.data_ingestion.db.postgres_db import PostgresConfig
import vecs
import os
from dotenv import load_dotenv

# Load the SB_DDBB_* settings from a local .env file into the process environment.
load_dotenv()

# Connection settings for the Postgres database. Indexing os.environ directly
# means a missing variable raises KeyError in this cell instead of surfacing
# later as an obscure connection error.
db_config = PostgresConfig(
    host=os.environ["SB_DDBB_HOST"],
    # NOTE(review): environment values are strings — confirm PostgresConfig
    # accepts the port as str.
    port=os.environ["SB_DDBB_PORT"],
    database=os.environ["SB_DDBB_DATABASE"],
    user=os.environ["SB_DDBB_USER"],
    password=os.environ["SB_DDBB_PWD"],
)

# The connection string is intentionally not echoed as the cell result: it is
# derived from the credentials above (presumably including the password), and
# cell outputs are saved together with the notebook.

In [None]:
from sqlalchemy import create_engine


# Build a SQLAlchemy engine from the Postgres connection string. The engine is
# not bound to a name; it is only this cell's displayed result.
# NOTE(review): this looks like a quick check that the URL is accepted —
# confirm whether an actual connection attempt was intended.
create_engine(db_config.get_connection_string())

In [None]:
# Create a vecs client from the same Postgres connection string.
# NOTE(review): `vx` is not referenced by any other cell shown here.
vx = vecs.create_client(db_config.get_connection_string())

In [None]:
# Display the connection target with the password masked, so credentials do
# not end up in the saved cell output.
from sqlalchemy.engine import make_url

make_url(db_config.get_connection_string()).render_as_string(hide_password=True)

## LangChain vector store + Supabase client

In [1]:
import os
from dotenv import load_dotenv

from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import create_client

# Load variables from a local .env file; SB_URL and SB_API_KEY are read from
# the environment further down.
load_dotenv()

# Schema selected on the Supabase client.
SCHEMA_NAME = "vecs"
# Table that receives the embedded news documents.
NEWS_TABLE_NAME = "sp500_news"

In [6]:
# A single hand-written headline, used to try the insert path end to end.
docs = [
    Document(
        page_content="Monsters of Rock: Northern Star defends hedges as gold prices spike; Metals Acquisition bats off M&A talk",
        metadata={"id": "vec3"},
    ),
]

# Embedding model used for every document inserted by this notebook.
embeddings = OpenAIEmbeddings()

# Supabase client built from the project URL and API key in the environment,
# with SCHEMA_NAME selected.
supabase_client = create_client(
    os.environ["SB_URL"],
    os.environ["SB_API_KEY"],
).schema(SCHEMA_NAME)

In [7]:
# Embed `docs` and insert them into NEWS_TABLE_NAME through the Supabase
# client; the returned store is kept in `vector_store`.
# NOTE(review): re-running this cell presumably inserts the same document
# again — confirm before executing it repeatedly.
vector_store = SupabaseVectorStore.from_documents(
    docs,
    embeddings,
    client=supabase_client,
    table_name=NEWS_TABLE_NAME,
    # query_name="match_documents" is deliberately not passed here.
    # NOTE(review): presumably the library default applies — confirm it matches
    # the match function defined in the database.
    chunk_size=50,
)

2024-07-24 10:28:22,194:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-24 10:28:25,633:INFO - HTTP Request: POST https://ntcmnflewcnxcafoslkj.supabase.co/rest/v1/sp500_news?columns=%22metadata%22%2C%22id%22%2C%22content%22%2C%22embedding%22 "HTTP/2 201 Created"


### Batch insertion of documents

In [28]:
# Load a capped sample of the downloaded news CSV for the batch insert.
import pandas as pd

DATA_PATH = "../src/data_ingestion/downloader/data"
file = "news_2024_07_01_2024_07_22.csv"

# Keep only the first 100 rows of the file.
df_news = pd.read_csv(
    os.path.join(DATA_PATH, file),
).iloc[:100]

In [29]:
def _none_if_missing(value):
    """Return None for a missing scalar (NaN/NaT/None); return anything else unchanged."""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    return value


def _news_row_to_document(row):
    """Build one Document from a single news record.

    Args:
        row: Mapping with the keys "title", "description", "id", "source",
            "tags", "tickers" and "publishedDate".

    Returns:
        A Document whose text is "<title>: <description>" and whose metadata
        carries the remaining fields. A missing title or description is left
        out of the text instead of being rendered as "nan", and missing
        metadata values are stored as None so the payload stays valid JSON.
    """
    text_parts = (
        _none_if_missing(row["title"]),
        _none_if_missing(row["description"]),
    )
    return Document(
        page_content=": ".join(str(part) for part in text_parts if part is not None),
        metadata={
            "external_id": _none_if_missing(row["id"]),
            "source": _none_if_missing(row["source"]),
            "tags": _none_if_missing(row["tags"]),
            "tickers": _none_if_missing(row["tickers"]),
            "publish_date": _none_if_missing(row["publishedDate"]),
        },
    )


# One Document per CSV row. Iterating over records instead of using
# DataFrame.apply(axis=1).to_list() also yields an empty list for an empty
# frame, where the apply-based form does not return a Series.
list_docs = [_news_row_to_document(row) for row in df_news.to_dict("records")]

In [33]:
# Embed the batch of news documents and insert them into the news table.
# The table name comes from NEWS_TABLE_NAME instead of a repeated string
# literal, so both inserts in this notebook always target the same table.
# NOTE(review): chunk_size presumably caps the rows sent per insert request —
# the recorded run shows two insert requests for this batch.
vector_store = SupabaseVectorStore.from_documents(
    list_docs,
    embeddings,
    client=supabase_client,
    table_name=NEWS_TABLE_NAME,
    query_name="match_documents",
    chunk_size=50,
)

2024-07-24 10:44:18,646:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-24 10:44:28,794:INFO - HTTP Request: POST https://ntcmnflewcnxcafoslkj.supabase.co/rest/v1/sp500_news?columns=%22metadata%22%2C%22id%22%2C%22content%22%2C%22embedding%22 "HTTP/2 201 Created"
2024-07-24 10:44:34,128:INFO - HTTP Request: POST https://ntcmnflewcnxcafoslkj.supabase.co/rest/v1/sp500_news?columns=%22metadata%22%2C%22id%22%2C%22content%22%2C%22embedding%22 "HTTP/2 201 Created"
