In [None]:
import os

from dotenv import load_dotenv
from sqlalchemy.engine import make_url
import vecs

from src.data_ingestion.db.postgres_db import PostgresConfig

# Load variables from the local .env file into os.environ so that no
# credential has to be written in the notebook itself.
load_dotenv()

# Postgres connection settings. Every value is read with os.environ[...], so a
# missing variable fails right here with a KeyError naming the variable.
db_config = PostgresConfig(
    host=os.environ["SB_DDBB_HOST"],
    port=os.environ["SB_DDBB_PORT"],
    database=os.environ["SB_DDBB_DATABASE"],
    user=os.environ["SB_DDBB_USER"],
    password=os.environ["SB_DDBB_PWD"],
)

# Display the connection target for a visual check, with the password masked.
# The raw connection string embeds the database password, and cell outputs
# are stored in the saved notebook, so it must never be echoed as-is.
make_url(db_config.get_connection_string()).render_as_string(hide_password=True)

In [None]:
from sqlalchemy import create_engine


# Build a SQLAlchemy engine from the connection string. The engine is not
# bound to a name, so this cell only displays it; later cells cannot reuse it.
create_engine(db_config.get_connection_string())

In [None]:
# Create a vecs client from the same connection string.
# NOTE(review): `vx` is not referenced again in the cells visible here — confirm it is still needed.
vx = vecs.create_client(db_config.get_connection_string())

In [None]:
# Show the connection target with the password masked. The raw connection
# string embeds the database password, and cell outputs are stored in the
# saved notebook, so it must not be displayed as-is.
from sqlalchemy.engine import make_url

make_url(db_config.get_connection_string()).render_as_string(hide_password=True)

## LangChain vector store + Supabase client

In [40]:
import os
from dotenv import load_dotenv

from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import create_client

# Load variables from the local .env file into os.environ; SB_URL and
# SB_API_KEY are read from there when the Supabase client is created.
load_dotenv()

# Schema selected on the Supabase client and table used by the vector store.
SCHEMA_NAME = "vecs"
NEWS_TABLE_NAME = "sp500_news"

In [46]:
# One hand-written headline, used as a minimal sample document for the
# first insertion into the vector store.
docs = [
    Document(
        page_content="Monsters of Rock: Northern Star defends hedges as gold prices spike; Metals Acquisition bats off M&A talk",
        metadata={"id": "vec3"},
    )
]

# Embedding model shared by every insertion and search in this notebook.
embeddings = OpenAIEmbeddings()

# Supabase client built from the URL and API key in the environment, then
# switched to the schema named by SCHEMA_NAME.
supabase_client = create_client(
    supabase_url=os.environ["SB_URL"],
    supabase_key=os.environ["SB_API_KEY"],
).schema(SCHEMA_NAME)

In [None]:
# Embed the sample document(s) and store them in the news table.
# query_name is passed explicitly (it is the same value as the library
# default) so this call reads like the batch insertion call instead of
# carrying a commented-out argument.
vector_store = SupabaseVectorStore.from_documents(
    docs,
    embeddings,
    client=supabase_client,
    table_name=NEWS_TABLE_NAME,
    query_name="match_documents",
    chunk_size=50,
)

### Batch document insertion

In [66]:
## Batch insertion
import pandas as pd
# Directory (relative path) that holds the downloaded CSV files.
DATA_PATH = "../src/data_ingestion/downloader/data"

# CSV file to load from DATA_PATH.
file = "news_2024_07_01_2024_07_22.csv"

# Read the CSV, keep the first row for each id, then the first row for each
# description, and finally cap the sample at the first 1000 remaining rows.
df_news = (
    pd.read_csv(os.path.join(DATA_PATH, file))
    .drop_duplicates(subset=["id"])
    .drop_duplicates(subset=["description"])
    .head(1000)
)

In [67]:
def _row_to_document(row):
    """Convert one row of the news frame into a Document.

    The page content is "<title>: <description>"; the id, source, tags,
    tickers and published date columns are carried along as metadata.
    """
    # NOTE(review): a missing title/description is interpolated as the text
    # "nan", and missing metadata values are passed through unchanged —
    # confirm the input file has no gaps in these columns.
    text = f"{row['title']}: {row['description']}"
    extra = {
        "external_id": row["id"],
        "source": row["source"],
        "tags": row["tags"],
        "tickers": row["tickers"],
        "publish_date": row["publishedDate"],
    }
    return Document(page_content=text, metadata=extra)


# One Document per row of df_news, in row order.
list_docs = df_news.apply(_row_to_document, axis=1).to_list()

In [None]:
# Embed the batch of news documents and store them in the news table,
# sending them in chunks of 200. The table name comes from NEWS_TABLE_NAME
# rather than a repeated string literal, so both insertion calls always
# target the same table.
vector_store = SupabaseVectorStore.from_documents(
    list_docs,
    embeddings,
    client=supabase_client,
    table_name=NEWS_TABLE_NAME,
    query_name="match_documents",
    chunk_size=200,
)

In [69]:
# The search calls the Supabase function vecs.match_documents. That function
# expects query_embedding; the limit and the score threshold are managed by
# the LangChain vector_store Python code.
# NOTE: `docs` rebinds the name used earlier for the sample document list.

question = "Bad news with prices lowering"
docs = vector_store.similarity_search(question, k=5, score_threshold=0.5)
docs

2024-07-24 11:30:55,063:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-24 11:31:01,053:INFO - HTTP Request: POST https://ntcmnflewcnxcafoslkj.supabase.co/rest/v1/rpc/match_documents?limit=5 "HTTP/2 200 OK"


[Document(page_content='Markets Sell the News After Satisfying CPI: The Inflation Rate came in better than expected for a soft landing, but the S&P and Nasdaq posted their worst trading day in two months.'),
 Document(page_content='Markets Sell The News After Satisfying CPI: Even as this morning’s much-anticipated inflation data came in better than expected, the S&P 500 and the Nasdaq managed to log their worst trading days in more than two months.'),
 Document(page_content="Has Nike Stock Become Too Cheap to Pass Up?: The company's recent earnings results weren't impressive, and they led to a steep sell-off."),
 Document(page_content='2 Stocks Down 74% and 57.5% to Buy Right Now: Taking a buy-and-hold approach with these beaten-down stocks could reward investors with market-crushing returns.'),
 Document(page_content='3 Beaten-Down Stocks Ready for a Jaw-Dropping Rebound: Investors should see a buying opportunity in these beaten-down stocks with their share prices hovering at or near 

In [70]:
# Topic-specific query: request up to 5 matches and pass a score threshold of 0.5.
question = "What are the latest gold news?"
docs = vector_store.similarity_search(question, k=5, score_threshold=0.5)
docs

2024-07-24 11:31:03,373:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-24 11:31:03,738:INFO - HTTP Request: POST https://ntcmnflewcnxcafoslkj.supabase.co/rest/v1/rpc/match_documents?limit=5 "HTTP/2 200 OK"


[Document(page_content='Commodity Roundup: Gold near fresh record high, China demand concerns weigh on oil: Oselote/iStock via Getty Images Gold prices extended their rally to Tuesday, trading close to an -all-time high\xa0amid rising hopes that the\xa0Federal...'),
 Document(page_content="2 ASX gold shares going gangbusters (and one crashing 11%!): There's a lot going on in the gold sector today."),
 Document(page_content='Gold Digger: State Street and ANZ bring golden days to Aussie investors: With bullion remaining around record highs, investors are seeing an increasing number of products leveraged to the success of the gold price.'),
 Document(page_content="Gold Miners Hit 27-Month Peak, Bullion Breaks Record As Traders Decide September Rate Cut As Good As Done: Gold reaches new all-time high above $2,460/oz amid heightened investor confidence in Fed rate cut in Sept. Jerome Powell's remarks support market expectations."),
 Document(page_content='3 Top Stocks to Buy as Gold Prices 

In [72]:
# Abstract query (presumably to probe matches with no obvious keyword overlap):
# request up to 5 matches and pass a score threshold of 0.5.
question = "Most dystopic news"
docs = vector_store.similarity_search(question, k=5, score_threshold=0.5)
docs

2024-07-24 11:36:56,382:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-24 11:37:00,906:INFO - HTTP Request: POST https://ntcmnflewcnxcafoslkj.supabase.co/rest/v1/rpc/match_documents?limit=5 "HTTP/2 200 OK"


[Document(page_content='Stock Market News From Amazon, Nike, McCormick, and More: We also talk with author Nicola Twilley about her new book, "Frostbite: How Refrigeration Changed Our Food, Our Planet, and Ourselves."'),
 Document(page_content="Trump Pours Cold Water On Industry, Tesla Falls Ahead Of Earnings Week, Ford's Step-back In EV Plans And More: Biggest EV Stories Of The Week: Even as EV companies struggle to find a footing, they could be in for more trouble amid the developments on the geopolitical front."),
 Document(page_content="Diary: 22 Jul 2024 - A big week lies ahead for the markets, with US inflation, the global fallout from Friday's major computer outage, the Trump comeback, Biden’s fade-out, investor nervousness about big techs (with the start of some significant earnings reports), and the endless speculation about a looming rate cut, which seems to be firming up at long last."),
 Document(page_content="Here Are The Markets Moving Higher As Trump's Election Odds Rise