In [2]:
from fastapi import FastAPI
import psycopg2
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_postgres import PGVector

import os
from urllib.parse import quote_plus

# Database settings. Every value can be overridden through an environment
# variable; the fallbacks reproduce the values this cell previously
# hard-coded, so it keeps working on an unconfigured machine.
# NOTE(review): the fallback password is a secret committed to source --
# rotate it and drop the default once TECHVECTOR_DB_PASSWORD is provisioned.
DB_NAME = os.environ.get("TECHVECTOR_DB_NAME", "techvector")
DB_USER = os.environ.get("TECHVECTOR_DB_USER", "stefan")
DB_PASSWORD = os.environ.get("TECHVECTOR_DB_PASSWORD", "gigelfrone112")
DB_HOST = os.environ.get("TECHVECTOR_DB_HOST", "localhost")
DB_PORT = os.environ.get("TECHVECTOR_DB_PORT", "5432")

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vector_store = PGVector(
    embeddings=embeddings,
    collection_name="my_docs",
    # User and password are URL-quoted so that special characters in an
    # overridden value cannot corrupt the connection URL.
    connection=(
        f"postgresql+psycopg://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
        f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
    ),
)
app = FastAPI()

# Raw psycopg2 connection used by the endpoints to read the `article` table.
conn = psycopg2.connect(
    dbname=DB_NAME,
    user=DB_USER,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=DB_PORT,
)
# The endpoints only run SELECTs on this one long-lived connection. Without
# autocommit psycopg2 keeps an implicit transaction open after the first
# statement, and a single failed statement leaves it aborted so that every
# later query on the shared cursor fails until a rollback is issued.
conn.autocommit = True
cursor = conn.cursor()

In [29]:

# Metadata attributes exposed to the self-query retriever, built from a
# compact (name, description, type) table.
# NOTE(review): "hour and date" and "link" are free-form type labels handed
# to AttributeInfo as-is -- confirm they produce the intended filters.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type=field_type)
    for field_name, field_description, field_type in (
        ("title", "The title that the article was published under", "string"),
        ("author", "The name of the author of the article", "string"),
        ("time", "The time that the article was published", "hour and date"),
        ("image", "The URL to the image that is associated with the article", "link"),
        (
            "category",
            "The category that the article belongs to. One of ['AI', 'Apps', 'Biotech & Health', 'Climate', 'Commerce', 'Crypto', 'Enterprise', 'Fintech', 'Fundraising', 'Gadgets', 'Gaming', 'Government & Policy', 'Hardware', 'Media & Entertainment', 'Privacy', 'Robotics', 'Security', 'Social', 'Space', 'Startups', 'Transportation', 'Venture']",
            "string",
        ),
        ("url", "The URL to the original TechCrunch article", "link"),
    )
]

# Chat model that turns a natural-language request into a structured query.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Description of what each stored document's text contains.
document_content_description = "The article content"

retriever = SelfQueryRetriever.from_llm(
    llm, vector_store, document_content_description, metadata_field_info
)

In [35]:
@app.get("/get_articles_by_query")
async def get_articles_by_query(query: str):
    """Return the articles that the self-query retriever matches for `query`.

    The retriever yields documents whose metadata carries a ``url``; the
    distinct URLs are then looked up in the ``article`` table.

    Args:
        query: Natural-language search request.

    Returns:
        A list of dicts, one per matching ``article`` row, keyed by
        'url', 'title', 'time', 'img', 'category', 'summary', 'questions'
        and 'author'. Empty when nothing matches.
    """
    # Normalise the typographic apostrophe to a plain one before the text is
    # handed to the retriever. (The former replace of "'" with "\'" was a
    # no-op: both literals denote the same one-character string.)
    query = query.replace("’", "'")

    # NOTE(review): this is a synchronous call inside an async handler, so it
    # blocks the event loop while it runs.
    docs = retriever.invoke(query)
    urls = list({doc.metadata["url"] for doc in docs})
    if not urls:
        # Nothing retrieved: skip the database round trip.
        return []

    # The list is passed as a bound parameter, never interpolated into SQL.
    cursor.execute("SELECT * FROM article WHERE link = ANY(%s);", (urls,))
    rows = cursor.fetchall()

    # Assumes SELECT * yields the columns in exactly this order -- TODO
    # confirm against the table definition.
    keys = ['url', 'title', 'time', 'img', 'category', 'summary', 'questions', 'author']
    return [dict(zip(keys, row)) for row in rows]


In [9]:
@app.get("/get_articles")
async def get_articles():
    """Return every row of the ``article`` table as a list of dicts.

    Each row is mapped positionally onto the keys 'url', 'title', 'time',
    'img', 'category', 'summary', 'questions' and 'author'.
    """
    column_keys = ('url', 'title', 'time', 'img', 'category', 'summary', 'questions', 'author')
    cursor.execute("SELECT * FROM article;")
    articles = []
    for row in cursor.fetchall():
        articles.append(dict(zip(column_keys, row)))
    return articles


In [8]:
@app.get("/get_article")
async def get_article(url: str):
    """Return the single article whose ``link`` column equals `url`.

    Args:
        url: Link of the article to fetch.

    Returns:
        A dict keyed by 'url', 'title', 'time', 'img', 'category',
        'summary', 'questions' and 'author'.

    Raises:
        HTTPException: 404 when no article has the given link.
    """
    from fastapi import HTTPException

    # `url` comes straight from the request, so it is passed as a bound
    # parameter instead of being formatted into the statement.
    cursor.execute("SELECT * FROM article WHERE link = %s;", (url,))
    row = cursor.fetchone()
    if row is None:
        # fetchone() returns None when there is no match; zipping None would
        # raise TypeError and surface to the client as a 500.
        raise HTTPException(status_code=404, detail="Article not found")

    # Assumes SELECT * yields the columns in exactly this order -- TODO
    # confirm against the table definition.
    keys = ['url', 'title', 'time', 'img', 'category', 'summary', 'questions', 'author']
    return dict(zip(keys, row))



In [42]:
print(await get_articles_by_query('Give me an article about AI, by Paul Sawers, posted on 7:00 AM PST · January 12, 2025'))

[]
[]


In [2]:
from langchain_openai import OpenAIEmbeddings
from langchain_postgres import PGVector
from sqlalchemy.orm import Session
from dotenv import load_dotenv

import os
from urllib.parse import quote_plus

# Load variables from a local .env file first, so that the lookups below
# can see them.
load_dotenv()

# Database settings. Every value can be overridden through the environment;
# the fallbacks reproduce the connection string this cell previously
# hard-coded.
# NOTE(review): the fallback password is a secret committed to source --
# rotate it and drop the default once TECHVECTOR_DB_PASSWORD is provisioned.
db_name = os.environ.get("TECHVECTOR_DB_NAME", "techvector")
db_user = os.environ.get("TECHVECTOR_DB_USER", "stefan")
db_password = os.environ.get("TECHVECTOR_DB_PASSWORD", "gigelfrone112")
db_host = os.environ.get("TECHVECTOR_DB_HOST", "localhost")
db_port = os.environ.get("TECHVECTOR_DB_PORT", "5432")

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large"
)

vectorstore = PGVector(
    embeddings=embeddings,
    collection_name="my_docs",
    # User and password are URL-quoted so that special characters in an
    # overridden value cannot corrupt the connection URL.
    connection=(
        f"postgresql+psycopg://{quote_plus(db_user)}:{quote_plus(db_password)}"
        f"@{db_host}:{db_port}/{db_name}"
    ),
    use_jsonb=True,
)

# Load every row of the embedding store through a short-lived ORM session.
with Session(vectorstore.session_maker.bind) as session:
    docs = session.query(vectorstore.EmbeddingStore).all()

print(docs[0])

print(len(docs))

<langchain_postgres.vectorstores._get_embedding_collection_store.<locals>.EmbeddingStore object at 0x77fd83dc7d00>
1261


In [6]:
# TODO: iterate through all docs together with their metadata and rewrite
# the "time" metadata value so that it holds just the date.

# Show the text stored on the first embedding row.
first_row = docs[0]
first_row.document


'Apple board opposes proposal to abolish DEI programs\nApple’s board of directors has come out in opposition to a proposal seeking to end the company’s Diversity, Equity, and Inclusion (DEI) programs.\nIn a proxy filing, the company said the National Center for Public Policy Research (a conservative think tank) is submitting a proposal for Apple to “consider abolishing its Inclusion & Diversity program, policies, department and goals.”\nThe think tank’s proposal claims that DEI programs could make companies vulnerable to lawsuits, citing the recent Supreme Court ruling against race-based affirmative action in colleges and noting that other companies have eliminated or scaled back similar programs. (Most recently, Meta eliminated its DEI programs, and Amazon is reportedly pulling back as well.)'