In [1]:
from fastapi import FastAPI
import psycopg2
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_postgres import PGVector

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vector_store = PGVector(
    embeddings=embeddings,
    collection_name="my_docs",
    connection="postgresql+psycopg://stefan:gigelfrone112@localhost:5432/techvector",
)
app = FastAPI()
conn = psycopg2.connect("dbname=techvector user=stefan password=gigelfrone112 host=localhost port=5432")
cursor = conn.cursor()

In [4]:

metadata_field_info = [
    AttributeInfo(
        name="title",
        description="The title that the article was published under",
        type="string",
    ),
    AttributeInfo(
        name="author",
        description="The name of the author of the article",
        type="string",
    ),
    AttributeInfo(
        name="time",
        description="The time that the article was published",
        type="hour and date",
    ),
    AttributeInfo(
        name="image",
        description="The URL to the image that is associated with the article",
        type="link",
    ),
    AttributeInfo(
        name="category",
        description="The category that the article belongs to. One of ['AI', 'Apps', 'Biotech & Health', 'Climate', 'Commerce', 'Crypto', 'Enterprise', 'Fintech', 'Fundraising', 'Gadgets', 'Gaming', 'Government & Policy', 'Hardware', 'Media & Entertainment', 'Privacy', 'Robotics', 'Security', 'Social', 'Space', 'Startups', 'Transportation', 'Venture']",
        type="string",
    ),
    AttributeInfo(
        name="url",
        description="The URL to the original TechCrunch article",
        type="link",
    )
]
document_content_description = "The article content"
llm = ChatOpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vector_store,
    document_content_description,
    metadata_field_info,
)

In [3]:
retriever.invoke("Tell me 3 articles about AI")

[Document(id='5d77ed53-1c00-4219-9556-f779f267cb53', metadata={'url': 'https://techcrunch.com/2025/01/08/microsoft-rolls-back-its-bing-image-creator-model-after-users-complain-of-degraded-quality/', 'time': '7:31 AM PST · January 8, 2025', 'image': 'https://techcrunch.com/wp-content/uploads/2025/01/BICShare.png?w=1024', 'title': 'Microsoft rolls back its Bing Image Creator model after users complain of degraded quality', 'author': 'Kyle Wiggers', 'category': 'AI', 'start_index': 1995}, page_content='— outward (@roccynoxy) December 19, 2024'),
 Document(id='cbf25e6e-40ba-46b6-b21d-1199bbe87d41', metadata={'url': 'https://techcrunch.com/2025/01/08/microsoft-rolls-back-its-bing-image-creator-model-after-users-complain-of-degraded-quality/', 'time': '7:31 AM PST · January 8, 2025', 'image': 'https://techcrunch.com/wp-content/uploads/2025/01/BICShare.png?w=1024', 'title': 'Microsoft rolls back its Bing Image Creator model after users complain of degraded quality', 'author': 'Kyle Wiggers', 

In [10]:
@app.get("/get_articles_by_query")
async def get_articles_by_query(query: str):
    docs = retriever.invoke("Tell me 3 articles about AI")
    urls = list(set([doc.metadata["url"] for doc in docs]))
    print(urls)
    cursor.execute("SELECT * FROM article where link = %s;", urls)
    tuples = cursor.fetchall()
    result_dict = [dict(zip(['url', 'title', 'time', 'img', 'category', 'summary', 'questions'], tup)) for tup in tuples]
    return result_dict 


In [7]:
@app.get("/get_articles")
async def get_articles():
    cursor.execute("SELECT * FROM article;")
    tuples = cursor.fetchall()
    result_dict = [dict(zip(['url', 'title', 'time', 'img', 'category', 'summary', 'questions'], tup)) for tup in tuples]
    return result_dict 


In [8]:
@app.get("/get_article")
async def get_article(url: str):
    cursor.execute(f"SELECT * FROM article where link = '{url}';")
    tuples = cursor.fetchone()
    result_dict = dict(zip(['url', 'title', 'time', 'img', 'category', 'summary', 'questions'], tuples))
    return result_dict



In [9]:
cursor.execute("rollback;")

In [11]:
print(await get_articles_by_query('Give me 3 articles about venture newer than 2021-01-07'))

['https://techcrunch.com/2025/01/03/chatgpt-everything-to-know-about-the-ai-chatbot/', 'https://techcrunch.com/2025/01/08/microsoft-rolls-back-its-bing-image-creator-model-after-users-complain-of-degraded-quality/', 'https://techcrunch.com/2025/01/08/omi-a-competitor-to-friend-wants-to-boost-your-productivity-using-ai-and-a-brain-interface/']


TypeError: not all arguments converted during string formatting