In [1]:
import os

In [2]:
# Step up to the parent directory ("./../." resolves to "..") before the
# `app.*` imports used in later cells. Presumably this notebook lives one
# level below the project root — TODO confirm.
os.chdir("./../.")

In [3]:
import json
import cuid
import datetime
import dotenv
from openai import OpenAI
from copy import deepcopy
from pymongo import MongoClient

In [None]:
dotenv.load_dotenv()

In [5]:
mongodb_url = os.getenv("MONGODB_URL")

In [6]:

mongodb_client = MongoClient(mongodb_url)

In [7]:
db = mongodb_client['default']

### (1) Get subscription

In [8]:
subscription_id = "cm554j1vv000vkp6uatn2kcnz"

In [9]:
subscription = db['NewsSubscription'].find_one({"_id": subscription_id})

### (2) Process keywords

In [10]:
from typing import List


def process_keywords(keywords: List[str], news_sources: List[str]) -> str:
    """Build the search query string for a subscription.

    Keywords are lower-cased, stripped, sorted and joined with ``OR``.
    News sources are normalised the same way, turned into ``site:`` filters
    and joined with ``OR``. The two groups are separated by one space.

    Normalising *before* sorting makes the result independent of the casing
    and order of the inputs. Blank entries are dropped, and an empty group is
    left out entirely so the query never carries stray whitespace or a bare
    ``site:`` filter.

    Args:
        keywords: Search terms; ``None`` is treated as an empty list.
        news_sources: Domains to restrict the search to; ``None`` is treated
            as an empty list.

    Returns:
        The combined query, or an empty string when both inputs are empty.
    """
    terms = sorted(k.lower().strip() for k in (keywords or []) if k.strip())
    sites = sorted(
        s.lower().strip() for s in (news_sources or []) if s.strip())

    groups = [
        " OR ".join(terms),
        " OR ".join(f"site:{s}" for s in sites),
    ]
    # Join only the non-empty groups to avoid a leading/trailing space.
    return " ".join(g for g in groups if g)

In [11]:
query = process_keywords(subscription["keywords"], subscription["newsSources"])

### (3) Get Search Results

In [12]:
from app.routers.v1.search_news import search_news

In [13]:
# Translates a subscription's `dateRange` setting into the value that is
# passed as the `tbs` argument of `search_news`. "any_time" maps to None.
# NOTE(review): the "qdr:*" strings look like hour/day/week/month/year recency
# filters understood by the search backend — confirm against search_news.
date_range_map = {
    "any_time": None,
    "past_hour": "qdr:h",
    "past_24_hours": "qdr:d",
    "past_week": "qdr:w",
    "past_month": "qdr:m",
    "past_year": "qdr:y",
}

In [14]:
# Arguments for the `search_news` call, taken from the subscription document.
q = query
gl = subscription['country']  # presumably a country code — confirm
hl = subscription['language']  # presumably a language code — confirm
num = 10  # presumably the number of results requested — confirm
# Raises KeyError if `dateRange` is not one of the keys of date_range_map.
tbs = date_range_map[subscription['dateRange']]

In [15]:
search_result = await search_news(q=q, gl=gl, hl=hl, num=num, tbs=tbs)

### (4) Crawl Website

In [16]:
from app.routers.v1.scrape import Scraper

In [17]:
# URL of every search hit, in result order, to hand to the scraper.
urls = [hit["link"] for hit in search_result["news"]]

In [18]:
scraper = Scraper()

In [19]:
contents = await scraper.multi_run(urls=urls)

In [20]:
relevant_articles = deepcopy(search_result['news'])

In [21]:
# Attach the scraped page content to each article copy and drop the image URL
# before the articles are serialised into the prompt.
# `pop` is given a default so that a search hit without an "imageUrl" key is
# skipped quietly instead of aborting the whole loop with KeyError.
for article, content in zip(relevant_articles, contents):
    article.pop("imageUrl", None)
    article['content'] = content

In [22]:
news_reference = [{"link": news['link']} for news in search_result['news']]

In [23]:
user_prompt = subscription['newsPrompt']
new_articles = json.dumps(relevant_articles, indent=4)
news_reference = json.dumps(news_reference, indent=4)

In [24]:
def get_prompt(user_prompt: str, new_articles: str, news_reference: str):
    """Assemble the user message sent to the chat model.

    Args:
        user_prompt: The request text, inserted in double quotes.
        new_articles: Articles serialised as a JSON string, appended at the
            end of the message.
        news_reference: Reference links serialised as a JSON string, embedded
            in the citation instruction.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    template = """
    ## User Request:
    "{user_prompt}"

    Cite the reference links from {news_reference} only with link without their titles at the end of your response.

    ## Instructions:
    Based on the user's request and the provided news articles, generate a comprehensive and insightful response with title.

    **Specifically, your response should:**

    * **Address the key aspects** of the user's prompt.
    * **Highlight key aspects and important information in different color.**
    * **Synthesize information** from the provided articles, avoiding direct quotes unless necessary for emphasis or context.
    * **Present a neutral and objective perspective**, acknowledging different viewpoints presented in the articles.
    * **Maintain a clear and concise writing style**, suitable for a general audience.
    * **Avoid making subjective statements or drawing unsupported conclusions.**

    ## Relevant News Articles in JSON format:
    {new_articles}
    """
    # Only the placeholders in the template are interpreted; braces inside the
    # substituted JSON strings are inserted verbatim.
    filled = template.format(
        user_prompt=user_prompt,
        news_reference=news_reference,
        new_articles=new_articles,
    )
    return filled.strip()

In [25]:
prompt = get_prompt(user_prompt, new_articles, news_reference)

In [26]:
deep_seek_api_key = os.getenv("DEEPSEEK_API_KEY")

In [27]:

client = OpenAI(api_key=deep_seek_api_key,
                base_url="https://api.deepseek.com")

In [28]:
system_prompt = """
You are a helpful and informative AI assistant designed to provide insightful summaries and analyses of news articles. You receive user requests for information and a set of relevant news articles as context. Your goal is to process this information and generate a comprehensive and objective response that satisfies the user's request.

Here's how you should operate:

- Understand the User Request: Carefully analyze the user's prompt to identify the key information they are seeking. Pay attention to keywords, context, and any specific instructions regarding format or length.
- Process the News Articles: Thoroughly read and analyze the provided news articles. Extract key facts, events, perspectives, and any other relevant information that can help address the user's request.
- Synthesize and Summarize: Combine the information from different articles to create a cohesive and comprehensive response. Avoid simply summarizing each article individually. Instead, synthesize the information to provide a holistic view of the topic.
- Maintain Objectivity: Present information neutrally and objectively, acknowledging different viewpoints presented in the articles without expressing personal opinions or biases.
- Focus on Clarity and Conciseness: Use clear and concise language to make your response easily understandable for a general audience. Avoid jargon or technical terms unless necessary and clearly defined.
- Follow Instructions: Adhere to any specific instructions provided in the prompt, such as desired format (summary, comparison, timeline) or length limitations.
- Cite Sources When Necessary: If directly quoting from an article or presenting a specific fact, provide appropriate attribution to the source.
Remember: Your primary goal is to provide users with accurate, informative, and objective insights based on the provided news articles. Avoid making subjective statements, drawing unsupported conclusions, or presenting information not found within the provided context.
""".strip()

In [29]:
# First model call: the system prompt plus the assembled user prompt
# (request, citation instruction and articles). Non-streaming, so the full
# completion is returned in one response object.
response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ],
    stream=False
)

In [30]:
ai_insight = response.choices[0].message.content

In [31]:

title_prompt = """
Extract title from this article, the title not more than 8 words.
Directly return title only without any introducing.
"""

In [32]:
# Second model call: the generated insight is replayed as an assistant turn
# and the follow-up user turn asks for a short title for it.
# Note that this rebinds `response`, replacing the first completion.
response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "assistant", "content": ai_insight},
        {"role": "user", "content": title_prompt},
    ],
    stream=False
)

In [33]:
title = response.choices[0].message.content

In [None]:
title

### Insert

In [35]:
mail_id = cuid.cuid()

In [36]:
# Timezone-aware current time in UTC; stored as the mail's `createdAt`.
createdAt = datetime.datetime.now(datetime.timezone.utc)

In [None]:
createdAt.isoformat()

In [38]:
newsSubscriptionId = subscription_id

In [39]:
scrapeContent = contents

In [40]:
searchResult = search_result

In [41]:
content = ai_insight

In [42]:
from app.routers.v1.markdown_to_pdf import generate_pdf

In [None]:
pdfUrl = await generate_pdf(content, title)

In [44]:
# Document written to the `Mail` collection for this subscription run.
payload_to_insert = {
    # NOTE(review): a fresh cuid is generated here, so the `mail_id` created
    # in an earlier cell is not the stored `_id` — confirm which is intended.
    "_id": cuid.cuid(),
    "createdAt": createdAt,
    "newsSubscriptionId": newsSubscriptionId,
    "scrapeContent": scrapeContent,  # scraper output for the result URLs
    "searchResult": searchResult,  # full search_news response
    "content": content,  # model-generated insight text
    "title": title,  # model-generated title
    "pdfUrl": pdfUrl,  # value returned by generate_pdf
    # All status flags start as False for a newly created mail.
    "isRead": False,
    "isStarred": False,
    "isTrashed": False,
}

In [45]:
mail_id = db['Mail'].insert_one(payload_to_insert)

In [None]:
mail_id.inserted_id

In [53]:
import traceback


def func():
    """Demonstrate capturing an exception's message and full traceback.

    Deliberately divides by zero and, instead of propagating the error,
    returns a dict with the exception text under "message" and the formatted
    traceback under "trace".
    """
    numerator, denominator = 10, 0
    try:
        return numerator / denominator
    except Exception as exc:
        # format_exc() must be called while the exception is being handled.
        return {"message": str(exc), "trace": traceback.format_exc()}

In [None]:
print(func()['trace'])

In [63]:
import datetime

In [None]:
datetime.datetime.now(datetime.timezone.utc).isoformat()

In [65]:
import pandas as pd

In [None]:
pd.to_datetime("Jan 17, 2011").to_pydatetime().isoformat()