In [1]:
# Enable IPython's autoreload extension; mode 2 picks up edits to imported
# modules live, so project code can change without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from sociaty_newsletter_generator.db import init_db

# NOTE(review): presumably sets up the database connection that the model
# queries in the following cells rely on -- confirm in sociaty_newsletter_generator.db.
await init_db()

In [3]:
from sociaty_newsletter_generator.models import Cluster, ClusteringSession, Article
from sociaty_newsletter_generator.models import (
    Cluster,
    ClusteringSession,
    SetOfUniqueArticles,
)

In [4]:
session_id = "669a345da02feb1858343077"
session = await ClusteringSession.get(session_id)
assert session

In [5]:
from main import NewsletterCrew, NewsletterRequest, Subject

clusters = await Cluster.find_many({"session.$id": session.id,
                                    
                                        "evaluation.decision": "include"
                                    }).to_list()

# order clusters by size
clusters = sorted(clusters, key=lambda x: x.articles_count, reverse=True)

# limit to 5 clusters
clusters = clusters[:5]




In [6]:
# Ids of the clustering sessions that the loops further down iterate over.
sessions = [
    "6697a91d6720e5b25ef7946b",
    "6697ab4a6720e5b25ef794d9",
    "6697ab766720e5b25ef795e2",
    "6697ad356720e5b25ef79605",
    "6697aec36720e5b25ef796fc",
    "6697b08c6720e5b25ef797e2",
    "66993bf59a3b5803097f77df",
    "669a345da02feb1858343077",
    "669e16804f5d02e9b56027c9",
    "669e1f654042a3dbb35f9484",
    "669f6cbd20d8c0fd9be0259f",
    "669f794b8f71f776cd33b021",
]

In [7]:
from datetime import datetime
from beanie import PydanticObjectId
from slugify import slugify
from pathlib import Path
from main import NewsletterRequest, Subject, NewsletterCrew
from sociaty_newsletter_generator.models import SetOfUniqueArticles

async def get_formatted_material(subject_id:str | PydanticObjectId)->str:
    """Assemble the text material for one subject from its cluster's articles.

    Looks up the ``Cluster`` with id ``subject_id``, queries the ``Article``
    documents whose ``cluster.$id`` equals that cluster's id, and wraps them in
    ``SetOfUniqueArticles`` (presumably de-duplicating them -- confirm in the
    models module).

    Returns:
        One entry per article -- title and date on the first line, then the
        body, a blank line and the URL -- with entries separated by a blank line.

    Raises:
        AssertionError: if the cluster lookup yields a falsy result.
    """
    subject_cluster = await Cluster.get(subject_id)
    assert subject_cluster

    article_query = {"cluster.$id": subject_cluster.id}
    cluster_articles = await Article.find_many(article_query).to_list()

    formatted_entries = []
    for article in SetOfUniqueArticles(cluster_articles):
        formatted_entries.append(
            f"{article.title} ({article.date})\n{article.body}\n\n{article.url}"
        )
    return "\n\n".join(formatted_entries)


last_request = None

for session in sessions[:1]:
    session = await ClusteringSession.get(session)
    assert session

    print(f"Generating newsletter for session {session.id}")

    clusters = await Cluster.find_many({"session.$id": session.id,
                                        "evaluation.decision": "include"
                                        }).to_list()

    # order clusters by size
    clusters = sorted(clusters, key=lambda x: x.articles_count, reverse=True)

    MAIN_SUBJECTS_COUNT = 5
    SECONDARY_SUBJECTS_COUNT = 5
    # limit to 5 clusters
    main_clusters = clusters[:MAIN_SUBJECTS_COUNT]
    secondary_clusters = clusters[MAIN_SUBJECTS_COUNT:MAIN_SUBJECTS_COUNT+SECONDARY_SUBJECTS_COUNT]

    print(f"Main clusters:")
    for i, cluster in enumerate(main_clusters, start=1):
        print(f"- {i}. {cluster.title}")
    print("Secondary clusters")
    for i, cluster in enumerate(secondary_clusters, start=1):
        print(f"- {i}. {cluster.title}")
    

    
    crew = NewsletterCrew(llm='gpt-4o-mini')

    main_subjects = [
        Subject(
            subject_id=str(cluster.id),
            title=cluster.title or '',
            summary=cluster.summary or '',
            formatted_material=await get_formatted_material(cluster.id)
        )
        for cluster in main_clusters
    ]

    secondary_subjects = [
        Subject(
            subject_id=str(cluster.id),
            title=cluster.title or '',
            summary=cluster.summary or '',
            formatted_material=await get_formatted_material(cluster.id)
        )
        for cluster in secondary_clusters
    ]

    request = NewsletterRequest(
            main_subjects=main_subjects,
            secondary_subjects=secondary_subjects,
            language='fr'
        )

    last_request = request

    output = crew.generate_newsletter(request)


    now = datetime.now()
    output_dir = Path("output")
    output_dir.mkdir(exist_ok=True)

    file_name = f"{now.strftime('%Y_%m_%d_%H_%M_%S')}_session_{session.id}.md"
    with open(output_dir / file_name, "w", encoding='utf-8') as f:
        f.write(output.raw)
    print(f"Generated {file_name}")


Generating newsletter for session 6697a91d6720e5b25ef7946b
Main clusters:
- 1. SK Hynix's $75 Billion Investment Focuses on AI-Enhanced HBM Processors
- 2. Controversy Surrounding AI Training on Web Content Sparks Legal Battles
- 3. Morgan Freeman Condemns Unauthorized AI Voice Imitations
- 4. Apple Working to Integrate AI Features into Vision Pro Headsets
- 5. States Addressing AI Skills Gap in Workforce
Secondary clusters
- 1. Apple Plans to Monetize Advanced AI Features with Subscription Model
- 2. Apple to Integrate Google's Gemini AI into iPhone for Enhanced AI Capabilities
- 3. Chinese AI Startups Flock to Singapore for Global Expansion Amid US Restrictions
- 4. Top Artificial Intelligence (AI) Stocks for Long-Term Investment
- 5. Robinhood Enhances Investing App with AI Tools Through Acquisition of Pluto Capital


  warn_beta(


[95m 

SK Hynix is making a significant leap in the semiconductor industry with a planned investment of $74.8 billion by 2028, focusing predominantly on high-bandwidth memory (HBM) chips tailored for artificial intelligence (AI) applications. This strategic move comes in response to the surging demand for advanced memory solutions that can deliver the high performance and speed essential for AI operations. Notably, 80% of this investment is earmarked for HBM technology, underscoring its critical role in revolutionizing AI capabilities.

In parallel, SK Group, the parent company of SK Hynix, is committing an additional $58 billion to bolster AI-related technologies, further ensuring robust shareholder returns. This dual investment strategy not only aims to position SK Hynix at the forefront of the semiconductor market but also addresses the escalating need for efficient data processing in AI systems. By focusing on HBM technology, SK Hynix is strategically aligning itself with the futu

In [10]:
for session in sessions:
    session = await ClusteringSession.get(session)
    assert session

    print(f"{session.id=}")

    clusters = await Cluster.find_many({"session.$id": session.id,
                                        "evaluation.decision": "include"
                                        }).to_list()
    print(f"{len(clusters)=}")

    # order clusters by size
    clusters = sorted(clusters, key=lambda x: x.articles_count, reverse=True)

    MAIN_SUBJECTS_COUNT = 5
    SECONDARY_SUBJECTS_COUNT = 5
    # limit to 5 clusters
    main_clusters = clusters[:MAIN_SUBJECTS_COUNT]
    secondary_clusters = clusters[MAIN_SUBJECTS_COUNT:MAIN_SUBJECTS_COUNT+SECONDARY_SUBJECTS_COUNT]

    print(f"Main clusters:")
    for i, cluster in enumerate(main_clusters, start=1):
        print(f"- {i}. {cluster.title}")
    print("Secondary clusters")
    for i, cluster in enumerate(secondary_clusters, start=1):
        print(f"- {i}. {cluster.title}")
    


    print('\n\n')

session.id=ObjectId('6697a91d6720e5b25ef7946b')
len(clusters)=73
Main clusters:
- 1. SK Hynix's $75 Billion Investment Focuses on AI-Enhanced HBM Processors
- 2. Controversy Surrounding AI Training on Web Content Sparks Legal Battles
- 3. Morgan Freeman Condemns Unauthorized AI Voice Imitations
- 4. Apple Working to Integrate AI Features into Vision Pro Headsets
- 5. States Addressing AI Skills Gap in Workforce
Secondary clusters
- 1. Apple Plans to Monetize Advanced AI Features with Subscription Model
- 2. Apple to Integrate Google's Gemini AI into iPhone for Enhanced AI Capabilities
- 3. Chinese AI Startups Flock to Singapore for Global Expansion Amid US Restrictions
- 4. Top Artificial Intelligence (AI) Stocks for Long-Term Investment
- 5. Robinhood Enhances Investing App with AI Tools Through Acquisition of Pluto Capital



session.id=ObjectId('6697ab4a6720e5b25ef794d9')
len(clusters)=152
Main clusters:
- 1. Google's AI-driven operations lead to 48% increase in carbon emissions
- 2