# MVP

In [8]:
import requests
import bs4
import selenium
from datetime import datetime
import calendar
from typing import List, Dict, Union, Optional
from pprint import pprint
import time
from typing_extensions import TypedDict
import os

In [9]:
class ArticleInfo(TypedDict):
    date_time: datetime
    category: str
    title: str
    url: str
    
class ArticleInfoLeadText(TypedDict):
    date_time: datetime
    category: str
    title: str
    url: str
    lead_text: str

class ArticleInfoContent(TypedDict):
    date_time: datetime
    category: str
    title: str
    url: str
    lead_text: str
    content: str

class ArticleSummary(TypedDict):
    date_time: datetime
    category: str
    title: str
    url: str
    lead_text: str
    content: str
    summary: str
    bullet_points: str

In [10]:
# Get date
def get_date(time):
    date_format = time.attrs['publish-date-format']
    date = time.text.split('Published')[1].strip()
    month = date.split(' ')[1]
    month_number = list(calendar.month_name).index(month)
    if month_number < 10:
        month_number = '0' + str(month_number)
    date = date.replace(month, str(month_number)).replace(' ', '-')
    date_format = date_format.replace('D', '%d').replace('MMM', '%m').replace('YYYY', '%Y').replace('HH', '%H').replace('mm', '%M').replace(' ', '-')
    datetime_object = datetime.strptime(date[:-4], date_format)
    return datetime_object

# Get category
def get_category(card):
    category = card.find('a').text.strip()
    return category

# Get title and href
def get_title_link(div):
    title = div.find('a').text.strip()
    href = div.find('a').get('href')
    link = 'https://www.tradewindsnews.com/' + href
    return title, link

In [11]:
def get_articles_with_info(verbose=False, hours_ago=8) -> List[ArticleInfo]:
    response = requests.get('https://www.tradewindsnews.com/latest')
    HTML = response.text
    soup = bs4.BeautifulSoup(HTML, 'html.parser')

    divs = soup.findAll('div', {'class': 'mb-auto'})
    cards = soup.findAll('div', {'class': 'card-body'})
    times = soup.findAll('span', {'class': 'published-at'})
    
    articles = []

    for i, (div, card, time) in enumerate(zip(divs, cards, times)):
        title, link = get_title_link(div)
        category = get_category(card)
        date = get_date(time)
        
        # Only get articles from the last 2 hours       # 7200 / 60 / 60
        seconds_per_hour = 60*60
        hours = hours_ago
        if (datetime.now() - date).seconds > hours*seconds_per_hour:
            if verbose:
                print(f'No more articles from the front page to show from the last {hours} hours ({i+1}/{len(divs)})\n')
            break
        
        article = ArticleInfo(
            date_time=date,
            category=category,
            title=title,
            url=link
            )
        articles.append(article)
        
        if verbose:
            print(f'{date} - {category} - {title}')
    
    return articles

In [19]:
articles = get_articles_with_info(verbose=True, hours_ago=24)

2024-10-18 21:16:00 - Containers - Houthis claim to target Maersk-chartered Costamare ship off Oman
2024-10-18 17:48:00 - Law - Seacor Marine takes broker to court after charterer’s bankrtupcy
2024-10-18 16:14:00 - Tankers - LR2s surge 36% in four days as Middle East product fixtures show ‘signs of recovery’
2024-10-18 15:43:00 - Tankers - The Scorpio fixture that wasn’t: Someone makes big money on an STI ship, but with a twist
2024-10-18 15:18:00 - Opinion - Wavelength podcast: US and UK increase sanctions pressure on Iran and Russia
2024-10-18 15:15:00 - Containers - Container freight rates slide for 12th week in succession
2024-10-18 15:10:00 - Law - Arrest warrant issued after Greek dockworkers block Israel-bound ammunition cargo
2024-10-18 13:27:00 - Shipyards - Hengli Heavy’s billionaire owners go for Shanghai listing through reverse takeover
2024-10-18 12:41:00 - Insurance - Why some shipowners are staring down the barrel of insurance hikes next year
2024-10-18 11:38:00 - Tanker

In [122]:
def get_articles_with_lead_text(articles: List[ArticleInfo]) -> List[ArticleInfoLeadText]:
    articles_with_lead_text = []
    for article in articles:
        time.sleep(0.5)
        response = requests.get(article['url'])
        HTML = response.text
        soup = bs4.BeautifulSoup(HTML, 'html.parser')

        divs = soup.findAll('p', {'class': 'fs-lg mb-4 article-lead-text'})
        article_with_lead_text = ArticleInfoLeadText(
            date_time=article['date_time'],
            category=article['category'],
            title=article['title'],
            url=article['url'],
            lead_text=divs[0].text  # Added this line
            )
        articles_with_lead_text.append(article_with_lead_text)
    return articles_with_lead_text

In [123]:
articles_test = articles
pprint(articles_test[0])
articles_test: List[ArticleInfoLeadText] = get_articles_with_lead_text(articles_test)
pprint(articles_test[0])

{'category': 'Containers',
 'date_time': datetime.datetime(2024, 10, 18, 21, 16),
 'title': 'Houthis claim to target Maersk-chartered Costamare ship off Oman',
 'url': 'https://www.tradewindsnews.com//containers/houthis-claim-to-target-maersk-chartered-costamare-ship-off-oman/2-1-1726937'}
{'category': 'Containers',
 'date_time': datetime.datetime(2024, 10, 18, 21, 16),
 'lead_text': 'Military forces have not independently confirmed targeting of '
              'vessel in Arabian Sea',
 'title': 'Houthis claim to target Maersk-chartered Costamare ship off Oman',
 'url': 'https://www.tradewindsnews.com//containers/houthis-claim-to-target-maersk-chartered-costamare-ship-off-oman/2-1-1726937'}


In [124]:
for article_test in articles_test:
    print(article_test['lead_text'])

Military forces have not independently confirmed targeting of vessel in Arabian Sea


### User Input

In [58]:
def get_articles_to_read(verbose=False, hours_ago=24) -> List[ArticleInfoLeadText]:
    articles = get_articles_with_info(verbose=False, hours_ago=hours_ago)
    articles = get_articles_with_lead_text(articles)

    if verbose:
        print("### Articles ###")
        for i, article in enumerate(articles):
            print(f"Article {i+1}/{len(articles)}")
            print(f"Date: {article['date_time'].strftime('%Y-%m-%d %H:%M:%S')} - Category: {article['category']}")
            print(f"- {article['title']}")
            print(f"- {article["lead_text"]}")
            print()
        
    user_input = input("Which article would you like to read? (ex. 1,3,4): ")
    articles_to_read = [int(i) for i in user_input.split(',')]
    articles_to_read = [articles[i-1] for i in articles_to_read]

    if verbose:
        print("### Articles to read ###")
        for i, article in enumerate(articles_to_read):
            print(f"Article {i+1}/{len(articles_to_read)}")
            print(f"Date: {article['date_time'].strftime('%Y-%m-%d %H:%M:%S')} - Category: {article['category']}")
            print(f"- {article['title']}")
            print(f"- {article["lead_text"]}")
            print()
            
    return articles_to_read

In [59]:
articles_to_read = get_articles_to_read(verbose=True, hours_ago=24)

### Articles ###
Article 1/20
Date: 2024-10-18 21:16:00 - Category: Containers
- Houthis claim to target Maersk-chartered Costamare ship off Oman
- Military forces have not independently confirmed targeting of vessel in Arabian Sea

Article 2/20
Date: 2024-10-18 17:48:00 - Category: Law
- Seacor Marine takes broker to court after charterer’s bankrtupcy
- Offshore vessel owner says it should be paid some of the $13m that GOL received from the Cox Operating Chapter 11 case

Article 3/20
Date: 2024-10-18 16:14:00 - Category: Tankers
- LR2s surge 36% in four days as Middle East product fixtures show ‘signs of recovery’
- Smaller LR1s also join in the rate recovery 

Article 4/20
Date: 2024-10-18 15:43:00 - Category: Tankers
- The Scorpio fixture that wasn’t: Someone makes big money on an STI ship, but with a twist
- New York-listed owner is not in period-cover mode, but others are tapping a strong LR2 market

Article 5/20
Date: 2024-10-18 15:18:00 - Category: Opinion
- Wavelength podcast: 

### Login

In [4]:
response = requests.get("https://www.tradewindsnews.com/auth/user/login?target=%2F")
HTML = response.text
soup = bs4.BeautifulSoup(HTML, 'html.parser')

divs = soup.findAll('div', {'class': 'input-field-wrapper d-flex'})
print(divs)

[<div class="input-field-wrapper d-flex" data-v-43684e78=""><input autocomplete="" data-v-43684e78="" name="username" placeholder="Your email address" value=""/> <div class="text-field-loader" data-v-523a1ff1="" style="display:none;"><svg aria-hidden="true" class="svg-inline--fa fa-spinner fa-w-16 fa-spin fa-pulse fa-1x" data-icon="spinner" data-prefix="fas" data-v-523a1ff1="" focusable="false" role="img" viewbox="0 0 512 512" xmlns="http://www.w3.org/2000/svg"><path d="M304 48c0 26.51-21.49 48-48 48s-48-21.49-48-48 21.49-48 48-48 48 21.49 48 48zm-48 368c-26.51 0-48 21.49-48 48s21.49 48 48 48 48-21.49 48-48-21.49-48-48-48zm208-208c-26.51 0-48 21.49-48 48s21.49 48 48 48 48-21.49 48-48-21.49-48-48-48zM96 256c0-26.51-21.49-48-48-48S0 229.49 0 256s21.49 48 48 48 48-21.49 48-48zm12.922 99.078c-26.51 0-48 21.49-48 48s21.49 48 48 48 48-21.49 48-48c0-26.509-21.491-48-48-48zm294.156 0c-26.51 0-48 21.49-48 48s21.49 48 48 48 48-21.49 48-48c0-26.509-21.49-48-48-48zM108.922 60.922c-26.51 0-48 21.49

In [125]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import time

In [9]:
# Credentials
email = "henrrb@stud.ntnu.no"
password = "WebScrapingCarnegie1"

test_article_url = "https://www.tradewindsnews.com/containers/houthis-claim-to-target-maersk-chartered-costamare-ship-off-oman/2-1-1726937"
test_article_title = "Houthis claim to target Maersk-chartered Costamare ship off Oman"

# Open browser
driver = webdriver.Chrome()
driver.get("https://www.tradewindsnews.com/auth/user/login?target=%2F")

# Login
time.sleep(2)
driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(1) > span:nth-child(1) > div > div.input-field-wrapper.d-flex > input").click()
time.sleep(1)
driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(1) > span:nth-child(1) > div > div.input-field-wrapper.d-flex > input").send_keys(email)
time.sleep(2)
driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(1) > span:nth-child(2) > div > div.input-field-wrapper.d-flex > input[type=password]").click()
time.sleep(1)
driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(1) > span:nth-child(2) > div > div.input-field-wrapper.d-flex > input[type=password]").send_keys(password)
time.sleep(2)
driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(2) > div.mt-0 > div > div.loading-button > button").click()
time.sleep(5)

# Close Cookie-disclaimer
driver.find_element(By.CSS_SELECTOR, "#onetrust-pc-btn-handler").click()
time.sleep(2)
driver.find_element(By.CSS_SELECTOR, "#onetrust-pc-sdk > div > div.ot-pc-footer.ot-pc-scrollbar > div.ot-btn-container > button.ot-pc-refuse-all-handler").click()
time.sleep(1)
# driver.find_element(By.ID, "interactive-close-button").click()    # NOTE: Might be necessary to close the cookie-disclaimer. Usually it does not show up (might wait for user to be idle for a while)
# time.sleep(2)
    
driver.get(test_article_url)
time.sleep(4)
# # TOOD: Turn this into a function and call it for each article (from "Latest" scraping)
# # NOTE: Might be able to skip this and just use the URL from the article (from "Latest" scraping)
# # Search for article
# driver.find_element(By.CSS_SELECTOR, "#app > div > div.top-bar > nav > header > div > div.d-flex.justify-content-between.align-items-center > div > div > div.d-none.d-lg-block.action-menu-item.ml-4 > button").click()
# time.sleep(2)
# driver.find_element(By.CSS_SELECTOR, "#app > div > div.top-bar > nav > header > div > div.d-flex.justify-content-between.align-items-center > div > div > div.d-none.d-lg-block.action-menu-item.ml-4 > div > div > div > form > input.form-control").click()
# driver.find_element(By.CSS_SELECTOR, "#app > div > div.top-bar > nav > header > div > div.d-flex.justify-content-between.align-items-center > div > div > div.d-none.d-lg-block.action-menu-item.ml-4 > div > div > div > form > input.form-control").send_keys(test_article_title)
# time.sleep(1)
# driver.find_element(By.CSS_SELECTOR, "#app > div > div.top-bar > nav > header > div > div.d-flex.justify-content-between.align-items-center > div > div > div.d-none.d-lg-block.action-menu-item.ml-4 > div > div > div > form > button").click()
# time.sleep(5)

# # Open article
# driver.find_element(By.CSS_SELECTOR, "#app > div > div.container-fluid.tradewinds.archive.page-archive > div:nth-child(3) > div > div:nth-child(2) > div.col-12.col-md-9 > div:nth-child(3) > div > div > div > div.teaser-body.clearfix > div > div.col-sm-9 > div > div.mb-auto > h2 > a").click()
# time.sleep(5)

# Get article
article_data = {
    'url': driver.current_url,
    'content': bs4.BeautifulSoup(driver.page_source, 'html.parser')
}


In [10]:
print(article_data['url'])

https://www.tradewindsnews.com/containers/houthis-claim-to-target-maersk-chartered-costamare-ship-off-oman/2-1-1726937


In [11]:
paragraphs = article_data['content'].select('#app > div > div.container-fluid.tradewinds.articlepage.page-articlepage > div.wrapper > div > div:nth-child(2) > div:nth-child(1) > div.article-center-column.col-12.col-md-9.col-lg-6 > div.article-body > div')[0].select('p')
text = ""
for paragraph in paragraphs:
    text += paragraph.text + "\n"

print(text)
# print(article_data['content'].select('#app > div > div.container-fluid.tradewinds.articlepage.page-articlepage > div.wrapper > div > div:nth-child(2) > div:nth-child(1) > div.article-center-column.col-12.col-md-9.col-lg-6 > div.article-body > div')[0].select('p'))

The Houthis claimed to have targeted a container ship in the fleet of AP Moller-Maersk while it was off the coast of Oman.
The group’s military arm, describing itself as the Yemen’s armed forces, said it carried out an operation that took aim at the 4,957-teu Megalopolis (built 2013).
The container ship is owned by US-listed tonnage provider Costamare.
The shipowner and Maersk, the Danish liner giant, could not be immediately reached for confirmation of the incident.
Houthi armed forces spokesman Yahya Saree announced the operation on the group’s media website and in a post on X.
He said a number of drones targeted the Megalopolis while it was in the Arabian Sea.
“The operation has successfully achieved its objectives,” he said. 
Tracking data from VesselsValue shows that, as of an hour of this writing, the Megalopolis was off the Omani port of Salalah. Its location transponder was broadcasting that it was under way, but it was barely moving a 0.4 knots.
Tracking data shows that the sh

### Full Pipeline

In [140]:
def login_to_trade_winds(driver: webdriver.Chrome, email: str, password: str) -> webdriver.Chrome:
    
    # Login Page
    driver.get("https://www.tradewindsnews.com/auth/user/login?target=%2F")

    # Login
    time.sleep(2)
    driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(1) > span:nth-child(1) > div > div.input-field-wrapper.d-flex > input").click()
    time.sleep(1)
    for character in email:
        driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(1) > span:nth-child(1) > div > div.input-field-wrapper.d-flex > input").send_keys(character)
        time.sleep(0.1)
    time.sleep(2)
    driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(1) > span:nth-child(2) > div > div.input-field-wrapper.d-flex > input[type=password]").click()
    time.sleep(1)
    for character in password:
        driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(1) > span:nth-child(2) > div > div.input-field-wrapper.d-flex > input[type=password]").send_keys(character)
        time.sleep(0.1)
    time.sleep(2)
    try: # Sometimes the "Accept TOS" checkbox needs to be checked
        driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(2) > div.mt-0 > div > div.mt-0.mb-0 > div > div:nth-child(1)").click()
        time.sleep(1)
    except:
        pass
    driver.find_element(By.CSS_SELECTOR, "#app > div.auth > div > div.form-wrapper > div > div > form > div:nth-child(2) > div.mt-0 > div > div.loading-button > button").click()
    time.sleep(5)

    # Close Cookie-disclaimer
    driver.find_element(By.CSS_SELECTOR, "#onetrust-pc-btn-handler").click()
    time.sleep(2)
    driver.find_element(By.CSS_SELECTOR, "#onetrust-pc-sdk > div > div.ot-pc-footer.ot-pc-scrollbar > div.ot-btn-container > button.ot-pc-refuse-all-handler").click()
    time.sleep(1)

    return driver

def logout_trade_winds(driver: webdriver.Chrome) -> webdriver.Chrome:
    
    driver.find_element(By.CSS_SELECTOR, "#app > div > div.top-bar > nav > header > div > div.d-flex.justify-content-between.align-items-center > div > div > div:nth-child(2) > button").click()
    time.sleep(2)
    driver.find_element(By.CSS_SELECTOR, "#app > div > div.top-bar > nav > header > div > div.d-flex.justify-content-between.align-items-center > div > div > div:nth-child(2) > div > div > div > div > div:nth-child(2) > a").click()
    time.sleep(4)
    
    return driver

def get_articles_with_content_from_trade_winds(articles: List[ArticleInfoLeadText]) -> List[ArticleInfoContent]:
    articles_with_content = []

    email = os.getenv('TRADEWINDS_EMAIL')
    password = os.getenv('TRADEWINDS_PASSWORD')
    
    driver = webdriver.Chrome()
    driver = login_to_trade_winds(driver, email, password)
    for article in articles:
        
        # Get article content
        driver.get(article['url'])
        time.sleep(4)
        page_source = bs4.BeautifulSoup(driver.page_source, 'html.parser')
        paragraphs = page_source.select('#app > div > div.container-fluid.tradewinds.articlepage.page-articlepage > div.wrapper > div > div:nth-child(2) > div:nth-child(1) > div.article-center-column.col-12.col-md-9.col-lg-6 > div.article-body > div')[0].select('p')
        content = ""
        for paragraph in paragraphs:
            content += paragraph.text + "\n"
        
        article_with_content = ArticleInfoContent(
            date_time=article['date_time'],
            category=article['category'],
            title=article['title'],
            url=article['url'],
            lead_text=article['lead_text'],
            content=content
            )
        articles_with_content.append(article_with_content)
        time.sleep(0.5)
        
    driver = logout_trade_winds(driver)
    driver.quit()
    
    return articles_with_content

In [62]:
def get_articles_from_tradewinds(hours_ago: int = 24) -> List[ArticleInfoContent]:
    
    articles: List[ArticleInfo] = get_articles_with_info(hours_ago=hours_ago)
    articles: List[ArticleInfoLeadText] = get_articles_with_lead_text(articles)
    articles: List[ArticleInfoContent] = get_articles_with_content_from_trade_winds(articles)
    
    return articles

In [141]:
articles_from_testing = get_articles_with_content_from_trade_winds(articles_test)

henrik.raaen.bo.trading@gmail.com
WebScrapingCarnegie1


In [64]:
articles_to_read = get_articles_from_tradewinds(hours_ago=24)

In [65]:
print(len(articles_to_read))

20


In [66]:
for i, article in enumerate(articles_to_read):
    print(f"{i}: {article['title']}")

0: Houthis claim to target Maersk-chartered Costamare ship off Oman
1: Seacor Marine takes broker to court after charterer’s bankrtupcy
2: LR2s surge 36% in four days as Middle East product fixtures show ‘signs of recovery’
3: The Scorpio fixture that wasn’t: Someone makes big money on an STI ship, but with a twist
4: Wavelength podcast: US and UK increase sanctions pressure on Iran and Russia
5: Container freight rates slide for 12th week in succession
6: Arrest warrant issued after Greek dockworkers block Israel-bound ammunition cargo
7: Hengli Heavy’s billionaire owners go for Shanghai listing through reverse takeover
8: Why some shipowners are staring down the barrel of insurance hikes next year
9: Tankers International’s maritime walk raises £10,000 for Mercy Ships
10: Singapore Shipping Association hires logistics veteran to replace long-time chief Michael Phoon
11: Seatrade bags $150m from sale of container ship quintet to CMA CGM
12: Investment manager Basalt Partners kicks off

### Generate Summaries

In [67]:
from dotenv import load_dotenv
from openai import OpenAI

In [69]:
class MessageDict(TypedDict):
    role: str
    content: str

class ChoiceDict(TypedDict):
    index: int
    message: MessageDict
    logprobs: Optional[None]
    finish_reason: str

class UsageDetailsDict(TypedDict):
    reasoning_tokens: int

class UsageDict(TypedDict):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    completion_tokens_details: UsageDetailsDict

class ChatCompletionResponse(TypedDict):
    id: str
    object: str
    created: int
    model: str
    system_fingerprint: str
    choices: List[ChoiceDict]
    usage: UsageDict

In [91]:
def get_client():
    client = OpenAI()
    return client

def get_completion(client: OpenAI, model: str, messages: List[Dict[str, str]]) -> ChatCompletionResponse:
    try:
        completion: ChatCompletionResponse = client.chat.completions.create(
            model=model,
            messages=messages
        )
        return completion
    except Exception as e:
        print(e)
        return None

def get_summary(client: OpenAI, article: ArticleInfoContent) -> str:
    model = "gpt-4o-mini"
    messages = [
        {"role": "system", "content": "You are a ship broker and an expert at providing concise and information dense summaries of news articles."},
        {"role": "system", "content": "I will provide you with the title, lead text, and content from a news article. I want you to summarize the content in a few sentences. Respond only with the summary and a potential analysis and opinion from the author."},
        # {"role": "system", "content": f"I will provide you with a source to the article. The source is <source>{article['url']}</source>. Add the source at the start of the summary with the following markdown format: 'Source: [<title>{article['title']}</title>]({article['url']})'"},
        {"role": "system", "content": "Only use information from the news article, and do not hallucinate."},
        {"role": "system", "content": "The summary should focus on key events, key entities, key numbers, and the authors analysis and opinion."},
        {"role": "system", "content": "Write the summary first. If the authors analysis and opinion exist, add a newline, the text 'Authors Analysis:' in bold, and summarize the analysis and opinion as well in 1-2 sentences. If it does not exist, do not add this part."},
        {"role": "user", "content": f"<title>{article['content']}</title>"},
        {"role": "user", "content": f"<lead text>{article['lead_text']}</lead text>"},
        {"role": "user", "content": f"< content>{article['content']}</content>"}
    ]
    completion = get_completion(client, model, messages)
    summary = completion.choices[0].message.content
    return summary

def get_bullet_points(client: OpenAI, article: ArticleInfoContent) -> str:
    model = "gpt-4o-mini"
    messages = [
        {"role": "system", "content": "You are a ship broker and an expert at providing concise and informative bullet points of news articles."},
        {"role": "system", "content": "I will provide you with the title, lead text, and content from a news article. I want you to summarize the lead text and content with two bullet points. Respond only with the bullet points."},
        # {"role": "system", "content": f"I will provide you with a source to the article. The source is <source>{article['url']}</source>. Add the source at the start of the summary with the following markdown format: 'Source: [<title>{article['title']}</title>]({article['url']})'"},
        {"role": "system", "content": "Only use information from the news article, and do not hallucinate."},
        {"role": "system", "content": "The bullet points should focus on key events, key entities, key numbers, and the authors analysis and opinion."},
        {"role": "system", "content": "The bullet points should give a ship broker the information they need to understand what the article is about, and what insights they gain from reading it."},
        {"role": "user", "content": f"<title>{article['content']}</title>"},
        {"role": "user", "content": f"<lead text>{article['lead_text']}</lead text>"},
        {"role": "user", "content": f"< content>{article['content']}</content>"}
    ]
    completion = get_completion(client, model, messages)
    bullet_points = completion.choices[0].message.content
    return bullet_points

def get_articles_with_summaries_and_bullet_points(articles: List[ArticleInfoContent]) -> List[ArticleSummary]:
    articles_with_summaries_and_bullet_points = []
    client = get_client()
    for article in articles:
        summary = get_summary(client, article)
        bullet_points = get_bullet_points(client, article)
        articles_with_summary_and_bullet_points = ArticleSummary(
            date_time=article['date_time'],
            category=article['category'],
            title=article['title'],
            url=article['url'],
            lead_text=article['lead_text'],
            content=article['content'],
            summary=summary,
            bullet_points=bullet_points
        )
        articles_with_summaries_and_bullet_points.append(articles_with_summary_and_bullet_points)
    return articles_with_summaries_and_bullet_points

In [92]:
articles_with_summaries_and_bullet_points = get_articles_with_summaries_and_bullet_points(articles_to_read[:3])

In [98]:
for article in articles_with_summaries_and_bullet_points:
    print(f"Title: {article['title']}")
    print(f"{article['bullet_points']}")
    print()

Title: Houthis claim to target Maersk-chartered Costamare ship off Oman
- The Houthis claimed to have targeted the 4,957-teu container ship Megalopolis, allegedly due to its owner's violation of a ban on docking at Israeli ports, while military forces in the region have not confirmed the incident.
- The Megalopolis, owned by Costamare and insured by the Swedish Club, was reportedly moving at a slow 0.4 knots off the coast of Oman, and tracking data shows it took a lengthy route to the Arabian Sea, avoiding the Suez Canal.

Title: Seacor Marine takes broker to court after charterer’s bankrtupcy
- The Houthi military claimed to have targeted the 4,957-teu container ship Megalopolis off the coast of Oman, alleging it was in response to the shipowner's supposed violation of a ban on Israeli port calls, despite tracking data indicating it hadn't called in Israel. 
- The Megalopolis, owned by Costamare and insured by the Swedish Club, was reported stationary near Salalah, prompting skepticis

In [99]:
for article in articles_with_summaries_and_bullet_points:
    print(f"Title: {article['title']}")
    print(f"Summary: {article['summary']}")
    print()

Title: Houthis claim to target Maersk-chartered Costamare ship off Oman
Summary: The Houthis have claimed responsibility for targeting the container ship Megalopolis, owned by US-listed Costamare and part of the AP Moller-Maersk fleet, while it was near Oman. Houthi spokesperson Yahya Saree asserted that the operation involved drones and achieved its objectives, although independent verification of the incident is lacking. Tracking data indicates that the ship was moving very slowly off the Omani port of Salalah after a long route from the Mediterranean, circumventing the Suez Canal. Saree cited the act as retaliation due to the ship's alleged violation of a ban on calling at Israeli ports, a claim disputed by location data. Furthermore, he expressed solidarity with Hezbollah and Hamas amid ongoing regional tensions.

**Authors Analysis:** The author suggests a possibility of exaggeration or misinformation by the Houthis, noting their history of uncorroborated claims regarding maritime

In [None]:
def get_articles_from_tradewinds(hours_ago: int = 24) -> List[ArticleSummary]:
    
    load_dotenv()
    articles: List[ArticleInfo] = get_articles_with_info(hours_ago=hours_ago)
    articles: List[ArticleInfoLeadText] = get_articles_with_lead_text(articles)
    articles: List[ArticleInfoContent] = get_articles_with_content_from_trade_winds(articles)
    articles: List[ArticleSummary] = get_articles_with_summaries_and_bullet_points(articles)
    
    return articles

In [143]:
test_summary_articles = get_articles_with_summaries_and_bullet_points(articles_from_testing)