In [174]:
import requests
import json
from bs4 import BeautifulSoup
from datetime import datetime
from models import Article
import re

In [175]:
# The possible prefix of the URL on fool.com
prefixes_fool = [r'https://www.fool.com/quote/nasdaq/',
             r'https://www.fool.com/quote/nyse/']

headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36 Edg/85.0.564.44'
}

TICKET = 'riot'

In [176]:
def make_request(url, headers):
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup

In [177]:
def get_fool_news_links(url, headers):

    soup = make_request(url, headers)
    
    # get the section with the 2 articles just below "News & Analysis"
    articles = soup.select('.flex.flex-col')
    
    if not articles:
        return None
    
    # get all the links, the "select" above returns a list
    articles = articles[0].find_all('a', href=True)
    
    links = []
    prefix = r'https://www.fool.com/'
    
    for article in articles:
        links.append(prefix + article['href'])
        
    return links
    

In [181]:
def fool_get_article_body(soup):    
    article_body = soup.find(class_='article-body')
    
    if not article_body:
        return None
    
    paragraphs = article_body.find_all('p')
    text_content = '\n'.join(paragraph.get_text(strip=True) for paragraph in paragraphs)
    
    return text_content

def extract_date_from_url(url: str) -> datetime:
    # Step 1: Define a regular expression pattern to match the date in the URL
    pattern = r'/(\d{4})/(\d{2})/(\d{2})/'  # Matches /YYYY/MM/DD/
    
    # Step 2: Search for the pattern in the URL
    match = re.search(pattern, url)
    
    if match:
        # Step 3: Extract year, month, and day from the match
        year, month, day = match.groups()
        
        # Step 4: Convert to a datetime object
        date_obj = datetime(int(year), int(month), int(day))
        return date_obj
    else:
        return None


def fool_get_article_info(url, headers, ticket):
    soup = make_request(url, headers)
    
    try:
        text_content = fool_get_article_body(soup)
    except Exception as e:
        text_content = None
        print('Something whent wrong :(. ', e)
        
    try:
        date_time_article = extract_date_from_url(url)
    except Exception as e:
        date_time_article = None
        print('Something whent wrong :(. ', e)
    
    try:
        title = soup.find('h1').text
    except Exception as e:
        title = None
        print('Something whent wrong :(. ', e)
    
    return Article(ticket=ticket, url=url, title=title, article_body=text_content, timestp=date_time_article)


In [179]:
fool_links = []

for prefix in prefixes_fool:
    try:
        url = prefix + TICKET + '/'
        
        links = get_fool_news_links(url, headers)
        
        # if the function finds no link it stops
        if links:
            fool_links = fool_links + links
    
    except Exception as e:
        print('Something whent wrong :(. ', e)
    
print(fool_links)

['https://www.fool.com//investing/2024/06/25/is-riot-platforms-a-millionaire-maker/', 'https://www.fool.com//investing/2024/06/15/is-riot-blockchain-a-buy-after-bitcoin-halving/']


In [182]:
for fool_link in fool_links:
    print(fool_get_article_info(url=fool_link, headers=headers, ticket=TICKET))

ticket='riot' url='https://www.fool.com//investing/2024/06/25/is-riot-platforms-a-millionaire-maker/' title='Is Riot Platforms a Millionaire Maker?' article_body="Riot Platforms(RIOT4.04%)became one of the world's largestBitcoin(BTC2.22%)miners during the past few years, but it's generated disappointing returns for its long-term investors. Riot actually used to be Bioptix, a developer of veterinary products, before it abruptly abandoned that business model (and name) nearly seven years ago.\nOn Oct. 4, 2017, Bioptix rebranded itself as Riot and declared it would become aBitcoin miner. It subsequently ordered thousands of Bitcoin miners from Bitmain, invested in other crypto companies, and expanded its business with more acquisitions. Riot's stock closed at $8.18 per share on the day of its rebranding, and it eventually soared to its post-rebranding high of $77.90 during the meme stock rally on Feb. 17, 2021.\nImage source: Getty Images.\nBut today, Riot's stock trades at about $9.50. A