In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
nltk.download('punkt')
nltk.download('stopwords')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [4]:
def fetch_news(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    articles = soup.find_all('article')
    news_items = []

    for article in articles:
        title = article.find('h2').text
        summary = article.find('p').text
        news_items.append({'title': title, 'summary': summary})

    return news_items

In [6]:
equity_news = fetch_news('https://www.bloomberg.com/markets/stocks')
fixed_income_news = fetch_news('https://www.bloomberg.com/markets/fixed-income')
fx_news = fetch_news('https://www.bloomberg.com/fx-center')
commodity_news = fetch_news('https://www.bloomberg.com/markets/commodities')


In [7]:
def summarize_text(text):
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(text)
    filtered_sentence = [w for w in word_tokens if not w.lower() in stop_words]
    fdist = FreqDist(filtered_sentence)
    common_words = fdist.most_common(10)
    summary = ' '.join([word for word, freq in common_words])
    return summary



In [8]:
def summarize_news(news_items):
    summaries = []
    for item in news_items:
        summary = summarize_text(item['summary'])
        summaries.append({'title': item['title'], 'summary': summary})
    return summaries


In [9]:

equity_summaries = summarize_news(equity_news)
fixed_income_summaries = summarize_news(fixed_income_news)
fx_summaries = summarize_news(fx_news)
commodity_summaries = summarize_news(commodity_news)

In [10]:
def create_dataframe(news_summaries, category):
    df = pd.DataFrame(news_summaries)
    df['category'] = category
    return df

equity_df = create_dataframe(equity_summaries, 'Equity')
fixed_income_df = create_dataframe(fixed_income_summaries, 'Fixed Income')
fx_df = create_dataframe(fx_summaries, 'FX')
commodity_df = create_dataframe(commodity_summaries, 'Commodity')

all_news_df = pd.concat([equity_df, fixed_income_df, fx_df, commodity_df])
all_news_df.reset_index(drop=True, inplace=True)

print(all_news_df)


Empty DataFrame
Columns: [category]
Index: []
