## __BERTopic Model__

### __Import Packages__

In [1]:
import pandas as pd
import re

from bertopic import BERTopic
from umap import UMAP
from hdbscan import HDBSCAN
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer

from gensim.models.coherencemodel import CoherenceModel
from gensim import corpora

import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Text style
BOLD = "\033[1m"
RESET = "\033[0m"

import numpy as np
import random
import torch

# Seed the random number generators of NumPy, Python's `random` module and
# PyTorch with the same fixed value so repeated runs start from the same state
np.random.seed(42)
random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7feacf9a45d0>

### __Load Data__

In [2]:
# Read the raw CSV, keep one row per distinct article text, drop rows with
# no article text, and keep only the articles that mention 'NVDA' or 'NVIDIA'
# as a whole word (case-insensitive)
raw_df = (
    pd.read_csv('data/raw/us_equities_news_dataset.csv')
    .drop_duplicates(subset=['content'])
    .dropna(subset=['content'])
    .loc[lambda df: df['content'].fillna('').str.contains(r'\bnvda\b|\bnvidia\b', case=False, regex=True)]
)

# Show the first rows of the filtered dataframe
raw_df.head(5)

Unnamed: 0,id,ticker,title,category,content,release_date,provider,url,article_id
24,221539,NIO,A Central Bank War Just Started And Its Good F...,opinion,ECB Effects\nThe move in the euro was huge fa...,2019-03-07,Michael Kramer,https://www.investing.com/analysis/a-central-b...,200395687
32,221547,NIO,6 Stocks To Watch Nivida Could Be Falling,opinion,6 Stocks To Watch March 6 Trading Session\nSt...,2019-03-06,Michael Kramer,https://www.investing.com/analysis/6-stocks-to...,200394931
57,221572,NIO,Stocks Dow Drops Nearly 400 Points as Apple ...,news,Investing com A rout in Apple and Facebook ...,2018-11-19,Investing.com,https://www.investing.com/news/stock-market-ne...,1694042
78,221593,UBER,The Zacks Analyst Blog Highlights Advanced Mi...,opinion,For Immediate ReleaseChicago IL January 13 ...,2020-01-12,Zacks Investment Research,https://www.investing.com/analysis/the-zacks-a...,200498277
82,221597,UBER,The Best Of CES 2020 Revised,opinion,With 4 500 companies bringing their innovation...,2020-01-16,Zacks Investment Research,https://www.investing.com/analysis/the-best-of...,200499164


### __Data Preparation Phase, Removing Numbers__

In [3]:
# This function removes standalone numbers as well as letters followed or preceded by numbers
# So for example: 40, Q4, 4abc

def preprocess_text(content_column):
    """Remove number-bearing tokens from one text and normalise its whitespace.

    Args:
        content_column: A single text string (one cell of the 'content' column).

    Returns:
        The text with every word that contains at least one digit removed,
        runs of whitespace collapsed to a single space, and leading/trailing
        whitespace stripped.
    """
    # A "number token" is any whole word containing one or more digits
    number_token = r'\b\w*\d+\w*\b'
    whitespace_run = r'\s+'

    without_numbers = re.sub(number_token, '', content_column)
    return re.sub(whitespace_run, ' ', without_numbers).strip()

# Build the working dataframe: every column of raw_df plus a
# 'cleaned_content' column holding the number-free article text.
# assign() returns a new dataframe, so raw_df itself is not modified.
cleaned_df = raw_df.assign(cleaned_content=raw_df['content'].apply(preprocess_text))

### __Function to Calculate Coherence Score__

In [4]:
# This function is used to calculate the coherence score of the different models
# The coherence score measures how strongly the top words of each topic are related to one another
# The c_v coherence used here is typically a value between 0 and 1: a score close to 1 indicates
# good coherence (high similarity) and a score close to 0 indicates bad coherence (low similarity)

def calculate_coherence(topic_model, articles):
    """Compute the c_v coherence score of a fitted BERTopic model.

    Args:
        topic_model: A fitted BERTopic model. Its ``get_topics()`` mapping
            (topic id -> list of ``(word, score)`` pairs) and its
            ``vectorizer_model`` are used.
        articles: Iterable of document strings. Each one is tokenized with
            the analyzer of the model's vectorizer to build the reference
            corpus for the coherence computation.

    Returns:
        The value returned by gensim's ``CoherenceModel.get_coherence()``
        for the model's topics, using the 'c_v' measure.
    """
    topics_info = topic_model.get_topics()

    # Collect the top words of every real topic.
    # The topic ids are taken from the model itself and the outlier topic (-1)
    # is skipped explicitly, instead of assuming that the ids are exactly
    # 0 .. len(topics_info) - 2: that assumption drops the last real topic
    # whenever the model produced no outlier topic.
    # Empty strings are dropped as well, because they cannot be looked up in
    # the gensim dictionary built below.
    topic_words = [[word for word, _ in words if word]
                   for topic_id, words in sorted(topics_info.items())
                   if topic_id != -1]

    # A topic left without any word cannot be scored, so it is left out
    topic_words = [words for words in topic_words if words]

    # Tokenize the documents exactly like the model's vectorizer does, so the
    # topic words and the reference tokens share the same vocabulary
    vectorizer = topic_model.vectorizer_model
    analyzer = vectorizer.build_analyzer()
    tokens = [analyzer(article) for article in articles]

    # Create dictionary and corpus for Gensim
    dictionary = corpora.Dictionary(tokens)
    corpus = [dictionary.doc2bow(token) for token in tokens]

    # Calculate Coherence using Gensim's CoherenceModel
    coherence_model = CoherenceModel(topics=topic_words,
                                     texts=tokens,
                                     corpus=corpus,
                                     dictionary=dictionary,
                                     coherence='c_v')

    return coherence_model.get_coherence()

### __Model 1, Initial Model:__

In [5]:
# Get the documents without numbers
# (one string per article, taken from the 'cleaned_content' column)
articles = cleaned_df['cleaned_content'].tolist()

# In BERTopic we generally use a dimensionality reduction algorithm to reduce the size of the embeddings
# As a default, this is done with UMAP
# However, by default, it shows stochastic behavior which creates different results each time you run it
# To prevent this and fully reproduce the results every time we run the model, we need to set a random_state for the model
umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric='cosine', random_state=42)

# Baseline model: only the UMAP step is supplied explicitly, every other
# component is left to BERTopic's defaults
initial_topic_model = BERTopic(language='english',
                               umap_model=umap_model,
                               verbose=True)

# Fit the model on the whole articles; fit_transform returns the topic
# assignments and the probabilities computed by the model
topics, probs = initial_topic_model.fit_transform(articles)

# Score the baseline so that the later model iterations can be compared against it
initial_coherence_score = calculate_coherence(initial_topic_model, articles)
print(f"{BOLD}The coherence score for this model is: {initial_coherence_score} {RESET}")

2024-11-07 08:17:50,730 - BERTopic - Embedding - Transforming documents to embeddings.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/108 [00:00<?, ?it/s]

2024-11-07 08:20:02,824 - BERTopic - Embedding - Completed ✓
2024-11-07 08:20:02,825 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-11-07 08:20:26,938 - BERTopic - Dimensionality - Completed ✓
2024-11-07 08:20:26,940 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-11-07 08:20:27,078 - BERTopic - Cluster - Completed ✓
2024-11-07 08:20:27,095 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-11-07 08:20:28,482 - BERTopic - Representation - Completed ✓


[1mThe coherence score for this model is: 0.525621348758838 [0m


In [6]:
initial_topic_model.visualize_barchart()

### __Model 2, Get Sentences:__

In [7]:
# As a default, BERTopic is using sentence-transformers to embed the documents
# The embedding model works best for either sentences or paragraphs
# Meaning that whenever a document contains several paragraphs, the document is truncated 
# and the topic model is only trained on a small part of the data
# To solve this issue, we need to split longer documents into sentences

# The function cleans and splits articles, removes unwanted characters, 
# and filters out short sentences, returning only those that are longer than 5 words

def get_long_sentences(articles):
    """Split articles into cleaned sentences and keep the ones longer than 5 words.

    Args:
        articles: Iterable of article strings.

    Returns:
        A list of sentence strings, in the order they occur in ``articles``.
        A sentence boundary is a newline or two spaces followed by a capital
        letter. Words containing digits are removed, whitespace is collapsed
        to single spaces, and only sentences with more than 5 words are kept.
    """
    long_sentences = []

    for article in articles:
        # Turn "two spaces + capital letter" into a newline boundary; re.sub
        # returns the text unchanged when the pattern does not occur
        with_line_breaks = re.sub(r'  ([A-Z])', r'\n\1', article)

        for fragment in with_line_breaks.split('\n'):
            # Skip the empty pieces produced by leading or consecutive newlines
            if not fragment:
                continue

            # Tidy the whitespace of the fragment
            fragment = re.sub(r'\s+', ' ', fragment.strip())

            # Remove standalone numbers and words that contain digits, then
            # collapse the gaps the removal leaves behind
            fragment = re.sub(r'\b\w*\d+\w*\b', '', fragment)
            fragment = re.sub(r'\s+', ' ', fragment).strip()

            # Keep only sentences longer than 5 words
            if len(fragment.split()) > 5:
                long_sentences.append(fragment)

    return long_sentences

# Example usage:
# The raw 'content' column is used here instead of 'cleaned_content':
# preprocess_text collapses all whitespace, which would erase the newlines and
# double spaces that get_long_sentences uses as sentence boundaries
# (get_long_sentences removes the numbers itself).
# NOTE: this rebinds `articles`, which held the cleaned article texts before.
articles = cleaned_df['content'].tolist()
processed_sentences = get_long_sentences(articles)
print(processed_sentences[0])  

The move in the euro was huge falling over pips huge


In [8]:
# Pre-calculate embeddings of sentences
# (computed once here and passed to fit_transform below, so the same
# embeddings can be reused by other models fitted on processed_sentences)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
processed_sentences_embeddings = embedding_model.encode(processed_sentences, show_progress_bar=True)

# By default, BERTopic uses HDBSCAN to group documents into clusters based on similarity
# The 'min_cluster_size' parameter controls the minimum cluster size
# Setting min_cluster_size=150 ensures that only clusters with at least 150 similar documents are considered as valid topics
hdbscan_model = HDBSCAN(min_cluster_size=150, metric='euclidean', cluster_selection_method='eom', prediction_data=True)

# We use "auto" to automatically reduce the number of topics
# umap_model is the UMAP instance that was created for the initial model
sentences_topic_model = BERTopic(nr_topics= "auto",
                                 embedding_model=embedding_model,
                                 umap_model=umap_model,
                                 hdbscan_model=hdbscan_model,
                                 verbose=True)

# Fit on the sentences together with their pre-computed embeddings
topics, probs = sentences_topic_model.fit_transform(processed_sentences, processed_sentences_embeddings)

# Score this iteration on the same sentences it was fitted on
sentences_coherence_score = calculate_coherence(sentences_topic_model, processed_sentences)
print(f"{BOLD}The coherence score for this model is: {sentences_coherence_score} {RESET}")

Batches: 100%|██████████| 3573/3573 [00:47<00:00, 75.67it/s] 
2024-11-06 21:39:24,848 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-11-06 21:41:30,392 - BERTopic - Dimensionality - Completed ✓
2024-11-06 21:41:30,393 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-11-06 21:41:38,885 - BERTopic - Cluster - Completed ✓
2024-11-06 21:41:38,886 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-11-06 21:41:39,438 - BERTopic - Representation - Completed ✓
2024-11-06 21:41:39,439 - BERTopic - Topic reduction - Reducing number of topics
2024-11-06 21:41:39,961 - BERTopic - Topic reduction - Reduced number of topics from 114 to 99


[1mThe coherence score for this model is: 0.6838556299631294 [0m


In [9]:
sentences_topic_model.visualize_barchart()

In [10]:
sentences_topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,53556,-1_the_and_to_of,"[the, and, to, of, in, for, is, on, year, zacks]",[With expected long term earnings per share gr...
1,0,4368,0_nvidia_gpu_gpus_gaming,"[nvidia, gpu, gpus, gaming, graphics, its, for...",[Also revenues increased year over year primar...
2,1,3501,1_rank_nasdaq_zacks_amzn,"[rank, nasdaq, zacks, amzn, carries, currently...","[The company currently has a Zacks Rank, The c..."
3,2,3238,2_earnings_cents_consensus_estimate,"[earnings, cents, consensus, estimate, revenue...",[Earnings of per share beat the Zacks Consensu...
4,3,2857,3_nyse_points_wmt_ba,"[nyse, points, wmt, ba, at, after, which, or, ...",[NYSE WMT which rose or points to trade at at ...
...,...,...,...,...,...
94,93,163,93_stoxx_ftse_dax_germany,"[stoxx, ftse, dax, germany, europe, european, ...",[In Europe the FTSE has ticked up to the DAX i...
95,94,160,94_produced_biotech_short_recommendations,"[produced, biotech, short, recommendations, do...",[Our recent biotech recommendations have produ...
96,95,157,95_cuban_famed_trillionaires_leave,"[cuban, famed, trillionaires, leave, produce, ...",[Famed investor Mark Cuban says it will produc...
97,96,157,96_spdr_select_etf_sector,"[spdr, select, etf, sector, technology, fund, ...","[The Technology Select Sector SPDR ETF, The Te..."


### __Model 3, Removing Stopwords:__

In [11]:
# Stopword removal
# stop_words="english" makes the vectorizer ignore scikit-learn's built-in
# English stop-word list; min_df=2 ignores terms whose document frequency,
# as seen by the vectorizer, is below 2
vectorizer_model = CountVectorizer(stop_words="english", min_df=2)

# Same embedding, UMAP and HDBSCAN objects as the previous model; only the
# vectorizer used for the topic representations is new
remove_stopwords_topic_model = BERTopic(nr_topics= "auto",
                                        embedding_model=embedding_model,
                                        umap_model=umap_model,
                                        hdbscan_model=hdbscan_model,
                                        vectorizer_model=vectorizer_model,
                                        verbose=True)

# The sentences are unchanged, so the embeddings computed earlier are reused
topics, probs = remove_stopwords_topic_model.fit_transform(processed_sentences, processed_sentences_embeddings)

# calculate_coherence tokenizes with this model's vectorizer, so the
# stop words are excluded from the coherence reference tokens as well
remove_stopwords_coherence_score = calculate_coherence(remove_stopwords_topic_model, processed_sentences)
print(f"{BOLD}The coherence score for this model is: {remove_stopwords_coherence_score} {RESET}")

2024-11-06 21:41:49,476 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-11-06 21:43:50,183 - BERTopic - Dimensionality - Completed ✓
2024-11-06 21:43:50,184 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-11-06 21:43:56,774 - BERTopic - Cluster - Completed ✓
2024-11-06 21:43:56,775 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-11-06 21:43:57,309 - BERTopic - Representation - Completed ✓
2024-11-06 21:43:57,309 - BERTopic - Topic reduction - Reducing number of topics
2024-11-06 21:43:57,800 - BERTopic - Topic reduction - Reduced number of topics from 114 to 76


[1mThe coherence score for this model is: 0.6589698712619726 [0m


In [12]:
remove_stopwords_topic_model.visualize_barchart()

In [13]:
remove_stopwords_topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,53422,-1_year_zacks_earnings_company,"[year, zacks, earnings, company, stocks, growt...",[The iPhone maker is expected to report adjust...
1,0,22514,0_nvidia_amd_nasdaq_earnings,"[nvidia, amd, nasdaq, earnings, zacks, intel, ...",[NASDAQ AMD as well as Intel has surged more t...
2,1,3372,1_chart_resistance_book_week,"[chart, resistance, book, week, support, level...","[It s right up against key resistance, Twelve ..."
3,2,2817,2_nyse_points_wmt_ba,"[nyse, points, wmt, ba, cat, close, ge, trade,...","[NYSE NUE which was down to at the close, NYSE..."
4,3,2419,3_china_trade_tariffs_trump,"[china, trade, tariffs, trump, chinese, war, p...",[China trade war Markets have bounced back on ...
...,...,...,...,...,...
71,70,162,70_stoxx_ftse_germany_europe,"[stoxx, ftse, germany, europe, european, cac, ...",[In Europe the FTSE has ticked up to the DAX i...
72,71,162,71_hp_dell_packard_hewlett,"[hp, dell, packard, hewlett, lenovo, enterpris...","[Dell and HP are the revenue drivers here, Del..."
73,72,156,72_spdr_select_etf_sector,"[spdr, select, etf, sector, technology, fund, ...","[The Technology Select Sector SPDR ETF, The Te..."
74,73,156,73_goldman_bank_america_banks,"[goldman, bank, america, banks, sachs, morgan,...","[Global Markets Inc and Goldman Sachs, Bank st..."


In [14]:
# Inspect the representative documents of topic 72 (the SPDR / ETF topic in
# the topic table shown for this model). The id is hard-coded and refers to
# this particular fitted model.
remove_stopwords_topic_model.get_representative_docs(72)

['The Technology Select Sector SPDR ETF',
 'The Technology Select Sector SPDR ETF',
 'The Technology Select Sector SPDR ETF']

### __Model 4, Removing Duplicates__

In [None]:
# As we can see above, there are duplicate sentences, we want to remove these

# Function to remove duplicates
def remove_duplicates(input_list):
    """Return the distinct items of ``input_list`` in first-occurrence order.

    Args:
        input_list: Iterable of hashable items (here: sentence strings).

    Returns:
        A new list containing each distinct item exactly once, ordered by
        the position of its first occurrence in ``input_list``.
    """
    # dict keys keep insertion order, whereas iterating a set of strings can
    # yield a different order in every interpreter session (string hashing is
    # randomised). A changing document order would change the input to the
    # topic model between runs, despite the fixed random seeds.
    return list(dict.fromkeys(input_list))

# Remove the duplicates
sentences_without_duplicates = remove_duplicates(processed_sentences)

# Remove empty strings
# (defensive: get_long_sentences only returns sentences with more than
# 5 words, so no empty string is expected here)
sentences_without_duplicates = [item for item in sentences_without_duplicates if item]

# Pre-calculate embeddings of sentences
# (the set of sentences changed, so the earlier embeddings cannot be reused)
sentences_without_dup_embeddings = embedding_model.encode(sentences_without_duplicates, show_progress_bar=True)

# Same configuration as the stop-word model; only the input sentences differ
remove_dup_topic_model = BERTopic(nr_topics= "auto",
                                  embedding_model=embedding_model,
                                  umap_model=umap_model,
                                  hdbscan_model=hdbscan_model,
                                  vectorizer_model=vectorizer_model,
                                  verbose=True)

# Fit on the de-duplicated sentences and score the result on the same sentences
topics, probs = remove_dup_topic_model.fit_transform(sentences_without_duplicates, sentences_without_dup_embeddings)
remove_dup_coherence_score = calculate_coherence(remove_dup_topic_model, sentences_without_duplicates)
print(f"{BOLD}The coherence score for this model is: {remove_dup_coherence_score} {RESET}")

Batches: 100%|██████████| 2911/2911 [00:36<00:00, 78.89it/s] 
2024-11-06 21:44:43,360 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-11-06 21:45:38,791 - BERTopic - Dimensionality - Completed ✓
2024-11-06 21:45:38,792 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-11-06 21:45:43,176 - BERTopic - Cluster - Completed ✓
2024-11-06 21:45:43,177 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-11-06 21:45:43,640 - BERTopic - Representation - Completed ✓
2024-11-06 21:45:43,640 - BERTopic - Topic reduction - Reducing number of topics
2024-11-06 21:45:44,070 - BERTopic - Topic reduction - Reduced number of topics from 83 to 36


[1mThe coherence score for this model is: 0.6552349388694452 [0m


In [16]:
remove_dup_topic_model.visualize_barchart()

In [17]:
# Inspect the representative documents of topic 1 of the de-duplicated model.
# The id is hard-coded and refers to this particular fitted model.
remove_dup_topic_model.get_representative_docs(1)

['I just want to share the impact his book had on me when I first read it in',
 'And I just found the next book I m reading to learn more about it',
 'Considering our analysis on the weekly chart below the current bounce came from support however the daily chart shows a breakdown']

### __Model 5, Removing Single Letters:__

In [18]:
# As we can see above, there are single letters in the sentences since the data does not contain apostrophes 
# These add some noise, so we want to remove them

# Drop every one-character word from each sentence, collapse the gaps this
# leaves behind, and discard any sentence that ends up empty.
# The two intermediate steps are lazy generators; the final list is built in one pass.
letters_dropped = (re.sub(r'\b\w\b', '', text).strip() for text in sentences_without_duplicates)
gaps_collapsed = (re.sub(r'\s+', ' ', text).strip() for text in letters_dropped)
sentences_without_single_letters = [text for text in gaps_collapsed if text]

# Pre-calculate embeddings of sentences
# (the sentence texts changed, so new embeddings are computed for them)
sentences_without_single_letters_embeddings = embedding_model.encode(sentences_without_single_letters, show_progress_bar=True)

# Same configuration as the previous model; only the input sentences differ
remove_single_letters_topic_model = BERTopic(nr_topics= "auto",
                                             embedding_model=embedding_model,
                                             umap_model=umap_model,
                                             hdbscan_model=hdbscan_model,
                                             vectorizer_model=vectorizer_model,
                                             verbose=True)

# Fit on the sentences without single letters and score the result on the same sentences
topics, probs = remove_single_letters_topic_model.fit_transform(sentences_without_single_letters, 
                                                                sentences_without_single_letters_embeddings)
remove_single_letters_coherence_score = calculate_coherence(remove_single_letters_topic_model, sentences_without_single_letters)
print(f"{BOLD}The coherence score for this model is: {remove_single_letters_coherence_score} {RESET}")

Batches: 100%|██████████| 2911/2911 [00:36<00:00, 79.82it/s] 
2024-11-06 21:46:28,412 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-11-06 21:47:23,530 - BERTopic - Dimensionality - Completed ✓
2024-11-06 21:47:23,532 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-11-06 21:47:28,012 - BERTopic - Cluster - Completed ✓
2024-11-06 21:47:28,013 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-11-06 21:47:28,509 - BERTopic - Representation - Completed ✓
2024-11-06 21:47:28,510 - BERTopic - Topic reduction - Reducing number of topics
2024-11-06 21:47:28,978 - BERTopic - Topic reduction - Reduced number of topics from 87 to 62


[1mThe coherence score for this model is: 0.651089480775198 [0m


In [19]:
remove_single_letters_topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,43467,-1_earnings_year_company_stocks,"[earnings, year, company, stocks, growth, zack...",[Nvidia announced strong quarter across every ...
1,0,5103,0_amd_intel_rank_nasdaq,"[amd, intel, rank, nasdaq, zacks, amzn, chip, ...","[AMD may not be for you, NASDAQ AMD shares are..."
2,1,3759,1_chart_resistance_week_book,"[chart, resistance, week, book, support, day, ...",[In the short term remains key support while p...
3,2,3553,2_stock_semiconductor_shares_stocks,"[stock, semiconductor, shares, stocks, investo...",[Yet the semiconductor industry has bounced ba...
4,3,3372,3_nvidia_ai_gpus_artificial,"[nvidia, ai, gpus, artificial, intelligence, g...","[AMD shares are up and Nvidia shares are up, A..."
...,...,...,...,...,...
57,56,165,56_ratio_peg_industry_average,"[ratio, peg, industry, average, price, discoun...","[It has ratio of just and ratio of, ratio is b..."
58,57,161,57_googl_nasdaq_goog_google,"[googl, nasdaq, goog, google, search, cloud, s...","[NASDAQ GOOGL under each in June of, NASDAQ GO..."
59,58,160,58_stoxx_ftse_germany_europe,"[stoxx, ftse, germany, europe, european, cac, ...",[In Europe this morning markets opened mixed t...
60,59,158,59_broadcom_limited_infrastructure_brocade,"[broadcom, limited, infrastructure, brocade, a...",[You can see Long term earnings growth rate fo...


### __Model Evaluation__

In [20]:
# Summary table with one row per model iteration, in the order the models
# were fitted. 'nr_of_topics' is the number of rows of each model's topic
# table, so the outlier row (-1) is counted when the model has one.
df_coherence_scores = pd.DataFrame({
    'iterations': list(range(1, 6)),
    'nr_of_topics': [
        len(model.get_topic_info())
        for model in (
            initial_topic_model,
            sentences_topic_model,
            remove_stopwords_topic_model,
            remove_dup_topic_model,
            remove_single_letters_topic_model,
        )
    ],
    'coherence_scores': [
        initial_coherence_score,
        sentences_coherence_score,
        remove_stopwords_coherence_score,
        remove_dup_coherence_score,
        remove_single_letters_coherence_score,
    ],
})
df_coherence_scores.head(5)

Unnamed: 0,iterations,nr_of_topics,coherence_scores
0,1,79,0.559904
1,2,99,0.683856
2,3,76,0.65897
3,4,36,0.655235
4,5,62,0.651089


### __Visualize Topics__

In [21]:
sentences_topic_model.visualize_topics()

### __Topic Similarity Matrix__

In [22]:
sentences_topic_model.visualize_heatmap()

### __GET TOPICS OF BEST MODEL:__

In [23]:
# Print the top words of every topic of the best model.
# The loop walks over the topic ids the model actually holds and skips the
# outlier topic (-1), instead of assuming that the ids are exactly
# 0 .. len(topic_info) - 2: that assumption drops the last topic whenever
# the model has no outlier topic.
for topic_id, words in sorted(sentences_topic_model.get_topics().items()):
    if topic_id == -1:
        continue
    word_list = [word for word, _ in words]
    print(f"{BOLD}topic {topic_id}:{RESET} {', '.join(word_list)}")

[1mtopic 0:[0m nvidia, gpu, gpus, gaming, graphics, its, for, ai, in, is
[1mtopic 1:[0m rank, nasdaq, zacks, amzn, carries, currently, which, settle, has, industry
[1mtopic 2:[0m earnings, cents, consensus, estimate, revenues, billion, share, per, year, quarter
[1mtopic 3:[0m nyse, points, wmt, ba, at, after, which, or, close, cat
[1mtopic 4:[0m china, trade, tariffs, chinese, trump, war, president, deal, talks, tariff
[1mtopic 5:[0m we, my, it, you, this, book, what, that, week, about
[1mtopic 6:[0m nvda, nasdaq, after, its, which, earnings, to, is, on, quarter
[1mtopic 7:[0m stock, investors, market, tech, stocks, this, it, the, that, to
[1mtopic 8:[0m driving, self, autonomous, vehicles, cars, car, vehicle, uber, technology, driverless
[1mtopic 9:[0m apple, iphone, smartphone, sales, smartphones, in, its, the, that, of
[1mtopic 10:[0m fed, inflation, rates, rate, interest, powell, federal, reserve, policy, meeting
[1mtopic 11:[0m cloud, microsoft, storage, da