In [6]:
import json
from concurrent.futures import ThreadPoolExecutor
from finvizfinance.quote import finvizfinance
from tqdm import tqdm
import pandas as pd
import time

In [2]:
with open('csvjson.json', 'r') as file:
    data = json.load(file)

In [3]:
tickers = [ticker_info['symbol'] for ticker_info in data]

In [4]:
print(tickers[:10])

['AADI', 'AAL', 'AAME', 'AAOI', 'AAON', 'AAPL', 'ABAT', 'ABEO', 'ABL', 'ABNB']


In [7]:
news_dfs = {}

def get_news(ticker):
    print(f'Fetching headlines for {ticker}...')
    stock = finvizfinance(ticker)
    news_df = stock.ticker_news()
    return news_df

def fetch_headlines(ticker, retries=3, delay=5):
    attempt = 0
    while attempt < retries:
        try:
            news_dfs[ticker] = get_news(ticker)
            break
        except Exception as e:
            print(f"Error fetching headlines for {ticker}: {e}")
            attempt += 1
            if attempt < retries:
                print(f"Retrying {ticker}... (Attempt {attempt}/{retries})")
                time.sleep(delay)
            else:
                print(f"Failed to fetch headlines for {ticker} after {retries} attempts. Skipping...")

with ThreadPoolExecutor(max_workers=3) as executor:
    with tqdm(total=len(tickers)) as pbar:
        futures = [executor.submit(fetch_headlines, ticker) for ticker in tickers]
        for future in futures:
            future.result()
            pbar.update(1)

  0%|          | 0/2474 [00:00<?, ?it/s]

Fetching headlines for AADI...
Fetching headlines for AAL...
Fetching headlines for AAME...
Fetching headlines for AAOI...
Fetching headlines for AAON...
Fetching headlines for AAPL...
Fetching headlines for ABAT...
Fetching headlines for ABEO...
Fetching headlines for ABL...
Fetching headlines for ABNB...
Fetching headlines for ABOS...
Fetching headlines for ABP...
Fetching headlines for ABSI...
Fetching headlines for ABVC...
Fetching headlines for ACAD...
Fetching headlines for ACCD...
Fetching headlines for ACDC...
Fetching headlines for ACET...
Fetching headlines for ACHC...
Fetching headlines for ACHV...
Fetching headlines for ACIC...
Fetching headlines for ACIW...
Fetching headlines for ACLS...
Fetching headlines for ACLX...
Fetching headlines for ACMR...
Fetching headlines for ACNB...
Fetching headlines for ACNT...
Fetching headlines for ACON...
Fetching headlines for ACRS...
Fetching headlines for ACRV...
Fetching headlines for ACT...
Fetching headlines for ACTG...
Fetching hea

In [8]:
combined_df = pd.concat(
    [df.assign(Ticker=ticker) for ticker, df in news_dfs.items()],
    ignore_index=True
)

In [9]:
combined_df = combined_df[['Date', 'Ticker', 'Title','Link']]

In [10]:
combined_df

Unnamed: 0,Date,Ticker,Title,Link
0,2024-11-06 08:00:00,AADI,Aadi Bioscience Announces Financial Results fo...,https://www.prnewswire.com/news-releases/aadi-...
1,2024-10-30 16:05:00,AADI,Aadi Bioscience to Report Third Quarter 2024 R...,https://www.prnewswire.com/news-releases/aadi-...
2,2024-08-20 17:00:00,AADI,Aadi Bioscience Provides PRECISION1 Trial and ...,https://www.prnewswire.com/news-releases/aadi-...
3,2024-08-07 08:00:00,AADI,Aadi Bioscience Announces Financial Results fo...,https://www.prnewswire.com/news-releases/aadi-...
4,2024-07-31 16:05:00,AADI,Aadi Bioscience to Report Second Quarter 2024 ...,https://www.prnewswire.com/news-releases/aadi-...
...,...,...,...,...
217081,2023-05-05 12:17:00,ZYXI,"After the recent decline, Zynex, Inc. (NASDAQ:...",https://finance.yahoo.com/news/recent-decline-...
217082,2023-05-04 21:04:00,ZYXI,Zynex Prices Upsized $52.5 Million Convertible...,https://finance.yahoo.com/news/zynex-prices-up...
217083,2023-05-04 07:30:00,ZYXI,Zynex Announces Update Regarding Previously An...,https://finance.yahoo.com/news/zynex-announces...
217084,2023-05-03 16:02:00,ZYXI,Zynex Announces Proposed Secondary Public Offe...,https://finance.yahoo.com/news/zynex-announces...


In [14]:
list(set(tickers) - set(list(combined_df['Ticker'].unique())))

['CUB',
 'LPAA',
 'IBAC',
 'PARAA',
 'VACH',
 'ANSC',
 'ALF',
 'METCB',
 'NETD',
 'DTSQ',
 'BKHA',
 'FBLA',
 'PRLH',
 'DYNX',
 'BACQ',
 'KELYB',
 'PPYA',
 'CCIR',
 'EURK',
 'ATMV',
 'DGICB',
 'FATBB',
 'SENEB',
 'RDIB',
 'QRTEB',
 'CAPN',
 'BELFA',
 'CHAR',
 'SQFTW',
 'SIMA',
 'GIG',
 'GPAT',
 'POLE',
 'ALDF']

In [16]:
combined_df.to_json('news.json', orient='records')