In [5]:
import pandas as pd
import yahoo_fin.stock_info as si
import datetime
import sqlite3 as sq

In [6]:
# Date bounds handed to the downloader as start_date / end_date in the fetch cell.
start = datetime.datetime(year=2023, month=9, day=12)
end = datetime.datetime(year=2023, month=9, day=13)
# Downloaded frames keyed by ticker symbol; filled in by the fetch loop.
historical_datas = dict()

In [7]:
# Load the ticker universe from the 'Ticker' column of the CSV.
tickers_csv = pd.read_csv('../src/rs_stocks.csv')
# Drop missing values and repeated symbols (first occurrence wins, so the file
# order is preserved). Without this the count printed below is not a count of
# *unique* tickers, and any repeated symbol would be downloaded more than once.
tickers_list = tickers_csv['Ticker'].dropna().drop_duplicates().tolist()

# Used as the denominator of the progress messages in the fetch loop.
ticker_count = len(tickers_list)
print(f"Number of unique tickers: {ticker_count}")

Number of unique tickers: 6621


In [8]:
# Download each ticker's price frame for the [start, end] window.
# idx is 1-based so the progress message reads "k/total".
# A failure for one symbol is reported and skipped; the loop keeps going.
for idx, symbol in enumerate(tickers_list, start=1):
    try:
        frame = si.get_data(
            symbol,
            start_date=start,
            end_date=end,
            index_as_date=True,
        )
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
    else:
        # Only store and report once the download has actually succeeded.
        historical_datas[symbol] = frame
        print(f"Fetching data for {symbol}: {idx}/{ticker_count}")

Fetching data for FNGR: 1/6621
Fetching data for CABA: 2/6621
Fetching data for CVNA: 3/6621
Fetching data for AAOI: 4/6621
Fetching data for ENLT: 5/6621
Fetching data for MLTX: 6/6621
Fetching data for HOLO: 7/6621
Fetching data for APTO: 8/6621
Fetching data for RETA: 9/6621
Fetching data for CDLX: 10/6621
Fetching data for IONQ: 11/6621
Fetching data for TSHA: 12/6621
Fetching data for CBAY: 13/6621
Fetching data for REKR: 14/6621
Fetching data for ELVN: 15/6621
Fetching data for ZYNE: 16/6621
Fetching data for JUPW: 17/6621
Fetching data for BTCY: 18/6621
Fetching data for IZM: 19/6621
Fetching data for APCX: 20/6621
Fetching data for DAKT: 21/6621
Fetching data for OLMA: 22/6621
Fetching data for EYPT: 23/6621
Fetching data for APP: 24/6621
Fetching data for TAST: 25/6621
Fetching data for SMCI: 26/6621
Fetching data for PFIE: 27/6621
Fetching data for LMB: 28/6621
Fetching data for AHG: 29/6621
Fetching data for VRT: 30/6621
Fetching data for AGLE: 31/6621
Fetching data for VKTX

In [9]:
# Stack every per-ticker frame into one long table, then turn the index into a
# regular column named 'date'.
all_data = (
    pd.concat(historical_datas.values())
    .reset_index()
    .rename(columns={'index': 'date'})
)

# Put the identifying columns first and keep the remaining ones in their
# existing order.
leading = ['date', 'ticker']
cols = leading + [col for col in all_data.columns if col not in leading]
all_data = all_data[cols]

# If the selection above produced repeated column labels, keep only the first
# occurrence of each.
if all_data.columns.duplicated().any():
    all_data = all_data.loc[:, ~all_data.columns.duplicated()]

# Store dates as plain strings.
all_data['date'] = all_data['date'].astype(str)
all_data

Unnamed: 0,date,ticker,open,high,low,close,adjclose,volume
0,2023-09-12,FNGR,7.250000,7.870000,6.471000,7.830000,7.830000,2949216
1,2023-09-12,CABA,15.230000,16.780001,15.230000,16.490000,16.490000,1501196
2,2023-09-12,CVNA,49.950001,53.770000,49.404900,50.799999,50.799999,12228190
3,2023-09-12,AAOI,11.650000,12.550000,11.310000,11.360000,11.360000,1758076
4,2023-09-12,ENLT,16.600000,16.620001,16.315001,16.600000,16.600000,5696
...,...,...,...,...,...,...,...,...
6341,2023-09-12,ALLR,1.030000,1.100000,1.030000,1.080000,1.080000,112046
6342,2023-09-12,AULT,0.775700,0.775700,0.695000,0.713000,0.713000,781717
6343,2023-09-12,GMBL,0.111500,0.123300,0.111500,0.115000,0.115000,2555979
6344,2023-09-12,SMX,1.420000,1.440000,1.370000,1.370000,1.370000,41956


In [10]:
# Merge the freshly downloaded rows into the 'price_action' table.
# Existing rows are loaded, the new rows are appended, and for each
# (date, ticker) pair only the newest row is kept before the table is rewritten.
conn = sq.connect('../market_data.db')

try:
    # Ask SQLite whether the table exists instead of treating *any* read error
    # as "no table yet". Because the table is rewritten with
    # if_exists='replace' below, swallowing a genuine read failure (locked or
    # corrupt database, interrupted kernel) would silently discard the stored
    # history. Such errors now propagate.
    table_exists = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'price_action'"
    ).fetchone() is not None

    if table_exists:
        df_db = pd.read_sql_query("SELECT * from price_action", conn)
    else:
        # First run: start from an empty frame with the expected columns.
        df_db = pd.DataFrame(columns=['date', 'ticker', 'open', 'high', 'low', 'close', 'adjclose', 'volume'])

    # keep='last' lets the newly fetched row win over a previously stored one.
    combined_data = pd.concat([df_db, all_data]).drop_duplicates(subset=['date', 'ticker'], keep='last')
    combined_data.to_sql('price_action', conn, if_exists='replace', index=False)
finally:
    # Release the database handle even if reading or writing failed.
    conn.close()