In [1]:
import pandas as pd
import yahoo_fin.stock_info as si
import datetime
import sqlite3 as sq

             requires requests_html, which is not installed.
             
             Install using: 
             pip install requests_html
             
             After installation, you may have to restart your Python session.


In [2]:
# Date window for the price download, plus the container that collects results.
start = datetime.datetime(year=2023, month=10, day=15)
end = datetime.datetime(year=2023, month=10, day=17)
historical_datas = dict()  # keyed by ticker symbol

In [3]:
# Read the ticker universe from the project CSV (expects a 'Ticker' column).
tickers_csv = pd.read_csv('../src/rs_stocks.csv')

# Drop blank cells and repeated symbols (first occurrence wins, original order
# is kept) so the number printed below really is the count of *unique* tickers
# and anything iterating this list never sees NaN or the same symbol twice.
tickers_list = tickers_csv['Ticker'].dropna().drop_duplicates().tolist()

ticker_count = len(tickers_list)
print(f"Number of unique tickers: {ticker_count}")

Number of unique tickers: 6547


In [4]:
# Walk the ticker list with a 1-based counter, storing each downloaded frame
# under its symbol. A failure for one symbol is reported and the loop simply
# moves on to the next one.
for idx, symbol in enumerate(tickers_list, 1):
    try:
        historical_datas[symbol] = si.get_data(
            symbol,
            start_date=start,
            end_date=end,
            index_as_date=True,
        )
        print(f"Fetching data for {symbol}: {idx}/{ticker_count}")
    except Exception as exc:
        print(f"Error fetching data for {symbol}: {exc}")

Fetching data for HLGN: 1/6547
Fetching data for CABA: 2/6547
Fetching data for LIFW: 3/6547
Fetching data for AHG: 4/6547
Fetching data for SYBX: 5/6547
Fetching data for ORTX: 6/6547
Fetching data for MLTX: 7/6547
Fetching data for ENLT: 8/6547
Fetching data for XBIOW: 9/6547
Fetching data for LFMD: 10/6547
Fetching data for PRFX: 11/6547
Fetching data for OLMA: 12/6547
Fetching data for APLT: 13/6547
Fetching data for AUGX: 14/6547
Fetching data for SMCI: 15/6547
Fetching data for APCX: 16/6547
Fetching data for VRT: 17/6547
Fetching data for CVNA: 18/6547
Fetching data for IZM: 19/6547
Fetching data for NSPR: 20/6547
Fetching data for APTO: 21/6547
Fetching data for TPST: 22/6547
Fetching data for ANF: 23/6547
Fetching data for TSHA: 24/6547
Fetching data for GPCR: 25/6547
Fetching data for WALDW: 26/6547
Fetching data for ULBI: 27/6547
Fetching data for IMVT: 28/6547
Fetching data for MNSO: 29/6547
Fetching data for AAOI: 30/6547
Fetching data for GEOS: 31/6547
Fetching data for F

In [5]:
# Stack every per-ticker frame into one long table, then turn the index into a
# regular column and label it 'date'.
all_data = (
    pd.concat(historical_datas.values())
    .reset_index()
    .rename(columns={'index': 'date'})
)

# Identifying columns go first; the remaining columns keep their current order.
cols = ['date', 'ticker']
cols.extend(name for name in all_data.columns if name not in ('date', 'ticker'))
all_data = all_data[cols]

# When a column label occurs more than once, keep only its first occurrence.
if all_data.columns.duplicated().any():
    all_data = all_data.loc[:, ~all_data.columns.duplicated()]

# Store dates as plain strings.
all_data['date'] = all_data['date'].astype(str)
all_data

Unnamed: 0,date,ticker,open,high,low,close,adjclose,volume
0,2023-10-16,HLGN,2.570,2.5900,2.5100,2.5500,2.5500,34773
1,2023-10-16,CABA,15.250,15.3500,14.0400,14.5900,14.5900,473576
2,2023-10-16,LIFW,2.180,2.1800,1.8700,1.8900,1.8900,281427
3,2023-10-16,AHG,1.920,1.9200,1.7100,1.8500,1.8500,103983
4,2023-10-16,SYBX,2.460,2.7426,2.4000,2.4500,2.4500,25776
...,...,...,...,...,...,...,...,...
6171,2023-10-16,GMBL,0.080,0.0800,0.0663,0.0715,0.0715,12469309
6172,2023-10-16,CRKN,0.335,0.3750,0.3202,0.3370,0.3370,933244
6173,2023-10-16,FFIE,1.110,1.1600,1.0600,1.0800,1.0800,4409448
6174,2023-10-16,ASTI,1.260,1.2900,1.2000,1.2000,1.2000,102666


In [6]:
# Merge the freshly downloaded rows into the 'price_action' table of the
# SQLite database, keeping the newest row for each (date, ticker) pair.
conn = sq.connect('../market_data.db')

try:
    # Ask SQLite whether the table exists instead of treating *any* read error
    # as "no table yet". With a blanket except, a locked or unreadable database
    # would yield an empty df_db, and the 'replace' write below would then
    # overwrite the stored history with only the new rows.
    table_exists = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?",
        ('price_action',),
    ).fetchone() is not None

    if table_exists:
        # Genuine read failures propagate from here rather than being swallowed.
        df_db = pd.read_sql_query("SELECT * from price_action", conn)
    else:
        # First run: start from an empty frame with the expected columns.
        df_db = pd.DataFrame(columns=['date', 'ticker', 'open', 'high', 'low', 'close', 'adjclose', 'volume'])

    # all_data is listed last, so keep='last' lets new rows win over stored ones.
    combined_data = pd.concat([df_db, all_data]).drop_duplicates(subset=['date', 'ticker'], keep='last')
    combined_data.to_sql('price_action', conn, if_exists='replace', index=False)
finally:
    # Release the database handle even when reading or writing fails.
    conn.close()