In [1]:
import pandas as pd
import yahoo_fin.stock_info as si
import datetime
import sqlite3 as sq

             requires requests_html, which is not installed.
             
             Install using: 
             pip install requests_html
             
             After installation, you may have to restart your Python session.


In [2]:
# Date window passed to the price download as start_date / end_date.
# NOTE(review): the sample output further down only contains rows for
# 2023-09-18 and 2023-09-19, so `end` looks exclusive - confirm against the
# yahoo_fin documentation.
start = datetime.datetime(year=2023, month=9, day=18)
end = datetime.datetime(year=2023, month=9, day=20)

# Accumulator filled by the download loop: ticker symbol -> fetched data.
historical_datas = dict()

In [3]:
# Load the ticker universe from the project's CSV; only the 'Ticker' column
# is used here.
tickers_csv = pd.read_csv('../src/rs_stocks.csv')

# De-duplicate so the count printed below really is the number of *unique*
# tickers and no symbol is downloaded more than once. drop_duplicates()
# keeps the first occurrence, so the original file order is preserved.
tickers_list = tickers_csv['Ticker'].drop_duplicates().tolist()

ticker_count = len(tickers_list)
print(f"Number of unique tickers: {ticker_count}")

Number of unique tickers: 6618


In [4]:
# Download the price history for every ticker, one request per symbol.
# Failures are reported and skipped so a single bad symbol does not abort
# the whole run; failed symbols simply get no entry in historical_datas.
for position, symbol in enumerate(tickers_list, 1):  # 1-based so progress reads "k/total"
    try:
        frame = si.get_data(symbol, start_date=start, end_date=end, index_as_date=True)
        historical_datas[symbol] = frame
        # Printed only after the download above has succeeded.
        print(f"Fetching data for {symbol}: {position}/{ticker_count}")
    except Exception as err:
        print(f"Error fetching data for {symbol}: {err}")

Fetching data for FNGR: 1/6618
Fetching data for CABA: 2/6618
Fetching data for CVNA: 3/6618
Fetching data for MLTX: 4/6618
Fetching data for AAOI: 5/6618
Fetching data for HOLO: 6/6618
Fetching data for ENLT: 7/6618
Fetching data for APTO: 8/6618
Fetching data for RETA: 9/6618
Fetching data for FRLAW: 10/6618
Fetching data for CDLX: 11/6618
Fetching data for IONQ: 12/6618
Fetching data for CBAY: 13/6618
Fetching data for REKR: 14/6618
Fetching data for ELVN: 15/6618
Fetching data for ZYNE: 16/6618
Fetching data for BTCY: 17/6618
Fetching data for IZM: 18/6618
Fetching data for JUPW: 19/6618
Fetching data for GREEL: 20/6618
Fetching data for OLMA: 21/6618
Fetching data for DAKT: 22/6618
Fetching data for TSHA: 23/6618
Fetching data for EYPT: 24/6618
Fetching data for PFIE: 25/6618
Fetching data for APP: 26/6618
Fetching data for APCX: 27/6618
Fetching data for TAST: 28/6618
Fetching data for SMCI: 29/6618
Fetching data for AUGX: 30/6618
Fetching data for NPCE: 31/6618
Fetching data for

In [5]:
# Stack every per-ticker frame into one long table and turn the index into
# an ordinary 'date' column (reset_index names an unnamed index 'index').
all_data = (
    pd.concat(historical_datas.values())
    .reset_index()
    .rename(columns={'index': 'date'})
)

# Put the identifying columns first; every other column keeps its
# original relative order.
key_columns = ['date', 'ticker']
cols = key_columns + [name for name in all_data.columns if name not in key_columns]
all_data = all_data[cols]

# If any column label occurs more than once, keep only its first occurrence.
repeated = all_data.columns.duplicated()
if repeated.sum() > 0:
    all_data = all_data.loc[:, ~repeated]

# Store dates as plain strings.
all_data['date'] = all_data['date'].astype(str)
all_data

Unnamed: 0,date,ticker,open,high,low,close,adjclose,volume
0,2023-09-18,FNGR,5.910000,5.940000,5.160000,5.200000,5.200000,1043800
1,2023-09-19,FNGR,5.170000,5.890000,5.120000,5.560000,5.560000,1250473
2,2023-09-18,CABA,16.170000,19.340000,16.030001,18.650000,18.650000,1759800
3,2023-09-19,CABA,18.930000,19.030001,17.151501,17.360001,17.360001,933589
4,2023-09-18,CVNA,52.660000,55.139999,50.700001,51.950001,51.950001,12129500
...,...,...,...,...,...,...,...,...
12835,2023-09-18,SMX,1.670000,1.894000,1.609000,1.820000,1.820000,115800
12836,2023-09-19,SMX,1.730000,1.820000,1.652000,1.700000,1.700000,57447
12837,2023-09-18,AVTX,0.140000,0.143000,0.115000,0.123000,0.123000,87771700
12838,2023-09-19,AVTX,0.120000,0.146000,0.107100,0.121500,0.121500,105480374


In [6]:
# Merge the freshly downloaded rows into the 'price_action' table of the
# SQLite database, keeping one row per (date, ticker).
conn = sq.connect('../market_data.db')

try:
    # Check for the table explicitly rather than catching read errors: a
    # genuine read failure must propagate, otherwise the 'replace' write
    # below would overwrite the stored history with only the new batch.
    table_exists = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
        ('price_action',),
    ).fetchone() is not None

    if table_exists:
        df_db = pd.read_sql_query("SELECT * from price_action", conn)
    else:
        # First run: start from an empty frame with the expected columns.
        df_db = pd.DataFrame(columns=['date', 'ticker', 'open', 'high', 'low', 'close', 'adjclose', 'volume'])

    # New rows come last in the concat, so keep='last' lets a re-downloaded
    # (date, ticker) pair replace the row already stored.
    combined_data = pd.concat([df_db, all_data]).drop_duplicates(subset=['date', 'ticker'], keep='last')
    combined_data.to_sql('price_action', conn, if_exists='replace', index=False)
finally:
    # Always release the database handle, even if the read or write fails.
    conn.close()