In [10]:
import pandas as pd
import yahoo_fin.stock_info as si
import datetime
import sqlite3 as sq

In [11]:
# Date window for the price-history download (used as start_date/end_date by the fetch loop).
start = datetime.datetime(year=2020, month=1, day=1)
end = datetime.datetime(year=2023, month=8, day=31)

# Per-ticker price frames keyed by symbol; populated by the fetch loop.
historical_datas = dict()

In [12]:
# Load the ticker universe from the CSV's 'Ticker' column.
tickers_csv = pd.read_csv('../src/rs_stocks.csv')

# Drop blank cells and repeated symbols (first occurrence wins, original order kept) so
# the count printed below really is the number of *unique* tickers and no symbol is
# requested twice by the download loop.
tickers_list = tickers_csv['Ticker'].dropna().drop_duplicates().tolist()

ticker_count = len(tickers_list)
print(f"Number of unique tickers: {ticker_count}")

Number of unique tickers: 6569


In [13]:
# Download the price history for each ticker in turn. A failure for one symbol is
# reported and skipped so the remaining symbols are still processed.
for position, symbol in enumerate(tickers_list, 1):  # 1-based position for the progress message
    try:
        frame = si.get_data(symbol, start_date=start, end_date=end, index_as_date=True)
        historical_datas[symbol] = frame
        print(f"Fetching data for {symbol}: {position}/{ticker_count}")
    except Exception as err:
        # Best effort: log the problem and move on to the next symbol.
        print(f"Error fetching data for {symbol}: {err}")

Fetching data for AAOI: 1/6569
Fetching data for EDTX: 2/6569
Fetching data for EDTXU: 3/6569
Fetching data for RCRT: 4/6569
Fetching data for CABA: 5/6569
Fetching data for CVNA: 6/6569
Fetching data for ENLT: 7/6569
Fetching data for MLTX: 8/6569
Fetching data for ELVN: 9/6569
Fetching data for PHVS: 10/6569
Fetching data for HOLO: 11/6569
Fetching data for FNGR: 12/6569
Fetching data for RETA: 13/6569
Fetching data for EZGO: 14/6569
Fetching data for APCX: 15/6569
Fetching data for CDLX: 16/6569
Fetching data for TSHA: 17/6569
Fetching data for ZYNE: 18/6569
Fetching data for EOSEW: 19/6569
Fetching data for INOD: 20/6569
Fetching data for TAST: 21/6569
Fetching data for REKR: 22/6569
Fetching data for AKBA: 23/6569
Fetching data for MRKR: 24/6569
Fetching data for BBIO: 25/6569
Fetching data for NPCE: 26/6569
Fetching data for IONQ: 27/6569
Fetching data for CBAY: 28/6569
Fetching data for VRT: 29/6569
Fetching data for GREEL: 30/6569
Fetching data for USM: 31/6569
Fetching data fo

In [14]:
# Concatenate the dataframes in the dictionary
all_data = pd.concat(historical_datas.values())
all_data = all_data.reset_index().rename(columns={'index': 'date'})
cols = ['date', 'ticker'] + [col for col in all_data.columns if col not in ['date', 'ticker']]
all_data = all_data[cols]
# Check for duplicate 'ticker' columns and keep only one
if all_data.columns.duplicated().sum() > 0:
    all_data = all_data.loc[:, ~all_data.columns.duplicated()]

all_data['date'] = all_data['date'].astype(str)
all_data

Unnamed: 0,date,ticker,open,high,low,close,adjclose,volume
0,2020-01-02,AAOI,12.130000,12.530000,11.800000,12.500000,12.500000,885000.0
1,2020-01-03,AAOI,12.250000,12.325000,12.010000,12.120000,12.120000,498400.0
2,2020-01-06,AAOI,12.000000,12.200000,11.860000,12.150000,12.150000,361600.0
3,2020-01-07,AAOI,12.200000,12.465000,12.055000,12.430000,12.430000,330500.0
4,2020-01-08,AAOI,12.410000,12.820000,12.370000,12.700000,12.700000,520600.0
...,...,...,...,...,...,...,...,...
5319344,2023-08-24,RENEW,126.209999,126.919998,125.940002,126.019997,126.019997,0.0
5319345,2023-08-25,RENEW,125.260002,126.459999,125.169998,126.059998,126.059998,0.0
5319346,2023-08-28,RENEW,126.400002,127.019997,126.269997,126.709999,126.709999,0.0
5319347,2023-08-29,RENEW,126.760002,128.190002,126.760002,128.050003,128.050003,0.0


In [15]:
# Merge the freshly downloaded rows into the 'price_action' table of the SQLite database.
conn = sq.connect('../market_data.db')

try:
    # Ask SQLite directly whether the table exists instead of catching every exception
    # from the SELECT: a blanket except would turn any read failure (locked database,
    # corrupt file, interrupt) into an empty frame, and the 'replace' write below would
    # then overwrite the stored history with only the new rows.
    table_exists = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?",
        ('price_action',),
    ).fetchone() is not None

    if table_exists:
        df_db = pd.read_sql_query("SELECT * from price_action", conn)
    else:
        # First run: start from an empty frame with the expected columns.
        df_db = pd.DataFrame(columns=['date', 'ticker', 'open', 'high', 'low', 'close', 'adjclose', 'volume'])

    # New rows are concatenated last, so keep='last' lets them win over stored rows that
    # share the same (date, ticker) key.
    combined_data = pd.concat([df_db, all_data]).drop_duplicates(subset=['date', 'ticker'], keep='last')
    combined_data.to_sql('price_action', conn, if_exists='replace', index=False)
finally:
    # Always release the connection, even if the read, merge or write fails.
    conn.close()