In [2]:
import pandas as pd
import requests
import sqlite3
from lxml import html, etree
import time
from datetime import datetime, timedelta 

In [3]:
def convert_date(x):
    """Parse ``x`` into a pandas timestamp, or return ``None`` if parsing fails.

    Parameters
    ----------
    x : object
        Value handed to ``pd.to_datetime`` (typically a date string).

    Returns
    -------
    The result of ``pd.to_datetime(x)``, or ``None`` when that call raises.
    """
    try:
        return pd.to_datetime(x)
    except Exception:
        # Catch ordinary parsing failures only; a bare ``except`` would also
        # swallow KeyboardInterrupt / SystemExit and make the cell unstoppable.
        return None


def get_yahoo_data(ticker, market, oldest_date, timeout=30):
    """Download the Yahoo Finance history table for ``ticker`` as a DataFrame.

    The first ``<table>`` of the quote history page is parsed, rows whose date
    cannot be parsed or that contain missing values are dropped, dividend rows
    are removed, and the result is restricted to ``oldest_date <= date < today``
    (today at midnight, so the current day is excluded).

    Parameters
    ----------
    ticker : str
        Symbol inserted into the Yahoo Finance URL and stored in the
        ``ticker`` column.
    market : str
        Value stored in the ``market`` column.
    oldest_date : datetime-like
        Inclusive lower bound for the ``date`` column.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        Columns: ticker, market, date, open, high, low, close, adjusted, volume.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or returns an HTTP error status.
    ValueError
        If the page contains no ``<table>`` element.
    """
    # Local import keeps the function self-contained; BytesIO lets read_html
    # treat the markup as a file-like object instead of a literal HTML string.
    from io import BytesIO

    url = "https://finance.yahoo.com/quote/{}/history?p={}".format(ticker, ticker)
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    doc = html.fromstring(response.content)

    tables = doc.xpath("//table")
    if not tables:
        raise ValueError("No <table> element found on the history page for {}".format(ticker))
    table_markup = etree.tostring(tables[0], method='xml')

    df = pd.read_html(BytesIO(table_markup))[0]

    # Unparseable dates become None and are removed by the dropna() below.
    df["Date"] = df["Date"].apply(convert_date)
    df = df.dropna()

    # Remove dividend rows.
    # NOTE(review): other non-price rows (e.g. stock splits) would not be caught
    # by this filter -- confirm whether they can appear in the table.
    is_dividend = df["Open"].astype(str).str.contains("Dividend")
    df = df.loc[~is_dividend].reset_index(drop=True)

    df.columns = ['date', 'open', 'high', 'low', 'close', 'adjusted', 'volume']
    df['ticker'] = ticker
    df['market'] = market

    df = df.loc[:, ["ticker", "market", 'date', 'open', 'high', 'low', 'close', 'adjusted', 'volume']]

    # Use the caller-supplied lower bound and a locally computed upper bound
    # rather than relying on notebook-level globals.
    midnight_today = pd.Timestamp.today().normalize()
    df = df.loc[(df["date"] >= oldest_date) & (df["date"] < midnight_today)]

    return df

In [4]:
# Incrementally update the "stocks" table in stocks.db: find the newest stored
# date, then download and append everything from the following day up to
# (but not including) today for every known (ticker, market) pair.
conn = sqlite3.connect("stocks.db")
c = conn.cursor()

max_date_available = c.execute("SELECT MAX(Date) FROM stocks").fetchone()[0]
if max_date_available is None:
    # MAX() yields NULL on an empty table; fail with a clear message instead of
    # an opaque TypeError from strptime.
    raise RuntimeError("The 'stocks' table is empty; cannot determine the first date to download.")
max_date_available = datetime.strptime(max_date_available, "%Y-%m-%d %H:%M:%S")

first_date_to_get = max_date_available + timedelta(1)

# Today at midnight, so comparisons against stored dates are day-aligned.
today = datetime.today()
today = datetime(today.year, today.month, today.day, 0, 0)

business_dates = list(pd.bdate_range(first_date_to_get, today))
test_res = []  # not used in this cell; kept in case other cells reference it

# Only download when there is at least one full day missing before today.
if first_date_to_get < today:

    ## If today's date is in the list we have to drop it (today's data is not final yet).
    if business_dates and business_dates[-1].date() == today.date():
        business_dates.pop()

    ## Now business_dates holds the business days for which data should be gathered

    ## Loading ticker, market pairs to iterate through
    ticker_market_list = c.execute("SELECT DISTINCT ticker, market FROM stocks").fetchall()

    for ticker, market in ticker_market_list:
        print(ticker, market)
        try:
            df = get_yahoo_data(ticker, market, first_date_to_get)
            if not df.empty:
                df.to_sql("stocks", conn, if_exists='append', index=False)
        except Exception as e:
            # Best effort: report the failure and continue with the next ticker.
            print(e)
        finally:
            # Pause after every request, including failed ones, to avoid
            # hammering the server.
            time.sleep(1)

A NYSE
AA NYSE
AAN NYSE
AAP NYSE
ABBV NYSE
ABC NYSE
ABEV NYSE
ABG NYSE
ABM NYSE
ABT NYSE
ACB NYSE
ACM NYSE
ACN NYSE
ADM NYSE
ADS NYSE
ADT NYSE
AEE NYSE
AEO NYSE
AEP NYSE
AES NYSE
AFG NYSE
AFL NYSE
AGCO NYSE
AGRO NYSE
AIG NYSE
AIN NYSE
AIZ NYSE
AJG NYSE
ALB NYSE
ALK NYSE
ALL NYSE
ALLE NYSE
ALLY NYSE
AME NYSE
AMG NYSE
AMP NYSE
AMT NYSE
AMWL NYSE
AMX NYSE
AN NYSE
ANET NYSE
ANF NYSE
ANTM NYSE
AON NYSE
APA NYSE
APD NYSE
APH NYSE
APRN NYSE
APTV NYSE
ARCH NYSE
ARE NYSE
ARGO NYSE
ARMK NYSE
ARW NYSE
ASAN NYSE
ASH NYSE
ATGE NYSE
ATH NYSE
ATI NYSE
ATO NYSE
ATUS NYSE
AVB NYSE
AVY NYSE
AWK NYSE
AXP NYSE
AXTA NYSE
AYI NYSE
AYX NYSE
AZN NYSE
AZO NYSE
AZUL NYSE
BA NYSE
BABA NYSE
BAC NYSE
BAH NYSE
BAX NYSE
BB NYSE
BBD NYSE
BBY NYSE
BC NYSE
BCO NYSE
BDX NYSE
BEN NYSE
BERY NYSE
BFAM NYSE
BG NYSE
BHC NYSE
BHP NYSE
BHVN NYSE
BIG NYSE
BIO NYSE
BITA NYSE
BJ NYSE
BK NYSE
BLK NYSE
BLL NYSE
BMY NYSE
BOX NYSE
BR NYSE
BRFS NYSE
BRO NYSE
BSAC NYSE
BTI NYSE
BUD NYSE
BURL NYSE
BWA NYSE
BX NYSE
BXP NYSE
BXS NYSE
BYD 

GT NASDAQ
GWPH NASDAQ
HAIN NASDAQ
HAS NASDAQ
HBAN NASDAQ
HDS NASDAQ
HOLX NASDAQ
HQY NASDAQ
HTHT NASDAQ
IAC NASDAQ
IBKR NASDAQ
ICPT NASDAQ
IDCC NASDAQ
IDXX NASDAQ
IEP NASDAQ
IIVI NASDAQ
ILMN NASDAQ
IMMU NASDAQ
INCY NASDAQ
INFO NASDAQ
INGN NASDAQ
INTC NASDAQ
INTU NASDAQ
IONS NASDAQ
IPGP NASDAQ
IRBT NASDAQ
IRTC NASDAQ
ISBC NASDAQ
ISRG NASDAQ
JACK NASDAQ
JAZZ NASDAQ
JBHT NASDAQ
JBLU NASDAQ
JFU NASDAQ
JKHY NASDAQ
JOUT NASDAQ
KHC NASDAQ
KLAC NASDAQ
KTOS NASDAQ
LAMR NASDAQ
LBTYA NASDAQ
LBTYK NASDAQ
LILA NASDAQ
LITE NASDAQ
LKQ NASDAQ
LNG NASDAQ
LPSN NASDAQ
LRCX NASDAQ
LSXMA NASDAQ
LULU NASDAQ
LYFT NASDAQ
MAR NASDAQ
MASI NASDAQ
MAT NASDAQ
MCHP NASDAQ
MDB NASDAQ
MDLZ NASDAQ
MELI NASDAQ
MIK NASDAQ
MKTX NASDAQ
MMYT NASDAQ
MNST NASDAQ
MOMO NASDAQ
MRCY NASDAQ
MRNA NASDAQ
MRTX NASDAQ
MRVL NASDAQ
MSFT NASDAQ
MTCH NASDAQ
MU NASDAQ
MXIM NASDAQ
MYL NASDAQ
NATI NASDAQ
NAVI NASDAQ
NBIX NASDAQ
NET NASDAQ
NFLX NASDAQ
NIU NASDAQ
NLOK NASDAQ
NTAP NASDAQ
NTDOY NASDAQ
NTLA NASDAQ
NTNX NASDAQ
NTRA NASDAQ
NTRS NAS

In [11]:
# Wall-clock timestamp (seconds since the epoch) taken when this cell runs.
start = time.time()


In [12]:
# Second wall-clock timestamp (seconds since the epoch) taken when this cell runs.
end = time.time()

In [13]:

# Difference in seconds between the `end` and `start` timestamps.
end - start

3.1033596992492676