In [1]:
import pandas as pd
import numpy as np
import requests
from io import BytesIO
import time
from datetime import date, datetime, timedelta
import random

In [2]:
# Import packages
import yfinance as yf

SEED = 1993        # fixed seed so the ticker sample is the same on every run
N_PER_SECTOR = 17  # number of tickers drawn from each GICS sector
EXTRA_TICKERS = ['META', 'AAPL', 'NFLX', 'GOOGL', 'AMZN', 'TSLA']

# Read the table of stock tickers that make up the S&P 500
df_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
df_tickers = df_tickers.rename(columns={'Symbol': 'ticker'})
# Make sure GOOG and GOOGL are not both included
df_tickers = df_tickers[df_tickers['ticker'] != 'GOOG']

# This only seeds Python's `random` module. pandas sampling draws from NumPy's
# generator, so the seed is passed explicitly through `random_state` below.
random.seed(SEED)

# Stratified random sample: shuffle all rows once with a fixed seed, then keep
# the first N_PER_SECTOR rows of every sector. Unlike `groupby.apply(sample)`,
# this is reproducible, does not raise when a sector has fewer than
# N_PER_SECTOR members, and keeps the 'GICS Sector' column in the result.
df_tickers_sample = (
    df_tickers
    .sample(frac=1, random_state=SEED)
    .groupby('GICS Sector')
    .head(N_PER_SECTOR)
    .sort_values('GICS Sector', kind='mergesort')  # stable: keeps shuffled order within a sector
)

# Manually add a few tech tickers
df_tickers_sample = pd.concat([
    df_tickers_sample,
    df_tickers[df_tickers['ticker'].isin(EXTRA_TICKERS)],
])
df_tickers_sample = df_tickers_sample.drop_duplicates(subset='ticker')
tickers = list(df_tickers_sample['ticker'])

  df_tickers_sample = df_tickers.groupby('GICS Sector', group_keys=False).apply(lambda x: x.sample(17))


In [3]:
# Custom tickers (crypto pairs).
# Only symbols that are not already in the list are added, so re-running this
# cell does not duplicate them (duplicates would be downloaded twice).
custom_tickers = ['BTC/USD', 'ETH/USD', 'XRP/USD', 'LTC/USD', 'ADA/USD', 'MATIC/USD']
tickers.extend([symbol for symbol in custom_tickers if symbol not in tickers])

In [4]:
# Twelve Data
# NOTE(review): `twelve_data_api` is not defined in the visible cells; it must
# be set (ideally loaded from an environment variable) before this cell runs.
api_key = twelve_data_api

MAX_DAYS_PER_REQUEST = 5000  # calendar days covered by one time_series request
REQUEST_TIMEOUT = 30         # seconds before an HTTP request is abandoned
RATE_LIMIT_PAUSE = 61        # seconds to wait for Twelve Data's credit limit to reset


def _wait_if_out_of_credits(response):
    """Pause when the response reports that no API credits are left."""
    # `.get` avoids a KeyError when the header is absent from the response.
    if response.headers.get('Api-Credits-Left') == '0':
        time.sleep(RATE_LIMIT_PAUSE)


def _batch_date_ranges(start_date, end_date, max_days=MAX_DAYS_PER_REQUEST):
    """Split [start_date, end_date] into non-overlapping windows, newest first.

    Each window spans at most `max_days` calendar dates (both ends included).
    Returns a list of (batch_start, batch_end) tuples as 'YYYY-MM-DD' strings;
    the list is empty when start_date is after end_date.
    """
    first_day = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)
    ranges = []
    while window_end >= first_day:
        window_start = max(first_day, window_end - timedelta(days=max_days - 1))
        ranges.append((window_start.strftime('%Y-%m-%d'), window_end.strftime('%Y-%m-%d')))
        # The next (older) window ends the day before this one starts.
        window_end = window_start - timedelta(days=1)
    return ranges


data_frames = []

for ticker in tickers:

    try:
        # Get the earliest date for which the ticker has daily data
        r = requests.get(
            'https://api.twelvedata.com/earliest_timestamp',
            params={'symbol': ticker, 'interval': '1day', 'apikey': api_key},
            timeout=REQUEST_TIMEOUT,
        )
        # Check how many credits are left before issuing further requests
        _wait_if_out_of_credits(r)

        start_date = r.json()['datetime']
        end_date = date.today().strftime('%Y-%m-%d')
        date_ranges = _batch_date_ranges(start_date, end_date)
    except Exception as exc:
        # Only the exception type is shown: request errors embed the full URL,
        # which would leak the API key into the notebook output.
        print(f"Couldn't download {ticker}. ({type(exc).__name__})")
        continue

    ticker_frames = []
    for batch_start, batch_end in date_ranges:
        try:
            r = requests.get(
                'https://api.twelvedata.com/time_series',
                params={
                    'start_date': batch_start,
                    'end_date': batch_end,
                    'symbol': ticker,
                    'format': 'CSV',
                    'interval': '1day',
                    'apikey': api_key,
                },
                timeout=REQUEST_TIMEOUT,
            )
            _wait_if_out_of_credits(r)

            # The CSV payload is semicolon-delimited; read it straight from the bytes
            data = pd.read_csv(BytesIO(r.content), delimiter=';')

            # An error payload parses without a 'datetime' column; keep it out of the results
            if 'datetime' not in data.columns:
                print(f"Unexpected response for {ticker} ({batch_start} to {batch_end}); batch skipped.")
                continue

            data['ticker'] = ticker
            ticker_frames.append(data)

        except Exception as exc:
            # Best effort: report the failed batch and carry on with the next one
            print(f"Batch {batch_start} to {batch_end} failed for {ticker}. ({type(exc).__name__})")

    data_frames.extend(ticker_frames)

    if ticker_frames:
        print(ticker_frames[-1])
    else:
        print(f"Couldn't download {ticker}.")

df_final = pd.concat([i for i in data_frames if len(i) > 0])

# NOTE(review): `merge_cols` is not used by the merge below, which brings in every column of df_tickers
merge_cols = ['GICS Sector', 'GICS Sub-Industry', 'Headquarters Location', 'Date added', 'Founded'] # 'CIK'
df_model = df_final.merge(df_tickers, on='ticker', how='left')
df_model.to_csv(f'data/{date.today().strftime("%Y-%m-%d")}.csv', index=False)

        datetime   open   high    low  close   volume ticker
0     2024-04-19  24.92  25.01  24.72  24.78   490400    NWS
1     2024-04-18  24.93  25.37  24.75  24.81   659100    NWS
2     2024-04-17  25.00  25.09  24.92  24.95   969800    NWS
3     2024-04-16  24.84  24.99  24.67  24.82   531200    NWS
4     2024-04-15  25.35  25.49  24.95  25.00   685800    NWS
...          ...    ...    ...    ...    ...      ...    ...
2723  2013-06-25  15.50  15.67  15.35  15.66   564400    NWS
2724  2013-06-24  15.79  15.79  15.19  15.52   758300    NWS
2725  2013-06-21  15.98  15.98  15.46  15.80   635300    NWS
2726  2013-06-20  16.25  16.30  15.36  15.44  1874600    NWS
2727  2013-06-19  15.21  16.03  14.96  15.53  7717000    NWS

[2728 rows x 7 columns]
        datetime   open    high    low  close    volume ticker
0     2024-04-19  30.89  31.520  30.87  31.39   2298600   FOXA
1     2024-04-18  30.63  30.910  30.39  30.77   2029300   FOXA
2     2024-04-17  30.64  30.750  30.30  30.46   210720

In [6]:
# Preview the first five rows of the merged price and company table
df_model.head(n=5)

Unnamed: 0,datetime,open,high,low,close,volume,ticker,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,2024-04-19,24.92,25.01,24.72,24.78,490400.0,NWS,News Corp (Class B),Communication Services,Publishing,"New York City, New York",2015-09-18,1564708.0,2013 (News Corporation 1980)
1,2024-04-18,24.93,25.37,24.75,24.81,659100.0,NWS,News Corp (Class B),Communication Services,Publishing,"New York City, New York",2015-09-18,1564708.0,2013 (News Corporation 1980)
2,2024-04-17,25.0,25.09,24.92,24.95,969800.0,NWS,News Corp (Class B),Communication Services,Publishing,"New York City, New York",2015-09-18,1564708.0,2013 (News Corporation 1980)
3,2024-04-16,24.84,24.99,24.67,24.82,531200.0,NWS,News Corp (Class B),Communication Services,Publishing,"New York City, New York",2015-09-18,1564708.0,2013 (News Corporation 1980)
4,2024-04-15,25.35,25.49,24.95,25.0,685800.0,NWS,News Corp (Class B),Communication Services,Publishing,"New York City, New York",2015-09-18,1564708.0,2013 (News Corporation 1980)
