In [1]:
import logging, os, re

import pandas as pd
import numpy as np

# Root logger at INFO; each message is prefixed by its timestamp,
# left-aligned in a 25-character column.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)-25s %(message)s',
)

In [4]:
import sys

# Make the local helper package importable; the membership check keeps
# sys.path from growing every time this cell is re-run.
if './markets_lib/' not in sys.path:
    sys.path.append('./markets_lib/')

# Imported under its full name rather than the short alias `av`: later cells
# bind `av` to a DataFrame of listed tickers, which would shadow the module
# and break this cell (or those cells) when run out of order.
import alphavantage

api_key = alphavantage.get_api_key(config_file='./markets_lib/apikey.cfg')

# Daily alternative:
#r = alphavantage.call_api('TSLA', 'TIME_SERIES_DAILY', api_key, parameters={})

# Intraday request: 60-minute interval for months 2020_12 through 2021_02,
# with a 5-second pause between queries.
intraday_parameters = {
    'pause_between_queries': 5,
    'interval': '60min',
    'month_start': '2020_12',
    'month_end': '2021_02',
}
r = alphavantage.call_api('TSLA', 'TIME_SERIES_INTRADAY', api_key, parameters=intraday_parameters)

2023-08-30 14:34:03,246   TSLA: Downloading TIME_SERIES_INTRADAY: 2020-12
2023-08-30 14:34:03,471   Pausing for 5 secs.
2023-08-30 14:34:08,477   TSLA: Downloading TIME_SERIES_INTRADAY: 2021-01
2023-08-30 14:34:14,182   Pausing for 5 secs.
2023-08-30 14:34:19,184   TSLA: Downloading TIME_SERIES_INTRADAY: 2021-02
2023-08-30 14:34:20,526   Pausing for 5 secs.


In [22]:
# Join symbol and exchange with a dot; strip('.') removes any leading or
# trailing dot, e.g. when one of the parts is an empty string.
'.'.join(('TSLA', 'BSE')).strip('.')

'TSLA.BSE'

In [None]:
# Hand-written sample payload: a single top-level key mapping date strings
# to per-day field dicts. Every value is a string.
df = {
    "Time Series (Daily)": {
        "2023-08-29": {
            "1. open": "146.3",
            "2. high": "146.73",
            "3. low": "145.62",
            "4. close": "146.45",
            "5. adjusted close": "146.45",
            "6. volume": "2778113",
            "7. dividend amount": "0.0000",
            "8. split coefficient": "1.0",
        },
        "2023-08-28": {
            "1. open": "145.41",
            "2. high": "146.74",
            "3. low": "145.21",
            "4. close": "146.02",
            "5. adjusted close": "146.02",
            "6. volume": "3561347",
            "7. dividend amount": "0.0000",
            "8. split coefficient": "1.0",
        },
        "2023-08-25": {
            "1. open": "144.18",
            "2. high": "145.47",
            "3. low": "143.5",
            "4. close": "145.35",
            "5. adjusted close": "145.35",
            "6. volume": "3660147",
            "7. dividend amount": "0.0000",
            "8. split coefficient": "1.0",
        },
    }
}

# Use the value under the first (only) top-level key; with orient='index'
# the date strings become the row labels and the field names the columns.
daily_records = next(iter(df.values()))
pd.DataFrame.from_dict(daily_records, orient='index')

# Clean data from https://stockanalysis.com/stocks/

* Clean up data and save to CSV.
* Uncomment the last line to write the CSV file.
* Tickers without a parseable market cap are printed out (their Market Cap is set to 0).

In [None]:
def _parse_market_cap(text):
    """Convert a market-cap string such as '1,234.5B' to whole millions.

    The last character must be a recognised magnitude suffix:
    'M' (millions), 'B' (billions) or 'T' (trillions). Thousands separators
    (commas) in the numeric part are ignored.

    Raises:
        ValueError: if the suffix is not recognised or the numeric part
            cannot be parsed (this includes the empty string).
    """
    multipliers = {'T': 1000000, 'B': 1000, 'M': 1}
    suffix = text[-1:]
    if suffix not in multipliers:
        # Previously any non-'B' suffix was silently scaled by 1, so a 'T'
        # value was under-scaled and a bare number lost its last digit.
        raise ValueError('Unrecognised market cap: {!r}'.format(text))
    return round(float(text[:-1].replace(',', '')) * multipliers[suffix])


stocks_path = '../data/sac_tickers_raw.csv'
# Context manager so the file handle is closed once the lines are read.
with open(stocks_path, 'r') as raw_file:
    stocks = raw_file.readlines()

# First line is the tab-separated header.
columns = [s.strip() for s in stocks[0].split('\t')]

parsed_data = []
# Each record spans two lines: tab-separated fields, then the market cap.
# NOTE(review): the stop value len(stocks)-2 is kept from the original; it
# skips the final pair unless the file ends with extra trailing line(s).
# Confirm against the raw file.
for start in range(1, len(stocks)-2, 2):
    fields_line, cap_line = stocks[start:start+2]
    # The last tab-separated element of the first line is discarded.
    parsed = fields_line.split('\t')[:-1]
    # Symbols use '-' instead of '.' from here on.
    parsed[0] = parsed[0].replace('.', '-')
    mkt_cap = cap_line.strip()
    try:
        mkt_cap = _parse_market_cap(mkt_cap)
    except ValueError:
        # Report the record and fall back to 0 so the row is still kept.
        print(parsed, mkt_cap)
        mkt_cap = 0
    parsed.append(mkt_cap)
    parsed_data.append(tuple(parsed))

df = pd.DataFrame(parsed_data, columns=columns)
df = df.set_index('Symbol')
#df.to_csv('../data/sac_tickers.csv')

### Find tickers in StockAnalysis.com (SAC) that are NOT in AlphaVantage (AV)

In [None]:
sac = pd.read_csv('../data/sac_tickers.csv')
av = pd.read_csv('../data/av_listed_tickers.csv')

# Exact membership test. The previous str.contains(s) treated each symbol as
# a regex and matched substrings (e.g. 'A' matched almost every AV symbol),
# so real gaps were hidden; it also scanned the AV list once per SAC symbol.
mask = ~sac['Symbol'].isin(av['Symbol'])
sac[mask]

### GroupBy Industry (also identify how many companies have n/a for Industry)

In [None]:
# Per-industry non-null counts of every column; dropna=False keeps a group
# for rows whose Industry is missing.
industry_groups = sac.groupby('Industry', as_index=False, dropna=False)
industries = industry_groups.count()
print(industries.to_string())

# Rows that have no Industry value.
industry_missing = sac['Industry'].isna()
sac[industry_missing]

### Get all tickers in an industry sorted by market capitalization

In [None]:
industry = 'Auto Manufacturers'
# All rows of the chosen industry, largest Market Cap first.
in_industry = sac[sac['Industry'].eq(industry)]
by_cap_desc = in_industry.sort_values(by=['Market Cap'], ascending=False)
print(by_cap_desc.to_string())

### Large cap stocks > 10B

In [None]:
# Rows with Market Cap strictly above 10000, indexed by Symbol.
is_large_cap = sac['Market Cap'].gt(10000)
lg_cap = sac.loc[is_large_cap].set_index('Symbol')
lg_cap.to_csv('../data/sac_tickers_largecap.csv')
# Display largest first; the CSV above is written in the original row order.
lg_cap.sort_values(by=['Market Cap'], ascending=False)

In [None]:
# Mid cap: Market Cap above 2000 and up to and including 10000.
# The upper bound is inclusive because the large-cap selection uses
# "> 10000"; with "< 10000" here a value of exactly 10000 landed in
# neither output file.
market_cap = sac['Market Cap']
mid_cap = sac[(market_cap > 2000) & (market_cap <= 10000)]
mid_cap = mid_cap.set_index('Symbol')
mid_cap.to_csv('../data/sac_tickers_midcap.csv')
# Display largest first; the CSV above is written in the original row order.
mid_cap.sort_values(by=['Market Cap'], ascending=False)

# Identify ETFs from AlphaVantage's master list

In [None]:
# Load the listed-tickers CSV into `av` and display it.
av_listing_path = '../data/av_listed_tickers.csv'
av = pd.read_csv(av_listing_path)
av

In [None]:
# Non-null counts of every column per AssetType; dropna=False keeps a group
# for rows with a missing AssetType.
asset_types = (
    av
    .groupby('AssetType', as_index=False, dropna=False)
    .count()
)
asset_types

In [None]:
# Keep only rows whose AssetType is exactly 'ETF' and save them without the index.
is_etf = av['AssetType'].eq('ETF')
etfs = av[is_etf]
etfs.to_csv('../data/av_listed_tickers_etfs_intraday_download.csv', index=False)
# Spot check: show the row(s) for symbol LABD.
etfs[etfs['Symbol'].eq('LABD')]

In [None]:
# Non-null counts of every column per Exchange (missing Exchange kept as its own group).
(
    etfs
    .groupby('Exchange', as_index=False, dropna=False)
    .count()
)

### Identify ETFs listed on NYSE and NASDAQ

* Then identify Vanguard ETFs NOT on NYSE or NASDAQ (only 10; we ignore those)

In [None]:
# ETFs whose Exchange is exactly 'NYSE' or 'NASDAQ', indexed by Symbol.
on_nyse_or_nasdaq = etfs['Exchange'].isin(['NYSE', 'NASDAQ'])
etfs_nn = etfs[on_nyse_or_nasdaq].set_index('Symbol')
etfs_nn.sort_values(by=['Exchange'], ascending=True)
#etfs_nn.to_csv('../data/av_listed_tickers_etfs.csv')

### Vanguard ETFs

In [None]:
# ETFs whose Name contains the literal text 'Vanguard'.
# na=False treats rows with a missing Name as non-matches; without it the
# mask contains NaN and boolean indexing raises. regex=False makes the
# literal match explicit.
is_vanguard = etfs['Name'].str.contains('Vanguard', na=False, regex=False)
vanguard_etfs = etfs[is_vanguard]
vanguard_etfs.groupby('Exchange', as_index=False, dropna=False).count()

In [None]:
# Vanguard ETFs listed on BATS.
# na=False treats rows with a missing Name as non-matches; without it the
# mask contains NaN and boolean indexing raises.
# NOTE(review): the variable name says "not NYSE/NASDAQ" but only BATS is
# selected; confirm no other exchanges should be included.
is_vanguard = etfs['Name'].str.contains('Vanguard', na=False, regex=False)
vanguard_etfs_not_nn = etfs[is_vanguard & (etfs['Exchange'] == 'BATS')]
vanguard_etfs_not_nn

### Extract list of tickers by sector

* Find sector, given ticker
* List all sectors
* List tickers in a sector, sorted by cap
* Eliminate ones that don't exist

In [None]:
ticker = 'FREYR'
# Show the row(s) whose Symbol equals the ticker exactly.
symbol_matches = sac['Symbol'].eq(ticker)
sac[symbol_matches]

In [None]:
# Per-industry counts, ordered by the Symbol count (largest first) and
# renumbered from 0.
industry_counts = sac.groupby('Industry', as_index=False, dropna=False).count()
industries = (
    industry_counts
    .sort_values(by='Symbol', ascending=False)
    .reset_index(drop=True)
)
print(industries.to_string())

In [None]:
sector = 'Software'
min_cap = 10000
# Rows in the chosen industry with Market Cap at or above min_cap,
# largest first, with a fresh 0-based index.
in_sector = sac['Industry'].eq(sector)
cap_ok = sac['Market Cap'].ge(min_cap)
sector_df = (
    sac[in_sector & cap_ok]
    .sort_values(by='Market Cap', ascending=False)
    .reset_index(drop=True)
)
sector_df

In [None]:
# Directory searched for one '<ticker>.csv' file per symbol.
# Set the MARKET_DATA_DIR environment variable to point elsewhere; the
# default is the original hard-coded location, so existing runs are unchanged.
path = os.environ.get(
    'MARKET_DATA_DIR',
    '/Users/anupam/Desktop/Server/market_data/TIME_SERIES_DAILY/',
)

# Keep only the sector's symbols that have a data file on disk.
tickers = []
for t in sector_df['Symbol']:
    fpath = os.path.join(path, t+'.csv')
    if os.path.exists(fpath):
        tickers.append(t)
    else:
        print('File doesn\'t exist for {}'.format(fpath))

# Print the surviving tickers as a single-quoted, comma-separated list.
print("'{}'".format("','".join(tickers)))