In [None]:
# library information:

import os
# NumPy is the fundamental Python library for scientific computing.
# It provides high-performance multidimensional arrays and tools to work with them.
import numpy as np
# pandas provides fast, easy-to-use data structures and data analysis tools for manipulating
# numeric data and time series. It is built on top of the NumPy library and is used from Python.
import pandas as pd
# import library to read date range for stock ticker historical data range
import datetime
import json

import alpaca_trade_api as tradeapi

from dotenv import load_dotenv
# import ipynb.fs.full
# https://stackoverflow.com/questions/44116194/import-a-function-from-another-ipynb-file
# from ipynb.fs.full.test_sr import *
# from ipynb.fs.full.date_functions import *
# from ipynb.fs.full.my_functions import *

# since functions will be in a different folder, import library to read path info
import sys
sys.path.append('Saeed/functions/')
sys.path.append('Saeed/functions/utils/')
# and now import my functions
from test_sr import *
from date_functions import *
from my_functions import *

# ===================================================================================================
# http://theautomatic.net/yahoo_fin-documentation/
# pip install yahoo-fin
# import yahoo_fin.stock_info as si

# import data libraries to read date range for stock ticker historical data range
# from datetime import date

# other libraries
# import pandas_datareader as pdr
%matplotlib inline
import yfinance as yf

In [None]:
# test call to function in a different folder
# ........................................................................display_greeting("Saeed")

In [None]:
# Load the variables defined in a .env file into the process environment,
# so the os.getenv() lookups for the Alpaca credentials can find them.
load_dotenv()

In [None]:
# Read the Alpaca credentials from the environment.
# Each value is None when the corresponding variable is not set.
alpaca_api_key = os.environ.get("ALPACA_API_KEY")
alpaca_secret_key = os.environ.get("ALPACA_SECRET_KEY")

In [None]:
# Sanity check: show the type of each credential without printing the secret itself.
# A NoneType here means the variable was not found in the environment.
key_type = type(alpaca_api_key)
secret_type = type(alpaca_secret_key)
print(f"Alpaca Key type: {key_type}")
print(f"Alpaca Secret Key type: {secret_type}")

In [None]:
# Build the Alpaca REST client from the credentials read above, targeting API version v2.
alpaca = tradeapi.REST(key_id=alpaca_api_key, secret_key=alpaca_secret_key, api_version="v2")

In [None]:
# SAM: Get S&P 500 tickers
# Take the first table parsed from the Wikipedia page and keep its Symbol column as a plain list.
sp500_page_tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")
sp500_tickers = sp500_page_tables[0]["Symbol"].tolist()
# print(sp500_tickers)

In [None]:
# SAM: Replace every period in a ticker symbol with a dash
sp500_tickers = [symbol.replace(".", "-") for symbol in sp500_tickers]
# print(sp500_tickers)

In [None]:
# Capture the current local date and time as a datetime object (no formatting happens here);
# the date-range cells derive their start and end dates from this value.
this_moment = datetime.datetime.now()
this_moment

In [None]:
# Number of days for which we need data. For a 200-day SMA we should get at least 250 days.
num_of_days = 30    # 365 - get at least a year's worth of data.
num_of_days

In [None]:
# Get the begin date for the date range.
# get_begin_date comes from the local helper modules pulled in with `import *`;
# presumably it returns this_moment moved back by num_of_days - TODO confirm against its source.
start_date = get_begin_date(this_moment, num_of_days)
start_date

# start_date.strftime("%m/%d/%y")
# start_date = str(datetime.datetime.now() - datetime.timedelta(days=1*num_of_days))
# start_date

In [None]:
# End of the date range: the date part of this_moment as a 'YYYY-MM-DD' string.
finish_date = this_moment.date().isoformat()
finish_date

# = current_date
# end_date
# current_date = date.today()
# current_date.strftime("%m/%d/%y")
# end_date = current_date

In [None]:
# Convert the start date to an ISO-format string in the America/New_York time zone.
begin_timestamp = pd.Timestamp(start_date, tz="America/New_York")
begin_date = begin_timestamp.isoformat()
begin_date

In [None]:
# Convert the end date to an ISO-format string in the America/New_York time zone.
end_timestamp = pd.Timestamp(finish_date, tz="America/New_York")
end_date = end_timestamp.isoformat()
end_date

In [None]:
# Pick the ticker universe: "dow", "sp500" or "nasdaq", one at a time.

# dow30 has 30 symbols
# tickers_dow = get_tickers("dow")
# choice = "dow"
# tickers_dow

# sp500 has 505 symbols
tickers_sp500 = get_tickers("sp500")
choice = "sp500"
# tickers_sp500

# nasdaq has 4942 symbols
# tickers_nasdaq = get_tickers("nasdaq")
# choice = "nasdaq"
# tickers_nasdaq

# If more than one market index is used, duplicates must be removed:
# all_tickers = list(set(tickers_dow + tickers_sp500))

# Full-universe alternatives (uncomment exactly one to replace the sample list below):
# all_tickers = tickers_dow
# all_tickers = tickers_sp500
# all_tickers = tickers_nasdaq

# Small sample used while testing. The earlier `all_tickers = tickers_sp500` assignment was
# overwritten immediately by this line, so it has been folded into the alternatives above.
all_tickers = ["AAPL", "IBM", "WMT"]     # ["AAPL", "V", "WMT"]
# choice = "sample"

# print(all_tickers)
all_tickers

In [None]:
# sp_data = yf.download(sp500_tickers, period="1mo")
# sp_data = yf.download(all_tickers, start=start_date, end=finish_date)   # doesn't bring today
# sp_data = yf.download(all_tickers, period="1y", groupby="tickers")
# sp_data

In [None]:
# Download one year of price history for every symbol in all_tickers.
# group_by="ticker" is what lets later cells select a ticker from the top column level.
# Original author's timing note: 70.47 seconds
selection = yf.download(tickers=all_tickers, period="1y", group_by="ticker")
selection

In [None]:
# Pull out the Walmart columns.
# Without group_by the selection would be:
# df_t1 = selection.loc(axis=1)[:, ['', 'WMT']]
# With group_by the ticker is selected directly from the columns:
df_t = selection.loc[:, 'WMT']
df_t

In [None]:
# split list of tickers

# the_list = all_tickers
# if choice == "sp500":                # change to sp500 after testing
#     chunk_size = 100
#     # the_list = all_tickers
#     if len(all_tickers) > chunk_size:
#         the_list = split_my_list(all_tickers, chunk_size)
#         #print("Saeed")
# # print(the_list)
# type(the_list)

In [None]:
# Save the downloaded history so it can be read back later.
# json = final_df("hist_data.json")
# DataFrame.to_json() already returns JSON text, so it is written out as-is. Passing that
# text through json.dump() encoded it a second time, leaving one escaped string in the
# file instead of a JSON object.
os.makedirs('resources/data', exist_ok=True)   # make sure the target folder exists
with open('resources/data/hist_data.json', 'w') as outfile:
    outfile.write(selection.to_json())

In [None]:
# how do I read ?
# my_file = open('resources/data/hist_data.json', 'r')
# json_data = my_file.read()
# json_data
# df_a = json_data.df
# df_a
# with open('resources/data/hist_data.json', 'r') as infile:
#     data_read = json.load(infile)
# # df_a = pd.data_read
# # for i in range(0, len(data_read)):
# #     print(data_read[i])
# # data_read.splitlines()
# df_a = pd.DataFrame.from_dict(pd.json_normalize(data_read), orient='columns')
# df_a

In [None]:
# Display the downloaded price history again before deriving new columns from it.
selection

In [None]:
# Select the closing price of every ticker in one step.

# Without group_by this would be:
# selection['Close']
# With group_by, a single ticker is:
# selection.loc(axis=1)['Close', 'AAPL'] or selection['Close', 'AAPL']
# Here: every label on the first column level, restricted to '' / 'Close' on the second.
close_selector = pd.IndexSlice[:, ['', 'Close']]
df_t = selection.loc[:, close_selector]
df_t

In [None]:
# Closing prices for one specific trading day.
# The data is downloaded with period="1y", a rolling window, so a fixed date eventually
# falls outside the index. A label slice returns an empty frame in that case, whereas the
# exact lookup df_t.loc['2021-08-13'] raises KeyError.
df_t.loc['2021-08-13':'2021-08-13']

In [None]:
#selection.columns

In [None]:
# selection.index.values

In [None]:
# Daily change per ticker: Close minus Open.
# The columns are (ticker, field) because the download uses group_by="ticker", so
# selection["Close"] is not a valid top-level key. Subtracting the (ticker, 'Open') columns
# from the (ticker, 'Close') columns does not work either: the labels differ, nothing lines
# up, and every value comes out NaN.
# A cross-section on the field level leaves both frames keyed by ticker alone, so the
# subtraction aligns column by column.
col_close = selection.xs('Close', axis=1, level=1)
col_open = selection.xs('Open', axis=1, level=1)
df_daily_change = col_close - col_open
df_daily_change
# df_change.columns

In [None]:
# # df_daily_change.columns
# headers = pd.MultiIndex.from_product([['Daily Change'], df_daily_change.columns])
# # headers
# df_daily_change.index

In [None]:
# Put the daily-change columns under a 'Daily Change' top-level header:
# build the two-level header, then rebuild the frame from the same values and index.
headers = pd.MultiIndex.from_product([['Daily Change'], df_daily_change.columns])
df_daily_change = pd.DataFrame(
    data=df_daily_change.values,
    index=df_daily_change.index,
    columns=headers,
)
df_daily_change

In [None]:
# Append the 'Daily Change' columns to the price data.
# Columns already filed under 'Daily Change' are left out of the left-hand side first, so
# re-running this cell replaces them instead of appending a duplicate set.
is_daily_change = selection.columns.get_level_values(0) == 'Daily Change'
selection = pd.concat([selection.loc[:, ~is_daily_change], df_daily_change], axis=1, join="inner")
selection

In [None]:
# Daily change as a fraction of the opening price, per ticker: (Close - Open) / Open.
# Both operands are cross-sections on the field level, so their columns are the ticker
# symbols and the division aligns. Dividing the 'Daily Change' frame by the (ticker, 'Open')
# columns matches no labels and produces only NaN.
open_prices = selection.xs('Open', axis=1, level=1)
close_prices = selection.xs('Close', axis=1, level=1)
# The result is bound to df_percent_daily_change because the cells that add the
# 'Percent Daily Change' header read that name; it was never assigned before.
df_percent_daily_change = (close_prices - open_prices) / open_prices
x = df_percent_daily_change   # `x` was the original name of this result; kept as an alias
x
# df_change
#df_percent_daily_change
# df_t
# selection.loc(axis=1)[:, ['', 'Open']]

In [None]:
# Put the percent-change columns under a 'Percent Daily Change' top-level header:
# build the two-level header, then rebuild the frame from the same values and index.
headers = pd.MultiIndex.from_product([['Percent Daily Change'], df_percent_daily_change.columns])
df_percent_daily_change = pd.DataFrame(
    data=df_percent_daily_change.values,
    index=df_percent_daily_change.index,
    columns=headers,
)
df_percent_daily_change

In [None]:
# Append the 'Percent Daily Change' columns to the price data.
# Columns already filed under that header are left out of the left-hand side first, so
# re-running this cell replaces them instead of appending a duplicate set.
is_percent_change = selection.columns.get_level_values(0) == 'Percent Daily Change'
selection = pd.concat([selection.loc[:, ~is_percent_change], df_percent_daily_change], axis=1, join="inner")

In [None]:
# Display the price history together with the derived change columns.
selection

In [None]:
# Swap the two column header levels.
# swaplevel() works on the row index by default (axis=0). The rows here are looked up by a
# single date label, so there is no second row level to swap and the default call fails.
# axis=1 targets the two-level column header instead.
selection.swaplevel(axis=1)

In [None]:
# selection['Delta']['AAPL'] = selection['High']['AAPL'] - selection['Low']['AAPL']
# Show the columns grouped under the 'Daily Change' top-level header.
selection['Daily Change']

In [None]:
# Build a small throwaway frame for experimenting with swapping the header rows.

# One [name, age] pair per row.
data = [['Mr X', 99], ['Mr Y', 108], ['Mr Z', 111]]

# Assemble the frame, labelling the two columns.
column_names = ['Name', 'Age']
df = pd.DataFrame(data=data, columns=column_names)

# Display the frame.
df

In [None]:
# Transpose the test frame so its column labels become the row labels.
df_t = df.transpose()
df_t

In [None]:
# Put the percent-change columns under a 'Percent Daily Change' top-level header.
# This cell repeats an earlier one, so it wraps only when that header is not already in
# place; wrapping again would build the new header from the already-wrapped columns.
if 'Percent Daily Change' not in df_percent_daily_change.columns.get_level_values(0):
    headers = pd.MultiIndex.from_product([['Percent Daily Change'], df_percent_daily_change.columns])
    df_percent_daily_change = pd.DataFrame(df_percent_daily_change.values, df_percent_daily_change.index, columns = headers)
df_percent_daily_change

In [None]:

# Small frame with a two-level column header: 'Parent' on top, two child labels underneath.
# NOTE(review): 'chils1' looks like a typo for 'child1'; left unchanged because it is a runtime label.
headers = pd.MultiIndex.from_tuples([('Parent', 'chils1'), ('Parent', 'child2')])
data = [[1, 11], [2, 22], [3, 33]]

df = pd.DataFrame(data=data, columns=headers)
df

In [None]:
# Swap the two column header levels of the test frame.
# swaplevel() works on the row index by default (axis=0); this frame has a plain
# single-level row index, so the default call raises TypeError. axis=1 swaps the column
# levels, which is what the header-swapping experiment is about.
df_2 = df.swaplevel(axis=1)
df_2