In [137]:
#DAILY PERCENTAGE RETURNS DATA... need this to see how the price reacts to a given news release
# Remember, we are just doing the daily percentage gains, not going intraday yet, to keep things simple.


#Let's set up our Alpaca API...

# Initial imports
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi

%matplotlib inline



In [138]:
# Load .env environment variables
load_dotenv()

True

In [139]:
# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Verify that Alpaca key and secret were correctly loaded
print(f"Alpaca Key type: {type(alpaca_api_key)}")
print(f"Alpaca Secret Key type: {type(alpaca_secret_key)}")

Alpaca Key type: <class 'str'>
Alpaca Secret Key type: <class 'str'>


In [140]:
# Create the Alpaca API object
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")

In [141]:
# Create our timeframe, Jan 1, 2022 - Dec 31, 2022
# Format date as ISO format
start_date = pd.Timestamp("2022-01-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2022-12-31", tz="America/New_York").isoformat()

In [142]:
# We need to get the tickers for all stocks in the S&P 500
# Going to get the symbols of the stocks contained in the S&P 500 from the dataframe created in "DataCleaning.ipynb"

# Load in libraries needed to read the csv
import pandas as pd
from pathlib import Path

# Defining Paths
snp500_path = Path("../Group3Project1/Resources/constituents.csv")

# Reading CSV
snp500_data = pd.read_csv(snp500_path, index_col="Symbol")

# Preview Data
display(snp500_data.head())
display(snp500_data.tail())

Unnamed: 0_level_0,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


Unnamed: 0_level_0,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
YUM,Yum! Brands,Consumer Discretionary,Restaurants,"Louisville, Kentucky",1997-10-06,1041061,1997
ZBRA,Zebra Technologies,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois",2019-12-23,877212,1969
ZBH,Zimmer Biomet,Health Care,Health Care Equipment,"Warsaw, Indiana",2001-08-07,1136869,1927
ZION,Zions Bancorporation,Financials,Regional Banks,"Salt Lake City, Utah",2001-06-22,109380,1873
ZTS,Zoetis,Health Care,Pharmaceuticals,"Parsippany, New Jersey",2013-06-21,1555280,1952


In [143]:
# Capture the "Symbol" column of the snp500 dataframe

# Reset the index so that we have Symbol as a normal column. Save that with operation with inplace=True
snp500_data.reset_index(inplace=True)

snp500_data.head()

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
3,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [144]:
# Now we can capture the symbol column
snp500_symbols = snp500_data["Symbol"]

# Preview
# NOTICE: why do we have 503 rows? isn't it the S&P 500? 
# Although the it is called the "S&P 500", it constitutes of 503 stocks.
snp500_symbols

0       MMM
1       AOS
2       ABT
3      ABBV
4       ACN
       ... 
498     YUM
499    ZBRA
500     ZBH
501    ZION
502     ZTS
Name: Symbol, Length: 503, dtype: object

In [145]:
# Set the tickers, all stocks in the S&P 500 for the Alpaca API
tickers = snp500_symbols

In [146]:
# Set timeframe to one day ("1Day") for the Alpaca API
timeframe = "1Day"

# Get daily closing prices for year 2022 of all stocks in S&P 500 
snp500_price_df = alpaca.get_bars(
    tickers,
    timeframe,
    start = start_date,
    end = end_date
).df

# Display sample data
snp500_price_df

Unnamed: 0_level_0,close,high,low,trade_count,open,volume,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-03 05:00:00+00:00,18.75,19.1800,18.2000,162182,18.230,44860486,18.777172,AAL
2022-01-04 05:00:00+00:00,19.02,19.4600,18.8504,119441,19.215,30433127,19.089447,AAL
2022-01-05 05:00:00+00:00,18.68,19.3850,18.5100,133200,19.130,35253129,18.917920,AAL
2022-01-06 05:00:00+00:00,18.57,19.0599,18.3500,91484,18.890,19726860,18.669406,AAL
2022-01-07 05:00:00+00:00,19.28,19.5900,18.5800,158007,18.750,43826540,19.210886,AAL
...,...,...,...,...,...,...,...,...
2022-12-23 05:00:00+00:00,145.76,145.8920,143.5400,20025,144.510,1017604,145.236784,ZTS
2022-12-27 05:00:00+00:00,145.30,146.1474,143.5700,19294,145.910,957871,145.322438,ZTS
2022-12-28 05:00:00+00:00,143.83,146.6400,143.7700,19669,145.180,1443916,144.409503,ZTS
2022-12-29 05:00:00+00:00,148.15,148.5100,145.1378,21072,145.200,1298851,147.959023,ZTS


In [147]:
# AAL = snp500_price_df[snp500_price_df['symbol']=='AAL'].drop('symbol', axis=1)


snp500_symbols

# for symbol in range(len(snp500_symbols)):
#     print(symbol)
# cool_symbol = snp500_symbols[1]
# cool_symbol

snp500_symbols_list=[]

for x in range(len(snp500_symbols)):
    snp500_symbols_list.append(snp500_symbols[x])

snp500_symbols_list[:4]

['MMM', 'AOS', 'ABT', 'ABBV']

In [148]:

#snp500_2022_returns_df = pd.DataFrame()

# ticker_data_1 = snp500_price_df[snp500_price_df['symbol']==snp500_symbols_list[count]].drop('symbol', axis=1)


# ticker_data_2 = snp500_price_df[snp500_price_df['symbol']==snp500_symbols_list[count+1]].drop('symbol', axis=1)
# # ticker_data_1

# for x in range(len(snp500_symbols_list)-400):
#     ticker_data_1 = snp500_price_df[snp500_price_df['symbol']==snp500_symbols_list[count]].drop('symbol', axis=1)
#     snp500_2022_returns_df = pd.concat([ticker_data_1, ticker_data_2], axis=1, keys=snp500_symbols_list)
#     count+=1

#     snp500_2022_returns_df = pd.concat([AAL, ZTS],axis=1, keys=['AAL','ZTS'])
#snp500_2022_returns_df["hello"] = ticker_data_1
#snp500_2022_returns_df["symbols 1"] = ticker_data_1
#snp500_2022_returns_df
#new_new_df = pd.concat([snp500_2022_returns_df, ticker_data_1, ticker_data_2], axis=1, keys=snp500_symbols_list)
#new_new_df = pd.concat([new_new_df, ticker_data_2], axis=1, keys=["hello", "hello", "world", "work"])
#snp500_2022_returns_df
# concatenated_df = pd.concat([existing_df, new_data], axis=1)


# for x in range(len(snp500_symbols_list)-490):
#     ticker_data_1 = snp500_price_df[snp500_price_df['symbol']==snp500_symbols_list[count]].drop('symbol', axis=1)
#     concat_df = pd.concat([concat_df, ticker_data_1], axis=1)
#     count=+1
#concat_df

concat_df = pd.DataFrame()
df_list = []

for symbol in snp500_symbols_list:
    ticker_data_1 = snp500_price_df[snp500_price_df['symbol'] == symbol]#.drop('symbol', axis=1)
    df_list.append(ticker_data_1)    

#ticker_data_2 = snp500_price_df[snp500_price_df['symbol']==snp500_symbols_list[count]].drop('symbol', axis=1)
#concat_df = pd.concat([concat_df, ticker_data_1], axis=1, keys=snp500_symbols_list)
# concat_df = pd.concat([concat_df, ticker_data_1], axis=1, keys=["a" 'b', 'c', 'd'])


df_list

[                            close    high      low  trade_count    open  \
 timestamp                                                                 
 2022-01-03 05:00:00+00:00  177.74  179.09  175.840        32745  178.32   
 2022-01-04 05:00:00+00:00  180.23  181.26  178.025        43025  178.48   
 2022-01-05 05:00:00+00:00  179.49  181.78  177.000        48834  177.13   
 2022-01-06 05:00:00+00:00  178.00  181.28  177.540        39255  180.88   
 2022-01-07 05:00:00+00:00  179.95  180.49  177.220        39518  178.13   
 ...                           ...     ...      ...          ...     ...   
 2022-12-23 05:00:00+00:00  120.14  121.00  117.800        46384  120.79   
 2022-12-27 05:00:00+00:00  120.22  120.83  119.160        32087  120.14   
 2022-12-28 05:00:00+00:00  118.29  121.00  118.250        34405  120.35   
 2022-12-29 05:00:00+00:00  120.57  120.86  118.500        35391  118.87   
 2022-12-30 05:00:00+00:00  119.92  120.03  118.510        36478  119.65   
 
          

In [36]:

# Reorganize the DataFrame using the above data we just got from alpaca 

# Separate ticker data


AAL = snp500_price_df[snp500_price_df['symbol']=='AAL'].drop('symbol', axis=1)


ZTS = snp500_price_df[snp500_price_df['symbol']=='ZTS'].drop('symbol', axis=1)

# Concatenate the ticker DataFrames
test_new_df = pd.concat([AAL, ZTS],axis=1, keys=['AAL','ZTS'])

test_new_df.head()


Unnamed: 0_level_0,AAL,AAL,AAL,AAL,AAL,AAL,AAL,ZTS,ZTS,ZTS,ZTS,ZTS,ZTS,ZTS
Unnamed: 0_level_1,close,high,low,trade_count,open,volume,vwap,close,high,low,trade_count,open,volume,vwap
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
2022-01-03 05:00:00+00:00,18.75,19.18,18.2,162182,18.23,44860486,18.777172,234.03,243.24,233.32,40631,242.29,3110633,234.90299
2022-01-04 05:00:00+00:00,19.02,19.46,18.8504,119441,19.215,30433127,19.089447,225.12,234.02,222.14,72176,234.02,5082693,225.646202
2022-01-05 05:00:00+00:00,18.68,19.385,18.51,133200,19.13,35253129,18.91792,216.56,227.0,215.6,74079,227.0,5352738,218.720396
2022-01-06 05:00:00+00:00,18.57,19.0599,18.35,91484,18.89,19726860,18.669406,217.42,220.56,213.625,53574,215.48,3448961,217.727004
2022-01-07 05:00:00+00:00,19.28,19.59,18.58,158007,18.75,43826540,19.210886,211.09,217.42,210.78,35901,217.13,2584840,212.467092


In [None]:
# GET GEOSPACIAL DATA
# We are interested in US companies that release 'hot' news.
# Where in the US are these companies located?
# We get their location, and we should be able to translate the "name" of that location into "coordinates"
# Once it is in "coordinates", we can plot that point on the Geo map using HVplot


# Frist step would be to get the locations of each company.

# Step 2 is to get access to a dataset that translates cities/towns in the US to coordinates.

# Lastly we can plot those coordinates on the map.

In [5]:
# STEP 2: FIND DATASET TRANSLATING CITY NAME TO COORDINATES
# We can use this API: "GEOCODING API" https://openweathermap.org/api/geocoding-api 
# Here is a description from their website: 

# "Geocoding API is a simple tool that we have developed to ease the search for locations while working with geographic names and coordinates.
# Supporting API calls by geographical coordinates is the most accurate way to specify any location, that is why this method is integrated in all OpenWeather APIs. 
# However, this way is not always suitable for all users.
# Geocoding is the process of transformation of any location name into geographical coordinates, and the other way around (reverse geocoding).
# OpenWeather’s Geocoding API supports both the direct and reverse methods, working at the level of city names, areas and districts, countries and states:
# Direct geocoding converts the specified name of a location or zip/post code into the exact geographical coordinates;
# Reverse geocoding converts the geographical coordinates into the names of the nearby locations."

# We will use direct geocoding.

import requests
# import requests

# Create variable to hold request url
url = "http://api.openweathermap.org/geo/1.0/direct?q=Oakland&limit=5&appid=628878cc6edbe70ff1805be47e1af1ea"

# Add format specifier to request url
#url = url + "?format=json"

# Execute get request
requests.get(url)


# Execute GET request and store response
# response_data = requests.get(url)
# q	required	City name, state code (only for the US) and country code divided by comma. Please use ISO 3166 country codes.
# appid	required	Your unique API key (you can always find it on your account page under the "API key" tab)
# limit	optional	Number of the locations in the API response (up to 5 results can be returned in the API response)

# myCityNameVariable = ""
# for this many rows in our S&P 500 data
#     get the name of city
#     set that name to myCityNameVariable
#     put that myCityNameVariable into the link
#     make that API call

# http://api.openweathermap.org/geo/1.0/direct?q=myCityNameVariable&limit={limit}&appid={API key}


# http://api.openweathermap.org/geo/1.0/direct?q=London&limit=5&appid={API key}

#http://api.openweathermap.org/geo/1.0/direct?q=Oakland&limit=5&appid={3fcc50c01f481adc6307ae95cede2362}

<Response [200]>

In [6]:
print(response_data.content) #I think I am getting an error because I need to confirm my email. However, that email is taking a while to arrive...

b'[{"name":"Oakland","local_names":{"kw":"Oakland","mk":"\xd0\x9e\xd1\x83\xd0\xba\xd0\xbb\xd0\xb5\xd0\xbd\xd0\xb4","pt":"Oakland","pl":"Oakland","ka":"\xe1\x83\x9d\xe1\x83\x99\xe1\x83\x9a\xe1\x83\x94\xe1\x83\x9c\xe1\x83\x93\xe1\x83\x98","ja":"\xe3\x82\xaa\xe3\x83\xbc\xe3\x82\xaf\xe3\x83\xa9\xe3\x83\xb3\xe3\x83\x89","yo":"Oakland","fa":"\xd8\xa7\xd9\x88\xda\xa9\xd9\x84\xd9\x86\xd8\xaf\xd8\x8c \xda\xa9\xd8\xa7\xd9\x84\xdb\x8c\xd9\x81\xd8\xb1\xd9\x86\xdb\x8c\xd8\xa7","sv":"Oakland","sh":"Oakland","sw":"Oakland","fy":"Oakland","ht":"Oakland","hr":"Oakland","so":"Oakland","ur":"\xd8\xa7\xd9\x88\xda\xa9\xd9\x84\xdb\x8c\xd9\x86\xda\x88\xd8\x8c \xda\xa9\xdb\x8c\xd9\x84\xdb\x8c\xd9\x81\xd9\x88\xd8\xb1\xd9\x86\xdb\x8c\xd8\xa7","ro":"Oakland","jv":"Oakland","fi":"Oakland","bn":"\xe0\xa6\x93\xe0\xa6\x95\xe0\xa6\xb2\xe0\xa7\x8d\xe0\xa6\xaf\xe0\xa6\xbe\xe0\xa6\xa8\xe0\xa7\x8d\xe0\xa6\xa1","de":"Oakland","zh":"\xe5\xa5\xa5\xe5\x85\x8b\xe5\x85\xb0/\xe5\xa5\xa7\xe5\x85\x8b\xe8\x98\xad/\xe5\xb1\x8b\xe5\