In [137]:
#DAILY PERCENTAGE RETURNS DATA... need this to see how the price reacts to a given news release
# Remember, we are just doing the daily percentage gains, not going intraday yet, to keep things simple.


#Let's set up our Alpaca API...

# Initial imports
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi

%matplotlib inline



In [138]:
# Load .env environment variables
load_dotenv()

True

In [139]:
# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Verify that Alpaca key and secret were correctly loaded
print(f"Alpaca Key type: {type(alpaca_api_key)}")
print(f"Alpaca Secret Key type: {type(alpaca_secret_key)}")

Alpaca Key type: <class 'str'>
Alpaca Secret Key type: <class 'str'>


In [140]:
# Create the Alpaca API object
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")

In [141]:
# Create our timeframe, Jan 1, 2022 - Dec 31, 2022
# Format date as ISO format
start_date = pd.Timestamp("2022-01-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2022-12-31", tz="America/New_York").isoformat()

In [159]:
# We need to get the tickers for all stocks in the S&P 500
# Going to get the symbols of the stocks contained in the S&P 500 from the dataframe created in "DataCleaning.ipynb"

# Load in libraries needed to read the csv
import pandas as pd
from pathlib import Path

# Defining Paths
snp500_path = Path("../Group3Project1/Resources/constituents.csv")

# Reading CSV
snp500_data = pd.read_csv(snp500_path, index_col="Symbol")

# Sort data in alphabetical order. We will keep this consistient among the datasets we create.
snp500_data.sort_index(inplace=True)

# Preview Data
display(snp500_data.head())
display(snp500_data.tail())

Unnamed: 0_level_0,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A,Agilent Technologies,Health Care,Health Care Equipment,"Santa Clara, California",2000-06-05,1090872,1999
AAL,American Airlines Group,Industrials,Passenger Airlines,"Fort Worth, Texas",2015-03-23,6201,1934
AAPL,Apple Inc.,Information Technology,"Technology Hardware, Storage & Peripherals","Cupertino, California",1982-11-30,320193,1977
ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
ABNB,Airbnb,Consumer Discretionary,"Hotels, Resorts & Cruise Lines","San Francisco, California",2023-09-18,1559720,2008


Unnamed: 0_level_0,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
YUM,Yum! Brands,Consumer Discretionary,Restaurants,"Louisville, Kentucky",1997-10-06,1041061,1997
ZBH,Zimmer Biomet,Health Care,Health Care Equipment,"Warsaw, Indiana",2001-08-07,1136869,1927
ZBRA,Zebra Technologies,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois",2019-12-23,877212,1969
ZION,Zions Bancorporation,Financials,Regional Banks,"Salt Lake City, Utah",2001-06-22,109380,1873
ZTS,Zoetis,Health Care,Pharmaceuticals,"Parsippany, New Jersey",2013-06-21,1555280,1952


In [160]:
# Capture the "Symbol" column of the snp500 dataframe

# Reset the index so that we have Symbol as a normal column. Save that with operation with inplace=True
snp500_data.reset_index(inplace=True)

snp500_data.head()

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,A,Agilent Technologies,Health Care,Health Care Equipment,"Santa Clara, California",2000-06-05,1090872,1999
1,AAL,American Airlines Group,Industrials,Passenger Airlines,"Fort Worth, Texas",2015-03-23,6201,1934
2,AAPL,Apple Inc.,Information Technology,"Technology Hardware, Storage & Peripherals","Cupertino, California",1982-11-30,320193,1977
3,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ABNB,Airbnb,Consumer Discretionary,"Hotels, Resorts & Cruise Lines","San Francisco, California",2023-09-18,1559720,2008


In [161]:
# Now we can capture the symbol column
snp500_symbols = snp500_data["Symbol"]

# Preview
# NOTICE: why do we have 503 rows? isn't it the S&P 500? 
# Although the it is called the "S&P 500", it constitutes of 503 stocks.
snp500_symbols

0         A
1       AAL
2      AAPL
3      ABBV
4      ABNB
       ... 
498     YUM
499     ZBH
500    ZBRA
501    ZION
502     ZTS
Name: Symbol, Length: 503, dtype: object

In [162]:
# Set the tickers, all stocks in the S&P 500 for the Alpaca API
tickers = snp500_symbols

In [166]:
# Set timeframe to one day ("1Day") for the Alpaca API
timeframe = "1Day"

# Get daily closing prices for year 2022 of all stocks in S&P 500 
snp500_price_df = alpaca.get_bars(
    tickers,
    timeframe,
    start = start_date,
    end = end_date
).df

# Display sample data
snp500_price_df.head()

Unnamed: 0_level_0,close,high,low,trade_count,open,volume,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-03 05:00:00+00:00,18.75,19.18,18.2,162182,18.23,44860486,18.777172,AAL
2022-01-04 05:00:00+00:00,19.02,19.46,18.8504,119441,19.215,30433127,19.089447,AAL
2022-01-05 05:00:00+00:00,18.68,19.385,18.51,133200,19.13,35253129,18.91792,AAL
2022-01-06 05:00:00+00:00,18.57,19.0599,18.35,91484,18.89,19726860,18.669406,AAL
2022-01-07 05:00:00+00:00,19.28,19.59,18.58,158007,18.75,43826540,19.210886,AAL
2022-01-10 05:00:00+00:00,18.79,19.39,18.68,117683,19.24,27174942,18.863209,AAL
2022-01-11 05:00:00+00:00,19.02,19.205,18.71,97962,18.88,23499371,19.000391,AAL
2022-01-12 05:00:00+00:00,18.5,19.255,18.35,151523,19.16,42333799,18.679602,AAL
2022-01-13 05:00:00+00:00,19.34,19.7597,18.63,197017,18.73,57325732,19.369388,AAL
2022-01-14 05:00:00+00:00,18.49,19.16,18.4,166605,19.11,46885161,18.651615,AAL


In [168]:
# NOTICE THAT WE DID NOT GET DATA FOR "A"... WHY???
# FIGURE THIS OUT AND THEN ENSURE THE SYMBOLS AND DATA MATCH UP. 
# THEN WE CAN CALCULATE THE DAILY RETURNS.

Unnamed: 0_level_0,close,high,low,trade_count,open,volume,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-03 05:00:00+00:00,18.75,19.1800,18.2000,162182,18.230,44860486,18.777172,AAL
2022-01-04 05:00:00+00:00,19.02,19.4600,18.8504,119441,19.215,30433127,19.089447,AAL
2022-01-05 05:00:00+00:00,18.68,19.3850,18.5100,133200,19.130,35253129,18.917920,AAL
2022-01-06 05:00:00+00:00,18.57,19.0599,18.3500,91484,18.890,19726860,18.669406,AAL
2022-01-07 05:00:00+00:00,19.28,19.5900,18.5800,158007,18.750,43826540,19.210886,AAL
...,...,...,...,...,...,...,...,...
2022-03-08 05:00:00+00:00,157.44,162.8800,155.8000,1139156,158.820,140524583,158.695921,AAPL
2022-03-09 05:00:00+00:00,162.95,163.4100,159.4100,733689,161.475,100505467,162.023601,AAPL
2022-03-10 05:00:00+00:00,158.52,160.3900,155.9800,882864,160.200,114471979,158.079271,AAPL
2022-03-11 05:00:00+00:00,154.73,159.2800,154.5000,877964,158.930,105127762,156.077568,AAPL


In [169]:
# AAL = snp500_price_df[snp500_price_df['symbol']=='AAL'].drop('symbol', axis=1)


snp500_symbols

# for symbol in range(len(snp500_symbols)):
#     print(symbol)
# cool_symbol = snp500_symbols[1]
# cool_symbol

snp500_symbols_list=[]

for x in range(len(snp500_symbols)):
    snp500_symbols_list.append(snp500_symbols[x])

len(snp500_symbols_list)

503

In [170]:


# for x in range(len(snp500_symbols_list)-400):
#     ticker_data_1 = snp500_price_df[snp500_price_df['symbol']==snp500_symbols_list[count]].drop('symbol', axis=1)
#     snp500_2022_returns_df = pd.concat([ticker_data_1, ticker_data_2], axis=1, keys=snp500_symbols_list)
#     count+=1

#snp500_2022_returns_df
# concatenated_df = pd.concat([existing_df, new_data], axis=1)


# for x in range(len(snp500_symbols_list)-490):
#     ticker_data_1 = snp500_price_df[snp500_price_df['symbol']==snp500_symbols_list[count]].drop('symbol', axis=1)
#     concat_df = pd.concat([concat_df, ticker_data_1], axis=1)
#     count=+1
#concat_df

snp500_2022_returns_df = pd.DataFrame()

df_list = []


# Make list of the dataframes. Dataframes we create by acessing the snp500_price_df.
# Then we can concatenate all of those dataframes at the same time.
for x in snp500_symbols_list:
    ticker_data_1 = snp500_price_df[snp500_price_df['symbol'] == x].drop('symbol', axis=1)
    df_list.append(ticker_data_1)    

#ticker_data_2 = snp500_price_df[snp500_price_df['symbol']==snp500_symbols_list[count]].drop('symbol', axis=1)
#concat_df = pd.concat([concat_df, ticker_data_1], axis=1, keys=snp500_symbols_list)

# Concat the list of dataframes. Set the keys of each df to the list of symbols that we created.
snp500_2022_returns_df = pd.concat(df_list, axis=1, keys=snp500_symbols_list)





In [171]:
snp500_2022_returns_df

Unnamed: 0_level_0,A,A,A,A,A,A,A,AAL,AAL,AAL,...,ZION,ZION,ZION,ZTS,ZTS,ZTS,ZTS,ZTS,ZTS,ZTS
Unnamed: 0_level_1,close,high,low,trade_count,open,volume,vwap,close,high,low,...,open,volume,vwap,close,high,low,trade_count,open,volume,vwap
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2022-01-03 05:00:00+00:00,156.48,159.44,153.93,24320,159.00,1855491,155.916852,18.75,19.1800,18.2000,...,63.880,1414361,64.236825,234.03,243.2400,233.3200,40631,242.29,3110633,234.902990
2022-01-04 05:00:00+00:00,151.19,155.63,149.70,34709,155.49,2510184,151.522046,19.02,19.4600,18.8504,...,66.110,1557604,66.314555,225.12,234.0200,222.1400,72176,234.02,5082693,225.646202
2022-01-05 05:00:00+00:00,148.60,153.10,148.53,27423,150.83,2966289,149.705613,18.68,19.3850,18.5100,...,66.465,1121305,66.574460,216.56,227.0000,215.6000,74079,227.00,5352738,218.720396
2022-01-06 05:00:00+00:00,149.12,149.96,145.58,34443,148.85,2502203,148.493000,18.57,19.0599,18.3500,...,67.100,1660365,68.300571,217.42,220.5600,213.6250,53574,215.48,3448961,217.727004
2022-01-07 05:00:00+00:00,145.15,149.73,145.09,28610,149.12,2415233,146.271401,19.28,19.5900,18.5800,...,69.180,1456668,69.918369,211.09,217.4200,210.7800,35901,217.13,2584840,212.467092
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23 05:00:00+00:00,149.23,150.25,148.22,17530,149.01,779426,149.203604,12.71,12.7100,12.3298,...,48.280,1354480,48.435983,145.76,145.8920,143.5400,20025,144.51,1017604,145.236784
2022-12-27 05:00:00+00:00,149.55,149.81,147.30,17148,149.23,879543,149.313607,12.53,12.7900,12.3950,...,48.650,795675,48.808658,145.30,146.1474,143.5700,19294,145.91,957871,145.322438
2022-12-28 05:00:00+00:00,148.09,151.37,147.64,19078,149.78,784312,148.753919,12.32,12.6800,12.2500,...,48.870,776919,48.189316,143.83,146.6400,143.7700,19669,145.18,1443916,144.409503
2022-12-29 05:00:00+00:00,151.09,151.90,149.02,17820,149.61,854392,151.121691,12.70,12.7100,12.2950,...,48.060,789947,48.992500,148.15,148.5100,145.1378,21072,145.20,1298851,147.959023


In [None]:
# GET GEOSPACIAL DATA
# We are interested in US companies that release 'hot' news.
# Where in the US are these companies located?
# We get their location, and we should be able to translate the "name" of that location into "coordinates"
# Once it is in "coordinates", we can plot that point on the Geo map using HVplot


# Frist step would be to get the locations of each company.

# Step 2 is to get access to a dataset that translates cities/towns in the US to coordinates.

# Lastly we can plot those coordinates on the map.

In [5]:
# STEP 2: FIND DATASET TRANSLATING CITY NAME TO COORDINATES
# We can use this API: "GEOCODING API" https://openweathermap.org/api/geocoding-api 
# Here is a description from their website: 

# "Geocoding API is a simple tool that we have developed to ease the search for locations while working with geographic names and coordinates.
# Supporting API calls by geographical coordinates is the most accurate way to specify any location, that is why this method is integrated in all OpenWeather APIs. 
# However, this way is not always suitable for all users.
# Geocoding is the process of transformation of any location name into geographical coordinates, and the other way around (reverse geocoding).
# OpenWeather’s Geocoding API supports both the direct and reverse methods, working at the level of city names, areas and districts, countries and states:
# Direct geocoding converts the specified name of a location or zip/post code into the exact geographical coordinates;
# Reverse geocoding converts the geographical coordinates into the names of the nearby locations."

# We will use direct geocoding.

import requests
# import requests

# Create variable to hold request url
url = "http://api.openweathermap.org/geo/1.0/direct?q=Oakland&limit=5&appid=628878cc6edbe70ff1805be47e1af1ea"

# Add format specifier to request url
#url = url + "?format=json"

# Execute get request
requests.get(url)


# Execute GET request and store response
# response_data = requests.get(url)
# q	required	City name, state code (only for the US) and country code divided by comma. Please use ISO 3166 country codes.
# appid	required	Your unique API key (you can always find it on your account page under the "API key" tab)
# limit	optional	Number of the locations in the API response (up to 5 results can be returned in the API response)

# myCityNameVariable = ""
# for this many rows in our S&P 500 data
#     get the name of city
#     set that name to myCityNameVariable
#     put that myCityNameVariable into the link
#     make that API call

# http://api.openweathermap.org/geo/1.0/direct?q=myCityNameVariable&limit={limit}&appid={API key}


# http://api.openweathermap.org/geo/1.0/direct?q=London&limit=5&appid={API key}

#http://api.openweathermap.org/geo/1.0/direct?q=Oakland&limit=5&appid={3fcc50c01f481adc6307ae95cede2362}

<Response [200]>

In [6]:
print(response_data.content) #I think I am getting an error because I need to confirm my email. However, that email is taking a while to arrive...

b'[{"name":"Oakland","local_names":{"kw":"Oakland","mk":"\xd0\x9e\xd1\x83\xd0\xba\xd0\xbb\xd0\xb5\xd0\xbd\xd0\xb4","pt":"Oakland","pl":"Oakland","ka":"\xe1\x83\x9d\xe1\x83\x99\xe1\x83\x9a\xe1\x83\x94\xe1\x83\x9c\xe1\x83\x93\xe1\x83\x98","ja":"\xe3\x82\xaa\xe3\x83\xbc\xe3\x82\xaf\xe3\x83\xa9\xe3\x83\xb3\xe3\x83\x89","yo":"Oakland","fa":"\xd8\xa7\xd9\x88\xda\xa9\xd9\x84\xd9\x86\xd8\xaf\xd8\x8c \xda\xa9\xd8\xa7\xd9\x84\xdb\x8c\xd9\x81\xd8\xb1\xd9\x86\xdb\x8c\xd8\xa7","sv":"Oakland","sh":"Oakland","sw":"Oakland","fy":"Oakland","ht":"Oakland","hr":"Oakland","so":"Oakland","ur":"\xd8\xa7\xd9\x88\xda\xa9\xd9\x84\xdb\x8c\xd9\x86\xda\x88\xd8\x8c \xda\xa9\xdb\x8c\xd9\x84\xdb\x8c\xd9\x81\xd9\x88\xd8\xb1\xd9\x86\xdb\x8c\xd8\xa7","ro":"Oakland","jv":"Oakland","fi":"Oakland","bn":"\xe0\xa6\x93\xe0\xa6\x95\xe0\xa6\xb2\xe0\xa7\x8d\xe0\xa6\xaf\xe0\xa6\xbe\xe0\xa6\xa8\xe0\xa7\x8d\xe0\xa6\xa1","de":"Oakland","zh":"\xe5\xa5\xa5\xe5\x85\x8b\xe5\x85\xb0/\xe5\xa5\xa7\xe5\x85\x8b\xe8\x98\xad/\xe5\xb1\x8b\xe5\