# This Notebook Is for Exploring Importing Data from IEX into Apache Cassandra
## Future work will be converted into Python scripts, which will be easier to 'dockerize' for managing requirements and run environments
### Although Cassandra's advantages are more in enterprise-level write performance, I think its widespread use and compatibility with DFS will make it a better option than Mongo for any crazy timelines where this turns into anything

In [None]:
# Import Requirements
import requests
import csv
from bs4 import BeautifulSoup

In [None]:
#Use a requests session to pull Wikipedia's S&P500 list page; the scrape depends on that page's layout staying stable
session = requests.Session()
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
#A timeout keeps the cell from hanging forever on a stalled connection
response = session.get(url, timeout=30)
#Stop here on an HTTP error instead of parsing an error page as if it were the list
response.raise_for_status()
#Load the raw HTML bytes of the response into BeautifulSoup using the built-in html parser
soup = BeautifulSoup(response.content,'html.parser')

In [None]:
#Use the 'constituents' id to locate the relevant HTML table
const_table = soup.find(id='constituents')
if const_table is None:
    #Fail with a readable message rather than an AttributeError on None further down
    raise ValueError("Could not find the 'constituents' table; the page layout may have changed")
#Slicing from index 1 drops the table header, which is also a tr
table_row = const_table.find_all('tr')[1:]
#Create an empty list to receive the tickers for the S&P500 companies
sp500 = []
#Each 'row' is a BSoup object that can be parsed further into its table data cells
for row in table_row:
    #Named 'cells' (not 'columns') so re-running this cell cannot clobber the
    # cassandra.cqlengine 'columns' module imported further down the notebook
    cells = row.find_all('td')
    #Skip rows that do not carry enough cells to hold a symbol
    if len(cells) < 2:
        continue
    #Based on the layout of the table, the second column will contain the symbols;
    # strip surrounding whitespace so stray newlines never end up in the csv or a url
    sp500.append(cells[1].text.strip())
#Confirm the length of the list as 505
print(len(sp500))

In [None]:
#Persist the scraped symbols to disk, one single-column csv row per ticker
with open('./sp500.csv', 'w', newline='') as out_file:
    symbol_writer = csv.writer(out_file, delimiter=',')
    #Each ticker is wrapped in a list so it is written as one field, not split per character
    symbol_writer.writerows([symbol] for symbol in sp500)

In [None]:
#Read the csv back and print every row next to its 1-based position
with open('./sp500.csv', 'r', newline='') as in_file:
    #Stays 0 when the file holds no rows
    counter = 0
    for counter, row in enumerate(csv.reader(in_file), start=1):
        print(row, counter)

In [None]:
#Import Cassandra driver
from cassandra.cluster import Cluster
from cassandra.cqlengine.management import sync_table

In [None]:
#Connect to the Cassandra node on localhost and point the resulting session at the sp500 keyspace
# NOTE: this rebinds the name 'session', which held the requests session used for the Wikipedia fetch
cluster = Cluster(contact_points=['127.0.0.1'])
session = cluster.connect()
session.set_keyspace('sp500')

In [None]:
#Import the ORM Model for future mapping, but I'm going to try and make this work iteratively
from cassandra.cqlengine.models import Model
from cassandra.cqlengine import columns

In [None]:
#Open the S&P500 csv and collect the ticker symbols in the form IEX expects
list_of_sp500 = [] #This doesn't need to be done twice, but I'm keeping the cells modular for easier copypasta later
with open('./sp500.csv','r',newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    for row in csvreader:
        #A blank line comes back from the reader as an empty list; skip it instead of failing on row[0]
        if not row:
            continue
        #Each row is a list with the ticker as its sole value.
        #Wikipedia lists the tickers with hyphens but IEX's api expects periods;
        # replace() leaves symbols without a hyphen untouched, so no branch is needed
        list_of_sp500.append(row[0].strip().replace('-','.'))

In [None]:
#Eyeball the normalized ticker list, one symbol per line
if list_of_sp500:
    print(*list_of_sp500, sep='\n')

In [None]:
#Register the cqlengine connection against the localhost node, with sp500 as the default keyspace
from cassandra.cqlengine import connection
connection.setup(hosts=['127.0.0.1'], default_keyspace="sp500", protocol_version=3)
#Define a function that will accept a ticker string and request
# the 5 year JSON history from IEX
def GetAndReturnJSON(ticker):
    '''Fetch the 5 year chart for a stock ticker from IEX and return the decoded JSON.

    ticker -- string corresponding to a stock ticker, e.g. 'MMM'

    Returns whatever the response body decodes to; the caller iterates over it
    and treats each item as one record.

    Raises requests.HTTPError when IEX answers with an error status, other
    requests exceptions on connection problems or timeout, and ValueError when
    the body is not valid JSON.
    '''
    from urllib.parse import quote
    #Build out the url string from the ticker for 5 year history; quoting keeps an
    # unexpected character in a scraped symbol from changing the request path
    url = "https://api.iextrading.com/1.0/stock/"+quote(ticker, safe='')+"/chart/5y"
    #The context manager closes the session (and its pooled connections) after each call
    with requests.Session() as session:
        #A timeout keeps one stalled request from hanging the whole ingest loop
        response = session.get(url, timeout=30)
        #Surface HTTP errors here rather than as a confusing JSON decode failure
        response.raise_for_status()
        return response.json()

#Pass one IEX JSON record into a Cassandra Model and create that row
# (the original notes call the target the raw_data table)

#Holds the model class once it has been built and synced, so that work happens one time only
_RAW_DATA_MODEL = None

#Keys copied verbatim from each IEX record into the identically named model columns
_IEX_FIELDS = ('change', 'changeOverTime', 'changePercent', 'close', 'date',
               'high', 'label', 'low', 'open', 'unadjustedVolume', 'volume',
               'vwap')


def _GetRawDataModel():
    '''Return the Raw_data model, defining it and syncing its table on first use only.'''
    global _RAW_DATA_MODEL
    if _RAW_DATA_MODEL is None:
        #Still defined lazily inside a function (exploratory phase), but no longer once per row
        class Raw_data(Model):
            id = columns.UUID(primary_key=True)
            ticker = columns.Text() #This one will not be returned by IEX, but will be populated while iterating
            change = columns.Float()
            changeOverTime = columns.Double()
            changePercent = columns.Float()
            close = columns.Float()
            date = columns.Text() #The IEX returns it as a date string, which would need to be processed later
            high = columns.Float()
            label = columns.Text()
            low = columns.Float()
            open = columns.Float()
            unadjustedVolume = columns.BigInt()
            volume = columns.BigInt()
            vwap = columns.Float()
        sync_table(Raw_data)
        #Only cache after a successful sync so a failed sync is retried on the next call
        _RAW_DATA_MODEL = Raw_data
    return _RAW_DATA_MODEL


def InsertStockData(ticker,JSON):
    '''Insert one record of IEX chart data for a ticker as a new row.

    ticker -- the stock symbol; stored on the row because IEX does not return it
    JSON   -- a single decoded record (dict) from the IEX chart response

    Keys missing from the record are stored as empty (None) columns, so a
    record without e.g. 'vwap' is still inserted rather than dropped.
    A failed save is reported with print and skipped so a long-running load
    keeps going. Always returns None.
    '''
    import uuid
    Raw_data = _GetRawDataModel()
    #.get() yields None for an absent key instead of raising KeyError
    values = {field: JSON.get(field) for field in _IEX_FIELDS}
    daily_stock = Raw_data(id=uuid.uuid1(), ticker=ticker, **values)
    try:
        daily_stock.save()
    except Exception as error:
        #Best effort: report the failure instead of hiding it, then carry on with the next record
        print('Could not insert '+str(ticker)+' record for '+str(values.get('date'))+': '+repr(error))
    return

In [65]:
#Walk every S&P500 symbol: download its 5 year history, then store it one record at a time
# a failed download is not caught here, so this still needs try/except for continuous use
total = len(list_of_sp500)
counter = 0
for counter, ticker in enumerate(list_of_sp500, start=1):
    print('Grabbing 5 year stock data for {}'.format(ticker))
    daily_records = GetAndReturnJSON(ticker)
    print('Stock data acquired, inserting into db')
    for record in daily_records:
        InsertStockData(ticker, record)
    #Progress line: the symbol just finished and its position in the full list
    print('Done with {}. Currently on #{} of {}'.format(ticker, counter, total))

Grabbing 5 year stock data for MMM
Stock data acquired, inserting into db
Done with MMM. Currently on #1 of 505
Grabbing 5 year stock data for ABT
Stock data acquired, inserting into db
Done with ABT. Currently on #2 of 505
Grabbing 5 year stock data for ABBV
Stock data acquired, inserting into db
Done with ABBV. Currently on #3 of 505
Grabbing 5 year stock data for ABMD
Stock data acquired, inserting into db
Done with ABMD. Currently on #4 of 505
Grabbing 5 year stock data for ACN
Stock data acquired, inserting into db
Done with ACN. Currently on #5 of 505
Grabbing 5 year stock data for ATVI
Stock data acquired, inserting into db
Done with ATVI. Currently on #6 of 505
Grabbing 5 year stock data for ADBE
Stock data acquired, inserting into db
Done with ADBE. Currently on #7 of 505
Grabbing 5 year stock data for AMD
Stock data acquired, inserting into db
Done with AMD. Currently on #8 of 505
Grabbing 5 year stock data for AAP
Stock data acquired, inserting into db
Done with AAP. Current

Stock data acquired, inserting into db
Done with BBY. Currently on #73 of 505
Grabbing 5 year stock data for BIIB
Stock data acquired, inserting into db
Done with BIIB. Currently on #74 of 505
Grabbing 5 year stock data for BLK
Stock data acquired, inserting into db
Done with BLK. Currently on #75 of 505
Grabbing 5 year stock data for HRB
Stock data acquired, inserting into db
Done with HRB. Currently on #76 of 505
Grabbing 5 year stock data for BA
Stock data acquired, inserting into db
Done with BA. Currently on #77 of 505
Grabbing 5 year stock data for BKNG
Stock data acquired, inserting into db
Done with BKNG. Currently on #78 of 505
Grabbing 5 year stock data for BWA
Stock data acquired, inserting into db
Done with BWA. Currently on #79 of 505
Grabbing 5 year stock data for BXP
Stock data acquired, inserting into db
Done with BXP. Currently on #80 of 505
Grabbing 5 year stock data for BSX
Stock data acquired, inserting into db
Done with BSX. Currently on #81 of 505
Grabbing 5 year 

Stock data acquired, inserting into db
Done with DRI. Currently on #145 of 505
Grabbing 5 year stock data for DVA
Stock data acquired, inserting into db
Done with DVA. Currently on #146 of 505
Grabbing 5 year stock data for DE
Stock data acquired, inserting into db
Done with DE. Currently on #147 of 505
Grabbing 5 year stock data for DAL
Stock data acquired, inserting into db
Done with DAL. Currently on #148 of 505
Grabbing 5 year stock data for XRAY
Stock data acquired, inserting into db
Done with XRAY. Currently on #149 of 505
Grabbing 5 year stock data for DVN
Stock data acquired, inserting into db
Done with DVN. Currently on #150 of 505
Grabbing 5 year stock data for FANG
Stock data acquired, inserting into db
Done with FANG. Currently on #151 of 505
Grabbing 5 year stock data for DLR
Stock data acquired, inserting into db
Done with DLR. Currently on #152 of 505
Grabbing 5 year stock data for DFS
Stock data acquired, inserting into db
Done with DFS. Currently on #153 of 505
Grabbin

Stock data acquired, inserting into db
Done with GIS. Currently on #217 of 505
Grabbing 5 year stock data for GM
Stock data acquired, inserting into db
Done with GM. Currently on #218 of 505
Grabbing 5 year stock data for GPC
Stock data acquired, inserting into db
Done with GPC. Currently on #219 of 505
Grabbing 5 year stock data for GILD
Stock data acquired, inserting into db
Done with GILD. Currently on #220 of 505
Grabbing 5 year stock data for GPN
Stock data acquired, inserting into db
Done with GPN. Currently on #221 of 505
Grabbing 5 year stock data for GS
Stock data acquired, inserting into db
Done with GS. Currently on #222 of 505
Grabbing 5 year stock data for GT
Stock data acquired, inserting into db
Done with GT. Currently on #223 of 505
Grabbing 5 year stock data for GWW
Stock data acquired, inserting into db
Done with GWW. Currently on #224 of 505
Grabbing 5 year stock data for HAL
Stock data acquired, inserting into db
Done with HAL. Currently on #225 of 505
Grabbing 5 ye

Stock data acquired, inserting into db
Done with LH. Currently on #289 of 505
Grabbing 5 year stock data for LRCX
Stock data acquired, inserting into db
Done with LRCX. Currently on #290 of 505
Grabbing 5 year stock data for LW
Stock data acquired, inserting into db
Done with LW. Currently on #291 of 505
Grabbing 5 year stock data for LEG
Stock data acquired, inserting into db
Done with LEG. Currently on #292 of 505
Grabbing 5 year stock data for LEN
Stock data acquired, inserting into db
Done with LEN. Currently on #293 of 505
Grabbing 5 year stock data for LLY
Stock data acquired, inserting into db
Done with LLY. Currently on #294 of 505
Grabbing 5 year stock data for LNC
Stock data acquired, inserting into db
Done with LNC. Currently on #295 of 505
Grabbing 5 year stock data for LIN
Stock data acquired, inserting into db
Done with LIN. Currently on #296 of 505
Grabbing 5 year stock data for LKQ
Stock data acquired, inserting into db
Done with LKQ. Currently on #297 of 505
Grabbing 5

Stock data acquired, inserting into db
Done with OKE. Currently on #361 of 505
Grabbing 5 year stock data for ORCL
Stock data acquired, inserting into db
Done with ORCL. Currently on #362 of 505
Grabbing 5 year stock data for PCAR
Stock data acquired, inserting into db
Done with PCAR. Currently on #363 of 505
Grabbing 5 year stock data for PKG
Stock data acquired, inserting into db
Done with PKG. Currently on #364 of 505
Grabbing 5 year stock data for PH
Stock data acquired, inserting into db
Done with PH. Currently on #365 of 505
Grabbing 5 year stock data for PAYX
Stock data acquired, inserting into db
Done with PAYX. Currently on #366 of 505
Grabbing 5 year stock data for PYPL
Stock data acquired, inserting into db
Done with PYPL. Currently on #367 of 505
Grabbing 5 year stock data for PNR
Stock data acquired, inserting into db
Done with PNR. Currently on #368 of 505
Grabbing 5 year stock data for PBCT
Stock data acquired, inserting into db
Done with PBCT. Currently on #369 of 505
G

Stock data acquired, inserting into db
Done with SYY. Currently on #433 of 505
Grabbing 5 year stock data for TROW
Stock data acquired, inserting into db
Done with TROW. Currently on #434 of 505
Grabbing 5 year stock data for TTWO
Stock data acquired, inserting into db
Done with TTWO. Currently on #435 of 505
Grabbing 5 year stock data for TPR
Stock data acquired, inserting into db
Done with TPR. Currently on #436 of 505
Grabbing 5 year stock data for TGT
Stock data acquired, inserting into db
Done with TGT. Currently on #437 of 505
Grabbing 5 year stock data for TEL
Stock data acquired, inserting into db
Done with TEL. Currently on #438 of 505
Grabbing 5 year stock data for FTI
Stock data acquired, inserting into db
Done with FTI. Currently on #439 of 505
Grabbing 5 year stock data for TFX
Stock data acquired, inserting into db
Done with TFX. Currently on #440 of 505
Grabbing 5 year stock data for TXN
Stock data acquired, inserting into db
Done with TXN. Currently on #441 of 505
Grabb

Stock data acquired, inserting into db
Done with ZTS. Currently on #505 of 505
