## Mount drive to read data

In [None]:
from google.colab import drive
# Mount at the relative path 'drive'; later cells copy their output files
# to "drive/My Drive/Capstone/data/", so this cell has to run first.
drive.mount('drive')

Mounted at drive


## Read S&P 500 companies from Wikipedia and store as CSV

In [None]:
import pandas as pd

link = (
    "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#S&P_500_component_stocks"
)
df = pd.read_html(link, header=0)[0]

# Write to CSV
df.to_csv("s&pconstituents.csv", index=False)
!cp "s&pconstituents.csv" "drive/My Drive/Capstone/data/"

In [None]:
# Preview the constituents table scraped from Wikipedia.
df

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
3,ABBV,AbbVie,Health Care,Biotechnology,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989
...,...,...,...,...,...,...,...,...
498,YUM,Yum! Brands,Consumer Discretionary,Restaurants,"Louisville, Kentucky",1997-10-06,1041061,1997
499,ZBRA,Zebra Technologies,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois",2019-12-23,877212,1969
500,ZBH,Zimmer Biomet,Health Care,Health Care Equipment,"Warsaw, Indiana",2001-08-07,1136869,1927
501,ZION,Zions Bancorporation,Financials,Regional Banks,"Salt Lake City, Utah",2001-06-22,109380,1873


## Read industry classifications from Wikipedia and store as CSV

In [None]:
gics_link = (
    "https://en.wikipedia.org/wiki/Global_Industry_Classification_Standard"
)
gics_df = pd.read_html(gics_link, header=0)[0]
gics_df = gics_df[["Sector.1", "Industry Group.1", "Industry.1", "Sub-Industry.1"]]
gics_df.columns = ["Sector", "Industry Group", "Industry", "Sub-Industry"]

# Write to CSV
gics_df.to_csv("gics_classifications.csv", index=False)
!cp "gics_classifications.csv" "drive/My Drive/Capstone/data/"

In [None]:
# Preview the four-column GICS classification table.
gics_df

Unnamed: 0,Sector,Industry Group,Industry,Sub-Industry
0,Energy,Energy,Energy Equipment & Services,Oil & Gas Drilling
1,Energy,Energy,Energy Equipment & Services,Oil & Gas Equipment & Services
2,Energy,Energy,"Oil, Gas & Consumable Fuels",Integrated Oil & Gas
3,Energy,Energy,"Oil, Gas & Consumable Fuels",Oil & Gas Exploration & Production
4,Energy,Energy,"Oil, Gas & Consumable Fuels",Oil & Gas Refining & Marketing
...,...,...,...,...
159,Real Estate,Equity Real Estate Investment Trusts (REITs),Specialized REITs,Data Center REITs
160,Real Estate,Real Estate Management & Development,Real Estate Management & Development,Diversified Real Estate Activities
161,Real Estate,Real Estate Management & Development,Real Estate Management & Development,Real Estate Operating Companies
162,Real Estate,Real Estate Management & Development,Real Estate Management & Development,Real Estate Development


## Read fundamentals from API calls

### Install `ratelimit` to cap requests at 10 API calls per minute

In [None]:
!pip install ratelimit

Collecting ratelimit
  Downloading ratelimit-2.2.1.tar.gz (5.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ratelimit
  Building wheel for ratelimit (setup.py) ... [?25l[?25hdone
  Created wheel for ratelimit: filename=ratelimit-2.2.1-py3-none-any.whl size=5894 sha256=6648cc1bae2b5b24f99c8ab3397cee0921ae2e5877551b9ab07b024410bb6478
  Stored in directory: /root/.cache/pip/wheels/27/5f/ba/e972a56dcbf5de9f2b7d2b2a710113970bd173c4dcd3d2c902
Successfully built ratelimit
Installing collected packages: ratelimit
Successfully installed ratelimit-2.2.1


In [None]:
import requests
import json
from tqdm import tqdm
from ratelimit import limits, sleep_and_retry
import os
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# API key used by call_api. This is None when DATA_JOCKEY_API_KEY is not set.
# NOTE(review): nothing checks for a missing key before requests are sent - confirm
# that is intended.
api_key = os.environ.get("DATA_JOCKEY_API_KEY")
@sleep_and_retry
@limits(calls=10, period=60)
def call_api(symbol, timeout=30):
    """Fetch the financials JSON for one ticker from the DataJockey API.

    The ``limits``/``sleep_and_retry`` decorators cap this function at
    10 calls per 60-second period.

    Args:
        symbol: Ticker symbol sent as the ``ticker`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole download loop.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        Exception: If the response status code is not 200. The message holds
            the status code and the response body (decoded JSON when possible,
            raw text otherwise).
        requests.exceptions.RequestException: On connection errors or timeouts.
    """
    # Let requests build and URL-encode the query string instead of
    # interpolating the values into the URL by hand.
    response = requests.get(
        "https://api.datajockey.io/v0/company/financials",
        params={
            "apikey": api_key,
            "ticker": symbol,
            "period": "Q",
            "filetype": "json",
        },
        timeout=timeout,
    )

    if response.status_code != 200:
        # Error bodies are not guaranteed to be JSON (e.g. an HTML error page);
        # fall back to the raw text so the status code is still reported
        # instead of being masked by a JSON decoding error.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise Exception('API response: {} and {}'.format(response.status_code, detail))
    return response.json()


def build_fundamentals_data(df):
    """Download fundamentals for each symbol and save them as one JSON file.

    Calls ``call_api`` once per symbol. Failures are collected and printed
    after the loop instead of aborting the run, so one bad symbol does not
    discard the others. The successful responses are written to
    ``fundamentals_results.json`` in the working directory and the file is
    then copied to ``drive/My Drive/Capstone/data/``.

    Args:
        df: Iterable of ticker symbols. Despite the name, the notebook passes
            a single column (``df["Symbol"]``), not a whole DataFrame.

    Returns:
        None. The results are only written to disk.

    Raises:
        OSError: If the copy to the Drive folder fails (for example because
            Drive is not mounted). The local JSON file has already been
            written at that point.
    """
    # Imported here so this block is self-contained.
    import shutil

    results = []
    exception_statements = []
    for symbol in tqdm(df):
        try:
            results.append(call_api(symbol))
        except Exception as e:
            # Best effort: remember the failure and continue with the next symbol.
            exception_statements.append(f"Failed for symbol {symbol} with message: {str(e)}")

    for statement in exception_statements:
        print(statement)

    with open('fundamentals_results.json', 'w') as output_file:
        json.dump(results, output_file)

    # Plain-Python copy instead of the `!cp` shell escape: it also works
    # outside IPython and raises if the copy fails instead of only printing
    # a shell error. The trailing slash keeps the target a directory.
    shutil.copy('fundamentals_results.json', 'drive/My Drive/Capstone/data/')

In [None]:
# Long-running cell: the recorded run took about 50 minutes for 503 symbols.
build_fundamentals_data(df["Symbol"])

100%|██████████| 503/503 [50:03<00:00,  5.97s/it]
