1 Getting data from APIs


In [2]:
import pandas as pd
import requests


1.1 Accessing APIs Through a URL

For this project, I'll extract stock market information from the AlphaVantage API.
First,get my own API key and save it in a .env file. Import it into the code base by creating a config module.


In [3]:
# Import settings
from config import settings

# Use `dir` to list attributes
dir(settings)

['Config',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_vars__',
 '__config__',
 '__custom_root_type__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__exclude_fields__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_validators__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__include_fields__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__json_encoder__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__post_root_validators__',
 '__pre_root_validators__',
 '__pretty__',
 '__private_attributes__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr_args__',
 '__repr_name__',
 '__repr_str__',
 '__rich_repr__',
 '__schema_cache__',
 '__setattr__',
 '__setstate__',
 '__signature__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__try_update_forward_refs__',
 '__validators__',
 '_abc_impl',
 '_build_values',
 '_calculate_keys',
 '_copy_and_set_values',
 '_decompo

In [4]:
#Create a URL to obtain "SCOM" (Safaricom) data
ticker = "SCOM"
output_size = "compact"
data_type = "json"

url = (
    "https://learn-api.wqu.edu/1/data-services/alpha-vantage/query?"
    "function=TIME_SERIES_DAILY&"
    f"symbol={ticker}&"
    f"outputsize={output_size}&"
    f"datatype={data_type}&"
    f"apikey={settings.alpha_api_key}"
)
print("url type:", type(url))
url

url type: <class 'str'>


'https://learn-api.wqu.edu/1/data-services/alpha-vantage/query?function=TIME_SERIES_DAILY&symbol=SCOM&outputsize=compact&datatype=json&apikey=ddf36331cfed136083139698889a37becd4236455a9458027e11fc05f25e7639b76d08270cda161f1550d7fc1bb675ca6a3a90ac6246d080028f00e7954020d604d2e7dac7d96f7aae7854839557e328531f4de8222907c21eab51b815bebf1db1d27bfac633c02b1c69e70912f587c2c25dcb2f1265af35d3a96391ef3d2dfd'

1.2 Accessing APIs Through a Request

To build applications, we need to access the data via requests as opposed to the url links since computer programs access APIs by making requests.I'll build the first request by using the url created above then use the requests library to make a get request to the URL and Assign the response to the variable 'response'


In [5]:
response = requests.get(url=url)

print("response type:", type(response))

response type: <class 'requests.models.Response'>


In [6]:
# Use `dir` on your `response`
dir(response)

['__attrs__',
 '__bool__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__nonzero__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_content',
 '_content_consumed',
 '_next',
 'apparent_encoding',
 'close',
 'connection',
 'content',
 'cookies',
 'elapsed',
 'encoding',
 'headers',
 'history',
 'is_permanent_redirect',
 'is_redirect',
 'iter_content',
 'iter_lines',
 'json',
 'links',
 'next',
 'ok',
 'raise_for_status',
 'raw',
 'reason',
 'request',
 'status_code',
 'text',
 'url']

In [7]:
#Assign the status code for your response to the variable response_code.200 means "OK". It's the standard response for a successful HTTP request
response_code = response.status_code

print("code type:", type(response_code))
response_code

code type: <class 'int'>


200

In [8]:
#Assign the test for your response to the variable response_text
response_text = response.text

print("response_text type:", type(response_text))
print(response_text[:200])

response_text type: <class 'str'>
{
    "Meta Data": {
        "1. Information": "Daily Prices (open, high, low, close) and Volumes",
        "2. Symbol": "SCOM",
        "3. Last Refreshed": "2020-03-27",
        "4. Output Size": "C


In [9]:
#Assign the status code for your response to the variable response_code. 200 means "ok" .In other words, it worked! We successfully received data back from the AlphaVantage API.
response_code = response.status_code

print("code type:", type(response_code))
response_code

code type: <class 'int'>


200

In [10]:
#Use json method to access a dictionary version of the data. Assign it to the variable name response_data
response_data = response.json()

print("response_data type:", type(response_data))

response_data type: <class 'dict'>


In [11]:
# Print `response_data` keys
response_data.keys()

dict_keys(['Meta Data', 'Time Series (Daily)'])

In [12]:
# Extract `"Time Series (Daily)"` value from `response_data`
stock_data = response_data["Time Series (Daily)"]

print("stock_data type:", type(stock_data))

# Extract data for one of the days in `stock_data`
stock_data['2020-02-20']

stock_data type: <class 'dict'>


{'1. open': '16.9600',
 '2. high': '16.9600',
 '3. low': '16.7280',
 '4. close': '16.7280',
 '5. volume': '1152'}

In [13]:
#Read the data into a dataframe
df_safaricom = pd.DataFrame.from_dict(stock_data, orient="index", dtype=float)

print("df_safaricom shape:", df_safaricom.shape)
print()
print(df_safaricom.info())
df_safaricom.head(10)

df_safaricom shape: (100, 5)

<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 2020-03-27 to 2019-11-04
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   1. open    100 non-null    float64
 1   2. high    100 non-null    float64
 2   3. low     100 non-null    float64
 3   4. close   100 non-null    float64
 4   5. volume  100 non-null    float64
dtypes: float64(5)
memory usage: 4.7+ KB
None


Unnamed: 0,1. open,2. high,3. low,4. close,5. volume
2020-03-27,24.8841,24.8841,24.8841,24.8841,14.0
2020-03-26,24.8841,24.8841,24.8841,24.8841,1.0
2020-03-25,29.892,29.892,29.892,29.892,0.0
2020-03-24,28.7573,28.7573,28.7573,28.7573,421.0
2020-03-23,35.5,35.5,35.5,35.5,92.0
2020-03-20,35.5,35.5,35.5,35.5,5.0
2020-03-19,34.02,34.02,31.86,31.86,410.0
2020-03-18,34.24,38.1705,34.24,34.4913,795.0
2020-03-17,34.746,34.746,34.746,34.746,76.0
2020-03-16,31.98,34.746,31.98,34.746,2387.0


In [14]:
# Convert `df_ambuja` index to `DatetimeIndex`
df_safaricom.index=pd.to_datetime(df_safaricom.index)

# Name index "date"
df_safaricom.index.name="date"

print(df_safaricom.info())
df_safaricom.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 100 entries, 2020-03-27 to 2019-11-04
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   1. open    100 non-null    float64
 1   2. high    100 non-null    float64
 2   3. low     100 non-null    float64
 3   4. close   100 non-null    float64
 4   5. volume  100 non-null    float64
dtypes: float64(5)
memory usage: 4.7 KB
None


Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-27,24.8841,24.8841,24.8841,24.8841,14.0
2020-03-26,24.8841,24.8841,24.8841,24.8841,1.0
2020-03-25,29.892,29.892,29.892,29.892,0.0
2020-03-24,28.7573,28.7573,28.7573,28.7573,421.0
2020-03-23,35.5,35.5,35.5,35.5,92.0


In [15]:
# Remove numbering from `df_safaricom` column names
df_safaricom.columns = [c.split(". ")[1] for c in df_safaricom.columns]

print(df_safaricom.info())
df_safaricom.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 100 entries, 2020-03-27 to 2019-11-04
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    100 non-null    float64
 1   high    100 non-null    float64
 2   low     100 non-null    float64
 3   close   100 non-null    float64
 4   volume  100 non-null    float64
dtypes: float64(5)
memory usage: 4.7 KB
None


Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-27,24.8841,24.8841,24.8841,24.8841,14.0
2020-03-26,24.8841,24.8841,24.8841,24.8841,1.0
2020-03-25,29.892,29.892,29.892,29.892,0.0
2020-03-24,28.7573,28.7573,28.7573,28.7573,421.0
2020-03-23,35.5,35.5,35.5,35.5,92.0


2. Defensive Programming

Defensive programming is the practice of writing code which will continue to function, even if something goes wrong.

In [16]:
def get_daily(ticker,output_size):

    """Get daily time series of an equity from AlphaVantage API.

    Parameters
    ----------
    ticker : str
        The ticker symbol of the equity.
    output_size : str, optional
        Number of observations to retrieve. "compact" returns the
        latest 100 observations. "full" returns all observations for
        equity. By default "full".

    Returns
    -------
    pd.DataFrame
        Columns are 'open', 'high', 'low', 'close', and 'volume'.
        All are numeric.
    """
    # Create URL (8.1.5)
    url = (
        "https://learn-api.wqu.edu/1/data-services/alpha-vantage/query?"
        "function=TIME_SERIES_DAILY&"
        f"symbol={ticker}&"
        f"outputsize={output_size}&"
        f"datatype=json&"
        f"apikey={settings.alpha_api_key}"
    )

    # Send request to API (8.1.6)
    response = requests.get(url=url)
    # Extract JSON data from response (8.1.10)
    response_data = response.json()
    if "Time Series (Daily)" not in response_data.keys():
        raise Exception(
            f"invalid API call. Check that the {ticker} symbol is correct"
        )
    # Read data into DataFrame (8.1.12 & 8.1.13)
    stock_data = response_data["Time Series (Daily)"]
    df = pd.DataFrame.from_dict(stock_data, orient="index", dtype=float)

    # Convert index to `DatetimeIndex` named "date" (8.1.14)
    df.index=pd.to_datetime(df.index)
    df.index.name="date"

    # Remove numbering from columns (8.1.15)
    df.columns = [c.split(". ")[1] for c in df.columns]

    # Return DataFrame
    return df

In [17]:
# Test your function
df_safaricom = get_daily(ticker="SCOM", output_size="compact")

print(df_safaricom.info())
df_safaricom.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 100 entries, 2020-03-27 to 2019-11-04
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    100 non-null    float64
 1   high    100 non-null    float64
 2   low     100 non-null    float64
 3   close   100 non-null    float64
 4   volume  100 non-null    float64
dtypes: float64(5)
memory usage: 4.7 KB
None


Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-27,24.8841,24.8841,24.8841,24.8841,14.0
2020-03-26,24.8841,24.8841,24.8841,24.8841,1.0
2020-03-25,29.892,29.892,29.892,29.892,0.0
2020-03-24,28.7573,28.7573,28.7573,28.7573,421.0
2020-03-23,35.5,35.5,35.5,35.5,92.0


In [18]:
# Test your Exception
df_test = get_daily(ticker="SCOM", output_size="compact")