# 06 Extract economic indicators from Alpha Vantage

In [1]:
from pyspark.sql.types import *
from pyspark.sql import functions as f
import requests
import pandas as pd 
from datetime import datetime, timedelta
from notebookutils import mssparkutils

In [5]:
# get data from api 
def get_GDP_PC(apikey, timeout=30):
    """Fetch the REAL_GDP_PER_CAPITA series from Alpha Vantage.

    Args:
        apikey: Alpha Vantage API key.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
    """
    # Let requests build and URL-encode the query string instead of
    # formatting the key into the URL by hand.
    r = requests.get(
        'https://www.alphavantage.co/query',
        params={'function': 'REAL_GDP_PER_CAPITA', 'apikey': apikey},
        timeout=timeout,
    )
    # Fail here on an HTTP error rather than later on an unexpected body.
    r.raise_for_status()
    # NOTE(review): the API may still answer 200 with an error/throttle
    # message instead of a series - callers should check for a 'data' entry.
    return r.json()

def get_FDI(apikey, timeout=30):
    """Fetch the monthly FEDERAL_FUNDS_RATE series from Alpha Vantage.

    Despite the function name, the request is for the federal funds rate
    (``function=FEDERAL_FUNDS_RATE``); the name is kept for existing callers.

    Args:
        apikey: Alpha Vantage API key.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
    """
    # Let requests build and URL-encode the query string instead of
    # formatting the key into the URL by hand.
    r = requests.get(
        'https://www.alphavantage.co/query',
        params={
            'function': 'FEDERAL_FUNDS_RATE',
            'interval': 'monthly',
            'apikey': apikey,
        },
        timeout=timeout,
    )
    # Fail here on an HTTP error rather than later on an unexpected body.
    r.raise_for_status()
    # NOTE(review): the API may still answer 200 with an error/throttle
    # message instead of a series - callers should check for a 'data' entry.
    return r.json()

def get_CPI(apikey, timeout=30):
    """Fetch the monthly CPI series from Alpha Vantage.

    Args:
        apikey: Alpha Vantage API key.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
    """
    # Let requests build and URL-encode the query string instead of
    # formatting the key into the URL by hand.
    r = requests.get(
        'https://www.alphavantage.co/query',
        params={'function': 'CPI', 'interval': 'monthly', 'apikey': apikey},
        timeout=timeout,
    )
    # Fail here on an HTTP error rather than later on an unexpected body.
    r.raise_for_status()
    # NOTE(review): the API may still answer 200 with an error/throttle
    # message instead of a series - callers should check for a 'data' entry.
    return r.json()

def get_INF(apikey, timeout=30):
    """Fetch the INFLATION series from Alpha Vantage.

    Args:
        apikey: Alpha Vantage API key.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
    """
    # Let requests build and URL-encode the query string instead of
    # formatting the key into the URL by hand.
    # NOTE(review): 'interval=monthly' is sent as before; confirm against the
    # Alpha Vantage docs whether the INFLATION function honours an interval.
    r = requests.get(
        'https://www.alphavantage.co/query',
        params={
            'function': 'INFLATION',
            'interval': 'monthly',
            'apikey': apikey,
        },
        timeout=timeout,
    )
    # Fail here on an HTTP error rather than later on an unexpected body.
    r.raise_for_status()
    # NOTE(review): the API may still answer 200 with an error/throttle
    # message instead of a series - callers should check for a 'data' entry.
    return r.json()

def get_UNEM(apikey, timeout=30):
    """Fetch the UNEMPLOYMENT series from Alpha Vantage.

    Args:
        apikey: Alpha Vantage API key.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
    """
    # Let requests build and URL-encode the query string instead of
    # formatting the key into the URL by hand.
    r = requests.get(
        'https://www.alphavantage.co/query',
        params={'function': 'UNEMPLOYMENT', 'apikey': apikey},
        timeout=timeout,
    )
    # Fail here on an HTTP error rather than later on an unexpected body.
    r.raise_for_status()
    # NOTE(review): the API may still answer 200 with an error/throttle
    # message instead of a series - callers should check for a 'data' entry.
    return r.json()

def transform_spdf(data):
    """Flatten one indicator payload into a long-format Spark DataFrame.

    Args:
        data: Decoded JSON payload. It must be a dict holding a ``name``
            entry and a ``data`` entry; ``data`` is a list of dicts from
            which the ``date`` and ``value`` entries are read.

    Returns:
        A Spark DataFrame with the string columns ``name``, ``date`` and
        ``value``, one row per element of ``data['data']``. An empty list
        yields an empty DataFrame with the same columns.

    Raises:
        ValueError: The payload has no ``name``/``data`` entry. This is
            presumably what an API error or rate-limit response looks like;
            the payload is included in the message to make that visible.
    """
    if not isinstance(data, dict) or 'name' not in data or 'data' not in data:
        raise ValueError(
            "payload has no 'name'/'data' entries, cannot build a series: "
            "{0!r}".format(data)
        )

    series_name = data['name']
    # Build the rows directly; a missing 'date'/'value' key becomes null,
    # matching what a map lookup on the column would return.
    rows = [
        (series_name, point.get('date'), point.get('value'))
        for point in data['data']
    ]
    # An explicit schema avoids type inference, which cannot handle an
    # empty list of rows.
    return spark.createDataFrame(
        rows, schema='name string, date string, value string'
    )

In [3]:
if __name__ == "__main__":
    # The Alpha Vantage key is read from key-vault instead of being
    # hard-coded in the notebook.
    apikey = mssparkutils.credentials.getSecret('synapse-key12345','alphavantage-key')

    # Download every indicator first, in this fixed order ...
    fetchers = (get_GDP_PC, get_FDI, get_CPI, get_INF, get_UNEM)
    payloads = [fetch(apikey) for fetch in fetchers]

    # ... then turn each payload into a (name, date, value) Spark DataFrame.
    frames = [transform_spdf(payload) for payload in payloads]

    # Stack the frames on top of each other; union matches columns by
    # position, which is safe because every frame has the same layout.
    all_ = frames[0]
    for frame in frames[1:]:
        all_ = all_.union(frame)

    print('load data into filepath')

    # The data lake name is derived from the workspace name by dropping its
    # first 7 characters - presumably a fixed prefix; confirm the naming
    # convention before reusing this in another workspace.
    workspace_nm = mssparkutils.env.getWorkspaceName()
    datalake_nm = 'datalake'+workspace_nm[7:]
    file_path = 'abfss://files@{0}.dfs.core.windows.net/synapse/workspaces/data/economic_index'.format(datalake_nm)

    # Replace whatever was written by a previous run.
    all_.write.parquet(file_path, mode='overwrite')