# Data Collection and Storage

1) Preliminary Setup
2) Download Data from FRED API
3) Save Data into Google BigQuery

# 1. Preliminary Setup

In [1]:
# install api for access financial data from FRED website https://fred.stlouisfed.org/
!pip install pyfredapi --upgrade --quiet

# install api for access google bigquery
!pip install google-cloud-bigquery --quiet

In [20]:
# import api for access financial data 
import pyfredapi as pf
# import api for access google bigquery
from google.cloud import bigquery as bq
# import for getting details for authentication and project id
import google.auth
# import pandas for data manipulation
import pandas as pd

In [3]:
# Request a FRED API key from the page below:
# https://fred.stlouisfed.org/docs/api/api_key.html
# pyfredapi reads the key from the FRED_API_KEY environment variable.

# 2. Download Data from FRED API

In [37]:
# Helper that downloads one FRED series and applies a light clean-up
def get(series_id):
    """Download a FRED series and return it as a single-column frame.

    Expects ``pf.get_series`` to return a frame that has ``date`` and
    ``value`` columns.

    Parameters
    ----------
    series_id : str
        FRED series identifier; also used as the name of the value column.

    Returns
    -------
    DataFrame indexed by ``date`` with one column named ``series_id``;
    rows containing missing values are removed.
    """
    raw = pf.get_series(series_id)
    return (
        raw[["date", "value"]]
        .set_index("date")
        .rename(columns={"value": series_id})
        .dropna()
    )

# Define a function to change data frequency
def change_freq(df, freq='M'):
    """Re-bin a datetime-indexed frame to ``freq`` and label rows by month start.

    Rows are grouped into ``freq``-sized bins and the maximum of each bin is
    kept; bins without observations are filled by linear interpolation and
    all values are rounded to three decimals. Each bin is then labelled with
    the first day of the month its bin label falls in.

    Parameters
    ----------
    df : DataFrame
        Frame whose index is datetime-like.
    freq : str or DateOffset, default 'M'
        Bin size as a pandas offset alias. The legacy period-end aliases
        'M', 'Q', 'Y' and 'A' are accepted on every pandas version.

    Returns
    -------
    DataFrame with the same columns as ``df`` and an index named ``date``.
    """
    # Local import so the function does not depend on another cell's imports.
    from pandas.tseries.frequencies import to_offset

    # pandas 2.2 renamed the period-end offset aliases ('M' -> 'ME', ...) and
    # later releases reject the old spelling, while older releases only know
    # the old one. Try the new spelling first and fall back to the given one.
    renamed = {'M': 'ME', 'Q': 'QE', 'Y': 'YE', 'A': 'YE'}
    preferred = renamed.get(freq, freq) if isinstance(freq, str) else freq
    try:
        offset = to_offset(preferred)
    except ValueError:
        offset = to_offset(freq)

    temp_df = df.groupby(pd.Grouper(freq=offset)).max().interpolate().round(3)

    # Snap every bin label to the first day of its month directly on the
    # index; no helper columns are created, so input columns that happen to
    # be called 'year', 'month' or 'date' are left untouched.
    temp_df.index = temp_df.index.to_period('M').to_timestamp().rename('date')
    return temp_df

In [38]:
# FRED series ids to download, each mapped to a human-readable description
dict_data = {
    "FEDFUNDS": "Federal Funds Effective Rate",
    "TB3MS": "3-Month Treasury Bill Secondary Market Rate, Discount Basis",
    "CPALTT01USM657N": "Consumer Price Index: All Items: Total for United States",
    "UNRATE": "Unemployment Rate",
    "A191RP1Q027SBEA": "Gross Domestic Product",
}

# Download every series into dict_df, keyed by its series id
dict_df = {}
for data in dict_data:
    frame = get(data)
    # Series not reported at monthly frequency are passed through change_freq
    is_monthly = pf.get_series_info(series_id=data).frequency_short == 'M'
    if not is_monthly:
        frame = change_freq(frame)
    dict_df[data] = frame

# 3. Save Data into Google BigQuery

In [39]:
# Authentication notes:
# - a service-account key is generated and downloaded in JSON format
# - the environment variable "GOOGLE_APPLICATION_CREDENTIALS" holds the path
#   to that JSON file, e.g. '/Users/.../project_id.json'

# Default credentials and the project id are resolved together
credentials, project_id = google.auth.default()
database_id = 'data'

# Upload each series to its own table, replacing any table that already exists
for df_name, series_df in dict_df.items():
    table_id = df_name
    destination = f'{database_id}.{table_id}'
    series_df.to_gbq(destination, project_id=project_id, if_exists='replace')

100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 7810.62it/s]
100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 8081.51it/s]
100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 8322.03it/s]
100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 6223.00it/s]
100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 8338.58it/s]


In [40]:
# Read every uploaded table back to confirm it can be queried
client = bq.Client()
for df_name in dict_df.keys():
    table_id = df_name
    sql = f'select * from {project_id}.{database_id}.{table_id}'
    query_job = client.query(sql)
    # Materialise the query result as a DataFrame and show it
    print(query_job.to_dataframe())

          date  FEDFUNDS
0   1961-08-01      2.00
1   2008-06-01      2.00
2   2008-08-01      2.00
3   1956-02-01      2.50
4   1956-03-01      2.50
..         ...       ...
831 1983-10-01      9.48
832 1980-05-01     10.98
833 2008-11-01      0.39
834 2016-07-01      0.39
835 2008-09-01      1.81

[836 rows x 2 columns]
           date  TB3MS
0    1956-03-01   2.25
1    1960-12-01   2.25
2    2018-10-01   2.25
3    2008-01-01   2.75
4    1993-01-01   3.00
...         ...    ...
1077 1948-08-01   1.06
1078 1950-10-01   1.31
1079 1951-07-01   1.56
1080 1951-11-01   1.56
1081 1952-07-01   1.81

[1082 rows x 2 columns]
          date  CPALTT01USM657N
0   1960-03-01         0.000000
1   1960-05-01         0.000000
2   1960-07-01         0.000000
3   1960-08-01         0.000000
4   1960-09-01         0.000000
..         ...              ...
763 2021-03-01         0.708327
764 2002-06-01         0.055617
765 2000-06-01         0.524781
766 2023-10-01        -0.038338
767 1995-03-01         